ena/unify/
mod.rs

1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Union-find implementation. The main type is `UnificationTable`.
12//!
13//! You can define your own type for the *keys* in the table, but you
14//! must implement `UnifyKey` for that type. The assumption is that
15//! keys will be newtyped integers, hence we require that they
16//! implement `Copy`.
17//!
18//! Keys can have values associated with them. The assumption is that
19//! these values are cheaply cloneable (ideally, `Copy`), and some of
20//! the interfaces are oriented around that assumption. If you just
21//! want the classical "union-find" algorithm where you group things
22//! into sets, use the `Value` type of `()`.
23//!
24//! When you have keys with non-trivial values, you must also define
25//! how those values can be merged. As part of doing this, you can
26//! define the "error" type to return on error; if errors are not
27//! possible, use `NoError` (an uninstantiable struct). Using this
28//! type also unlocks various more ergonomic methods (e.g., `union()`
29//! in place of `unify_var_var()`).
30//!
31//! The best way to see how it is used is to read the `tests.rs` file;
32//! search for e.g. `UnitKey`.
33
34use std::fmt::Debug;
35use std::marker;
36use std::ops::Range;
37
38use snapshot_vec::{self as sv, UndoLog};
39use undo_log::{UndoLogs, VecLog};
40
41mod backing_vec;
42pub use self::backing_vec::{
43    Delegate, InPlace, UnificationStore, UnificationStoreBase, UnificationStoreMut,
44};
45
46#[cfg(feature = "persistent")]
47pub use self::backing_vec::Persistent;
48
49#[cfg(test)]
50mod tests;
51
52/// This trait is implemented by any type that can serve as a type
53/// variable. We call such variables *unification keys*. For example,
54/// this trait is implemented by `IntVid`, which represents integral
55/// variables.
56///
57/// Each key type has an associated value type `V`. For example, for
58/// `IntVid`, this is `Option<IntVarValue>`, representing some
59/// (possibly not yet known) sort of integer.
60///
61/// Clients are expected to provide implementations of this trait; you
62/// can see some examples in the `test` module.
63pub trait UnifyKey: Copy + Clone + Debug + PartialEq {
64    type Value: UnifyValue;
65
66    fn index(&self) -> u32;
67
68    fn from_index(u: u32) -> Self;
69
70    fn tag() -> &'static str;
71
72    /// You should return first the key that should be used as root,
73    /// then the other key (that will then point to the new root).
74    ///
75    /// NB. The only reason to implement this method is if you want to
76    /// control what value is returned from `find()`. In general, it
77    /// is better to let the unification table determine the root,
78    /// since overriding the rank can cause execution time to increase
79    /// dramatically.
80    #[allow(unused_variables)]
81    fn order_roots(
82        a: Self,
83        a_value: &Self::Value,
84        b: Self,
85        b_value: &Self::Value,
86    ) -> Option<(Self, Self)> {
87        None
88    }
89}
90
91/// Trait implemented for **values** associated with a unification
92/// key. This trait defines how to merge the values from two keys that
93/// are unioned together. This merging can be fallible. If you attempt
94/// to union two keys whose values cannot be merged, then the error is
95/// propagated up and the two keys are not unioned.
96///
97/// This crate provides implementations of `UnifyValue` for `()`
98/// (which is infallible) and `Option<T>` (where `T: UnifyValue`). The
99/// option implementation merges two sum-values using the `UnifyValue`
100/// implementation of `T`.
101///
102/// See also `EqUnifyValue`, which is a convenience trait for cases
103/// where the "merge" operation succeeds only if the two values are
104/// equal.
105pub trait UnifyValue: Clone + Debug {
106    /// Defines the type to return when merging of two values fails.
107    /// If merging is infallible, use the special struct `NoError`
108    /// found in this crate, which unlocks various more convenient
109    /// methods on the unification table.
110    type Error;
111
112    /// Given two values, produce a new value that combines them.
113    /// If that is not possible, produce an error.
114    fn unify_values(value1: &Self, value2: &Self) -> Result<Self, Self::Error>;
115}
116
117/// A convenient helper for unification values which must be equal or
118/// else an error occurs. For example, if you are unifying types in a
119/// simple functional language, this may be appropriate, since (e.g.)
120/// you can't unify a type variable bound to `int` with one bound to
121/// `float` (but you can unify two type variables both bound to
122/// `int`).
123///
124/// Any type which implements `EqUnifyValue` automatially implements
125/// `UnifyValue`; if the two values are equal, merging is permitted.
126/// Otherwise, the error `(v1, v2)` is returned, where `v1` and `v2`
127/// are the two unequal values.
128pub trait EqUnifyValue: Eq + Clone + Debug {}
129
130impl<T: EqUnifyValue> UnifyValue for T {
131    type Error = (T, T);
132
133    fn unify_values(value1: &Self, value2: &Self) -> Result<Self, Self::Error> {
134        if value1 == value2 {
135            Ok(value1.clone())
136        } else {
137            Err((value1.clone(), value2.clone()))
138        }
139    }
140}
141
142/// A struct which can never be instantiated. Used
143/// for the error type for infallible cases.
144#[derive(Debug)]
145pub struct NoError {
146    _dummy: (),
147}
148
149/// Value of a unification key. We implement Tarjan's union-find
150/// algorithm: when two keys are unified, one of them is converted
151/// into a "redirect" pointing at the other. These redirects form a
152/// DAG: the roots of the DAG (nodes that are not redirected) are each
153/// associated with a value of type `V` and a rank. The rank is used
154/// to keep the DAG relatively balanced, which helps keep the running
155/// time of the algorithm under control. For more information, see
156/// <http://en.wikipedia.org/wiki/Disjoint-set_data_structure>.
157#[derive(PartialEq, Clone, Debug)]
158pub struct VarValue<K: UnifyKey> {
159    parent: K,       // if equal to self, this is a root
160    value: K::Value, // value assigned (only relevant to root)
161    rank: u32,       // max depth (only relevant to root)
162}
163
164/// Table of unification keys and their values. You must define a key type K
165/// that implements the `UnifyKey` trait. Unification tables can be used in two-modes:
166///
167/// - in-place (`UnificationTable<InPlace<K>>` or `InPlaceUnificationTable<K>`):
168///   - This is the standard mutable mode, where the array is modified
169///     in place.
170///   - To do backtracking, you can employ the `snapshot` and `rollback_to`
171///     methods.
172/// - persistent (`UnificationTable<Persistent<K>>` or `PersistentUnificationTable<K>`):
173///   - In this mode, we use a persistent vector to store the data, so that
174///     cloning the table is an O(1) operation.
175///   - This implies that ordinary operations are quite a bit slower though.
176///   - Requires the `persistent` feature be selected in your Cargo.toml file.
177#[derive(Clone, Debug, Default)]
178pub struct UnificationTable<S: UnificationStoreBase> {
179    /// Indicates the current value of each key.
180    values: S,
181}
182
183pub type UnificationStorage<K> = Vec<VarValue<K>>;
184pub type UnificationTableStorage<K> = UnificationTable<InPlace<K, UnificationStorage<K>, ()>>;
185
186/// A unification table that uses an "in-place" vector.
187#[allow(type_alias_bounds)]
188pub type InPlaceUnificationTable<
189    K: UnifyKey,
190    V: sv::VecLike<Delegate<K>> = Vec<VarValue<K>>,
191    L = VecLog<UndoLog<Delegate<K>>>,
192> = UnificationTable<InPlace<K, V, L>>;
193
194/// A unification table that uses a "persistent" vector.
195#[cfg(feature = "persistent")]
196#[allow(type_alias_bounds)]
197pub type PersistentUnificationTable<K: UnifyKey> = UnificationTable<Persistent<K>>;
198
199/// At any time, users may snapshot a unification table.  The changes
200/// made during the snapshot may either be *committed* or *rolled back*.
201pub struct Snapshot<S: UnificationStore> {
202    // Link snapshot to the unification store `S` of the table.
203    marker: marker::PhantomData<S>,
204    snapshot: S::Snapshot,
205}
206
207impl<K: UnifyKey> VarValue<K> {
208    fn new_var(key: K, value: K::Value) -> VarValue<K> {
209        VarValue::new(key, value, 0)
210    }
211
212    fn new(parent: K, value: K::Value, rank: u32) -> VarValue<K> {
213        VarValue {
214            parent: parent, // this is a root
215            value: value,
216            rank: rank,
217        }
218    }
219
220    fn redirect(&mut self, to: K) {
221        self.parent = to;
222    }
223
224    fn root(&mut self, rank: u32, value: K::Value) {
225        self.rank = rank;
226        self.value = value;
227    }
228}
229
230impl<K> UnificationTableStorage<K>
231where
232    K: UnifyKey,
233{
234    /// Creates a `UnificationTable` using an external `undo_log`, allowing mutating methods to be
235    /// called if `L` does not implement `UndoLogs`
236    pub fn with_log<'a, L>(
237        &'a mut self,
238        undo_log: L,
239    ) -> UnificationTable<InPlace<K, &'a mut UnificationStorage<K>, L>>
240    where
241        L: UndoLogs<sv::UndoLog<Delegate<K>>>,
242    {
243        UnificationTable {
244            values: InPlace {
245                values: self.values.values.with_log(undo_log),
246            },
247        }
248    }
249}
250
251// We can't use V:LatticeValue, much as I would like to,
252// because frequently the pattern is that V=Option<U> for some
253// other type parameter U, and we have no way to say
254// Option<U>:LatticeValue.
255
256impl<S: UnificationStoreBase + Default> UnificationTable<S> {
257    pub fn new() -> Self {
258        Self::default()
259    }
260}
261
262impl<S: UnificationStore> UnificationTable<S> {
263    /// Starts a new snapshot. Each snapshot must be either
264    /// rolled back or committed in a "LIFO" (stack) order.
265    pub fn snapshot(&mut self) -> Snapshot<S> {
266        Snapshot {
267            marker: marker::PhantomData::<S>,
268            snapshot: self.values.start_snapshot(),
269        }
270    }
271
272    /// Reverses all changes since the last snapshot. Also
273    /// removes any keys that have been created since then.
274    pub fn rollback_to(&mut self, snapshot: Snapshot<S>) {
275        debug!("{}: rollback_to()", S::tag());
276        self.values.rollback_to(snapshot.snapshot);
277    }
278
279    /// Commits all changes since the last snapshot. Of course, they
280    /// can still be undone if there is a snapshot further out.
281    pub fn commit(&mut self, snapshot: Snapshot<S>) {
282        debug!("{}: commit()", S::tag());
283        self.values.commit(snapshot.snapshot);
284    }
285
286    /// Returns the keys of all variables created since the `snapshot`.
287    pub fn vars_since_snapshot(&self, snapshot: &Snapshot<S>) -> Range<S::Key> {
288        let range = self.values.values_since_snapshot(&snapshot.snapshot);
289        S::Key::from_index(range.start as u32)..S::Key::from_index(range.end as u32)
290    }
291}
292
293impl<S: UnificationStoreBase> UnificationTable<S> {
294    /// Returns the number of keys created so far.
295    pub fn len(&self) -> usize {
296        self.values.len()
297    }
298
299    /// Obtains the current value for a particular key.
300    /// Not for end-users; they can use `probe_value`.
301    fn value(&self, key: S::Key) -> &VarValue<S::Key> {
302        &self.values[key.index() as usize]
303    }
304}
305
306impl<S: UnificationStoreMut> UnificationTable<S> {
307    /// Creates a fresh key with the given value.
308    pub fn new_key(&mut self, value: S::Value) -> S::Key {
309        let len = self.values.len();
310        let key: S::Key = UnifyKey::from_index(len as u32);
311        self.values.push(VarValue::new_var(key, value));
312        debug!("{}: created new key: {:?}", S::tag(), key);
313        key
314    }
315
316    /// Reserve memory for `num_new_keys` to be created. Does not
317    /// actually create the new keys; you must then invoke `new_key`.
318    pub fn reserve(&mut self, num_new_keys: usize) {
319        self.values.reserve(num_new_keys);
320    }
321
322    /// Clears all unifications that have been performed, resetting to
323    /// the initial state. The values of each variable are given by
324    /// the closure.
325    pub fn reset_unifications(&mut self, mut value: impl FnMut(S::Key) -> S::Value) {
326        self.values.reset_unifications(|i| {
327            let key = UnifyKey::from_index(i as u32);
328            let value = value(key);
329            VarValue::new_var(key, value)
330        });
331    }
332
333    /// Find the root node for `vid`. This uses the standard
334    /// union-find algorithm with path compression:
335    /// <http://en.wikipedia.org/wiki/Disjoint-set_data_structure>.
336    ///
337    /// NB. This is a building-block operation and you would probably
338    /// prefer to call `probe` below.
339    ///
340    /// This is an always-inlined version of this function for the hot
341    /// callsites. `uninlined_get_root_key` is the never-inlined version.
342    #[inline(always)]
343    fn inlined_get_root_key(&mut self, vid: S::Key) -> S::Key {
344        let v = self.value(vid);
345        if v.parent == vid {
346            return vid;
347        }
348
349        let redirect = v.parent;
350        let root_key: S::Key = self.uninlined_get_root_key(redirect);
351        if root_key != redirect {
352            // Path compression
353            self.update_value(vid, |value| value.parent = root_key);
354        }
355
356        root_key
357    }
358
359    // This is a never-inlined version of this function for cold callsites.
360    // 'inlined_get_root_key` is the always-inlined version.
361    #[inline(never)]
362    fn uninlined_get_root_key(&mut self, vid: S::Key) -> S::Key {
363        self.inlined_get_root_key(vid)
364    }
365
366    fn update_value<OP>(&mut self, key: S::Key, op: OP)
367    where
368        OP: FnOnce(&mut VarValue<S::Key>),
369    {
370        self.values.update(key.index() as usize, op);
371        debug!("Updated variable {:?} to {:?}", key, self.value(key));
372    }
373
374    /// Either redirects `node_a` to `node_b` or vice versa, depending
375    /// on the relative rank. The value associated with the new root
376    /// will be `new_value`.
377    ///
378    /// NB: This is the "union" operation of "union-find". It is
379    /// really more of a building block. If the values associated with
380    /// your key are non-trivial, you would probably prefer to call
381    /// `unify_var_var` below.
382    fn unify_roots(&mut self, key_a: S::Key, key_b: S::Key, new_value: S::Value) {
383        debug!("unify(key_a={:?}, key_b={:?})", key_a, key_b);
384
385        let rank_a = self.value(key_a).rank;
386        let rank_b = self.value(key_b).rank;
387        if let Some((new_root, redirected)) = S::Key::order_roots(
388            key_a,
389            &self.value(key_a).value,
390            key_b,
391            &self.value(key_b).value,
392        ) {
393            // compute the new rank for the new root that they chose;
394            // this may not be the optimal choice.
395            let new_rank = if new_root == key_a {
396                debug_assert!(redirected == key_b);
397                if rank_a > rank_b {
398                    rank_a
399                } else {
400                    rank_b + 1
401                }
402            } else {
403                debug_assert!(new_root == key_b);
404                debug_assert!(redirected == key_a);
405                if rank_b > rank_a {
406                    rank_b
407                } else {
408                    rank_a + 1
409                }
410            };
411            self.redirect_root(new_rank, redirected, new_root, new_value);
412        } else if rank_a > rank_b {
413            // a has greater rank, so a should become b's parent,
414            // i.e., b should redirect to a.
415            self.redirect_root(rank_a, key_b, key_a, new_value);
416        } else if rank_a < rank_b {
417            // b has greater rank, so a should redirect to b.
418            self.redirect_root(rank_b, key_a, key_b, new_value);
419        } else {
420            // If equal, redirect one to the other and increment the
421            // other's rank.
422            self.redirect_root(rank_a + 1, key_a, key_b, new_value);
423        }
424    }
425
426    /// Internal method to redirect `old_root_key` (which is currently
427    /// a root) to a child of `new_root_key` (which will remain a
428    /// root). The rank and value of `new_root_key` will be updated to
429    /// `new_rank` and `new_value` respectively.
430    fn redirect_root(
431        &mut self,
432        new_rank: u32,
433        old_root_key: S::Key,
434        new_root_key: S::Key,
435        new_value: S::Value,
436    ) {
437        self.update_value(old_root_key, |old_root_value| {
438            old_root_value.redirect(new_root_key);
439        });
440        self.update_value(new_root_key, |new_root_value| {
441            new_root_value.root(new_rank, new_value);
442        });
443    }
444}
445
446/// ////////////////////////////////////////////////////////////////////////
447/// Public API
448
449impl<S, K, V> UnificationTable<S>
450where
451    S: UnificationStoreBase<Key = K, Value = V>,
452    K: UnifyKey<Value = V>,
453    V: UnifyValue,
454{
455    /// Obtains current value for key without any pointer chasing; may return `None` if key has been union'd.
456    #[inline]
457    pub fn try_probe_value<'a, K1>(&'a self, id: K1) -> Option<&'a V>
458        where
459            K1: Into<K>,
460            K: 'a,
461    {
462        let id = id.into();
463        let v = self.value(id);
464        if v.parent == id {
465            return Some(&v.value);
466        }
467        None
468    }
469}
470
471impl<S, K, V> UnificationTable<S>
472where
473    S: UnificationStoreMut<Key = K, Value = V>,
474    K: UnifyKey<Value = V>,
475    V: UnifyValue,
476{
477    /// Unions two keys without the possibility of failure; only
478    /// applicable when unify values use `NoError` as their error
479    /// type.
480    pub fn union<K1, K2>(&mut self, a_id: K1, b_id: K2)
481    where
482        K1: Into<K>,
483        K2: Into<K>,
484        V: UnifyValue<Error = NoError>,
485    {
486        self.unify_var_var(a_id, b_id).unwrap();
487    }
488
489    /// Unions a key and a value without the possibility of failure;
490    /// only applicable when unify values use `NoError` as their error
491    /// type.
492    pub fn union_value<K1>(&mut self, id: K1, value: V)
493    where
494        K1: Into<K>,
495        V: UnifyValue<Error = NoError>,
496    {
497        self.unify_var_value(id, value).unwrap();
498    }
499
500    /// Given two keys, indicates whether they have been unioned together.
501    pub fn unioned<K1, K2>(&mut self, a_id: K1, b_id: K2) -> bool
502    where
503        K1: Into<K>,
504        K2: Into<K>,
505    {
506        self.find(a_id) == self.find(b_id)
507    }
508
509    /// Given a key, returns the (current) root key.
510    pub fn find<K1>(&mut self, id: K1) -> K
511    where
512        K1: Into<K>,
513    {
514        let id = id.into();
515        self.uninlined_get_root_key(id)
516    }
517
518    /// Unions together two variables, merging their values. If
519    /// merging the values fails, the error is propagated and this
520    /// method has no effect.
521    pub fn unify_var_var<K1, K2>(&mut self, a_id: K1, b_id: K2) -> Result<(), V::Error>
522    where
523        K1: Into<K>,
524        K2: Into<K>,
525    {
526        let a_id = a_id.into();
527        let b_id = b_id.into();
528
529        let root_a = self.uninlined_get_root_key(a_id);
530        let root_b = self.uninlined_get_root_key(b_id);
531
532        if root_a == root_b {
533            return Ok(());
534        }
535
536        let combined = V::unify_values(&self.value(root_a).value, &self.value(root_b).value)?;
537
538        Ok(self.unify_roots(root_a, root_b, combined))
539    }
540
541    /// Sets the value of the key `a_id` to `b`, attempting to merge
542    /// with the previous value.
543    pub fn unify_var_value<K1>(&mut self, a_id: K1, b: V) -> Result<(), V::Error>
544    where
545        K1: Into<K>,
546    {
547        let a_id = a_id.into();
548        let root_a = self.uninlined_get_root_key(a_id);
549        let value = V::unify_values(&self.value(root_a).value, &b)?;
550        self.update_value(root_a, |node| node.value = value);
551        Ok(())
552    }
553
554    /// Returns the current value for the given key. If the key has
555    /// been union'd, this will give the value from the current root.
556    pub fn probe_value<K1>(&mut self, id: K1) -> V
557    where
558        K1: Into<K>,
559    {
560        self.inlined_probe_value(id)
561    }
562
563    // An always-inlined version of `probe_value`, for hot callsites.
564    #[inline(always)]
565    pub fn inlined_probe_value<K1>(&mut self, id: K1) -> V
566    where
567        K1: Into<K>,
568    {
569        let id = id.into();
570        let id = self.inlined_get_root_key(id);
571        self.value(id).value.clone()
572    }
573}
574
575///////////////////////////////////////////////////////////////////////////
576
577impl UnifyValue for () {
578    type Error = NoError;
579
580    fn unify_values(_: &(), _: &()) -> Result<(), NoError> {
581        Ok(())
582    }
583}
584
585impl<V: UnifyValue> UnifyValue for Option<V> {
586    type Error = V::Error;
587
588    fn unify_values(a: &Option<V>, b: &Option<V>) -> Result<Self, V::Error> {
589        match (a, b) {
590            (&None, &None) => Ok(None),
591            (&Some(ref v), &None) | (&None, &Some(ref v)) => Ok(Some(v.clone())),
592            (&Some(ref a), &Some(ref b)) => match V::unify_values(a, b) {
593                Ok(v) => Ok(Some(v)),
594                Err(err) => Err(err),
595            },
596        }
597    }
598}