Implementation

struct Node {
    Node(const int id) : id(id), parent(this), depth(0) {};
    int depth;
    const int id;
    Node *parent;
};

class DisjointSet {
public:

    DisjointSet(const int ids) : num_ids(ids) {
        init_find.reserve(ids);
        for (int i = 1; i <= ids; i++) 
            make_set(i);
    }

    int find(const int x) {
        if (init_find.find(x) == init_find.end()) return -1;
        else return _find(x);
    }

    void union_(const int x, const int y) {
        prev_op = "union_(" + to_string(x) + ", " + to_string(y) + ")";
        int x_parent = find(x);
        int y_parent = find(y);
        if (init_find[y_parent]->depth <= init_find[x_parent]->depth) 
            init_find[y_parent]->parent = init_find[x_parent]->parent;

        else if (init_find[y_parent]->depth > init_find[x_parent]->depth) 
            init_find[x_parent]->parent = init_find[y_parent]->parent;

        // doesnt matter which, but assuming x.id < y.id I arbitrarily chose smaller one
        if (init_find[y_parent]->depth == init_find[x_parent]->depth) 
            init_find[x_parent]->depth++;
    }

    void print_paths_to_root() {
        cout << prev_op << endl;
        for (int i = 1; i <= num_ids; i++) {
            cout << setw(3) << i << ": ";
            Node *iter = init_find[i];
            while (iter->id != iter->parent->id) {
                cout << iter->parent->id << ", ";
                iter = iter->parent;
            }
            cout << endl;
        }
        cout << endl;
    }

    unordered_map<int, vector<int>> get_disjoint_sets() {

        unordered_map<int, vector<int>> roots;

        // simplify things - ensures no duplicates in the vector
        for (int i = 1; i < num_ids; i++) find(i);

        for (int i = 1; i <= num_ids; i++) {
            vector<int> to_root;
            Node *iter = init_find[i];
            to_root.push_back(iter->id);
            while (iter->id != iter->parent->id) {
                to_root.push_back(iter->parent->id);
                iter = iter->parent;
            }
            int root = to_root.back();
            to_root.pop_back();

            if (roots.find(root) != roots.end()) {
                for (int i : to_root) {
                    roots[root].push_back(i);
                }
            }
            else {
                roots.insert({root, to_root});
            }
        }

        return roots;
    }

    static void print_sets(const unordered_map<int, vector<int>> &elms) {
        int set_num = 1;
        for (auto i : elms) {
            cout << "set " << set_num++ << ": ";
            cout << i.first;
            for (int j : i.second) {
                cout << ", " << j;
            }
            cout << endl;
        }
        cout << endl;
    }

private:
    string prev_op;
    const size_t num_ids;

    void make_set(const int x) {
        if (init_find.find(x) != init_find.end()) return;
        else init_find.insert({ x, new Node(x) });
    }

    int _find(const int x) {
        Node *iter = init_find[x]->parent;

        if (iter->id == iter->parent->id) {
            return iter->id;
        }
        else {
            // path compression
            while (iter->id != iter->parent->id) 
                iter = iter->parent;
            init_find[x]->parent = init_find[iter->id];
            return iter->id;
        }
    }

    unordered_map<int, Node*> init_find;
};


int main() {
    DisjointSet ds(7);
    ds.print_paths_to_root();                  ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(1, 2); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(2, 3); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(4, 5); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(6, 7); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(5, 6); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
    ds.union_(3, 7); ds.print_paths_to_root(); ds.print_sets(ds.get_disjoint_sets()); //pause();
}

Output: (using the same example from before)

  1:
  2:
  3:
  4:
  5:
  6:
  7:

set 1: 1
set 2: 2
set 3: 3
set 4: 4
set 5: 5
set 6: 6
set 7: 7

union_(1, 2)
  1:
  2: 1,
  3:
  4:
  5:
  6:
  7:

set 1: 1, 2
set 2: 3
set 3: 4
set 4: 5
set 5: 6
set 6: 7

union_(2, 3)
  1:
  2: 1,
  3: 1,
  4:
  5:
  6:
  7:

set 1: 1, 2, 3
set 2: 4
set 3: 5
set 4: 6
set 5: 7

union_(4, 5)
  1:
  2: 1,
  3: 1,
  4:
  5: 4,
  6:
  7:

set 1: 1, 2, 3
set 2: 4, 5
set 3: 6
set 4: 7

union_(6, 7)
  1:
  2: 1,
  3: 1,
  4:
  5: 4,
  6:
  7: 6,

set 1: 1, 2, 3
set 2: 4, 5
set 3: 6, 7

union_(5, 6)
  1:
  2: 1,
  3: 1,
  4:
  5: 4,
  6: 4,
  7: 6, 4,

set 1: 1, 2, 3
set 2: 4, 5, 6, 7, 6

union_(3, 7)
  1: 4,
  2: 1, 4,
  3: 1, 4,
  4:
  5: 4,
  6: 4,
  7: 4,

set 1: 4, 1, 2, 3, 5, 6, 7

Discussion

How does using a hash table come into play?

It helps us will immediately identifying the pointer (and hence where exactly it currently exists within the tree) using an integer identifier. We can use this to trace to the root.

How do we know we've reached the root?

Both the parent and current node have the same id.

PreviousDisjoint Sets NextEularian Cycle

Last updated 5 years ago

Was this helpful?