-
Notifications
You must be signed in to change notification settings - Fork 843
Add a Tarjan's Strongly Connected Component utilty #6790
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,257 @@ | ||
| /* | ||
| * Copyright 2024 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #ifndef wasm_support_strongly_connected_components_h | ||
| #define wasm_support_strongly_connected_components_h | ||
|
|
||
| #include <cassert> | ||
| #include <optional> | ||
| #include <unordered_map> | ||
| #include <unordered_set> | ||
| #include <vector> | ||
|
|
||
| #include <iostream> | ||
|
|
||
| namespace wasm { | ||
|
|
||
| // A CRTP utility implementing Tarjan's Strongly Connected Component algorithm | ||
| // (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) | ||
| // in terms of iterators. Given the beginning and end iterators over the | ||
| // elements in a graph an implementation of `pushChildren` in the CRTP subclass | ||
| // that pushes an element's children, provides an iterable over the SCCs, each | ||
| // of which is an iterable over the elements in the SCC. All implemented | ||
| // iterators are input iterators that mutate the underlying state, so this | ||
| // utility can only be used for single-pass algorithms. | ||
| template<typename It, typename Class> struct SCCs { | ||
| SCCs(It inputIt, It inputEnd) : inputIt(inputIt), inputEnd(inputEnd) {} | ||
|
|
||
| private: | ||
| using T = typename It::value_type; | ||
|
|
||
| // The iterators over the graph we are calculating the SCCs for. | ||
| It inputIt; | ||
| It inputEnd; | ||
|
|
||
| // Stack of pending elements to visit, used instead of a recursive visitor. | ||
| struct WorkItem { | ||
| T item; | ||
| std::optional<T> parent = std::nullopt; | ||
| bool processedChildren = false; | ||
| }; | ||
| std::vector<WorkItem> workStack; | ||
| // The Tarjan's algorithm stack. Similar to a DFS stack, but elements are only | ||
| // popped off once they are committed to a strongly connected component. | ||
| // Elements stay on the stack after they are visited iff they have a back edge | ||
| // to an element earlier in the stack. | ||
| std::vector<T> stack; | ||
| std::unordered_set<T> stackElems; | ||
| // Indices assigned to each element in the order they are visited. | ||
| std::unordered_map<T, size_t> indices; | ||
| // The smallest index of the elements reachable from each element through | ||
| // its subtree. | ||
| std::unordered_map<T, size_t> minReachable; | ||
|
|
||
| // The parent to record when calling into the subclass to push children. | ||
| std::optional<T> currParent; | ||
|
|
||
| // The root (i.e. deepest element in the stack) for the current SCC. Empty | ||
| // whenever we have yet to find the next SCC. | ||
| std::optional<T> currRoot; | ||
|
|
||
| bool stepToNextGroup() { | ||
| while (inputIt != inputEnd || !workStack.empty()) { | ||
| if (workStack.empty()) { | ||
| workStack.push_back({*inputIt++}); | ||
| } | ||
| while (!workStack.empty()) { | ||
| auto& work = workStack.back(); | ||
| auto& item = work.item; | ||
|
|
||
| if (!work.processedChildren) { | ||
| auto [it, inserted] = indices.insert({item, indices.size()}); | ||
| auto index = it->second; | ||
| if (inserted) { | ||
| // This is a new item we have never seen before, so initialize its | ||
| // associated data and recurse. | ||
| minReachable[item] = index; | ||
| stack.push_back(item); | ||
| stackElems.insert(item); | ||
|
|
||
| // Leave the element on the work stack because we will have to do | ||
| // more work after we have finished processing its children. | ||
| work.processedChildren = true; | ||
| currParent = item; | ||
| static_cast<Class*>(this)->pushChildren(item); | ||
| currParent = std::nullopt; | ||
| // Process the pushed children first; we will come back to this item | ||
| // later. | ||
| continue; | ||
| } | ||
|
|
||
| if (stackElems.count(item)) { | ||
| assert(work.parent); | ||
| // Item is already in the current SCC. Update the parent's | ||
| // minReachable if this child has a smaller index than we have seen | ||
| // so far. | ||
| auto& min = minReachable[*work.parent]; | ||
| min = std::min(min, index); | ||
| } else { | ||
| // Item is in an SCC we have already processed, so ignore it | ||
| // entirely. | ||
| } | ||
| // Do not recurse for this item we have seen before. We are done with | ||
| // it. | ||
| workStack.pop_back(); | ||
| continue; | ||
| } | ||
|
|
||
| // We have finished processing the children for the current element, so | ||
| // we know it's final minReachable value. Use it to potentially update | ||
| // the parent's minReachable value. | ||
| if (work.parent) { | ||
| auto& parentMin = minReachable[*work.parent]; | ||
| parentMin = std::min(parentMin, minReachable[item]); | ||
| } | ||
|
|
||
| if (minReachable[item] == indices[item]) { | ||
| // This element reaches and is reachable by all shallower elements in | ||
| // the stack (otherwise they would have already been popped) and does | ||
| // not itself reach any deeper elements, so we have found an SCC and | ||
| // the current item is its root. | ||
| currRoot = item; | ||
| workStack.pop_back(); | ||
| return true; | ||
| } | ||
| workStack.pop_back(); | ||
| } | ||
| } | ||
| // We are at the end. | ||
| return false; | ||
| } | ||
|
|
||
| void stepToNextElem() { | ||
| assert(currRoot); | ||
| if (stack.back() == *currRoot) { | ||
| // This was the last element in the current SCC. We have to find the next | ||
| // SCC now. | ||
| currRoot = std::nullopt; | ||
| } | ||
| stackElems.erase(stack.back()); | ||
| stack.pop_back(); | ||
| } | ||
|
|
||
| void pushChildren(const T& parent) { | ||
| static_assert(&SCCs<It, Class>::pushChildren != &Class::pushChildren, | ||
| "SCCs subclass must implement `pushChildren`"); | ||
| } | ||
|
|
||
| protected: | ||
| // Call this from `Class::pushChildren` to add a child. | ||
| void push(const T& item) { | ||
| assert(currParent); | ||
| workStack.push_back({item, currParent}); | ||
| } | ||
|
|
||
| public: | ||
| struct SCC { | ||
| SCCs<It, Class>* parent; | ||
|
|
||
| // Iterate over the elements in a strongly connected component. | ||
| struct Iterator { | ||
| using value_type = T; | ||
| using difference_type = std::ptrdiff_t; | ||
| using reference = T&; | ||
| using pointer = T*; | ||
| using iterator_category = std::input_iterator_tag; | ||
|
|
||
| SCCs<It, Class>* parent; | ||
| std::optional<T> val = std::nullopt; | ||
|
|
||
| bool isEnd() const { return !parent || !parent->currRoot; } | ||
| bool operator==(const Iterator& other) const { | ||
| return isEnd() == other.isEnd(); | ||
| } | ||
| bool operator!=(const Iterator& other) const { return !(*this == other); } | ||
| T operator*() { return *val; } | ||
| T* operator->() { return &*val; } | ||
| void setVal() { | ||
| if (isEnd()) { | ||
| val = std::nullopt; | ||
| } else { | ||
| val = parent->stack.back(); | ||
| } | ||
| } | ||
|
|
||
| Iterator& operator++() { | ||
| parent->stepToNextElem(); | ||
| setVal(); | ||
| return *this; | ||
| } | ||
| Iterator operator++(int) { | ||
| auto it = *this; | ||
| ++(*this); | ||
| return it; | ||
| } | ||
| }; | ||
|
|
||
| Iterator begin() { | ||
| Iterator it = {parent}; | ||
| it.setVal(); | ||
| return it; | ||
| } | ||
| Iterator end() { return {nullptr}; } | ||
| }; | ||
|
|
||
| // Iterate over the strongly connected components of the graph. | ||
| struct Iterator { | ||
| using value_type = SCC; | ||
| using different_type = std::ptrdiff_t; | ||
| using reference = SCC; | ||
| using pointer = SCC*; | ||
| using iterator_category = std::input_iterator_tag; | ||
|
|
||
| // scc.parent is null iff we are at the end. | ||
| SCC scc; | ||
|
|
||
| bool isEnd() const { return !scc.parent; } | ||
| bool operator==(const Iterator& other) const { | ||
| return isEnd() == other.isEnd(); | ||
| } | ||
| bool operator!=(const Iterator& other) const { return !(*this == other); } | ||
| SCC operator*() { return scc; } | ||
| SCC* operator->() { return &scc; } | ||
| Iterator& operator++() { | ||
| // Skip the rest of the current SCC, if for some reason it was not | ||
| // consumed. | ||
| for (auto elem : *(*this)) { | ||
| (void)elem; | ||
| } | ||
| if (!scc.parent->stepToNextGroup()) { | ||
| // We are at the end, so mark ourselves as such. | ||
| scc.parent = nullptr; | ||
| } | ||
| return *this; | ||
| } | ||
| void operator++(int) { ++(*this); } | ||
| }; | ||
|
|
||
| Iterator begin() { return ++Iterator{this}; } | ||
| Iterator end() { return {nullptr}; } | ||
| }; | ||
|
|
||
| } // namespace wasm | ||
|
|
||
| #endif // wasm_support_strongly_connected_components_h | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would these three perhaps be more efficient as a single map of a struct like this?
(Also those names match the names in the paper.)