/src/spirv-tools/source/cfa.h
Line | Count | Source |
1 | | // Copyright (c) 2015-2016 The Khronos Group Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #ifndef SOURCE_CFA_H_ |
16 | | #define SOURCE_CFA_H_ |
17 | | |
18 | | #include <stddef.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cassert> |
22 | | #include <cstdint> |
23 | | #include <functional> |
24 | | #include <map> |
25 | | #include <unordered_map> |
26 | | #include <unordered_set> |
27 | | #include <utility> |
28 | | #include <vector> |
29 | | |
30 | | namespace spvtools { |
31 | | |
// Control Flow Analysis of control flow graphs of basic block nodes |BB|.
//
// All members are static; the class only groups graph algorithms that are
// parameterised on the node type. The algorithms identify nodes through
// BB::id() and walk edges through caller-supplied callbacks that return the
// neighbour list of a node.
template <class BB>
class CFA {
  using bb_ptr = BB*;
  using cbb_ptr = const BB*;
  using bb_iter = typename std::vector<BB*>::const_iterator;
  /// Callback returning a pointer to the neighbour list (successors or
  /// predecessors, depending on the caller) of the given block.
  using get_blocks_func = std::function<const std::vector<BB*>*(const BB*)>;

  /// One frame of the explicit depth first traversal stack.
  struct block_info {
    cbb_ptr block;  ///< pointer to the block
    bb_iter iter;   ///< Iterator to the current child node being processed
  };

  /// Returns true if a block with @p id is found in the @p work_list vector
  ///
  /// @param[in] work_list  Stack of blocks on the current depth first
  ///                       traversal path, from the root to the block being
  ///                       processed
  /// @param[in] id         The ID of the block being checked
  ///
  /// @return true if some entry of @p work_list has a block whose id() equals
  /// @p id, i.e. the edge work_list.back().block->id() => id is a back-edge
  static bool FindInWorkList(const std::vector<block_info>& work_list,
                             uint32_t id);

 public:
  /// @brief Depth first traversal starting from the \p entry BasicBlock
  ///
  /// This function performs a depth first traversal from the \p entry
  /// BasicBlock and calls the pre/postorder functions when it needs to process
  /// the node in pre order, post order. It forwards to the overload below with
  /// an empty backedge function.
  ///
  /// @param[in] entry          The root BasicBlock of a CFG
  /// @param[in] successor_func A function which will return a pointer to the
  ///                           successor nodes
  /// @param[in] preorder       A function that will be called for every block
  ///                           in a CFG following preorder traversal semantics
  /// @param[in] postorder      A function that will be called for every block
  ///                           in a CFG following postorder traversal
  ///                           semantics
  /// @param[in] terminal       A function that will be called to determine if
  ///                           the search should stop at the given node.
  /// NOTE: @p successor_func must return a pointer to a collection such that
  /// iterators to that collection remain valid for the lifetime of the
  /// algorithm; the traversal stores such iterators on its work list.
  static void DepthFirstTraversal(const BB* entry,
                                  get_blocks_func successor_func,
                                  std::function<void(cbb_ptr)> preorder,
                                  std::function<void(cbb_ptr)> postorder,
                                  std::function<bool(cbb_ptr)> terminal);

  /// @brief Depth first traversal starting from the \p entry BasicBlock
  ///
  /// This function performs a depth first traversal from the \p entry
  /// BasicBlock and calls the pre/postorder functions when it needs to process
  /// the node in pre order, post order. It also calls the backedge function
  /// when a back edge is encountered. The backedge function can be empty. The
  /// runtime of the algorithm is improved if backedge is empty, because the
  /// work list is then never scanned for back edges.
  ///
  /// @param[in] entry          The root BasicBlock of a CFG
  /// @param[in] successor_func A function which will return a pointer to the
  ///                           successor nodes
  /// @param[in] preorder       A function that will be called for every block
  ///                           in a CFG following preorder traversal semantics
  /// @param[in] postorder      A function that will be called for every block
  ///                           in a CFG following postorder traversal
  ///                           semantics
  /// @param[in] backedge       A function that will be called, as
  ///                           backedge(from, to), when a backedge is
  ///                           encountered during a traversal.
  /// @param[in] terminal       A function that will be called to determine if
  ///                           the search should stop at the given node.
  /// NOTE: @p successor_func must return a pointer to a collection such that
  /// iterators to that collection remain valid for the lifetime of the
  /// algorithm; the traversal stores such iterators on its work list.
  static void DepthFirstTraversal(
      const BB* entry, get_blocks_func successor_func,
      std::function<void(cbb_ptr)> preorder,
      std::function<void(cbb_ptr)> postorder,
      std::function<void(cbb_ptr, cbb_ptr)> backedge,
      std::function<bool(cbb_ptr)> terminal);

  /// @brief Calculates dominator edges for a set of blocks
  ///
  /// Computes dominators using the algorithm of Cooper, Harvey, and Kennedy
  /// "A Simple, Fast Dominance Algorithm", 2001.
  ///
  /// The algorithm assumes there is a unique root node (a node without
  /// predecessors), and it is therefore at the end of the postorder vector.
  ///
  /// @param[in] postorder        A vector of blocks in post order traversal
  ///                             order in a CFG
  /// @param[in] predecessor_func Function used to get the predecessor nodes
  ///                             of a block
  ///
  /// @return the dominator tree of the graph, as a vector of pairs of nodes.
  /// The first node in the pair is a node in the graph. The second node in the
  /// pair is its immediate dominator in the sense of Cooper et al., where a
  /// block without predecessors (such as the root node) is its own immediate
  /// dominator.
  static std::vector<std::pair<BB*, BB*>> CalculateDominators(
      const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func);

  // Computes a minimal set of root nodes required to traverse, in the forward
  // direction, the CFG represented by the given vector of blocks, and successor
  // and predecessor functions.  When considering adding two nodes, each having
  // predecessors, favour using the one that appears earlier on the input blocks
  // list.
  static std::vector<BB*> TraversalRoots(const std::vector<BB*>& blocks,
                                         get_blocks_func succ_func,
                                         get_blocks_func pred_func);

  // NOTE(review): the definition is not visible in this part of the file.
  // Judging by the signature, this presumably fills the two output maps with
  // successor/predecessor lists that additionally wire |pseudo_entry_block|
  // and |pseudo_exit_block| into the graph described by |ordered_blocks|,
  // |succ_func| and |pred_func| -- confirm against the definition.
  static void ComputeAugmentedCFG(
      std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
      BB* pseudo_exit_block,
      std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map,
      std::unordered_map<const BB*, std::vector<BB*>>*
          augmented_predecessors_map,
      get_blocks_func succ_func, get_blocks_func pred_func);
};
155 | | |
156 | | template <class BB> |
157 | | bool CFA<BB>::FindInWorkList(const std::vector<block_info>& work_list, |
158 | 641k | uint32_t id) { |
159 | 5.38M | for (const auto& b : work_list) { |
160 | 5.38M | if (b.block->id() == id) return true; |
161 | 5.38M | } |
162 | 602k | return false; |
163 | 641k | } Unexecuted instantiation: spvtools::CFA<spvtools::opt::BasicBlock>::FindInWorkList(std::__1::vector<spvtools::CFA<spvtools::opt::BasicBlock>::block_info, std::__1::allocator<spvtools::CFA<spvtools::opt::BasicBlock>::block_info> > const&, unsigned int) Unexecuted instantiation: spvtools::CFA<spvtools::opt::DominatorTreeNode>::FindInWorkList(std::__1::vector<spvtools::CFA<spvtools::opt::DominatorTreeNode>::block_info, std::__1::allocator<spvtools::CFA<spvtools::opt::DominatorTreeNode>::block_info> > const&, unsigned int) spvtools::CFA<spvtools::val::BasicBlock>::FindInWorkList(std::__1::vector<spvtools::CFA<spvtools::val::BasicBlock>::block_info, std::__1::allocator<spvtools::CFA<spvtools::val::BasicBlock>::block_info> > const&, unsigned int) Line | Count | Source | 158 | 641k | uint32_t id) { | 159 | 5.38M | for (const auto& b : work_list) { | 160 | 5.38M | if (b.block->id() == id) return true; | 161 | 5.38M | } | 162 | 602k | return false; | 163 | 641k | } |
|
164 | | |
165 | | template <class BB> |
166 | | void CFA<BB>::DepthFirstTraversal(const BB* entry, |
167 | | get_blocks_func successor_func, |
168 | | std::function<void(cbb_ptr)> preorder, |
169 | | std::function<void(cbb_ptr)> postorder, |
170 | 562k | std::function<bool(cbb_ptr)> terminal) { |
171 | 562k | DepthFirstTraversal(entry, successor_func, preorder, postorder, |
172 | 562k | /* backedge = */ {}, terminal); |
173 | 562k | } spvtools::CFA<spvtools::opt::BasicBlock>::DepthFirstTraversal(spvtools::opt::BasicBlock const*, std::__1::function<std::__1::vector<spvtools::opt::BasicBlock*, std::__1::allocator<spvtools::opt::BasicBlock*> > const* (spvtools::opt::BasicBlock const*)>, std::__1::function<void (spvtools::opt::BasicBlock const*)>, std::__1::function<void (spvtools::opt::BasicBlock const*)>, std::__1::function<bool (spvtools::opt::BasicBlock const*)>) Line | Count | Source | 170 | 256k | std::function<bool(cbb_ptr)> terminal) { | 171 | 256k | DepthFirstTraversal(entry, successor_func, preorder, postorder, | 172 | 256k | /* backedge = */ {}, terminal); | 173 | 256k | } |
spvtools::CFA<spvtools::opt::DominatorTreeNode>::DepthFirstTraversal(spvtools::opt::DominatorTreeNode const*, std::__1::function<std::__1::vector<spvtools::opt::DominatorTreeNode*, std::__1::allocator<spvtools::opt::DominatorTreeNode*> > const* (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<void (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<void (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<bool (spvtools::opt::DominatorTreeNode const*)>) Line | Count | Source | 170 | 58.7k | std::function<bool(cbb_ptr)> terminal) { | 171 | 58.7k | DepthFirstTraversal(entry, successor_func, preorder, postorder, | 172 | 58.7k | /* backedge = */ {}, terminal); | 173 | 58.7k | } |
spvtools::CFA<spvtools::val::BasicBlock>::DepthFirstTraversal(spvtools::val::BasicBlock const*, std::__1::function<std::__1::vector<spvtools::val::BasicBlock*, std::__1::allocator<spvtools::val::BasicBlock*> > const* (spvtools::val::BasicBlock const*)>, std::__1::function<void (spvtools::val::BasicBlock const*)>, std::__1::function<void (spvtools::val::BasicBlock const*)>, std::__1::function<bool (spvtools::val::BasicBlock const*)>) Line | Count | Source | 170 | 247k | std::function<bool(cbb_ptr)> terminal) { | 171 | 247k | DepthFirstTraversal(entry, successor_func, preorder, postorder, | 172 | 247k | /* backedge = */ {}, terminal); | 173 | 247k | } |
|
174 | | |
175 | | template <class BB> |
176 | | void CFA<BB>::DepthFirstTraversal( |
177 | | const BB* entry, get_blocks_func successor_func, |
178 | | std::function<void(cbb_ptr)> preorder, |
179 | | std::function<void(cbb_ptr)> postorder, |
180 | | std::function<void(cbb_ptr, cbb_ptr)> backedge, |
181 | 592k | std::function<bool(cbb_ptr)> terminal) { |
182 | 592k | assert(successor_func && "The successor function cannot be empty."); |
183 | 592k | assert(preorder && "The preorder function cannot be empty."); |
184 | 592k | assert(postorder && "The postorder function cannot be empty."); |
185 | 592k | assert(terminal && "The terminal function cannot be empty."); |
186 | | |
187 | 592k | std::unordered_set<uint32_t> processed; |
188 | | |
189 | | /// NOTE: work_list is the sequence of nodes from the root node to the node |
190 | | /// being processed in the traversal |
191 | 592k | std::vector<block_info> work_list; |
192 | 592k | work_list.reserve(10); |
193 | | |
194 | 592k | work_list.push_back({entry, std::begin(*successor_func(entry))}); |
195 | 592k | preorder(entry); |
196 | 592k | processed.insert(entry->id()); |
197 | | |
198 | 38.3M | while (!work_list.empty()) { |
199 | 37.7M | block_info& top = work_list.back(); |
200 | 37.7M | if (terminal(top.block) || top.iter == end(*successor_func(top.block))) { |
201 | 16.0M | postorder(top.block); |
202 | 16.0M | work_list.pop_back(); |
203 | 21.7M | } else { |
204 | 21.7M | BB* child = *top.iter; |
205 | 21.7M | top.iter++; |
206 | 21.7M | if (backedge && FindInWorkList(work_list, child->id())) { |
207 | 39.1k | backedge(top.block, child); |
208 | 39.1k | } |
209 | 21.7M | if (processed.count(child->id()) == 0) { |
210 | 15.4M | preorder(child); |
211 | 15.4M | work_list.emplace_back( |
212 | 15.4M | block_info{child, std::begin(*successor_func(child))}); |
213 | 15.4M | processed.insert(child->id()); |
214 | 15.4M | } |
215 | 21.7M | } |
216 | 37.7M | } |
217 | 592k | } spvtools::CFA<spvtools::opt::BasicBlock>::DepthFirstTraversal(spvtools::opt::BasicBlock const*, std::__1::function<std::__1::vector<spvtools::opt::BasicBlock*, std::__1::allocator<spvtools::opt::BasicBlock*> > const* (spvtools::opt::BasicBlock const*)>, std::__1::function<void (spvtools::opt::BasicBlock const*)>, std::__1::function<void (spvtools::opt::BasicBlock const*)>, std::__1::function<void (spvtools::opt::BasicBlock const*, spvtools::opt::BasicBlock const*)>, std::__1::function<bool (spvtools::opt::BasicBlock const*)>) Line | Count | Source | 181 | 256k | std::function<bool(cbb_ptr)> terminal) { | 182 | 256k | assert(successor_func && "The successor function cannot be empty."); | 183 | 256k | assert(preorder && "The preorder function cannot be empty."); | 184 | 256k | assert(postorder && "The postorder function cannot be empty."); | 185 | 256k | assert(terminal && "The terminal function cannot be empty."); | 186 | | | 187 | 256k | std::unordered_set<uint32_t> processed; | 188 | | | 189 | | /// NOTE: work_list is the sequence of nodes from the root node to the node | 190 | | /// being processed in the traversal | 191 | 256k | std::vector<block_info> work_list; | 192 | 256k | work_list.reserve(10); | 193 | | | 194 | 256k | work_list.push_back({entry, std::begin(*successor_func(entry))}); | 195 | 256k | preorder(entry); | 196 | 256k | processed.insert(entry->id()); | 197 | | | 198 | 24.9M | while (!work_list.empty()) { | 199 | 24.7M | block_info& top = work_list.back(); | 200 | 24.7M | if (terminal(top.block) || top.iter == end(*successor_func(top.block))) { | 201 | 10.2M | postorder(top.block); | 202 | 10.2M | work_list.pop_back(); | 203 | 14.4M | } else { | 204 | 14.4M | BB* child = *top.iter; | 205 | 14.4M | top.iter++; | 206 | 14.4M | if (backedge && FindInWorkList(work_list, child->id())) { | 207 | 0 | backedge(top.block, child); | 208 | 0 | } | 209 | 14.4M | if (processed.count(child->id()) == 0) { | 210 | 10.0M | preorder(child); | 211 | 
10.0M | work_list.emplace_back( | 212 | 10.0M | block_info{child, std::begin(*successor_func(child))}); | 213 | 10.0M | processed.insert(child->id()); | 214 | 10.0M | } | 215 | 14.4M | } | 216 | 24.7M | } | 217 | 256k | } |
spvtools::CFA<spvtools::opt::DominatorTreeNode>::DepthFirstTraversal(spvtools::opt::DominatorTreeNode const*, std::__1::function<std::__1::vector<spvtools::opt::DominatorTreeNode*, std::__1::allocator<spvtools::opt::DominatorTreeNode*> > const* (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<void (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<void (spvtools::opt::DominatorTreeNode const*)>, std::__1::function<void (spvtools::opt::DominatorTreeNode const*, spvtools::opt::DominatorTreeNode const*)>, std::__1::function<bool (spvtools::opt::DominatorTreeNode const*)>) Line | Count | Source | 181 | 58.7k | std::function<bool(cbb_ptr)> terminal) { | 182 | 58.7k | assert(successor_func && "The successor function cannot be empty."); | 183 | 58.7k | assert(preorder && "The preorder function cannot be empty."); | 184 | 58.7k | assert(postorder && "The postorder function cannot be empty."); | 185 | 58.7k | assert(terminal && "The terminal function cannot be empty."); | 186 | | | 187 | 58.7k | std::unordered_set<uint32_t> processed; | 188 | | | 189 | | /// NOTE: work_list is the sequence of nodes from the root node to the node | 190 | | /// being processed in the traversal | 191 | 58.7k | std::vector<block_info> work_list; | 192 | 58.7k | work_list.reserve(10); | 193 | | | 194 | 58.7k | work_list.push_back({entry, std::begin(*successor_func(entry))}); | 195 | 58.7k | preorder(entry); | 196 | 58.7k | processed.insert(entry->id()); | 197 | | | 198 | 5.32M | while (!work_list.empty()) { | 199 | 5.26M | block_info& top = work_list.back(); | 200 | 5.26M | if (terminal(top.block) || top.iter == end(*successor_func(top.block))) { | 201 | 2.66M | postorder(top.block); | 202 | 2.66M | work_list.pop_back(); | 203 | 2.66M | } else { | 204 | 2.60M | BB* child = *top.iter; | 205 | 2.60M | top.iter++; | 206 | 2.60M | if (backedge && FindInWorkList(work_list, child->id())) { | 207 | 0 | backedge(top.block, child); | 208 | 0 | } | 209 | 2.60M | if 
(processed.count(child->id()) == 0) { | 210 | 2.60M | preorder(child); | 211 | 2.60M | work_list.emplace_back( | 212 | 2.60M | block_info{child, std::begin(*successor_func(child))}); | 213 | 2.60M | processed.insert(child->id()); | 214 | 2.60M | } | 215 | 2.60M | } | 216 | 5.26M | } | 217 | 58.7k | } |
spvtools::CFA<spvtools::val::BasicBlock>::DepthFirstTraversal(spvtools::val::BasicBlock const*, std::__1::function<std::__1::vector<spvtools::val::BasicBlock*, std::__1::allocator<spvtools::val::BasicBlock*> > const* (spvtools::val::BasicBlock const*)>, std::__1::function<void (spvtools::val::BasicBlock const*)>, std::__1::function<void (spvtools::val::BasicBlock const*)>, std::__1::function<void (spvtools::val::BasicBlock const*, spvtools::val::BasicBlock const*)>, std::__1::function<bool (spvtools::val::BasicBlock const*)>) Line | Count | Source | 181 | 277k | std::function<bool(cbb_ptr)> terminal) { | 182 | 277k | assert(successor_func && "The successor function cannot be empty."); | 183 | 277k | assert(preorder && "The preorder function cannot be empty."); | 184 | 277k | assert(postorder && "The postorder function cannot be empty."); | 185 | 277k | assert(terminal && "The terminal function cannot be empty."); | 186 | | | 187 | 277k | std::unordered_set<uint32_t> processed; | 188 | | | 189 | | /// NOTE: work_list is the sequence of nodes from the root node to the node | 190 | | /// being processed in the traversal | 191 | 277k | std::vector<block_info> work_list; | 192 | 277k | work_list.reserve(10); | 193 | | | 194 | 277k | work_list.push_back({entry, std::begin(*successor_func(entry))}); | 195 | 277k | preorder(entry); | 196 | 277k | processed.insert(entry->id()); | 197 | | | 198 | 8.05M | while (!work_list.empty()) { | 199 | 7.77M | block_info& top = work_list.back(); | 200 | 7.77M | if (terminal(top.block) || top.iter == end(*successor_func(top.block))) { | 201 | 3.10M | postorder(top.block); | 202 | 3.10M | work_list.pop_back(); | 203 | 4.66M | } else { | 204 | 4.66M | BB* child = *top.iter; | 205 | 4.66M | top.iter++; | 206 | 4.66M | if (backedge && FindInWorkList(work_list, child->id())) { | 207 | 39.1k | backedge(top.block, child); | 208 | 39.1k | } | 209 | 4.66M | if (processed.count(child->id()) == 0) { | 210 | 2.82M | preorder(child); | 211 | 2.82M | 
work_list.emplace_back( | 212 | 2.82M | block_info{child, std::begin(*successor_func(child))}); | 213 | 2.82M | processed.insert(child->id()); | 214 | 2.82M | } | 215 | 4.66M | } | 216 | 7.77M | } | 217 | 277k | } |
|
218 | | |
219 | | template <class BB> |
220 | | std::vector<std::pair<BB*, BB*>> CFA<BB>::CalculateDominators( |
221 | 150k | const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) { |
222 | 150k | struct block_detail { |
223 | 150k | size_t dominator; ///< The index of blocks's dominator in post order array |
224 | 150k | size_t postorder_index; ///< The index of the block in the post order array |
225 | 150k | }; |
226 | 150k | const size_t undefined_dom = postorder.size(); |
227 | | |
228 | 150k | std::unordered_map<cbb_ptr, block_detail> idoms; |
229 | 3.89M | for (size_t i = 0; i < postorder.size(); i++) { |
230 | 3.74M | idoms[postorder[i]] = {undefined_dom, i}; |
231 | 3.74M | } |
232 | 150k | idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index; |
233 | | |
234 | 150k | bool changed = true; |
235 | 456k | while (changed) { |
236 | 305k | changed = false; |
237 | 7.65M | for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) { |
238 | 7.35M | const std::vector<BB*>& predecessors = *predecessor_func(*b); |
239 | | // Find the first processed/reachable predecessor that is reachable |
240 | | // in the forward traversal. |
241 | 7.35M | auto res = std::find_if(std::begin(predecessors), std::end(predecessors), |
242 | 7.42M | [&idoms, undefined_dom](BB* pred) { |
243 | 7.42M | return idoms.count(pred) && |
244 | 7.35M | idoms[pred].dominator != undefined_dom; |
245 | 7.42M | }); spvtools::CFA<spvtools::opt::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::opt::BasicBlock const*, std::__1::allocator<spvtools::opt::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::opt::BasicBlock*, std::__1::allocator<spvtools::opt::BasicBlock*> > const* (spvtools::opt::BasicBlock const*)>)::{lambda(spvtools::opt::BasicBlock*)#1}::operator()(spvtools::opt::BasicBlock*) constLine | Count | Source | 242 | 5.20M | [&idoms, undefined_dom](BB* pred) { | 243 | 5.20M | return idoms.count(pred) && | 244 | 5.20M | idoms[pred].dominator != undefined_dom; | 245 | 5.20M | }); |
spvtools::CFA<spvtools::val::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::val::BasicBlock const*, std::__1::allocator<spvtools::val::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::val::BasicBlock*, std::__1::allocator<spvtools::val::BasicBlock*> > const* (spvtools::val::BasicBlock const*)>)::{lambda(spvtools::val::BasicBlock*)#1}::operator()(spvtools::val::BasicBlock*) constLine | Count | Source | 242 | 2.21M | [&idoms, undefined_dom](BB* pred) { | 243 | 2.21M | return idoms.count(pred) && | 244 | 2.14M | idoms[pred].dominator != undefined_dom; | 245 | 2.21M | }); |
|
246 | 7.35M | if (res == end(predecessors)) continue; |
247 | 7.35M | const BB* idom = *res; |
248 | 7.35M | size_t idom_idx = idoms[idom].postorder_index; |
249 | | |
250 | | // all other predecessors |
251 | 10.0M | for (const auto* p : predecessors) { |
252 | 10.0M | if (idom == p) continue; |
253 | | // Only consider nodes reachable in the forward traversal. |
254 | | // Otherwise the intersection doesn't make sense and will never |
255 | | // terminate. |
256 | 2.33M | if (!idoms.count(p)) continue; |
257 | 2.06M | if (idoms[p].dominator != undefined_dom) { |
258 | 1.81M | size_t finger1 = idoms[p].postorder_index; |
259 | 1.81M | size_t finger2 = idom_idx; |
260 | 3.82M | while (finger1 != finger2) { |
261 | 5.99M | while (finger1 < finger2) { |
262 | 3.98M | finger1 = idoms[postorder[finger1]].dominator; |
263 | 3.98M | } |
264 | 2.89M | while (finger2 < finger1) { |
265 | 893k | finger2 = idoms[postorder[finger2]].dominator; |
266 | 893k | } |
267 | 2.00M | } |
268 | 1.81M | idom_idx = finger1; |
269 | 1.81M | } |
270 | 2.06M | } |
271 | 7.35M | if (idoms[*b].dominator != idom_idx) { |
272 | 3.60M | idoms[*b].dominator = idom_idx; |
273 | 3.60M | changed = true; |
274 | 3.60M | } |
275 | 7.35M | } |
276 | 305k | } |
277 | | |
278 | 150k | std::vector<std::pair<bb_ptr, bb_ptr>> out; |
279 | 3.74M | for (auto idom : idoms) { |
280 | | // At this point if there is no dominator for the node, just make it |
281 | | // reflexive. |
282 | 3.74M | auto dominator = std::get<1>(idom).dominator; |
283 | 3.74M | if (dominator == undefined_dom) { |
284 | 254 | dominator = std::get<1>(idom).postorder_index; |
285 | 254 | } |
286 | | // NOTE: performing a const cast for convenient usage with |
287 | | // UpdateImmediateDominators |
288 | 3.74M | out.push_back({const_cast<BB*>(std::get<0>(idom)), |
289 | 3.74M | const_cast<BB*>(postorder[dominator])}); |
290 | 3.74M | } |
291 | | |
292 | | // Sort by postorder index to generate a deterministic ordering of edges. |
293 | 150k | std::sort( |
294 | 150k | out.begin(), out.end(), |
295 | 150k | [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs, |
296 | 28.6M | const std::pair<bb_ptr, bb_ptr>& rhs) { |
297 | 28.6M | assert(lhs.first); |
298 | 28.6M | assert(lhs.second); |
299 | 28.6M | assert(rhs.first); |
300 | 28.6M | assert(rhs.second); |
301 | 28.6M | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, |
302 | 28.6M | idoms[lhs.second].postorder_index); |
303 | 28.6M | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, |
304 | 28.6M | idoms[rhs.second].postorder_index); |
305 | 28.6M | return lhs_indices < rhs_indices; |
306 | 28.6M | }); spvtools::CFA<spvtools::opt::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::opt::BasicBlock const*, std::__1::allocator<spvtools::opt::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::opt::BasicBlock*, std::__1::allocator<spvtools::opt::BasicBlock*> > const* (spvtools::opt::BasicBlock const*)>)::{lambda(std::__1::pair<spvtools::opt::BasicBlock*, spvtools::opt::BasicBlock*> const&, std::__1::pair<spvtools::opt::BasicBlock*, spvtools::opt::BasicBlock*> const&)#1}::operator()(std::__1::pair<spvtools::opt::BasicBlock*, spvtools::opt::BasicBlock*> const&, std::__1::pair<spvtools::opt::BasicBlock*, spvtools::opt::BasicBlock*> const&) constLine | Count | Source | 296 | 22.7M | const std::pair<bb_ptr, bb_ptr>& rhs) { | 297 | 22.7M | assert(lhs.first); | 298 | 22.7M | assert(lhs.second); | 299 | 22.7M | assert(rhs.first); | 300 | 22.7M | assert(rhs.second); | 301 | 22.7M | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, | 302 | 22.7M | idoms[lhs.second].postorder_index); | 303 | 22.7M | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, | 304 | 22.7M | idoms[rhs.second].postorder_index); | 305 | 22.7M | return lhs_indices < rhs_indices; | 306 | 22.7M | }); |
spvtools::CFA<spvtools::val::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::val::BasicBlock const*, std::__1::allocator<spvtools::val::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::val::BasicBlock*, std::__1::allocator<spvtools::val::BasicBlock*> > const* (spvtools::val::BasicBlock const*)>)::{lambda(std::__1::pair<spvtools::val::BasicBlock*, spvtools::val::BasicBlock*> const&, std::__1::pair<spvtools::val::BasicBlock*, spvtools::val::BasicBlock*> const&)#1}::operator()(std::__1::pair<spvtools::val::BasicBlock*, spvtools::val::BasicBlock*> const&, std::__1::pair<spvtools::val::BasicBlock*, spvtools::val::BasicBlock*> const&) constLine | Count | Source | 296 | 5.88M | const std::pair<bb_ptr, bb_ptr>& rhs) { | 297 | 5.88M | assert(lhs.first); | 298 | 5.88M | assert(lhs.second); | 299 | 5.88M | assert(rhs.first); | 300 | 5.88M | assert(rhs.second); | 301 | 5.88M | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, | 302 | 5.88M | idoms[lhs.second].postorder_index); | 303 | 5.88M | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, | 304 | 5.88M | idoms[rhs.second].postorder_index); | 305 | 5.88M | return lhs_indices < rhs_indices; | 306 | 5.88M | }); |
|
307 | 150k | return out; |
308 | 150k | } spvtools::CFA<spvtools::opt::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::opt::BasicBlock const*, std::__1::allocator<spvtools::opt::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::opt::BasicBlock*, std::__1::allocator<spvtools::opt::BasicBlock*> > const* (spvtools::opt::BasicBlock const*)>) Line | Count | Source | 221 | 58.7k | const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) { | 222 | 58.7k | struct block_detail { | 223 | 58.7k | size_t dominator; ///< The index of blocks's dominator in post order array | 224 | 58.7k | size_t postorder_index; ///< The index of the block in the post order array | 225 | 58.7k | }; | 226 | 58.7k | const size_t undefined_dom = postorder.size(); | 227 | | | 228 | 58.7k | std::unordered_map<cbb_ptr, block_detail> idoms; | 229 | 2.72M | for (size_t i = 0; i < postorder.size(); i++) { | 230 | 2.66M | idoms[postorder[i]] = {undefined_dom, i}; | 231 | 2.66M | } | 232 | 58.7k | idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index; | 233 | | | 234 | 58.7k | bool changed = true; | 235 | 176k | while (changed) { | 236 | 117k | changed = false; | 237 | 5.32M | for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) { | 238 | 5.20M | const std::vector<BB*>& predecessors = *predecessor_func(*b); | 239 | | // Find the first processed/reachable predecessor that is reachable | 240 | | // in the forward traversal. 
| 241 | 5.20M | auto res = std::find_if(std::begin(predecessors), std::end(predecessors), | 242 | 5.20M | [&idoms, undefined_dom](BB* pred) { | 243 | 5.20M | return idoms.count(pred) && | 244 | 5.20M | idoms[pred].dominator != undefined_dom; | 245 | 5.20M | }); | 246 | 5.20M | if (res == end(predecessors)) continue; | 247 | 5.20M | const BB* idom = *res; | 248 | 5.20M | size_t idom_idx = idoms[idom].postorder_index; | 249 | | | 250 | | // all other predecessors | 251 | 6.43M | for (const auto* p : predecessors) { | 252 | 6.43M | if (idom == p) continue; | 253 | | // Only consider nodes reachable in the forward traversal. | 254 | | // Otherwise the intersection doesn't make sense and will never | 255 | | // terminate. | 256 | 1.16M | if (!idoms.count(p)) continue; | 257 | 1.02M | if (idoms[p].dominator != undefined_dom) { | 258 | 918k | size_t finger1 = idoms[p].postorder_index; | 259 | 918k | size_t finger2 = idom_idx; | 260 | 1.90M | while (finger1 != finger2) { | 261 | 3.20M | while (finger1 < finger2) { | 262 | 2.22M | finger1 = idoms[postorder[finger1]].dominator; | 263 | 2.22M | } | 264 | 1.36M | while (finger2 < finger1) { | 265 | 385k | finger2 = idoms[postorder[finger2]].dominator; | 266 | 385k | } | 267 | 982k | } | 268 | 918k | idom_idx = finger1; | 269 | 918k | } | 270 | 1.02M | } | 271 | 5.20M | if (idoms[*b].dominator != idom_idx) { | 272 | 2.60M | idoms[*b].dominator = idom_idx; | 273 | 2.60M | changed = true; | 274 | 2.60M | } | 275 | 5.20M | } | 276 | 117k | } | 277 | | | 278 | 58.7k | std::vector<std::pair<bb_ptr, bb_ptr>> out; | 279 | 2.66M | for (auto idom : idoms) { | 280 | | // At this point if there is no dominator for the node, just make it | 281 | | // reflexive. 
| 282 | 2.66M | auto dominator = std::get<1>(idom).dominator; | 283 | 2.66M | if (dominator == undefined_dom) { | 284 | 0 | dominator = std::get<1>(idom).postorder_index; | 285 | 0 | } | 286 | | // NOTE: performing a const cast for convenient usage with | 287 | | // UpdateImmediateDominators | 288 | 2.66M | out.push_back({const_cast<BB*>(std::get<0>(idom)), | 289 | 2.66M | const_cast<BB*>(postorder[dominator])}); | 290 | 2.66M | } | 291 | | | 292 | | // Sort by postorder index to generate a deterministic ordering of edges. | 293 | 58.7k | std::sort( | 294 | 58.7k | out.begin(), out.end(), | 295 | 58.7k | [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs, | 296 | 58.7k | const std::pair<bb_ptr, bb_ptr>& rhs) { | 297 | 58.7k | assert(lhs.first); | 298 | 58.7k | assert(lhs.second); | 299 | 58.7k | assert(rhs.first); | 300 | 58.7k | assert(rhs.second); | 301 | 58.7k | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, | 302 | 58.7k | idoms[lhs.second].postorder_index); | 303 | 58.7k | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, | 304 | 58.7k | idoms[rhs.second].postorder_index); | 305 | 58.7k | return lhs_indices < rhs_indices; | 306 | 58.7k | }); | 307 | 58.7k | return out; | 308 | 58.7k | } |
spvtools::CFA<spvtools::val::BasicBlock>::CalculateDominators(std::__1::vector<spvtools::val::BasicBlock const*, std::__1::allocator<spvtools::val::BasicBlock const*> > const&, std::__1::function<std::__1::vector<spvtools::val::BasicBlock*, std::__1::allocator<spvtools::val::BasicBlock*> > const* (spvtools::val::BasicBlock const*)>) Line | Count | Source | 221 | 92.0k | const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) { | 222 | 92.0k | struct block_detail { | 223 | 92.0k | size_t dominator; ///< The index of blocks's dominator in post order array | 224 | 92.0k | size_t postorder_index; ///< The index of the block in the post order array | 225 | 92.0k | }; | 226 | 92.0k | const size_t undefined_dom = postorder.size(); | 227 | | | 228 | 92.0k | std::unordered_map<cbb_ptr, block_detail> idoms; | 229 | 1.17M | for (size_t i = 0; i < postorder.size(); i++) { | 230 | 1.08M | idoms[postorder[i]] = {undefined_dom, i}; | 231 | 1.08M | } | 232 | 92.0k | idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index; | 233 | | | 234 | 92.0k | bool changed = true; | 235 | 280k | while (changed) { | 236 | 188k | changed = false; | 237 | 2.33M | for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) { | 238 | 2.14M | const std::vector<BB*>& predecessors = *predecessor_func(*b); | 239 | | // Find the first processed/reachable predecessor that is reachable | 240 | | // in the forward traversal. 
| 241 | 2.14M | auto res = std::find_if(std::begin(predecessors), std::end(predecessors), | 242 | 2.14M | [&idoms, undefined_dom](BB* pred) { | 243 | 2.14M | return idoms.count(pred) && | 244 | 2.14M | idoms[pred].dominator != undefined_dom; | 245 | 2.14M | }); | 246 | 2.14M | if (res == end(predecessors)) continue; | 247 | 2.14M | const BB* idom = *res; | 248 | 2.14M | size_t idom_idx = idoms[idom].postorder_index; | 249 | | | 250 | | // all other predecessors | 251 | 3.59M | for (const auto* p : predecessors) { | 252 | 3.59M | if (idom == p) continue; | 253 | | // Only consider nodes reachable in the forward traversal. | 254 | | // Otherwise the intersection doesn't make sense and will never | 255 | | // terminate. | 256 | 1.17M | if (!idoms.count(p)) continue; | 257 | 1.03M | if (idoms[p].dominator != undefined_dom) { | 258 | 901k | size_t finger1 = idoms[p].postorder_index; | 259 | 901k | size_t finger2 = idom_idx; | 260 | 1.92M | while (finger1 != finger2) { | 261 | 2.78M | while (finger1 < finger2) { | 262 | 1.76M | finger1 = idoms[postorder[finger1]].dominator; | 263 | 1.76M | } | 264 | 1.52M | while (finger2 < finger1) { | 265 | 507k | finger2 = idoms[postorder[finger2]].dominator; | 266 | 507k | } | 267 | 1.02M | } | 268 | 901k | idom_idx = finger1; | 269 | 901k | } | 270 | 1.03M | } | 271 | 2.14M | if (idoms[*b].dominator != idom_idx) { | 272 | 999k | idoms[*b].dominator = idom_idx; | 273 | 999k | changed = true; | 274 | 999k | } | 275 | 2.14M | } | 276 | 188k | } | 277 | | | 278 | 92.0k | std::vector<std::pair<bb_ptr, bb_ptr>> out; | 279 | 1.08M | for (auto idom : idoms) { | 280 | | // At this point if there is no dominator for the node, just make it | 281 | | // reflexive. 
| 282 | 1.08M | auto dominator = std::get<1>(idom).dominator; | 283 | 1.08M | if (dominator == undefined_dom) { | 284 | 254 | dominator = std::get<1>(idom).postorder_index; | 285 | 254 | } | 286 | | // NOTE: performing a const cast for convenient usage with | 287 | | // UpdateImmediateDominators | 288 | 1.08M | out.push_back({const_cast<BB*>(std::get<0>(idom)), | 289 | 1.08M | const_cast<BB*>(postorder[dominator])}); | 290 | 1.08M | } | 291 | | | 292 | | // Sort by postorder index to generate a deterministic ordering of edges. | 293 | 92.0k | std::sort( | 294 | 92.0k | out.begin(), out.end(), | 295 | 92.0k | [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs, | 296 | 92.0k | const std::pair<bb_ptr, bb_ptr>& rhs) { | 297 | 92.0k | assert(lhs.first); | 298 | 92.0k | assert(lhs.second); | 299 | 92.0k | assert(rhs.first); | 300 | 92.0k | assert(rhs.second); | 301 | 92.0k | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, | 302 | 92.0k | idoms[lhs.second].postorder_index); | 303 | 92.0k | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, | 304 | 92.0k | idoms[rhs.second].postorder_index); | 305 | 92.0k | return lhs_indices < rhs_indices; | 306 | 92.0k | }); | 307 | 92.0k | return out; | 308 | 92.0k | } |
|
309 | | |
310 | | template <class BB> |
311 | | std::vector<BB*> CFA<BB>::TraversalRoots(const std::vector<BB*>& blocks, |
312 | | get_blocks_func succ_func, |
313 | 85.2k | get_blocks_func pred_func) { |
314 | | // The set of nodes which have been visited from any of the roots so far. |
315 | 85.2k | std::unordered_set<const BB*> visited; |
316 | | |
317 | 1.61M | auto mark_visited = [&visited](const BB* b) { visited.insert(b); }; |
318 | 1.61M | auto ignore_block = [](const BB*) {}; |
319 | 4.05M | auto no_terminal_blocks = [](const BB*) { return false; }; |
320 | | |
321 | 85.2k | auto traverse_from_root = [&mark_visited, &succ_func, &ignore_block, |
322 | 155k | &no_terminal_blocks](const BB* entry) { |
323 | 155k | DepthFirstTraversal(entry, succ_func, mark_visited, ignore_block, |
324 | 155k | no_terminal_blocks); |
325 | 155k | }; |
326 | | |
327 | 85.2k | std::vector<BB*> result; |
328 | | |
329 | | // First collect nodes without predecessors. |
330 | 814k | for (auto block : blocks) { |
331 | 814k | if (pred_func(block)->empty()) { |
332 | 150k | assert(visited.count(block) == 0 && "Malformed graph!"); |
333 | 150k | result.push_back(block); |
334 | 150k | traverse_from_root(block); |
335 | 150k | } |
336 | 814k | } |
337 | | |
338 | | // Now collect other stranded nodes. These must be in unreachable cycles. |
339 | 814k | for (auto block : blocks) { |
340 | 814k | if (visited.count(block) == 0) { |
341 | 5.02k | result.push_back(block); |
342 | 5.02k | traverse_from_root(block); |
343 | 5.02k | } |
344 | 814k | } |
345 | | |
346 | 85.2k | return result; |
347 | 85.2k | } |
348 | | |
349 | | template <class BB> |
350 | | void CFA<BB>::ComputeAugmentedCFG( |
351 | | std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block, |
352 | | BB* pseudo_exit_block, |
353 | | std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map, |
354 | | std::unordered_map<const BB*, std::vector<BB*>>* augmented_predecessors_map, |
355 | 42.6k | get_blocks_func succ_func, get_blocks_func pred_func) { |
356 | | // Compute the successors of the pseudo-entry block, and |
357 | | // the predecessors of the pseudo exit block. |
358 | 42.6k | auto sources = TraversalRoots(ordered_blocks, succ_func, pred_func); |
359 | | |
360 | | // For the predecessor traversals, reverse the order of blocks. This |
361 | | // will affect the post-dominance calculation as follows: |
362 | | // - Suppose you have blocks A and B, with A appearing before B in |
363 | | // the list of blocks. |
364 | | // - Also, A branches only to B, and B branches only to A. |
365 | | // - We want to compute A as dominating B, and B as post-dominating B. |
366 | | // By using reversed blocks for predecessor traversal roots discovery, |
367 | | // we'll add an edge from B to the pseudo-exit node, rather than from A. |
368 | | // All this is needed to correctly process the dominance/post-dominance |
369 | | // constraint when A is a loop header that points to itself as its |
370 | | // own continue target, and B is the latch block for the loop. |
371 | 42.6k | std::vector<BB*> reversed_blocks(ordered_blocks.rbegin(), |
372 | 42.6k | ordered_blocks.rend()); |
373 | 42.6k | auto sinks = TraversalRoots(reversed_blocks, pred_func, succ_func); |
374 | | |
375 | | // Wire up the pseudo entry block. |
376 | 42.6k | (*augmented_successors_map)[pseudo_entry_block] = sources; |
377 | 68.3k | for (auto block : sources) { |
378 | 68.3k | auto& augmented_preds = (*augmented_predecessors_map)[block]; |
379 | 68.3k | const auto preds = pred_func(block); |
380 | 68.3k | augmented_preds.reserve(1 + preds->size()); |
381 | 68.3k | augmented_preds.push_back(pseudo_entry_block); |
382 | 68.3k | augmented_preds.insert(augmented_preds.end(), preds->begin(), preds->end()); |
383 | 68.3k | } |
384 | | |
385 | | // Wire up the pseudo exit block. |
386 | 42.6k | (*augmented_predecessors_map)[pseudo_exit_block] = sinks; |
387 | 86.6k | for (auto block : sinks) { |
388 | 86.6k | auto& augmented_succ = (*augmented_successors_map)[block]; |
389 | 86.6k | const auto succ = succ_func(block); |
390 | 86.6k | augmented_succ.reserve(1 + succ->size()); |
391 | 86.6k | augmented_succ.push_back(pseudo_exit_block); |
392 | 86.6k | augmented_succ.insert(augmented_succ.end(), succ->begin(), succ->end()); |
393 | 86.6k | } |
394 | 42.6k | } |
395 | | |
396 | | } // namespace spvtools |
397 | | |
398 | | #endif // SOURCE_CFA_H_ |