/src/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Our algorithm works by first identifying a subset of nodes that must always
// be instrumented. We call these nodes ambiguous because knowing the coverage
// of all remaining nodes is not enough to infer their coverage status.
//
// In general, a node v is ambiguous if there exist two entry-to-terminal paths
// P_1 and P_2 such that:
//   1. v is not in P_1 but P_1 visits a predecessor of v, and
//   2. v is not in P_2 but P_2 visits a successor of v.
//
// If a node v is not ambiguous, then if condition 1 fails, we can infer v's
// coverage from the coverage of its predecessors, or if condition 2 fails, we
// can infer v's coverage from the coverage of its successors.
//
// Sadly, there are example CFGs where it is not possible to infer all nodes
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
//
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
//
//===----------------------------------------------------------------------===//
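// As an illustrative example (a hypothetical CFG, not taken from the paper),
// consider the diamond
//
//            entry
//            /   \
//           a     b
//            \   /
//            exit
//
// Both a and b are ambiguous: the path entry -> a -> exit avoids b yet visits
// b's predecessor (entry) and b's successor (exit), and symmetrically for a.
// By contrast, entry and exit lie on every entry-to-terminal path, so their
// coverage can be inferred from the coverage of a and b and they need no
// counter of their own (unless entry instrumentation is forced).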

#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

#define DEBUG_TYPE "pgo-block-coverage"

STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
STATISTIC(NumIneligibleFunctions,
          "Number of functions that BCI cannot run on");
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
STATISTIC(NumInstrumentedBlocks,
          "Number of basic blocks instrumented for coverage");

BlockCoverageInference::BlockCoverageInference(const Function &F,
                                               bool ForceInstrumentEntry)
    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
  findDependencies();
  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));

  ++NumFunctions;
  for (auto &BB : F) {
    ++NumBlocks;
    if (shouldInstrumentBlock(BB))
      ++NumInstrumentedBlocks;
  }
}

BlockCoverageInference::BlockSet
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
  assert(BB.getParent() == &F);
  BlockSet Dependencies;
  auto It = PredecessorDependencies.find(&BB);
  if (It != PredecessorDependencies.end())
    Dependencies.set_union(It->second);
  It = SuccessorDependencies.find(&BB);
  if (It != SuccessorDependencies.end())
    Dependencies.set_union(It->second);
  return Dependencies;
}

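// Hash the indices of the instrumented blocks: every block that needs a
// counter contributes its position within the function to a CRC, so a change
// in which blocks are instrumented changes the resulting hash.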
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
  JamCRC JC;
  uint64_t Index = 0;
  for (auto &BB : F) {
    if (shouldInstrumentBlock(BB)) {
      uint8_t Data[8];
      support::endian::write64le(Data, Index);
      JC.update(Data);
    }
    Index++;
  }
  return JC.getCRC();
}

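// A block needs its own counter exactly when its coverage cannot be inferred
// from other blocks, i.e., when it has neither predecessor nor successor
// dependencies.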
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
  assert(BB.getParent() == &F);
  auto It = PredecessorDependencies.find(&BB);
  if (It != PredecessorDependencies.end() && It->second.size())
    return false;
  It = SuccessorDependencies.find(&BB);
  if (It != SuccessorDependencies.end() && It->second.size())
    return false;
  return true;
}

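// Populate PredecessorDependencies and SuccessorDependencies in four steps:
// bail out on noreturn or very large functions, bail out if some block cannot
// reach a terminal block, compute each block's dependencies from reachability
// sets that avoid that block, and finally break inference cycles along paths
// of mutually dependent blocks. Bailing out leaves both maps empty, which
// makes shouldInstrumentBlock() return true for every block, i.e., full
// instrumentation is the fallback.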
void BlockCoverageInference::findDependencies() {
  assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
  // Empirical analysis shows that this algorithm finishes within 5 seconds for
  // functions with fewer than 1.5K blocks.
  if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
    ++NumIneligibleFunctions;
    return;
  }

  SmallVector<const BasicBlock *, 4> TerminalBlocks;
  for (auto &BB : F)
    if (succ_empty(&BB))
      TerminalBlocks.push_back(&BB);

  // Traverse the CFG backwards from the terminal blocks to make sure every
  // block can reach some terminal block. Otherwise this algorithm will not
  // work and we must fall back to instrumenting every block.
  df_iterator_default_set<const BasicBlock *> Visited;
  for (auto *BB : TerminalBlocks)
    for (auto *N : inverse_depth_first_ext(BB, Visited))
      (void)N;
  if (F.size() != Visited.size()) {
    ++NumIneligibleFunctions;
    return;
  }

  // The current implementation for computing `PredecessorDependencies` and
  // `SuccessorDependencies` runs in quadratic time with respect to the number
  // of basic blocks. While we do have a more complicated linear-time algorithm
  // in https://arxiv.org/abs/2208.13907, we do not know if it will give a
  // significant speedup in practice, given that most functions tend to be
  // relatively small for the intended use cases.
  auto &EntryBlock = F.getEntryBlock();
  for (auto &BB : F) {
    // The set of blocks that are reachable while avoiding BB.
    BlockSet ReachableFromEntry, ReachableFromTerminal;
    getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
                         ReachableFromEntry);
    for (auto *TerminalBlock : TerminalBlocks)
      getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
                           ReachableFromTerminal);

    auto Preds = predecessors(&BB);
    bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
      return ReachableFromEntry.count(Pred) &&
             ReachableFromTerminal.count(Pred);
    });
    if (!HasSuperReachablePred)
      for (auto *Pred : Preds)
        if (ReachableFromEntry.count(Pred))
          PredecessorDependencies[&BB].insert(Pred);

    auto Succs = successors(&BB);
    bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
      return ReachableFromEntry.count(Succ) &&
             ReachableFromTerminal.count(Succ);
    });
    if (!HasSuperReachableSucc)
      for (auto *Succ : Succs)
        if (ReachableFromTerminal.count(Succ))
          SuccessorDependencies[&BB].insert(Succ);
  }

  if (ForceInstrumentEntry) {
    // Force the entry block to be instrumented by clearing the blocks it can
    // infer coverage from.
    PredecessorDependencies[&EntryBlock].clear();
    SuccessorDependencies[&EntryBlock].clear();
  }

  // Construct a graph where blocks are connected if there is a mutual
  // dependency between them. This graph has the special property that it
  // contains only paths.
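  // For example, in a straight-line chain entry -> a -> exit (with entry
  // instrumentation not forced), each adjacent pair of blocks is mutually
  // dependent, so the whole chain shows up here as a single path.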
  DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
  for (auto &BB : F) {
    for (auto *Succ : successors(&BB)) {
      if (SuccessorDependencies[&BB].count(Succ) &&
          PredecessorDependencies[Succ].count(&BB)) {
        AdjacencyList[&BB].insert(Succ);
        AdjacencyList[Succ].insert(&BB);
      }
    }
  }

  // Given a path with at least one node, return the next node on the path.
  auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
    assert(Path.size());
    auto &Neighbors = AdjacencyList[Path.back()];
    if (Path.size() == 1) {
      // This is the first node on the path, return its neighbor.
      assert(Neighbors.size() == 1);
      return Neighbors.front();
    } else if (Neighbors.size() == 2) {
      // This is the middle of the path, find the neighbor that is not on the
      // path already.
      assert(Path.size() >= 2);
      return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
    }
    // This is the end of the path.
    assert(Neighbors.size() == 1);
    return nullptr;
  };

  // Remove all cycles in the inferencing graph.
  for (auto &BB : F) {
    if (AdjacencyList[&BB].size() == 1) {
      // We found the head of some path.
      BlockSet Path;
      Path.insert(&BB);
      while (const BasicBlock *Next = getNextOnPath(Path))
        Path.insert(Next);
      LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");

      // Remove these nodes from the graph so we don't discover this path
      // again.
      for (auto *BB : Path)
        AdjacencyList[BB].clear();

      // Finally, remove the cycles.
      if (PredecessorDependencies[Path.front()].size()) {
        for (auto *BB : Path)
          if (BB != Path.back())
            SuccessorDependencies[BB].clear();
      } else {
        for (auto *BB : Path)
          if (BB != Path.front())
            PredecessorDependencies[BB].clear();
      }
    }
  }
  LLVM_DEBUG(dump(dbgs()));
}

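// Collect the blocks reachable from Start, walking successor edges if
// IsForward is true and predecessor edges otherwise, while never passing
// through Avoid (it is pre-marked as visited so the traversal skips it).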
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
                                                  const BasicBlock &Avoid,
                                                  bool IsForward,
                                                  BlockSet &Reachable) const {
  df_iterator_default_set<const BasicBlock *> Visited;
  Visited.insert(&Avoid);
  if (IsForward) {
    auto Range = depth_first_ext(&Start, Visited);
    Reachable.insert(Range.begin(), Range.end());
  } else {
    auto Range = inverse_depth_first_ext(&Start, Visited);
    Reachable.insert(Range.begin(), Range.end());
  }
}

namespace llvm {
class DotFuncBCIInfo {
private:
  const BlockCoverageInference *BCI;
  const DenseMap<const BasicBlock *, bool> *Coverage;

public:
  DotFuncBCIInfo(const BlockCoverageInference *BCI,
                 const DenseMap<const BasicBlock *, bool> *Coverage)
      : BCI(BCI), Coverage(Coverage) {}

  const Function &getFunction() { return BCI->F; }

  bool isInstrumented(const BasicBlock *BB) const {
    return BCI->shouldInstrumentBlock(*BB);
  }

  bool isCovered(const BasicBlock *BB) const {
    return Coverage && Coverage->lookup(BB);
  }

  bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
    return BCI->getDependencies(*Src).count(Dest);
  }
};

template <>
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
  static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
    return &(Info->getFunction().getEntryBlock());
  }

  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
  using nodes_iterator = pointer_iterator<Function::const_iterator>;

  static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
    return nodes_iterator(Info->getFunction().begin());
  }

  static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
    return nodes_iterator(Info->getFunction().end());
  }

  static size_t size(DotFuncBCIInfo *Info) {
    return Info->getFunction().size();
  }
};

template <>
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {

  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}

  static std::string getGraphName(DotFuncBCIInfo *Info) {
    return "BCI CFG for " + Info->getFunction().getName().str();
  }

  std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
    return Node->getName().str();
  }

  std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
                                DotFuncBCIInfo *Info) {
    const BasicBlock *Dest = *I;
    if (Info->isDependent(Src, Dest))
      return "color=red";
    if (Info->isDependent(Dest, Src))
      return "color=blue";
    return "";
  }

  std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
    std::string Result;
    if (Info->isInstrumented(Node))
      Result += "style=filled,fillcolor=gray";
    if (Info->isCovered(Node))
      Result += std::string(Result.empty() ? "" : ",") + "color=red";
    return Result;
  }
};

} // namespace llvm

331 | | |
332 | | void BlockCoverageInference::viewBlockCoverageGraph( |
333 | 0 | const DenseMap<const BasicBlock *, bool> *Coverage) const { |
334 | 0 | DotFuncBCIInfo Info(this, Coverage); |
335 | 0 | WriteGraph(&Info, "BCI", false, |
336 | 0 | "Block Coverage Inference for " + F.getName()); |
337 | 0 | } |
338 | | |
339 | 0 | void BlockCoverageInference::dump(raw_ostream &OS) const { |
340 | 0 | OS << "Minimal block coverage for function \'" << F.getName() |
341 | 0 | << "\' (Instrumented=*)\n"; |
342 | 0 | for (auto &BB : F) { |
343 | 0 | OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n"; |
344 | 0 | auto It = PredecessorDependencies.find(&BB); |
345 | 0 | if (It != PredecessorDependencies.end() && It->second.size()) |
346 | 0 | OS << " PredDeps = " << getBlockNames(It->second) << "\n"; |
347 | 0 | It = SuccessorDependencies.find(&BB); |
348 | 0 | if (It != SuccessorDependencies.end() && It->second.size()) |
349 | 0 | OS << " SuccDeps = " << getBlockNames(It->second) << "\n"; |
350 | 0 | } |
351 | 0 | OS << " Instrumented Blocks Hash = 0x" |
352 | 0 | << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n"; |
353 | 0 | } |
354 | | |
355 | | std::string |
356 | 0 | BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) { |
357 | 0 | std::string Result; |
358 | 0 | raw_string_ostream OS(Result); |
359 | 0 | OS << "["; |
360 | 0 | if (!BBs.empty()) { |
361 | 0 | OS << BBs.front()->getName(); |
362 | 0 | BBs = BBs.drop_front(); |
363 | 0 | } |
364 | 0 | for (auto *BB : BBs) |
365 | 0 | OS << ", " << BB->getName(); |
366 | 0 | OS << "]"; |
367 | 0 | return OS.str(); |
368 | 0 | } |