Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Our algorithm works by first identifying a subset of nodes that must always
10
// be instrumented. We call these nodes ambiguous because knowing the coverage
11
// of all remaining nodes is not enough to infer their coverage status.
12
//
13
// In general a node v is ambiguous if there exists two entry-to-terminal paths
14
// P_1 and P_2 such that:
15
//   1. v not in P_1 but P_1 visits a predecessor of v, and
16
//   2. v not in P_2 but P_2 visits a successor of v.
17
//
18
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19
// coverage from the coverage of its predecessors, or if condition 2 fails, we
20
// can infer v’s coverage from the coverage of its successors.
21
//
22
// Sadly, there are example CFGs where it is not possible to infer all nodes
23
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
24
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
25
//
26
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
27
//
28
//===----------------------------------------------------------------------===//
29
30
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31
#include "llvm/ADT/DepthFirstIterator.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Support/CRC.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/GraphWriter.h"
36
#include "llvm/Support/raw_ostream.h"
37
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38
39
using namespace llvm;
40
41
#define DEBUG_TYPE "pgo-block-coverage"
42
43
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
44
STATISTIC(NumIneligibleFunctions,
45
          "Number of functions for which BCI cannot run on");
46
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
47
STATISTIC(NumInstrumentedBlocks,
48
          "Number of basic blocks instrumented for coverage");
49
50
BlockCoverageInference::BlockCoverageInference(const Function &F,
51
                                               bool ForceInstrumentEntry)
52
0
    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
53
0
  findDependencies();
54
0
  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
55
56
0
  ++NumFunctions;
57
0
  for (auto &BB : F) {
58
0
    ++NumBlocks;
59
0
    if (shouldInstrumentBlock(BB))
60
0
      ++NumInstrumentedBlocks;
61
0
  }
62
0
}
63
64
BlockCoverageInference::BlockSet
65
0
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
66
0
  assert(BB.getParent() == &F);
67
0
  BlockSet Dependencies;
68
0
  auto It = PredecessorDependencies.find(&BB);
69
0
  if (It != PredecessorDependencies.end())
70
0
    Dependencies.set_union(It->second);
71
0
  It = SuccessorDependencies.find(&BB);
72
0
  if (It != SuccessorDependencies.end())
73
0
    Dependencies.set_union(It->second);
74
0
  return Dependencies;
75
0
}
76
77
0
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
78
0
  JamCRC JC;
79
0
  uint64_t Index = 0;
80
0
  for (auto &BB : F) {
81
0
    if (shouldInstrumentBlock(BB)) {
82
0
      uint8_t Data[8];
83
0
      support::endian::write64le(Data, Index);
84
0
      JC.update(Data);
85
0
    }
86
0
    Index++;
87
0
  }
88
0
  return JC.getCRC();
89
0
}
90
91
0
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
92
0
  assert(BB.getParent() == &F);
93
0
  auto It = PredecessorDependencies.find(&BB);
94
0
  if (It != PredecessorDependencies.end() && It->second.size())
95
0
    return false;
96
0
  It = SuccessorDependencies.find(&BB);
97
0
  if (It != SuccessorDependencies.end() && It->second.size())
98
0
    return false;
99
0
  return true;
100
0
}
101
102
0
void BlockCoverageInference::findDependencies() {
103
0
  assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
104
  // Empirical analysis shows that this algorithm finishes within 5 seconds for
105
  // functions with fewer than 1.5K blocks.
106
0
  if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
107
0
    ++NumIneligibleFunctions;
108
0
    return;
109
0
  }
110
111
0
  SmallVector<const BasicBlock *, 4> TerminalBlocks;
112
0
  for (auto &BB : F)
113
0
    if (succ_empty(&BB))
114
0
      TerminalBlocks.push_back(&BB);
115
116
  // Traverse the CFG backwards from the terminal blocks to make sure every
117
  // block can reach some terminal block. Otherwise this algorithm will not work
118
  // and we must fall back to instrumenting every block.
119
0
  df_iterator_default_set<const BasicBlock *> Visited;
120
0
  for (auto *BB : TerminalBlocks)
121
0
    for (auto *N : inverse_depth_first_ext(BB, Visited))
122
0
      (void)N;
123
0
  if (F.size() != Visited.size()) {
124
0
    ++NumIneligibleFunctions;
125
0
    return;
126
0
  }
127
128
  // The current implementation for computing `PredecessorDependencies` and
129
  // `SuccessorDependencies` runs in quadratic time with respect to the number
130
  // of basic blocks. While we do have a more complicated linear time algorithm
131
  // in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132
  // significant speedup in practice given that most functions tend to be
133
  // relatively small in size for intended use cases.
134
0
  auto &EntryBlock = F.getEntryBlock();
135
0
  for (auto &BB : F) {
136
    // The set of blocks that are reachable while avoiding BB.
137
0
    BlockSet ReachableFromEntry, ReachableFromTerminal;
138
0
    getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
139
0
                         ReachableFromEntry);
140
0
    for (auto *TerminalBlock : TerminalBlocks)
141
0
      getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
142
0
                           ReachableFromTerminal);
143
144
0
    auto Preds = predecessors(&BB);
145
0
    bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
146
0
      return ReachableFromEntry.count(Pred) &&
147
0
             ReachableFromTerminal.count(Pred);
148
0
    });
149
0
    if (!HasSuperReachablePred)
150
0
      for (auto *Pred : Preds)
151
0
        if (ReachableFromEntry.count(Pred))
152
0
          PredecessorDependencies[&BB].insert(Pred);
153
154
0
    auto Succs = successors(&BB);
155
0
    bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
156
0
      return ReachableFromEntry.count(Succ) &&
157
0
             ReachableFromTerminal.count(Succ);
158
0
    });
159
0
    if (!HasSuperReachableSucc)
160
0
      for (auto *Succ : Succs)
161
0
        if (ReachableFromTerminal.count(Succ))
162
0
          SuccessorDependencies[&BB].insert(Succ);
163
0
  }
164
165
0
  if (ForceInstrumentEntry) {
166
    // Force the entry block to be instrumented by clearing the blocks it can
167
    // infer coverage from.
168
0
    PredecessorDependencies[&EntryBlock].clear();
169
0
    SuccessorDependencies[&EntryBlock].clear();
170
0
  }
171
172
  // Construct a graph where blocks are connected if there is a mutual
173
  // dependency between them. This graph has a special property that it contains
174
  // only paths.
175
0
  DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
176
0
  for (auto &BB : F) {
177
0
    for (auto *Succ : successors(&BB)) {
178
0
      if (SuccessorDependencies[&BB].count(Succ) &&
179
0
          PredecessorDependencies[Succ].count(&BB)) {
180
0
        AdjacencyList[&BB].insert(Succ);
181
0
        AdjacencyList[Succ].insert(&BB);
182
0
      }
183
0
    }
184
0
  }
185
186
  // Given a path with at least one node, return the next node on the path.
187
0
  auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
188
0
    assert(Path.size());
189
0
    auto &Neighbors = AdjacencyList[Path.back()];
190
0
    if (Path.size() == 1) {
191
      // This is the first node on the path, return its neighbor.
192
0
      assert(Neighbors.size() == 1);
193
0
      return Neighbors.front();
194
0
    } else if (Neighbors.size() == 2) {
195
      // This is the middle of the path, find the neighbor that is not on the
196
      // path already.
197
0
      assert(Path.size() >= 2);
198
0
      return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
199
0
    }
200
    // This is the end of the path.
201
0
    assert(Neighbors.size() == 1);
202
0
    return nullptr;
203
0
  };
204
205
  // Remove all cycles in the inferencing graph.
206
0
  for (auto &BB : F) {
207
0
    if (AdjacencyList[&BB].size() == 1) {
208
      // We found the head of some path.
209
0
      BlockSet Path;
210
0
      Path.insert(&BB);
211
0
      while (const BasicBlock *Next = getNextOnPath(Path))
212
0
        Path.insert(Next);
213
0
      LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
214
215
      // Remove these nodes from the graph so we don't discover this path again.
216
0
      for (auto *BB : Path)
217
0
        AdjacencyList[BB].clear();
218
219
      // Finally, remove the cycles.
220
0
      if (PredecessorDependencies[Path.front()].size()) {
221
0
        for (auto *BB : Path)
222
0
          if (BB != Path.back())
223
0
            SuccessorDependencies[BB].clear();
224
0
      } else {
225
0
        for (auto *BB : Path)
226
0
          if (BB != Path.front())
227
0
            PredecessorDependencies[BB].clear();
228
0
      }
229
0
    }
230
0
  }
231
0
  LLVM_DEBUG(dump(dbgs()));
232
0
}
233
234
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
235
                                                  const BasicBlock &Avoid,
236
                                                  bool IsForward,
237
0
                                                  BlockSet &Reachable) const {
238
0
  df_iterator_default_set<const BasicBlock *> Visited;
239
0
  Visited.insert(&Avoid);
240
0
  if (IsForward) {
241
0
    auto Range = depth_first_ext(&Start, Visited);
242
0
    Reachable.insert(Range.begin(), Range.end());
243
0
  } else {
244
0
    auto Range = inverse_depth_first_ext(&Start, Visited);
245
0
    Reachable.insert(Range.begin(), Range.end());
246
0
  }
247
0
}
248
249
namespace llvm {
250
class DotFuncBCIInfo {
251
private:
252
  const BlockCoverageInference *BCI;
253
  const DenseMap<const BasicBlock *, bool> *Coverage;
254
255
public:
256
  DotFuncBCIInfo(const BlockCoverageInference *BCI,
257
                 const DenseMap<const BasicBlock *, bool> *Coverage)
258
0
      : BCI(BCI), Coverage(Coverage) {}
259
260
0
  const Function &getFunction() { return BCI->F; }
261
262
0
  bool isInstrumented(const BasicBlock *BB) const {
263
0
    return BCI->shouldInstrumentBlock(*BB);
264
0
  }
265
266
0
  bool isCovered(const BasicBlock *BB) const {
267
0
    return Coverage && Coverage->lookup(BB);
268
0
  }
269
270
0
  bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
271
0
    return BCI->getDependencies(*Src).count(Dest);
272
0
  }
273
};
274
275
template <>
276
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
277
0
  static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
278
0
    return &(Info->getFunction().getEntryBlock());
279
0
  }
280
281
  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282
  using nodes_iterator = pointer_iterator<Function::const_iterator>;
283
284
0
  static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
285
0
    return nodes_iterator(Info->getFunction().begin());
286
0
  }
287
288
0
  static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
289
0
    return nodes_iterator(Info->getFunction().end());
290
0
  }
291
292
0
  static size_t size(DotFuncBCIInfo *Info) {
293
0
    return Info->getFunction().size();
294
0
  }
295
};
296
297
template <>
298
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
299
300
0
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
301
302
0
  static std::string getGraphName(DotFuncBCIInfo *Info) {
303
0
    return "BCI CFG for " + Info->getFunction().getName().str();
304
0
  }
305
306
0
  std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
307
0
    return Node->getName().str();
308
0
  }
309
310
  std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
311
0
                                DotFuncBCIInfo *Info) {
312
0
    const BasicBlock *Dest = *I;
313
0
    if (Info->isDependent(Src, Dest))
314
0
      return "color=red";
315
0
    if (Info->isDependent(Dest, Src))
316
0
      return "color=blue";
317
0
    return "";
318
0
  }
319
320
0
  std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
321
0
    std::string Result;
322
0
    if (Info->isInstrumented(Node))
323
0
      Result += "style=filled,fillcolor=gray";
324
0
    if (Info->isCovered(Node))
325
0
      Result += std::string(Result.empty() ? "" : ",") + "color=red";
326
0
    return Result;
327
0
  }
328
};
329
330
} // namespace llvm
331
332
void BlockCoverageInference::viewBlockCoverageGraph(
333
0
    const DenseMap<const BasicBlock *, bool> *Coverage) const {
334
0
  DotFuncBCIInfo Info(this, Coverage);
335
0
  WriteGraph(&Info, "BCI", false,
336
0
             "Block Coverage Inference for " + F.getName());
337
0
}
338
339
0
void BlockCoverageInference::dump(raw_ostream &OS) const {
340
0
  OS << "Minimal block coverage for function \'" << F.getName()
341
0
     << "\' (Instrumented=*)\n";
342
0
  for (auto &BB : F) {
343
0
    OS << (shouldInstrumentBlock(BB) ? "* " : "  ") << BB.getName() << "\n";
344
0
    auto It = PredecessorDependencies.find(&BB);
345
0
    if (It != PredecessorDependencies.end() && It->second.size())
346
0
      OS << "    PredDeps = " << getBlockNames(It->second) << "\n";
347
0
    It = SuccessorDependencies.find(&BB);
348
0
    if (It != SuccessorDependencies.end() && It->second.size())
349
0
      OS << "    SuccDeps = " << getBlockNames(It->second) << "\n";
350
0
  }
351
0
  OS << "  Instrumented Blocks Hash = 0x"
352
0
     << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
353
0
}
354
355
std::string
356
0
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
357
0
  std::string Result;
358
0
  raw_string_ostream OS(Result);
359
0
  OS << "[";
360
0
  if (!BBs.empty()) {
361
0
    OS << BBs.front()->getName();
362
0
    BBs = BBs.drop_front();
363
0
  }
364
0
  for (auto *BB : BBs)
365
0
    OS << ", " << BB->getName();
366
0
  OS << "]";
367
0
  return OS.str();
368
0
}