Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Run a basic correctness check on the IR to ensure that Safepoints - if
10
// they've been inserted - were inserted correctly.  In particular, look for use
11
// of non-relocated values after a safepoint.  It's primary use is to check the
12
// correctness of safepoint insertion immediately after insertion, but it can
13
// also be used to verify that later transforms have not found a way to break
14
// safepoint semantics.
15
//
16
// In its current form, this verifier checks a property which is sufficient, but
17
// not necessary for correctness.  There are some cases where an unrelocated
18
// pointer can be used after the safepoint.  Consider this example:
19
//
20
//    a = ...
21
//    b = ...
22
//    (a',b') = safepoint(a,b)
23
//    c = cmp eq a b
24
//    br c, ..., ....
25
//
26
// Because it is valid to reorder 'c' above the safepoint, this is legal.  In
27
// practice, this is a somewhat uncommon transform, but CodeGenPrep does create
28
// idioms like this.  The verifier knows about these cases and avoids reporting
29
// false positives.
30
//
31
//===----------------------------------------------------------------------===//
32
33
#include "llvm/IR/SafepointIRVerifier.h"
34
#include "llvm/ADT/DenseSet.h"
35
#include "llvm/ADT/PostOrderIterator.h"
36
#include "llvm/ADT/SetOperations.h"
37
#include "llvm/ADT/SetVector.h"
38
#include "llvm/IR/BasicBlock.h"
39
#include "llvm/IR/Dominators.h"
40
#include "llvm/IR/Function.h"
41
#include "llvm/IR/InstrTypes.h"
42
#include "llvm/IR/Instructions.h"
43
#include "llvm/IR/Statepoint.h"
44
#include "llvm/IR/Value.h"
45
#include "llvm/InitializePasses.h"
46
#include "llvm/Support/Allocator.h"
47
#include "llvm/Support/CommandLine.h"
48
#include "llvm/Support/Debug.h"
49
#include "llvm/Support/raw_ostream.h"
50
51
#define DEBUG_TYPE "safepoint-ir-verifier"
52
53
using namespace llvm;
54
55
/// This option is used for writing test cases.  Instead of crashing the program
56
/// when verification fails, report a message to the console (for FileCheck
57
/// usage) and continue execution as if nothing happened.
58
static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
59
                               cl::init(false));
60
61
namespace {
62
63
/// This CFG Deadness finds dead blocks and edges. Algorithm starts with a set
64
/// of blocks unreachable from entry then propagates deadness using foldable
65
/// conditional branches without modifying the CFG. GVN does this too, but it
66
/// changes the CFG by splitting critical edges. In most cases passes rely on SimplifyCFG to
67
/// clean up dead blocks, but in some cases, like verification or loop passes,
68
/// it's not possible.
69
class CFGDeadness {
70
  const DominatorTree *DT = nullptr;
71
  SetVector<const BasicBlock *> DeadBlocks;
72
  SetVector<const Use *> DeadEdges; // Contains all dead edges from live blocks.
73
74
public:
75
  /// Return the edge that corresponds to the predecessor.
76
0
  static const Use& getEdge(const_pred_iterator &PredIt) {
77
0
    auto &PU = PredIt.getUse();
78
0
    return PU.getUser()->getOperandUse(PU.getOperandNo());
79
0
  }
80
81
  /// Return true if there is at least one live edge that corresponds to the
82
  /// basic block InBB listed in the phi node.
83
0
  bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
84
0
    assert(!isDeadBlock(InBB) && "block must be live");
85
0
    const BasicBlock* BB = PN->getParent();
86
0
    bool Listed = false;
87
0
    for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
88
0
      if (InBB == *PredIt) {
89
0
        if (!isDeadEdge(&getEdge(PredIt)))
90
0
          return true;
91
0
        Listed = true;
92
0
      }
93
0
    }
94
0
    (void)Listed;
95
0
    assert(Listed && "basic block is not found among incoming blocks");
96
0
    return false;
97
0
  }
98
99
100
0
  bool isDeadBlock(const BasicBlock *BB) const {
101
0
    return DeadBlocks.count(BB);
102
0
  }
103
104
0
  bool isDeadEdge(const Use *U) const {
105
0
    assert(cast<Instruction>(U->getUser())->isTerminator() &&
106
0
           "edge must be operand of terminator");
107
0
    assert(cast_or_null<BasicBlock>(U->get()) &&
108
0
           "edge must refer to basic block");
109
0
    assert(!isDeadBlock(cast<Instruction>(U->getUser())->getParent()) &&
110
0
           "isDeadEdge() must be applied to edge from live block");
111
0
    return DeadEdges.count(U);
112
0
  }
113
114
0
  bool hasLiveIncomingEdges(const BasicBlock *BB) const {
115
    // Check if all incoming edges are dead.
116
0
    for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
117
0
      auto &PU = PredIt.getUse();
118
0
      const Use &U = PU.getUser()->getOperandUse(PU.getOperandNo());
119
0
      if (!isDeadBlock(*PredIt) && !isDeadEdge(&U))
120
0
        return true; // Found a live edge.
121
0
    }
122
0
    return false;
123
0
  }
124
125
0
  void processFunction(const Function &F, const DominatorTree &DT) {
126
0
    this->DT = &DT;
127
128
    // Start with all blocks unreachable from entry.
129
0
    for (const BasicBlock &BB : F)
130
0
      if (!DT.isReachableFromEntry(&BB))
131
0
        DeadBlocks.insert(&BB);
132
133
    // Top-down walk of the dominator tree
134
0
    ReversePostOrderTraversal<const Function *> RPOT(&F);
135
0
    for (const BasicBlock *BB : RPOT) {
136
0
      const Instruction *TI = BB->getTerminator();
137
0
      assert(TI && "blocks must be well formed");
138
139
      // For conditional branches, we can perform simple conditional propagation on
140
      // the condition value itself.
141
0
      const BranchInst *BI = dyn_cast<BranchInst>(TI);
142
0
      if (!BI || !BI->isConditional() || !isa<Constant>(BI->getCondition()))
143
0
        continue;
144
145
      // If a branch has two identical successors, we cannot declare either dead.
146
0
      if (BI->getSuccessor(0) == BI->getSuccessor(1))
147
0
        continue;
148
149
0
      ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
150
0
      if (!Cond)
151
0
        continue;
152
153
0
      addDeadEdge(BI->getOperandUse(Cond->getZExtValue() ? 1 : 2));
154
0
    }
155
0
  }
156
157
protected:
158
0
  void addDeadBlock(const BasicBlock *BB) {
159
0
    SmallVector<const BasicBlock *, 4> NewDead;
160
0
    SmallSetVector<const BasicBlock *, 4> DF;
161
162
0
    NewDead.push_back(BB);
163
0
    while (!NewDead.empty()) {
164
0
      const BasicBlock *D = NewDead.pop_back_val();
165
0
      if (isDeadBlock(D))
166
0
        continue;
167
168
      // All blocks dominated by D are dead.
169
0
      SmallVector<BasicBlock *, 8> Dom;
170
0
      DT->getDescendants(const_cast<BasicBlock*>(D), Dom);
171
      // Do not need to mark all in and out edges dead
172
      // because BB is marked dead and this is enough
173
      // to run further.
174
0
      DeadBlocks.insert(Dom.begin(), Dom.end());
175
176
      // Figure out the dominance-frontier(D).
177
0
      for (BasicBlock *B : Dom)
178
0
        for (BasicBlock *S : successors(B))
179
0
          if (!isDeadBlock(S) && !hasLiveIncomingEdges(S))
180
0
            NewDead.push_back(S);
181
0
    }
182
0
  }
183
184
0
  void addDeadEdge(const Use &DeadEdge) {
185
0
    if (!DeadEdges.insert(&DeadEdge))
186
0
      return;
187
188
0
    BasicBlock *BB = cast_or_null<BasicBlock>(DeadEdge.get());
189
0
    if (hasLiveIncomingEdges(BB))
190
0
      return;
191
192
0
    addDeadBlock(BB);
193
0
  }
194
};
195
} // namespace
196
197
static void Verify(const Function &F, const DominatorTree &DT,
198
                   const CFGDeadness &CD);
199
200
namespace llvm {
201
PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
202
0
                                               FunctionAnalysisManager &AM) {
203
0
  const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
204
0
  CFGDeadness CD;
205
0
  CD.processFunction(F, DT);
206
0
  Verify(F, DT, CD);
207
0
  return PreservedAnalyses::all();
208
0
}
209
} // namespace llvm
210
211
namespace {
212
213
struct SafepointIRVerifier : public FunctionPass {
214
  static char ID; // Pass identification, replacement for typeid
215
0
  SafepointIRVerifier() : FunctionPass(ID) {
216
0
    initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
217
0
  }
218
219
0
  bool runOnFunction(Function &F) override {
220
0
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
221
0
    CFGDeadness CD;
222
0
    CD.processFunction(F, DT);
223
0
    Verify(F, DT, CD);
224
0
    return false; // no modifications
225
0
  }
226
227
0
  void getAnalysisUsage(AnalysisUsage &AU) const override {
228
0
    AU.addRequiredID(DominatorTreeWrapperPass::ID);
229
0
    AU.setPreservesAll();
230
0
  }
231
232
0
  StringRef getPassName() const override { return "safepoint verifier"; }
233
};
234
} // namespace
235
236
0
void llvm::verifySafepointIR(Function &F) {
237
0
  SafepointIRVerifier pass;
238
0
  pass.runOnFunction(F);
239
0
}
240
241
char SafepointIRVerifier::ID = 0;
242
243
0
FunctionPass *llvm::createSafepointIRVerifierPass() {
244
0
  return new SafepointIRVerifier();
245
0
}
246
247
0
INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
248
0
                      "Safepoint IR Verifier", false, false)
249
0
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
250
0
INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
251
                    "Safepoint IR Verifier", false, false)
252
253
0
static bool isGCPointerType(Type *T) {
254
0
  if (auto *PT = dyn_cast<PointerType>(T))
255
    // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
256
    // GC managed heap.  We know that a pointer into this heap needs to be
257
    // updated and that no other pointer does.
258
0
    return (1 == PT->getAddressSpace());
259
0
  return false;
260
0
}
261
262
0
static bool containsGCPtrType(Type *Ty) {
263
0
  if (isGCPointerType(Ty))
264
0
    return true;
265
0
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
266
0
    return isGCPointerType(VT->getScalarType());
267
0
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
268
0
    return containsGCPtrType(AT->getElementType());
269
0
  if (StructType *ST = dyn_cast<StructType>(Ty))
270
0
    return llvm::any_of(ST->elements(), containsGCPtrType);
271
0
  return false;
272
0
}
273
274
// Debugging aid -- prints a [Begin, End) range of values.
275
template<typename IteratorTy>
276
0
static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
277
0
  OS << "[ ";
278
0
  while (Begin != End) {
279
0
    OS << **Begin << " ";
280
0
    ++Begin;
281
0
  }
282
0
  OS << "]";
283
0
}
Unexecuted instantiation: SafepointIRVerifier.cpp:void PrintValueSet<llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::ConstIterator>(llvm::raw_ostream&, llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::ConstIterator, llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::ConstIterator)
Unexecuted instantiation: SafepointIRVerifier.cpp:void PrintValueSet<llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::Iterator>(llvm::raw_ostream&, llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::Iterator, llvm::detail::DenseSetImpl<llvm::Value const*, llvm::DenseMap<llvm::Value const*, llvm::detail::DenseSetEmpty, llvm::DenseMapInfo<llvm::Value const*, void>, llvm::detail::DenseSetPair<llvm::Value const*> >, llvm::DenseMapInfo<llvm::Value const*, void> >::Iterator)
284
285
/// The verifier algorithm is phrased in terms of availability.  The set of
286
/// values "available" at a given point in the control flow graph is the set of
287
/// correctly relocated value at that point, and is a subset of the set of
288
/// definitions dominating that point.
289
290
using AvailableValueSet = DenseSet<const Value *>;
291
292
/// State we compute and track per basic block.
293
struct BasicBlockState {
294
  // Set of values available coming in, before the phi nodes
295
  AvailableValueSet AvailableIn;
296
297
  // Set of values available going out
298
  AvailableValueSet AvailableOut;
299
300
  // AvailableOut minus AvailableIn.
301
  // All elements are Instructions
302
  AvailableValueSet Contribution;
303
304
  // True if this block contains a safepoint and thus AvailableIn does not
305
  // contribute to AvailableOut.
306
  bool Cleared = false;
307
};
308
309
/// A given derived pointer can have multiple base pointers through phi/selects.
310
/// This type indicates when the base pointer is exclusively constant
311
/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
312
/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
313
/// NonConstant.
314
enum BaseType {
315
  NonConstant = 1, // Base pointer is not exclusively constant.
316
  ExclusivelyNull,
317
  ExclusivelySomeConstant // Base pointers for a given derived pointer are from a
318
                          // set of constants, but they are not exclusively
319
                          // null.
320
};
321
322
/// Return the baseType for Val which states whether Val is exclusively
323
/// derived from constant/null, or not exclusively derived from constant.
324
/// Val is exclusively derived off a constant base when all operands of phi and
325
/// selects are derived off a constant base.
326
0
static enum BaseType getBaseType(const Value *Val) {
327
328
0
  SmallVector<const Value *, 32> Worklist;
329
0
  DenseSet<const Value *> Visited;
330
0
  bool isExclusivelyDerivedFromNull = true;
331
0
  Worklist.push_back(Val);
332
  // Strip through all the bitcasts and geps to get base pointer. Also check for
333
  // the exclusive value when there can be multiple base pointers (through phis
334
  // or selects).
335
0
  while(!Worklist.empty()) {
336
0
    const Value *V = Worklist.pop_back_val();
337
0
    if (!Visited.insert(V).second)
338
0
      continue;
339
340
0
    if (const auto *CI = dyn_cast<CastInst>(V)) {
341
0
      Worklist.push_back(CI->stripPointerCasts());
342
0
      continue;
343
0
    }
344
0
    if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
345
0
      Worklist.push_back(GEP->getPointerOperand());
346
0
      continue;
347
0
    }
348
    // Push all the incoming values of phi node into the worklist for
349
    // processing.
350
0
    if (const auto *PN = dyn_cast<PHINode>(V)) {
351
0
      append_range(Worklist, PN->incoming_values());
352
0
      continue;
353
0
    }
354
0
    if (const auto *SI = dyn_cast<SelectInst>(V)) {
355
      // Push in the true and false values
356
0
      Worklist.push_back(SI->getTrueValue());
357
0
      Worklist.push_back(SI->getFalseValue());
358
0
      continue;
359
0
    }
360
0
    if (const auto *GCRelocate = dyn_cast<GCRelocateInst>(V)) {
361
      // GCRelocates do not change null-ness or constant-ness of the value.
362
      // So we can continue with derived pointer this instruction relocates.
363
0
      Worklist.push_back(GCRelocate->getDerivedPtr());
364
0
      continue;
365
0
    }
366
0
    if (const auto *FI = dyn_cast<FreezeInst>(V)) {
367
      // Freeze does not change null-ness or constant-ness of the value.
368
0
      Worklist.push_back(FI->getOperand(0));
369
0
      continue;
370
0
    }
371
0
    if (isa<Constant>(V)) {
372
      // We found at least one base pointer which is non-null, so this derived
373
      // pointer is not exclusively derived from null.
374
0
      if (V != Constant::getNullValue(V->getType()))
375
0
        isExclusivelyDerivedFromNull = false;
376
      // Continue processing the remaining values to make sure it's exclusively
377
      // constant.
378
0
      continue;
379
0
    }
380
    // At this point, we know that the base pointer is not exclusively
381
    // constant.
382
0
    return BaseType::NonConstant;
383
0
  }
384
  // Now, we know that the base pointer is exclusively constant, but we need to
385
  // differentiate between exclusive null constant and non-null constant.
386
0
  return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
387
0
                                      : BaseType::ExclusivelySomeConstant;
388
0
}
389
390
0
static bool isNotExclusivelyConstantDerived(const Value *V) {
391
0
  return getBaseType(V) == BaseType::NonConstant;
392
0
}
393
394
namespace {
395
class InstructionVerifier;
396
397
/// Builds BasicBlockState for each BB of the function.
398
/// It can traverse function for verification and provides all required
399
/// information.
400
///
401
/// GC pointer may be in one of three states: relocated, unrelocated and
402
/// poisoned.
403
/// Relocated pointer may be used without any restrictions.
404
/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
405
/// or returned. Unrelocated pointer may be safely compared against another
406
/// unrelocated pointer or against a pointer exclusively derived from null.
407
/// Poisoned pointers are produced when we somehow derive pointer from relocated
408
/// and unrelocated pointers (e.g. phi, select). These pointers may be safely
409
/// used in a very limited number of situations. Currently the only way to use
410
/// it is comparison against constant exclusively derived from null. All
411
/// limitations arise due to their undefined state: these pointers should be
412
/// treated as relocated and unrelocated simultaneously.
413
/// Rules of deriving:
414
/// R + U = P - that's where the poisoned pointers come from
415
/// P + X = P
416
/// U + U = U
417
/// R + R = R
418
/// X + C = X
419
/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
420
/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
421
/// nothing (in case when "+" is unary operation).
422
/// Deriving of pointers by itself is always safe.
423
/// NOTE: when we are making decision on the status of instruction's result:
424
/// a) for phi we need to check status of each input *at the end of
425
///    corresponding predecessor BB*.
426
/// b) for other instructions we need to check status of each input *at the
427
///    current point*.
428
///
429
/// FIXME: This works fairly well except one case
430
///     bb1:
431
///     p = *some GC-ptr def*
432
///     p1 = gep p, offset
433
///         /     |
434
///        /      |
435
///    bb2:       |
436
///    safepoint  |
437
///        \      |
438
///         \     |
439
///      bb3:
440
///      p2 = phi [p, bb2] [p1, bb1]
441
///      p3 = phi [p, bb2] [p, bb1]
442
///      here p and p1 is unrelocated
443
///           p2 and p3 is poisoned (though they shouldn't be)
444
///
445
/// This leads to some weird results:
446
///      cmp eq p, p2 - illegal instruction (false-positive)
447
///      cmp eq p1, p2 - illegal instruction (false-positive)
448
///      cmp eq p, p3 - illegal instruction (false-positive)
449
///      cmp eq p, p1 - ok
450
/// To fix this we need to introduce conception of generations and be able to
451
/// check if two values belong to one generation or not. This way p2 will be
452
/// considered to be unrelocated and no false alarm will happen.
453
class GCPtrTracker {
454
  const Function &F;
455
  const CFGDeadness &CD;
456
  SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
457
  DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
458
  // This set contains defs of unrelocated pointers that are proved to be legal
459
  // and don't need verification.
460
  DenseSet<const Instruction *> ValidUnrelocatedDefs;
461
  // This set contains poisoned defs. They can be safely ignored during
462
  // verification too.
463
  DenseSet<const Value *> PoisonedDefs;
464
465
public:
466
  GCPtrTracker(const Function &F, const DominatorTree &DT,
467
               const CFGDeadness &CD);
468
469
0
  bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
470
0
    return CD.hasLiveIncomingEdge(PN, InBB);
471
0
  }
472
473
  BasicBlockState *getBasicBlockState(const BasicBlock *BB);
474
  const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
475
476
0
  bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
477
478
  /// Traverse each BB of the function and call
479
  /// InstructionVerifier::verifyInstruction for each possibly invalid
480
  /// instruction.
481
  /// It destructively modifies GCPtrTracker so it's passed via rvalue reference
482
  /// in order to prohibit further usages of GCPtrTracker as it'll be in
483
  /// inconsistent state.
484
  static void verifyFunction(GCPtrTracker &&Tracker,
485
                             InstructionVerifier &Verifier);
486
487
  /// Returns true for reachable and live blocks.
488
0
  bool isMapped(const BasicBlock *BB) const { return BlockMap.contains(BB); }
489
490
private:
491
  /// Returns true if the instruction may be safely skipped during verification.
492
  bool instructionMayBeSkipped(const Instruction *I) const;
493
494
  /// Iterates over all BBs from BlockMap and recalculates AvailableIn/Out for
495
  /// each of them until it converges.
496
  void recalculateBBsStates();
497
498
  /// Remove from Contribution all defs that legally produce unrelocated
499
  /// pointers and saves them to ValidUnrelocatedDefs.
500
  /// Though Contribution should belong to BBS it is passed separately with
501
  /// different const-modifier in order to emphasize (and guarantee) that only
502
  /// Contribution will be changed.
503
  /// Returns true if Contribution was changed otherwise false.
504
  bool removeValidUnrelocatedDefs(const BasicBlock *BB,
505
                                  const BasicBlockState *BBS,
506
                                  AvailableValueSet &Contribution);
507
508
  /// Gather all the definitions dominating the start of BB into Result. This is
509
  /// simply the defs introduced by every dominating basic block and the
510
  /// function arguments.
511
  void gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result,
512
                            const DominatorTree &DT);
513
514
  /// Compute the AvailableOut set for BB, based on the BasicBlockState BBS,
515
  /// which is the BasicBlockState for BB.
516
  /// ContributionChanged is set when the verifier runs for the first time
517
  /// (in this case Contribution was changed from 'empty' to its initial state)
518
  /// or when Contribution of this BB was changed since last computation.
519
  static void transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
520
                            bool ContributionChanged);
521
522
  /// Model the effect of an instruction on the set of available values.
523
  static void transferInstruction(const Instruction &I, bool &Cleared,
524
                                  AvailableValueSet &Available);
525
};
526
527
/// It is a visitor for GCPtrTracker::verifyFunction. It decides if the
528
/// instruction (which uses heap reference) is legal or not, given our safepoint
529
/// semantics.
530
class InstructionVerifier {
531
  bool AnyInvalidUses = false;
532
533
public:
534
  void verifyInstruction(const GCPtrTracker *Tracker, const Instruction &I,
535
                         const AvailableValueSet &AvailableSet);
536
537
0
  bool hasAnyInvalidUses() const { return AnyInvalidUses; }
538
539
private:
540
  void reportInvalidUse(const Value &V, const Instruction &I);
541
};
542
} // end anonymous namespace
543
544
GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT,
545
0
                           const CFGDeadness &CD) : F(F), CD(CD) {
546
  // Calculate Contribution of each live BB.
547
  // Allocate BB states for live blocks.
548
0
  for (const BasicBlock &BB : F)
549
0
    if (!CD.isDeadBlock(&BB)) {
550
0
      BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState;
551
0
      for (const auto &I : BB)
552
0
        transferInstruction(I, BBS->Cleared, BBS->Contribution);
553
0
      BlockMap[&BB] = BBS;
554
0
    }
555
556
  // Initialize AvailableIn/Out sets of each BB using only information about
557
  // dominating BBs.
558
0
  for (auto &BBI : BlockMap) {
559
0
    gatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT);
560
0
    transferBlock(BBI.first, *BBI.second, true);
561
0
  }
562
563
  // Simulate the flow of defs through the CFG and recalculate AvailableIn/Out
564
  // sets of each BB until it converges. If any def is proved to be an
565
  // unrelocated pointer, it will be removed from all BBSs.
566
0
  recalculateBBsStates();
567
0
}
568
569
0
BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) {
570
0
  return BlockMap.lookup(BB);
571
0
}
572
573
const BasicBlockState *GCPtrTracker::getBasicBlockState(
574
0
    const BasicBlock *BB) const {
575
0
  return const_cast<GCPtrTracker *>(this)->getBasicBlockState(BB);
576
0
}
577
578
0
bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
579
  // Poisoned defs are skipped since they are always safe by themselves by
580
  // definition (for details see comment to this class).
581
0
  return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
582
0
}
583
584
void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
585
0
                                  InstructionVerifier &Verifier) {
586
  // We need RPO here to a) report always the first error b) report errors in
587
  // same order from run to run.
588
0
  ReversePostOrderTraversal<const Function *> RPOT(&Tracker.F);
589
0
  for (const BasicBlock *BB : RPOT) {
590
0
    BasicBlockState *BBS = Tracker.getBasicBlockState(BB);
591
0
    if (!BBS)
592
0
      continue;
593
594
    // We destructively modify AvailableIn as we traverse the block instruction
595
    // by instruction.
596
0
    AvailableValueSet &AvailableSet = BBS->AvailableIn;
597
0
    for (const Instruction &I : *BB) {
598
0
      if (Tracker.instructionMayBeSkipped(&I))
599
0
        continue; // This instruction shouldn't be added to AvailableSet.
600
601
0
      Verifier.verifyInstruction(&Tracker, I, AvailableSet);
602
603
      // Model the effect of current instruction on AvailableSet to keep the set
604
      // relevant at each point of BB.
605
0
      bool Cleared = false;
606
0
      transferInstruction(I, Cleared, AvailableSet);
607
0
      (void)Cleared;
608
0
    }
609
0
  }
610
0
}
611
612
0
void GCPtrTracker::recalculateBBsStates() {
613
0
  SetVector<const BasicBlock *> Worklist;
614
  // TODO: This order is suboptimal, it's better to replace it with priority
615
  // queue where priority is RPO number of BB.
616
0
  for (auto &BBI : BlockMap)
617
0
    Worklist.insert(BBI.first);
618
619
  // This loop iterates the AvailableIn/Out sets until it converges.
620
  // The AvailableIn and AvailableOut sets decrease as we iterate.
621
0
  while (!Worklist.empty()) {
622
0
    const BasicBlock *BB = Worklist.pop_back_val();
623
0
    BasicBlockState *BBS = getBasicBlockState(BB);
624
0
    if (!BBS)
625
0
      continue; // Ignore dead successors.
626
627
0
    size_t OldInCount = BBS->AvailableIn.size();
628
0
    for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
629
0
      const BasicBlock *PBB = *PredIt;
630
0
      BasicBlockState *PBBS = getBasicBlockState(PBB);
631
0
      if (PBBS && !CD.isDeadEdge(&CFGDeadness::getEdge(PredIt)))
632
0
        set_intersect(BBS->AvailableIn, PBBS->AvailableOut);
633
0
    }
634
635
0
    assert(OldInCount >= BBS->AvailableIn.size() && "invariant!");
636
637
0
    bool InputsChanged = OldInCount != BBS->AvailableIn.size();
638
0
    bool ContributionChanged =
639
0
        removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution);
640
0
    if (!InputsChanged && !ContributionChanged)
641
0
      continue;
642
643
0
    size_t OldOutCount = BBS->AvailableOut.size();
644
0
    transferBlock(BB, *BBS, ContributionChanged);
645
0
    if (OldOutCount != BBS->AvailableOut.size()) {
646
0
      assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
647
0
      Worklist.insert(succ_begin(BB), succ_end(BB));
648
0
    }
649
0
  }
650
0
}
651
652
bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
653
                                              const BasicBlockState *BBS,
654
0
                                              AvailableValueSet &Contribution) {
655
0
  assert(&BBS->Contribution == &Contribution &&
656
0
         "Passed Contribution should be from the passed BasicBlockState!");
657
0
  AvailableValueSet AvailableSet = BBS->AvailableIn;
658
0
  bool ContributionChanged = false;
659
  // For explanation why instructions are processed this way see
660
  // "Rules of deriving" in the comment to this class.
661
0
  for (const Instruction &I : *BB) {
662
0
    bool ValidUnrelocatedPointerDef = false;
663
0
    bool PoisonedPointerDef = false;
664
    // TODO: `select` instructions should be handled here too.
665
0
    if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
666
0
      if (containsGCPtrType(PN->getType())) {
667
        // If both are true, the output is poisoned.
668
0
        bool HasRelocatedInputs = false;
669
0
        bool HasUnrelocatedInputs = false;
670
0
        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
671
0
          const BasicBlock *InBB = PN->getIncomingBlock(i);
672
0
          if (!isMapped(InBB) ||
673
0
              !CD.hasLiveIncomingEdge(PN, InBB))
674
0
            continue; // Skip dead block or dead edge.
675
676
0
          const Value *InValue = PN->getIncomingValue(i);
677
678
0
          if (isNotExclusivelyConstantDerived(InValue)) {
679
0
            if (isValuePoisoned(InValue)) {
680
              // If any of inputs is poisoned, output is always poisoned too.
681
0
              HasRelocatedInputs = true;
682
0
              HasUnrelocatedInputs = true;
683
0
              break;
684
0
            }
685
0
            if (BlockMap[InBB]->AvailableOut.count(InValue))
686
0
              HasRelocatedInputs = true;
687
0
            else
688
0
              HasUnrelocatedInputs = true;
689
0
          }
690
0
        }
691
0
        if (HasUnrelocatedInputs) {
692
0
          if (HasRelocatedInputs)
693
0
            PoisonedPointerDef = true;
694
0
          else
695
0
            ValidUnrelocatedPointerDef = true;
696
0
        }
697
0
      }
698
0
    } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
699
0
               containsGCPtrType(I.getType())) {
700
      // GEP/bitcast of unrelocated pointer is legal by itself but this def
701
      // shouldn't appear in any AvailableSet.
702
0
      for (const Value *V : I.operands())
703
0
        if (containsGCPtrType(V->getType()) &&
704
0
            isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
705
0
          if (isValuePoisoned(V))
706
0
            PoisonedPointerDef = true;
707
0
          else
708
0
            ValidUnrelocatedPointerDef = true;
709
0
          break;
710
0
        }
711
0
    }
712
0
    assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
713
0
           "Value cannot be both unrelocated and poisoned!");
714
0
    if (ValidUnrelocatedPointerDef) {
715
      // Remove def of unrelocated pointer from Contribution of this BB and
716
      // trigger update of all its successors.
717
0
      Contribution.erase(&I);
718
0
      PoisonedDefs.erase(&I);
719
0
      ValidUnrelocatedDefs.insert(&I);
720
0
      LLVM_DEBUG(dbgs() << "Removing urelocated " << I
721
0
                        << " from Contribution of " << BB->getName() << "\n");
722
0
      ContributionChanged = true;
723
0
    } else if (PoisonedPointerDef) {
724
      // Mark pointer as poisoned, remove its def from Contribution and trigger
725
      // update of all successors.
726
0
      Contribution.erase(&I);
727
0
      PoisonedDefs.insert(&I);
728
0
      LLVM_DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
729
0
                        << BB->getName() << "\n");
730
0
      ContributionChanged = true;
731
0
    } else {
732
0
      bool Cleared = false;
733
0
      transferInstruction(I, Cleared, AvailableSet);
734
0
      (void)Cleared;
735
0
    }
736
0
  }
737
0
  return ContributionChanged;
738
0
}
739
740
void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB,
741
                                        AvailableValueSet &Result,
742
0
                                        const DominatorTree &DT) {
743
0
  DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
744
745
0
  assert(DTN && "Unreachable blocks are ignored");
746
0
  while (DTN->getIDom()) {
747
0
    DTN = DTN->getIDom();
748
0
    auto BBS = getBasicBlockState(DTN->getBlock());
749
0
    assert(BBS && "immediate dominator cannot be dead for a live block");
750
0
    const auto &Defs = BBS->Contribution;
751
0
    Result.insert(Defs.begin(), Defs.end());
752
    // If this block is 'Cleared', then nothing LiveIn to this block can be
753
    // available after this block completes.  Note: This turns out to be
754
    // really important for reducing memory consuption of the initial available
755
    // sets and thus peak memory usage by this verifier.
756
0
    if (BBS->Cleared)
757
0
      return;
758
0
  }
759
760
0
  for (const Argument &A : BB->getParent()->args())
761
0
    if (containsGCPtrType(A.getType()))
762
0
      Result.insert(&A);
763
0
}
764
765
void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
766
0
                                 bool ContributionChanged) {
767
0
  const AvailableValueSet &AvailableIn = BBS.AvailableIn;
768
0
  AvailableValueSet &AvailableOut = BBS.AvailableOut;
769
770
0
  if (BBS.Cleared) {
771
    // AvailableOut will change only when Contribution changed.
772
0
    if (ContributionChanged)
773
0
      AvailableOut = BBS.Contribution;
774
0
  } else {
775
    // Otherwise, we need to reduce the AvailableOut set by things which are no
776
    // longer in our AvailableIn
777
0
    AvailableValueSet Temp = BBS.Contribution;
778
0
    set_union(Temp, AvailableIn);
779
0
    AvailableOut = std::move(Temp);
780
0
  }
781
782
0
  LLVM_DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
783
0
             PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
784
0
             dbgs() << " to ";
785
0
             PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
786
0
             dbgs() << "\n";);
787
0
}
788
789
void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared,
790
0
                                       AvailableValueSet &Available) {
791
0
  if (isa<GCStatepointInst>(I)) {
792
0
    Cleared = true;
793
0
    Available.clear();
794
0
  } else if (containsGCPtrType(I.getType()))
795
0
    Available.insert(&I);
796
0
}
797
798
void InstructionVerifier::verifyInstruction(
799
    const GCPtrTracker *Tracker, const Instruction &I,
800
0
    const AvailableValueSet &AvailableSet) {
801
0
  if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
802
0
    if (containsGCPtrType(PN->getType()))
803
0
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
804
0
        const BasicBlock *InBB = PN->getIncomingBlock(i);
805
0
        const BasicBlockState *InBBS = Tracker->getBasicBlockState(InBB);
806
0
        if (!InBBS ||
807
0
            !Tracker->hasLiveIncomingEdge(PN, InBB))
808
0
          continue; // Skip dead block or dead edge.
809
810
0
        const Value *InValue = PN->getIncomingValue(i);
811
812
0
        if (isNotExclusivelyConstantDerived(InValue) &&
813
0
            !InBBS->AvailableOut.count(InValue))
814
0
          reportInvalidUse(*InValue, *PN);
815
0
      }
816
0
  } else if (isa<CmpInst>(I) &&
817
0
             containsGCPtrType(I.getOperand(0)->getType())) {
818
0
    Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
819
0
    enum BaseType baseTyLHS = getBaseType(LHS),
820
0
                  baseTyRHS = getBaseType(RHS);
821
822
    // Returns true if LHS and RHS are unrelocated pointers and they are
823
    // valid unrelocated uses.
824
0
    auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
825
0
                                   &LHS, &RHS] () {
826
        // A cmp instruction has valid unrelocated pointer operands only if
827
        // both operands are unrelocated pointers.
828
        // In the comparison between two pointers, if one is an unrelocated
829
        // use, the other *should be* an unrelocated use, for this
830
        // instruction to contain valid unrelocated uses. This unrelocated
831
        // use can be a null constant as well, or another unrelocated
832
        // pointer.
833
0
        if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
834
0
          return false;
835
        // Constant pointers (that are not exclusively null) may have
836
        // meaning in different VMs, so we cannot reorder the compare
837
        // against constant pointers before the safepoint. In other words,
838
        // comparison of an unrelocated use against a non-null constant
839
        // maybe invalid.
840
0
        if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
841
0
             baseTyRHS == BaseType::NonConstant) ||
842
0
            (baseTyLHS == BaseType::NonConstant &&
843
0
             baseTyRHS == BaseType::ExclusivelySomeConstant))
844
0
          return false;
845
846
        // If one of pointers is poisoned and other is not exclusively derived
847
        // from null it is an invalid expression: it produces poisoned result
848
        // and unless we want to track all defs (not only gc pointers) the only
849
        // option is to prohibit such instructions.
850
0
        if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
851
0
            (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
852
0
            return false;
853
854
        // All other cases are valid cases enumerated below:
855
        // 1. Comparison between an exclusively derived null pointer and a
856
        // constant base pointer.
857
        // 2. Comparison between an exclusively derived null pointer and a
858
        // non-constant unrelocated base pointer.
859
        // 3. Comparison between 2 unrelocated pointers.
860
        // 4. Comparison between a pointer exclusively derived from null and a
861
        // non-constant poisoned pointer.
862
0
        return true;
863
0
    };
864
0
    if (!hasValidUnrelocatedUse()) {
865
      // Print out all non-constant derived pointers that are unrelocated
866
      // uses, which are invalid.
867
0
      if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
868
0
        reportInvalidUse(*LHS, I);
869
0
      if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
870
0
        reportInvalidUse(*RHS, I);
871
0
    }
872
0
  } else {
873
0
    for (const Value *V : I.operands())
874
0
      if (containsGCPtrType(V->getType()) &&
875
0
          isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
876
0
        reportInvalidUse(*V, I);
877
0
  }
878
0
}
879
880
void InstructionVerifier::reportInvalidUse(const Value &V,
881
0
                                           const Instruction &I) {
882
0
  errs() << "Illegal use of unrelocated value found!\n";
883
0
  errs() << "Def: " << V << "\n";
884
0
  errs() << "Use: " << I << "\n";
885
0
  if (!PrintOnly)
886
0
    abort();
887
0
  AnyInvalidUses = true;
888
0
}
889
890
static void Verify(const Function &F, const DominatorTree &DT,
891
0
                   const CFGDeadness &CD) {
892
0
  LLVM_DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName()
893
0
                    << "\n");
894
0
  if (PrintOnly)
895
0
    dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
896
897
0
  GCPtrTracker Tracker(F, DT, CD);
898
899
  // We now have all the information we need to decide if the use of a heap
900
  // reference is legal or not, given our safepoint semantics.
901
902
0
  InstructionVerifier Verifier;
903
0
  GCPtrTracker::verifyFunction(std::move(Tracker), Verifier);
904
905
0
  if (PrintOnly && !Verifier.hasAnyInvalidUses()) {
906
0
    dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
907
0
           << "\n";
908
0
  }
909
0
}