Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/IR/StructuralHash.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/IR/StructuralHash.h"
10
#include "llvm/ADT/Hashing.h"
11
#include "llvm/IR/Function.h"
12
#include "llvm/IR/GlobalVariable.h"
13
#include "llvm/IR/InstrTypes.h"
14
#include "llvm/IR/Instructions.h"
15
#include "llvm/IR/IntrinsicInst.h"
16
#include "llvm/IR/Module.h"
17
18
using namespace llvm;
19
20
namespace {
21
22
// Basic hashing mechanism to detect structural change to the IR, used to verify
23
// pass return status consistency with actual change. In addition to being used
24
// by the MergeFunctions pass.
25
26
class StructuralHashImpl {
27
  uint64_t Hash;
28
29
0
  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
30
31
  // This will produce different values on 32-bit and 64-bit systens as
32
  // hash_combine returns a size_t. However, this is only used for
33
  // detailed hashing which, in-tree, only needs to distinguish between
34
  // differences in functions.
35
0
  template <typename T> void hashArbitaryType(const T &V) {
36
0
    hash(hash_combine(V));
37
0
  }
Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::APInt>(llvm::APInt const&)
Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::APFloat>(llvm::APFloat const&)
Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::StringRef>(llvm::StringRef const&)
38
39
0
  void hashType(Type *ValueType) {
40
0
    hash(ValueType->getTypeID());
41
0
    if (ValueType->isIntegerTy())
42
0
      hash(ValueType->getIntegerBitWidth());
43
0
  }
44
45
public:
46
0
  StructuralHashImpl() : Hash(4) {}
47
48
0
  void updateOperand(Value *Operand) {
49
0
    hashType(Operand->getType());
50
51
    // The cases enumerated below are not exhaustive and are only aimed to
52
    // get decent coverage over the function.
53
0
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54
0
      hashArbitaryType(ConstInt->getValue());
55
0
    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56
0
      hashArbitaryType(ConstFP->getValue());
57
0
    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58
0
      hash(Arg->getArgNo());
59
0
    } else if (Function *Func = dyn_cast<Function>(Operand)) {
60
      // Hashing the name will be deterministic as LLVM's hashing infrastructure
61
      // has explicit support for hashing strings and will not simply hash
62
      // the pointer.
63
0
      hashArbitaryType(Func->getName());
64
0
    }
65
0
  }
66
67
0
  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68
0
    hash(Inst.getOpcode());
69
70
0
    if (!DetailedHash)
71
0
      return;
72
73
0
    hashType(Inst.getType());
74
75
    // Handle additional properties of specific instructions that cause
76
    // semantic differences in the IR.
77
0
    if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78
0
      hash(ComparisonInstruction->getPredicate());
79
80
0
    for (const auto &Op : Inst.operands())
81
0
      updateOperand(Op);
82
0
  }
83
84
  // A function hash is calculated by considering only the number of arguments
85
  // and whether a function is varargs, the order of basic blocks (given by the
86
  // successors of each basic block in depth first order), and the order of
87
  // opcodes of each instruction within each of these basic blocks. This mirrors
88
  // the strategy FunctionComparator::compare() uses to compare functions by
89
  // walking the BBs in depth first order and comparing each instruction in
90
  // sequence. Because this hash currently does not look at the operands, it is
91
  // insensitive to things such as the target of calls and the constants used in
92
  // the function, which makes it useful when possibly merging functions which
93
  // are the same modulo constants and call targets.
94
  //
95
  // Note that different users of StructuralHash will want different behavior
96
  // out of it (i.e., MergeFunctions will want something different from PM
97
  // expensive checks for pass modification status). When modifying this
98
  // function, most changes should be gated behind an option and enabled
99
  // selectively.
100
0
  void update(const Function &F, bool DetailedHash) {
101
    // Declarations don't affect analyses.
102
0
    if (F.isDeclaration())
103
0
      return;
104
105
0
    hash(0x62642d6b6b2d6b72); // Function header
106
107
0
    hash(F.isVarArg());
108
0
    hash(F.arg_size());
109
110
0
    SmallVector<const BasicBlock *, 8> BBs;
111
0
    SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
112
113
    // Walk the blocks in the same order as
114
    // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115
    // function "structure." (BB and opcode sequence)
116
0
    BBs.push_back(&F.getEntryBlock());
117
0
    VisitedBBs.insert(BBs[0]);
118
0
    while (!BBs.empty()) {
119
0
      const BasicBlock *BB = BBs.pop_back_val();
120
121
      // This random value acts as a block header, as otherwise the partition of
122
      // opcodes into BBs wouldn't affect the hash, only the order of the
123
      // opcodes
124
0
      hash(45798);
125
0
      for (auto &Inst : *BB)
126
0
        updateInstruction(Inst, DetailedHash);
127
128
0
      for (const BasicBlock *Succ : successors(BB))
129
0
        if (VisitedBBs.insert(Succ).second)
130
0
          BBs.push_back(Succ);
131
0
    }
132
0
  }
133
134
0
  void update(const GlobalVariable &GV) {
135
    // Declarations and used/compiler.used don't affect analyses.
136
    // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
137
    // we ignore anything with the `.llvm` prefix
138
0
    if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
139
0
      return;
140
0
    hash(23456); // Global header
141
0
    hash(GV.getValueType()->getTypeID());
142
0
  }
143
144
0
  void update(const Module &M, bool DetailedHash) {
145
0
    for (const GlobalVariable &GV : M.globals())
146
0
      update(GV);
147
0
    for (const Function &F : M)
148
0
      update(F, DetailedHash);
149
0
  }
150
151
0
  uint64_t getHash() const { return Hash; }
152
};
153
154
} // namespace
155
156
0
IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
157
0
  StructuralHashImpl H;
158
0
  H.update(F, DetailedHash);
159
0
  return H.getHash();
160
0
}
161
162
0
IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
163
0
  StructuralHashImpl H;
164
0
  H.update(M, DetailedHash);
165
0
  return H.getHash();
166
0
}