/src/llvm-project/llvm/lib/IR/StructuralHash.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "llvm/IR/StructuralHash.h" |
10 | | #include "llvm/ADT/Hashing.h" |
11 | | #include "llvm/IR/Function.h" |
12 | | #include "llvm/IR/GlobalVariable.h" |
13 | | #include "llvm/IR/InstrTypes.h" |
14 | | #include "llvm/IR/Instructions.h" |
15 | | #include "llvm/IR/IntrinsicInst.h" |
16 | | #include "llvm/IR/Module.h" |
17 | | |
18 | | using namespace llvm; |
19 | | |
20 | | namespace { |
21 | | |
22 | | // Basic hashing mechanism to detect structural change to the IR, used to verify |
23 | | // pass return status consistency with actual change. In addition to being used |
24 | | // by the MergeFunctions pass. |
25 | | |
26 | | class StructuralHashImpl { |
27 | | uint64_t Hash; |
28 | | |
29 | 0 | void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } |
30 | | |
31 | | // This will produce different values on 32-bit and 64-bit systens as |
32 | | // hash_combine returns a size_t. However, this is only used for |
33 | | // detailed hashing which, in-tree, only needs to distinguish between |
34 | | // differences in functions. |
35 | 0 | template <typename T> void hashArbitaryType(const T &V) { |
36 | 0 | hash(hash_combine(V)); |
37 | 0 | } Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::APInt>(llvm::APInt const&) Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::APFloat>(llvm::APFloat const&) Unexecuted instantiation: StructuralHash.cpp:void (anonymous namespace)::StructuralHashImpl::hashArbitaryType<llvm::StringRef>(llvm::StringRef const&) |
38 | | |
39 | 0 | void hashType(Type *ValueType) { |
40 | 0 | hash(ValueType->getTypeID()); |
41 | 0 | if (ValueType->isIntegerTy()) |
42 | 0 | hash(ValueType->getIntegerBitWidth()); |
43 | 0 | } |
44 | | |
45 | | public: |
46 | 0 | StructuralHashImpl() : Hash(4) {} |
47 | | |
48 | 0 | void updateOperand(Value *Operand) { |
49 | 0 | hashType(Operand->getType()); |
50 | | |
51 | | // The cases enumerated below are not exhaustive and are only aimed to |
52 | | // get decent coverage over the function. |
53 | 0 | if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) { |
54 | 0 | hashArbitaryType(ConstInt->getValue()); |
55 | 0 | } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) { |
56 | 0 | hashArbitaryType(ConstFP->getValue()); |
57 | 0 | } else if (Argument *Arg = dyn_cast<Argument>(Operand)) { |
58 | 0 | hash(Arg->getArgNo()); |
59 | 0 | } else if (Function *Func = dyn_cast<Function>(Operand)) { |
60 | | // Hashing the name will be deterministic as LLVM's hashing infrastructure |
61 | | // has explicit support for hashing strings and will not simply hash |
62 | | // the pointer. |
63 | 0 | hashArbitaryType(Func->getName()); |
64 | 0 | } |
65 | 0 | } |
66 | | |
67 | 0 | void updateInstruction(const Instruction &Inst, bool DetailedHash) { |
68 | 0 | hash(Inst.getOpcode()); |
69 | |
|
70 | 0 | if (!DetailedHash) |
71 | 0 | return; |
72 | | |
73 | 0 | hashType(Inst.getType()); |
74 | | |
75 | | // Handle additional properties of specific instructions that cause |
76 | | // semantic differences in the IR. |
77 | 0 | if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) |
78 | 0 | hash(ComparisonInstruction->getPredicate()); |
79 | |
|
80 | 0 | for (const auto &Op : Inst.operands()) |
81 | 0 | updateOperand(Op); |
82 | 0 | } |
83 | | |
84 | | // A function hash is calculated by considering only the number of arguments |
85 | | // and whether a function is varargs, the order of basic blocks (given by the |
86 | | // successors of each basic block in depth first order), and the order of |
87 | | // opcodes of each instruction within each of these basic blocks. This mirrors |
88 | | // the strategy FunctionComparator::compare() uses to compare functions by |
89 | | // walking the BBs in depth first order and comparing each instruction in |
90 | | // sequence. Because this hash currently does not look at the operands, it is |
91 | | // insensitive to things such as the target of calls and the constants used in |
92 | | // the function, which makes it useful when possibly merging functions which |
93 | | // are the same modulo constants and call targets. |
94 | | // |
95 | | // Note that different users of StructuralHash will want different behavior |
96 | | // out of it (i.e., MergeFunctions will want something different from PM |
97 | | // expensive checks for pass modification status). When modifying this |
98 | | // function, most changes should be gated behind an option and enabled |
99 | | // selectively. |
100 | 0 | void update(const Function &F, bool DetailedHash) { |
101 | | // Declarations don't affect analyses. |
102 | 0 | if (F.isDeclaration()) |
103 | 0 | return; |
104 | | |
105 | 0 | hash(0x62642d6b6b2d6b72); // Function header |
106 | |
|
107 | 0 | hash(F.isVarArg()); |
108 | 0 | hash(F.arg_size()); |
109 | |
|
110 | 0 | SmallVector<const BasicBlock *, 8> BBs; |
111 | 0 | SmallPtrSet<const BasicBlock *, 16> VisitedBBs; |
112 | | |
113 | | // Walk the blocks in the same order as |
114 | | // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the |
115 | | // function "structure." (BB and opcode sequence) |
116 | 0 | BBs.push_back(&F.getEntryBlock()); |
117 | 0 | VisitedBBs.insert(BBs[0]); |
118 | 0 | while (!BBs.empty()) { |
119 | 0 | const BasicBlock *BB = BBs.pop_back_val(); |
120 | | |
121 | | // This random value acts as a block header, as otherwise the partition of |
122 | | // opcodes into BBs wouldn't affect the hash, only the order of the |
123 | | // opcodes |
124 | 0 | hash(45798); |
125 | 0 | for (auto &Inst : *BB) |
126 | 0 | updateInstruction(Inst, DetailedHash); |
127 | |
|
128 | 0 | for (const BasicBlock *Succ : successors(BB)) |
129 | 0 | if (VisitedBBs.insert(Succ).second) |
130 | 0 | BBs.push_back(Succ); |
131 | 0 | } |
132 | 0 | } |
133 | | |
134 | 0 | void update(const GlobalVariable &GV) { |
135 | | // Declarations and used/compiler.used don't affect analyses. |
136 | | // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, |
137 | | // we ignore anything with the `.llvm` prefix |
138 | 0 | if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) |
139 | 0 | return; |
140 | 0 | hash(23456); // Global header |
141 | 0 | hash(GV.getValueType()->getTypeID()); |
142 | 0 | } |
143 | | |
144 | 0 | void update(const Module &M, bool DetailedHash) { |
145 | 0 | for (const GlobalVariable &GV : M.globals()) |
146 | 0 | update(GV); |
147 | 0 | for (const Function &F : M) |
148 | 0 | update(F, DetailedHash); |
149 | 0 | } |
150 | | |
151 | 0 | uint64_t getHash() const { return Hash; } |
152 | | }; |
153 | | |
154 | | } // namespace |
155 | | |
156 | 0 | IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { |
157 | 0 | StructuralHashImpl H; |
158 | 0 | H.update(F, DetailedHash); |
159 | 0 | return H.getHash(); |
160 | 0 | } |
161 | | |
162 | 0 | IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { |
163 | 0 | StructuralHashImpl H; |
164 | 0 | H.update(M, DetailedHash); |
165 | 0 | return H.getHash(); |
166 | 0 | } |