/src/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
Line | Count | Source |
1 | | //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements an analysis that determines, for a given memory |
10 | | // operation, what preceding memory operations it depends on. It builds on |
11 | | // alias analysis information, and tries to provide a lazy, caching interface to |
12 | | // a common kind of alias information query. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/Analysis/MemoryDependenceAnalysis.h" |
17 | | #include "llvm/ADT/DenseMap.h" |
18 | | #include "llvm/ADT/STLExtras.h" |
19 | | #include "llvm/ADT/SmallPtrSet.h" |
20 | | #include "llvm/ADT/SmallVector.h" |
21 | | #include "llvm/ADT/Statistic.h" |
22 | | #include "llvm/Analysis/AliasAnalysis.h" |
23 | | #include "llvm/Analysis/AssumptionCache.h" |
24 | | #include "llvm/Analysis/MemoryBuiltins.h" |
25 | | #include "llvm/Analysis/MemoryLocation.h" |
26 | | #include "llvm/Analysis/PHITransAddr.h" |
27 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
28 | | #include "llvm/Analysis/ValueTracking.h" |
29 | | #include "llvm/IR/BasicBlock.h" |
30 | | #include "llvm/IR/Dominators.h" |
31 | | #include "llvm/IR/Function.h" |
32 | | #include "llvm/IR/InstrTypes.h" |
33 | | #include "llvm/IR/Instruction.h" |
34 | | #include "llvm/IR/Instructions.h" |
35 | | #include "llvm/IR/IntrinsicInst.h" |
36 | | #include "llvm/IR/LLVMContext.h" |
37 | | #include "llvm/IR/Metadata.h" |
38 | | #include "llvm/IR/Module.h" |
39 | | #include "llvm/IR/PredIteratorCache.h" |
40 | | #include "llvm/IR/Type.h" |
41 | | #include "llvm/IR/Use.h" |
42 | | #include "llvm/IR/Value.h" |
43 | | #include "llvm/InitializePasses.h" |
44 | | #include "llvm/Pass.h" |
45 | | #include "llvm/Support/AtomicOrdering.h" |
46 | | #include "llvm/Support/Casting.h" |
47 | | #include "llvm/Support/CommandLine.h" |
48 | | #include "llvm/Support/Compiler.h" |
49 | | #include "llvm/Support/Debug.h" |
50 | | #include <algorithm> |
51 | | #include <cassert> |
52 | | #include <iterator> |
53 | | #include <utility> |
54 | | |
55 | | using namespace llvm; |
56 | | |
57 | | #define DEBUG_TYPE "memdep" |
58 | | |
59 | | STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); |
60 | | STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); |
61 | | STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); |
62 | | |
63 | | STATISTIC(NumCacheNonLocalPtr, |
64 | | "Number of fully cached non-local ptr responses"); |
65 | | STATISTIC(NumCacheDirtyNonLocalPtr, |
66 | | "Number of cached, but dirty, non-local ptr responses"); |
67 | | STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses"); |
68 | | STATISTIC(NumCacheCompleteNonLocalPtr, |
69 | | "Number of block queries that were completely cached"); |
70 | | |
71 | | // Limit for the number of instructions to scan in a block. |
72 | | |
73 | | static cl::opt<unsigned> BlockScanLimit( |
74 | | "memdep-block-scan-limit", cl::Hidden, cl::init(100), |
75 | | cl::desc("The number of instructions to scan in a block in memory " |
76 | | "dependency analysis (default = 100)")); |
77 | | |
78 | | static cl::opt<unsigned> |
79 | | BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), |
80 | | cl::desc("The number of blocks to scan during memory " |
81 | | "dependency analysis (default = 200)")); |
82 | | |
83 | | // Limit on the number of memdep results to process. |
84 | | static const unsigned int NumResultsLimit = 100; |
85 | | |
86 | | /// This is a helper function that removes Val from 'Inst's set in ReverseMap. |
87 | | /// |
88 | | /// If the set becomes empty, remove Inst's entry. |
89 | | template <typename KeyTy> |
90 | | static void |
91 | | RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap, |
92 | 24.2k | Instruction *Inst, KeyTy Val) { |
93 | 24.2k | typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt = |
94 | 24.2k | ReverseMap.find(Inst); |
95 | 24.2k | assert(InstIt != ReverseMap.end() && "Reverse map out of sync?"); |
96 | 0 | bool Found = InstIt->second.erase(Val); |
97 | 24.2k | assert(Found && "Invalid reverse map!"); |
98 | 0 | (void)Found; |
99 | 24.2k | if (InstIt->second.empty()) |
100 | 19.3k | ReverseMap.erase(InstIt); |
101 | 24.2k | } |
[Per-instantiation coverage for RemoveFromReverseMap (lines 92-101): KeyTy = llvm::Instruction* — 10.2k executions; KeyTy = llvm::PointerIntPair<llvm::Value const*, 1, bool> — 14.0k executions; KeyTy = llvm::Value const* — 1 execution.]
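For orientation, here is a minimal sketch of the reverse-map invariant this helper maintains: each dependency instruction maps to the set of cache keys whose cached results point at it, and the map entry is dropped as soon as its set becomes empty. The instruction arguments are placeholders, and the sketch relies on this file's own includes and `using namespace llvm`; it is illustrative, not MemDep's real cache code.

    // Illustrative only: exercising the reverse-map helper defined above
    // (callable like this only from within this translation unit).
    static void reverseMapSketch(Instruction *DepInst, Instruction *QueryA,
                                 Instruction *QueryB) {
      DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> ReverseMap;
      ReverseMap[DepInst].insert(QueryA); // QueryA's cached result names DepInst.
      ReverseMap[DepInst].insert(QueryB); // So does QueryB's.

      // When QueryA's cached result is invalidated, only the set shrinks...
      RemoveFromReverseMap(ReverseMap, DepInst, QueryA);
      // ...and once the last key is gone, the whole entry is erased.
      RemoveFromReverseMap(ReverseMap, DepInst, QueryB);
    }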
102 | | |
103 | | /// If the given instruction references a specific memory location, fill in Loc |
104 | | /// with the details, otherwise set Loc.Ptr to null. |
105 | | /// |
106 | | /// Returns a ModRefInfo value describing the general behavior of the |
107 | | /// instruction. |
108 | | static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, |
109 | 25.9k | const TargetLibraryInfo &TLI) { |
110 | 25.9k | if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { |
111 | 25.4k | if (LI->isUnordered()) { |
112 | 25.4k | Loc = MemoryLocation::get(LI); |
113 | 25.4k | return ModRefInfo::Ref; |
114 | 25.4k | } |
115 | 0 | if (LI->getOrdering() == AtomicOrdering::Monotonic) { |
116 | 0 | Loc = MemoryLocation::get(LI); |
117 | 0 | return ModRefInfo::ModRef; |
118 | 0 | } |
119 | 0 | Loc = MemoryLocation(); |
120 | 0 | return ModRefInfo::ModRef; |
121 | 0 | } |
122 | | |
123 | 501 | if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { |
124 | 76 | if (SI->isUnordered()) { |
125 | 76 | Loc = MemoryLocation::get(SI); |
126 | 76 | return ModRefInfo::Mod; |
127 | 76 | } |
128 | 0 | if (SI->getOrdering() == AtomicOrdering::Monotonic) { |
129 | 0 | Loc = MemoryLocation::get(SI); |
130 | 0 | return ModRefInfo::ModRef; |
131 | 0 | } |
132 | 0 | Loc = MemoryLocation(); |
133 | 0 | return ModRefInfo::ModRef; |
134 | 0 | } |
135 | | |
136 | 425 | if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { |
137 | 0 | Loc = MemoryLocation::get(V); |
138 | 0 | return ModRefInfo::ModRef; |
139 | 0 | } |
140 | | |
141 | 425 | if (const CallBase *CB = dyn_cast<CallBase>(Inst)) { |
142 | 222 | if (Value *FreedOp = getFreedOperand(CB, &TLI)) { |
143 | | // calls to free() deallocate the entire structure |
144 | 0 | Loc = MemoryLocation::getAfter(FreedOp); |
145 | 0 | return ModRefInfo::Mod; |
146 | 0 | } |
147 | 222 | } |
148 | | |
149 | 425 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { |
150 | 81 | switch (II->getIntrinsicID()) { |
151 | 0 | case Intrinsic::lifetime_start: |
152 | 0 | case Intrinsic::lifetime_end: |
153 | 0 | case Intrinsic::invariant_start: |
154 | 0 | Loc = MemoryLocation::getForArgument(II, 1, TLI); |
155 | | // These intrinsics don't really modify the memory, but returning Mod |
156 | | // will allow them to be handled conservatively. |
157 | 0 | return ModRefInfo::Mod; |
158 | 0 | case Intrinsic::invariant_end: |
159 | 0 | Loc = MemoryLocation::getForArgument(II, 2, TLI); |
160 | | // These intrinsics don't really modify the memory, but returning Mod |
161 | | // will allow them to be handled conservatively. |
162 | 0 | return ModRefInfo::Mod; |
163 | 36 | case Intrinsic::masked_load: |
164 | 36 | Loc = MemoryLocation::getForArgument(II, 0, TLI); |
165 | 36 | return ModRefInfo::Ref; |
166 | 45 | case Intrinsic::masked_store: |
167 | 45 | Loc = MemoryLocation::getForArgument(II, 1, TLI); |
168 | 45 | return ModRefInfo::Mod; |
169 | 0 | default: |
170 | 0 | break; |
171 | 81 | } |
172 | 81 | } |
173 | | |
174 | | // Otherwise, just do the coarse-grained thing that always works. |
175 | 344 | if (Inst->mayWriteToMemory()) |
176 | 0 | return ModRefInfo::ModRef; |
177 | 344 | if (Inst->mayReadFromMemory()) |
178 | 141 | return ModRefInfo::Ref; |
179 | 203 | return ModRefInfo::NoModRef; |
180 | 344 | } |
181 | | |
182 | | /// Private helper for finding the local dependencies of a call site. |
183 | | MemDepResult MemoryDependenceResults::getCallDependencyFrom( |
184 | | CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, |
185 | 174 | BasicBlock *BB) { |
186 | 174 | unsigned Limit = getDefaultBlockScanLimit(); |
187 | | |
188 | | // Walk backwards through the block, looking for dependencies. |
189 | 442 | while (ScanIt != BB->begin()) { |
190 | 372 | Instruction *Inst = &*--ScanIt; |
191 | | // Debug intrinsics don't cause dependences and should not affect Limit |
192 | 372 | if (isa<DbgInfoIntrinsic>(Inst)) |
193 | 0 | continue; |
194 | | |
195 | | // Limit the amount of scanning we do so we don't end up with quadratic |
196 | | // running time on extreme testcases. |
197 | 372 | --Limit; |
198 | 372 | if (!Limit) |
199 | 0 | return MemDepResult::getUnknown(); |
200 | | |
201 | | // If this inst is a memory op, get the pointer it accessed |
202 | 372 | MemoryLocation Loc; |
203 | 372 | ModRefInfo MR = GetLocation(Inst, Loc, TLI); |
204 | 372 | if (Loc.Ptr) { |
205 | | // A simple instruction. |
206 | 110 | if (isModOrRefSet(AA.getModRefInfo(Call, Loc))) |
207 | 51 | return MemDepResult::getClobber(Inst); |
208 | 59 | continue; |
209 | 110 | } |
210 | | |
211 | 262 | if (auto *CallB = dyn_cast<CallBase>(Inst)) { |
212 | | // If these two calls do not interfere, look past it. |
213 | 59 | if (isNoModRef(AA.getModRefInfo(Call, CallB))) { |
214 | | // If the two calls are the same, return Inst as a Def, so that |
215 | | // Call can be found redundant and eliminated. |
216 | 59 | if (isReadOnlyCall && !isModSet(MR) && |
217 | 59 | Call->isIdenticalToWhenDefined(CallB)) |
218 | 53 | return MemDepResult::getDef(Inst); |
219 | | |
220 | | // Otherwise if the two calls don't interact (e.g. CallB is readnone) |
221 | | // keep scanning. |
222 | 6 | continue; |
223 | 59 | } else |
224 | 0 | return MemDepResult::getClobber(Inst); |
225 | 59 | } |
226 | | |
227 | | // If we could not obtain a pointer for the instruction and the instruction |
228 | | // touches memory then assume that this is a dependency. |
229 | 203 | if (isModOrRefSet(MR)) |
230 | 0 | return MemDepResult::getClobber(Inst); |
231 | 203 | } |
232 | | |
233 | | // No dependence found. If this is the entry block of the function, it is |
234 | | // unknown, otherwise it is non-local. |
235 | 70 | if (BB != &BB->getParent()->getEntryBlock()) |
236 | 69 | return MemDepResult::getNonLocal(); |
237 | 1 | return MemDepResult::getNonFuncLocal(); |
238 | 70 | } |
239 | | |
240 | | MemDepResult MemoryDependenceResults::getPointerDependencyFrom( |
241 | | const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, |
242 | | BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, |
243 | 115k | BatchAAResults &BatchAA) { |
244 | 115k | MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); |
245 | 115k | if (QueryInst != nullptr) { |
246 | 115k | if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { |
247 | 115k | InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB); |
248 | | |
249 | 115k | if (InvariantGroupDependency.isDef()) |
250 | 93 | return InvariantGroupDependency; |
251 | 115k | } |
252 | 115k | } |
253 | 114k | MemDepResult SimpleDep = getSimplePointerDependencyFrom( |
254 | 114k | MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA); |
255 | 114k | if (SimpleDep.isDef()) |
256 | 22.2k | return SimpleDep; |
257 | | // A non-local invariant.group dependency indicates there is a non-local Def |
258 | | // (it only returns NonLocal if it finds a non-local Def), which is better |
259 | | // than a local clobber and everything else. |
260 | 92.7k | if (InvariantGroupDependency.isNonLocal()) |
261 | 25 | return InvariantGroupDependency; |
262 | | |
263 | 92.7k | assert(InvariantGroupDependency.isUnknown() && |
264 | 92.7k | "InvariantGroupDependency should be only unknown at this point"); |
265 | 0 | return SimpleDep; |
266 | 92.7k | } |
267 | | |
268 | | MemDepResult MemoryDependenceResults::getPointerDependencyFrom( |
269 | | const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, |
270 | 25.4k | BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { |
271 | 25.4k | BatchAAResults BatchAA(AA, &EII); |
272 | 25.4k | return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, |
273 | 25.4k | BatchAA); |
274 | 25.4k | } |
275 | | |
276 | | MemDepResult |
277 | | MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, |
278 | 115k | BasicBlock *BB) { |
279 | | |
280 | 115k | if (!LI->hasMetadata(LLVMContext::MD_invariant_group)) |
281 | 114k | return MemDepResult::getUnknown(); |
282 | | |
283 | | // Take the pointer operand after stripping all casts and zero-index GEPs. |
284 | | // This way we only need to search the cast graph downwards. |
285 | 260 | Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts(); |
286 | | |
287 | | // It is not safe to walk the use list of a global value, because function |
288 | | // passes aren't allowed to look outside their functions. |
289 | | // FIXME: this could be fixed by filtering instructions from outside |
290 | | // of current function. |
291 | 260 | if (isa<GlobalValue>(LoadOperand)) |
292 | 16 | return MemDepResult::getUnknown(); |
293 | | |
294 | | // Queue to process all pointers that are equivalent to load operand. |
295 | 244 | SmallVector<const Value *, 8> LoadOperandsQueue; |
296 | 244 | LoadOperandsQueue.push_back(LoadOperand); |
297 | | |
298 | 244 | Instruction *ClosestDependency = nullptr; |
299 | | // The order of instructions in the use list is unpredictable. To always get |
300 | | // the same result, we look for the closest dependency in terms of dominance. |
301 | 244 | auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) { |
302 | 121 | assert(Other && "Must call it with not null instruction"); |
303 | 121 | if (Best == nullptr || DT.dominates(Best, Other)) |
304 | 119 | return Other; |
305 | 2 | return Best; |
306 | 121 | }; |
307 | | |
308 | | // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case |
309 | | // we will see all the instructions. This should be fixed in MSSA. |
310 | 488 | while (!LoadOperandsQueue.empty()) { |
311 | 244 | const Value *Ptr = LoadOperandsQueue.pop_back_val(); |
312 | 244 | assert(Ptr && !isa<GlobalValue>(Ptr) && |
313 | 244 | "Null or GlobalValue should not be inserted"); |
314 | | |
315 | 1.45k | for (const Use &Us : Ptr->uses()) { |
316 | 1.45k | auto *U = dyn_cast<Instruction>(Us.getUser()); |
317 | 1.45k | if (!U || U == LI || !DT.dominates(U, LI)) |
318 | 952 | continue; |
319 | | |
320 | | // A bitcast or a gep with all-zero indices uses Ptr. Add it to the queue to |
321 | | // check its users. U = bitcast Ptr |
322 | 503 | if (isa<BitCastInst>(U)) { |
323 | 0 | LoadOperandsQueue.push_back(U); |
324 | 0 | continue; |
325 | 0 | } |
326 | | // Gep with zeros is equivalent to bitcast. |
327 | | // FIXME: we are not sure if some bitcast should be canonicalized to gep 0 |
328 | | // or gep 0 to bitcast because of SROA, so there are 2 forms. Once |
329 | | // typeless pointers are ready, both cases will be gone |
330 | | // (and this BFS also won't be needed). |
331 | 503 | if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) |
332 | 103 | if (GEP->hasAllZeroIndices()) { |
333 | 0 | LoadOperandsQueue.push_back(U); |
334 | 0 | continue; |
335 | 0 | } |
336 | | |
337 | | // If we hit load/store with the same invariant.group metadata (and the |
338 | | // same pointer operand) we can assume that the value pointed to by the |
339 | | // pointer operand didn't change. |
340 | 503 | if ((isa<LoadInst>(U) || |
341 | 503 | (isa<StoreInst>(U) && |
342 | 410 | cast<StoreInst>(U)->getPointerOperand() == Ptr)) && |
343 | 503 | U->hasMetadata(LLVMContext::MD_invariant_group)) |
344 | 121 | ClosestDependency = GetClosestDependency(ClosestDependency, U); |
345 | 503 | } |
346 | 244 | } |
347 | | |
348 | 244 | if (!ClosestDependency) |
349 | 125 | return MemDepResult::getUnknown(); |
350 | 119 | if (ClosestDependency->getParent() == BB) |
351 | 93 | return MemDepResult::getDef(ClosestDependency); |
352 | | // Def(U) can't be returned here because it is non-local. If local |
353 | | // dependency is not found, return NonLocal, counting on the user to call |
354 | | // getNonLocalPointerDependency, which will return the cached |
355 | | // result. |
356 | 26 | NonLocalDefsCache.try_emplace( |
357 | 26 | LI, NonLocalDepResult(ClosestDependency->getParent(), |
358 | 26 | MemDepResult::getDef(ClosestDependency), nullptr)); |
359 | 26 | ReverseNonLocalDefsCache[ClosestDependency].insert(LI); |
360 | 26 | return MemDepResult::getNonLocal(); |
361 | 119 | } |
362 | | |
363 | | // Check if SI, which may alias with MemLoc, can be safely skipped. This is |
364 | | // possible if SI can only must-alias or no-alias MemLoc (no partial |
365 | | // overlap is possible) and it writes the same value that MemLoc already |
366 | | // contains (the value was loaded before this store and was not modified in |
367 | | // between). |
368 | | static bool canSkipClobberingStore(const StoreInst *SI, |
369 | | const MemoryLocation &MemLoc, |
370 | | Align MemLocAlign, BatchAAResults &BatchAA, |
371 | 8.21k | unsigned ScanLimit) { |
372 | 8.21k | if (!MemLoc.Size.hasValue()) |
373 | 0 | return false; |
374 | 8.21k | if (MemoryLocation::get(SI).Size != MemLoc.Size) |
375 | 3.64k | return false; |
376 | 4.56k | if (MemLoc.Size.isScalable()) |
377 | 3 | return false; |
378 | 4.56k | if (std::min(MemLocAlign, SI->getAlign()).value() < |
379 | 4.56k | MemLoc.Size.getValue().getKnownMinValue()) |
380 | 549 | return false; |
381 | | |
382 | 4.01k | auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()); |
383 | 4.01k | if (!LI || LI->getParent() != SI->getParent()) |
384 | 3.88k | return false; |
385 | 130 | if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias) |
386 | 113 | return false; |
387 | 17 | unsigned NumVisitedInsts = 0; |
388 | 293 | for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction()) |
389 | 281 | if (++NumVisitedInsts > ScanLimit || |
390 | 281 | isModSet(BatchAA.getModRefInfo(I, MemLoc))) |
391 | 5 | return false; |
392 | | |
393 | 12 | return true; |
394 | 17 | } |
395 | | |
396 | | MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( |
397 | | const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, |
398 | | BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, |
399 | 114k | BatchAAResults &BatchAA) { |
400 | 114k | bool isInvariantLoad = false; |
401 | 114k | Align MemLocAlign = |
402 | 114k | MemLoc.Ptr->getPointerAlignment(BB->getModule()->getDataLayout()); |
403 | | |
404 | 114k | unsigned DefaultLimit = getDefaultBlockScanLimit(); |
405 | 114k | if (!Limit) |
406 | 114k | Limit = &DefaultLimit; |
407 | | |
408 | | // We must be careful with atomic accesses, as they may allow another thread |
409 | | // to touch this location, clobbering it. We are conservative: if the |
410 | | // QueryInst is not a simple (non-atomic) memory access, we automatically |
411 | | // return getClobber. |
412 | | // If it is simple, we know based on the results of |
413 | | // "Compiler testing via a theory of sound optimisations in the C11/C++11 |
414 | | // memory model" in PLDI 2013, that a non-atomic location can only be |
415 | | // clobbered between a pair of a release and an acquire action, with no |
416 | | // access to the location in between. |
417 | | // Here is an example for giving the general intuition behind this rule. |
418 | | // In the following code: |
419 | | // store x 0; |
420 | | // release action; [1] |
421 | | // acquire action; [4] |
422 | | // %val = load x; |
423 | | // It is unsafe to replace %val by 0 because another thread may be running: |
424 | | // acquire action; [2] |
425 | | // store x 42; |
426 | | // release action; [3] |
427 | | // with synchronization from 1 to 2 and from 3 to 4, resulting in %val |
428 | | // being 42. A key property of this program however is that if either |
429 | | // 1 or 4 were missing, there would be a race between the store of 42 and |
430 | | // either the store of 0 or the load (making the whole program racy). |
431 | | // The paper mentioned above shows that the same property is respected |
432 | | // by every program that can detect any optimization of that kind: either |
433 | | // it is racy (undefined) or there is a release followed by an acquire |
434 | | // between the pair of accesses under consideration. |
435 | | |
436 | | // If the load is invariant, we "know" that it doesn't alias *any* write. We |
437 | | // do want to respect mustalias results since defs are useful for value |
438 | | // forwarding, but any mayalias write can be assumed to be noalias. |
439 | | // Arguably, this logic should be pushed inside AliasAnalysis itself. |
440 | 114k | if (isLoad && QueryInst) |
441 | 114k | if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) { |
442 | 114k | if (LI->hasMetadata(LLVMContext::MD_invariant_load)) |
443 | 0 | isInvariantLoad = true; |
444 | 114k | MemLocAlign = LI->getAlign(); |
445 | 114k | } |
446 | | |
447 | | // True for volatile instruction. |
448 | | // For Load/Store return true if atomic ordering is stronger than AO, |
449 | | // for other instructions, just true if they can read or write memory. |
450 | 114k | auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool { |
451 | 21 | if (I->isVolatile()) |
452 | 0 | return true; |
453 | 21 | if (auto *LI = dyn_cast<LoadInst>(I)) |
454 | 21 | return isStrongerThan(LI->getOrdering(), AO); |
455 | 0 | if (auto *SI = dyn_cast<StoreInst>(I)) |
456 | 0 | return isStrongerThan(SI->getOrdering(), AO); |
457 | 0 | return I->mayReadOrWriteMemory(); |
458 | 0 | }; |
459 | | |
460 | | // Walk backwards through the basic block, looking for dependencies. |
461 | 1.01M | while (ScanIt != BB->begin()) { |
462 | 928k | Instruction *Inst = &*--ScanIt; |
463 | | |
464 | 928k | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) |
465 | | // Debug intrinsics don't (and can't) cause dependencies. |
466 | 2.30k | if (isa<DbgInfoIntrinsic>(II)) |
467 | 196 | continue; |
468 | | |
469 | | // Limit the amount of scanning we do so we don't end up with quadratic |
470 | | // running time on extreme testcases. |
471 | 927k | --*Limit; |
472 | 927k | if (!*Limit) |
473 | 134 | return MemDepResult::getUnknown(); |
474 | | |
475 | 927k | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { |
476 | | // If we reach a lifetime begin or end marker, then the query ends here |
477 | | // because the value is undefined. |
478 | 2.10k | Intrinsic::ID ID = II->getIntrinsicID(); |
479 | 2.10k | switch (ID) { |
480 | 160 | case Intrinsic::lifetime_start: { |
481 | | // FIXME: This only considers queries directly on the invariant-tagged |
482 | | // pointer, not on query pointers that are indexed off of them. It'd |
483 | | // be nice to handle that at some point (the right approach is to use |
484 | | // GetPointerBaseWithConstantOffset). |
485 | 160 | MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); |
486 | 160 | if (BatchAA.isMustAlias(ArgLoc, MemLoc)) |
487 | 10 | return MemDepResult::getDef(II); |
488 | 150 | continue; |
489 | 160 | } |
490 | 150 | case Intrinsic::masked_load: |
491 | 75 | case Intrinsic::masked_store: { |
492 | 75 | MemoryLocation Loc; |
493 | 75 | /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); |
494 | 75 | AliasResult R = BatchAA.alias(Loc, MemLoc); |
495 | 75 | if (R == AliasResult::NoAlias) |
496 | 49 | continue; |
497 | 26 | if (R == AliasResult::MustAlias) |
498 | 6 | return MemDepResult::getDef(II); |
499 | 20 | if (ID == Intrinsic::masked_load) |
500 | 3 | continue; |
501 | 17 | return MemDepResult::getClobber(II); |
502 | 20 | } |
503 | 2.10k | } |
504 | 2.10k | } |
505 | | |
506 | | // Values depend on loads if the pointers are must aliased. This means |
507 | | // that a load depends on another must aliased load from the same value. |
508 | | // One exception is atomic loads: a value can depend on an atomic load that |
509 | | // it does not alias with when this atomic load indicates that another |
510 | | // thread may be accessing the location. |
511 | 927k | if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { |
512 | | // While volatile accesses cannot be eliminated, they do not have to clobber |
513 | | // non-aliasing locations, as normal accesses, for example, can be safely |
514 | | // reordered with volatile accesses. |
515 | 129k | if (LI->isVolatile()) { |
516 | 2 | if (!QueryInst) |
517 | | // Original QueryInst *may* be volatile |
518 | 0 | return MemDepResult::getClobber(LI); |
519 | 2 | if (QueryInst->isVolatile()) |
520 | | // Ordering required if QueryInst is itself volatile |
521 | 0 | return MemDepResult::getClobber(LI); |
522 | | // Otherwise, volatile doesn't imply any special ordering |
523 | 2 | } |
524 | | |
525 | | // Atomic loads have complications involved. |
526 | | // A Monotonic (or higher) load is OK if the query inst is itself not |
527 | | // atomic. |
528 | | // FIXME: This is overly conservative. |
529 | 129k | if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) { |
530 | 18 | if (!QueryInst || |
531 | 18 | isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic)) |
532 | 0 | return MemDepResult::getClobber(LI); |
533 | 18 | if (LI->getOrdering() != AtomicOrdering::Monotonic) |
534 | 18 | return MemDepResult::getClobber(LI); |
535 | 18 | } |
536 | | |
537 | 129k | MemoryLocation LoadLoc = MemoryLocation::get(LI); |
538 | | |
539 | | // If we found a pointer, check if it could be the same as our pointer. |
540 | 129k | AliasResult R = BatchAA.alias(LoadLoc, MemLoc); |
541 | | |
542 | 129k | if (R == AliasResult::NoAlias) |
543 | 70.8k | continue; |
544 | | |
545 | 58.5k | if (isLoad) { |
546 | | // Must aliased loads are defs of each other. |
547 | 58.5k | if (R == AliasResult::MustAlias) |
548 | 10.6k | return MemDepResult::getDef(Inst); |
549 | | |
550 | | // If we have a partial alias, then return this as a clobber for the |
551 | | // client to handle. |
552 | 47.9k | if (R == AliasResult::PartialAlias && R.hasOffset()) { |
553 | 72 | ClobberOffsets[LI] = R.getOffset(); |
554 | 72 | return MemDepResult::getClobber(Inst); |
555 | 72 | } |
556 | | |
557 | | // Random may-alias loads don't depend on each other without a |
558 | | // dependence. |
559 | 47.8k | continue; |
560 | 47.9k | } |
561 | | |
562 | | // Stores don't alias loads from read-only memory. |
563 | 0 | if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc))) |
564 | 0 | continue; |
565 | | |
566 | | // Stores depend on may/must aliased loads. |
567 | 0 | return MemDepResult::getDef(Inst); |
568 | 0 | } |
569 | | |
570 | 798k | if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { |
571 | | // Atomic stores have complications involved. |
572 | | // A Monotonic store is OK if the query inst is itself not atomic. |
573 | | // FIXME: This is overly conservative. |
574 | 252k | if (!SI->isUnordered() && SI->isAtomic()) { |
575 | 3 | if (!QueryInst || |
576 | 3 | isComplexForReordering(QueryInst, AtomicOrdering::Unordered)) |
577 | 0 | return MemDepResult::getClobber(SI); |
578 | | // Ok, if we are here the guard above guarantee us that |
579 | | // QueryInst is a non-atomic or unordered load/store. |
580 | | // SI is atomic with monotonic or release semantic (seq_cst for store |
581 | | // is actually a release semantic plus total order over other seq_cst |
582 | | // instructions, as soon as QueryInst is not seq_cst we can consider it |
583 | | // as simple release semantic). |
584 | | // Monotonic and Release semantic allows re-ordering before store |
585 | | // so we are safe to go further and check the aliasing. It will prohibit |
586 | | // re-ordering in case locations are may or must alias. |
587 | 3 | } |
588 | | |
589 | | // While volatile accesses cannot be eliminated, they do not have to clobber |
590 | | // non-aliasing locations, as normal accesses can for example be reordered |
591 | | // with volatile accesses. |
592 | 252k | if (SI->isVolatile()) |
593 | 731 | if (!QueryInst || QueryInst->isVolatile()) |
594 | 0 | return MemDepResult::getClobber(SI); |
595 | | |
596 | | // If alias analysis can tell that this store is guaranteed to not modify |
597 | | // the query pointer, ignore it. Use getModRefInfo to handle cases where |
598 | | // the query pointer points to constant memory etc. |
599 | 252k | if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc))) |
600 | 242k | continue; |
601 | | |
602 | | // Ok, this store might clobber the query pointer. Check to see if it is |
603 | | // a must alias: in this case, we want to return this as a def. |
604 | | // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above. |
605 | 10.5k | MemoryLocation StoreLoc = MemoryLocation::get(SI); |
606 | | |
607 | | // If we found a pointer, check if it could be the same as our pointer. |
608 | 10.5k | AliasResult R = BatchAA.alias(StoreLoc, MemLoc); |
609 | | |
610 | 10.5k | if (R == AliasResult::NoAlias) |
611 | 1 | continue; |
612 | 10.5k | if (R == AliasResult::MustAlias) |
613 | 2.36k | return MemDepResult::getDef(Inst); |
614 | 8.21k | if (isInvariantLoad) |
615 | 0 | continue; |
616 | 8.21k | if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit)) |
617 | 12 | continue; |
618 | 8.20k | return MemDepResult::getClobber(Inst); |
619 | 8.21k | } |
620 | | |
621 | | // If this is an allocation, and if we know that the accessed pointer is to |
622 | | // the allocation, return Def. This means that there is no dependence and |
623 | | // the access can be optimized based on that. For example, a load could |
624 | | // turn into undef. Note that we can bypass the allocation itself when |
625 | | // looking for a clobber in many cases; that's an alias property and is |
626 | | // handled by BasicAA. |
627 | 545k | if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) { |
628 | 70.9k | const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr); |
629 | 70.9k | if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr)) |
630 | 9.19k | return MemDepResult::getDef(Inst); |
631 | 70.9k | } |
632 | | |
633 | | // If we found a select instruction for MemLoc pointer, return it as Def |
634 | | // dependency. |
635 | 536k | if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst) |
636 | 39 | return MemDepResult::getDef(Inst); |
637 | | |
638 | 536k | if (isInvariantLoad) |
639 | 0 | continue; |
640 | | |
641 | | // A release fence requires that all stores complete before it, but does |
642 | | // not prevent the reordering of following loads or stores 'before' the |
643 | | // fence. As a result, we look past it when finding a dependency for |
644 | | // loads. DSE uses this to find preceding stores to delete and thus we |
645 | | // can't bypass the fence if the query instruction is a store. |
646 | 536k | if (FenceInst *FI = dyn_cast<FenceInst>(Inst)) |
647 | 0 | if (isLoad && FI->getOrdering() == AtomicOrdering::Release) |
648 | 0 | continue; |
649 | | |
650 | | // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. |
651 | 536k | switch (BatchAA.getModRefInfo(Inst, MemLoc)) { |
652 | 534k | case ModRefInfo::NoModRef: |
653 | | // If the call has no effect on the queried pointer, just ignore it. |
654 | 534k | continue; |
655 | 78 | case ModRefInfo::Mod: |
656 | 78 | return MemDepResult::getClobber(Inst); |
657 | 173 | case ModRefInfo::Ref: |
658 | | // If the call is known to never store to the pointer, and if this is a |
659 | | // load query, we can safely ignore it (scan past it). |
660 | 173 | if (isLoad) |
661 | 173 | continue; |
662 | 173 | [[fallthrough]]; |
663 | 1.15k | default: |
664 | | // Otherwise, there is a potential dependence. Return a clobber. |
665 | 1.15k | return MemDepResult::getClobber(Inst); |
666 | 536k | } |
667 | 536k | } |
668 | | |
669 | | // No dependence found. If this is the entry block of the function, it is |
670 | | // unknown, otherwise it is non-local. |
671 | 83.0k | if (BB != &BB->getParent()->getEntryBlock()) |
672 | 76.3k | return MemDepResult::getNonLocal(); |
673 | 6.74k | return MemDepResult::getNonFuncLocal(); |
674 | 83.0k | } |
675 | | |
676 | 92.4k | MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { |
677 | 92.4k | ClobberOffsets.clear(); |
678 | 92.4k | Instruction *ScanPos = QueryInst; |
679 | | |
680 | | // Check for a cached result |
681 | 92.4k | MemDepResult &LocalCache = LocalDeps[QueryInst]; |
682 | | |
683 | | // If the cached entry is non-dirty, just return it. Note that this depends |
684 | | // on MemDepResult's default constructing to 'dirty'. |
685 | 92.4k | if (!LocalCache.isDirty()) |
686 | 62.4k | return LocalCache; |
687 | | |
688 | | // Otherwise, if we have a dirty entry, we know we can start the scan at that |
689 | | // instruction, which may save us some work. |
690 | 29.9k | if (Instruction *Inst = LocalCache.getInst()) { |
691 | 156 | ScanPos = Inst; |
692 | | |
693 | 156 | RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); |
694 | 156 | } |
695 | | |
696 | 29.9k | BasicBlock *QueryParent = QueryInst->getParent(); |
697 | | |
698 | | // Do the scan. |
699 | 29.9k | if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { |
700 | | // No dependence found. If this is the entry block of the function, it is |
701 | | // unknown, otherwise it is non-local. |
702 | 4.45k | if (QueryParent != &QueryParent->getParent()->getEntryBlock()) |
703 | 2.80k | LocalCache = MemDepResult::getNonLocal(); |
704 | 1.64k | else |
705 | 1.64k | LocalCache = MemDepResult::getNonFuncLocal(); |
706 | 25.5k | } else { |
707 | 25.5k | MemoryLocation MemLoc; |
708 | 25.5k | ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI); |
709 | 25.5k | if (MemLoc.Ptr) { |
710 | | // If we can do a pointer scan, make it happen. |
711 | 25.4k | bool isLoad = !isModSet(MR); |
712 | 25.4k | if (auto *II = dyn_cast<IntrinsicInst>(QueryInst)) |
713 | 3 | isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; |
714 | | |
715 | 25.4k | LocalCache = |
716 | 25.4k | getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(), |
717 | 25.4k | QueryParent, QueryInst, nullptr); |
718 | 25.4k | } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) { |
719 | 82 | bool isReadOnly = AA.onlyReadsMemory(QueryCall); |
720 | 82 | LocalCache = getCallDependencyFrom(QueryCall, isReadOnly, |
721 | 82 | ScanPos->getIterator(), QueryParent); |
722 | 82 | } else |
723 | | // Non-memory instruction. |
724 | 0 | LocalCache = MemDepResult::getUnknown(); |
725 | 25.5k | } |
726 | | |
727 | | // Remember the result! |
728 | 29.9k | if (Instruction *I = LocalCache.getInst()) |
729 | 11.7k | ReverseLocalDeps[I].insert(QueryInst); |
730 | | |
731 | 29.9k | return LocalCache; |
732 | 92.4k | } |
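As a usage reference, here is a hedged sketch of how a new-pass-manager client might drive this local query. The helper name and the printing are illustrative and not part of this file; only the MemoryDependenceAnalysis/MemDepResult calls themselves are taken from the real API.

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/IR/InstIterator.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Illustrative helper: report the local dependency of every simple load.
    static void printLocalLoadDeps(Function &F, FunctionAnalysisManager &FAM) {
      MemoryDependenceResults &MD = FAM.getResult<MemoryDependenceAnalysis>(F);
      for (Instruction &I : instructions(F)) {
        auto *LI = dyn_cast<LoadInst>(&I);
        if (!LI || !LI->isUnordered())
          continue;
        MemDepResult Res = MD.getDependency(LI);
        if (Res.isDef() || Res.isClobber())
          errs() << *LI << " depends on " << *Res.getInst() << "\n";
        else if (Res.isNonLocal())
          errs() << *LI << " has no dependency inside its own block\n";
      }
    }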
733 | | |
734 | | #ifndef NDEBUG |
735 | | /// This method is used when -debug is specified to verify that cache arrays |
736 | | /// are properly kept sorted. |
737 | | static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, |
738 | 0 | int Count = -1) { |
739 | 0 | if (Count == -1) |
740 | 0 | Count = Cache.size(); |
741 | 0 | assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) && |
742 | 0 | "Cache isn't sorted!"); |
743 | 0 | } |
744 | | #endif |
745 | | |
746 | | const MemoryDependenceResults::NonLocalDepInfo & |
747 | 49 | MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) { |
748 | 49 | assert(getDependency(QueryCall).isNonLocal() && |
749 | 49 | "getNonLocalCallDependency should only be used on calls with " |
750 | 49 | "non-local deps!"); |
751 | 0 | PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall]; |
752 | 49 | NonLocalDepInfo &Cache = CacheP.first; |
753 | | |
754 | | // This is the set of blocks that need to be recomputed. In the cached case, |
755 | | // this can happen due to instructions being deleted etc. In the uncached |
756 | | // case, this starts out as the set of predecessors we care about. |
757 | 49 | SmallVector<BasicBlock *, 32> DirtyBlocks; |
758 | | |
759 | 49 | if (!Cache.empty()) { |
760 | | // Okay, we have a cache entry. If we know it is not dirty, just return it |
761 | | // with no computation. |
762 | 29 | if (!CacheP.second) { |
763 | 11 | ++NumCacheNonLocal; |
764 | 11 | return Cache; |
765 | 11 | } |
766 | | |
767 | | // If we already have a partially computed set of results, scan them to |
768 | | // determine what is dirty, seeding our initial DirtyBlocks worklist. |
769 | 18 | for (auto &Entry : Cache) |
770 | 128 | if (Entry.getResult().isDirty()) |
771 | 10 | DirtyBlocks.push_back(Entry.getBB()); |
772 | | |
773 | | // Sort the cache so that we can do fast binary search lookups below. |
774 | 18 | llvm::sort(Cache); |
775 | | |
776 | 18 | ++NumCacheDirtyNonLocal; |
777 | 20 | } else { |
778 | | // Seed DirtyBlocks with each of the preds of QueryInst's block. |
779 | 20 | BasicBlock *QueryBB = QueryCall->getParent(); |
780 | 20 | append_range(DirtyBlocks, PredCache.get(QueryBB)); |
781 | 20 | ++NumUncacheNonLocal; |
782 | 20 | } |
783 | | |
784 | | // isReadonlyCall - If this is a read-only call, we can be more aggressive. |
785 | 38 | bool isReadonlyCall = AA.onlyReadsMemory(QueryCall); |
786 | | |
787 | 38 | SmallPtrSet<BasicBlock *, 32> Visited; |
788 | | |
789 | 38 | unsigned NumSortedEntries = Cache.size(); |
790 | 38 | LLVM_DEBUG(AssertSorted(Cache)); |
791 | | |
792 | | // Iterate while we still have blocks to update. |
793 | 163 | while (!DirtyBlocks.empty()) { |
794 | 125 | BasicBlock *DirtyBB = DirtyBlocks.pop_back_val(); |
795 | | |
796 | | // Already processed this block? |
797 | 125 | if (!Visited.insert(DirtyBB).second) |
798 | 25 | continue; |
799 | | |
800 | | // Do a binary search to see if we already have an entry for this block in |
801 | | // the cache set. If so, find it. |
802 | 100 | LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries)); |
803 | 100 | NonLocalDepInfo::iterator Entry = |
804 | 100 | std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries, |
805 | 100 | NonLocalDepEntry(DirtyBB)); |
806 | 100 | if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB) |
807 | 13 | --Entry; |
808 | | |
809 | 100 | NonLocalDepEntry *ExistingResult = nullptr; |
810 | 100 | if (Entry != Cache.begin() + NumSortedEntries && |
811 | 100 | Entry->getBB() == DirtyBB) { |
812 | | // If we already have an entry, and if it isn't already dirty, the block |
813 | | // is done. |
814 | 13 | if (!Entry->getResult().isDirty()) |
815 | 3 | continue; |
816 | | |
817 | | // Otherwise, remember this slot so we can update the value. |
818 | 10 | ExistingResult = &*Entry; |
819 | 10 | } |
820 | | |
821 | | // If the dirty entry has a pointer, start scanning from it so we don't have |
822 | | // to rescan the entire block. |
823 | 97 | BasicBlock::iterator ScanPos = DirtyBB->end(); |
824 | 97 | if (ExistingResult) { |
825 | 10 | if (Instruction *Inst = ExistingResult->getResult().getInst()) { |
826 | 10 | ScanPos = Inst->getIterator(); |
827 | | // We're removing QueryInst's use of Inst. |
828 | 10 | RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst, |
829 | 10 | QueryCall); |
830 | 10 | } |
831 | 10 | } |
832 | | |
833 | | // Find out if this block has a local dependency for QueryInst. |
834 | 97 | MemDepResult Dep; |
835 | | |
836 | 97 | if (ScanPos != DirtyBB->begin()) { |
837 | 92 | Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB); |
838 | 92 | } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { |
839 | | // No dependence found. If this is the entry block of the function, it is |
840 | | // unknown, otherwise it is non-local. |
841 | 5 | Dep = MemDepResult::getNonLocal(); |
842 | 5 | } else { |
843 | 0 | Dep = MemDepResult::getNonFuncLocal(); |
844 | 0 | } |
845 | | |
846 | | // If we had a dirty entry for the block, update it. Otherwise, just add |
847 | | // a new entry. |
848 | 97 | if (ExistingResult) |
849 | 10 | ExistingResult->setResult(Dep); |
850 | 87 | else |
851 | 87 | Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); |
852 | | |
853 | | // If the block has a dependency (i.e. it isn't completely transparent to |
854 | | // the value), remember the association! |
855 | 97 | if (!Dep.isNonLocal()) { |
856 | | // Keep the ReverseNonLocalDeps map up to date so we can efficiently |
857 | | // update this when we remove instructions. |
858 | 40 | if (Instruction *Inst = Dep.getInst()) |
859 | 39 | ReverseNonLocalDeps[Inst].insert(QueryCall); |
860 | 57 | } else { |
861 | | |
862 | | // If the block *is* completely transparent to the load, we need to check |
863 | | // the predecessors of this block. Add them to our worklist. |
864 | 57 | append_range(DirtyBlocks, PredCache.get(DirtyBB)); |
865 | 57 | } |
866 | 97 | } |
867 | | |
868 | 38 | return Cache; |
869 | 49 | } |
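A hedged sketch of consuming these per-block call results follows; `MD` and `Call` are assumed to be supplied by a surrounding pass, and the helper name is invented for illustration.

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void walkNonLocalCallDeps(MemoryDependenceResults &MD,
                                     CallBase *Call) {
      // The query below is only legal once the local query says NonLocal.
      if (!MD.getDependency(Call).isNonLocal())
        return;
      for (const NonLocalDepEntry &Entry : MD.getNonLocalCallDependency(Call)) {
        BasicBlock *BB = Entry.getBB();       // Block this entry describes.
        MemDepResult Res = Entry.getResult(); // Dependency at the end of BB.
        if (Res.isDef()) // e.g. an identical earlier call was found in BB.
          errs() << "identical call available in " << BB->getName() << "\n";
      }
    }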
870 | | |
871 | | void MemoryDependenceResults::getNonLocalPointerDependency( |
872 | 54.9k | Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { |
873 | 54.9k | const MemoryLocation Loc = MemoryLocation::get(QueryInst); |
874 | 54.9k | bool isLoad = isa<LoadInst>(QueryInst); |
875 | 54.9k | BasicBlock *FromBB = QueryInst->getParent(); |
876 | 54.9k | assert(FromBB); |
877 | | |
878 | 0 | assert(Loc.Ptr->getType()->isPointerTy() && |
879 | 54.9k | "Can't get pointer deps of a non-pointer!"); |
880 | 0 | Result.clear(); |
881 | 54.9k | { |
882 | | // Check if there is cached Def with invariant.group. |
883 | 54.9k | auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst); |
884 | 54.9k | if (NonLocalDefIt != NonLocalDefsCache.end()) { |
885 | 21 | Result.push_back(NonLocalDefIt->second); |
886 | 21 | ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()] |
887 | 21 | .erase(QueryInst); |
888 | 21 | NonLocalDefsCache.erase(NonLocalDefIt); |
889 | 21 | return; |
890 | 21 | } |
891 | 54.9k | } |
892 | | // This routine does not expect to deal with volatile instructions. |
893 | | // Doing so would require piping the QueryInst all the way through. |
894 | | // TODO: volatiles can't be elided, but they can be reordered with other |
895 | | // non-volatile accesses. |
896 | | |
897 | | // We currently give up on any instruction which is ordered, but we do handle |
898 | | // atomic instructions which are unordered. |
899 | | // TODO: Handle ordered instructions |
900 | 54.9k | auto isOrdered = [](Instruction *Inst) { |
901 | 54.9k | if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { |
902 | 54.9k | return !LI->isUnordered(); |
903 | 54.9k | } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { |
904 | 0 | return !SI->isUnordered(); |
905 | 0 | } |
906 | 0 | return false; |
907 | 54.9k | }; |
908 | 54.9k | if (QueryInst->isVolatile() || isOrdered(QueryInst)) { |
909 | 0 | Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(), |
910 | 0 | const_cast<Value *>(Loc.Ptr))); |
911 | 0 | return; |
912 | 0 | } |
913 | 54.9k | const DataLayout &DL = FromBB->getModule()->getDataLayout(); |
914 | 54.9k | PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC); |
915 | | |
916 | | // This is the set of blocks we've inspected, and the pointer we consider in |
917 | | // each block. Because of critical edges, we currently bail out if querying |
918 | | // a block with multiple different pointers. This can happen during PHI |
919 | | // translation. |
920 | 54.9k | DenseMap<BasicBlock *, Value *> Visited; |
921 | 54.9k | if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, |
922 | 54.9k | Result, Visited, true)) |
923 | 53.1k | return; |
924 | 1.78k | Result.clear(); |
925 | 1.78k | Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(), |
926 | 1.78k | const_cast<Value *>(Loc.Ptr))); |
927 | 1.78k | } |
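And a GVN-style sketch for the pointer flavour of the non-local query; again `MD` and `LI` are assumed to be provided by the caller, and the helper is purely illustrative.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void collectDefBlocks(MemoryDependenceResults &MD, LoadInst *LI,
                                 SmallVectorImpl<BasicBlock *> &DefBlocks) {
      SmallVector<NonLocalDepResult, 64> Deps;
      MD.getNonLocalPointerDependency(LI, Deps);
      for (const NonLocalDepResult &Dep : Deps) {
        MemDepResult Res = Dep.getResult();
        if (Res.isDef()) // A def of the queried location exists in this block.
          DefBlocks.push_back(Dep.getBB());
        // Clobber/Unknown entries mean the value can't simply be forwarded.
      }
    }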
928 | | |
929 | | /// Compute the memdep value for BB with Pointer/PointeeSize using either |
930 | | /// cached information in Cache or by doing a lookup (which may use dirty cache |
931 | | /// info if available). |
932 | | /// |
933 | | /// If we do a lookup, add the result to the cache. |
934 | | MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock( |
935 | | Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, |
936 | | BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries, |
937 | 181k | BatchAAResults &BatchAA) { |
938 | | |
939 | 181k | bool isInvariantLoad = false; |
940 | | |
941 | 181k | if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst)) |
942 | 181k | isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load); |
943 | | |
944 | | // Do a binary search to see if we already have an entry for this block in |
945 | | // the cache set. If so, find it. |
946 | 181k | NonLocalDepInfo::iterator Entry = std::upper_bound( |
947 | 181k | Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB)); |
948 | 181k | if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB) |
949 | 92.3k | --Entry; |
950 | | |
951 | 181k | NonLocalDepEntry *ExistingResult = nullptr; |
952 | 181k | if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB) |
953 | 92.3k | ExistingResult = &*Entry; |
954 | | |
955 | | // Use the cached result for an invariant load only if there is no dependency |
956 | | // for the corresponding non-invariant load. In that case the invariant load |
957 | | // cannot have any dependency either. |
958 | 181k | if (ExistingResult && isInvariantLoad && |
959 | 181k | !ExistingResult->getResult().isNonFuncLocal()) |
960 | 0 | ExistingResult = nullptr; |
961 | | |
962 | | // If we have a cached entry, and it is non-dirty, use it as the value for |
963 | | // this dependency. |
964 | 181k | if (ExistingResult && !ExistingResult->getResult().isDirty()) { |
965 | 91.9k | ++NumCacheNonLocalPtr; |
966 | 91.9k | return ExistingResult->getResult(); |
967 | 91.9k | } |
968 | | |
969 | | // Otherwise, we have to scan for the value. If we have a dirty cache |
970 | | // entry, start scanning from its position, otherwise we scan from the end |
971 | | // of the block. |
972 | 89.6k | BasicBlock::iterator ScanPos = BB->end(); |
973 | 89.6k | if (ExistingResult && ExistingResult->getResult().getInst()) { |
974 | 480 | assert(ExistingResult->getResult().getInst()->getParent() == BB && |
975 | 480 | "Instruction invalidated?"); |
976 | 0 | ++NumCacheDirtyNonLocalPtr; |
977 | 480 | ScanPos = ExistingResult->getResult().getInst()->getIterator(); |
978 | | |
979 | | // Eliminating the dirty entry from 'Cache', so update the reverse info. |
980 | 480 | ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); |
981 | 480 | RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey); |
982 | 89.1k | } else { |
983 | 89.1k | ++NumUncacheNonLocalPtr; |
984 | 89.1k | } |
985 | | |
986 | | // Scan the block for the dependency. |
987 | 0 | MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, |
988 | 89.6k | QueryInst, nullptr, BatchAA); |
989 | | |
990 | | // Don't cache results for invariant load. |
991 | 89.6k | if (isInvariantLoad) |
992 | 0 | return Dep; |
993 | | |
994 | | // If we had a dirty entry for the block, update it. Otherwise, just add |
995 | | // a new entry. |
996 | 89.6k | if (ExistingResult) |
997 | 480 | ExistingResult->setResult(Dep); |
998 | 89.1k | else |
999 | 89.1k | Cache->push_back(NonLocalDepEntry(BB, Dep)); |
1000 | | |
1001 | | // If the block has a dependency (i.e. it isn't completely transparent to |
1002 | | // the value), remember the reverse association because we just added it |
1003 | | // to Cache! |
1004 | 89.6k | if (!Dep.isLocal()) |
1005 | 69.5k | return Dep; |
1006 | | |
1007 | | // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently |
1008 | | // update MemDep when we remove instructions. |
1009 | 20.1k | Instruction *Inst = Dep.getInst(); |
1010 | 20.1k | assert(Inst && "Didn't depend on anything?"); |
1011 | 0 | ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); |
1012 | 20.1k | ReverseNonLocalPtrDeps[Inst].insert(CacheKey); |
1013 | 20.1k | return Dep; |
1014 | 89.6k | } |
1015 | | |
1016 | | /// Sort the NonLocalDepInfo cache, given a certain number of elements in the |
1017 | | /// array that are already properly ordered. |
1018 | | /// |
1019 | | /// This is optimized for the case when only a few entries are added. |
1020 | | static void |
1021 | | SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, |
1022 | 80.2k | unsigned NumSortedEntries) { |
1023 | 80.2k | switch (Cache.size() - NumSortedEntries) { |
1024 | 59.6k | case 0: |
1025 | | // done, no new entries. |
1026 | 59.6k | break; |
1027 | 3.42k | case 2: { |
1028 | | // Two new entries, insert the last one into place. |
1029 | 3.42k | NonLocalDepEntry Val = Cache.back(); |
1030 | 3.42k | Cache.pop_back(); |
1031 | 3.42k | MemoryDependenceResults::NonLocalDepInfo::iterator Entry = |
1032 | 3.42k | std::upper_bound(Cache.begin(), Cache.end() - 1, Val); |
1033 | 3.42k | Cache.insert(Entry, Val); |
1034 | 3.42k | [[fallthrough]]; |
1035 | 3.42k | } |
1036 | 13.5k | case 1: |
1037 | | // One new entry, Just insert the new value at the appropriate position. |
1038 | 13.5k | if (Cache.size() != 1) { |
1039 | 8.99k | NonLocalDepEntry Val = Cache.back(); |
1040 | 8.99k | Cache.pop_back(); |
1041 | 8.99k | MemoryDependenceResults::NonLocalDepInfo::iterator Entry = |
1042 | 8.99k | llvm::upper_bound(Cache, Val); |
1043 | 8.99k | Cache.insert(Entry, Val); |
1044 | 8.99k | } |
1045 | 13.5k | break; |
1046 | 7.03k | default: |
1047 | | // Added many values, do a full scale sort. |
1048 | 7.03k | llvm::sort(Cache); |
1049 | 7.03k | break; |
1050 | 80.2k | } |
1051 | 80.2k | } |
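The general idea, minus the small-count fast paths above, is an insertion sort of the new tail elements into an already sorted prefix. A minimal stand-alone sketch on a plain vector of integers (not MemDep code):

    #include <algorithm>
    #include <vector>

    // Keep inserting the unsorted tail elements of V into the sorted prefix of
    // length NumSorted until the whole vector is sorted.
    static void insertTailIntoSortedPrefix(std::vector<int> &V,
                                           unsigned NumSorted) {
      while (V.size() > NumSorted) {
        int Val = V.back();
        V.pop_back();
        V.insert(std::upper_bound(V.begin(), V.begin() + NumSorted, Val), Val);
        ++NumSorted;
      }
    }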
1052 | | |
1053 | | /// Perform a dependency query based on pointer/pointeesize starting at the end |
1054 | | /// of StartBB. |
1055 | | /// |
1056 | | /// Add any clobber/def results to the results vector and keep track of which |
1057 | | /// blocks are visited in 'Visited'. |
1058 | | /// |
1059 | | /// This has special behavior for the first block queries (when SkipFirstBlock |
1060 | | /// is true). In this special case, it ignores the contents of the specified |
1061 | | /// block and starts returning dependence info for its predecessors. |
1062 | | /// |
1063 | | /// This function returns true on success, or false to indicate that it could |
1064 | | /// not compute dependence information for some reason. This should be treated |
1065 | | /// as a clobber dependence on the first instruction in the predecessor block. |
1066 | | bool MemoryDependenceResults::getNonLocalPointerDepFromBB( |
1067 | | Instruction *QueryInst, const PHITransAddr &Pointer, |
1068 | | const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, |
1069 | | SmallVectorImpl<NonLocalDepResult> &Result, |
1070 | | DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock, |
1071 | 92.9k | bool IsIncomplete) { |
1072 | | // Look up the cached info for Pointer. |
1073 | 92.9k | ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); |
1074 | | |
1075 | | // Set up a temporary NLPI value. If the map doesn't yet have an entry for |
1076 | | // CacheKey, this value will be inserted as the associated value. Otherwise, |
1077 | | // it'll be ignored, and we'll have to check to see if the cached size and |
1078 | | // aa tags are consistent with the current query. |
1079 | 92.9k | NonLocalPointerInfo InitialNLPI; |
1080 | 92.9k | InitialNLPI.Size = Loc.Size; |
1081 | 92.9k | InitialNLPI.AATags = Loc.AATags; |
1082 | | |
1083 | 92.9k | bool isInvariantLoad = false; |
1084 | 92.9k | if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst)) |
1085 | 92.9k | isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load); |
1086 | | |
1087 | | // Get the NLPI for CacheKey, inserting one into the map if it doesn't |
1088 | | // already have one. |
1089 | 92.9k | std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = |
1090 | 92.9k | NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); |
1091 | 92.9k | NonLocalPointerInfo *CacheInfo = &Pair.first->second; |
1092 | | |
1093 | | // If we already have a cache entry for this CacheKey, we may need to do some |
1094 | | // work to reconcile the cache entry and the current query. |
1095 | | // Invariant loads don't participate in caching. Thus no need to reconcile. |
1096 | 92.9k | if (!isInvariantLoad && !Pair.second) { |
1097 | 78.6k | if (CacheInfo->Size != Loc.Size) { |
1098 | 2.45k | bool ThrowOutEverything; |
1099 | 2.45k | if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) { |
1100 | | // FIXME: We may be able to do better in the face of results with mixed |
1101 | | // precision. We don't appear to get them in practice, though, so just |
1102 | | // be conservative. |
1103 | 2.45k | ThrowOutEverything = |
1104 | 2.45k | CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() || |
1105 | 2.45k | !TypeSize::isKnownGE(CacheInfo->Size.getValue(), |
1106 | 2.45k | Loc.Size.getValue()); |
1107 | 2.45k | } else { |
1108 | | // For our purposes, unknown size > all others. |
1109 | 0 | ThrowOutEverything = !Loc.Size.hasValue(); |
1110 | 0 | } |
1111 | | |
1112 | 2.45k | if (ThrowOutEverything) { |
1113 | | // The query's Size is greater than the cached one. Throw out the |
1114 | | // cached data and proceed with the query at the greater size. |
1115 | 496 | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1116 | 496 | CacheInfo->Size = Loc.Size; |
1117 | 496 | for (auto &Entry : CacheInfo->NonLocalDeps) |
1118 | 2.79k | if (Instruction *Inst = Entry.getResult().getInst()) |
1119 | 1.03k | RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); |
1120 | 496 | CacheInfo->NonLocalDeps.clear(); |
1121 | | // The cache is cleared (in the above line) so we will have lost |
1122 | | // information about blocks we have already visited. We therefore must |
1123 | | // assume that the cache information is incomplete. |
1124 | 496 | IsIncomplete = true; |
1125 | 1.96k | } else { |
1126 | | // This query's Size is less than the cached one. Conservatively restart |
1127 | | // the query using the greater size. |
1128 | 1.96k | return getNonLocalPointerDepFromBB( |
1129 | 1.96k | QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, |
1130 | 1.96k | StartBB, Result, Visited, SkipFirstBlock, IsIncomplete); |
1131 | 1.96k | } |
1132 | 2.45k | } |
1133 | | |
1134 | | // If the query's AATags are inconsistent with the cached one, |
1135 | | // conservatively throw out the cached data and restart the query with |
1136 | | // no tag if needed. |
1137 | 76.6k | if (CacheInfo->AATags != Loc.AATags) { |
1138 | 8 | if (CacheInfo->AATags) { |
1139 | 0 | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1140 | 0 | CacheInfo->AATags = AAMDNodes(); |
1141 | 0 | for (auto &Entry : CacheInfo->NonLocalDeps) |
1142 | 0 | if (Instruction *Inst = Entry.getResult().getInst()) |
1143 | 0 | RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); |
1144 | 0 | CacheInfo->NonLocalDeps.clear(); |
1145 | | // The cache is cleared (in the above line) so we will have lost |
1146 | | // information about blocks we have already visited. We therefore must |
1147 | | // assume that the cache information is incomplete. |
1148 | 0 | IsIncomplete = true; |
1149 | 0 | } |
1150 | 8 | if (Loc.AATags) |
1151 | 8 | return getNonLocalPointerDepFromBB( |
1152 | 8 | QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, |
1153 | 8 | Visited, SkipFirstBlock, IsIncomplete); |
1154 | 8 | } |
1155 | 76.6k | } |
1156 | | |
1157 | 91.0k | NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; |
1158 | | |
1159 | | // If we have valid cached information for exactly the block we are |
1160 | | // investigating, just return it with no recomputation. |
1161 | | // Don't use cached information for invariant loads since it is valid for |
1162 | | // non-invariant loads only. |
1163 | 91.0k | if (!IsIncomplete && !isInvariantLoad && |
1164 | 91.0k | CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { |
1165 |       |     // We have a fully cached result for this query, so we can just return the
1166 | | // cached results and populate the visited set. However, we have to verify |
1167 | | // that we don't already have conflicting results for these blocks. Check |
1168 | | // to ensure that if a block in the results set is in the visited set that |
1169 | | // it was for the same pointer query. |
1170 | 11.5k | if (!Visited.empty()) { |
1171 | 1.33k | for (auto &Entry : *Cache) { |
1172 | 1.33k | DenseMap<BasicBlock *, Value *>::iterator VI = |
1173 | 1.33k | Visited.find(Entry.getBB()); |
1174 | 1.33k | if (VI == Visited.end() || VI->second == Pointer.getAddr()) |
1175 | 1.33k | continue; |
1176 | | |
1177 | | // We have a pointer mismatch in a block. Just return false, saying |
1178 | | // that something was clobbered in this result. We could also do a |
1179 | | // non-fully cached query, but there is little point in doing this. |
1180 | 0 | return false; |
1181 | 1.33k | } |
1182 | 538 | } |
1183 | | |
1184 | 11.5k | Value *Addr = Pointer.getAddr(); |
1185 | 99.7k | for (auto &Entry : *Cache) { |
1186 | 99.7k | Visited.insert(std::make_pair(Entry.getBB(), Addr)); |
1187 | 99.7k | if (Entry.getResult().isNonLocal()) { |
1188 | 76.4k | continue; |
1189 | 76.4k | } |
1190 | | |
1191 | 23.2k | if (DT.isReachableFromEntry(Entry.getBB())) { |
1192 | 23.1k | Result.push_back( |
1193 | 23.1k | NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr)); |
1194 | 23.1k | } |
1195 | 23.2k | } |
1196 | 11.5k | ++NumCacheCompleteNonLocalPtr; |
1197 | 11.5k | return true; |
1198 | 11.5k | } |
1199 | | |
1200 |       |   // Otherwise, this is either a new block, a block with an invalid cache
1201 |       |   // pointer, or one that we're about to invalidate by putting more info
1202 |       |   // into it than its valid cache info covers. If empty and not explicitly
1203 |       |   // marked incomplete, the result will be valid cache info; otherwise it isn't.
1204 |       |   //
1205 |       |   // Invariant loads don't affect the cache in any way, so there is no need
1206 |       |   // to update CacheInfo either.
1207 | 79.4k | if (!isInvariantLoad) { |
1208 | 79.4k | if (!IsIncomplete && Cache->empty()) |
1209 | 31.5k | CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); |
1210 | 47.8k | else |
1211 | 47.8k | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1212 | 79.4k | } |
1213 | | |
1214 | 79.4k | SmallVector<BasicBlock *, 32> Worklist; |
1215 | 79.4k | Worklist.push_back(StartBB); |
1216 | | |
1217 | | // PredList used inside loop. |
1218 | 79.4k | SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList; |
1219 | | |
1220 | | // Keep track of the entries that we know are sorted. Previously cached |
1221 |       |   // entries will all be sorted. The entries we add are sorted only on demand
1222 |       |   // (we don't insert every element into its sorted position). We know that we
1223 | | // won't get any reuse from currently inserted values, because we don't |
1224 | | // revisit blocks after we insert info for them. |
1225 | 79.4k | unsigned NumSortedEntries = Cache->size(); |
1226 | 79.4k | unsigned WorklistEntries = BlockNumberLimit; |
1227 | 79.4k | bool GotWorklistLimit = false; |
1228 | 79.4k | LLVM_DEBUG(AssertSorted(*Cache)); |
1229 | | |
1230 | 79.4k | BatchAAResults BatchAA(AA, &EII); |
1231 | 303k | while (!Worklist.empty()) { |
1232 | 225k | BasicBlock *BB = Worklist.pop_back_val(); |
1233 | | |
1234 |       |     // If we do process a large number of blocks, it becomes very expensive
1235 |       |     // and likely isn't worth worrying about.
1236 | 225k | if (Result.size() > NumResultsLimit) { |
1237 | | // Sort it now (if needed) so that recursive invocations of |
1238 | | // getNonLocalPointerDepFromBB and other routines that could reuse the |
1239 | | // cache value will only see properly sorted cache arrays. |
1240 | 0 | if (Cache && NumSortedEntries != Cache->size()) { |
1241 | 0 | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); |
1242 | 0 | } |
1243 | | // Since we bail out, the "Cache" set won't contain all of the |
1244 | | // results for the query. This is ok (we can still use it to accelerate |
1245 | | // specific block queries) but we can't do the fastpath "return all |
1246 | | // results from the set". Clear out the indicator for this. |
1247 | 0 | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1248 | 0 | return false; |
1249 | 0 | } |
1250 | | |
1251 | | // Skip the first block if we have it. |
1252 | 225k | if (!SkipFirstBlock) { |
1253 | | // Analyze the dependency of *Pointer in FromBB. See if we already have |
1254 | | // been here. |
1255 | 181k | assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); |
1256 | | |
1257 | | // Get the dependency info for Pointer in BB. If we have cached |
1258 | | // information, we will use it, otherwise we compute it. |
1259 | 181k | LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries)); |
1260 | 181k | MemDepResult Dep = getNonLocalInfoForBlock( |
1261 | 181k | QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA); |
1262 | | |
1263 | | // If we got a Def or Clobber, add this to the list of results. |
1264 | 181k | if (!Dep.isNonLocal()) { |
1265 | 53.1k | if (DT.isReachableFromEntry(BB)) { |
1266 | 53.0k | Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); |
1267 | 53.0k | continue; |
1268 | 53.0k | } |
1269 | 53.1k | } |
1270 | 181k | } |
1271 | | |
1272 | | // If 'Pointer' is an instruction defined in this block, then we need to do |
1273 | | // phi translation to change it into a value live in the predecessor block. |
1274 | | // If not, we just add the predecessors to the worklist and scan them with |
1275 | | // the same Pointer. |
1276 | 172k | if (!Pointer.needsPHITranslationFromBlock(BB)) { |
1277 | 135k | SkipFirstBlock = false; |
1278 | 135k | SmallVector<BasicBlock *, 16> NewBlocks; |
1279 | 214k | for (BasicBlock *Pred : PredCache.get(BB)) { |
1280 | | // Verify that we haven't looked at this block yet. |
1281 | 214k | std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes = |
1282 | 214k | Visited.insert(std::make_pair(Pred, Pointer.getAddr())); |
1283 | 214k | if (InsertRes.second) { |
1284 | | // First time we've looked at *PI. |
1285 | 146k | NewBlocks.push_back(Pred); |
1286 | 146k | continue; |
1287 | 146k | } |
1288 | | |
1289 | | // If we have seen this block before, but it was with a different |
1290 | | // pointer then we have a phi translation failure and we have to treat |
1291 | | // this as a clobber. |
1292 | 68.7k | if (InsertRes.first->second != Pointer.getAddr()) { |
1293 | | // Make sure to clean up the Visited map before continuing on to |
1294 | | // PredTranslationFailure. |
1295 | 302 | for (auto *NewBlock : NewBlocks) |
1296 | 74 | Visited.erase(NewBlock); |
1297 | 302 | goto PredTranslationFailure; |
1298 | 302 | } |
1299 | 68.7k | } |
1300 | 135k | if (NewBlocks.size() > WorklistEntries) { |
1301 | | // Make sure to clean up the Visited map before continuing on to |
1302 | | // PredTranslationFailure. |
1303 | 0 | for (auto *NewBlock : NewBlocks) |
1304 | 0 | Visited.erase(NewBlock); |
1305 | 0 | GotWorklistLimit = true; |
1306 | 0 | goto PredTranslationFailure; |
1307 | 0 | } |
1308 | 135k | WorklistEntries -= NewBlocks.size(); |
1309 | 135k | Worklist.append(NewBlocks.begin(), NewBlocks.end()); |
1310 | 135k | continue; |
1311 | 135k | } |
1312 | | |
1313 |       |     // We do need to do phi translation; if we know ahead of time we can't phi
1314 |       |     // translate this value, don't even try.
1315 | 37.1k | if (!Pointer.isPotentiallyPHITranslatable()) |
1316 | 2.02k | goto PredTranslationFailure; |
1317 | | |
1318 | | // We may have added values to the cache list before this PHI translation. |
1319 | | // If so, we haven't done anything to ensure that the cache remains sorted. |
1320 | | // Sort it now (if needed) so that recursive invocations of |
1321 | | // getNonLocalPointerDepFromBB and other routines that could reuse the cache |
1322 | | // value will only see properly sorted cache arrays. |
1323 | 35.0k | if (Cache && NumSortedEntries != Cache->size()) { |
1324 | 2.54k | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); |
1325 | 2.54k | NumSortedEntries = Cache->size(); |
1326 | 2.54k | } |
1327 | 35.0k | Cache = nullptr; |
1328 | | |
1329 | 35.0k | PredList.clear(); |
1330 | 85.5k | for (BasicBlock *Pred : PredCache.get(BB)) { |
1331 | 85.5k | PredList.push_back(std::make_pair(Pred, Pointer)); |
1332 | | |
1333 | | // Get the PHI translated pointer in this predecessor. This can fail if |
1334 |       |       // not translatable, in which case getAddr() returns null.
1335 | 85.5k | PHITransAddr &PredPointer = PredList.back().second; |
1336 | 85.5k | Value *PredPtrVal = |
1337 | 85.5k | PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false); |
1338 | | |
1339 | | // Check to see if we have already visited this pred block with another |
1340 | | // pointer. If so, we can't do this lookup. This failure can occur |
1341 | | // with PHI translation when a critical edge exists and the PHI node in |
1342 | | // the successor translates to a pointer value different than the |
1343 | | // pointer the block was first analyzed with. |
1344 | 85.5k | std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes = |
1345 | 85.5k | Visited.insert(std::make_pair(Pred, PredPtrVal)); |
1346 | | |
1347 | 85.5k | if (!InsertRes.second) { |
1348 | | // We found the pred; take it off the list of preds to visit. |
1349 | 1.59k | PredList.pop_back(); |
1350 | | |
1351 | | // If the predecessor was visited with PredPtr, then we already did |
1352 | | // the analysis and can ignore it. |
1353 | 1.59k | if (InsertRes.first->second == PredPtrVal) |
1354 | 1.00k | continue; |
1355 | | |
1356 | | // Otherwise, the block was previously analyzed with a different |
1357 | | // pointer. We can't represent the result of this case, so we just |
1358 | | // treat this as a phi translation failure. |
1359 | | |
1360 | | // Make sure to clean up the Visited map before continuing on to |
1361 | | // PredTranslationFailure. |
1362 | 590 | for (const auto &Pred : PredList) |
1363 | 1.04k | Visited.erase(Pred.first); |
1364 | | |
1365 | 590 | goto PredTranslationFailure; |
1366 | 1.59k | } |
1367 | 85.5k | } |
1368 | | |
1369 |       |     // Actually process results here; this needs to be a separate loop to avoid
1370 | | // calling getNonLocalPointerDepFromBB for blocks we don't want to return |
1371 | | // any results for. (getNonLocalPointerDepFromBB will modify our |
1372 | | // datastructures in ways the code after the PredTranslationFailure label |
1373 | | // doesn't expect.) |
1374 | 82.9k | for (auto &I : PredList) { |
1375 | 82.9k | BasicBlock *Pred = I.first; |
1376 | 82.9k | PHITransAddr &PredPointer = I.second; |
1377 | 82.9k | Value *PredPtrVal = PredPointer.getAddr(); |
1378 | | |
1379 | 82.9k | bool CanTranslate = true; |
1380 | | // If PHI translation was unable to find an available pointer in this |
1381 | | // predecessor, then we have to assume that the pointer is clobbered in |
1382 | | // that predecessor. We can still do PRE of the load, which would insert |
1383 | | // a computation of the pointer in this predecessor. |
1384 | 82.9k | if (!PredPtrVal) |
1385 | 46.8k | CanTranslate = false; |
1386 | | |
1387 | | // FIXME: it is entirely possible that PHI translating will end up with |
1388 | | // the same value. Consider PHI translating something like: |
1389 | | // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* |
1390 | | // to recurse here, pedantically speaking. |
1391 | | |
1392 | | // If getNonLocalPointerDepFromBB fails here, that means the cached |
1393 | | // result conflicted with the Visited list; we have to conservatively |
1394 | | // assume it is unknown, but this also does not block PRE of the load. |
1395 | 82.9k | if (!CanTranslate || |
1396 | 82.9k | !getNonLocalPointerDepFromBB(QueryInst, PredPointer, |
1397 | 36.0k | Loc.getWithNewPtr(PredPtrVal), isLoad, |
1398 | 46.8k | Pred, Result, Visited)) { |
1399 | | // Add the entry to the Result list. |
1400 | 46.8k | NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); |
1401 | 46.8k | Result.push_back(Entry); |
1402 | | |
1403 | | // Since we had a phi translation failure, the cache for CacheKey won't |
1404 | | // include all of the entries that we need to immediately satisfy future |
1405 | | // queries. Mark this in NonLocalPointerDeps by setting the |
1406 |       |         // BBSkipFirstBlockPair pointer to null. This makes any reuse of the cached
1407 |       |         // value do more work, but it won't miss the phi translation failure.
1408 | 46.8k | NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; |
1409 | 46.8k | NLPI.Pair = BBSkipFirstBlockPair(); |
1410 | 46.8k | continue; |
1411 | 46.8k | } |
1412 | 82.9k | } |
1413 | | |
1414 | | // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. |
1415 | 34.5k | CacheInfo = &NonLocalPointerDeps[CacheKey]; |
1416 | 34.5k | Cache = &CacheInfo->NonLocalDeps; |
1417 | 34.5k | NumSortedEntries = Cache->size(); |
1418 | | |
1419 | | // Since we did phi translation, the "Cache" set won't contain all of the |
1420 | | // results for the query. This is ok (we can still use it to accelerate |
1421 | | // specific block queries) but we can't do the fastpath "return all |
1422 |       |     // results from the set". Clear out the indicator for this.
1423 | 34.5k | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1424 | 34.5k | SkipFirstBlock = false; |
1425 | 34.5k | continue; |
1426 | | |
1427 | 2.92k | PredTranslationFailure: |
1428 | | // The following code is "failure"; we can't produce a sane translation |
1429 | | // for the given block. It assumes that we haven't modified any of |
1430 | | // our datastructures while processing the current block. |
1431 | | |
1432 | 2.92k | if (!Cache) { |
1433 | | // Refresh the CacheInfo/Cache pointer if it got invalidated. |
1434 | 590 | CacheInfo = &NonLocalPointerDeps[CacheKey]; |
1435 | 590 | Cache = &CacheInfo->NonLocalDeps; |
1436 | 590 | NumSortedEntries = Cache->size(); |
1437 | 590 | } |
1438 | | |
1439 | | // Since we failed phi translation, the "Cache" set won't contain all of the |
1440 | | // results for the query. This is ok (we can still use it to accelerate |
1441 | | // specific block queries) but we can't do the fastpath "return all |
1442 | | // results from the set". Clear out the indicator for this. |
1443 | 2.92k | CacheInfo->Pair = BBSkipFirstBlockPair(); |
1444 | | |
1445 | | // If *nothing* works, mark the pointer as unknown. |
1446 | | // |
1447 | | // If this is the magic first block, return this as a clobber of the whole |
1448 | | // incoming value. Since we can't phi translate to one of the predecessors, |
1449 | | // we have to bail out. |
1450 | 2.92k | if (SkipFirstBlock) |
1451 | 1.78k | return false; |
1452 | | |
1453 | | // Results of invariant loads are not cached thus no need to update cached |
1454 | | // information. |
1455 | 1.14k | if (!isInvariantLoad) { |
1456 | 3.22k | for (NonLocalDepEntry &I : llvm::reverse(*Cache)) { |
1457 | 3.22k | if (I.getBB() != BB) |
1458 | 2.08k | continue; |
1459 | | |
1460 | 1.14k | assert((GotWorklistLimit || I.getResult().isNonLocal() || |
1461 | 1.14k | !DT.isReachableFromEntry(BB)) && |
1462 | 1.14k | "Should only be here with transparent block"); |
1463 | | |
1464 | 0 | I.setResult(MemDepResult::getUnknown()); |
1465 | | |
1466 | | |
1467 | 1.14k | break; |
1468 | 3.22k | } |
1469 | 1.14k | } |
1470 | 1.14k | (void)GotWorklistLimit; |
1471 | | // Go ahead and report unknown dependence. |
1472 | 1.14k | Result.push_back( |
1473 | 1.14k | NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr())); |
1474 | 1.14k | } |
1475 | | |
1476 | | // Okay, we're done now. If we added new values to the cache, re-sort it. |
1477 | 77.6k | SortNonLocalDepInfoCache(*Cache, NumSortedEntries); |
1478 | 77.6k | LLVM_DEBUG(AssertSorted(*Cache)); |
1479 | 77.6k | return true; |
1480 | 79.4k | } |
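
The function above is the internal worker behind getNonLocalPointerDependency(). As a rough illustration of how its per-block output is consumed, the sketch below (not part of this file) shows a GVN-style client dispatching on each block's MemDepResult; the callback names are assumptions invented for the example, not LLVM APIs.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Walk the non-local dependences of a load and dispatch each block's result.
// ProcessAvailableDef and MarkUnavailable are hypothetical client hooks.
static void walkNonLocalDeps(
    MemoryDependenceResults &MD, LoadInst *LI,
    function_ref<void(BasicBlock *, Instruction *)> ProcessAvailableDef,
    function_ref<void(BasicBlock *)> MarkUnavailable) {
  SmallVector<NonLocalDepResult, 64> Deps;
  MD.getNonLocalPointerDependency(LI, Deps);
  for (const NonLocalDepResult &Dep : Deps) {
    const MemDepResult &Res = Dep.getResult();
    if (Res.isDef() || Res.isClobber())
      // A concrete instruction in Dep.getBB() produces or clobbers the value.
      ProcessAvailableDef(Dep.getBB(), Res.getInst());
    else if (Res.isUnknown())
      // Unknown (e.g. a phi-translation failure): give up in this block.
      MarkUnavailable(Dep.getBB());
  }
}

Each entry corresponds to one predecessor block reached by the walk above, which is why a clobber recorded for a block does not prevent a client from inserting the pointer computation there (PRE of the load).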
1481 | | |
1482 | | /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it. |
1483 | | void MemoryDependenceResults::removeCachedNonLocalPointerDependencies( |
1484 | 86.2k | ValueIsLoadPair P) { |
1485 | | |
1486 | | // Most of the time this cache is empty. |
1487 | 86.2k | if (!NonLocalDefsCache.empty()) { |
1488 | 3 | auto it = NonLocalDefsCache.find(P.getPointer()); |
1489 | 3 | if (it != NonLocalDefsCache.end()) { |
1490 | 1 | RemoveFromReverseMap(ReverseNonLocalDefsCache, |
1491 | 1 | it->second.getResult().getInst(), P.getPointer()); |
1492 | 1 | NonLocalDefsCache.erase(it); |
1493 | 1 | } |
1494 | | |
1495 | 3 | if (auto *I = dyn_cast<Instruction>(P.getPointer())) { |
1496 | 3 | auto toRemoveIt = ReverseNonLocalDefsCache.find(I); |
1497 | 3 | if (toRemoveIt != ReverseNonLocalDefsCache.end()) { |
1498 | 0 | for (const auto *entry : toRemoveIt->second) |
1499 | 0 | NonLocalDefsCache.erase(entry); |
1500 | 0 | ReverseNonLocalDefsCache.erase(toRemoveIt); |
1501 | 0 | } |
1502 | 3 | } |
1503 | 3 | } |
1504 | | |
1505 | 86.2k | CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); |
1506 | 86.2k | if (It == NonLocalPointerDeps.end()) |
1507 | 77.6k | return; |
1508 | | |
1509 | | // Remove all of the entries in the BB->val map. This involves removing |
1510 | | // instructions from the reverse map. |
1511 | 8.60k | NonLocalDepInfo &PInfo = It->second.NonLocalDeps; |
1512 | | |
1513 | 56.6k | for (const NonLocalDepEntry &DE : PInfo) { |
1514 | 56.6k | Instruction *Target = DE.getResult().getInst(); |
1515 | 56.6k | if (!Target) |
1516 | 44.1k | continue; // Ignore non-local dep results. |
1517 | 12.5k | assert(Target->getParent() == DE.getBB()); |
1518 | | |
1519 | | // Eliminating the dirty entry from 'Cache', so update the reverse info. |
1520 | 0 | RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); |
1521 | 12.5k | } |
1522 | | |
1523 | | // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). |
1524 | 8.60k | NonLocalPointerDeps.erase(It); |
1525 | 8.60k | } |
1526 | | |
1527 | 44.4k | void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) { |
1528 | | // If Ptr isn't really a pointer, just ignore it. |
1529 | 44.4k | if (!Ptr->getType()->isPointerTy()) |
1530 | 20.7k | return; |
1531 | | // Flush store info for the pointer. |
1532 | 23.7k | removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); |
1533 | | // Flush load info for the pointer. |
1534 | 23.7k | removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); |
1535 | 23.7k | } |
1536 | | |
1537 | 5.09k | void MemoryDependenceResults::invalidateCachedPredecessors() { |
1538 | 5.09k | PredCache.clear(); |
1539 | 5.09k | } |
1540 | | |
1541 | 107k | void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { |
1542 | 107k | EII.removeInstruction(RemInst); |
1543 | | |
1544 | | // Walk through the Non-local dependencies, removing this one as the value |
1545 | | // for any cached queries. |
1546 | 107k | NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst); |
1547 | 107k | if (NLDI != NonLocalDepsMap.end()) { |
1548 | 0 | NonLocalDepInfo &BlockMap = NLDI->second.first; |
1549 | 0 | for (auto &Entry : BlockMap) |
1550 | 0 | if (Instruction *Inst = Entry.getResult().getInst()) |
1551 | 0 | RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); |
1552 | 0 | NonLocalDepsMap.erase(NLDI); |
1553 | 0 | } |
1554 | | |
1555 | | // If we have a cached local dependence query for this instruction, remove it. |
1556 | 107k | LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); |
1557 | 107k | if (LocalDepEntry != LocalDeps.end()) { |
1558 | | // Remove us from DepInst's reverse set now that the local dep info is gone. |
1559 | 19.3k | if (Instruction *Inst = LocalDepEntry->second.getInst()) |
1560 | 10.0k | RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); |
1561 | | |
1562 | | // Remove this local dependency info. |
1563 | 19.3k | LocalDeps.erase(LocalDepEntry); |
1564 | 19.3k | } |
1565 | | |
1566 | | // If we have any cached dependencies on this instruction, remove |
1567 | | // them. |
1568 | | |
1569 | | // If the instruction is a pointer, remove it from both the load info and the |
1570 | | // store info. |
1571 | 107k | if (RemInst->getType()->isPointerTy()) { |
1572 | 19.4k | removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); |
1573 | 19.4k | removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); |
1574 | 88.4k | } else { |
1575 |       |     // Otherwise, if the instruction is in the map directly, it must be a load.
1576 | | // Remove it. |
1577 | 88.4k | auto toRemoveIt = NonLocalDefsCache.find(RemInst); |
1578 | 88.4k | if (toRemoveIt != NonLocalDefsCache.end()) { |
1579 | 0 | assert(isa<LoadInst>(RemInst) && |
1580 | 0 | "only load instructions should be added directly"); |
1581 | 0 | const Instruction *DepV = toRemoveIt->second.getResult().getInst(); |
1582 | 0 | ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst); |
1583 | 0 | NonLocalDefsCache.erase(toRemoveIt); |
1584 | 0 | } |
1585 | 88.4k | } |
1586 | | |
1587 | | // Loop over all of the things that depend on the instruction we're removing. |
1588 | 0 | SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd; |
1589 | | |
1590 | | // If we find RemInst as a clobber or Def in any of the maps for other values, |
1591 | | // we need to replace its entry with a dirty version of the instruction after |
1592 | | // it. If RemInst is a terminator, we use a null dirty value. |
1593 | | // |
1594 | | // Using a dirty version of the instruction after RemInst saves having to scan |
1595 | | // the entire block to get to this point. |
1596 | 107k | MemDepResult NewDirtyVal; |
1597 | 107k | if (!RemInst->isTerminator()) |
1598 | 107k | NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator()); |
1599 | | |
1600 | 107k | ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); |
1601 | 107k | if (ReverseDepIt != ReverseLocalDeps.end()) { |
1602 | | // RemInst can't be the terminator if it has local stuff depending on it. |
1603 | 185 | assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() && |
1604 | 185 | "Nothing can locally depend on a terminator"); |
1605 | | |
1606 | 185 | for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) { |
1607 | 185 | assert(InstDependingOnRemInst != RemInst && |
1608 | 185 | "Already removed our local dep info"); |
1609 | | |
1610 | 0 | LocalDeps[InstDependingOnRemInst] = NewDirtyVal; |
1611 | | |
1612 |       |       // Make sure to remember that new things depend on NewDirtyVal's instruction.
1613 | 185 | assert(NewDirtyVal.getInst() && |
1614 | 185 | "There is no way something else can have " |
1615 | 185 | "a local dep on this if it is a terminator!"); |
1616 | 0 | ReverseDepsToAdd.push_back( |
1617 | 185 | std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); |
1618 | 185 | } |
1619 | | |
1620 | 185 | ReverseLocalDeps.erase(ReverseDepIt); |
1621 | | |
1622 | | // Add new reverse deps after scanning the set, to avoid invalidating the |
1623 | | // 'ReverseDeps' reference. |
1624 | 370 | while (!ReverseDepsToAdd.empty()) { |
1625 | 185 | ReverseLocalDeps[ReverseDepsToAdd.back().first].insert( |
1626 | 185 | ReverseDepsToAdd.back().second); |
1627 | 185 | ReverseDepsToAdd.pop_back(); |
1628 | 185 | } |
1629 | 185 | } |
1630 | | |
1631 | 0 | ReverseDepIt = ReverseNonLocalDeps.find(RemInst); |
1632 | 107k | if (ReverseDepIt != ReverseNonLocalDeps.end()) { |
1633 | 14 | for (Instruction *I : ReverseDepIt->second) { |
1634 | 14 | assert(I != RemInst && "Already removed NonLocalDep info for RemInst"); |
1635 | | |
1636 | 0 | PerInstNLInfo &INLD = NonLocalDepsMap[I]; |
1637 | | // The information is now dirty! |
1638 | 14 | INLD.second = true; |
1639 | | |
1640 | 180 | for (auto &Entry : INLD.first) { |
1641 | 180 | if (Entry.getResult().getInst() != RemInst) |
1642 | 166 | continue; |
1643 | | |
1644 | | // Convert to a dirty entry for the subsequent instruction. |
1645 | 14 | Entry.setResult(NewDirtyVal); |
1646 | | |
1647 | 14 | if (Instruction *NextI = NewDirtyVal.getInst()) |
1648 | 14 | ReverseDepsToAdd.push_back(std::make_pair(NextI, I)); |
1649 | 14 | } |
1650 | 14 | } |
1651 | | |
1652 | 14 | ReverseNonLocalDeps.erase(ReverseDepIt); |
1653 | | |
1654 |       |     // Add new reverse deps after scanning the set, to avoid invalidating it.
1655 | 28 | while (!ReverseDepsToAdd.empty()) { |
1656 | 14 | ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert( |
1657 | 14 | ReverseDepsToAdd.back().second); |
1658 | 14 | ReverseDepsToAdd.pop_back(); |
1659 | 14 | } |
1660 | 14 | } |
1661 | | |
1662 | | // If the instruction is in ReverseNonLocalPtrDeps then it appears as a |
1663 | | // value in the NonLocalPointerDeps info. |
1664 | 107k | ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = |
1665 | 107k | ReverseNonLocalPtrDeps.find(RemInst); |
1666 | 107k | if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { |
1667 | 4.53k | SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8> |
1668 | 4.53k | ReversePtrDepsToAdd; |
1669 | | |
1670 | 4.86k | for (ValueIsLoadPair P : ReversePtrDepIt->second) { |
1671 | 4.86k | assert(P.getPointer() != RemInst && |
1672 | 4.86k | "Already removed NonLocalPointerDeps info for RemInst"); |
1673 | | |
1674 | 0 | NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; |
1675 | | |
1676 | | // The cache is not valid for any specific block anymore. |
1677 | 4.86k | NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); |
1678 | | |
1679 | | // Update any entries for RemInst to use the instruction after it. |
1680 | 38.9k | for (auto &Entry : NLPDI) { |
1681 | 38.9k | if (Entry.getResult().getInst() != RemInst) |
1682 | 34.1k | continue; |
1683 | | |
1684 | | // Convert to a dirty entry for the subsequent instruction. |
1685 | 4.86k | Entry.setResult(NewDirtyVal); |
1686 | | |
1687 | 4.86k | if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) |
1688 | 4.86k | ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); |
1689 | 4.86k | } |
1690 | | |
1691 | | // Re-sort the NonLocalDepInfo. Changing the dirty entry to its |
1692 | | // subsequent value may invalidate the sortedness. |
1693 | 4.86k | llvm::sort(NLPDI); |
1694 | 4.86k | } |
1695 | | |
1696 | 4.53k | ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); |
1697 | | |
1698 | 9.40k | while (!ReversePtrDepsToAdd.empty()) { |
1699 | 4.86k | ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert( |
1700 | 4.86k | ReversePtrDepsToAdd.back().second); |
1701 | 4.86k | ReversePtrDepsToAdd.pop_back(); |
1702 | 4.86k | } |
1703 | 4.53k | } |
1704 | | |
1705 | 107k | assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?"); |
1706 | 107k | LLVM_DEBUG(verifyRemoved(RemInst)); |
1707 | 107k | } |
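
removeInstruction() and invalidateCachedPointerInfo() are the two update hooks a transform is expected to call when it rewrites IR while keeping this analysis alive. A minimal sketch of that pattern, under an assumed helper name (the ordering mirrors what clients such as GVN do, but this is illustrative, not the definitive protocol):

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: replace an instruction and keep MemDep's caches sane.
static void replaceAndErase(MemoryDependenceResults &MD, Instruction *Old,
                            Value *New) {
  Old->replaceAllUsesWith(New);
  // Cached pointer queries keyed on the replacement value may now be stale.
  if (New->getType()->isPointerTy())
    MD.invalidateCachedPointerInfo(New);
  // Drop every cached entry that mentions Old before it disappears.
  MD.removeInstruction(Old);
  Old->eraseFromParent();
}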
1708 | | |
1709 | | /// Verify that the specified instruction does not occur in our internal data |
1710 | | /// structures. |
1711 | | /// |
1712 | | /// This function verifies by asserting in debug builds. |
1713 | 0 | void MemoryDependenceResults::verifyRemoved(Instruction *D) const { |
1714 | 0 | #ifndef NDEBUG |
1715 | 0 | for (const auto &DepKV : LocalDeps) { |
1716 | 0 | assert(DepKV.first != D && "Inst occurs in data structures"); |
1717 | 0 | assert(DepKV.second.getInst() != D && "Inst occurs in data structures"); |
1718 | 0 | } |
1719 |       |
1720 | 0 | for (const auto &DepKV : NonLocalPointerDeps) { |
1721 | 0 | assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key"); |
1722 | 0 | for (const auto &Entry : DepKV.second.NonLocalDeps) |
1723 | 0 | assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value"); |
1724 | 0 | } |
1725 |       |
1726 | 0 | for (const auto &DepKV : NonLocalDepsMap) { |
1727 | 0 | assert(DepKV.first != D && "Inst occurs in data structures"); |
1728 | 0 | const PerInstNLInfo &INLD = DepKV.second; |
1729 | 0 | for (const auto &Entry : INLD.first) |
1730 | 0 | assert(Entry.getResult().getInst() != D && |
1731 | 0 | "Inst occurs in data structures"); |
1732 | 0 | } |
1733 |       |
1734 | 0 | for (const auto &DepKV : ReverseLocalDeps) { |
1735 | 0 | assert(DepKV.first != D && "Inst occurs in data structures"); |
1736 | 0 | for (Instruction *Inst : DepKV.second) |
1737 | 0 | assert(Inst != D && "Inst occurs in data structures"); |
1738 | 0 | } |
1739 |       |
1740 | 0 | for (const auto &DepKV : ReverseNonLocalDeps) { |
1741 | 0 | assert(DepKV.first != D && "Inst occurs in data structures"); |
1742 | 0 | for (Instruction *Inst : DepKV.second) |
1743 | 0 | assert(Inst != D && "Inst occurs in data structures"); |
1744 | 0 | } |
1745 |       |
1746 | 0 | for (const auto &DepKV : ReverseNonLocalPtrDeps) { |
1747 | 0 | assert(DepKV.first != D && "Inst occurs in rev NLPD map"); |
1748 | | |
1749 | 0 | for (ValueIsLoadPair P : DepKV.second) |
1750 | 0 | assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) && |
1751 | 0 | "Inst occurs in ReverseNonLocalPtrDeps map"); |
1752 | 0 | } |
1753 | 0 | #endif |
1754 | 0 | } |
1755 | | |
1756 | | AnalysisKey MemoryDependenceAnalysis::Key; |
1757 | | |
1758 | | MemoryDependenceAnalysis::MemoryDependenceAnalysis() |
1759 | 112k | : DefaultBlockScanLimit(BlockScanLimit) {} |
1760 | | |
1761 | | MemoryDependenceResults |
1762 | 24.7k | MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { |
1763 | 24.7k | auto &AA = AM.getResult<AAManager>(F); |
1764 | 24.7k | auto &AC = AM.getResult<AssumptionAnalysis>(F); |
1765 | 24.7k | auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); |
1766 | 24.7k | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
1767 | 24.7k | return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit); |
1768 | 24.7k | } |
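
Under the new pass manager, the analysis run() above is reached by requesting the result from the FunctionAnalysisManager. A minimal sketch of a client pass follows; the pass name is hypothetical and the body only issues throw-away local queries to show the call shape.

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct UsesMemDepPass : PassInfoMixin<UsesMemDepPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
    for (Instruction &I : instructions(F))
      if (auto *LI = dyn_cast<LoadInst>(&I))
        (void)MD.getDependency(LI); // local query; the result is cached lazily
    return PreservedAnalyses::all();
  }
};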
1769 | | |
1770 | | char MemoryDependenceWrapperPass::ID = 0; |
1771 | | |
1772 | 62 | INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep", |
1773 | 62 | "Memory Dependence Analysis", false, true) |
1774 | 62 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
1775 | 62 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
1776 | 62 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
1777 | 62 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
1778 | 62 | INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep", |
1779 | | "Memory Dependence Analysis", false, true) |
1780 | | |
1781 | 0 | MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) { |
1782 | 0 | initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry()); |
1783 | 0 | } |
1784 | | |
1785 | 0 | MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default; |
1786 | | |
1787 | 0 | void MemoryDependenceWrapperPass::releaseMemory() { |
1788 | 0 | MemDep.reset(); |
1789 | 0 | } |
1790 | | |
1791 | 0 | void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1792 | 0 | AU.setPreservesAll(); |
1793 | 0 | AU.addRequired<AssumptionCacheTracker>(); |
1794 | 0 | AU.addRequired<DominatorTreeWrapperPass>(); |
1795 | 0 | AU.addRequiredTransitive<AAResultsWrapperPass>(); |
1796 | 0 | AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); |
1797 | 0 | } |
1798 | | |
1799 | | bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA, |
1800 | 11.5k | FunctionAnalysisManager::Invalidator &Inv) { |
1801 | | // Check whether our analysis is preserved. |
1802 | 11.5k | auto PAC = PA.getChecker<MemoryDependenceAnalysis>(); |
1803 | 11.5k | if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) |
1804 | | // If not, give up now. |
1805 | 11.5k | return true; |
1806 | | |
1807 | | // Check whether the analyses we depend on became invalid for any reason. |
1808 | 0 | if (Inv.invalidate<AAManager>(F, PA) || |
1809 | 0 | Inv.invalidate<AssumptionAnalysis>(F, PA) || |
1810 | 0 | Inv.invalidate<DominatorTreeAnalysis>(F, PA)) |
1811 | 0 | return true; |
1812 | | |
1813 | | // Otherwise this analysis result remains valid. |
1814 | 0 | return false; |
1815 | 0 | } |
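
The invalidate() hook above only keeps a cached result alive if the pass explicitly preserves this analysis and AA, AssumptionAnalysis, and DominatorTreeAnalysis remain valid. A sketch of what such a pass might return is below; the pass name is hypothetical, and a real pass would also have to keep the caches consistent via removeInstruction() / invalidateCachedPointerInfo().

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct IncrementalUpdaterPass : PassInfoMixin<IncrementalUpdaterPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    // Declare MemDep preserved; it still goes away if any of the analyses it
    // depends on (AA, AC, DT) are invalidated by this or a later pass.
    PreservedAnalyses PA;
    PA.preserve<MemoryDependenceAnalysis>();
    return PA;
  }
};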
1816 | | |
1817 | 115k | unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { |
1818 | 115k | return DefaultBlockScanLimit; |
1819 | 115k | } |
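
Clients that want a tighter per-block budget than the -memdep-block-scan-limit default can pass their own counter to the dependency queries that accept a Limit pointer. A sketch under that assumption (the helper name is made up for the example):

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: run a pointer-dependency scan with an explicit budget.
static MemDepResult queryWithBudget(MemoryDependenceResults &MD, LoadInst *LI) {
  // Limit is decremented as instructions are scanned in the block.
  unsigned Limit = MD.getDefaultBlockScanLimit();
  return MD.getPointerDependencyFrom(MemoryLocation::get(LI), /*isLoad=*/true,
                                     LI->getIterator(), LI->getParent(), LI,
                                     &Limit);
}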
1820 | | |
1821 | 0 | bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { |
1822 | 0 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); |
1823 | 0 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
1824 | 0 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
1825 | 0 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1826 | 0 | MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit); |
1827 | 0 | return false; |
1828 | 0 | } |
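
For completeness, a legacy pass manager client reaches the same results through the wrapper pass initialized above: declare it in getAnalysisUsage() and pull the results out with getMemDep(). A minimal sketch with a hypothetical pass name (registration boilerplate omitted):

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Pass.h"
using namespace llvm;

struct LegacyMemDepClient : FunctionPass {
  static char ID;
  LegacyMemDepClient() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    MemoryDependenceResults &MD =
        getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    (void)MD; // query as needed, e.g. MD.getDependency(...)
    return false;
  }
};
char LegacyMemDepClient::ID = 0;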