/src/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This pass applies cache prefetch instructions based on a profile. The pass |
10 | | // assumes DiscriminateMemOps ran immediately before, to ensure debug info |
11 | | // matches the one used at profile generation time. The profile is encoded in |
12 | | // afdo format (text or binary). It contains prefetch hints recommendations. |
13 | | // Each recommendation is made in terms of debug info locations, a type (i.e. |
14 | | // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a |
15 | | // memory operand (see X86DiscriminateMemOps). The prefetch will be made for |
16 | | // a location at that memory operand + the delta specified in the |
17 | | // recommendation. |
18 | | // |
19 | | //===----------------------------------------------------------------------===// |
20 | | |
21 | | #include "X86.h" |
22 | | #include "X86InstrBuilder.h" |
23 | | #include "X86InstrInfo.h" |
24 | | #include "X86MachineFunctionInfo.h" |
25 | | #include "X86Subtarget.h" |
26 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
28 | | #include "llvm/IR/DebugInfoMetadata.h" |
29 | | #include "llvm/ProfileData/SampleProf.h" |
30 | | #include "llvm/ProfileData/SampleProfReader.h" |
31 | | #include "llvm/Support/VirtualFileSystem.h" |
32 | | #include "llvm/Transforms/IPO/SampleProfile.h" |
33 | | using namespace llvm; |
34 | | using namespace sampleprof; |
35 | | |
36 | | static cl::opt<std::string> |
37 | | PrefetchHintsFile("prefetch-hints-file", |
38 | | cl::desc("Path to the prefetch hints profile. See also " |
39 | | "-x86-discriminate-memops"), |
40 | | cl::Hidden); |
41 | | namespace { |
42 | | |
43 | | class X86InsertPrefetch : public MachineFunctionPass { |
44 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
45 | | bool doInitialization(Module &) override; |
46 | | |
47 | | bool runOnMachineFunction(MachineFunction &MF) override; |
48 | | struct PrefetchInfo { |
49 | | unsigned InstructionID; |
50 | | int64_t Delta; |
51 | | }; |
52 | | typedef SmallVectorImpl<PrefetchInfo> Prefetches; |
53 | | bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI, |
54 | | Prefetches &prefetches) const; |
55 | | |
56 | | public: |
57 | | static char ID; |
58 | | X86InsertPrefetch(const std::string &PrefetchHintsFilename); |
59 | 662 | StringRef getPassName() const override { |
60 | 662 | return "X86 Insert Cache Prefetches"; |
61 | 662 | } |
62 | | |
63 | | private: |
64 | | std::string Filename; |
65 | | std::unique_ptr<SampleProfileReader> Reader; |
66 | | }; |
67 | | |
68 | | using PrefetchHints = SampleRecord::CallTargetMap; |
69 | | |
70 | | // Return any prefetching hints for the specified MachineInstruction. The hints |
71 | | // are returned as pairs (name, delta). |
72 | | ErrorOr<const PrefetchHints &> |
73 | 0 | getPrefetchHints(const FunctionSamples *TopSamples, const MachineInstr &MI) { |
74 | 0 | if (const auto &Loc = MI.getDebugLoc()) |
75 | 0 | if (const auto *Samples = TopSamples->findFunctionSamples(Loc)) |
76 | 0 | return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc), |
77 | 0 | Loc->getBaseDiscriminator()); |
78 | 0 | return std::error_code(); |
79 | 0 | } |
80 | | |
81 | | // The prefetch instruction can't take memory operands involving vector |
82 | | // registers. |
83 | 0 | bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { |
84 | 0 | Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); |
85 | 0 | Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); |
86 | 0 | return (BaseReg == 0 || |
87 | 0 | X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || |
88 | 0 | X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && |
89 | 0 | (IndexReg == 0 || |
90 | 0 | X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || |
91 | 0 | X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)); |
92 | 0 | } |
93 | | |
94 | | } // end anonymous namespace |
95 | | |
96 | | //===----------------------------------------------------------------------===// |
97 | | // Implementation |
98 | | //===----------------------------------------------------------------------===// |
99 | | |
100 | | char X86InsertPrefetch::ID = 0; |
101 | | |
102 | | X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename) |
103 | 662 | : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {} |
104 | | |
105 | | /// Return true if the provided MachineInstruction has cache prefetch hints. In |
106 | | /// that case, the prefetch hints are stored, in order, in the Prefetches |
107 | | /// vector. |
108 | | bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, |
109 | | const MachineInstr &MI, |
110 | 0 | Prefetches &Prefetches) const { |
111 | 0 | assert(Prefetches.empty() && |
112 | 0 | "Expected caller passed empty PrefetchInfo vector."); |
113 | | |
114 | | // There is no point to match prefetch hints if the profile is using MD5. |
115 | 0 | if (FunctionSamples::UseMD5) |
116 | 0 | return false; |
117 | | |
118 | 0 | static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = { |
119 | 0 | {"_nta_", X86::PREFETCHNTA}, |
120 | 0 | {"_t0_", X86::PREFETCHT0}, |
121 | 0 | {"_t1_", X86::PREFETCHT1}, |
122 | 0 | {"_t2_", X86::PREFETCHT2}, |
123 | 0 | }; |
124 | 0 | static const char *SerializedPrefetchPrefix = "__prefetch"; |
125 | |
|
126 | 0 | auto T = getPrefetchHints(TopSamples, MI); |
127 | 0 | if (!T) |
128 | 0 | return false; |
129 | 0 | int16_t max_index = -1; |
130 | | // Convert serialized prefetch hints into PrefetchInfo objects, and populate |
131 | | // the Prefetches vector. |
132 | 0 | for (const auto &S_V : *T) { |
133 | 0 | StringRef Name = S_V.first.stringRef(); |
134 | 0 | if (Name.consume_front(SerializedPrefetchPrefix)) { |
135 | 0 | int64_t D = static_cast<int64_t>(S_V.second); |
136 | 0 | unsigned IID = 0; |
137 | 0 | for (const auto &HintType : HintTypes) { |
138 | 0 | if (Name.consume_front(HintType.first)) { |
139 | 0 | IID = HintType.second; |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | } |
143 | 0 | if (IID == 0) |
144 | 0 | return false; |
145 | 0 | uint8_t index = 0; |
146 | 0 | Name.consumeInteger(10, index); |
147 | |
|
148 | 0 | if (index >= Prefetches.size()) |
149 | 0 | Prefetches.resize(index + 1); |
150 | 0 | Prefetches[index] = {IID, D}; |
151 | 0 | max_index = std::max(max_index, static_cast<int16_t>(index)); |
152 | 0 | } |
153 | 0 | } |
154 | 0 | assert(max_index + 1 >= 0 && |
155 | 0 | "Possible overflow: max_index + 1 should be positive."); |
156 | 0 | assert(static_cast<size_t>(max_index + 1) == Prefetches.size() && |
157 | 0 | "The number of prefetch hints received should match the number of " |
158 | 0 | "PrefetchInfo objects returned"); |
159 | 0 | return !Prefetches.empty(); |
160 | 0 | } |
161 | | |
162 | 662 | bool X86InsertPrefetch::doInitialization(Module &M) { |
163 | 662 | if (Filename.empty()) |
164 | 662 | return false; |
165 | | |
166 | 0 | LLVMContext &Ctx = M.getContext(); |
167 | | // TODO: Propagate virtual file system into LLVM targets. |
168 | 0 | auto FS = vfs::getRealFileSystem(); |
169 | 0 | ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr = |
170 | 0 | SampleProfileReader::create(Filename, Ctx, *FS); |
171 | 0 | if (std::error_code EC = ReaderOrErr.getError()) { |
172 | 0 | std::string Msg = "Could not open profile: " + EC.message(); |
173 | 0 | Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg, |
174 | 0 | DiagnosticSeverity::DS_Warning)); |
175 | 0 | return false; |
176 | 0 | } |
177 | 0 | Reader = std::move(ReaderOrErr.get()); |
178 | 0 | Reader->read(); |
179 | 0 | return true; |
180 | 0 | } |
181 | | |
182 | 662 | void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { |
183 | 662 | AU.setPreservesAll(); |
184 | 662 | MachineFunctionPass::getAnalysisUsage(AU); |
185 | 662 | } |
186 | | |
187 | 22.2k | bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { |
188 | 22.2k | if (!Reader) |
189 | 22.2k | return false; |
190 | 0 | const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction()); |
191 | 0 | if (!Samples) |
192 | 0 | return false; |
193 | | |
194 | 0 | bool Changed = false; |
195 | |
|
196 | 0 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
197 | 0 | SmallVector<PrefetchInfo, 4> Prefetches; |
198 | 0 | for (auto &MBB : MF) { |
199 | 0 | for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) { |
200 | 0 | auto Current = MI; |
201 | 0 | ++MI; |
202 | |
|
203 | 0 | int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags); |
204 | 0 | if (Offset < 0) |
205 | 0 | continue; |
206 | 0 | unsigned Bias = X86II::getOperandBias(Current->getDesc()); |
207 | 0 | int MemOpOffset = Offset + Bias; |
208 | | // FIXME(mtrofin): ORE message when the recommendation cannot be taken. |
209 | 0 | if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset)) |
210 | 0 | continue; |
211 | 0 | Prefetches.clear(); |
212 | 0 | if (!findPrefetchInfo(Samples, *Current, Prefetches)) |
213 | 0 | continue; |
214 | 0 | assert(!Prefetches.empty() && |
215 | 0 | "The Prefetches vector should contain at least a value if " |
216 | 0 | "findPrefetchInfo returned true."); |
217 | 0 | for (auto &PrefInfo : Prefetches) { |
218 | 0 | unsigned PFetchInstrID = PrefInfo.InstructionID; |
219 | 0 | int64_t Delta = PrefInfo.Delta; |
220 | 0 | const MCInstrDesc &Desc = TII->get(PFetchInstrID); |
221 | 0 | MachineInstr *PFetch = |
222 | 0 | MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true); |
223 | 0 | MachineInstrBuilder MIB(MF, PFetch); |
224 | |
|
225 | 0 | static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && |
226 | 0 | X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && |
227 | 0 | X86::AddrSegmentReg == 4, |
228 | 0 | "Unexpected change in X86 operand offset order."); |
229 | | |
230 | | // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc. |
231 | | // FIXME(mtrofin): consider adding a: |
232 | | // MachineInstrBuilder::set(unsigned offset, op). |
233 | 0 | MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg()) |
234 | 0 | .addImm( |
235 | 0 | Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm()) |
236 | 0 | .addReg( |
237 | 0 | Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg()) |
238 | 0 | .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() + |
239 | 0 | Delta) |
240 | 0 | .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg) |
241 | 0 | .getReg()); |
242 | |
|
243 | 0 | if (!Current->memoperands_empty()) { |
244 | 0 | MachineMemOperand *CurrentOp = *(Current->memoperands_begin()); |
245 | 0 | MIB.addMemOperand(MF.getMachineMemOperand( |
246 | 0 | CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize())); |
247 | 0 | } |
248 | | |
249 | | // Insert before Current. This is because Current may clobber some of |
250 | | // the registers used to describe the input memory operand. |
251 | 0 | MBB.insert(Current, PFetch); |
252 | 0 | Changed = true; |
253 | 0 | } |
254 | 0 | } |
255 | 0 | } |
256 | 0 | return Changed; |
257 | 0 | } |
258 | | |
259 | 662 | FunctionPass *llvm::createX86InsertPrefetchPass() { |
260 | 662 | return new X86InsertPrefetch(PrefetchHintsFile); |
261 | 662 | } |