/src/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements hazard recognizers for scheduling on PowerPC processors. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "PPCHazardRecognizers.h" |
14 | | #include "PPCInstrInfo.h" |
15 | | #include "PPCSubtarget.h" |
16 | | #include "llvm/CodeGen/ScheduleDAG.h" |
17 | | #include "llvm/Support/Debug.h" |
18 | | #include "llvm/Support/ErrorHandling.h" |
19 | | #include "llvm/Support/raw_ostream.h" |
20 | | using namespace llvm; |
21 | | |
22 | | #define DEBUG_TYPE "pre-RA-sched" |
23 | | |
24 | 0 | bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { |
25 | | // FIXME: Move this. |
26 | 0 | if (isBCTRAfterSet(SU)) |
27 | 0 | return true; |
28 | | |
29 | 0 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
30 | 0 | if (!MCID) |
31 | 0 | return false; |
32 | | |
33 | 0 | if (!MCID->mayLoad()) |
34 | 0 | return false; |
35 | | |
36 | | // SU is a load; for any predecessors in this dispatch group, that are stores, |
37 | | // and with which we have an ordering dependency, return true. |
38 | 0 | for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { |
39 | 0 | const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); |
40 | 0 | if (!PredMCID || !PredMCID->mayStore()) |
41 | 0 | continue; |
42 | | |
43 | 0 | if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) |
44 | 0 | continue; |
45 | | |
46 | 0 | for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) |
47 | 0 | if (SU->Preds[i].getSUnit() == CurGroup[j]) |
48 | 0 | return true; |
49 | 0 | } |
50 | | |
51 | 0 | return false; |
52 | 0 | } |
53 | | |
54 | 0 | bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { |
55 | 0 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
56 | 0 | if (!MCID) |
57 | 0 | return false; |
58 | | |
59 | 0 | if (!MCID->isBranch()) |
60 | 0 | return false; |
61 | | |
62 | | // SU is a branch; for any predecessors in this dispatch group, with which we |
63 | | // have a data dependence and set the counter register, return true. |
64 | 0 | for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { |
65 | 0 | const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); |
66 | 0 | if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) |
67 | 0 | continue; |
68 | | |
69 | 0 | if (SU->Preds[i].isCtrl()) |
70 | 0 | continue; |
71 | | |
72 | 0 | for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) |
73 | 0 | if (SU->Preds[i].getSUnit() == CurGroup[j]) |
74 | 0 | return true; |
75 | 0 | } |
76 | | |
77 | 0 | return false; |
78 | 0 | } |
79 | | |
// FIXME: Remove this forward declaration once it is no longer needed.
// mustComeFirst() below uses it to recognize opcodes that have a non-record
// form counterpart (a result other than -1).
namespace llvm {
namespace PPC {
extern int getNonRecordFormOpcode(uint16_t);
} // end namespace PPC
} // end namespace llvm
82 | | |
83 | | // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. |
84 | | |
85 | | bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, |
86 | 0 | unsigned &NSlots) { |
87 | | // FIXME: Indirectly, this information is contained in the itinerary, and |
88 | | // we should derive it from there instead of separately specifying it |
89 | | // here. |
90 | 0 | unsigned IIC = MCID->getSchedClass(); |
91 | 0 | switch (IIC) { |
92 | 0 | default: |
93 | 0 | NSlots = 1; |
94 | 0 | break; |
95 | 0 | case PPC::Sched::IIC_IntDivW: |
96 | 0 | case PPC::Sched::IIC_IntDivD: |
97 | 0 | case PPC::Sched::IIC_LdStLoadUpd: |
98 | 0 | case PPC::Sched::IIC_LdStLDU: |
99 | 0 | case PPC::Sched::IIC_LdStLFDU: |
100 | 0 | case PPC::Sched::IIC_LdStLFDUX: |
101 | 0 | case PPC::Sched::IIC_LdStLHA: |
102 | 0 | case PPC::Sched::IIC_LdStLHAU: |
103 | 0 | case PPC::Sched::IIC_LdStLWA: |
104 | 0 | case PPC::Sched::IIC_LdStSTU: |
105 | 0 | case PPC::Sched::IIC_LdStSTFDU: |
106 | 0 | NSlots = 2; |
107 | 0 | break; |
108 | 0 | case PPC::Sched::IIC_LdStLoadUpdX: |
109 | 0 | case PPC::Sched::IIC_LdStLDUX: |
110 | 0 | case PPC::Sched::IIC_LdStLHAUX: |
111 | 0 | case PPC::Sched::IIC_LdStLWARX: |
112 | 0 | case PPC::Sched::IIC_LdStLDARX: |
113 | 0 | case PPC::Sched::IIC_LdStSTUX: |
114 | 0 | case PPC::Sched::IIC_LdStSTDCX: |
115 | 0 | case PPC::Sched::IIC_LdStSTWCX: |
116 | 0 | case PPC::Sched::IIC_BrMCRX: // mtcr |
117 | | // FIXME: Add sync/isync (here and in the itinerary). |
118 | 0 | NSlots = 4; |
119 | 0 | break; |
120 | 0 | } |
121 | | |
122 | | // FIXME: record-form instructions need a different itinerary class. |
123 | 0 | if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) |
124 | 0 | NSlots = 2; |
125 | |
|
126 | 0 | switch (IIC) { |
127 | 0 | default: |
128 | | // All multi-slot instructions must come first. |
129 | 0 | return NSlots > 1; |
130 | 0 | case PPC::Sched::IIC_BrCR: // cr logicals |
131 | 0 | case PPC::Sched::IIC_SprMFCR: |
132 | 0 | case PPC::Sched::IIC_SprMFCRF: |
133 | 0 | case PPC::Sched::IIC_SprMTSPR: |
134 | 0 | return true; |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | | ScheduleHazardRecognizer::HazardType |
139 | 0 | PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { |
140 | 0 | if (Stalls == 0 && isLoadAfterStore(SU)) |
141 | 0 | return NoopHazard; |
142 | | |
143 | 0 | return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); |
144 | 0 | } |
145 | | |
146 | 0 | bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { |
147 | 0 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
148 | 0 | unsigned NSlots; |
149 | 0 | if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) |
150 | 0 | return true; |
151 | | |
152 | 0 | return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); |
153 | 0 | } |
154 | | |
155 | 0 | unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { |
156 | | // We only need to fill out a maximum of 5 slots here: The 6th slot could |
157 | | // only be a second branch, and otherwise the next instruction will start a |
158 | | // new group. |
159 | 0 | if (isLoadAfterStore(SU) && CurSlots < 6) { |
160 | 0 | unsigned Directive = |
161 | 0 | DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); |
162 | | // If we're using a special group-terminating nop, then we need only one. |
163 | | // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready |
164 | 0 | if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || |
165 | 0 | Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) |
166 | 0 | return 1; |
167 | | |
168 | 0 | return 5 - CurSlots; |
169 | 0 | } |
170 | | |
171 | 0 | return ScoreboardHazardRecognizer::PreEmitNoops(SU); |
172 | 0 | } |
173 | | |
174 | 0 | void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { |
175 | 0 | const MCInstrDesc *MCID = DAG->getInstrDesc(SU); |
176 | 0 | if (MCID) { |
177 | 0 | if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { |
178 | 0 | CurGroup.clear(); |
179 | 0 | CurSlots = CurBranches = 0; |
180 | 0 | } else { |
181 | 0 | LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: "); |
182 | 0 | LLVM_DEBUG(DAG->dumpNode(*SU)); |
183 | |
|
184 | 0 | unsigned NSlots; |
185 | 0 | bool MustBeFirst = mustComeFirst(MCID, NSlots); |
186 | | |
187 | | // If this instruction must come first, but does not, then it starts a |
188 | | // new group. |
189 | 0 | if (MustBeFirst && CurSlots) { |
190 | 0 | CurSlots = CurBranches = 0; |
191 | 0 | CurGroup.clear(); |
192 | 0 | } |
193 | |
|
194 | 0 | CurSlots += NSlots; |
195 | 0 | CurGroup.push_back(SU); |
196 | |
|
197 | 0 | if (MCID->isBranch()) |
198 | 0 | ++CurBranches; |
199 | 0 | } |
200 | 0 | } |
201 | |
|
202 | 0 | return ScoreboardHazardRecognizer::EmitInstruction(SU); |
203 | 0 | } |
204 | | |
205 | 0 | void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { |
206 | 0 | return ScoreboardHazardRecognizer::AdvanceCycle(); |
207 | 0 | } |
208 | | |
209 | 0 | void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { |
210 | 0 | llvm_unreachable("Bottom-up scheduling not supported"); |
211 | 0 | } |
212 | | |
213 | 0 | void PPCDispatchGroupSBHazardRecognizer::Reset() { |
214 | 0 | CurGroup.clear(); |
215 | 0 | CurSlots = CurBranches = 0; |
216 | 0 | return ScoreboardHazardRecognizer::Reset(); |
217 | 0 | } |
218 | | |
219 | 0 | void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { |
220 | 0 | unsigned Directive = |
221 | 0 | DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective(); |
222 | | // If the group has now filled all of its slots, or if we're using a special |
223 | | // group-terminating nop, the group is complete. |
224 | | // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready |
225 | 0 | if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || |
226 | 0 | Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || |
227 | 0 | CurSlots == 6) { |
228 | 0 | CurGroup.clear(); |
229 | 0 | CurSlots = CurBranches = 0; |
230 | 0 | } else { |
231 | 0 | CurGroup.push_back(nullptr); |
232 | 0 | ++CurSlots; |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | | //===----------------------------------------------------------------------===// |
237 | | // PowerPC 970 Hazard Recognizer |
238 | | // |
239 | | // This models the dispatch group formation of the PPC970 processor. Dispatch |
240 | | // groups are bundles of up to five instructions that can contain various mixes |
241 | | // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one |
242 | | // branch instruction per-cycle. |
243 | | // |
244 | | // There are a number of restrictions to dispatch group formation: some |
245 | | // instructions can only be issued in the first slot of a dispatch group, & some |
246 | | // instructions fill an entire dispatch group. Additionally, only branches can |
247 | | // issue in the 5th (last) slot. |
248 | | // |
249 | | // Finally, there are a number of "structural" hazards on the PPC970. These |
250 | | // conditions cause large performance penalties due to misprediction, recovery, |
251 | | // and replay logic that has to happen. These cases include setting a CTR and |
252 | | // branching through it in the same dispatch group, and storing to an address, |
253 | | // then loading from the same address within a dispatch group. To avoid these |
254 | | // conditions, we insert no-op instructions when appropriate. |
255 | | // |
256 | | // FIXME: This is missing some significant cases: |
257 | | // 1. Modeling of microcoded instructions. |
258 | | // 2. Handling of serialized operations. |
259 | | // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". |
260 | | // |
261 | | |
262 | | PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) |
263 | 0 | : DAG(DAG) { |
264 | 0 | EndDispatchGroup(); |
265 | 0 | } |
266 | | |
267 | 0 | void PPCHazardRecognizer970::EndDispatchGroup() { |
268 | 0 | LLVM_DEBUG(errs() << "=== Start of dispatch group\n"); |
269 | 0 | NumIssued = 0; |
270 | | |
271 | | // Structural hazard info. |
272 | 0 | HasCTRSet = false; |
273 | 0 | NumStores = 0; |
274 | 0 | } |
275 | | |
276 | | |
277 | | PPCII::PPC970_Unit |
278 | | PPCHazardRecognizer970::GetInstrType(unsigned Opcode, |
279 | | bool &isFirst, bool &isSingle, |
280 | | bool &isCracked, |
281 | 0 | bool &isLoad, bool &isStore) { |
282 | 0 | const MCInstrDesc &MCID = DAG.TII->get(Opcode); |
283 | |
|
284 | 0 | isLoad = MCID.mayLoad(); |
285 | 0 | isStore = MCID.mayStore(); |
286 | |
|
287 | 0 | uint64_t TSFlags = MCID.TSFlags; |
288 | |
|
289 | 0 | isFirst = TSFlags & PPCII::PPC970_First; |
290 | 0 | isSingle = TSFlags & PPCII::PPC970_Single; |
291 | 0 | isCracked = TSFlags & PPCII::PPC970_Cracked; |
292 | 0 | return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); |
293 | 0 | } |
294 | | |
295 | | /// isLoadOfStoredAddress - If we have a load from the previously stored pointer |
296 | | /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. |
297 | | bool PPCHazardRecognizer970:: |
298 | | isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, |
299 | 0 | const Value *LoadValue) const { |
300 | 0 | for (unsigned i = 0, e = NumStores; i != e; ++i) { |
301 | | // Handle exact and commuted addresses. |
302 | 0 | if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) |
303 | 0 | return true; |
304 | | |
305 | | // Okay, we don't have an exact match, if this is an indexed offset, see if |
306 | | // we have overlap (which happens during fp->int conversion for example). |
307 | 0 | if (StoreValue[i] == LoadValue) { |
308 | | // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check |
309 | | // to see if the load and store actually overlap. |
310 | 0 | if (StoreOffset[i] < LoadOffset) { |
311 | 0 | if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; |
312 | 0 | } else { |
313 | 0 | if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; |
314 | 0 | } |
315 | 0 | } |
316 | 0 | } |
317 | 0 | return false; |
318 | 0 | } |
319 | | |
320 | | /// getHazardType - We return hazard for any non-branch instruction that would |
321 | | /// terminate the dispatch group. We turn NoopHazard for any |
322 | | /// instructions that wouldn't terminate the dispatch group that would cause a |
323 | | /// pipeline flush. |
324 | | ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: |
325 | 0 | getHazardType(SUnit *SU, int Stalls) { |
326 | 0 | assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); |
327 | | |
328 | 0 | MachineInstr *MI = SU->getInstr(); |
329 | |
|
330 | 0 | if (MI->isDebugInstr()) |
331 | 0 | return NoHazard; |
332 | | |
333 | 0 | unsigned Opcode = MI->getOpcode(); |
334 | 0 | bool isFirst, isSingle, isCracked, isLoad, isStore; |
335 | 0 | PPCII::PPC970_Unit InstrType = |
336 | 0 | GetInstrType(Opcode, isFirst, isSingle, isCracked, |
337 | 0 | isLoad, isStore); |
338 | 0 | if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; |
339 | | |
340 | | // We can only issue a PPC970_First/PPC970_Single instruction (such as |
341 | | // crand/mtspr/etc) if this is the first cycle of the dispatch group. |
342 | 0 | if (NumIssued != 0 && (isFirst || isSingle)) |
343 | 0 | return Hazard; |
344 | | |
345 | | // If this instruction is cracked into two ops by the decoder, we know that |
346 | | // it is not a branch and that it cannot issue if 3 other instructions are |
347 | | // already in the dispatch group. |
348 | 0 | if (isCracked && NumIssued > 2) |
349 | 0 | return Hazard; |
350 | | |
351 | 0 | switch (InstrType) { |
352 | 0 | default: llvm_unreachable("Unknown instruction type!"); |
353 | 0 | case PPCII::PPC970_FXU: |
354 | 0 | case PPCII::PPC970_LSU: |
355 | 0 | case PPCII::PPC970_FPU: |
356 | 0 | case PPCII::PPC970_VALU: |
357 | 0 | case PPCII::PPC970_VPERM: |
358 | | // We can only issue a branch as the last instruction in a group. |
359 | 0 | if (NumIssued == 4) return Hazard; |
360 | 0 | break; |
361 | 0 | case PPCII::PPC970_CRU: |
362 | | // We can only issue a CR instruction in the first two slots. |
363 | 0 | if (NumIssued >= 2) return Hazard; |
364 | 0 | break; |
365 | 0 | case PPCII::PPC970_BRU: |
366 | 0 | break; |
367 | 0 | } |
368 | | |
369 | | // Do not allow MTCTR and BCTRL to be in the same dispatch group. |
370 | 0 | if (HasCTRSet && Opcode == PPC::BCTRL) |
371 | 0 | return NoopHazard; |
372 | | |
373 | | // If this is a load following a store, make sure it's not to the same or |
374 | | // overlapping address. |
375 | 0 | if (isLoad && NumStores && !MI->memoperands_empty()) { |
376 | 0 | MachineMemOperand *MO = *MI->memoperands_begin(); |
377 | 0 | if (isLoadOfStoredAddress(MO->getSize(), |
378 | 0 | MO->getOffset(), MO->getValue())) |
379 | 0 | return NoopHazard; |
380 | 0 | } |
381 | | |
382 | 0 | return NoHazard; |
383 | 0 | } |
384 | | |
385 | 0 | void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { |
386 | 0 | MachineInstr *MI = SU->getInstr(); |
387 | |
|
388 | 0 | if (MI->isDebugInstr()) |
389 | 0 | return; |
390 | | |
391 | 0 | unsigned Opcode = MI->getOpcode(); |
392 | 0 | bool isFirst, isSingle, isCracked, isLoad, isStore; |
393 | 0 | PPCII::PPC970_Unit InstrType = |
394 | 0 | GetInstrType(Opcode, isFirst, isSingle, isCracked, |
395 | 0 | isLoad, isStore); |
396 | 0 | if (InstrType == PPCII::PPC970_Pseudo) return; |
397 | | |
398 | | // Update structural hazard information. |
399 | 0 | if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; |
400 | | |
401 | | // Track the address stored to. |
402 | 0 | if (isStore && NumStores < 4 && !MI->memoperands_empty()) { |
403 | 0 | MachineMemOperand *MO = *MI->memoperands_begin(); |
404 | 0 | StoreSize[NumStores] = MO->getSize(); |
405 | 0 | StoreOffset[NumStores] = MO->getOffset(); |
406 | 0 | StoreValue[NumStores] = MO->getValue(); |
407 | 0 | ++NumStores; |
408 | 0 | } |
409 | |
|
410 | 0 | if (InstrType == PPCII::PPC970_BRU || isSingle) |
411 | 0 | NumIssued = 4; // Terminate a d-group. |
412 | 0 | ++NumIssued; |
413 | | |
414 | | // If this instruction is cracked into two ops by the decoder, remember that |
415 | | // we issued two pieces. |
416 | 0 | if (isCracked) |
417 | 0 | ++NumIssued; |
418 | |
|
419 | 0 | if (NumIssued == 5) |
420 | 0 | EndDispatchGroup(); |
421 | 0 | } |
422 | | |
423 | 0 | void PPCHazardRecognizer970::AdvanceCycle() { |
424 | 0 | assert(NumIssued < 5 && "Illegal dispatch group!"); |
425 | 0 | ++NumIssued; |
426 | 0 | if (NumIssued == 5) |
427 | 0 | EndDispatchGroup(); |
428 | 0 | } |
429 | | |
430 | 0 | void PPCHazardRecognizer970::Reset() { |
431 | 0 | EndDispatchGroup(); |
432 | 0 | } |
433 | | |