//===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass performs exec mask handling peephole optimizations which need
/// to be done before register allocation to reduce register pressure.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"

namespace {

class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  unsigned AndOpc;
  unsigned Andn2Opc;
  unsigned OrSaveExecOpc;
  unsigned XorTermrOpc;
  MCRegister CondReg;
  MCRegister ExecReg;

  bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
  bool optimizeElseBranch(MachineBasicBlock &MBB);

public:
  static char ID;

  SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
    initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI optimize exec mask operations pre-RA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
                      "SI optimize exec mask operations pre-RA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
                    "SI optimize exec mask operations pre-RA", false, false)

char SIOptimizeExecMaskingPreRA::ID = 0;

char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;

FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
  return new SIOptimizeExecMaskingPreRA();
}

// See if there is a def between \p AndIdx and \p SelIdx that needs to live
// beyond \p AndIdx.
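// A kill at \p AndIdx means the queried value dies there, so nothing defined
// in between survives; otherwise, if the value reaching \p AndIdx differs
// from the value the select produces, an intervening def created a new value
// that lives past the S_AND.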
static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
                         SlotIndex SelIdx) {
  LiveQueryResult AndLRQ = LR.Query(AndIdx);
  return (!AndLRQ.isKill() && AndLRQ.valueIn() != LR.Query(SelIdx).valueOut());
}

// FIXME: Why do we bother trying to handle physical registers here?
static bool isDefBetween(const SIRegisterInfo &TRI,
                         LiveIntervals *LIS, Register Reg,
                         const MachineInstr &Sel, const MachineInstr &And) {
  SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
  SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();

  if (Reg.isVirtual())
    return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);

  for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg())) {
    if (isDefBetween(LIS->getRegUnit(Unit), AndIdx, SelIdx))
      return true;
  }

  return false;
}

// Optimize sequence
//    %sel = V_CNDMASK_B32_e64 0, 1, %cc
//    %cmp = V_CMP_NE_U32 1, %sel
//    $vcc = S_AND_B64 $exec, %cmp
//    S_CBRANCH_VCC[N]Z
// =>
//    $vcc = S_ANDN2_B64 $exec, %cc
//    S_CBRANCH_VCC[N]Z
//
// This is the negation pattern inserted by DAGCombiner::visitBRCOND() via
// rebuildSetCC(). We start the search from S_CBRANCH to avoid an exhaustive
// scan, but only the first three instructions are actually needed. The
// S_AND_B64 with exec is a required part of the pattern since V_CNDMASK_B32
// writes zeroes for inactive lanes.
//
// Returns true on success.
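//
// On wave32 subtargets the same rewrite is applied with the 32-bit opcodes
// selected in runOnMachineFunction (S_AND_B32 / S_ANDN2_B32 on $vcc_lo).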
bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
  auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
    unsigned Opc = MI.getOpcode();
    return Opc == AMDGPU::S_CBRANCH_VCCZ ||
           Opc == AMDGPU::S_CBRANCH_VCCNZ; });
  if (I == MBB.terminators().end())
    return false;

  auto *And =
      TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
  if (!And || And->getOpcode() != AndOpc ||
      !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
    return false;

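  // One of the two S_AND sources must be exec; treat the other one as the
  // compare result to chase.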
  MachineOperand *AndCC = &And->getOperand(1);
  Register CmpReg = AndCC->getReg();
  unsigned CmpSubReg = AndCC->getSubReg();
  if (CmpReg == Register(ExecReg)) {
    AndCC = &And->getOperand(2);
    CmpReg = AndCC->getReg();
    CmpSubReg = AndCC->getSubReg();
  } else if (And->getOperand(2).getReg() != Register(ExecReg)) {
    return false;
  }

  auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
  if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
                Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
      Cmp->getParent() != And->getParent())
    return false;

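  // The compare must be against the immediate 1; canonicalize the operands so
  // Op1 holds the register and Op2 the immediate.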
  MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
  MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
  if (Op1->isImm() && Op2->isReg())
    std::swap(Op1, Op2);
  if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
    return false;

  Register SelReg = Op1->getReg();
  if (SelReg.isPhysical())
    return false;

  auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
  if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
    return false;

  if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
    return false;

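  // Match the exact select V_CNDMASK_B32_e64 0, 1, %cc: src0 must be the
  // immediate 0 and src1 the immediate 1.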
  Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
  Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
  MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
  if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
      Op1->getImm() != 0 || Op2->getImm() != 1)
    return false;

  Register CCReg = CC->getReg();

  // If there was a def between the select and the and, we would need to move it
  // to fold this.
  if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
    return false;

  // Cannot safely mirror live intervals with PHI nodes, so check for these
  // before optimization.
  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
  LiveInterval *SelLI = &LIS->getInterval(SelReg);
  if (llvm::any_of(SelLI->vnis(),
                   [](const VNInfo *VNI) {
                     return VNI->isPHIDef();
                   }))
    return false;

  // TODO: Guard against implicit def operands?
  LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                    << *And);

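  // Build the replacement $dst = S_ANDN2 $exec, %cc in place of the S_AND,
  // then carry the dead flag of the implicit SCC def over to it.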
  MachineInstr *Andn2 =
      BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
              And->getOperand(0).getReg())
          .addReg(ExecReg)
          .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
  MachineOperand &AndSCC = And->getOperand(3);
  assert(AndSCC.getReg() == AMDGPU::SCC);
  MachineOperand &Andn2SCC = Andn2->getOperand(3);
  assert(Andn2SCC.getReg() == AMDGPU::SCC);
  Andn2SCC.setIsDead(AndSCC.isDead());

  SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
  And->eraseFromParent();

  LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');

  // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
  // and their associated liveness information.
  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
  if (CCReg.isVirtual()) {
    LiveInterval &CCLI = LIS->getInterval(CCReg);
    auto CCQ = CCLI.Query(SelIdx.getRegSlot());
    if (CCQ.valueIn()) {
      LIS->removeInterval(CCReg);
      LIS->createAndComputeVirtRegInterval(CCReg);
    }
  } else
    LIS->removeAllRegUnitsForPhysReg(CCReg);

  // Try to remove the compare. The cmp value must not be used between the cmp
  // and the s_and_b64 if it is VCC, and must be entirely unused for any other
  // register.
  LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
  if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
      (CmpReg == Register(CondReg) &&
       std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                    [&](const MachineInstr &MI) {
                      return MI.readsRegister(CondReg, TRI);
                    }))) {
    LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
    if (CmpLI)
      LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
    LIS->RemoveMachineInstrFromMaps(*Cmp);
    Cmp->eraseFromParent();

    // Try to remove v_cndmask_b32.
    // Kill status must be checked before shrinking the live range.
    bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
    LIS->shrinkToUses(SelLI);
    bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
    if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
      LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');

      LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
      LIS->RemoveMachineInstrFromMaps(*Sel);
      bool ShrinkSel = Sel->getOperand(0).readsReg();
      Sel->eraseFromParent();
      if (ShrinkSel) {
        // The result of the V_CNDMASK was a subreg def which counted as a read
        // from the other parts of the reg. Shrink their live ranges.
        LIS->shrinkToUses(SelLI);
      }
    }
  }

  return true;
}

// Optimize sequence
//    %dst = S_OR_SAVEEXEC %src
//    ... instructions not modifying exec ...
//    %tmp = S_AND $exec, %dst
//    $exec = S_XOR_term $exec, %tmp
// =>
//    %dst = S_OR_SAVEEXEC %src
//    ... instructions not modifying exec ...
//    $exec = S_XOR_term $exec, %dst
//
// Clean up potentially unnecessary code added for safety during
// control flow lowering.
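//
// The S_AND is redundant when exec is not redefined in between: %dst holds
// the exec mask saved by S_OR_SAVEEXEC, which is a subset of the current
// exec, so '$exec & %dst' is just %dst.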
//
// Return whether any changes were made to MBB.
bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
  if (MBB.empty())
    return false;

  // Check this is an else block.
  auto First = MBB.begin();
  MachineInstr &SaveExecMI = *First;
  if (SaveExecMI.getOpcode() != OrSaveExecOpc)
    return false;

  auto I = llvm::find_if(MBB.terminators(), [this](const MachineInstr &MI) {
    return MI.getOpcode() == XorTermrOpc;
  });
  if (I == MBB.terminators().end())
    return false;

  MachineInstr &XorTermMI = *I;
  if (XorTermMI.getOperand(1).getReg() != Register(ExecReg))
    return false;

  Register SavedExecReg = SaveExecMI.getOperand(0).getReg();
  Register DstReg = XorTermMI.getOperand(2).getReg();

  // Find potentially unnecessary S_AND
  MachineInstr *AndExecMI = nullptr;
  I--;
  while (I != First && !AndExecMI) {
    if (I->getOpcode() == AndOpc && I->getOperand(0).getReg() == DstReg &&
        I->getOperand(1).getReg() == Register(ExecReg))
      AndExecMI = &*I;
    I--;
  }
  if (!AndExecMI)
    return false;

  // Check for exec modifying instructions.
  // Note: exec defs do not create live ranges beyond the
  // instruction so isDefBetween cannot be used.
  // Instead just check that the def segments are adjacent.
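  // That is, the exec live segment at the S_OR_SAVEEXEC must be the one
  // immediately before the segment at the S_AND; any segment in between
  // would stem from another exec def.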
  SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
  SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
  for (MCRegUnit Unit : TRI->regunits(ExecReg)) {
    LiveRange &RegUnit = LIS->getRegUnit(Unit);
    if (RegUnit.find(StartIdx) != std::prev(RegUnit.find(EndIdx)))
      return false;
  }

  // Remove unnecessary S_AND
  LIS->removeInterval(SavedExecReg);
  LIS->removeInterval(DstReg);

  SaveExecMI.getOperand(0).setReg(DstReg);

  LIS->RemoveMachineInstrFromMaps(*AndExecMI);
  AndExecMI->eraseFromParent();

  LIS->createAndComputeVirtRegInterval(DstReg);

  return true;
}

bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  const bool Wave32 = ST.isWave32();
  AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
  Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
  OrSaveExecOpc =
      Wave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  XorTermrOpc = Wave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
  CondReg = MCRegister::from(Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
  ExecReg = MCRegister::from(Wave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);

  DenseSet<Register> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {

    if (optimizeElseBranch(MBB)) {
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }

    if (optimizeVcndVcmpPair(MBB)) {
      RecalcRegs.insert(AMDGPU::VCC_LO);
      RecalcRegs.insert(AMDGPU::VCC_HI);
      RecalcRegs.insert(AMDGPU::SCC);
      Changed = true;
    }

    // Try to remove unneeded instructions before s_endpgm.
    if (MBB.succ_empty()) {
      if (MBB.empty())
        continue;

      // Skip this if the endpgm has any implicit uses, otherwise we would need
      // to be careful to update / remove them.
      // S_ENDPGM always has a single imm operand that is not used other than
      // to end up in the encoding.
      MachineInstr &Term = MBB.back();
      if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1)
        continue;

      SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});

      while (!Blocks.empty()) {
        auto CurBB = Blocks.pop_back_val();
        auto I = CurBB->rbegin(), E = CurBB->rend();
        if (I != E) {
          if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
            ++I;
          else if (I->isBranch())
            continue;
        }

        while (I != E) {
          if (I->isDebugInstr()) {
            I = std::next(I);
            continue;
          }

          if (I->mayStore() || I->isBarrier() || I->isCall() ||
              I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
            break;

          LLVM_DEBUG(dbgs()
                     << "Removing no effect instruction: " << *I << '\n');

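          // Remember every register the erased instruction touched so its
          // liveness can be recomputed once the function has been rewritten.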
          for (auto &Op : I->operands()) {
            if (Op.isReg())
              RecalcRegs.insert(Op.getReg());
          }

          auto Next = std::next(I);
          LIS->RemoveMachineInstrFromMaps(*I);
          I->eraseFromParent();
          I = Next;

          Changed = true;
        }

        if (I != E)
          continue;

        // Try to ascend predecessors.
        for (auto *Pred : CurBB->predecessors()) {
          if (Pred->succ_size() == 1)
            Blocks.push_back(Pred);
        }
      }
      continue;
    }

    // If the only user of a logical operation is a move to exec, fold it now
    // to prevent forming a saveexec. I.e.:
    //
    // %0:sreg_64 = COPY $exec
    // %1:sreg_64 = S_AND_B64 %0:sreg_64, %2:sreg_64
    // =>
    // %1 = S_AND_B64 $exec, %2:sreg_64
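    //
    // Only the last full copy of exec in the block is considered, and the
    // backwards scan is capped at ScanThreshold instructions.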
    unsigned ScanThreshold = 10;
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E && ScanThreshold--;
         ++I) {
      // Continue scanning if this is not a full exec copy.
      if (!(I->isFullCopy() && I->getOperand(1).getReg() == Register(ExecReg)))
        continue;

      Register SavedExec = I->getOperand(0).getReg();
      if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {
        MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec);
        int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec);
        assert(Idx != -1);
        if (SingleExecUser->getParent() == I->getParent() &&
            !SingleExecUser->getOperand(Idx).isImplicit() &&
            TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {
          LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
          LIS->RemoveMachineInstrFromMaps(*I);
          I->eraseFromParent();
          MRI->replaceRegWith(SavedExec, ExecReg);
          LIS->removeInterval(SavedExec);
          Changed = true;
        }
      }
      break;
    }
  }

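  // Recompute liveness for all registers affected by the rewrites above.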
  if (Changed) {
    for (auto Reg : RecalcRegs) {
      if (Reg.isVirtual()) {
        LIS->removeInterval(Reg);
        if (!MRI->reg_empty(Reg))
          LIS->createAndComputeVirtRegInterval(Reg);
      } else {
        LIS->removeAllRegUnitsForPhysReg(Reg);
      }
    }
  }

  return Changed;
}