/src/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- SIPostRABundler.cpp -----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// This pass creates bundles of memory instructions to protect adjacent loads |
11 | | /// and stores from being rescheduled apart from each other post-RA. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AMDGPU.h" |
16 | | #include "GCNSubtarget.h" |
17 | | #include "llvm/ADT/SmallSet.h" |
18 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | | |
20 | | using namespace llvm; |
21 | | |
22 | | #define DEBUG_TYPE "si-post-ra-bundler" |
23 | | |
24 | | namespace { |
25 | | |
/// Post-RA machine function pass that wraps runs of adjacent memory
/// instructions into bundles so later post-RA scheduling cannot pull the
/// members of a soft clause apart.
class SIPostRABundler : public MachineFunctionPass {
public:
  static char ID;

public:
  SIPostRABundler() : MachineFunctionPass(ID) {
    initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI post-RA bundler";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The pass only forms bundles and deletes redundant kills; it invalidates
    // no analyses.
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  const SIRegisterInfo *TRI;

  // Registers defined by the clause currently being grown. Used by
  // isDependentLoad() to stop a bundle before a load that reads a value
  // produced earlier in the same clause.
  SmallSet<Register, 16> Defs;

  // Set the register units read by \p MI in \p UsedRegUnits.
  void collectUsedRegUnits(const MachineInstr &MI,
                           BitVector &UsedRegUnits) const;

  // True if \p MI is a memory instruction that may start a new bundle.
  bool isBundleCandidate(const MachineInstr &MI) const;
  // True if \p MI is a load reading a register recorded in Defs.
  bool isDependentLoad(const MachineInstr &MI) const;
  // True if \p NextMI may be appended to a clause currently ending at \p MI.
  bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
};
58 | | |
/// Union of the TSFlags bits identifying the AMDGPU memory instruction
/// classes eligible for bundling. Two instructions only bundle when they have
/// the same subset of these flags set (see canBundle()).
constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
                              SIInstrFlags::SMRD | SIInstrFlags::DS |
                              SIInstrFlags::FLAT | SIInstrFlags::MIMG;
62 | | |
63 | | } // End anonymous namespace. |
64 | | |
// Register the pass with the LLVM pass registry under DEBUG_TYPE.
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)

char SIPostRABundler::ID = 0;

// Exported pass-ID handle used by the AMDGPU target pipeline to reference
// this pass.
char &llvm::SIPostRABundlerID = SIPostRABundler::ID;
70 | | |
71 | 0 | FunctionPass *llvm::createSIPostRABundlerPass() { |
72 | 0 | return new SIPostRABundler(); |
73 | 0 | } |
74 | | |
75 | 0 | bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const { |
76 | 0 | if (!MI.mayLoad()) |
77 | 0 | return false; |
78 | | |
79 | 0 | for (const MachineOperand &Op : MI.explicit_operands()) { |
80 | 0 | if (!Op.isReg()) |
81 | 0 | continue; |
82 | 0 | Register Reg = Op.getReg(); |
83 | 0 | for (Register Def : Defs) |
84 | 0 | if (TRI->regsOverlap(Reg, Def)) |
85 | 0 | return true; |
86 | 0 | } |
87 | | |
88 | 0 | return false; |
89 | 0 | } |
90 | | |
91 | | void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI, |
92 | 0 | BitVector &UsedRegUnits) const { |
93 | 0 | if (MI.isDebugInstr()) |
94 | 0 | return; |
95 | | |
96 | 0 | for (const MachineOperand &Op : MI.operands()) { |
97 | 0 | if (!Op.isReg() || !Op.readsReg()) |
98 | 0 | continue; |
99 | | |
100 | 0 | Register Reg = Op.getReg(); |
101 | 0 | assert(!Op.getSubReg() && |
102 | 0 | "subregister indexes should not be present after RA"); |
103 | | |
104 | 0 | for (MCRegUnit Unit : TRI->regunits(Reg)) |
105 | 0 | UsedRegUnits.set(Unit); |
106 | 0 | } |
107 | 0 | } |
108 | | |
109 | 0 | bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const { |
110 | 0 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
111 | 0 | return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled(); |
112 | 0 | } |
113 | | |
114 | | bool SIPostRABundler::canBundle(const MachineInstr &MI, |
115 | 0 | const MachineInstr &NextMI) const { |
116 | 0 | const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags; |
117 | |
|
118 | 0 | return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() && |
119 | 0 | NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() && |
120 | 0 | ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) && |
121 | 0 | !isDependentLoad(NextMI)); |
122 | 0 | } |
123 | | |
/// Scan each basic block for runs of bundleable memory instructions, grow
/// each run into a clause, erase now-redundant post-clause kills, and
/// finalize the clause as a bundle. Returns true if any bundle was formed.
bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  // Scratch bitvectors reused across clauses; reset after each use.
  BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
  BitVector KillUsedRegUnits(TRI->getNumRegUnits());

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
      unsigned Opc = MI.getOpcode();
      return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
    });

    // Don't cluster with IGLP instructions.
    if (HasIGLPInstrs)
      continue;

    MachineBasicBlock::instr_iterator Next;
    MachineBasicBlock::instr_iterator B = MBB.instr_begin();
    MachineBasicBlock::instr_iterator E = MBB.instr_end();

    for (auto I = B; I != E; I = Next) {
      Next = std::next(I);
      if (!isBundleCandidate(*I))
        continue;

      assert(Defs.empty());

      // Record the first explicit def (if any) so dependent loads can be
      // detected while growing the clause.
      if (I->getNumExplicitDefs() != 0)
        Defs.insert(I->defs().begin()->getReg());

      MachineBasicBlock::instr_iterator BundleStart = I;
      MachineBasicBlock::instr_iterator BundleEnd = I;
      unsigned ClauseLength = 1;
      // Grow the clause forward as long as each instruction can bundle with
      // the current clause tail.
      for (I = Next; I != E; I = Next) {
        Next = std::next(I);

        assert(BundleEnd != I);
        if (canBundle(*BundleEnd, *I)) {
          BundleEnd = I;
          if (I->getNumExplicitDefs() != 0)
            Defs.insert(I->defs().begin()->getReg());
          ++ClauseLength;
        } else if (!I->isMetaInstruction()) {
          // Allow meta instructions in between bundle candidates, but do not
          // start or end a bundle on one.
          //
          // TODO: It may be better to move meta instructions like dbg_value
          // after the bundle. We're relying on the memory legalizer to unbundle
          // these.
          break;
        }
      }

      Next = std::next(BundleEnd);
      if (ClauseLength > 1) {
        Changed = true;

        // Before register allocation, kills are inserted after potential soft
        // clauses to hint register allocation. Look for kills that look like
        // this, and erase them.
        if (Next != E && Next->isKill()) {

          // TODO: Should maybe back-propagate kill flags to the bundle.
          for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
            collectUsedRegUnits(BundleMI, BundleUsedRegUnits);

          // Flip so set bits now mean "NOT used by the bundle"; a kill whose
          // reads all land in cleared bits is fully covered by the bundle.
          BundleUsedRegUnits.flip();

          while (Next != E && Next->isKill()) {
            MachineInstr &Kill = *Next;
            collectUsedRegUnits(Kill, KillUsedRegUnits);

            KillUsedRegUnits &= BundleUsedRegUnits;

            // Erase the kill if it's a subset of the used registers.
            //
            // TODO: Should we just remove all kills? Is there any real reason to
            // keep them after RA?
            if (KillUsedRegUnits.none()) {
              // Advance past the kill before erasing it so Next stays valid.
              ++Next;
              Kill.eraseFromParent();
            } else
              break;

            KillUsedRegUnits.reset();
          }

          BundleUsedRegUnits.reset();
        }

        // Wrap [BundleStart, Next) into a BUNDLE pseudo.
        finalizeBundle(MBB, BundleStart, Next);
      }

      Defs.clear();
    }
  }

  return Changed;
}