/src/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "ARM.h" |
10 | | #include "ARMMachineFunctionInfo.h" |
11 | | #include "ARMSubtarget.h" |
12 | | #include "MCTargetDesc/ARMBaseInfo.h" |
13 | | #include "Thumb2InstrInfo.h" |
14 | | #include "llvm/ADT/SmallVector.h" |
15 | | #include "llvm/ADT/Statistic.h" |
16 | | #include "llvm/ADT/StringRef.h" |
17 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | | #include "llvm/CodeGen/MachineFunction.h" |
19 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | | #include "llvm/CodeGen/MachineInstr.h" |
21 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | | #include "llvm/CodeGen/MachineInstrBundle.h" |
23 | | #include "llvm/CodeGen/MachineOperand.h" |
24 | | #include "llvm/IR/DebugLoc.h" |
25 | | #include "llvm/MC/MCInstrDesc.h" |
26 | | #include "llvm/MC/MCRegisterInfo.h" |
27 | | #include "llvm/Support/Debug.h" |
28 | | #include <cassert> |
29 | | #include <new> |
30 | | |
31 | | using namespace llvm; |
32 | | |
33 | | #define DEBUG_TYPE "arm-mve-vpt" |
34 | | |
35 | | namespace { |
// Machine-function pass that wraps runs of VPT-predicated MVE instructions in
// VPT blocks: InsertVPTBlocks places a VPST (or a VPT folded from an earlier
// VCMP) in front of each run and bundles it with the instructions it covers.
36 | | class MVEVPTBlock : public MachineFunctionPass { |
37 | | public: |
38 | | static char ID; |
// TII and TRI are not set by the constructor; runOnMachineFunction assigns
// them from the function's ARMSubtarget before any block is processed.
39 | | const Thumb2InstrInfo *TII; |
40 | | const TargetRegisterInfo *TRI; |
41 | | |
42 | 2.47k | MVEVPTBlock() : MachineFunctionPass(ID) {} |
43 | | |
// Entry point: returns true if any VPT block was inserted into Fn.
44 | | bool runOnMachineFunction(MachineFunction &Fn) override; |
45 | | |
// Declares NoVRegs as the only machine-function property this pass requires.
46 | 2.47k | MachineFunctionProperties getRequiredProperties() const override { |
47 | 2.47k | return MachineFunctionProperties().set( |
48 | 2.47k | MachineFunctionProperties::Property::NoVRegs); |
49 | 2.47k | } |
50 | | |
// Human-readable pass name.
51 | 2.47k | StringRef getPassName() const override { |
52 | 2.47k | return "MVE VPT block insertion pass"; |
53 | 2.47k | } |
54 | | |
55 | | private: |
// Processes a single basic block; returns true if the block was changed.
56 | | bool InsertVPTBlocks(MachineBasicBlock &MBB); |
57 | | }; |
58 | | |
// Definition of the static pass identifier declared in the class.
59 | | char MVEVPTBlock::ID = 0; |
60 | | |
61 | | } // end anonymous namespace |
62 | | |
// Registers MVEVPTBlock under the name DEBUG_TYPE ("arm-mve-vpt").
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
63 | | INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) |
64 | | |
// Looks backwards from MI, inside MI's own basic block, for a VCMP that can be
// merged into the VPST that is about to be inserted in front of MI.
//
// MI        - first instruction of the VPT block being created.
// TRI       - register info used for the VPR def/use and redefinition queries.
// NewOpcode - out-parameter. Written with the result of VCMPOpcodeToVPT
//             whenever the search moved off MI, including when that result is
//             0 and nullptr is returned; left untouched if MI is the first
//             instruction of its block.
//
// Returns the candidate instruction when VCMPOpcodeToVPT gives a nonzero
// opcode for it and neither of its operands 1 and 2 is defined again between
// it and MI; returns nullptr otherwise.
65 | | static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, |
66 | | const TargetRegisterInfo *TRI, |
67 | 0 | unsigned &NewOpcode) { |
68 | | // Search backwards for the nearest instruction that defines or reads VPR |
69 | | // (or the start of the block). It may or may not be a VCMP: that is decided |
70 | | // after this loop by VCMPOpcodeToVPT returning a nonzero opcode. |
71 | 0 | MachineBasicBlock::iterator CmpMI = MI; |
72 | 0 | while (CmpMI != MI->getParent()->begin()) { |
73 | 0 | --CmpMI; |
74 | 0 | if (CmpMI->modifiesRegister(ARM::VPR, TRI)) |
75 | 0 | break; |
76 | 0 | if (CmpMI->readsRegister(ARM::VPR, TRI)) |
77 | 0 | break; |
78 | 0 | } |
79 | 

// The loop never ran: MI is the first instruction of its block, so there is
// nothing before it to fold.
80 | 0 | if (CmpMI == MI) |
81 | 0 | return nullptr; |
82 | 0 | NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); |
83 | 0 | if (NewOpcode == 0) |
84 | 0 | return nullptr; |
85 | | |
86 | | // Search forward from CmpMI to MI, checking if either register (operands 1 and 2) was def'd |
87 | 0 | if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), |
88 | 0 | MI, TRI)) |
89 | 0 | return nullptr; |
90 | 0 | if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), |
91 | 0 | MI, TRI)) |
92 | 0 | return nullptr; |
93 | 0 | return &*CmpMI; |
94 | 0 | } |
95 | | |
96 | | // Advances Iter past a run of VPT-predicated instructions. |
97 | | // Returns true if it successfully skipped the whole run of predicated |
98 | | // instructions. Returns false when it stopped early (due to MaxSteps), or if |
99 | | // no predicated instruction was stepped over at all. |
//
// Iter                 - in/out: start position; on return, the first
//                        instruction that was not stepped over (or EndIter).
// EndIter              - end of the range that may be scanned.
// MaxSteps             - maximum number of predicated instructions to skip.
// NumInstrsSteppedOver - out: number of predicated instructions skipped.
//                        Debug instructions are skipped but never counted and
//                        do not consume MaxSteps.
100 | | static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, |
101 | | MachineBasicBlock::instr_iterator EndIter, |
102 | | unsigned MaxSteps, |
103 | 0 | unsigned &NumInstrsSteppedOver) { |
104 | 0 | ARMVCC::VPTCodes NextPred = ARMVCC::None; |
105 | 0 | Register PredReg; |
106 | 0 | NumInstrsSteppedOver = 0; |
107 | 

108 | 0 | while (Iter != EndIter) { |
109 | 0 | if (Iter->isDebugInstr()) { |
110 | | // Skip debug instructions without counting them |
111 | 0 | ++Iter; |
112 | 0 | continue; |
113 | 0 | } |
114 | | |
// Stop at the first unpredicated instruction, or once the step budget is
// used up. In the latter case NextPred still holds the predicate of the
// instruction we stopped on.
115 | 0 | NextPred = getVPTInstrPredicate(*Iter, PredReg); |
116 | 0 | assert(NextPred != ARMVCC::Else && |
117 | 0 | "VPT block pass does not expect Else preds"); |
118 | 0 | if (NextPred == ARMVCC::None || MaxSteps == 0) |
119 | 0 | break; |
120 | 0 | --MaxSteps; |
121 | 0 | ++Iter; |
122 | 0 | ++NumInstrsSteppedOver; |
123 | 0 | }; |
124 | 

// Success needs at least one skipped instruction, and the scan must have ended
// on an unpredicated instruction or on EndIter rather than on the budget.
125 | 0 | return NumInstrsSteppedOver != 0 && |
126 | 0 | (NextPred == ARMVCC::None || Iter == EndIter); |
127 | 0 | } |
128 | | |
129 | | // Returns true if at least one instruction in the range [Iter, End) defines |
130 | | // or kills VPR. Returns false for an empty range. |
// The range is half-open: the instruction at End is not inspected.
131 | | static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, |
132 | 0 | MachineBasicBlock::iterator End) { |
133 | 0 | for (; Iter != End; ++Iter) |
134 | 0 | if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR)) |
135 | 0 | return true; |
136 | 0 | return false; |
137 | 0 | } |
138 | | |
139 | | // Creates a T, TT, TTT or TTTT BlockMask for a BlockSize of 1, 2, 3 or 4. |
// Any other BlockSize (including 0) reaches llvm_unreachable.
140 | 0 | static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { |
141 | 0 | switch (BlockSize) { |
142 | 0 | case 1: |
143 | 0 | return ARM::PredBlockMask::T; |
144 | 0 | case 2: |
145 | 0 | return ARM::PredBlockMask::TT; |
146 | 0 | case 3: |
147 | 0 | return ARM::PredBlockMask::TTT; |
148 | 0 | case 4: |
149 | 0 | return ARM::PredBlockMask::TTTT; |
150 | 0 | default: |
151 | 0 | llvm_unreachable("Invalid BlockSize!"); |
152 | 0 | } |
153 | 0 | } |
154 | | |
155 | | // Given an iterator (Iter) that points at an instruction with a "Then" |
156 | | // predicate, tries to create the largest block of contiguous predicated |
157 | | // instructions possible, and returns the VPT Block Mask of that block. |
158 | | // |
159 | | // To maximize the size of the block, VPNOTs that directly follow it are |
160 | | // removed and the instructions after them are re-predicated instead. |
//
// Iter             - in/out: first instruction of the block; on return, the
//                    first instruction after the block (or EndIter).
// EndIter          - end of the range that may be scanned.
// DeadInstructions - out: every VPNOT absorbed into the block is appended
//                    here; the caller is expected to erase them.
//
// The block holds at most 4 predicated instructions.
161 | | static ARM::PredBlockMask |
162 | | CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, |
163 | | MachineBasicBlock::instr_iterator EndIter, |
164 | 0 | SmallVectorImpl<MachineInstr *> &DeadInstructions) { |
// BlockBeg is only read by the LLVM_DEBUG loop below; the (void) cast keeps
// builds without that loop free of unused-variable warnings.
165 | 0 | MachineBasicBlock::instr_iterator BlockBeg = Iter; |
166 | 0 | (void)BlockBeg; |
167 | 0 | assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && |
168 | 0 | "Expected a Predicated Instruction"); |
169 | | |
170 | 0 | LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); |
171 | 

// Collect the initial run of up to 4 "Then" instructions. BlockSize is
// written by StepOverPredicatedInstrs; the return value is not needed here
// because the assert above guarantees at least one instruction is stepped.
172 | 0 | unsigned BlockSize; |
173 | 0 | StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); |
174 | 

175 | 0 | LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = |
176 | 0 | std::next(BlockBeg); |
177 | 0 | AddedInstIter != Iter; ++AddedInstIter) { |
178 | 0 | if (AddedInstIter->isDebugInstr()) |
179 | 0 | continue; |
180 | 0 | dbgs() << " adding: "; |
181 | 0 | AddedInstIter->dump(); |
182 | 0 | }); |
183 | | |
184 | | // Generate the initial BlockMask (all "Then" entries) |
185 | 0 | ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); |
186 | | |
187 | | // Remove VPNOTs while there's still room in the block, so we can make the |
188 | | // largest block possible. |
// Instructions after the first removed VPNOT become "Else"; the predicate
// flips again after each further VPNOT that is removed.
189 | 0 | ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; |
190 | 0 | while (BlockSize < 4 && Iter != EndIter && |
191 | 0 | Iter->getOpcode() == ARM::MVE_VPNOT) { |
192 | | |
193 | | // Try to skip all of the predicated instructions after the VPNOT, stopping |
194 | | // after (4 - BlockSize). If we can't skip them all, stop. |
195 | 0 | unsigned ElseInstCnt = 0; |
196 | 0 | MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); |
197 | 0 | if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), |
198 | 0 | ElseInstCnt)) |
199 | 0 | break; |
200 | | |
201 | | // Check if this VPNOT can be removed or not: It can only be removed if at |
202 | | // least one of the predicated instructions that follow it kills or sets |
203 | | // VPR. |
// NOTE(review): the range passed below starts at Iter, i.e. at the VPNOT
// itself, not at the instruction after it. If the VPNOT is reported as
// defining VPR this check always succeeds -- confirm whether
// std::next(Iter) was intended.
204 | 0 | if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) |
205 | 0 | break; |
206 | | |
207 | 0 | LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); |
208 | | |
209 | | // Record the new size of the block |
210 | 0 | BlockSize += ElseInstCnt; |
211 | 0 | assert(BlockSize <= 4 && "Block is too large!"); |
212 | | |
213 | | // Record the VPNOT so the caller can remove it later. |
214 | 0 | DeadInstructions.push_back(&*Iter); |
215 | 0 | ++Iter; |
216 | | |
217 | | // Replace the predicates of the instructions we're adding. |
218 | | // Note that we are using "Iter" to iterate over the block so we can update |
219 | | // it at the same time. |
220 | 0 | for (; Iter != VPNOTBlockEndIter; ++Iter) { |
221 | 0 | if (Iter->isDebugInstr()) |
222 | 0 | continue; |
223 | | |
224 | | // Find the index of the operand that holds the VPT predicate code |
225 | 0 | int OpIdx = findFirstVPTPredOperandIdx(*Iter); |
226 | 0 | assert(OpIdx != -1); |
227 | | |
228 | | // Change the predicate and update the mask |
229 | 0 | Iter->getOperand(OpIdx).setImm(CurrentPredicate); |
230 | 0 | BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate); |
231 | 

232 | 0 | LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); |
233 | 0 | } |
234 | 

// Flip Then <-> Else for the instructions behind the next VPNOT, if any.
235 | 0 | CurrentPredicate = |
236 | 0 | (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); |
237 | 0 | } |
238 | 0 | return BlockMask; |
239 | 0 | } |
240 | | |
// Scans Block for runs of VPT-predicated instructions and, for each run:
//   1. builds the block and its mask with CreateVPTBlock,
//   2. inserts a VPT (when a preceding VCMP can be folded in) or a plain VPST
//      in front of the run's first instruction,
//   3. erases the VPNOTs that CreateVPTBlock absorbed,
//   4. bundles the new instruction with the run via finalizeBundle.
// Returns true if at least one block was created.
241 | 0 | bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { |
242 | 0 | bool Modified = false; |
243 | 0 | MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); |
244 | 0 | MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); |
245 | 

// Filled by CreateVPTBlock and emptied again after every block.
246 | 0 | SmallVector<MachineInstr *, 4> DeadInstructions; |
247 | 

248 | 0 | while (MBIter != EndIter) { |
// MI stays pointed at the first instruction of the candidate block even
// after CreateVPTBlock has advanced MBIter past it.
249 | 0 | MachineInstr *MI = &*MBIter; |
250 | 0 | Register PredReg; |
251 | 0 | DebugLoc DL = MI->getDebugLoc(); |
252 | 

253 | 0 | ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); |
254 | | |
255 | | // The idea of the predicate is that None, Then and Else are for use when |
256 | | // handling assembly language: they correspond to the three possible |
257 | | // suffixes "", "t" and "e" on the mnemonic. So when instructions are read |
258 | | // from assembly source or disassembled from object code, you expect to |
259 | | // see a mixture whenever there's a long VPT block. But in code |
260 | | // generation, we hope we'll never generate an Else as input to this pass. |
261 | 0 | assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); |
262 | | |
// Unpredicated instruction: not part of any VPT block, move on.
263 | 0 | if (Pred == ARMVCC::None) { |
264 | 0 | ++MBIter; |
265 | 0 | continue; |
266 | 0 | } |
267 | | |
// Advances MBIter to the first instruction after the new block.
268 | 0 | ARM::PredBlockMask BlockMask = |
269 | 0 | CreateVPTBlock(MBIter, EndIter, DeadInstructions); |
270 | | |
271 | | // Search back for a VCMP that can be folded to create a VPT, or else |
272 | | // create a VPST directly |
273 | 0 | MachineInstrBuilder MIBuilder; |
274 | 0 | unsigned NewOpcode; |
275 | 0 | LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); |
276 | 0 | if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { |
277 | 0 | LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); |
// The VPT takes the block mask followed by operands 1..3 of the VCMP.
// NOTE(review): operands 1 and 2 are the compared registers (see the
// register queries below); operand 3 is presumably the condition code --
// confirm against the VCMP instruction definitions.
278 | 0 | MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); |
279 | 0 | MIBuilder.addImm((uint64_t)BlockMask); |
280 | 0 | MIBuilder.add(VCMP->getOperand(1)); |
281 | 0 | MIBuilder.add(VCMP->getOperand(2)); |
282 | 0 | MIBuilder.add(VCMP->getOperand(3)); |
283 | | |
284 | | // We need to remove any kill flags between the original VCMP and the new |
285 | | // insertion point, since the new VPT reads both registers at that point. |
286 | 0 | for (MachineInstr &MII : |
287 | 0 | make_range(VCMP->getIterator(), MI->getIterator())) { |
288 | 0 | MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI); |
289 | 0 | MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI); |
290 | 0 | } |
291 | 

292 | 0 | VCMP->eraseFromParent(); |
293 | 0 | } else { |
294 | 0 | MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); |
295 | 0 | MIBuilder.addImm((uint64_t)BlockMask); |
296 | 0 | } |
297 | | |
298 | | // Erase all dead instructions (VPNOT's). Do that now so that they do not |
299 | | // mess with the bundle creation. |
300 | 0 | for (MachineInstr *DeadMI : DeadInstructions) |
301 | 0 | DeadMI->eraseFromParent(); |
302 | 0 | DeadInstructions.clear(); |
303 | 

// Bundle the new VPT/VPST with every instruction up to, but not including,
// MBIter.
304 | 0 | finalizeBundle( |
305 | 0 | Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); |
306 | 

307 | 0 | Modified = true; |
308 | 0 | } |
309 | 

310 | 0 | return Modified; |
311 | 0 | } |
312 | | |
// Pass entry point. Leaves Fn untouched and returns false unless the subtarget
// is Thumb2 with MVE integer operations; otherwise caches TII/TRI and runs
// InsertVPTBlocks on every basic block. Returns true if any block changed.
313 | 2.46k | bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { |
314 | 2.46k | const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); |
315 | | |
316 | 2.46k | if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) |
317 | 2.46k | return false; |
318 | | |
// Cached in members because InsertVPTBlocks reads them.
319 | 0 | TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); |
320 | 0 | TRI = STI.getRegisterInfo(); |
321 | 

322 | 0 | LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" |
323 | 0 | << "********** Function: " << Fn.getName() << '\n'); |
324 | 

325 | 0 | bool Modified = false; |
326 | 0 | for (MachineBasicBlock &MBB : Fn) |
327 | 0 | Modified |= InsertVPTBlocks(MBB); |
328 | 

329 | 0 | LLVM_DEBUG(dbgs() << "**************************************\n"); |
330 | 0 | return Modified; |
331 | 2.46k | } |
332 | | |
333 | | /// createMVEVPTBlockPass - Returns a newly allocated instance of the MVE VPT |
334 | | /// block insertion pass. |
// NOTE(review): the object is created with a raw `new` and never freed here;
// presumably the caller (the pass manager) takes ownership -- confirm.
335 | 2.47k | FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } |