/src/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains a pass that lowers homogeneous prolog/epilog instructions. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "AArch64InstrInfo.h" |
14 | | #include "AArch64Subtarget.h" |
15 | | #include "MCTargetDesc/AArch64InstPrinter.h" |
16 | | #include "Utils/AArch64BaseInfo.h" |
17 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | | #include "llvm/CodeGen/MachineFunction.h" |
19 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | | #include "llvm/CodeGen/MachineInstr.h" |
21 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
23 | | #include "llvm/CodeGen/MachineOperand.h" |
24 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
25 | | #include "llvm/IR/DebugLoc.h" |
26 | | #include "llvm/IR/IRBuilder.h" |
27 | | #include "llvm/Pass.h" |
28 | | #include "llvm/Support/raw_ostream.h" |
29 | | #include <optional> |
30 | | #include <sstream> |
31 | | |
32 | | using namespace llvm; |
33 | | |
34 | | #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ |
35 | 0 | "AArch64 homogeneous prolog/epilog lowering pass" |
36 | | |
37 | | cl::opt<int> FrameHelperSizeThreshold( |
38 | | "frame-helper-size-threshold", cl::init(2), cl::Hidden, |
39 | | cl::desc("The minimum number of instructions that are outlined in a frame " |
40 | | "helper (default = 2)")); |
41 | | |
42 | | namespace { |
43 | | |
44 | | class AArch64LowerHomogeneousPE { |
45 | | public: |
46 | | const AArch64InstrInfo *TII; |
47 | | |
48 | | AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) |
49 | 0 | : M(M), MMI(MMI) {} |
50 | | |
51 | | bool run(); |
52 | | bool runOnMachineFunction(MachineFunction &Fn); |
53 | | |
54 | | private: |
55 | | Module *M; |
56 | | MachineModuleInfo *MMI; |
57 | | |
58 | | bool runOnMBB(MachineBasicBlock &MBB); |
59 | | bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
60 | | MachineBasicBlock::iterator &NextMBBI); |
61 | | |
62 | | /// Lower a HOM_Prolog pseudo instruction into a helper call |
63 | | /// or a sequence of homogeneous stores. |
64 | | /// When a fp setup follows, it can be optimized. |
65 | | bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
66 | | MachineBasicBlock::iterator &NextMBBI); |
67 | | /// Lower a HOM_Epilog pseudo instruction into a helper call |
68 | | /// or a sequence of homogeneous loads. |
69 | | /// When a return follow, it can be optimized. |
70 | | bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
71 | | MachineBasicBlock::iterator &NextMBBI); |
72 | | }; |
73 | | |
74 | | class AArch64LowerHomogeneousPrologEpilog : public ModulePass { |
75 | | public: |
76 | | static char ID; |
77 | | |
78 | 0 | AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { |
79 | 0 | initializeAArch64LowerHomogeneousPrologEpilogPass( |
80 | 0 | *PassRegistry::getPassRegistry()); |
81 | 0 | } |
82 | 0 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
83 | 0 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
84 | 0 | AU.addPreserved<MachineModuleInfoWrapperPass>(); |
85 | 0 | AU.setPreservesAll(); |
86 | 0 | ModulePass::getAnalysisUsage(AU); |
87 | 0 | } |
88 | | bool runOnModule(Module &M) override; |
89 | | |
90 | 0 | StringRef getPassName() const override { |
91 | 0 | return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; |
92 | 0 | } |
93 | | }; |
94 | | |
95 | | } // end anonymous namespace |
96 | | |
97 | | char AArch64LowerHomogeneousPrologEpilog::ID = 0; |
98 | | |
99 | | INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, |
100 | | "aarch64-lower-homogeneous-prolog-epilog", |
101 | | AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) |
102 | | |
103 | 0 | bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { |
104 | 0 | if (skipModule(M)) |
105 | 0 | return false; |
106 | | |
107 | 0 | MachineModuleInfo *MMI = |
108 | 0 | &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
109 | 0 | return AArch64LowerHomogeneousPE(&M, MMI).run(); |
110 | 0 | } |
111 | | |
112 | 0 | bool AArch64LowerHomogeneousPE::run() { |
113 | 0 | bool Changed = false; |
114 | 0 | for (auto &F : *M) { |
115 | 0 | if (F.empty()) |
116 | 0 | continue; |
117 | | |
118 | 0 | MachineFunction *MF = MMI->getMachineFunction(F); |
119 | 0 | if (!MF) |
120 | 0 | continue; |
121 | 0 | Changed |= runOnMachineFunction(*MF); |
122 | 0 | } |
123 | |
|
124 | 0 | return Changed; |
125 | 0 | } |
/// The kinds of outlined frame helpers this pass can create.
enum FrameHelperType {
  /// Stores callee-saved registers and returns.
  Prolog,
  /// Like Prolog, but also sets FP to SP plus an offset before returning.
  PrologFrame,
  /// Restores callee-saved registers; returns through X16 because LR itself is
  /// reloaded by the helper.
  Epilog,
  /// Restores callee-saved registers and returns through the reloaded LR.
  EpilogTail
};
127 | | |
128 | | /// Return a frame helper name with the given CSRs and the helper type. |
129 | | /// For instance, a prolog helper that saves x19 and x20 is named as |
130 | | /// OUTLINED_FUNCTION_PROLOG_x19x20. |
131 | | static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, |
132 | 0 | FrameHelperType Type, unsigned FpOffset) { |
133 | 0 | std::ostringstream RegStream; |
134 | 0 | switch (Type) { |
135 | 0 | case FrameHelperType::Prolog: |
136 | 0 | RegStream << "OUTLINED_FUNCTION_PROLOG_"; |
137 | 0 | break; |
138 | 0 | case FrameHelperType::PrologFrame: |
139 | 0 | RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; |
140 | 0 | break; |
141 | 0 | case FrameHelperType::Epilog: |
142 | 0 | RegStream << "OUTLINED_FUNCTION_EPILOG_"; |
143 | 0 | break; |
144 | 0 | case FrameHelperType::EpilogTail: |
145 | 0 | RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; |
146 | 0 | break; |
147 | 0 | } |
148 | | |
149 | 0 | for (auto Reg : Regs) { |
150 | 0 | if (Reg == AArch64::NoRegister) |
151 | 0 | continue; |
152 | 0 | RegStream << AArch64InstPrinter::getRegisterName(Reg); |
153 | 0 | } |
154 | |
|
155 | 0 | return RegStream.str(); |
156 | 0 | } |
157 | | |
158 | | /// Create a Function for the unique frame helper with the given name. |
159 | | /// Return a newly created MachineFunction with an empty MachineBasicBlock. |
160 | | static MachineFunction &createFrameHelperMachineFunction(Module *M, |
161 | | MachineModuleInfo *MMI, |
162 | 0 | StringRef Name) { |
163 | 0 | LLVMContext &C = M->getContext(); |
164 | 0 | Function *F = M->getFunction(Name); |
165 | 0 | assert(F == nullptr && "Function has been created before"); |
166 | 0 | F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), |
167 | 0 | Function::ExternalLinkage, Name, M); |
168 | 0 | assert(F && "Function was null!"); |
169 | | |
170 | | // Use ODR linkage to avoid duplication. |
171 | 0 | F->setLinkage(GlobalValue::LinkOnceODRLinkage); |
172 | 0 | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); |
173 | | |
174 | | // Set no-opt/minsize, so we don't insert padding between outlined |
175 | | // functions. |
176 | 0 | F->addFnAttr(Attribute::OptimizeNone); |
177 | 0 | F->addFnAttr(Attribute::NoInline); |
178 | 0 | F->addFnAttr(Attribute::MinSize); |
179 | 0 | F->addFnAttr(Attribute::Naked); |
180 | |
|
181 | 0 | MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); |
182 | | // Remove unnecessary register liveness and set NoVRegs. |
183 | 0 | MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); |
184 | 0 | MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); |
185 | 0 | MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); |
186 | 0 | MF.getRegInfo().freezeReservedRegs(MF); |
187 | | |
188 | | // Create entry block. |
189 | 0 | BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); |
190 | 0 | IRBuilder<> Builder(EntryBB); |
191 | 0 | Builder.CreateRetVoid(); |
192 | | |
193 | | // Insert the new block into the function. |
194 | 0 | MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); |
195 | 0 | MF.insert(MF.begin(), MBB); |
196 | |
|
197 | 0 | return MF; |
198 | 0 | } |
199 | | |
200 | | /// Emit a store-pair instruction for frame-setup. |
201 | | /// If Reg2 is AArch64::NoRegister, emit STR instead. |
202 | | static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, |
203 | | MachineBasicBlock::iterator Pos, |
204 | | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
205 | 0 | int Offset, bool IsPreDec) { |
206 | 0 | assert(Reg1 != AArch64::NoRegister); |
207 | 0 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
208 | 0 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
209 | 0 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
210 | 0 | unsigned Opc; |
211 | 0 | if (IsPreDec) { |
212 | 0 | if (IsFloat) |
213 | 0 | Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; |
214 | 0 | else |
215 | 0 | Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; |
216 | 0 | } else { |
217 | 0 | if (IsFloat) |
218 | 0 | Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; |
219 | 0 | else |
220 | 0 | Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; |
221 | 0 | } |
222 | | // The implicit scale for Offset is 8. |
223 | 0 | TypeSize Scale(0U, false), Width(0U, false); |
224 | 0 | int64_t MinOffset, MaxOffset; |
225 | 0 | [[maybe_unused]] bool Success = |
226 | 0 | AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); |
227 | 0 | assert(Success && "Invalid Opcode"); |
228 | 0 | Offset *= (8 / (int)Scale); |
229 | |
|
230 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); |
231 | 0 | if (IsPreDec) |
232 | 0 | MIB.addDef(AArch64::SP); |
233 | 0 | if (IsPaired) |
234 | 0 | MIB.addReg(Reg2); |
235 | 0 | MIB.addReg(Reg1) |
236 | 0 | .addReg(AArch64::SP) |
237 | 0 | .addImm(Offset) |
238 | 0 | .setMIFlag(MachineInstr::FrameSetup); |
239 | 0 | } |
240 | | |
241 | | /// Emit a load-pair instruction for frame-destroy. |
242 | | /// If Reg2 is AArch64::NoRegister, emit LDR instead. |
243 | | static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, |
244 | | MachineBasicBlock::iterator Pos, |
245 | | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
246 | 0 | int Offset, bool IsPostDec) { |
247 | 0 | assert(Reg1 != AArch64::NoRegister); |
248 | 0 | const bool IsPaired = Reg2 != AArch64::NoRegister; |
249 | 0 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
250 | 0 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); |
251 | 0 | unsigned Opc; |
252 | 0 | if (IsPostDec) { |
253 | 0 | if (IsFloat) |
254 | 0 | Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; |
255 | 0 | else |
256 | 0 | Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; |
257 | 0 | } else { |
258 | 0 | if (IsFloat) |
259 | 0 | Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; |
260 | 0 | else |
261 | 0 | Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; |
262 | 0 | } |
263 | | // The implicit scale for Offset is 8. |
264 | 0 | TypeSize Scale(0U, false), Width(0U, false); |
265 | 0 | int64_t MinOffset, MaxOffset; |
266 | 0 | [[maybe_unused]] bool Success = |
267 | 0 | AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); |
268 | 0 | assert(Success && "Invalid Opcode"); |
269 | 0 | Offset *= (8 / (int)Scale); |
270 | |
|
271 | 0 | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); |
272 | 0 | if (IsPostDec) |
273 | 0 | MIB.addDef(AArch64::SP); |
274 | 0 | if (IsPaired) |
275 | 0 | MIB.addReg(Reg2, getDefRegState(true)); |
276 | 0 | MIB.addReg(Reg1, getDefRegState(true)) |
277 | 0 | .addReg(AArch64::SP) |
278 | 0 | .addImm(Offset) |
279 | 0 | .setMIFlag(MachineInstr::FrameDestroy); |
280 | 0 | } |
281 | | |
282 | | /// Return a unique function if a helper can be formed with the given Regs |
283 | | /// and frame type. |
284 | | /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: |
285 | | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
286 | | /// stp x20, x19, [sp, #16] |
287 | | /// ret |
288 | | /// |
289 | | /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: |
290 | | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
291 | | /// stp x20, x19, [sp, #16] |
292 | | /// add fp, sp, #32 |
293 | | /// ret |
294 | | /// |
295 | | /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: |
296 | | /// mov x16, x30 |
297 | | /// ldp x29, x30, [sp, #32] |
298 | | /// ldp x20, x19, [sp, #16] |
299 | | /// ldp x22, x21, [sp], #48 |
300 | | /// ret x16 |
301 | | /// |
302 | | /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: |
303 | | /// ldp x29, x30, [sp, #32] |
304 | | /// ldp x20, x19, [sp, #16] |
305 | | /// ldp x22, x21, [sp], #48 |
306 | | /// ret |
307 | | /// @param M module |
308 | | /// @param MMI machine module info |
309 | | /// @param Regs callee save regs that the helper will handle |
310 | | /// @param Type frame helper type |
311 | | /// @return a helper function |
312 | | static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, |
313 | | SmallVectorImpl<unsigned> &Regs, |
314 | | FrameHelperType Type, |
315 | 0 | unsigned FpOffset = 0) { |
316 | 0 | assert(Regs.size() >= 2); |
317 | 0 | auto Name = getFrameHelperName(Regs, Type, FpOffset); |
318 | 0 | auto *F = M->getFunction(Name); |
319 | 0 | if (F) |
320 | 0 | return F; |
321 | | |
322 | 0 | auto &MF = createFrameHelperMachineFunction(M, MMI, Name); |
323 | 0 | MachineBasicBlock &MBB = *MF.begin(); |
324 | 0 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
325 | 0 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
326 | |
|
327 | 0 | int Size = (int)Regs.size(); |
328 | 0 | switch (Type) { |
329 | 0 | case FrameHelperType::Prolog: |
330 | 0 | case FrameHelperType::PrologFrame: { |
331 | | // Compute the remaining SP adjust beyond FP/LR. |
332 | 0 | auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); |
333 | | |
334 | | // If the register stored to the lowest address is not LR, we must subtract |
335 | | // more from SP here. |
336 | 0 | if (LRIdx != Size - 2) { |
337 | 0 | assert(Regs[Size - 2] != AArch64::LR); |
338 | 0 | emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], |
339 | 0 | LRIdx - Size + 2, true); |
340 | 0 | } |
341 | | |
342 | | // Store CSRs in the reverse order. |
343 | 0 | for (int I = Size - 3; I >= 0; I -= 2) { |
344 | | // FP/LR has been stored at call-site. |
345 | 0 | if (Regs[I - 1] == AArch64::LR) |
346 | 0 | continue; |
347 | 0 | emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, |
348 | 0 | false); |
349 | 0 | } |
350 | 0 | if (Type == FrameHelperType::PrologFrame) |
351 | 0 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) |
352 | 0 | .addDef(AArch64::FP) |
353 | 0 | .addUse(AArch64::SP) |
354 | 0 | .addImm(FpOffset) |
355 | 0 | .addImm(0) |
356 | 0 | .setMIFlag(MachineInstr::FrameSetup); |
357 | |
|
358 | 0 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) |
359 | 0 | .addReg(AArch64::LR); |
360 | 0 | break; |
361 | 0 | } |
362 | 0 | case FrameHelperType::Epilog: |
363 | 0 | case FrameHelperType::EpilogTail: |
364 | 0 | if (Type == FrameHelperType::Epilog) |
365 | | // Stash LR to X16 |
366 | 0 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) |
367 | 0 | .addDef(AArch64::X16) |
368 | 0 | .addReg(AArch64::XZR) |
369 | 0 | .addUse(AArch64::LR) |
370 | 0 | .addImm(0); |
371 | |
|
372 | 0 | for (int I = 0; I < Size - 2; I += 2) |
373 | 0 | emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, |
374 | 0 | false); |
375 | | // Restore the last CSR with post-increment of SP. |
376 | 0 | emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, |
377 | 0 | true); |
378 | |
|
379 | 0 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) |
380 | 0 | .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); |
381 | 0 | break; |
382 | 0 | } |
383 | | |
384 | 0 | return M->getFunction(Name); |
385 | 0 | } |
386 | | |
387 | | /// This function checks if a frame helper should be used for |
388 | | /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. |
389 | | /// @param MBB machine basic block |
390 | | /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog |
391 | | /// @param Regs callee save registers that are saved or restored. |
392 | | /// @param Type frame helper type |
393 | | /// @return True if a use of helper is qualified. |
394 | | static bool shouldUseFrameHelper(MachineBasicBlock &MBB, |
395 | | MachineBasicBlock::iterator &NextMBBI, |
396 | | SmallVectorImpl<unsigned> &Regs, |
397 | 0 | FrameHelperType Type) { |
398 | 0 | const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
399 | 0 | auto RegCount = Regs.size(); |
400 | 0 | assert(RegCount > 0 && (RegCount % 2 == 0)); |
401 | | // # of instructions that will be outlined. |
402 | 0 | int InstCount = RegCount / 2; |
403 | | |
404 | | // Do not use a helper call when not saving LR. |
405 | 0 | if (!llvm::is_contained(Regs, AArch64::LR)) |
406 | 0 | return false; |
407 | | |
408 | 0 | switch (Type) { |
409 | 0 | case FrameHelperType::Prolog: |
410 | | // Prolog helper cannot save FP/LR. |
411 | 0 | InstCount--; |
412 | 0 | break; |
413 | 0 | case FrameHelperType::PrologFrame: { |
414 | | // Effecitvely no change in InstCount since FpAdjusment is included. |
415 | 0 | break; |
416 | 0 | } |
417 | 0 | case FrameHelperType::Epilog: |
418 | | // Bail-out if X16 is live across the epilog helper because it is used in |
419 | | // the helper to handle X30. |
420 | 0 | for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { |
421 | 0 | if (NextMI->readsRegister(AArch64::W16, TRI)) |
422 | 0 | return false; |
423 | 0 | } |
424 | | // Epilog may not be in the last block. Check the liveness in successors. |
425 | 0 | for (const MachineBasicBlock *SuccMBB : MBB.successors()) { |
426 | 0 | if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) |
427 | 0 | return false; |
428 | 0 | } |
429 | | // No change in InstCount for the regular epilog case. |
430 | 0 | break; |
431 | 0 | case FrameHelperType::EpilogTail: { |
432 | | // EpilogTail helper includes the caller's return. |
433 | 0 | if (NextMBBI == MBB.end()) |
434 | 0 | return false; |
435 | 0 | if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) |
436 | 0 | return false; |
437 | 0 | InstCount++; |
438 | 0 | break; |
439 | 0 | } |
440 | 0 | } |
441 | | |
442 | 0 | return InstCount >= FrameHelperSizeThreshold; |
443 | 0 | } |
444 | | |
445 | | /// Lower a HOM_Epilog pseudo instruction into a helper call while |
446 | | /// creating the helper on demand. Or emit a sequence of loads in place when not |
447 | | /// using a helper call. |
448 | | /// |
449 | | /// 1. With a helper including ret |
450 | | /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI |
451 | | /// ret ; NextMBBI |
452 | | /// => |
453 | | /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 |
454 | | /// ... ; NextMBBI |
455 | | /// |
456 | | /// 2. With a helper |
457 | | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
458 | | /// => |
459 | | /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 |
460 | | /// |
461 | | /// 3. Without a helper |
462 | | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
463 | | /// => |
464 | | /// ldp x29, x30, [sp, #32] |
465 | | /// ldp x20, x19, [sp, #16] |
466 | | /// ldp x22, x21, [sp], #48 |
467 | | bool AArch64LowerHomogeneousPE::lowerEpilog( |
468 | | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
469 | 0 | MachineBasicBlock::iterator &NextMBBI) { |
470 | 0 | auto &MF = *MBB.getParent(); |
471 | 0 | MachineInstr &MI = *MBBI; |
472 | |
|
473 | 0 | DebugLoc DL = MI.getDebugLoc(); |
474 | 0 | SmallVector<unsigned, 8> Regs; |
475 | 0 | bool HasUnpairedReg = false; |
476 | 0 | for (auto &MO : MI.operands()) |
477 | 0 | if (MO.isReg()) { |
478 | 0 | if (!MO.getReg().isValid()) { |
479 | | // For now we are only expecting unpaired GP registers which should |
480 | | // occur exactly once. |
481 | 0 | assert(!HasUnpairedReg); |
482 | 0 | HasUnpairedReg = true; |
483 | 0 | } |
484 | 0 | Regs.push_back(MO.getReg()); |
485 | 0 | } |
486 | 0 | (void)HasUnpairedReg; |
487 | 0 | int Size = (int)Regs.size(); |
488 | 0 | if (Size == 0) |
489 | 0 | return false; |
490 | | // Registers are in pair. |
491 | 0 | assert(Size % 2 == 0); |
492 | 0 | assert(MI.getOpcode() == AArch64::HOM_Epilog); |
493 | | |
494 | 0 | auto Return = NextMBBI; |
495 | 0 | if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { |
496 | | // When MBB ends with a return, emit a tail-call to the epilog helper |
497 | 0 | auto *EpilogTailHelper = |
498 | 0 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); |
499 | 0 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) |
500 | 0 | .addGlobalAddress(EpilogTailHelper) |
501 | 0 | .addImm(0) |
502 | 0 | .setMIFlag(MachineInstr::FrameDestroy) |
503 | 0 | .copyImplicitOps(MI) |
504 | 0 | .copyImplicitOps(*Return); |
505 | 0 | NextMBBI = std::next(Return); |
506 | 0 | Return->removeFromParent(); |
507 | 0 | } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, |
508 | 0 | FrameHelperType::Epilog)) { |
509 | | // The default epilog helper case. |
510 | 0 | auto *EpilogHelper = |
511 | 0 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); |
512 | 0 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
513 | 0 | .addGlobalAddress(EpilogHelper) |
514 | 0 | .setMIFlag(MachineInstr::FrameDestroy) |
515 | 0 | .copyImplicitOps(MI); |
516 | 0 | } else { |
517 | | // Fall back to no-helper. |
518 | 0 | for (int I = 0; I < Size - 2; I += 2) |
519 | 0 | emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); |
520 | | // Restore the last CSR with post-increment of SP. |
521 | 0 | emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); |
522 | 0 | } |
523 | |
|
524 | 0 | MBBI->removeFromParent(); |
525 | 0 | return true; |
526 | 0 | } |
527 | | |
528 | | /// Lower a HOM_Prolog pseudo instruction into a helper call while |
529 | | /// creating the helper on demand. Or emit a sequence of stores in place when |
530 | | /// not using a helper call. |
531 | | /// |
532 | | /// 1. With a helper including frame-setup |
533 | | /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 |
534 | | /// => |
535 | | /// stp x29, x30, [sp, #-16]! |
536 | | /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 |
537 | | /// |
538 | | /// 2. With a helper |
539 | | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
540 | | /// => |
541 | | /// stp x29, x30, [sp, #-16]! |
542 | | /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 |
543 | | /// |
544 | | /// 3. Without a helper |
545 | | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
546 | | /// => |
547 | | /// stp x22, x21, [sp, #-48]! |
548 | | /// stp x20, x19, [sp, #16] |
549 | | /// stp x29, x30, [sp, #32] |
550 | | bool AArch64LowerHomogeneousPE::lowerProlog( |
551 | | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
552 | 0 | MachineBasicBlock::iterator &NextMBBI) { |
553 | 0 | auto &MF = *MBB.getParent(); |
554 | 0 | MachineInstr &MI = *MBBI; |
555 | |
|
556 | 0 | DebugLoc DL = MI.getDebugLoc(); |
557 | 0 | SmallVector<unsigned, 8> Regs; |
558 | 0 | bool HasUnpairedReg = false; |
559 | 0 | int LRIdx = 0; |
560 | 0 | std::optional<int> FpOffset; |
561 | 0 | for (auto &MO : MI.operands()) { |
562 | 0 | if (MO.isReg()) { |
563 | 0 | if (MO.getReg().isValid()) { |
564 | 0 | if (MO.getReg() == AArch64::LR) |
565 | 0 | LRIdx = Regs.size(); |
566 | 0 | } else { |
567 | | // For now we are only expecting unpaired GP registers which should |
568 | | // occur exactly once. |
569 | 0 | assert(!HasUnpairedReg); |
570 | 0 | HasUnpairedReg = true; |
571 | 0 | } |
572 | 0 | Regs.push_back(MO.getReg()); |
573 | 0 | } else if (MO.isImm()) { |
574 | 0 | FpOffset = MO.getImm(); |
575 | 0 | } |
576 | 0 | } |
577 | 0 | (void)HasUnpairedReg; |
578 | 0 | int Size = (int)Regs.size(); |
579 | 0 | if (Size == 0) |
580 | 0 | return false; |
581 | | // Allow compact unwind case only for oww. |
582 | 0 | assert(Size % 2 == 0); |
583 | 0 | assert(MI.getOpcode() == AArch64::HOM_Prolog); |
584 | | |
585 | 0 | if (FpOffset && |
586 | 0 | shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { |
587 | | // FP/LR is stored at the top of stack before the prolog helper call. |
588 | 0 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
589 | 0 | auto *PrologFrameHelper = getOrCreateFrameHelper( |
590 | 0 | M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); |
591 | 0 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
592 | 0 | .addGlobalAddress(PrologFrameHelper) |
593 | 0 | .setMIFlag(MachineInstr::FrameSetup) |
594 | 0 | .copyImplicitOps(MI) |
595 | 0 | .addReg(AArch64::FP, RegState::Implicit | RegState::Define) |
596 | 0 | .addReg(AArch64::SP, RegState::Implicit); |
597 | 0 | } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, |
598 | 0 | FrameHelperType::Prolog)) { |
599 | | // FP/LR is stored at the top of stack before the prolog helper call. |
600 | 0 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
601 | 0 | auto *PrologHelper = |
602 | 0 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); |
603 | 0 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
604 | 0 | .addGlobalAddress(PrologHelper) |
605 | 0 | .setMIFlag(MachineInstr::FrameSetup) |
606 | 0 | .copyImplicitOps(MI); |
607 | 0 | } else { |
608 | | // Fall back to no-helper. |
609 | 0 | emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); |
610 | 0 | for (int I = Size - 3; I >= 0; I -= 2) |
611 | 0 | emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); |
612 | 0 | if (FpOffset) { |
613 | 0 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) |
614 | 0 | .addDef(AArch64::FP) |
615 | 0 | .addUse(AArch64::SP) |
616 | 0 | .addImm(*FpOffset) |
617 | 0 | .addImm(0) |
618 | 0 | .setMIFlag(MachineInstr::FrameSetup); |
619 | 0 | } |
620 | 0 | } |
621 | |
|
622 | 0 | MBBI->removeFromParent(); |
623 | 0 | return true; |
624 | 0 | } |
625 | | |
626 | | /// Process each machine instruction |
627 | | /// @param MBB machine basic block |
628 | | /// @param MBBI current instruction iterator |
629 | | /// @param NextMBBI next instruction iterator which can be updated |
630 | | /// @return True when IR is changed. |
631 | | bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, |
632 | | MachineBasicBlock::iterator MBBI, |
633 | 0 | MachineBasicBlock::iterator &NextMBBI) { |
634 | 0 | MachineInstr &MI = *MBBI; |
635 | 0 | unsigned Opcode = MI.getOpcode(); |
636 | 0 | switch (Opcode) { |
637 | 0 | default: |
638 | 0 | break; |
639 | 0 | case AArch64::HOM_Prolog: |
640 | 0 | return lowerProlog(MBB, MBBI, NextMBBI); |
641 | 0 | case AArch64::HOM_Epilog: |
642 | 0 | return lowerEpilog(MBB, MBBI, NextMBBI); |
643 | 0 | } |
644 | 0 | return false; |
645 | 0 | } |
646 | | |
647 | 0 | bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { |
648 | 0 | bool Modified = false; |
649 | |
|
650 | 0 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
651 | 0 | while (MBBI != E) { |
652 | 0 | MachineBasicBlock::iterator NMBBI = std::next(MBBI); |
653 | 0 | Modified |= runOnMI(MBB, MBBI, NMBBI); |
654 | 0 | MBBI = NMBBI; |
655 | 0 | } |
656 | |
|
657 | 0 | return Modified; |
658 | 0 | } |
659 | | |
660 | 0 | bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { |
661 | 0 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
662 | |
|
663 | 0 | bool Modified = false; |
664 | 0 | for (auto &MBB : MF) |
665 | 0 | Modified |= runOnMBB(MBB); |
666 | 0 | return Modified; |
667 | 0 | } |
668 | | |
669 | 0 | ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { |
670 | 0 | return new AArch64LowerHomogeneousPrologEpilog(); |
671 | 0 | } |