Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains a pass that lowers homogeneous prolog/epilog instructions.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AArch64InstrInfo.h"
14
#include "AArch64Subtarget.h"
15
#include "MCTargetDesc/AArch64InstPrinter.h"
16
#include "Utils/AArch64BaseInfo.h"
17
#include "llvm/CodeGen/MachineBasicBlock.h"
18
#include "llvm/CodeGen/MachineFunction.h"
19
#include "llvm/CodeGen/MachineFunctionPass.h"
20
#include "llvm/CodeGen/MachineInstr.h"
21
#include "llvm/CodeGen/MachineInstrBuilder.h"
22
#include "llvm/CodeGen/MachineModuleInfo.h"
23
#include "llvm/CodeGen/MachineOperand.h"
24
#include "llvm/CodeGen/TargetSubtargetInfo.h"
25
#include "llvm/IR/DebugLoc.h"
26
#include "llvm/IR/IRBuilder.h"
27
#include "llvm/Pass.h"
28
#include "llvm/Support/raw_ostream.h"
29
#include <optional>
30
#include <sstream>
31
32
using namespace llvm;
33
34
// Human-readable name of the pass. It is passed to INITIALIZE_PASS and is the
// string returned by AArch64LowerHomogeneousPrologEpilog::getPassName().
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
35
0
  "AArch64 homogeneous prolog/epilog lowering pass"
36
37
// Hidden command-line option (initial value 2). shouldUseFrameHelper() only
// approves a helper when the number of instructions it would outline is at
// least this value.
cl::opt<int> FrameHelperSizeThreshold(
38
    "frame-helper-size-threshold", cl::init(2), cl::Hidden,
39
    cl::desc("The minimum number of instructions that are outlined in a frame "
40
             "helper (default = 2)"));
41
42
namespace {
43
44
class AArch64LowerHomogeneousPE {
45
public:
46
  const AArch64InstrInfo *TII;
47
48
  AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
49
0
      : M(M), MMI(MMI) {}
50
51
  bool run();
52
  bool runOnMachineFunction(MachineFunction &Fn);
53
54
private:
55
  Module *M;
56
  MachineModuleInfo *MMI;
57
58
  bool runOnMBB(MachineBasicBlock &MBB);
59
  bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
60
               MachineBasicBlock::iterator &NextMBBI);
61
62
  /// Lower a HOM_Prolog pseudo instruction into a helper call
63
  /// or a sequence of homogeneous stores.
64
  /// When a fp setup follows, it can be optimized.
65
  bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66
                   MachineBasicBlock::iterator &NextMBBI);
67
  /// Lower a HOM_Epilog pseudo instruction into a helper call
68
  /// or a sequence of homogeneous loads.
69
  /// When a return follow, it can be optimized.
70
  bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
71
                   MachineBasicBlock::iterator &NextMBBI);
72
};
73
74
class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
75
public:
76
  static char ID;
77
78
0
  AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
79
0
    initializeAArch64LowerHomogeneousPrologEpilogPass(
80
0
        *PassRegistry::getPassRegistry());
81
0
  }
82
0
  void getAnalysisUsage(AnalysisUsage &AU) const override {
83
0
    AU.addRequired<MachineModuleInfoWrapperPass>();
84
0
    AU.addPreserved<MachineModuleInfoWrapperPass>();
85
0
    AU.setPreservesAll();
86
0
    ModulePass::getAnalysisUsage(AU);
87
0
  }
88
  bool runOnModule(Module &M) override;
89
90
0
  StringRef getPassName() const override {
91
0
    return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
92
0
  }
93
};
94
95
} // end anonymous namespace
96
97
// Storage for the pass identifier that the constructor passes to ModulePass.
char AArch64LowerHomogeneousPrologEpilog::ID = 0;
98
99
// Pass initialization boilerplate: ties the pass class to the argument string
// "aarch64-lower-homogeneous-prolog-epilog" and to the display name macro.
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
100
                "aarch64-lower-homogeneous-prolog-epilog",
101
                AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
102
103
0
/// Pass entry point. Does nothing when the module is to be skipped; otherwise
/// fetches the MachineModuleInfo and runs the lowering worker over the module.
/// @return True when the worker reports a change.
bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto &MMIWrapper = getAnalysis<MachineModuleInfoWrapperPass>();
  AArch64LowerHomogeneousPE Lowering(&M, &MMIWrapper.getMMI());
  return Lowering.run();
}
111
112
0
bool AArch64LowerHomogeneousPE::run() {
113
0
  bool Changed = false;
114
0
  for (auto &F : *M) {
115
0
    if (F.empty())
116
0
      continue;
117
118
0
    MachineFunction *MF = MMI->getMachineFunction(F);
119
0
    if (!MF)
120
0
      continue;
121
0
    Changed |= runOnMachineFunction(*MF);
122
0
  }
123
124
0
  return Changed;
125
0
}
126
/// Flavours of outlined frame helper: Prolog saves registers, PrologFrame
/// additionally sets up FP, Epilog restores registers and returns through X16,
/// EpilogTail restores registers and performs the caller's return itself.
enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
127
128
/// Return a frame helper name with the given CSRs and the helper type.
129
/// For instance, a prolog helper that saves x19 and x20 is named as
130
/// OUTLINED_FUNCTION_PROLOG_x19x20.
131
static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
132
0
                                      FrameHelperType Type, unsigned FpOffset) {
133
0
  std::ostringstream RegStream;
134
0
  switch (Type) {
135
0
  case FrameHelperType::Prolog:
136
0
    RegStream << "OUTLINED_FUNCTION_PROLOG_";
137
0
    break;
138
0
  case FrameHelperType::PrologFrame:
139
0
    RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
140
0
    break;
141
0
  case FrameHelperType::Epilog:
142
0
    RegStream << "OUTLINED_FUNCTION_EPILOG_";
143
0
    break;
144
0
  case FrameHelperType::EpilogTail:
145
0
    RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
146
0
    break;
147
0
  }
148
149
0
  for (auto Reg : Regs) {
150
0
    if (Reg == AArch64::NoRegister)
151
0
      continue;
152
0
    RegStream << AArch64InstPrinter::getRegisterName(Reg);
153
0
  }
154
155
0
  return RegStream.str();
156
0
}
157
158
/// Create a Function for the unique frame helper with the given name.
159
/// Return a newly created MachineFunction with an empty MachineBasicBlock.
160
static MachineFunction &createFrameHelperMachineFunction(Module *M,
161
                                                         MachineModuleInfo *MMI,
162
0
                                                         StringRef Name) {
163
0
  LLVMContext &C = M->getContext();
164
0
  Function *F = M->getFunction(Name);
165
0
  assert(F == nullptr && "Function has been created before");
166
0
  F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
167
0
                       Function::ExternalLinkage, Name, M);
168
0
  assert(F && "Function was null!");
169
170
  // Use ODR linkage to avoid duplication.
171
0
  F->setLinkage(GlobalValue::LinkOnceODRLinkage);
172
0
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
173
174
  // Set no-opt/minsize, so we don't insert padding between outlined
175
  // functions.
176
0
  F->addFnAttr(Attribute::OptimizeNone);
177
0
  F->addFnAttr(Attribute::NoInline);
178
0
  F->addFnAttr(Attribute::MinSize);
179
0
  F->addFnAttr(Attribute::Naked);
180
181
0
  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
182
  // Remove unnecessary register liveness and set NoVRegs.
183
0
  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
184
0
  MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
185
0
  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
186
0
  MF.getRegInfo().freezeReservedRegs(MF);
187
188
  // Create entry block.
189
0
  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
190
0
  IRBuilder<> Builder(EntryBB);
191
0
  Builder.CreateRetVoid();
192
193
  // Insert the new block into the function.
194
0
  MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
195
0
  MF.insert(MF.begin(), MBB);
196
197
0
  return MF;
198
0
}
199
200
/// Emit a store-pair instruction for frame-setup.
201
/// If Reg2 is AArch64::NoRegister, emit STR instead.
202
static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
203
                      MachineBasicBlock::iterator Pos,
204
                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
205
0
                      int Offset, bool IsPreDec) {
206
0
  assert(Reg1 != AArch64::NoRegister);
207
0
  const bool IsPaired = Reg2 != AArch64::NoRegister;
208
0
  bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
209
0
  assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
210
0
  unsigned Opc;
211
0
  if (IsPreDec) {
212
0
    if (IsFloat)
213
0
      Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
214
0
    else
215
0
      Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
216
0
  } else {
217
0
    if (IsFloat)
218
0
      Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
219
0
    else
220
0
      Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
221
0
  }
222
  // The implicit scale for Offset is 8.
223
0
  TypeSize Scale(0U, false), Width(0U, false);
224
0
  int64_t MinOffset, MaxOffset;
225
0
  [[maybe_unused]] bool Success =
226
0
      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
227
0
  assert(Success && "Invalid Opcode");
228
0
  Offset *= (8 / (int)Scale);
229
230
0
  MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
231
0
  if (IsPreDec)
232
0
    MIB.addDef(AArch64::SP);
233
0
  if (IsPaired)
234
0
    MIB.addReg(Reg2);
235
0
  MIB.addReg(Reg1)
236
0
      .addReg(AArch64::SP)
237
0
      .addImm(Offset)
238
0
      .setMIFlag(MachineInstr::FrameSetup);
239
0
}
240
241
/// Emit a load-pair instruction for frame-destroy.
242
/// If Reg2 is AArch64::NoRegister, emit LDR instead.
243
static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
244
                     MachineBasicBlock::iterator Pos,
245
                     const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
246
0
                     int Offset, bool IsPostDec) {
247
0
  assert(Reg1 != AArch64::NoRegister);
248
0
  const bool IsPaired = Reg2 != AArch64::NoRegister;
249
0
  bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
250
0
  assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
251
0
  unsigned Opc;
252
0
  if (IsPostDec) {
253
0
    if (IsFloat)
254
0
      Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
255
0
    else
256
0
      Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
257
0
  } else {
258
0
    if (IsFloat)
259
0
      Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
260
0
    else
261
0
      Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
262
0
  }
263
  // The implicit scale for Offset is 8.
264
0
  TypeSize Scale(0U, false), Width(0U, false);
265
0
  int64_t MinOffset, MaxOffset;
266
0
  [[maybe_unused]] bool Success =
267
0
      AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
268
0
  assert(Success && "Invalid Opcode");
269
0
  Offset *= (8 / (int)Scale);
270
271
0
  MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
272
0
  if (IsPostDec)
273
0
    MIB.addDef(AArch64::SP);
274
0
  if (IsPaired)
275
0
    MIB.addReg(Reg2, getDefRegState(true));
276
0
  MIB.addReg(Reg1, getDefRegState(true))
277
0
      .addReg(AArch64::SP)
278
0
      .addImm(Offset)
279
0
      .setMIFlag(MachineInstr::FrameDestroy);
280
0
}
281
282
/// Return a unique function if a helper can be formed with the given Regs
283
/// and frame type.
284
/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
285
///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
286
///    stp x20, x19, [sp, #16]
287
///    ret
288
///
289
/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
290
///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
291
///    stp x20, x19, [sp, #16]
292
///    add fp, sp, #32
293
///    ret
294
///
295
/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
296
///    mov x16, x30
297
///    ldp x29, x30, [sp, #32]
298
///    ldp x20, x19, [sp, #16]
299
///    ldp x22, x21, [sp], #48
300
///    ret x16
301
///
302
/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
303
///    ldp x29, x30, [sp, #32]
304
///    ldp x20, x19, [sp, #16]
305
///    ldp x22, x21, [sp], #48
306
///    ret
307
/// @param M module
308
/// @param MMI machine module info
309
/// @param Regs callee save regs that the helper will handle
310
/// @param Type frame helper type
311
/// @return a helper function
312
static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
313
                                        SmallVectorImpl<unsigned> &Regs,
314
                                        FrameHelperType Type,
315
0
                                        unsigned FpOffset = 0) {
316
0
  assert(Regs.size() >= 2);
317
0
  auto Name = getFrameHelperName(Regs, Type, FpOffset);
318
0
  auto *F = M->getFunction(Name);
319
0
  if (F)
320
0
    return F;
321
322
0
  auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
323
0
  MachineBasicBlock &MBB = *MF.begin();
324
0
  const TargetSubtargetInfo &STI = MF.getSubtarget();
325
0
  const TargetInstrInfo &TII = *STI.getInstrInfo();
326
327
0
  int Size = (int)Regs.size();
328
0
  switch (Type) {
329
0
  case FrameHelperType::Prolog:
330
0
  case FrameHelperType::PrologFrame: {
331
    // Compute the remaining SP adjust beyond FP/LR.
332
0
    auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
333
334
    // If the register stored to the lowest address is not LR, we must subtract
335
    // more from SP here.
336
0
    if (LRIdx != Size - 2) {
337
0
      assert(Regs[Size - 2] != AArch64::LR);
338
0
      emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
339
0
                LRIdx - Size + 2, true);
340
0
    }
341
342
    // Store CSRs in the reverse order.
343
0
    for (int I = Size - 3; I >= 0; I -= 2) {
344
      // FP/LR has been stored at call-site.
345
0
      if (Regs[I - 1] == AArch64::LR)
346
0
        continue;
347
0
      emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
348
0
                false);
349
0
    }
350
0
    if (Type == FrameHelperType::PrologFrame)
351
0
      BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
352
0
          .addDef(AArch64::FP)
353
0
          .addUse(AArch64::SP)
354
0
          .addImm(FpOffset)
355
0
          .addImm(0)
356
0
          .setMIFlag(MachineInstr::FrameSetup);
357
358
0
    BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
359
0
        .addReg(AArch64::LR);
360
0
    break;
361
0
  }
362
0
  case FrameHelperType::Epilog:
363
0
  case FrameHelperType::EpilogTail:
364
0
    if (Type == FrameHelperType::Epilog)
365
      // Stash LR to X16
366
0
      BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
367
0
          .addDef(AArch64::X16)
368
0
          .addReg(AArch64::XZR)
369
0
          .addUse(AArch64::LR)
370
0
          .addImm(0);
371
372
0
    for (int I = 0; I < Size - 2; I += 2)
373
0
      emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
374
0
               false);
375
    // Restore the last CSR with post-increment of SP.
376
0
    emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
377
0
             true);
378
379
0
    BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
380
0
        .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
381
0
    break;
382
0
  }
383
384
0
  return M->getFunction(Name);
385
0
}
386
387
/// This function checks if a frame helper should be used for
388
/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
389
/// @param MBB machine basic block
390
/// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
391
/// @param Regs callee save registers that are saved or restored.
392
/// @param Type frame helper type
393
/// @return True if a use of helper is qualified.
394
static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
395
                                 MachineBasicBlock::iterator &NextMBBI,
396
                                 SmallVectorImpl<unsigned> &Regs,
397
0
                                 FrameHelperType Type) {
398
0
  const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
399
0
  auto RegCount = Regs.size();
400
0
  assert(RegCount > 0 && (RegCount % 2 == 0));
401
  // # of instructions that will be outlined.
402
0
  int InstCount = RegCount / 2;
403
404
  // Do not use a helper call when not saving LR.
405
0
  if (!llvm::is_contained(Regs, AArch64::LR))
406
0
    return false;
407
408
0
  switch (Type) {
409
0
  case FrameHelperType::Prolog:
410
    // Prolog helper cannot save FP/LR.
411
0
    InstCount--;
412
0
    break;
413
0
  case FrameHelperType::PrologFrame: {
414
    // Effecitvely no change in InstCount since FpAdjusment is included.
415
0
    break;
416
0
  }
417
0
  case FrameHelperType::Epilog:
418
    // Bail-out if X16 is live across the epilog helper because it is used in
419
    // the helper to handle X30.
420
0
    for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
421
0
      if (NextMI->readsRegister(AArch64::W16, TRI))
422
0
        return false;
423
0
    }
424
    // Epilog may not be in the last block. Check the liveness in successors.
425
0
    for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
426
0
      if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
427
0
        return false;
428
0
    }
429
    // No change in InstCount for the regular epilog case.
430
0
    break;
431
0
  case FrameHelperType::EpilogTail: {
432
    // EpilogTail helper includes the caller's return.
433
0
    if (NextMBBI == MBB.end())
434
0
      return false;
435
0
    if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
436
0
      return false;
437
0
    InstCount++;
438
0
    break;
439
0
  }
440
0
  }
441
442
0
  return InstCount >= FrameHelperSizeThreshold;
443
0
}
444
445
/// Lower a HOM_Epilog pseudo instruction into a helper call while
446
/// creating the helper on demand. Or emit a sequence of loads in place when not
447
/// using a helper call.
448
///
449
/// 1. With a helper including ret
450
///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
451
///    ret                                                  ; NextMBBI
452
///    =>
453
///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
454
///    ...                                                  ; NextMBBI
455
///
456
/// 2. With a helper
457
///    HOM_Epilog x30, x29, x19, x20, x21, x22
458
///    =>
459
///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
460
///
461
/// 3. Without a helper
462
///    HOM_Epilog x30, x29, x19, x20, x21, x22
463
///    =>
464
///    ldp x29, x30, [sp, #32]
465
///    ldp x20, x19, [sp, #16]
466
///    ldp x22, x21, [sp], #48
467
bool AArch64LowerHomogeneousPE::lowerEpilog(
468
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
469
0
    MachineBasicBlock::iterator &NextMBBI) {
470
0
  auto &MF = *MBB.getParent();
471
0
  MachineInstr &MI = *MBBI;
472
473
0
  DebugLoc DL = MI.getDebugLoc();
474
0
  SmallVector<unsigned, 8> Regs;
475
0
  bool HasUnpairedReg = false;
476
0
  for (auto &MO : MI.operands())
477
0
    if (MO.isReg()) {
478
0
      if (!MO.getReg().isValid()) {
479
        // For now we are only expecting unpaired GP registers which should
480
        // occur exactly once.
481
0
        assert(!HasUnpairedReg);
482
0
        HasUnpairedReg = true;
483
0
      }
484
0
      Regs.push_back(MO.getReg());
485
0
    }
486
0
  (void)HasUnpairedReg;
487
0
  int Size = (int)Regs.size();
488
0
  if (Size == 0)
489
0
    return false;
490
  // Registers are in pair.
491
0
  assert(Size % 2 == 0);
492
0
  assert(MI.getOpcode() == AArch64::HOM_Epilog);
493
494
0
  auto Return = NextMBBI;
495
0
  if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
496
    // When MBB ends with a return, emit a tail-call to the epilog helper
497
0
    auto *EpilogTailHelper =
498
0
        getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
499
0
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
500
0
        .addGlobalAddress(EpilogTailHelper)
501
0
        .addImm(0)
502
0
        .setMIFlag(MachineInstr::FrameDestroy)
503
0
        .copyImplicitOps(MI)
504
0
        .copyImplicitOps(*Return);
505
0
    NextMBBI = std::next(Return);
506
0
    Return->removeFromParent();
507
0
  } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
508
0
                                  FrameHelperType::Epilog)) {
509
    // The default epilog helper case.
510
0
    auto *EpilogHelper =
511
0
        getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
512
0
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
513
0
        .addGlobalAddress(EpilogHelper)
514
0
        .setMIFlag(MachineInstr::FrameDestroy)
515
0
        .copyImplicitOps(MI);
516
0
  } else {
517
    // Fall back to no-helper.
518
0
    for (int I = 0; I < Size - 2; I += 2)
519
0
      emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
520
    // Restore the last CSR with post-increment of SP.
521
0
    emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
522
0
  }
523
524
0
  MBBI->removeFromParent();
525
0
  return true;
526
0
}
527
528
/// Lower a HOM_Prolog pseudo instruction into a helper call while
529
/// creating the helper on demand. Or emit a sequence of stores in place when
530
/// not using a helper call.
531
///
532
/// 1. With a helper including frame-setup
533
///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
534
///    =>
535
///    stp x29, x30, [sp, #-16]!
536
///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
537
///
538
/// 2. With a helper
539
///    HOM_Prolog x30, x29, x19, x20, x21, x22
540
///    =>
541
///    stp x29, x30, [sp, #-16]!
542
///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
543
///
544
/// 3. Without a helper
545
///    HOM_Prolog x30, x29, x19, x20, x21, x22
546
///    =>
547
///    stp  x22, x21, [sp, #-48]!
548
///    stp  x20, x19, [sp, #16]
549
///    stp  x29, x30, [sp, #32]
550
bool AArch64LowerHomogeneousPE::lowerProlog(
551
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
552
0
    MachineBasicBlock::iterator &NextMBBI) {
553
0
  auto &MF = *MBB.getParent();
554
0
  MachineInstr &MI = *MBBI;
555
556
0
  DebugLoc DL = MI.getDebugLoc();
557
0
  SmallVector<unsigned, 8> Regs;
558
0
  bool HasUnpairedReg = false;
559
0
  int LRIdx = 0;
560
0
  std::optional<int> FpOffset;
561
0
  for (auto &MO : MI.operands()) {
562
0
    if (MO.isReg()) {
563
0
      if (MO.getReg().isValid()) {
564
0
        if (MO.getReg() == AArch64::LR)
565
0
          LRIdx = Regs.size();
566
0
      } else {
567
        // For now we are only expecting unpaired GP registers which should
568
        // occur exactly once.
569
0
        assert(!HasUnpairedReg);
570
0
        HasUnpairedReg = true;
571
0
      }
572
0
      Regs.push_back(MO.getReg());
573
0
    } else if (MO.isImm()) {
574
0
      FpOffset = MO.getImm();
575
0
    }
576
0
  }
577
0
  (void)HasUnpairedReg;
578
0
  int Size = (int)Regs.size();
579
0
  if (Size == 0)
580
0
    return false;
581
  // Allow compact unwind case only for oww.
582
0
  assert(Size % 2 == 0);
583
0
  assert(MI.getOpcode() == AArch64::HOM_Prolog);
584
585
0
  if (FpOffset &&
586
0
      shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
587
    // FP/LR is stored at the top of stack before the prolog helper call.
588
0
    emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
589
0
    auto *PrologFrameHelper = getOrCreateFrameHelper(
590
0
        M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
591
0
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
592
0
        .addGlobalAddress(PrologFrameHelper)
593
0
        .setMIFlag(MachineInstr::FrameSetup)
594
0
        .copyImplicitOps(MI)
595
0
        .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
596
0
        .addReg(AArch64::SP, RegState::Implicit);
597
0
  } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
598
0
                                               FrameHelperType::Prolog)) {
599
    // FP/LR is stored at the top of stack before the prolog helper call.
600
0
    emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
601
0
    auto *PrologHelper =
602
0
        getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
603
0
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
604
0
        .addGlobalAddress(PrologHelper)
605
0
        .setMIFlag(MachineInstr::FrameSetup)
606
0
        .copyImplicitOps(MI);
607
0
  } else {
608
    // Fall back to no-helper.
609
0
    emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
610
0
    for (int I = Size - 3; I >= 0; I -= 2)
611
0
      emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
612
0
    if (FpOffset) {
613
0
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
614
0
          .addDef(AArch64::FP)
615
0
          .addUse(AArch64::SP)
616
0
          .addImm(*FpOffset)
617
0
          .addImm(0)
618
0
          .setMIFlag(MachineInstr::FrameSetup);
619
0
    }
620
0
  }
621
622
0
  MBBI->removeFromParent();
623
0
  return true;
624
0
}
625
626
/// Process each machine instruction
627
/// @param MBB machine basic block
628
/// @param MBBI current instruction iterator
629
/// @param NextMBBI next instruction iterator which can be updated
630
/// @return True when IR is changed.
631
bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
632
                                        MachineBasicBlock::iterator MBBI,
633
0
                                        MachineBasicBlock::iterator &NextMBBI) {
634
0
  MachineInstr &MI = *MBBI;
635
0
  unsigned Opcode = MI.getOpcode();
636
0
  switch (Opcode) {
637
0
  default:
638
0
    break;
639
0
  case AArch64::HOM_Prolog:
640
0
    return lowerProlog(MBB, MBBI, NextMBBI);
641
0
  case AArch64::HOM_Epilog:
642
0
    return lowerEpilog(MBB, MBBI, NextMBBI);
643
0
  }
644
0
  return false;
645
0
}
646
647
0
/// Run runOnMI over every instruction of \p MBB. The successor iterator is
/// taken before each instruction is processed and may be moved by the
/// lowering, so the walk continues from wherever runOnMI leaves it.
/// @return True when any instruction was lowered.
bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
  bool Changed = false;

  const MachineBasicBlock::iterator End = MBB.end();
  for (MachineBasicBlock::iterator Cur = MBB.begin(); Cur != End;) {
    MachineBasicBlock::iterator Next = std::next(Cur);
    Changed |= runOnMI(MBB, Cur, Next);
    Cur = Next;
  }

  return Changed;
}
659
660
0
/// Cache the function's instruction info in TII, then process each of its
/// basic blocks.
/// @return True when any block was changed.
bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
  const TargetInstrInfo *GenericTII = MF.getSubtarget().getInstrInfo();
  TII = static_cast<const AArch64InstrInfo *>(GenericTII);

  bool Changed = false;
  for (MachineBasicBlock &Block : MF) {
    if (runOnMBB(Block))
      Changed = true;
  }
  return Changed;
}
668
669
0
/// Factory: allocate a new AArch64LowerHomogeneousPrologEpilog pass and return
/// it as a ModulePass pointer.
ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  ModulePass *NewPass = new AArch64LowerHomogeneousPrologEpilog();
  return NewPass;
}