Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements a register stacking pass.
11
///
12
/// This pass reorders instructions to put register uses and defs in an order
13
/// such that they form single-use expression trees. Registers fitting this form
14
/// are then marked as "stackified", meaning references to them are replaced by
15
/// "push" and "pop" from the value stack.
16
///
17
/// This is primarily a code size optimization, since temporary values on the
18
/// value stack don't need to be named.
19
///
20
//===----------------------------------------------------------------------===//
21
22
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23
#include "WebAssembly.h"
24
#include "WebAssemblyDebugValueManager.h"
25
#include "WebAssemblyMachineFunctionInfo.h"
26
#include "WebAssemblySubtarget.h"
27
#include "WebAssemblyUtilities.h"
28
#include "llvm/Analysis/AliasAnalysis.h"
29
#include "llvm/CodeGen/LiveIntervals.h"
30
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
31
#include "llvm/CodeGen/MachineDominators.h"
32
#include "llvm/CodeGen/MachineInstrBuilder.h"
33
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
34
#include "llvm/CodeGen/MachineRegisterInfo.h"
35
#include "llvm/CodeGen/Passes.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include <iterator>
39
using namespace llvm;
40
41
#define DEBUG_TYPE "wasm-reg-stackify"
42
43
namespace {
44
class WebAssemblyRegStackify final : public MachineFunctionPass {
45
101
  StringRef getPassName() const override {
46
101
    return "WebAssembly Register Stackify";
47
101
  }
48
49
101
  void getAnalysisUsage(AnalysisUsage &AU) const override {
50
101
    AU.setPreservesCFG();
51
101
    AU.addRequired<MachineDominatorTree>();
52
101
    AU.addRequired<LiveIntervals>();
53
101
    AU.addPreserved<MachineBlockFrequencyInfo>();
54
101
    AU.addPreserved<SlotIndexes>();
55
101
    AU.addPreserved<LiveIntervals>();
56
101
    AU.addPreservedID(LiveVariablesID);
57
101
    AU.addPreserved<MachineDominatorTree>();
58
101
    MachineFunctionPass::getAnalysisUsage(AU);
59
101
  }
60
61
  bool runOnMachineFunction(MachineFunction &MF) override;
62
63
public:
64
  static char ID; // Pass identification, replacement for typeid
65
101
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
66
};
67
} // end anonymous namespace
68
69
char WebAssemblyRegStackify::ID = 0;
70
INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
71
                "Reorder instructions to use the WebAssembly value stack",
72
                false, false)
73
74
101
FunctionPass *llvm::createWebAssemblyRegStackify() {
75
101
  return new WebAssemblyRegStackify();
76
101
}
77
78
// Decorate the given instruction with implicit operands that enforce the
79
// expression stack ordering constraints for an instruction which is on
80
// the expression stack.
81
57.8k
static void imposeStackOrdering(MachineInstr *MI) {
82
  // Write the opaque VALUE_STACK register.
83
57.8k
  if (!MI->definesRegister(WebAssembly::VALUE_STACK))
84
57.8k
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
85
57.8k
                                             /*isDef=*/true,
86
57.8k
                                             /*isImp=*/true));
87
88
  // Also read the opaque VALUE_STACK register.
89
57.8k
  if (!MI->readsRegister(WebAssembly::VALUE_STACK))
90
57.8k
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
91
57.8k
                                             /*isDef=*/false,
92
57.8k
                                             /*isImp=*/true));
93
57.8k
}
94
95
// Convert an IMPLICIT_DEF instruction into an instruction which defines
96
// a constant zero value.
97
static void convertImplicitDefToConstZero(MachineInstr *MI,
98
                                          MachineRegisterInfo &MRI,
99
                                          const TargetInstrInfo *TII,
100
                                          MachineFunction &MF,
101
0
                                          LiveIntervals &LIS) {
102
0
  assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
103
104
0
  const auto *RegClass = MRI.getRegClass(MI->getOperand(0).getReg());
105
0
  if (RegClass == &WebAssembly::I32RegClass) {
106
0
    MI->setDesc(TII->get(WebAssembly::CONST_I32));
107
0
    MI->addOperand(MachineOperand::CreateImm(0));
108
0
  } else if (RegClass == &WebAssembly::I64RegClass) {
109
0
    MI->setDesc(TII->get(WebAssembly::CONST_I64));
110
0
    MI->addOperand(MachineOperand::CreateImm(0));
111
0
  } else if (RegClass == &WebAssembly::F32RegClass) {
112
0
    MI->setDesc(TII->get(WebAssembly::CONST_F32));
113
0
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
114
0
        Type::getFloatTy(MF.getFunction().getContext())));
115
0
    MI->addOperand(MachineOperand::CreateFPImm(Val));
116
0
  } else if (RegClass == &WebAssembly::F64RegClass) {
117
0
    MI->setDesc(TII->get(WebAssembly::CONST_F64));
118
0
    auto *Val = cast<ConstantFP>(Constant::getNullValue(
119
0
        Type::getDoubleTy(MF.getFunction().getContext())));
120
0
    MI->addOperand(MachineOperand::CreateFPImm(Val));
121
0
  } else if (RegClass == &WebAssembly::V128RegClass) {
122
0
    MI->setDesc(TII->get(WebAssembly::CONST_V128_I64x2));
123
0
    MI->addOperand(MachineOperand::CreateImm(0));
124
0
    MI->addOperand(MachineOperand::CreateImm(0));
125
0
  } else {
126
0
    llvm_unreachable("Unexpected reg class");
127
0
  }
128
0
}
129
130
// Determine whether a call to the callee referenced by
131
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
132
// effects.
133
static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write,
134
139
                        bool &Effects, bool &StackPointer) {
135
  // All calls can use the stack pointer.
136
139
  StackPointer = true;
137
138
139
  const MachineOperand &MO = WebAssembly::getCalleeOp(MI);
139
139
  if (MO.isGlobal()) {
140
97
    const Constant *GV = MO.getGlobal();
141
97
    if (const auto *GA = dyn_cast<GlobalAlias>(GV))
142
0
      if (!GA->isInterposable())
143
0
        GV = GA->getAliasee();
144
145
97
    if (const auto *F = dyn_cast<Function>(GV)) {
146
97
      if (!F->doesNotThrow())
147
61
        Effects = true;
148
97
      if (F->doesNotAccessMemory())
149
36
        return;
150
61
      if (F->onlyReadsMemory()) {
151
0
        Read = true;
152
0
        return;
153
0
      }
154
61
    }
155
97
  }
156
157
  // Assume the worst.
158
103
  Write = true;
159
103
  Read = true;
160
103
  Effects = true;
161
103
}
162
163
// Determine whether MI reads memory, writes memory, has side effects,
164
// and/or uses the stack pointer value.
165
static void query(const MachineInstr &MI, bool &Read, bool &Write,
166
30.2k
                  bool &Effects, bool &StackPointer) {
167
30.2k
  assert(!MI.isTerminator());
168
169
30.2k
  if (MI.isDebugInstr() || MI.isPosition())
170
86
    return;
171
172
  // Check for loads.
173
30.1k
  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
174
3.44k
    Read = true;
175
176
  // Check for stores.
177
30.1k
  if (MI.mayStore()) {
178
5.02k
    Write = true;
179
25.1k
  } else if (MI.hasOrderedMemoryRef()) {
180
490
    switch (MI.getOpcode()) {
181
51
    case WebAssembly::DIV_S_I32:
182
52
    case WebAssembly::DIV_S_I64:
183
89
    case WebAssembly::REM_S_I32:
184
98
    case WebAssembly::REM_S_I64:
185
149
    case WebAssembly::DIV_U_I32:
186
158
    case WebAssembly::DIV_U_I64:
187
186
    case WebAssembly::REM_U_I32:
188
194
    case WebAssembly::REM_U_I64:
189
194
    case WebAssembly::I32_TRUNC_S_F32:
190
194
    case WebAssembly::I64_TRUNC_S_F32:
191
194
    case WebAssembly::I32_TRUNC_S_F64:
192
194
    case WebAssembly::I64_TRUNC_S_F64:
193
194
    case WebAssembly::I32_TRUNC_U_F32:
194
194
    case WebAssembly::I64_TRUNC_U_F32:
195
194
    case WebAssembly::I32_TRUNC_U_F64:
196
194
    case WebAssembly::I64_TRUNC_U_F64:
197
      // These instruction have hasUnmodeledSideEffects() returning true
198
      // because they trap on overflow and invalid so they can't be arbitrarily
199
      // moved, however hasOrderedMemoryRef() interprets this plus their lack
200
      // of memoperands as having a potential unknown memory reference.
201
194
      break;
202
296
    default:
203
      // Record volatile accesses, unless it's a call, as calls are handled
204
      // specially below.
205
296
      if (!MI.isCall()) {
206
157
        Write = true;
207
157
        Effects = true;
208
157
      }
209
296
      break;
210
490
    }
211
490
  }
212
213
  // Check for side effects.
214
30.1k
  if (MI.hasUnmodeledSideEffects()) {
215
333
    switch (MI.getOpcode()) {
216
51
    case WebAssembly::DIV_S_I32:
217
52
    case WebAssembly::DIV_S_I64:
218
89
    case WebAssembly::REM_S_I32:
219
98
    case WebAssembly::REM_S_I64:
220
149
    case WebAssembly::DIV_U_I32:
221
158
    case WebAssembly::DIV_U_I64:
222
186
    case WebAssembly::REM_U_I32:
223
194
    case WebAssembly::REM_U_I64:
224
194
    case WebAssembly::I32_TRUNC_S_F32:
225
194
    case WebAssembly::I64_TRUNC_S_F32:
226
194
    case WebAssembly::I32_TRUNC_S_F64:
227
194
    case WebAssembly::I64_TRUNC_S_F64:
228
194
    case WebAssembly::I32_TRUNC_U_F32:
229
194
    case WebAssembly::I64_TRUNC_U_F32:
230
194
    case WebAssembly::I32_TRUNC_U_F64:
231
194
    case WebAssembly::I64_TRUNC_U_F64:
232
      // These instructions have hasUnmodeledSideEffects() returning true
233
      // because they trap on overflow and invalid so they can't be arbitrarily
234
      // moved, however in the specific case of register stackifying, it is safe
235
      // to move them because overflow and invalid are Undefined Behavior.
236
194
      break;
237
139
    default:
238
139
      Effects = true;
239
139
      break;
240
333
    }
241
333
  }
242
243
  // Check for writes to __stack_pointer global.
244
30.1k
  if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 ||
245
30.1k
       MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) &&
246
30.1k
      strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
247
563
    StackPointer = true;
248
249
  // Analyze calls.
250
30.1k
  if (MI.isCall()) {
251
139
    queryCallee(MI, Read, Write, Effects, StackPointer);
252
139
  }
253
30.1k
}
254
255
// Test whether Def is safe and profitable to rematerialize.
256
static bool shouldRematerialize(const MachineInstr &Def,
257
20.4k
                                const WebAssemblyInstrInfo *TII) {
258
20.4k
  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def);
259
20.4k
}
260
261
// Identify the definition for this register at this point. This is a
262
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
263
// LiveIntervals to handle complex cases.
264
static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
265
                                const MachineRegisterInfo &MRI,
266
61.1k
                                const LiveIntervals &LIS) {
267
  // Most registers are in SSA form here so we try a quick MRI query first.
268
61.1k
  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
269
55.0k
    return Def;
270
271
  // MRI doesn't know what the Def is. Try asking LIS.
272
6.17k
  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
273
6.17k
          LIS.getInstructionIndex(*Insert)))
274
3.97k
    return LIS.getInstructionFromIndex(ValNo->def);
275
276
2.20k
  return nullptr;
277
6.17k
}
278
279
// Test whether Reg, as defined at Def, has exactly one use. This is a
280
// generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
281
// LiveIntervals to handle complex cases.
282
static bool hasOneNonDBGUse(unsigned Reg, MachineInstr *Def,
283
                            MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
284
36.0k
                            LiveIntervals &LIS) {
285
  // Most registers are in SSA form here so we try a quick MRI query first.
286
36.0k
  if (MRI.hasOneNonDBGUse(Reg))
287
25.7k
    return true;
288
289
10.2k
  bool HasOne = false;
290
10.2k
  const LiveInterval &LI = LIS.getInterval(Reg);
291
10.2k
  const VNInfo *DefVNI =
292
10.2k
      LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot());
293
10.2k
  assert(DefVNI);
294
17.8k
  for (auto &I : MRI.use_nodbg_operands(Reg)) {
295
17.8k
    const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
296
17.8k
    if (Result.valueIn() == DefVNI) {
297
13.6k
      if (!Result.isKill())
298
10.1k
        return false;
299
3.47k
      if (HasOne)
300
107
        return false;
301
3.36k
      HasOne = true;
302
3.36k
    }
303
17.8k
  }
304
44
  return HasOne;
305
10.2k
}
306
307
// Test whether it's safe to move Def to just before Insert.
308
// TODO: Compute memory dependencies in a way that doesn't require always
309
// walking the block.
310
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
311
// more precise.
312
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
313
                         const MachineInstr *Insert,
314
                         const WebAssemblyFunctionInfo &MFI,
315
39.2k
                         const MachineRegisterInfo &MRI) {
316
39.2k
  const MachineInstr *DefI = Def->getParent();
317
39.2k
  const MachineInstr *UseI = Use->getParent();
318
39.2k
  assert(DefI->getParent() == Insert->getParent());
319
0
  assert(UseI->getParent() == Insert->getParent());
320
321
  // The first def of a multivalue instruction can be stackified by moving,
322
  // since the later defs can always be placed into locals if necessary. Later
323
  // defs can only be stackified if all previous defs are already stackified
324
  // since ExplicitLocals will not know how to place a def in a local if a
325
  // subsequent def is stackified. But only one def can be stackified by moving
326
  // the instruction, so it must be the first one.
327
  //
328
  // TODO: This could be loosened to be the first *live* def, but care would
329
  // have to be taken to ensure the drops of the initial dead defs can be
330
  // placed. This would require checking that no previous defs are used in the
331
  // same instruction as subsequent defs.
332
39.2k
  if (Def != DefI->defs().begin())
333
0
    return false;
334
335
  // If any subsequent def is used prior to the current value by the same
336
  // instruction in which the current value is used, we cannot
337
  // stackify. Stackifying in this case would require that def moving below the
338
  // current def in the stack, which cannot be achieved, even with locals.
339
  // Also ensure we don't sink the def past any other prior uses.
340
39.2k
  for (const auto &SubsequentDef : drop_begin(DefI->defs())) {
341
0
    auto I = std::next(MachineBasicBlock::const_iterator(DefI));
342
0
    auto E = std::next(MachineBasicBlock::const_iterator(UseI));
343
0
    for (; I != E; ++I) {
344
0
      for (const auto &PriorUse : I->uses()) {
345
0
        if (&PriorUse == Use)
346
0
          break;
347
0
        if (PriorUse.isReg() && SubsequentDef.getReg() == PriorUse.getReg())
348
0
          return false;
349
0
      }
350
0
    }
351
0
  }
352
353
  // If moving is a semantic nop, it is always allowed
354
39.2k
  const MachineBasicBlock *MBB = DefI->getParent();
355
39.2k
  auto NextI = std::next(MachineBasicBlock::const_iterator(DefI));
356
39.3k
  for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
357
36
    ;
358
39.2k
  if (NextI == Insert)
359
27.2k
    return true;
360
361
  // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
362
  // move.
363
12.0k
  if (WebAssembly::isCatch(DefI->getOpcode()))
364
0
    return false;
365
366
  // Check for register dependencies.
367
12.0k
  SmallVector<unsigned, 4> MutableRegisters;
368
45.4k
  for (const MachineOperand &MO : DefI->operands()) {
369
45.4k
    if (!MO.isReg() || MO.isUndef())
370
6.65k
      continue;
371
38.8k
    Register Reg = MO.getReg();
372
373
    // If the register is dead here and at Insert, ignore it.
374
38.8k
    if (MO.isDead() && Insert->definesRegister(Reg) &&
375
38.8k
        !Insert->readsRegister(Reg))
376
10.6k
      continue;
377
378
28.1k
    if (Reg.isPhysical()) {
379
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
380
      // from moving down, and we've already checked for that.
381
1.67k
      if (Reg == WebAssembly::ARGUMENTS)
382
1.43k
        continue;
383
      // If the physical register is never modified, ignore it.
384
244
      if (!MRI.isPhysRegModified(Reg))
385
244
        continue;
386
      // Otherwise, it's a physical register with unknown liveness.
387
0
      return false;
388
244
    }
389
390
    // If one of the operands isn't in SSA form, it has different values at
391
    // different times, and we need to make sure we don't move our use across
392
    // a different def.
393
26.4k
    if (!MO.isDef() && !MRI.hasOneDef(Reg))
394
1.22k
      MutableRegisters.push_back(Reg);
395
26.4k
  }
396
397
12.0k
  bool Read = false, Write = false, Effects = false, StackPointer = false;
398
12.0k
  query(*DefI, Read, Write, Effects, StackPointer);
399
400
  // If the instruction does not access memory and has no side effects, it has
401
  // no additional dependencies.
402
12.0k
  bool HasMutableRegisters = !MutableRegisters.empty();
403
12.0k
  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
404
9.40k
    return true;
405
406
  // Scan through the intervening instructions between DefI and Insert.
407
2.66k
  MachineBasicBlock::const_iterator D(DefI), I(Insert);
408
19.1k
  for (--I; I != D; --I) {
409
18.2k
    bool InterveningRead = false;
410
18.2k
    bool InterveningWrite = false;
411
18.2k
    bool InterveningEffects = false;
412
18.2k
    bool InterveningStackPointer = false;
413
18.2k
    query(*I, InterveningRead, InterveningWrite, InterveningEffects,
414
18.2k
          InterveningStackPointer);
415
18.2k
    if (Effects && InterveningEffects)
416
8
      return false;
417
18.1k
    if (Read && InterveningWrite)
418
1.24k
      return false;
419
16.9k
    if (Write && (InterveningRead || InterveningWrite))
420
3
      return false;
421
16.9k
    if (StackPointer && InterveningStackPointer)
422
0
      return false;
423
424
16.9k
    for (unsigned Reg : MutableRegisters)
425
15.1k
      for (const MachineOperand &MO : I->operands())
426
66.9k
        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
427
466
          return false;
428
16.9k
  }
429
430
943
  return true;
431
2.66k
}
432
433
/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
434
static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
435
                                     const MachineBasicBlock &MBB,
436
                                     const MachineRegisterInfo &MRI,
437
                                     const MachineDominatorTree &MDT,
438
                                     LiveIntervals &LIS,
439
8.42k
                                     WebAssemblyFunctionInfo &MFI) {
440
8.42k
  const LiveInterval &LI = LIS.getInterval(Reg);
441
442
8.42k
  const MachineInstr *OneUseInst = OneUse.getParent();
443
8.42k
  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
444
445
25.0k
  for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
446
25.0k
    if (&Use == &OneUse)
447
6.29k
      continue;
448
449
18.7k
    const MachineInstr *UseInst = Use.getParent();
450
18.7k
    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
451
452
18.7k
    if (UseVNI != OneUseVNI)
453
5.88k
      continue;
454
455
12.8k
    if (UseInst == OneUseInst) {
456
      // Another use in the same instruction. We need to ensure that the one
457
      // selected use happens "before" it.
458
88
      if (&OneUse > &Use)
459
0
        return false;
460
12.7k
    } else {
461
      // Test that the use is dominated by the one selected use.
462
12.9k
      while (!MDT.dominates(OneUseInst, UseInst)) {
463
        // Actually, dominating is over-conservative. Test that the use would
464
        // happen after the one selected use in the stack evaluation order.
465
        //
466
        // This is needed as a consequence of using implicit local.gets for
467
        // uses and implicit local.sets for defs.
468
4.37k
        if (UseInst->getDesc().getNumDefs() == 0)
469
1.42k
          return false;
470
2.94k
        const MachineOperand &MO = UseInst->getOperand(0);
471
2.94k
        if (!MO.isReg())
472
0
          return false;
473
2.94k
        Register DefReg = MO.getReg();
474
2.94k
        if (!DefReg.isVirtual() || !MFI.isVRegStackified(DefReg))
475
2.41k
          return false;
476
532
        assert(MRI.hasOneNonDBGUse(DefReg));
477
0
        const MachineOperand &NewUse = *MRI.use_nodbg_begin(DefReg);
478
532
        const MachineInstr *NewUseInst = NewUse.getParent();
479
532
        if (NewUseInst == OneUseInst) {
480
387
          if (&OneUse > &NewUse)
481
0
            return false;
482
387
          break;
483
387
        }
484
145
        UseInst = NewUseInst;
485
145
      }
486
12.7k
    }
487
12.8k
  }
488
4.58k
  return true;
489
8.42k
}
490
491
/// Get the appropriate tee opcode for the given register class.
492
4.58k
static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
493
4.58k
  if (RC == &WebAssembly::I32RegClass)
494
4.29k
    return WebAssembly::TEE_I32;
495
287
  if (RC == &WebAssembly::I64RegClass)
496
141
    return WebAssembly::TEE_I64;
497
146
  if (RC == &WebAssembly::F32RegClass)
498
73
    return WebAssembly::TEE_F32;
499
73
  if (RC == &WebAssembly::F64RegClass)
500
73
    return WebAssembly::TEE_F64;
501
0
  if (RC == &WebAssembly::V128RegClass)
502
0
    return WebAssembly::TEE_V128;
503
0
  if (RC == &WebAssembly::EXTERNREFRegClass)
504
0
    return WebAssembly::TEE_EXTERNREF;
505
0
  if (RC == &WebAssembly::FUNCREFRegClass)
506
0
    return WebAssembly::TEE_FUNCREF;
507
0
  llvm_unreachable("Unexpected register class");
508
0
}
509
510
// Shrink LI to its uses, cleaning up LI.
511
10.3k
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
512
10.3k
  if (LIS.shrinkToUses(&LI)) {
513
20
    SmallVector<LiveInterval *, 4> SplitLIs;
514
20
    LIS.splitSeparateComponents(LI, SplitLIs);
515
20
  }
516
10.3k
}
517
518
/// A single-use def in the same block with no intervening memory or register
519
/// dependencies; move the def down and nest it with the current instruction.
520
static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
521
                                      MachineInstr *Def, MachineBasicBlock &MBB,
522
                                      MachineInstr *Insert, LiveIntervals &LIS,
523
                                      WebAssemblyFunctionInfo &MFI,
524
25.8k
                                      MachineRegisterInfo &MRI) {
525
25.8k
  LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
526
527
25.8k
  WebAssemblyDebugValueManager DefDIs(Def);
528
25.8k
  DefDIs.sink(Insert);
529
25.8k
  LIS.handleMove(*Def);
530
531
25.8k
  if (MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg)) {
532
    // No one else is using this register for anything so we can just stackify
533
    // it in place.
534
25.7k
    MFI.stackifyVReg(MRI, Reg);
535
25.7k
  } else {
536
    // The register may have unrelated uses or defs; create a new register for
537
    // just our one def and use so that we can stackify it.
538
45
    Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
539
45
    Op.setReg(NewReg);
540
45
    DefDIs.updateReg(NewReg);
541
542
    // Tell LiveIntervals about the new register.
543
45
    LIS.createAndComputeVirtRegInterval(NewReg);
544
545
    // Tell LiveIntervals about the changes to the old register.
546
45
    LiveInterval &LI = LIS.getInterval(Reg);
547
45
    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
548
45
                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
549
45
                     /*RemoveDeadValNo=*/true);
550
551
45
    MFI.stackifyVReg(MRI, NewReg);
552
553
45
    LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
554
45
  }
555
556
25.8k
  imposeStackOrdering(Def);
557
25.8k
  return Def;
558
25.8k
}
559
560
7.89k
static MachineInstr *getPrevNonDebugInst(MachineInstr *MI) {
561
7.89k
  for (auto *I = MI->getPrevNode(); I; I = I->getPrevNode())
562
7.89k
    if (!I->isDebugInstr())
563
7.89k
      return I;
564
0
  return nullptr;
565
7.89k
}
566
567
/// A trivially cloneable instruction; clone it and nest the new copy with the
568
/// current instruction.
569
static MachineInstr *rematerializeCheapDef(
570
    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
571
    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
572
    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
573
7.89k
    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
574
7.89k
  LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
575
7.89k
  LLVM_DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
576
577
7.89k
  WebAssemblyDebugValueManager DefDIs(&Def);
578
579
7.89k
  Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
580
7.89k
  DefDIs.cloneSink(&*Insert, NewReg);
581
7.89k
  Op.setReg(NewReg);
582
7.89k
  MachineInstr *Clone = getPrevNonDebugInst(&*Insert);
583
7.89k
  assert(Clone);
584
0
  LIS.InsertMachineInstrInMaps(*Clone);
585
7.89k
  LIS.createAndComputeVirtRegInterval(NewReg);
586
7.89k
  MFI.stackifyVReg(MRI, NewReg);
587
7.89k
  imposeStackOrdering(Clone);
588
589
7.89k
  LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
590
591
  // Shrink the interval.
592
7.89k
  bool IsDead = MRI.use_empty(Reg);
593
7.89k
  if (!IsDead) {
594
5.80k
    LiveInterval &LI = LIS.getInterval(Reg);
595
5.80k
    shrinkToUses(LI, LIS);
596
5.80k
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
597
5.80k
  }
598
599
  // If that was the last use of the original, delete the original.
600
7.89k
  if (IsDead) {
601
2.09k
    LLVM_DEBUG(dbgs() << " - Deleting original\n");
602
2.09k
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
603
2.09k
    LIS.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS), Idx);
604
2.09k
    LIS.removeInterval(Reg);
605
2.09k
    LIS.RemoveMachineInstrFromMaps(Def);
606
2.09k
    DefDIs.removeDef();
607
2.09k
  }
608
609
7.89k
  return Clone;
610
7.89k
}
611
612
/// A multiple-use def in the same block with no intervening memory or register
613
/// dependencies; move the def down, nest it with the current instruction, and
614
/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
615
/// this:
616
///
617
///    Reg = INST ...        // Def
618
///    INST ..., Reg, ...    // Insert
619
///    INST ..., Reg, ...
620
///    INST ..., Reg, ...
621
///
622
/// to this:
623
///
624
///    DefReg = INST ...     // Def (to become the new Insert)
625
///    TeeReg, Reg = TEE_... DefReg
626
///    INST ..., TeeReg, ... // Insert
627
///    INST ..., Reg, ...
628
///    INST ..., Reg, ...
629
///
630
/// with DefReg and TeeReg stackified. This eliminates a local.get from the
631
/// resulting code.
632
static MachineInstr *moveAndTeeForMultiUse(
633
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
634
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
635
4.58k
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
636
4.58k
  LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
637
638
4.58k
  const auto *RegClass = MRI.getRegClass(Reg);
639
4.58k
  Register TeeReg = MRI.createVirtualRegister(RegClass);
640
4.58k
  Register DefReg = MRI.createVirtualRegister(RegClass);
641
642
  // Move Def into place.
643
4.58k
  WebAssemblyDebugValueManager DefDIs(Def);
644
4.58k
  DefDIs.sink(Insert);
645
4.58k
  LIS.handleMove(*Def);
646
647
  // Create the Tee and attach the registers.
648
4.58k
  MachineOperand &DefMO = Def->getOperand(0);
649
4.58k
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
650
4.58k
                              TII->get(getTeeOpcode(RegClass)), TeeReg)
651
4.58k
                          .addReg(Reg, RegState::Define)
652
4.58k
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
653
4.58k
  Op.setReg(TeeReg);
654
4.58k
  DefDIs.updateReg(DefReg);
655
4.58k
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
656
4.58k
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
657
658
  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
659
4.58k
  LiveInterval &LI = LIS.getInterval(Reg);
660
4.58k
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
661
4.58k
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
662
4.58k
  I->start = TeeIdx;
663
4.58k
  ValNo->def = TeeIdx;
664
4.58k
  shrinkToUses(LI, LIS);
665
666
  // Finish stackifying the new regs.
667
4.58k
  LIS.createAndComputeVirtRegInterval(TeeReg);
668
4.58k
  LIS.createAndComputeVirtRegInterval(DefReg);
669
4.58k
  MFI.stackifyVReg(MRI, DefReg);
670
4.58k
  MFI.stackifyVReg(MRI, TeeReg);
671
4.58k
  imposeStackOrdering(Def);
672
4.58k
  imposeStackOrdering(Tee);
673
674
  // Even though 'TeeReg, Reg = TEE ...', has two defs, we don't need to clone
675
  // DBG_VALUEs for both of them, given that the latter will cancel the former
676
  // anyway. Here we only clone DBG_VALUEs for TeeReg, which will be converted
677
  // to a local index in ExplicitLocals pass.
678
4.58k
  DefDIs.cloneSink(Insert, TeeReg, /* CloneDef */ false);
679
680
4.58k
  LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
681
4.58k
  LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
682
4.58k
  return Def;
683
4.58k
}
684
685
namespace {
686
/// A stack for walking the tree of instructions being built, visiting the
687
/// MachineOperands in DFS order.
688
class TreeWalkerState {
689
  using mop_iterator = MachineInstr::mop_iterator;
690
  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
691
  using RangeTy = iterator_range<mop_reverse_iterator>;
692
  SmallVector<RangeTy, 4> Worklist;
693
694
public:
695
32.2k
  explicit TreeWalkerState(MachineInstr *Insert) {
696
32.2k
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
697
32.2k
    if (!Range.empty())
698
29.2k
      Worklist.push_back(reverse(Range));
699
32.2k
  }
700
701
147k
  bool done() const { return Worklist.empty(); }
702
703
115k
  MachineOperand &pop() {
704
115k
    RangeTy &Range = Worklist.back();
705
115k
    MachineOperand &Op = *Range.begin();
706
115k
    Range = drop_begin(Range);
707
115k
    if (Range.empty())
708
67.5k
      Worklist.pop_back();
709
115k
    assert((Worklist.empty() || !Worklist.back().empty()) &&
710
115k
           "Empty ranges shouldn't remain in the worklist");
711
0
    return Op;
712
115k
  }
713
714
  /// Push Instr's operands onto the stack to be visited.
715
38.3k
  void pushOperands(MachineInstr *Instr) {
716
38.3k
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
717
38.3k
    if (!Range.empty())
718
38.3k
      Worklist.push_back(reverse(Range));
719
38.3k
  }
720
721
  /// Some of Instr's operands are on the top of the stack; remove them and
722
  /// re-insert them starting from the beginning (because we've commuted them).
723
204
  void resetTopOperands(MachineInstr *Instr) {
724
204
    assert(hasRemainingOperands(Instr) &&
725
204
           "Reseting operands should only be done when the instruction has "
726
204
           "an operand still on the stack");
727
0
    Worklist.back() = reverse(Instr->explicit_uses());
728
204
  }
729
730
  /// Test whether Instr has operands remaining to be visited at the top of
731
  /// the stack.
732
2.26k
  bool hasRemainingOperands(const MachineInstr *Instr) const {
733
2.26k
    if (Worklist.empty())
734
47
      return false;
735
2.21k
    const RangeTy &Range = Worklist.back();
736
2.21k
    return !Range.empty() && Range.begin()->getParent() == Instr;
737
2.26k
  }
738
739
  /// Test whether the given register is present on the stack, indicating an
740
  /// operand in the tree that we haven't visited yet. Moving a definition of
741
  /// Reg to a point in the tree after that would change its value.
742
  ///
743
  /// This is needed as a consequence of using implicit local.gets for
744
  /// uses and implicit local.sets for defs.
745
37.5k
  bool isOnStack(unsigned Reg) const {
746
37.5k
    for (const RangeTy &Range : Worklist)
747
39.8k
      for (const MachineOperand &MO : Range)
748
82.4k
        if (MO.isReg() && MO.getReg() == Reg)
749
1.46k
          return true;
750
36.0k
    return false;
751
37.5k
  }
752
};
753
754
/// State to keep track of whether commuting is in flight or whether it's been
755
/// tried for the current instruction and didn't work.
756
class CommutingState {
757
  /// There are effectively three states: the initial state where we haven't
758
  /// started commuting anything and we don't know anything yet, the tentative
759
  /// state where we've commuted the operands of the current instruction and are
760
  /// revisiting it, and the declined state where we've reverted the operands
761
  /// back to their original order and will no longer commute it further.
762
  bool TentativelyCommuting = false;
763
  bool Declined = false;
764
765
  /// During the tentative state, these hold the operand indices of the commuted
766
  /// operands.
767
  unsigned Operand0, Operand1;
768
769
public:
770
  /// Stackification for an operand was not successful due to ordering
771
  /// constraints. If possible, and if we haven't already tried it and declined
772
  /// it, commute Insert's operands and prepare to revisit it.
773
  void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
774
2.30k
                    const WebAssemblyInstrInfo *TII) {
775
2.30k
    if (TentativelyCommuting) {
776
180
      assert(!Declined &&
777
180
             "Don't decline commuting until you've finished trying it");
778
      // Commuting didn't help. Revert it.
779
0
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
780
180
      TentativelyCommuting = false;
781
180
      Declined = true;
782
2.12k
    } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) {
783
839
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
784
839
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
785
839
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
786
        // Tentatively commute the operands and try again.
787
204
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
788
204
        TreeWalker.resetTopOperands(Insert);
789
204
        TentativelyCommuting = true;
790
204
        Declined = false;
791
204
      }
792
839
    }
793
2.30k
  }
794
795
  /// Stackification for some operand was successful. Reset to the default
796
  /// state.
797
38.3k
  void reset() {
798
38.3k
    TentativelyCommuting = false;
799
38.3k
    Declined = false;
800
38.3k
  }
801
};
802
} // end anonymous namespace
803
804
6.72k
/// Run register stackification over \p MF.
///
/// Visits each basic block bottom-up, and for each instruction tries to pull
/// the definitions of its operands onto the wasm value stack by one of three
/// strategies (in preference order): moving a single-use def next to its use,
/// rematerializing a cheap def, or moving the def and inserting a tee for
/// multiple uses. Successfully stackified vregs are recorded in MFI. Returns
/// true if any instruction was changed.
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  auto &MDT = getAnalysis<MachineDominatorTree>();
  auto &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->isInlineAsm())
        continue;

      // Ignore debugging intrinsics.
      if (Insert->isDebugValue())
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.done()) {
        MachineOperand &Use = TreeWalker.pop();

        // We're only interested in explicit virtual register operands.
        if (!Use.isReg())
          continue;

        Register Reg = Use.getReg();
        assert(Use.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Use.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (Reg.isPhysical())
          continue;

        // Identify the definition for this register at this point.
        // NOTE(review): getVRegDef presumably returns null when no single
        // reaching def is found (e.g. a live-in value) — confirm against its
        // definition; such uses are simply left unstackified.
        MachineInstr *DefI = getVRegDef(Reg, Insert, MRI, LIS);
        if (!DefI)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (DefI->isInlineAsm())
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (WebAssembly::isArgument(DefI->getOpcode()))
          continue;

        MachineOperand *Def = DefI->findRegisterDefOperand(Reg);
        assert(Def != nullptr);

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = DefI->getParent() == &MBB;
        bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) &&
                       !TreeWalker.isOnStack(Reg);
        if (CanMove && hasOneNonDBGUse(Reg, DefI, MRI, MDT, LIS)) {
          // Strategy 1: single use in the same block — move the def down to
          // sit immediately before its use. Insert is updated so subsequent
          // operands are stackified above the moved def.
          Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);

          // If we are removing the frame base reg completely, remove the debug
          // info as well.
          // TODO: Encode this properly as a stackified value.
          if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
            MFI.clearFrameBaseVreg();
        } else if (shouldRematerialize(*DefI, TII)) {
          // Strategy 2: the def is cheap to recompute — clone it at the use.
          Insert =
              rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(),
                                    LIS, MFI, MRI, TII, TRI);
        } else if (CanMove && oneUseDominatesOtherUses(Reg, Use, MBB, MRI, MDT,
                                                       LIS, MFI)) {
          // Strategy 3: multiple uses — move the def and insert a tee so the
          // dominating use can take the value off the stack.
          Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.maybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // Stackifying a multivalue def may unlock in-place stackification of
        // subsequent defs. TODO: Handle the case where the consecutive uses are
        // not all in the same instruction.
        auto *SubsequentDef = Insert->defs().begin();
        auto *SubsequentUse = &Use;
        while (SubsequentDef != Insert->defs().end() &&
               SubsequentUse != Use.getParent()->uses().end()) {
          if (!SubsequentDef->isReg() || !SubsequentUse->isReg())
            break;
          Register DefReg = SubsequentDef->getReg();
          Register UseReg = SubsequentUse->getReg();
          // TODO: This single-use restriction could be relaxed by using tees
          if (DefReg != UseReg || !MRI.hasOneNonDBGUse(DefReg))
            break;
          MFI.stackifyVReg(MRI, DefReg);
          ++SubsequentDef;
          ++SubsequentUse;
        }

        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
        // to a constant 0 so that the def is explicit, and the push/pop
        // correspondence is maintained.
        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.reset();
        TreeWalker.pushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        imposeStackOrdering(&*MII);
        // Resume the reverse walk at the (moved) root of the tree we just
        // built; everything between it and the old position is now stackified.
        MII = MachineBasicBlock::iterator(Insert).getReverse();
        Changed = true;
      }
    }
  }

  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::VALUE_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  // Simulate the value stack: defs of stackified vregs push, uses (visited in
  // reverse operand order) pop, and each pop must match the most recent push.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_uses())) {
        if (!MO.isReg())
          continue;
        Register Reg = MO.getReg();
        if (MFI.isVRegStackified(Reg))
          assert(Stack.pop_back_val() == Reg &&
                 "Register stack pop should be paired with a push");
      }
      for (MachineOperand &MO : MI.defs()) {
        if (!MO.isReg())
          continue;
        Register Reg = MO.getReg();
        if (MFI.isVRegStackified(Reg))
          Stack.push_back(MO.getReg());
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}