Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Line
Count
Source
1
//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This contains a MachineSchedStrategy implementation for maximizing wave
11
/// occupancy on GCN hardware.
12
///
13
/// This pass will apply multiple scheduling stages to the same function.
14
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15
/// entry point for the scheduling of those regions is
16
/// GCNScheduleDAGMILive::runSchedStages.
17
18
/// Generally, the reason for having multiple scheduling stages is to account
19
/// for the kernel-wide effect of register usage on occupancy.  Usually, only a
20
/// few scheduling regions will have register pressure high enough to limit
21
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22
/// other regions.
23
///
24
//===----------------------------------------------------------------------===//
25
26
#include "GCNSchedStrategy.h"
27
#include "AMDGPUIGroupLP.h"
28
#include "SIMachineFunctionInfo.h"
29
#include "llvm/CodeGen/RegisterClassInfo.h"
30
31
#define DEBUG_TYPE "machine-scheduler"
32
33
using namespace llvm;
34
35
static cl::opt<bool> DisableUnclusterHighRP(
36
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
37
    cl::desc("Disable unclustered high register pressure "
38
             "reduction scheduling stage."),
39
    cl::init(false));
40
41
static cl::opt<bool> DisableClusteredLowOccupancy(
42
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
43
    cl::desc("Disable clustered low occupancy "
44
             "rescheduling for ILP scheduling stage."),
45
    cl::init(false));
46
47
static cl::opt<unsigned> ScheduleMetricBias(
48
    "amdgpu-schedule-metric-bias", cl::Hidden,
49
    cl::desc(
50
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
51
        "100 to chase the occupancy only."),
52
    cl::init(10));
53
54
static cl::opt<bool>
55
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
56
               cl::desc("Relax occupancy targets for kernels which are memory "
57
                        "bound (amdgpu-membound-threshold), or "
58
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
59
               cl::init(false));
60
61
const unsigned ScheduleMetrics::ScaleFactor = 100;
62
63
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
64
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
65
0
      HasHighPressure(false) {}
66
67
0
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
68
0
  GenericScheduler::initialize(DAG);
69
70
0
  MF = &DAG->MF;
71
72
0
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
73
74
0
  SGPRExcessLimit =
75
0
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
76
0
  VGPRExcessLimit =
77
0
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
78
79
0
  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
80
  // Set the initial TargetOccupancy to the maximum occupancy that we can
81
  // achieve for this function. This effectively sets a lower bound on the
82
  // 'Critical' register limits in the scheduler.
83
  // Allow for lower occupancy targets if kernel is wave limited or memory
84
  // bound, and using the relaxed occupancy feature.
85
0
  TargetOccupancy =
86
0
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
87
0
  SGPRCriticalLimit =
88
0
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
89
90
0
  if (!KnownExcessRP) {
91
0
    VGPRCriticalLimit =
92
0
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
93
0
  } else {
94
    // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except
95
    // it returns a reasonably small number for targets with lots of VGPRs, such
96
    // as GFX10 and GFX11.
97
0
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
98
0
                         "VGPRCriticalLimit calculation method.\n");
99
100
0
    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
101
0
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
102
0
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
103
0
    VGPRBudget = std::max(VGPRBudget, Granule);
104
0
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
105
0
  }
106
107
  // Subtract error margin and bias from register limits and avoid overflow.
108
0
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
109
0
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
110
0
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
111
0
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
112
113
0
  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
114
0
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
115
0
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
116
0
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
117
0
}
118
119
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
120
                                     bool AtTop,
121
                                     const RegPressureTracker &RPTracker,
122
                                     const SIRegisterInfo *SRI,
123
                                     unsigned SGPRPressure,
124
0
                                     unsigned VGPRPressure) {
125
0
  Cand.SU = SU;
126
0
  Cand.AtTop = AtTop;
127
128
0
  if (!DAG->isTrackingPressure())
129
0
    return;
130
131
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
132
  // the tracker, so we need to pass those functions a non-const copy.
133
0
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
134
135
0
  Pressure.clear();
136
0
  MaxPressure.clear();
137
138
0
  if (AtTop)
139
0
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
140
0
  else {
141
    // FIXME: I think for bottom up scheduling, the register pressure is cached
142
    // and can be retrieved by DAG->getPressureDiff(SU).
143
0
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
144
0
  }
145
146
0
  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
147
0
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
148
149
  // If two instructions increase the pressure of different register sets
150
  // by the same amount, the generic scheduler will prefer to schedule the
151
  // instruction that increases the set with the least amount of registers,
152
  // which in our case would be SGPRs.  This is rarely what we want, so
153
  // when we report excess/critical register pressure, we do it either
154
  // only for VGPRs or only for SGPRs.
155
156
  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
157
0
  const unsigned MaxVGPRPressureInc = 16;
158
0
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
159
0
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
160
161
162
  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
163
  // to increase the likelihood we don't go over the limits.  We should improve
164
  // the analysis to look through dependencies to find the path with the least
165
  // register pressure.
166
167
  // We only need to update the RPDelta for instructions that increase register
168
  // pressure. Instructions that decrease or keep reg pressure the same will be
169
  // marked as RegExcess in tryCandidate() when they are compared with
170
  // instructions that increase the register pressure.
171
0
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
172
0
    HasHighPressure = true;
173
0
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
174
0
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
175
0
  }
176
177
0
  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
178
0
    HasHighPressure = true;
179
0
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
180
0
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
181
0
  }
182
183
  // Register pressure is considered 'CRITICAL' if it is approaching a value
184
  // that would reduce the wave occupancy for the execution unit.  When
185
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
186
  // has the same cost, so we don't need to prefer one over the other.
187
188
0
  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
189
0
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
190
191
0
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
192
0
    HasHighPressure = true;
193
0
    if (SGPRDelta > VGPRDelta) {
194
0
      Cand.RPDelta.CriticalMax =
195
0
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
196
0
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
197
0
    } else {
198
0
      Cand.RPDelta.CriticalMax =
199
0
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
200
0
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
201
0
    }
202
0
  }
203
0
}
204
205
// This function is mostly cut and pasted from
206
// GenericScheduler::pickNodeFromQueue()
207
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
208
                                         const CandPolicy &ZonePolicy,
209
                                         const RegPressureTracker &RPTracker,
210
0
                                         SchedCandidate &Cand) {
211
0
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
212
0
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
213
0
  unsigned SGPRPressure = 0;
214
0
  unsigned VGPRPressure = 0;
215
0
  if (DAG->isTrackingPressure()) {
216
0
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
217
0
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
218
0
  }
219
0
  ReadyQueue &Q = Zone.Available;
220
0
  for (SUnit *SU : Q) {
221
222
0
    SchedCandidate TryCand(ZonePolicy);
223
0
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
224
0
                  SGPRPressure, VGPRPressure);
225
    // Pass SchedBoundary only when comparing nodes from the same boundary.
226
0
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
227
0
    tryCandidate(Cand, TryCand, ZoneArg);
228
0
    if (TryCand.Reason != NoCand) {
229
      // Initialize resource delta if needed in case future heuristics query it.
230
0
      if (TryCand.ResDelta == SchedResourceDelta())
231
0
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
232
0
      Cand.setBest(TryCand);
233
0
      LLVM_DEBUG(traceCandidate(Cand));
234
0
    }
235
0
  }
236
0
}
237
238
// This function is mostly cut and pasted from
239
// GenericScheduler::pickNodeBidirectional()
240
0
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
241
  // Schedule as far as possible in the direction of no choice. This is most
242
  // efficient, but also provides the best heuristics for CriticalPSets.
243
0
  if (SUnit *SU = Bot.pickOnlyChoice()) {
244
0
    IsTopNode = false;
245
0
    return SU;
246
0
  }
247
0
  if (SUnit *SU = Top.pickOnlyChoice()) {
248
0
    IsTopNode = true;
249
0
    return SU;
250
0
  }
251
  // Set the bottom-up policy based on the state of the current bottom zone and
252
  // the instructions outside the zone, including the top zone.
253
0
  CandPolicy BotPolicy;
254
0
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
255
  // Set the top-down policy based on the state of the current top zone and
256
  // the instructions outside the zone, including the bottom zone.
257
0
  CandPolicy TopPolicy;
258
0
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
259
260
  // See if BotCand is still valid (because we previously scheduled from Top).
261
0
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
262
0
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
263
0
      BotCand.Policy != BotPolicy) {
264
0
    BotCand.reset(CandPolicy());
265
0
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
266
0
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
267
0
  } else {
268
0
    LLVM_DEBUG(traceCandidate(BotCand));
269
0
#ifndef NDEBUG
270
0
    if (VerifyScheduling) {
271
0
      SchedCandidate TCand;
272
0
      TCand.reset(CandPolicy());
273
0
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
274
0
      assert(TCand.SU == BotCand.SU &&
275
0
             "Last pick result should correspond to re-picking right now");
276
0
    }
277
0
#endif
278
0
  }
279
280
  // Check if the top Q has a better candidate.
281
0
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
282
0
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
283
0
      TopCand.Policy != TopPolicy) {
284
0
    TopCand.reset(CandPolicy());
285
0
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
286
0
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
287
0
  } else {
288
0
    LLVM_DEBUG(traceCandidate(TopCand));
289
0
#ifndef NDEBUG
290
0
    if (VerifyScheduling) {
291
0
      SchedCandidate TCand;
292
0
      TCand.reset(CandPolicy());
293
0
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
294
0
      assert(TCand.SU == TopCand.SU &&
295
0
           "Last pick result should correspond to re-picking right now");
296
0
    }
297
0
#endif
298
0
  }
299
300
  // Pick best from BotCand and TopCand.
301
0
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
302
0
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
303
0
  SchedCandidate Cand = BotCand;
304
0
  TopCand.Reason = NoCand;
305
0
  tryCandidate(Cand, TopCand, nullptr);
306
0
  if (TopCand.Reason != NoCand) {
307
0
    Cand.setBest(TopCand);
308
0
  }
309
0
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
310
311
0
  IsTopNode = Cand.AtTop;
312
0
  return Cand.SU;
313
0
}
314
315
// This function is mostly cut and pasted from
316
// GenericScheduler::pickNode()
317
0
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
318
0
  if (DAG->top() == DAG->bottom()) {
319
0
    assert(Top.Available.empty() && Top.Pending.empty() &&
320
0
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
321
0
    return nullptr;
322
0
  }
323
0
  SUnit *SU;
324
0
  do {
325
0
    if (RegionPolicy.OnlyTopDown) {
326
0
      SU = Top.pickOnlyChoice();
327
0
      if (!SU) {
328
0
        CandPolicy NoPolicy;
329
0
        TopCand.reset(NoPolicy);
330
0
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
331
0
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
332
0
        SU = TopCand.SU;
333
0
      }
334
0
      IsTopNode = true;
335
0
    } else if (RegionPolicy.OnlyBottomUp) {
336
0
      SU = Bot.pickOnlyChoice();
337
0
      if (!SU) {
338
0
        CandPolicy NoPolicy;
339
0
        BotCand.reset(NoPolicy);
340
0
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
341
0
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
342
0
        SU = BotCand.SU;
343
0
      }
344
0
      IsTopNode = false;
345
0
    } else {
346
0
      SU = pickNodeBidirectional(IsTopNode);
347
0
    }
348
0
  } while (SU->isScheduled);
349
350
0
  if (SU->isTopReady())
351
0
    Top.removeReady(SU);
352
0
  if (SU->isBottomReady())
353
0
    Bot.removeReady(SU);
354
355
0
  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
356
0
                    << *SU->getInstr());
357
0
  return SU;
358
0
}
359
360
0
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
361
0
  assert(CurrentStage && CurrentStage != SchedStages.end());
362
0
  return *CurrentStage;
363
0
}
364
365
0
bool GCNSchedStrategy::advanceStage() {
366
0
  assert(CurrentStage != SchedStages.end());
367
0
  if (!CurrentStage)
368
0
    CurrentStage = SchedStages.begin();
369
0
  else
370
0
    CurrentStage++;
371
372
0
  return CurrentStage != SchedStages.end();
373
0
}
374
375
0
bool GCNSchedStrategy::hasNextStage() const {
376
0
  assert(CurrentStage);
377
0
  return std::next(CurrentStage) != SchedStages.end();
378
0
}
379
380
0
GCNSchedStageID GCNSchedStrategy::getNextStage() const {
381
0
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
382
0
  return *std::next(CurrentStage);
383
0
}
384
385
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
386
    const MachineSchedContext *C)
387
0
    : GCNSchedStrategy(C) {
388
0
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
389
0
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
390
0
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
391
0
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
392
0
}
393
394
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
395
0
    : GCNSchedStrategy(C) {
396
0
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
397
0
}
398
399
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
400
                                          SchedCandidate &TryCand,
401
0
                                          SchedBoundary *Zone) const {
402
  // Initialize the candidate if needed.
403
0
  if (!Cand.isValid()) {
404
0
    TryCand.Reason = NodeOrder;
405
0
    return true;
406
0
  }
407
408
  // Avoid spilling by exceeding the register limit.
409
0
  if (DAG->isTrackingPressure() &&
410
0
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
411
0
                  RegExcess, TRI, DAG->MF))
412
0
    return TryCand.Reason != NoCand;
413
414
  // Bias physical register defs and copies toward their uses and definitions,
  // respectively.
415
0
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
416
0
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
417
0
    return TryCand.Reason != NoCand;
418
419
0
  bool SameBoundary = Zone != nullptr;
420
0
  if (SameBoundary) {
421
    // Prioritize instructions that read unbuffered resources by stall cycles.
422
0
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
423
0
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
424
0
      return TryCand.Reason != NoCand;
425
426
    // Avoid critical resource consumption and balance the schedule.
427
0
    TryCand.initResourceDelta(DAG, SchedModel);
428
0
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
429
0
                TryCand, Cand, ResourceReduce))
430
0
      return TryCand.Reason != NoCand;
431
0
    if (tryGreater(TryCand.ResDelta.DemandedResources,
432
0
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
433
0
                   ResourceDemand))
434
0
      return TryCand.Reason != NoCand;
435
436
    // Unconditionally try to reduce latency.
437
0
    if (tryLatency(TryCand, Cand, *Zone))
438
0
      return TryCand.Reason != NoCand;
439
440
    // Weak edges are for clustering and other constraints.
441
0
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
442
0
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
443
0
      return TryCand.Reason != NoCand;
444
0
  }
445
446
  // Keep clustered nodes together to encourage downstream peephole
447
  // optimizations which may reduce resource requirements.
448
  //
449
  // This is a best effort to set things up for a post-RA pass. Optimizations
450
  // like generating loads of multiple registers should ideally be done within
451
  // the scheduler pass by combining the loads during DAG postprocessing.
452
0
  const SUnit *CandNextClusterSU =
453
0
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
454
0
  const SUnit *TryCandNextClusterSU =
455
0
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
456
0
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
457
0
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
458
0
    return TryCand.Reason != NoCand;
459
460
  // Avoid increasing the max critical pressure in the scheduled region.
461
0
  if (DAG->isTrackingPressure() &&
462
0
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
463
0
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
464
0
    return TryCand.Reason != NoCand;
465
466
  // Avoid increasing the max pressure of the entire region.
467
0
  if (DAG->isTrackingPressure() &&
468
0
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
469
0
                  Cand, RegMax, TRI, DAG->MF))
470
0
    return TryCand.Reason != NoCand;
471
472
0
  if (SameBoundary) {
473
    // Fall through to original instruction order.
474
0
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
475
0
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
476
0
      TryCand.Reason = NodeOrder;
477
0
      return true;
478
0
    }
479
0
  }
480
0
  return false;
481
0
}
482
483
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
484
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
485
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
486
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
487
0
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
488
489
0
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
490
0
  if (RelaxedOcc) {
491
0
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
492
0
    if (MinOccupancy != StartingOccupancy)
493
0
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
494
0
                        << ".\n");
495
0
  }
496
0
}
497
498
std::unique_ptr<GCNSchedStage>
499
0
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
500
0
  switch (SchedStageID) {
501
0
  case GCNSchedStageID::OccInitialSchedule:
502
0
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
503
0
  case GCNSchedStageID::UnclusteredHighRPReschedule:
504
0
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
505
0
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
506
0
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
507
0
  case GCNSchedStageID::PreRARematerialize:
508
0
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
509
0
  case GCNSchedStageID::ILPInitialSchedule:
510
0
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
511
0
  }
512
513
0
  llvm_unreachable("Unknown SchedStageID.");
514
0
}
515
516
0
void GCNScheduleDAGMILive::schedule() {
517
  // Collect all scheduling regions. The actual scheduling is performed in
518
  // GCNScheduleDAGMILive::finalizeSchedule.
519
0
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
520
0
}
521
522
GCNRegPressure
523
0
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
524
0
  GCNDownwardRPTracker RPTracker(*LIS);
525
0
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
526
0
  return RPTracker.moveMaxPressure();
527
0
}
528
529
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
530
0
                                                const MachineBasicBlock *MBB) {
531
0
  GCNDownwardRPTracker RPTracker(*LIS);
532
533
  // If the block has a single successor, then the live-ins of that successor
534
  // are the live-outs of the current block. We can reuse the calculated live
535
  // set if the successor will be scheduled after the current block.
536
537
  // However, due to a bug in LiveInterval analysis, it may happen that two
538
  // predecessors of the same successor block have different lane bitmasks for
539
  // a live-out register. Work around that by sticking to a one-to-one
540
  // relationship, i.e. one predecessor with one successor block.
541
0
  const MachineBasicBlock *OnlySucc = nullptr;
542
0
  if (MBB->succ_size() == 1) {
543
0
    auto *Candidate = *MBB->succ_begin();
544
0
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
545
0
      SlotIndexes *Ind = LIS->getSlotIndexes();
546
0
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
547
0
        OnlySucc = Candidate;
548
0
    }
549
0
  }
550
551
  // The scheduler sends regions from the end of the block upwards.
552
0
  size_t CurRegion = RegionIdx;
553
0
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
554
0
    if (Regions[CurRegion].first->getParent() != MBB)
555
0
      break;
556
0
  --CurRegion;
557
558
0
  auto I = MBB->begin();
559
0
  auto LiveInIt = MBBLiveIns.find(MBB);
560
0
  auto &Rgn = Regions[CurRegion];
561
0
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
562
0
  if (LiveInIt != MBBLiveIns.end()) {
563
0
    auto LiveIn = std::move(LiveInIt->second);
564
0
    RPTracker.reset(*MBB->begin(), &LiveIn);
565
0
    MBBLiveIns.erase(LiveInIt);
566
0
  } else {
567
0
    I = Rgn.first;
568
0
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
569
#ifdef EXPENSIVE_CHECKS
570
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
571
#endif
572
0
    RPTracker.reset(*I, &LRS);
573
0
  }
574
575
0
  for (;;) {
576
0
    I = RPTracker.getNext();
577
578
0
    if (Regions[CurRegion].first == I || NonDbgMI == I) {
579
0
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
580
0
      RPTracker.clearMaxPressure();
581
0
    }
582
583
0
    if (Regions[CurRegion].second == I) {
584
0
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
585
0
      if (CurRegion-- == RegionIdx)
586
0
        break;
587
0
    }
588
0
    RPTracker.advanceToNext();
589
0
    RPTracker.advanceBeforeNext();
590
0
  }
591
592
0
  if (OnlySucc) {
593
0
    if (I != MBB->end()) {
594
0
      RPTracker.advanceToNext();
595
0
      RPTracker.advance(MBB->end());
596
0
    }
597
0
    RPTracker.advanceBeforeNext();
598
0
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
599
0
  }
600
0
}
601
602
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
603
0
GCNScheduleDAGMILive::getBBLiveInMap() const {
604
0
  assert(!Regions.empty());
605
0
  std::vector<MachineInstr *> BBStarters;
606
0
  BBStarters.reserve(Regions.size());
607
0
  auto I = Regions.rbegin(), E = Regions.rend();
608
0
  auto *BB = I->first->getParent();
609
0
  do {
610
0
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
611
0
    BBStarters.push_back(MI);
612
0
    do {
613
0
      ++I;
614
0
    } while (I != E && I->first->getParent() == BB);
615
0
  } while (I != E);
616
0
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
617
0
}
618
619
0
void GCNScheduleDAGMILive::finalizeSchedule() {
620
  // Start actual scheduling here. This function is called by the base
621
  // MachineScheduler after all regions have been recorded by
622
  // GCNScheduleDAGMILive::schedule().
623
0
  LiveIns.resize(Regions.size());
624
0
  Pressure.resize(Regions.size());
625
0
  RescheduleRegions.resize(Regions.size());
626
0
  RegionsWithHighRP.resize(Regions.size());
627
0
  RegionsWithExcessRP.resize(Regions.size());
628
0
  RegionsWithMinOcc.resize(Regions.size());
629
0
  RegionsWithIGLPInstrs.resize(Regions.size());
630
0
  RescheduleRegions.set();
631
0
  RegionsWithHighRP.reset();
632
0
  RegionsWithExcessRP.reset();
633
0
  RegionsWithMinOcc.reset();
634
0
  RegionsWithIGLPInstrs.reset();
635
636
0
  runSchedStages();
637
0
}
638
639
0
void GCNScheduleDAGMILive::runSchedStages() {
640
0
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
641
642
0
  if (!Regions.empty())
643
0
    BBLiveInMap = getBBLiveInMap();
644
645
0
  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
646
0
  while (S.advanceStage()) {
647
0
    auto Stage = createSchedStage(S.getCurrentStage());
648
0
    if (!Stage->initGCNSchedStage())
649
0
      continue;
650
651
0
    for (auto Region : Regions) {
652
0
      RegionBegin = Region.first;
653
0
      RegionEnd = Region.second;
654
      // Set up for scheduling the region and check whether it should be skipped.
655
0
      if (!Stage->initGCNRegion()) {
656
0
        Stage->advanceRegion();
657
0
        exitRegion();
658
0
        continue;
659
0
      }
660
661
0
      ScheduleDAGMILive::schedule();
662
0
      Stage->finalizeGCNRegion();
663
0
    }
664
665
0
    Stage->finalizeGCNSchedStage();
666
0
  }
667
0
}
668
669
#ifndef NDEBUG
670
0
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
671
0
  switch (StageID) {
672
0
  case GCNSchedStageID::OccInitialSchedule:
673
0
    OS << "Max Occupancy Initial Schedule";
674
0
    break;
675
0
  case GCNSchedStageID::UnclusteredHighRPReschedule:
676
0
    OS << "Unclustered High Register Pressure Reschedule";
677
0
    break;
678
0
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
679
0
    OS << "Clustered Low Occupancy Reschedule";
680
0
    break;
681
0
  case GCNSchedStageID::PreRARematerialize:
682
0
    OS << "Pre-RA Rematerialize";
683
0
    break;
684
0
  case GCNSchedStageID::ILPInitialSchedule:
685
0
    OS << "Max ILP Initial Schedule";
686
0
    break;
687
0
  }
688
689
0
  return OS;
690
0
}
691
#endif
692
693
GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
694
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
695
0
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
696
697
0
bool GCNSchedStage::initGCNSchedStage() {
698
0
  if (!DAG.LIS)
699
0
    return false;
700
701
0
  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
702
0
  return true;
703
0
}
704
705
0
bool UnclusteredHighRPStage::initGCNSchedStage() {
706
0
  if (DisableUnclusterHighRP)
707
0
    return false;
708
709
0
  if (!GCNSchedStage::initGCNSchedStage())
710
0
    return false;
711
712
0
  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
713
0
    return false;
714
715
0
  SavedMutations.swap(DAG.Mutations);
716
0
  DAG.addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));
717
718
0
  InitialOccupancy = DAG.MinOccupancy;
719
  // Aggressively try to reduce register pressure in the unclustered high RP
720
  // stage. Temporarily increase occupancy target in the region.
721
0
  S.SGPRLimitBias = S.HighRPSGPRBias;
722
0
  S.VGPRLimitBias = S.HighRPVGPRBias;
723
0
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
724
0
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
725
726
0
  LLVM_DEBUG(
727
0
      dbgs()
728
0
      << "Retrying function scheduling without clustering. "
729
0
         "Aggressivly try to reduce register pressure to achieve occupancy "
730
0
      << DAG.MinOccupancy << ".\n");
731
732
0
  return true;
733
0
}
734
735
0
bool ClusteredLowOccStage::initGCNSchedStage() {
736
0
  if (DisableClusteredLowOccupancy)
737
0
    return false;
738
739
0
  if (!GCNSchedStage::initGCNSchedStage())
740
0
    return false;
741
742
  // Don't bother trying to improve ILP in lower RP regions if occupancy has not
743
  // been dropped. All regions will have already been scheduled with the ideal
744
  // occupancy targets.
745
0
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
746
0
    return false;
747
748
0
  LLVM_DEBUG(
749
0
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
750
0
             << DAG.MinOccupancy << ".\n");
751
0
  return true;
752
0
}
753
754
0
bool PreRARematStage::initGCNSchedStage() {
755
0
  if (!GCNSchedStage::initGCNSchedStage())
756
0
    return false;
757
758
0
  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
759
0
    return false;
760
761
0
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
762
  // Check maximum occupancy
763
0
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
764
0
      DAG.MinOccupancy)
765
0
    return false;
766
767
  // FIXME: This pass will invalidate cached MBBLiveIns for regions
768
  // in between the defs and the region we sank the def to. Cached pressure
769
  // for regions where a def is sunk from will also be invalidated. This will
770
  // need to be fixed if there is another pass after this pass.
771
0
  assert(!S.hasNextStage());
772
773
0
  collectRematerializableInstructions();
774
0
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
775
0
    return false;
776
777
0
  LLVM_DEBUG(
778
0
      dbgs() << "Retrying function scheduling with improved occupancy of "
779
0
             << DAG.MinOccupancy << " from rematerializing\n");
780
0
  return true;
781
0
}
782
783
0
void GCNSchedStage::finalizeGCNSchedStage() {
784
0
  DAG.finishBlock();
785
0
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
786
0
}
787
788
0
void UnclusteredHighRPStage::finalizeGCNSchedStage() {
789
0
  SavedMutations.swap(DAG.Mutations);
790
0
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
791
0
  if (DAG.MinOccupancy > InitialOccupancy) {
792
0
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
793
0
      DAG.RegionsWithMinOcc[IDX] =
794
0
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
795
796
0
    LLVM_DEBUG(dbgs() << StageID
797
0
                      << " stage successfully increased occupancy to "
798
0
                      << DAG.MinOccupancy << '\n');
799
0
  }
800
801
0
  GCNSchedStage::finalizeGCNSchedStage();
802
0
}
803
804
0
bool GCNSchedStage::initGCNRegion() {
805
  // Check whether this new region is also a new block.
806
0
  if (DAG.RegionBegin->getParent() != CurrentMBB)
807
0
    setupNewBlock();
808
809
0
  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
810
0
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
811
812
  // Skip empty scheduling regions (0 or 1 schedulable instructions).
813
0
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
814
0
    return false;
815
816
0
  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
817
0
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
818
0
                    << " " << CurrentMBB->getName()
819
0
                    << "\n  From: " << *DAG.begin() << "    To: ";
820
0
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
821
0
             else dbgs() << "End";
822
0
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
823
824
  // Save original instruction order before scheduling for possible revert.
825
0
  Unsched.clear();
826
0
  Unsched.reserve(DAG.NumRegionInstrs);
827
0
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
828
0
      StageID == GCNSchedStageID::ILPInitialSchedule) {
829
0
    for (auto &I : DAG) {
830
0
      Unsched.push_back(&I);
831
0
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
832
0
          I.getOpcode() == AMDGPU::IGLP_OPT)
833
0
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
834
0
    }
835
0
  } else {
836
0
    for (auto &I : DAG)
837
0
      Unsched.push_back(&I);
838
0
  }
839
840
0
  PressureBefore = DAG.Pressure[RegionIdx];
841
842
0
  LLVM_DEBUG(
843
0
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
844
0
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
845
0
             << "Region live-in pressure:  "
846
0
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
847
0
             << "Region register pressure: " << print(PressureBefore));
848
849
0
  S.HasHighPressure = false;
850
0
  S.KnownExcessRP = isRegionWithExcessRP();
851
852
0
  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
853
0
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
854
0
    SavedMutations.clear();
855
0
    SavedMutations.swap(DAG.Mutations);
856
0
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
857
0
                          StageID == GCNSchedStageID::ILPInitialSchedule;
858
0
    DAG.addMutation(createIGroupLPDAGMutation(/*IsReentry=*/!IsInitialStage));
859
0
  }
860
861
0
  return true;
862
0
}
863
864
0
bool UnclusteredHighRPStage::initGCNRegion() {
865
  // Only reschedule regions with the minimum occupancy or regions that may have
866
  // spilling (excess register pressure).
867
0
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
868
0
       DAG.MinOccupancy <= InitialOccupancy) &&
869
0
      !DAG.RegionsWithExcessRP[RegionIdx])
870
0
    return false;
871
872
0
  return GCNSchedStage::initGCNRegion();
873
0
}
874
875
0
bool ClusteredLowOccStage::initGCNRegion() {
876
  // We may need to reschedule this region if it wasn't rescheduled in the last
877
  // stage, or if we found it was testing critical register pressure limits in
878
  // the unclustered reschedule stage. The latter is because we may not have been
879
  // able to raise the min occupancy in the previous stage, so the region may be
880
  // overly constrained even if it was already rescheduled.
881
0
  if (!DAG.RegionsWithHighRP[RegionIdx])
882
0
    return false;
883
884
0
  return GCNSchedStage::initGCNRegion();
885
0
}
886
887
0
bool PreRARematStage::initGCNRegion() {
888
0
  if (!DAG.RescheduleRegions[RegionIdx])
889
0
    return false;
890
891
0
  return GCNSchedStage::initGCNRegion();
892
0
}
893
894
0
void GCNSchedStage::setupNewBlock() {
895
0
  if (CurrentMBB)
896
0
    DAG.finishBlock();
897
898
0
  CurrentMBB = DAG.RegionBegin->getParent();
899
0
  DAG.startBlock(CurrentMBB);
900
  // Get real RP for the region if it hasn't been calculated before. After the
901
  // initial schedule stage, real RP will be collected after scheduling.
902
0
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
903
0
      StageID == GCNSchedStageID::ILPInitialSchedule)
904
0
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
905
0
}
906
907
0
void GCNSchedStage::finalizeGCNRegion() {
908
0
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
909
0
  DAG.RescheduleRegions[RegionIdx] = false;
910
0
  if (S.HasHighPressure)
911
0
    DAG.RegionsWithHighRP[RegionIdx] = true;
912
913
  // Revert scheduling if we have dropped occupancy or there is some other
914
  // reason that the original schedule is better.
915
0
  checkScheduling();
916
917
0
  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
918
0
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
919
0
    SavedMutations.swap(DAG.Mutations);
920
921
0
  DAG.exitRegion();
922
0
  RegionIdx++;
923
0
}
924
925
0
void GCNSchedStage::checkScheduling() {
926
  // Check the results of scheduling.
927
0
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
928
0
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
929
0
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
930
931
0
  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
932
0
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
933
0
    DAG.Pressure[RegionIdx] = PressureAfter;
934
0
    DAG.RegionsWithMinOcc[RegionIdx] =
935
0
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
936
937
    // Early out if we have achieved the occupancy target.
938
0
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
939
0
    return;
940
0
  }
941
942
0
  unsigned TargetOccupancy =
943
0
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
944
0
  unsigned WavesAfter =
945
0
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
946
0
  unsigned WavesBefore =
947
0
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
948
0
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
949
0
                    << ", after " << WavesAfter << ".\n");
950
951
  // We may not be able to keep the current target occupancy because of the just
952
  // scheduled region. We might still be able to revert scheduling if the
953
  // occupancy before was higher, or if the current schedule has register
954
  // pressure higher than the excess limits which could lead to more spilling.
955
0
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
956
957
  // Allow memory bound functions to drop to 4 waves if not limited by an
958
  // attribute.
959
0
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
960
0
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
961
0
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
962
0
                      << MFI.getMinAllowedOccupancy() << " waves\n");
963
0
    NewOccupancy = WavesAfter;
964
0
  }
965
966
0
  if (NewOccupancy < DAG.MinOccupancy) {
967
0
    DAG.MinOccupancy = NewOccupancy;
968
0
    MFI.limitOccupancy(DAG.MinOccupancy);
969
0
    DAG.RegionsWithMinOcc.reset();
970
0
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
971
0
                      << DAG.MinOccupancy << ".\n");
972
0
  }
973
974
0
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
975
0
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
976
0
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
977
0
      PressureAfter.getAGPRNum() > MaxVGPRs ||
978
0
      PressureAfter.getSGPRNum() > MaxSGPRs) {
979
0
    DAG.RescheduleRegions[RegionIdx] = true;
980
0
    DAG.RegionsWithHighRP[RegionIdx] = true;
981
0
    DAG.RegionsWithExcessRP[RegionIdx] = true;
982
0
  }
983
984
  // Revert if this region's schedule would cause a drop in occupancy or
985
  // spilling.
986
0
  if (shouldRevertScheduling(WavesAfter)) {
987
0
    revertScheduling();
988
0
  } else {
989
0
    DAG.Pressure[RegionIdx] = PressureAfter;
990
0
    DAG.RegionsWithMinOcc[RegionIdx] =
991
0
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
992
0
  }
993
0
}
994
995
unsigned
996
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
997
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
998
0
                                      const TargetSchedModel &SM) {
999
0
  unsigned ReadyCycle = CurrCycle;
1000
0
  for (auto &D : SU.Preds) {
1001
0
    if (D.isAssignedRegDep()) {
1002
0
      MachineInstr *DefMI = D.getSUnit()->getInstr();
1003
0
      unsigned Latency = SM.computeInstrLatency(DefMI);
1004
0
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1005
0
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1006
0
    }
1007
0
  }
1008
0
  ReadyCycles[SU.NodeNum] = ReadyCycle;
1009
0
  return ReadyCycle;
1010
0
}
1011
1012
#ifndef NDEBUG
1013
struct EarlierIssuingCycle {
1014
  bool operator()(std::pair<MachineInstr *, unsigned> A,
1015
0
                  std::pair<MachineInstr *, unsigned> B) const {
1016
0
    return A.second < B.second;
1017
0
  }
1018
};
1019
1020
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1021
0
                                        EarlierIssuingCycle> &ReadyCycles) {
1022
0
  if (ReadyCycles.empty())
1023
0
    return;
1024
0
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1025
0
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1026
0
         << " ##################\n# Cycle #\t\t\tInstruction          "
1027
0
            "             "
1028
0
            "                            \n";
1029
0
  unsigned IPrev = 1;
1030
0
  for (auto &I : ReadyCycles) {
1031
0
    if (I.second > IPrev + 1)
1032
0
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1033
0
             << " CYCLES DETECTED ******************************\n\n";
1034
0
    dbgs() << "[ " << I.second << " ]  :  " << *I.first << "\n";
1035
0
    IPrev = I.second;
1036
0
  }
1037
0
}
1038
#endif
1039
1040
ScheduleMetrics
1041
0
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1042
0
#ifndef NDEBUG
1043
0
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1044
0
      ReadyCyclesSorted;
1045
0
#endif
1046
0
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1047
0
  unsigned SumBubbles = 0;
1048
0
  DenseMap<unsigned, unsigned> ReadyCycles;
1049
0
  unsigned CurrCycle = 0;
1050
0
  for (auto &SU : InputSchedule) {
1051
0
    unsigned ReadyCycle =
1052
0
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1053
0
    SumBubbles += ReadyCycle - CurrCycle;
1054
0
#ifndef NDEBUG
1055
0
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1056
0
#endif
1057
0
    CurrCycle = ++ReadyCycle;
1058
0
  }
1059
0
#ifndef NDEBUG
1060
0
  LLVM_DEBUG(
1061
0
      printScheduleModel(ReadyCyclesSorted);
1062
0
      dbgs() << "\n\t"
1063
0
             << "Metric: "
1064
0
             << (SumBubbles
1065
0
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1066
0
                     : 1)
1067
0
             << "\n\n");
1068
0
#endif
1069
1070
0
  return ScheduleMetrics(CurrCycle, SumBubbles);
1071
0
}
1072
1073
ScheduleMetrics
1074
0
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1075
0
#ifndef NDEBUG
1076
0
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1077
0
      ReadyCyclesSorted;
1078
0
#endif
1079
0
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1080
0
  unsigned SumBubbles = 0;
1081
0
  DenseMap<unsigned, unsigned> ReadyCycles;
1082
0
  unsigned CurrCycle = 0;
1083
0
  for (auto &MI : DAG) {
1084
0
    SUnit *SU = DAG.getSUnit(&MI);
1085
0
    if (!SU)
1086
0
      continue;
1087
0
    unsigned ReadyCycle =
1088
0
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1089
0
    SumBubbles += ReadyCycle - CurrCycle;
1090
0
#ifndef NDEBUG
1091
0
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1092
0
#endif
1093
0
    CurrCycle = ++ReadyCycle;
1094
0
  }
1095
0
#ifndef NDEBUG
1096
0
  LLVM_DEBUG(
1097
0
      printScheduleModel(ReadyCyclesSorted);
1098
0
      dbgs() << "\n\t"
1099
0
             << "Metric: "
1100
0
             << (SumBubbles
1101
0
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1102
0
                     : 1)
1103
0
             << "\n\n");
1104
0
#endif
1105
1106
0
  return ScheduleMetrics(CurrCycle, SumBubbles);
1107
0
}
1108
1109
0
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1110
0
  if (WavesAfter < DAG.MinOccupancy)
1111
0
    return true;
1112
1113
0
  return false;
1114
0
}
1115
1116
0
bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1117
0
  if (PressureAfter == PressureBefore)
1118
0
    return false;
1119
1120
0
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1121
0
    return true;
1122
1123
0
  if (mayCauseSpilling(WavesAfter))
1124
0
    return true;
1125
1126
0
  return false;
1127
0
}
1128
1129
0
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1130
  // If RP is not reduced in the unclustered reschedule stage, revert to the
1131
  // old schedule.
1132
0
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1133
0
       mayCauseSpilling(WavesAfter)) ||
1134
0
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
1135
0
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1136
0
    return true;
1137
0
  }
1138
1139
  // Do not attempt to relax the schedule even more if we are already spilling.
1140
0
  if (isRegionWithExcessRP())
1141
0
    return false;
1142
1143
0
  LLVM_DEBUG(
1144
0
      dbgs()
1145
0
      << "\n\t      *** In shouldRevertScheduling ***\n"
1146
0
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
1147
0
  ScheduleMetrics MBefore =
1148
0
      getScheduleMetrics(DAG.SUnits);
1149
0
  LLVM_DEBUG(
1150
0
      dbgs()
1151
0
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
1152
0
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1153
0
  unsigned OldMetric = MBefore.getMetric();
1154
0
  unsigned NewMetric = MAfter.getMetric();
1155
0
  unsigned WavesBefore =
1156
0
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1157
0
  unsigned Profit =
1158
0
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1159
0
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1160
0
       NewMetric) /
1161
0
      ScheduleMetrics::ScaleFactor;
1162
0
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1163
0
                    << MAfter << "Profit: " << Profit << "\n");
1164
0
  return Profit < ScheduleMetrics::ScaleFactor;
1165
0
}
1166
1167
0
bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1168
0
  if (PressureAfter == PressureBefore)
1169
0
    return false;
1170
1171
0
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1172
0
    return true;
1173
1174
0
  if (mayCauseSpilling(WavesAfter))
1175
0
    return true;
1176
1177
0
  return false;
1178
0
}
1179
1180
0
bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1181
0
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1182
0
    return true;
1183
1184
0
  if (mayCauseSpilling(WavesAfter))
1185
0
    return true;
1186
1187
0
  return false;
1188
0
}
1189
1190
0
bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1191
0
  if (mayCauseSpilling(WavesAfter))
1192
0
    return true;
1193
1194
0
  return false;
1195
0
}
1196
1197
0
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1198
0
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
1199
0
      !PressureAfter.less(ST, PressureBefore) &&
1200
0
      isRegionWithExcessRP()) {
1201
0
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1202
0
    return true;
1203
0
  }
1204
1205
0
  return false;
1206
0
}
1207
1208
0
void GCNSchedStage::revertScheduling() {
1209
0
  DAG.RegionsWithMinOcc[RegionIdx] =
1210
0
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1211
0
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1212
0
  DAG.RescheduleRegions[RegionIdx] =
1213
0
      S.hasNextStage() &&
1214
0
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1215
0
  DAG.RegionEnd = DAG.RegionBegin;
1216
0
  int SkippedDebugInstr = 0;
1217
0
  for (MachineInstr *MI : Unsched) {
1218
0
    if (MI->isDebugInstr()) {
1219
0
      ++SkippedDebugInstr;
1220
0
      continue;
1221
0
    }
1222
1223
0
    if (MI->getIterator() != DAG.RegionEnd) {
1224
0
      DAG.BB->remove(MI);
1225
0
      DAG.BB->insert(DAG.RegionEnd, MI);
1226
0
      if (!MI->isDebugInstr())
1227
0
        DAG.LIS->handleMove(*MI, true);
1228
0
    }
1229
1230
    // Reset read-undef flags and update them later.
1231
0
    for (auto &Op : MI->all_defs())
1232
0
      Op.setIsUndef(false);
1233
0
    RegisterOperands RegOpers;
1234
0
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1235
0
    if (!MI->isDebugInstr()) {
1236
0
      if (DAG.ShouldTrackLaneMasks) {
1237
        // Adjust liveness and add missing dead+read-undef flags.
1238
0
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1239
0
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1240
0
      } else {
1241
        // Adjust for missing dead-def flags.
1242
0
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1243
0
      }
1244
0
    }
1245
0
    DAG.RegionEnd = MI->getIterator();
1246
0
    ++DAG.RegionEnd;
1247
0
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1248
0
  }
1249
1250
  // After reverting the schedule, debug instrs will now be at the end of the block
1251
  // and RegionEnd will point to the first debug instr. Increment RegionEnd
1252
  // past debug instrs to the actual end of the scheduling region.
1253
0
  while (SkippedDebugInstr-- > 0)
1254
0
    ++DAG.RegionEnd;
1255
1256
  // If the Unsched.front() instruction is a debug instruction, this will actually
1257
  // shrink the region since we moved all debug instructions to the end of the
1258
  // block. Find the first instruction that is not a debug instruction.
1259
0
  DAG.RegionBegin = Unsched.front()->getIterator();
1260
0
  if (DAG.RegionBegin->isDebugInstr()) {
1261
0
    for (MachineInstr *MI : Unsched) {
1262
0
      if (MI->isDebugInstr())
1263
0
        continue;
1264
0
      DAG.RegionBegin = MI->getIterator();
1265
0
      break;
1266
0
    }
1267
0
  }
1268
1269
  // Then move the debug instructions back into their correct place and set
1270
  // RegionBegin and RegionEnd if needed.
1271
0
  DAG.placeDebugValues();
1272
1273
0
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1274
0
}
1275
1276
0
void PreRARematStage::collectRematerializableInstructions() {
1277
0
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1278
0
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1279
0
    Register Reg = Register::index2VirtReg(I);
1280
0
    if (!DAG.LIS->hasInterval(Reg))
1281
0
      continue;
1282
1283
    // TODO: Handle AGPR and SGPR rematerialization
1284
0
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1285
0
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1286
0
      continue;
1287
1288
0
    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
1289
0
    MachineInstr *Def = Op->getParent();
1290
0
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1291
0
      continue;
1292
1293
0
    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
1294
0
    if (Def->getParent() == UseI->getParent())
1295
0
      continue;
1296
1297
    // We are only collecting defs that are defined in another block and are
1298
    // live-through or used inside regions at MinOccupancy. This means that the
1299
    // register must be in the live-in set for the region.
1300
0
    bool AddedToRematList = false;
1301
0
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1302
0
      auto It = DAG.LiveIns[I].find(Reg);
1303
0
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1304
0
        if (DAG.RegionsWithMinOcc[I]) {
1305
0
          RematerializableInsts[I][Def] = UseI;
1306
0
          AddedToRematList = true;
1307
0
        }
1308
1309
        // Collect regions with rematerializable reg as live-in to avoid
1310
        // searching later when updating RP.
1311
0
        RematDefToLiveInRegions[Def].push_back(I);
1312
0
      }
1313
0
    }
1314
0
    if (!AddedToRematList)
1315
0
      RematDefToLiveInRegions.erase(Def);
1316
0
  }
1317
0
}
1318
1319
bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1320
0
                                              const TargetInstrInfo *TII) {
1321
  // Temporary copies of cached variables we will be modifying and replacing if
1322
  // sinking succeeds.
1323
0
  SmallVector<
1324
0
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1325
0
      NewRegions;
1326
0
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
1327
0
  DenseMap<unsigned, GCNRegPressure> NewPressure;
1328
0
  BitVector NewRescheduleRegions;
1329
0
  LiveIntervals *LIS = DAG.LIS;
1330
1331
0
  NewRegions.resize(DAG.Regions.size());
1332
0
  NewRescheduleRegions.resize(DAG.Regions.size());
1333
1334
  // Collect only regions that have a rematerializable def as a live-in.
1335
0
  SmallSet<unsigned, 16> ImpactedRegions;
1336
0
  for (const auto &It : RematDefToLiveInRegions)
1337
0
    ImpactedRegions.insert(It.second.begin(), It.second.end());
1338
1339
  // Make copies of register pressure and live-ins cache that will be updated
1340
  // as we rematerialize.
1341
0
  for (auto Idx : ImpactedRegions) {
1342
0
    NewPressure[Idx] = DAG.Pressure[Idx];
1343
0
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
1344
0
  }
1345
0
  NewRegions = DAG.Regions;
1346
0
  NewRescheduleRegions.reset();
1347
1348
0
  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
1349
0
  bool Improved = false;
1350
0
  for (auto I : ImpactedRegions) {
1351
0
    if (!DAG.RegionsWithMinOcc[I])
1352
0
      continue;
1353
1354
0
    Improved = false;
1355
0
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1356
0
    int SGPRUsage = NewPressure[I].getSGPRNum();
1357
1358
    // TODO: Handle occupancy drop due to AGPR and SGPR.
1359
    // Check if the occupancy drop is due to VGPR usage rather than SGPR usage.
1360
0
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1361
0
      break;
1362
1363
    // The occupancy of this region could have been improved by a previous
1364
    // iteration's sinking of defs.
1365
0
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1366
0
      NewRescheduleRegions[I] = true;
1367
0
      Improved = true;
1368
0
      continue;
1369
0
    }
1370
1371
    // First check if we have enough trivially rematerializable instructions to
1372
    // improve occupancy. Optimistically assume all instructions we are able to
1373
    // sink will decrease RP.
1374
0
    int TotalSinkableRegs = 0;
1375
0
    for (const auto &It : RematerializableInsts[I]) {
1376
0
      MachineInstr *Def = It.first;
1377
0
      Register DefReg = Def->getOperand(0).getReg();
1378
0
      TotalSinkableRegs +=
1379
0
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1380
0
    }
1381
0
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1382
0
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1383
    // If, in the most optimistic scenario, we cannot improve occupancy, then do
1384
    // not attempt to sink any instructions.
1385
0
    if (OptimisticOccupancy <= DAG.MinOccupancy)
1386
0
      break;
1387
1388
0
    unsigned ImproveOccupancy = 0;
1389
0
    SmallVector<MachineInstr *, 4> SinkedDefs;
1390
0
    for (auto &It : RematerializableInsts[I]) {
1391
0
      MachineInstr *Def = It.first;
1392
0
      MachineBasicBlock::iterator InsertPos =
1393
0
          MachineBasicBlock::iterator(It.second);
1394
0
      Register Reg = Def->getOperand(0).getReg();
1395
      // Rematerialize MI to its use block. Since we are only rematerializing
1396
      // instructions that do not have any virtual reg uses, we do not need to
1397
      // call LiveRangeEdit::allUsesAvailableAt() and
1398
      // LiveRangeEdit::canRematerializeAt().
1399
0
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1400
0
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1401
0
      MachineInstr *NewMI = &*std::prev(InsertPos);
1402
0
      LIS->InsertMachineInstrInMaps(*NewMI);
1403
0
      LIS->removeInterval(Reg);
1404
0
      LIS->createAndComputeVirtRegInterval(Reg);
1405
0
      InsertedMIToOldDef[NewMI] = Def;
1406
1407
      // Update region boundaries in the scheduling region we sank from, since we
1408
      // may sink an instruction that was at the beginning or end of its region.
1409
0
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1410
0
                                 /*Removing =*/true);
1411
1412
      // Update region boundaries in the region we sank to.
1413
0
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1414
1415
0
      LaneBitmask PrevMask = NewLiveIns[I][Reg];
1416
      // FIXME: Also update cached pressure for the region the def was sunk from.
1417
      // Update RP for all regions that have this reg as a live-in, and remove
1418
      // the reg from every region's live-in set.
1419
0
      for (auto Idx : RematDefToLiveInRegions[Def]) {
1420
0
        NewLiveIns[Idx].erase(Reg);
1421
0
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1422
          // Def is live-through and not used in this block.
1423
0
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1424
0
        } else {
1425
          // Def is used and rematerialized into this block.
1426
0
          GCNDownwardRPTracker RPT(*LIS);
1427
0
          auto *NonDbgMI = &*skipDebugInstructionsForward(
1428
0
              NewRegions[Idx].first, NewRegions[Idx].second);
1429
0
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1430
0
          RPT.advance(NewRegions[Idx].second);
1431
0
          NewPressure[Idx] = RPT.moveMaxPressure();
1432
0
        }
1433
0
      }
1434
1435
0
      SinkedDefs.push_back(Def);
1436
0
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1437
0
      if (ImproveOccupancy > DAG.MinOccupancy)
1438
0
        break;
1439
0
    }
1440
1441
    // Remove defs we just sank from all regions' lists of sinkable defs.
1442
0
    for (auto &Def : SinkedDefs)
1443
0
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1444
0
        RematerializableInsts[TrackedIdx].erase(Def);
1445
1446
0
    if (ImproveOccupancy <= DAG.MinOccupancy)
1447
0
      break;
1448
1449
0
    NewRescheduleRegions[I] = true;
1450
0
    Improved = true;
1451
0
  }
1452
1453
0
  if (!Improved) {
1454
    // Occupancy was not improved for every region that was at MinOccupancy.
1455
    // Undo sinking and remove newly rematerialized instructions.
1456
0
    for (auto &Entry : InsertedMIToOldDef) {
1457
0
      MachineInstr *MI = Entry.first;
1458
0
      MachineInstr *OldMI = Entry.second;
1459
0
      Register Reg = MI->getOperand(0).getReg();
1460
0
      LIS->RemoveMachineInstrFromMaps(*MI);
1461
0
      MI->eraseFromParent();
1462
0
      OldMI->clearRegisterDeads(Reg);
1463
0
      LIS->removeInterval(Reg);
1464
0
      LIS->createAndComputeVirtRegInterval(Reg);
1465
0
    }
1466
0
    return false;
1467
0
  }
1468
1469
  // Occupancy was improved for every region that was at MinOccupancy.
1470
0
  for (auto &Entry : InsertedMIToOldDef) {
1471
0
    MachineInstr *MI = Entry.first;
1472
0
    MachineInstr *OldMI = Entry.second;
1473
1474
    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1475
0
    DAG.BBLiveInMap.erase(OldMI);
1476
1477
    // Remove OldMI and update LIS
1478
0
    Register Reg = MI->getOperand(0).getReg();
1479
0
    LIS->RemoveMachineInstrFromMaps(*OldMI);
1480
0
    OldMI->eraseFromParent();
1481
0
    LIS->removeInterval(Reg);
1482
0
    LIS->createAndComputeVirtRegInterval(Reg);
1483
0
  }
1484
1485
  // Update the live-in, register pressure, and region caches.
1486
0
  for (auto Idx : ImpactedRegions) {
1487
0
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
1488
0
    DAG.Pressure[Idx] = NewPressure[Idx];
1489
0
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1490
0
  }
1491
0
  DAG.Regions = NewRegions;
1492
0
  DAG.RescheduleRegions = NewRescheduleRegions;
1493
1494
0
  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1495
0
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1496
1497
0
  return true;
1498
0
}
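// The early feasibility test in the sinking loop above can be shown in
// isolation. Below is a minimal, self-contained sketch of the "optimistic
// occupancy" check: subtract every sinkable live-in register from the
// region's VGPR count and only attempt sinking if the resulting occupancy
// could beat the current minimum. OccupancyForVGPRs is a hypothetical toy
// model standing in for GCNSubtarget::getOccupancyWithNumVGPRs(); the real
// mapping is subtarget-specific.

#include <algorithm>
#include <vector>

// Toy occupancy model: 256 VGPRs shared per SIMD, at most 10 waves. Real
// targets apply allocation-granularity rules on top of this.
static unsigned OccupancyForVGPRs(unsigned NumVGPRs) {
  if (NumVGPRs == 0)
    return 10;
  return std::min(10u, 256u / NumVGPRs);
}

// Mirrors the early "break" in sinkTriviallyRematInsts(): if even sinking
// every candidate cannot lift the region above MinOccupancy, do not bother.
static bool worthAttemptingToSink(unsigned VGPRUsage,
                                  const std::vector<unsigned> &SinkableRegSizes,
                                  unsigned MinOccupancy) {
  unsigned TotalSinkableRegs = 0;
  for (unsigned Size : SinkableRegSizes)
    TotalSinkableRegs += Size; // analogous to getNumCoveredRegs(LiveInMask)
  unsigned VGPRsAfterSink =
      VGPRUsage > TotalSinkableRegs ? VGPRUsage - TotalSinkableRegs : 0;
  return OccupancyForVGPRs(VGPRsAfterSink) > MinOccupancy;
}

// For example, a region using 128 VGPRs at MinOccupancy == 2 with 32 sinkable
// registers yields OccupancyForVGPRs(96) == 2 in this toy model, so the
// sketch returns false and no sinking would be attempted.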
1499
1500
// Copied from MachineLICM
1501
0
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1502
0
  if (!DAG.TII->isTriviallyReMaterializable(MI))
1503
0
    return false;
1504
1505
0
  for (const MachineOperand &MO : MI.all_uses())
1506
0
    if (MO.getReg().isVirtual())
1507
0
      return false;
1508
1509
0
  return true;
1510
0
}
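// The helper above adds one constraint on top of
// TargetInstrInfo::isTriviallyReMaterializable(): the candidate must not read
// any virtual register, which is what lets the stage skip
// LiveRangeEdit::allUsesAvailableAt() when sinking. A minimal, self-contained
// sketch of that extra filter on a simplified operand model (FakeOperand is
// an illustrative stand-in for MachineOperand, not a real LLVM type):

#include <vector>

struct FakeOperand {
  bool IsUse;        // the operand is read by the instruction
  bool IsVirtualReg; // the operand names a virtual register
};

static bool hasNoVirtualRegUses(const std::vector<FakeOperand> &Operands) {
  for (const FakeOperand &Op : Operands)
    if (Op.IsUse && Op.IsVirtualReg)
      return false; // any virtual-register use disqualifies the candidate
  return true;
}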
1511
1512
// When removing, we have to check both the beginning and the end of the region.
1513
// When inserting, we will only have to check if we are inserting NewMI in front
1514
// of a scheduling region and do not need to check the ending since we will only
1515
// ever be inserting before an already existing MI.
1516
void GCNScheduleDAGMILive::updateRegionBoundaries(
1517
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1518
                              MachineBasicBlock::iterator>> &RegionBoundaries,
1519
0
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1520
0
  unsigned I = 0, E = RegionBoundaries.size();
1521
  // Search for the first region of the block where MI is located.
1522
0
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1523
0
    ++I;
1524
1525
0
  for (; I != E; ++I) {
1526
0
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
1527
0
      return;
1528
1529
0
    if (Removing && MI == RegionBoundaries[I].first &&
1530
0
        MI == RegionBoundaries[I].second) {
1531
      // MI is in a region of size 1; after removing it, the region will have
1532
      // size 0. Set RegionBegin and RegionEnd to the end-of-block iterator.
1533
0
      RegionBoundaries[I] =
1534
0
          std::pair(MI->getParent()->end(), MI->getParent()->end());
1535
0
      return;
1536
0
    }
1537
0
    if (MI == RegionBoundaries[I].first) {
1538
0
      if (Removing)
1539
0
        RegionBoundaries[I] =
1540
0
            std::pair(std::next(MI), RegionBoundaries[I].second);
1541
0
      else
1542
        // NewMI was inserted in front of the region; set RegionBegin to NewMI.
1543
0
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1544
0
                                        RegionBoundaries[I].second);
1545
0
      return;
1546
0
    }
1547
0
    if (Removing && MI == RegionBoundaries[I].second) {
1548
0
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1549
0
      return;
1550
0
    }
1551
0
  }
1552
0
}
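// The boundary bookkeeping above can be illustrated on a simplified model.
// Below is a minimal, self-contained sketch (illustrative only) that treats a
// basic block as a std::list<int> and a scheduling region as a (first, last)
// pair of iterators into it, covering the same three removal cases handled
// above; the real code additionally walks multiple regions per block and
// handles the insertion case by rewriting RegionBegin to NewMI.

#include <iterator>
#include <list>
#include <utility>
#include <vector>

using InstrList = std::list<int>;
using Region = std::pair<InstrList::iterator, InstrList::iterator>;

static void noteRemoval(InstrList &Block, std::vector<Region> &Regions,
                        InstrList::iterator MI) {
  for (Region &R : Regions) {
    if (MI == R.first && MI == R.second) {
      // Single-instruction region: it becomes empty, so park both bounds at
      // the end-of-block iterator.
      R = {Block.end(), Block.end()};
      return;
    }
    if (MI == R.first) {
      // Removing the region's first instruction: it now starts at the next one.
      R = {std::next(MI), R.second};
      return;
    }
    if (MI == R.second) {
      // Removing the region's last instruction: it now ends at the previous one.
      R = {R.first, std::prev(MI)};
      return;
    }
  }
}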
1553
1554
0
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1555
0
  return std::any_of(
1556
0
      DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1557
0
        unsigned Opc = MI->getOpcode();
1558
0
        return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1559
0
      });
1560
0
}
1561
1562
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1563
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1564
    bool RemoveKillFlags)
1565
0
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1566
1567
0
void GCNPostScheduleDAGMILive::schedule() {
1568
0
  HasIGLPInstrs = hasIGLPInstrs(this);
1569
0
  if (HasIGLPInstrs) {
1570
0
    SavedMutations.clear();
1571
0
    SavedMutations.swap(Mutations);
1572
0
    addMutation(createIGroupLPDAGMutation(/*IsReentry=*/true));
1573
0
  }
1574
1575
0
  ScheduleDAGMI::schedule();
1576
0
}
1577
1578
0
void GCNPostScheduleDAGMILive::finalizeSchedule() {
1579
0
  if (HasIGLPInstrs)
1580
0
    SavedMutations.swap(Mutations);
1581
1582
0
  ScheduleDAGMI::finalizeSchedule();
1583
0
}