Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp

Every line of this file that carries an execution count reports a count of 0, i.e. the file is entirely uncovered by this run. The source follows.
//===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling mutation to pair VOPD
/// instructions back to back. It also contains subroutines useful in the
/// creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//
#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against the scalar bus limit.
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions.
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
      return false;

  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else {
      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
        addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
  }

  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;

  if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
    return false;

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}
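
For intuition, the accounting above enforces the VOPD scalar-bus rule: the two components may together consume at most one unique literal and at most two unique scalar-bus users (literals and scalar registers combined), with identical literals shared and counted once. A minimal standalone sketch of that rule, using plain integers in place of MachineOperands; BusUsage and its members are illustrative names, not LLVM API:

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-ins: each literal is a 32-bit immediate value, each
// scalar operand is an SGPR number. Identical literals are deduplicated,
// matching the isIdenticalTo() check in addLiteral above.
struct BusUsage {
  std::vector<uint32_t> Literals;
  std::vector<unsigned> ScalarRegs;

  void addLiteral(uint32_t Imm) {
    if (std::find(Literals.begin(), Literals.end(), Imm) == Literals.end())
      Literals.push_back(Imm);
  }
  void addScalarReg(unsigned Reg) {
    if (std::find(ScalarRegs.begin(), ScalarRegs.end(), Reg) ==
        ScalarRegs.end())
      ScalarRegs.push_back(Reg);
  }
  // Mirrors the two rejects above: more than one unique literal, or more
  // than two scalar-bus users in total, disqualifies the pair.
  bool fitsScalarBus() const {
    return Literals.size() <= 1 && Literals.size() + ScalarRegs.size() <= 2;
  }
};

Under this rule, components reading s0 and s1 plus one literal fail (three bus users), while two components sharing the same identical literal still pass.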
/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
/// together. When FirstMI is unspecified, check whether SecondMI can be part
/// of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  unsigned Opc2 = SecondMI.getOpcode();
  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

  // One-instruction case.
  if (!FirstMI)
    return SecondCanBeVOPD.Y;

  unsigned Opc = FirstMI->getOpcode();
  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
    return false;

  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
}
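
The opcode screen accepts a pair as soon as either ordering gives a legal (X, Y) assignment; register constraints are only checked afterwards. A sketch of that predicate in isolation, with a hypothetical CanBeVOPD flag pair standing in for what AMDGPU::getCanBeVOPD reports per opcode:

// Hypothetical mirror of the {X, Y} capability flags that
// AMDGPU::getCanBeVOPD reports for a single opcode.
struct CanBeVOPD {
  bool X = false;
  bool Y = false;
};

// True when the two candidates admit at least one legal (OpX, OpY) ordering.
static bool opcodesCanPair(CanBeVOPD First, CanBeVOPD Second) {
  return (First.X && Second.Y) || (First.Y && Second.X);
}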
namespace {
/// Adapts the design of MacroFusion: puts valid candidate instructions
/// back-to-back so they can easily be turned into VOPD instructions.
/// Greedily pairs instruction candidates; O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace
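
The apply loop is classic greedy matching: each candidate that can still fuse (hasLessThanNumFused and fuseInstructionPair are the MacroFusion.h helpers) scans forward for its first compatible partner and stops there. The same scheme in a self-contained form, over plain values with an arbitrary compatibility predicate; every name below is illustrative:

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

// Greedy O(n^2) pairing over candidates, mirroring apply() above: each
// element joins at most one pair, and the first compatible partner wins.
std::vector<std::pair<int, int>>
greedyPairs(const std::vector<int> &Items,
            const std::function<bool(int, int)> &Compatible) {
  std::vector<bool> Fused(Items.size(), false);
  std::vector<std::pair<int, int>> Pairs;
  for (std::size_t I = 0; I != Items.size(); ++I) {
    if (Fused[I])
      continue;
    for (std::size_t J = I + 1; J != Items.size(); ++J) {
      if (!Fused[J] && Compatible(Items[I], Items[J])) {
        Pairs.push_back({Items[I], Items[J]});
        Fused[I] = Fused[J] = true;
        break; // Greedy: take the first compatible partner.
      }
    }
  }
  return Pairs;
}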
std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}
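
createVOPDPairingMutation and checkVOPDRegConstraints are the file's public entry points. A scheduler attaches the mutation while building its DAG; a hedged sketch, assuming a ScheduleDAGMI-derived scheduler (AMDGPU registers this mutation for its post-RA scheduler roughly this way; the wrapper function here is illustrative only):

#include "GCNVOPDUtils.h"
#include "llvm/CodeGen/MachineScheduler.h"

// Illustrative registration: ScheduleDAGMI owns its mutations, and
// addMutation takes the std::unique_ptr by value.
static void attachVOPDPairing(llvm::ScheduleDAGMI &DAG) {
  DAG.addMutation(llvm::createVOPDPairingMutation());
}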