/src/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file This file contains the AMDGPU DAG scheduling |
10 | | /// mutation to pair VOPD instructions back to back. It also contains |
11 | | /// subroutines useful in the creation of VOPD instructions. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "GCNVOPDUtils.h" |
16 | | #include "AMDGPUSubtarget.h" |
17 | | #include "GCNSubtarget.h" |
18 | | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
19 | | #include "SIInstrInfo.h" |
20 | | #include "Utils/AMDGPUBaseInfo.h" |
21 | | #include "llvm/ADT/STLExtras.h" |
22 | | #include "llvm/ADT/SmallVector.h" |
23 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | | #include "llvm/CodeGen/MachineInstr.h" |
25 | | #include "llvm/CodeGen/MachineOperand.h" |
26 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | | #include "llvm/CodeGen/MacroFusion.h" |
28 | | #include "llvm/CodeGen/ScheduleDAG.h" |
29 | | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
30 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
31 | | #include "llvm/MC/MCInst.h" |
32 | | |
33 | | using namespace llvm; |
34 | | |
35 | | #define DEBUG_TYPE "gcn-vopd-utils" |
36 | | |
37 | | bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, |
38 | | const MachineInstr &FirstMI, |
39 | 0 | const MachineInstr &SecondMI) { |
40 | 0 | namespace VOPD = AMDGPU::VOPD; |
41 | |
|
42 | 0 | const MachineFunction *MF = FirstMI.getMF(); |
43 | 0 | const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); |
44 | 0 | const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); |
45 | 0 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
46 | | // Literals also count against scalar bus limit |
47 | 0 | SmallVector<const MachineOperand *> UniqueLiterals; |
48 | 0 | auto addLiteral = [&](const MachineOperand &Op) { |
49 | 0 | for (auto &Literal : UniqueLiterals) { |
50 | 0 | if (Literal->isIdenticalTo(Op)) |
51 | 0 | return; |
52 | 0 | } |
53 | 0 | UniqueLiterals.push_back(&Op); |
54 | 0 | }; |
55 | 0 | SmallVector<Register> UniqueScalarRegs; |
56 | 0 | assert([&]() -> bool { |
57 | 0 | for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); |
58 | 0 | MII != FirstMI.getParent()->instr_end(); ++MII) { |
59 | 0 | if (&*MII == &SecondMI) |
60 | 0 | return true; |
61 | 0 | } |
62 | 0 | return false; |
63 | 0 | }() && "Expected FirstMI to precede SecondMI"); |
64 | | // Cannot pair dependent instructions |
65 | 0 | for (const auto &Use : SecondMI.uses()) |
66 | 0 | if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI)) |
67 | 0 | return false; |
68 | | |
69 | 0 | auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) { |
70 | 0 | const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI; |
71 | 0 | const MachineOperand &Operand = MI.getOperand(OperandIdx); |
72 | 0 | if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg())) |
73 | 0 | return Operand.getReg(); |
74 | 0 | return Register(); |
75 | 0 | }; |
76 | |
|
77 | 0 | auto InstInfo = |
78 | 0 | AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc()); |
79 | |
|
80 | 0 | for (auto CompIdx : VOPD::COMPONENTS) { |
81 | 0 | const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI; |
82 | |
|
83 | 0 | const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0); |
84 | 0 | if (Src0.isReg()) { |
85 | 0 | if (!TRI->isVectorRegister(MRI, Src0.getReg())) { |
86 | 0 | if (!is_contained(UniqueScalarRegs, Src0.getReg())) |
87 | 0 | UniqueScalarRegs.push_back(Src0.getReg()); |
88 | 0 | } |
89 | 0 | } else { |
90 | 0 | if (!TII.isInlineConstant(MI, VOPD::Component::SRC0)) |
91 | 0 | addLiteral(Src0); |
92 | 0 | } |
93 | |
|
94 | 0 | if (InstInfo[CompIdx].hasMandatoryLiteral()) { |
95 | 0 | auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex(); |
96 | 0 | addLiteral(MI.getOperand(CompOprIdx)); |
97 | 0 | } |
98 | 0 | if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC)) |
99 | 0 | UniqueScalarRegs.push_back(AMDGPU::VCC_LO); |
100 | 0 | } |
101 | |
|
102 | 0 | if (UniqueLiterals.size() > 1) |
103 | 0 | return false; |
104 | 0 | if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) |
105 | 0 | return false; |
106 | | |
107 | | // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. |
108 | 0 | bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 && |
109 | 0 | FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 && |
110 | 0 | SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32; |
111 | |
|
112 | 0 | if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc)) |
113 | 0 | return false; |
114 | | |
115 | 0 | LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI |
116 | 0 | << "\n\tY: " << SecondMI << "\n"); |
117 | 0 | return true; |
118 | 0 | } |
119 | | |
120 | | /// Check if the instr pair, FirstMI and SecondMI, should be scheduled |
121 | | /// together. Given SecondMI, when FirstMI is unspecified, then check if |
122 | | /// SecondMI may be part of a fused pair at all. |
123 | | static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, |
124 | | const TargetSubtargetInfo &TSI, |
125 | | const MachineInstr *FirstMI, |
126 | 0 | const MachineInstr &SecondMI) { |
127 | 0 | const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); |
128 | 0 | unsigned Opc2 = SecondMI.getOpcode(); |
129 | 0 | auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); |
130 | | |
131 | | // One instruction case |
132 | 0 | if (!FirstMI) |
133 | 0 | return SecondCanBeVOPD.Y; |
134 | | |
135 | 0 | unsigned Opc = FirstMI->getOpcode(); |
136 | 0 | auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); |
137 | |
|
138 | 0 | if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || |
139 | 0 | (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) |
140 | 0 | return false; |
141 | | |
142 | 0 | return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); |
143 | 0 | } |
144 | | |
145 | | namespace { |
146 | | /// Adapts design from MacroFusion |
147 | | /// Puts valid candidate instructions back-to-back so they can easily |
148 | | /// be turned into VOPD instructions |
149 | | /// Greedily pairs instruction candidates. O(n^2) algorithm. |
150 | | struct VOPDPairingMutation : ScheduleDAGMutation { |
151 | | MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer |
152 | | |
153 | | VOPDPairingMutation( |
154 | | MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer |
155 | 0 | : shouldScheduleAdjacent(shouldScheduleAdjacent) {} |
156 | | |
157 | 0 | void apply(ScheduleDAGInstrs *DAG) override { |
158 | 0 | const TargetInstrInfo &TII = *DAG->TII; |
159 | 0 | const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); |
160 | 0 | if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { |
161 | 0 | LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n"); |
162 | 0 | return; |
163 | 0 | } |
164 | | |
165 | 0 | std::vector<SUnit>::iterator ISUI, JSUI; |
166 | 0 | for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { |
167 | 0 | const MachineInstr *IMI = ISUI->getInstr(); |
168 | 0 | if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) |
169 | 0 | continue; |
170 | 0 | if (!hasLessThanNumFused(*ISUI, 2)) |
171 | 0 | continue; |
172 | | |
173 | 0 | for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { |
174 | 0 | if (JSUI->isBoundaryNode()) |
175 | 0 | continue; |
176 | 0 | const MachineInstr *JMI = JSUI->getInstr(); |
177 | 0 | if (!hasLessThanNumFused(*JSUI, 2) || |
178 | 0 | !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) |
179 | 0 | continue; |
180 | 0 | if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) |
181 | 0 | break; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n"); |
185 | 0 | } |
186 | | }; |
187 | | } // namespace |
188 | | |
189 | 0 | std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { |
190 | 0 | return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); |
191 | 0 | } |