Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp

Every line of this file that carries an execution count reports a count of 0, i.e. the file is entirely uncovered by this run. The source follows.
//===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling mutation to pair VOPD
/// instructions back to back. It also contains subroutines useful in the
/// creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//
#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

#define DEBUG_TYPE "gcn-vopd-utils"

bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI) {
  namespace VOPD = AMDGPU::VOPD;

  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // Literals also count against the scalar bus limit.
  SmallVector<const MachineOperand *> UniqueLiterals;
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions.
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
      return false;

  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
    const MachineOperand &Operand = MI.getOperand(OperandIdx);
    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
      return Operand.getReg();
    return Register();
  };

  auto InstInfo =
      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());

  for (auto CompIdx : VOPD::COMPONENTS) {
    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
    if (Src0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Src0.getReg()))
          UniqueScalarRegs.push_back(Src0.getReg());
      }
    } else {
      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
        addLiteral(Src0);
    }

    if (InstInfo[CompIdx].hasMandatoryLiteral()) {
      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
      addLiteral(MI.getOperand(CompOprIdx));
    }
    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
  }

  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2
  // source-cache.
  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;

  if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
    return false;

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}
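
For intuition, the accounting above enforces the VOPD scalar-bus rule: the two components may together consume at most one unique literal and at most two unique scalar-bus users (literals and scalar registers combined), with identical literals shared and counted once. A minimal standalone sketch of that rule, using plain integers in place of MachineOperands; BusUsage and its members are illustrative names, not LLVM API:

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-ins: each literal is a 32-bit immediate value, each
// scalar operand is an SGPR number. Identical literals are deduplicated,
// matching the isIdenticalTo() check in addLiteral above.
struct BusUsage {
  std::vector<uint32_t> Literals;
  std::vector<unsigned> ScalarRegs;

  void addLiteral(uint32_t Imm) {
    if (std::find(Literals.begin(), Literals.end(), Imm) == Literals.end())
      Literals.push_back(Imm);
  }
  void addScalarReg(unsigned Reg) {
    if (std::find(ScalarRegs.begin(), ScalarRegs.end(), Reg) ==
        ScalarRegs.end())
      ScalarRegs.push_back(Reg);
  }
  // Mirrors the two rejects above: more than one unique literal, or more
  // than two scalar-bus users in total, disqualifies the pair.
  bool fitsScalarBus() const {
    return Literals.size() <= 1 && Literals.size() + ScalarRegs.size() <= 2;
  }
};

Under this rule, components reading s0 and s1 plus one literal fail (three bus users), while two components sharing the same identical literal still pass.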
/// Check if the instruction pair, FirstMI and SecondMI, should be scheduled
/// together. When FirstMI is unspecified, check whether SecondMI can be part
/// of a fused pair at all.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &TSI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
  unsigned Opc2 = SecondMI.getOpcode();
  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);

  // One-instruction case.
  if (!FirstMI)
    return SecondCanBeVOPD.Y;

  unsigned Opc = FirstMI->getOpcode();
  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);

  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
    return false;

  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
}
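
The opcode screen accepts a pair as soon as either ordering gives a legal (X, Y) assignment; register constraints are only checked afterwards. A sketch of that predicate in isolation, with a hypothetical CanBeVOPD flag pair standing in for what AMDGPU::getCanBeVOPD reports per opcode:

// Hypothetical mirror of the {X, Y} capability flags that
// AMDGPU::getCanBeVOPD reports for a single opcode.
struct CanBeVOPD {
  bool X = false;
  bool Y = false;
};

// True when the two candidates admit at least one legal (OpX, OpY) ordering.
static bool opcodesCanPair(CanBeVOPD First, CanBeVOPD Second) {
  return (First.X && Second.Y) || (First.Y && Second.X);
}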
namespace {
/// Adapts the design of MacroFusion: puts valid candidate instructions
/// back-to-back so they can easily be turned into VOPD instructions.
/// Greedily pairs instruction candidates; O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

  VOPDPairingMutation(
      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}

  void apply(ScheduleDAGInstrs *DAG) override {
    const TargetInstrInfo &TII = *DAG->TII;
    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
      return;
    }

    std::vector<SUnit>::iterator ISUI, JSUI;
    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
      const MachineInstr *IMI = ISUI->getInstr();
      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
        continue;
      if (!hasLessThanNumFused(*ISUI, 2))
        continue;

      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
        if (JSUI->isBoundaryNode())
          continue;
        const MachineInstr *JMI = JSUI->getInstr();
        if (!hasLessThanNumFused(*JSUI, 2) ||
            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
          continue;
        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
          break;
      }
    }
    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
  }
};
} // namespace
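
The apply loop is classic greedy matching: each candidate that can still fuse (hasLessThanNumFused and fuseInstructionPair are the MacroFusion.h helpers) scans forward for its first compatible partner and stops there. The same scheme in a self-contained form, over plain values with an arbitrary compatibility predicate; every name below is illustrative:

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

// Greedy O(n^2) pairing over candidates, mirroring apply() above: each
// element joins at most one pair, and the first compatible partner wins.
std::vector<std::pair<int, int>>
greedyPairs(const std::vector<int> &Items,
            const std::function<bool(int, int)> &Compatible) {
  std::vector<bool> Fused(Items.size(), false);
  std::vector<std::pair<int, int>> Pairs;
  for (std::size_t I = 0; I != Items.size(); ++I) {
    if (Fused[I])
      continue;
    for (std::size_t J = I + 1; J != Items.size(); ++J) {
      if (!Fused[J] && Compatible(Items[I], Items[J])) {
        Pairs.push_back({Items[I], Items[J]});
        Fused[I] = Fused[J] = true;
        break; // Greedy: take the first compatible partner.
      }
    }
  }
  return Pairs;
}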
std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
}
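
createVOPDPairingMutation and checkVOPDRegConstraints are the file's public entry points. A scheduler attaches the mutation while building its DAG; a hedged sketch, assuming a ScheduleDAGMI-derived scheduler (AMDGPU registers this mutation for its post-RA scheduler roughly this way; the wrapper function here is illustrative only):

#include "GCNVOPDUtils.h"
#include "llvm/CodeGen/MachineScheduler.h"

// Illustrative registration: ScheduleDAGMI owns its mutations, and
// addMutation takes the std::unique_ptr by value.
static void attachVOPDPairing(llvm::ScheduleDAGMI &DAG) {
  DAG.addMutation(llvm::createVOPDPairingMutation());
}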