Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// Insert s_clause instructions to form hard clauses.
11
///
12
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
13
/// the hardware automatically detected "soft clauses", which were sequences of
14
/// memory instructions of the same type. In gfx10 this detection was removed,
15
/// and the s_clause instruction was introduced to explicitly mark "hard
16
/// clauses".
17
///
18
/// It's the scheduler's job to form the clauses by putting similar memory
19
/// instructions next to each other. Our job is just to insert an s_clause
20
/// instruction to mark the start of each clause.
21
///
22
/// Note that hard clauses are very similar to, but logically distinct from, the
23
/// groups of instructions that have to be restartable when XNACK is enabled.
24
/// The rules are slightly different in each case. For example an s_nop
25
/// instruction breaks a restartable group, but can appear in the middle of a
26
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27
/// "soft clauses" or just "clauses".)
28
///
29
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30
/// groups, not hard clauses.
31
//
32
//===----------------------------------------------------------------------===//
33
34
#include "AMDGPU.h"
35
#include "GCNSubtarget.h"
36
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37
#include "llvm/ADT/SmallVector.h"
38
#include "llvm/CodeGen/MachineFunctionPass.h"
39
40
using namespace llvm;
41
42
#define DEBUG_TYPE "si-insert-hard-clauses"
43
44
namespace {
45
46
// Upper bound on the number of instructions placed in one hard clause.
// The s_clause encoding has room for a length of 64, but the hardware
// documentation (at least for GFX11) gives 63 as the largest permitted value,
// so that is the limit used here.
constexpr unsigned MaxInstructionsInClause{63};
50
51
/// Classification of an instruction for the purpose of hard clause formation.
/// Every enumerator up to and including LAST_REAL_HARDCLAUSE_TYPE names a kind
/// of instruction that can make up a clause; the enumerators after it are
/// bookkeeping categories.
enum HardClauseType {
  // ---- GFX10 categories ----
  HARDCLAUSE_VMEM, ///< Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_FLAT, ///< Flat (not global or scratch) memory instructions.

  // ---- GFX11 categories ----
  HARDCLAUSE_MIMG_LOAD,   ///< Texture memory instructions: loads.
  HARDCLAUSE_MIMG_STORE,  ///< Texture memory instructions: stores.
  HARDCLAUSE_MIMG_ATOMIC, ///< Texture memory instructions: atomics.
  HARDCLAUSE_MIMG_SAMPLE, ///< Texture memory instructions: samples.
  HARDCLAUSE_VMEM_LOAD,   ///< Buffer, global or scratch memory: loads.
  HARDCLAUSE_VMEM_STORE,  ///< Buffer, global or scratch memory: stores.
  HARDCLAUSE_VMEM_ATOMIC, ///< Buffer, global or scratch memory: atomics.
  HARDCLAUSE_FLAT_LOAD,   ///< Flat (not global or scratch) memory: loads.
  HARDCLAUSE_FLAT_STORE,  ///< Flat (not global or scratch) memory: stores.
  HARDCLAUSE_FLAT_ATOMIC, ///< Flat (not global or scratch) memory: atomics.
  HARDCLAUSE_BVH,         ///< BVH instructions.

  // ---- Categories common to all generations ----
  HARDCLAUSE_LDS,  ///< Instructions that access LDS.
  HARDCLAUSE_SMEM, ///< Scalar memory instructions.
  HARDCLAUSE_VALU, ///< VALU instructions.
  /// Marks the end of the "real" clause types listed above.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions, which are allowed in the middle of a hard clause,
  /// except for s_waitcnt.
  HARDCLAUSE_INTERNAL,
  /// Meta instructions that do not result in any ISA, like KILL.
  HARDCLAUSE_IGNORE,
  /// Instructions that are not allowed in a hard clause: SALU, export, branch,
  /// message, GDS, s_waitcnt and anything else not mentioned above.
  HARDCLAUSE_ILLEGAL,
};
96
97
class SIInsertHardClauses : public MachineFunctionPass {
98
public:
99
  static char ID;
100
  const GCNSubtarget *ST = nullptr;
101
102
0
  SIInsertHardClauses() : MachineFunctionPass(ID) {}
103
104
0
  void getAnalysisUsage(AnalysisUsage &AU) const override {
105
0
    AU.setPreservesCFG();
106
0
    MachineFunctionPass::getAnalysisUsage(AU);
107
0
  }
108
109
0
  HardClauseType getHardClauseType(const MachineInstr &MI) {
110
0
    if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111
0
      if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
112
0
        if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
113
0
          if (ST->hasNSAClauseBug()) {
114
0
            const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115
0
            if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116
0
              return HARDCLAUSE_ILLEGAL;
117
0
          }
118
0
          return HARDCLAUSE_VMEM;
119
0
        }
120
0
        if (SIInstrInfo::isFLAT(MI))
121
0
          return HARDCLAUSE_FLAT;
122
0
      } else {
123
0
        assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124
0
        if (SIInstrInfo::isMIMG(MI)) {
125
0
          const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
126
0
          const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127
0
              AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
128
0
          if (BaseInfo->BVH)
129
0
            return HARDCLAUSE_BVH;
130
0
          if (BaseInfo->Sampler)
131
0
            return HARDCLAUSE_MIMG_SAMPLE;
132
0
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133
0
                                              : HARDCLAUSE_MIMG_LOAD
134
0
                              : HARDCLAUSE_MIMG_STORE;
135
0
        }
136
0
        if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
137
0
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138
0
                                              : HARDCLAUSE_VMEM_LOAD
139
0
                              : HARDCLAUSE_VMEM_STORE;
140
0
        }
141
0
        if (SIInstrInfo::isFLAT(MI)) {
142
0
          return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143
0
                                              : HARDCLAUSE_FLAT_LOAD
144
0
                              : HARDCLAUSE_FLAT_STORE;
145
0
        }
146
0
      }
147
      // TODO: LDS
148
0
      if (SIInstrInfo::isSMRD(MI))
149
0
        return HARDCLAUSE_SMEM;
150
0
    }
151
152
    // Don't form VALU clauses. It's not clear what benefit they give, if any.
153
154
    // In practice s_nop is the only internal instruction we're likely to see.
155
    // It's safe to treat the rest as illegal.
156
0
    if (MI.getOpcode() == AMDGPU::S_NOP)
157
0
      return HARDCLAUSE_INTERNAL;
158
0
    if (MI.isMetaInstruction())
159
0
      return HARDCLAUSE_IGNORE;
160
0
    return HARDCLAUSE_ILLEGAL;
161
0
  }
162
163
  // Track information about a clause as we discover it.
164
  struct ClauseInfo {
165
    // The type of all (non-internal) instructions in the clause.
166
    HardClauseType Type = HARDCLAUSE_ILLEGAL;
167
    // The first (necessarily non-internal) instruction in the clause.
168
    MachineInstr *First = nullptr;
169
    // The last non-internal instruction in the clause.
170
    MachineInstr *Last = nullptr;
171
    // The length of the clause including any internal instructions in the
172
    // middle (but not at the end) of the clause.
173
    unsigned Length = 0;
174
    // Internal instructions at the and of a clause should not be included in
175
    // the clause. Count them in TrailingInternalLength until a new memory
176
    // instruction is added.
177
    unsigned TrailingInternalLength = 0;
178
    // The base operands of *Last.
179
    SmallVector<const MachineOperand *, 4> BaseOps;
180
  };
181
182
0
  bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183
0
    if (CI.First == CI.Last)
184
0
      return false;
185
0
    assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
186
187
0
    auto &MBB = *CI.First->getParent();
188
0
    auto ClauseMI =
189
0
        BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190
0
            .addImm(CI.Length - 1);
191
0
    finalizeBundle(MBB, ClauseMI->getIterator(),
192
0
                   std::next(CI.Last->getIterator()));
193
0
    return true;
194
0
  }
195
196
0
  bool runOnMachineFunction(MachineFunction &MF) override {
197
0
    if (skipFunction(MF.getFunction()))
198
0
      return false;
199
200
0
    ST = &MF.getSubtarget<GCNSubtarget>();
201
0
    if (!ST->hasHardClauses())
202
0
      return false;
203
204
0
    const SIInstrInfo *SII = ST->getInstrInfo();
205
0
    const TargetRegisterInfo *TRI = ST->getRegisterInfo();
206
207
0
    bool Changed = false;
208
0
    for (auto &MBB : MF) {
209
0
      ClauseInfo CI;
210
0
      for (auto &MI : MBB) {
211
0
        HardClauseType Type = getHardClauseType(MI);
212
213
0
        int64_t Dummy1;
214
0
        bool Dummy2;
215
0
        unsigned Dummy3;
216
0
        SmallVector<const MachineOperand *, 4> BaseOps;
217
0
        if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
218
0
          if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
219
0
                                                  Dummy3, TRI)) {
220
            // We failed to get the base operands, so we'll never clause this
221
            // instruction with any other, so pretend it's illegal.
222
0
            Type = HARDCLAUSE_ILLEGAL;
223
0
          }
224
0
        }
225
226
0
        if (CI.Length == MaxInstructionsInClause ||
227
0
            (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228
0
             Type != HARDCLAUSE_IGNORE &&
229
0
             (Type != CI.Type ||
230
              // Note that we lie to shouldClusterMemOps about the size of the
231
              // cluster. When shouldClusterMemOps is called from the machine
232
              // scheduler it limits the size of the cluster to avoid increasing
233
              // register pressure too much, but this pass runs after register
234
              // allocation so there is no need for that kind of limit.
235
              // We also lie about the Offset and OffsetIsScalable parameters,
236
              // as they aren't used in the SIInstrInfo implementation.
237
0
              !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
238
0
                                        2, 2)))) {
239
          // Finish the current clause.
240
0
          Changed |= emitClause(CI, SII);
241
0
          CI = ClauseInfo();
242
0
        }
243
244
0
        if (CI.Length) {
245
          // Extend the current clause.
246
0
          if (Type != HARDCLAUSE_IGNORE) {
247
0
            if (Type == HARDCLAUSE_INTERNAL) {
248
0
              ++CI.TrailingInternalLength;
249
0
            } else {
250
0
              ++CI.Length;
251
0
              CI.Length += CI.TrailingInternalLength;
252
0
              CI.TrailingInternalLength = 0;
253
0
              CI.Last = &MI;
254
0
              CI.BaseOps = std::move(BaseOps);
255
0
            }
256
0
          }
257
0
        } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
258
          // Start a new clause.
259
0
          CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
260
0
        }
261
0
      }
262
263
      // Finish the last clause in the basic block if any.
264
0
      if (CI.Length)
265
0
        Changed |= emitClause(CI, SII);
266
0
    }
267
268
0
    return Changed;
269
0
  }
270
};
271
272
} // namespace
273
274
// Out-of-class definition of the pass's static ID member, which the
// constructor passes to MachineFunctionPass.
char SIInsertHardClauses::ID = 0;
275
276
// Reference in the llvm namespace that refers to the same ID object.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
277
278
// Register the pass using DEBUG_TYPE ("si-insert-hard-clauses") and the
// display name "SI Insert Hard Clauses".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the INITIALIZE_PASS macro.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
279
                false, false)