Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Line
Count
Source (jump to first uncovered line)
1
//===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a generalized class for OpenMP runtime code generation
10
// specialized by GPU targets NVPTX and AMDGCN.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16
17
#include "CGOpenMPRuntime.h"
18
#include "CodeGenFunction.h"
19
#include "clang/AST/StmtOpenMP.h"
20
21
namespace clang {
22
namespace CodeGen {
23
24
class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
25
public:
26
  /// Defines the execution mode.
27
  enum ExecutionMode {
28
    /// SPMD execution mode (all threads are worker threads).
29
    EM_SPMD,
30
    /// Non-SPMD execution mode (1 master thread, others are workers).
31
    EM_NonSPMD,
32
    /// Unknown execution mode (orphaned directive).
33
    EM_Unknown,
34
  };
35
36
  /// Target codegen is specialized based on two data-sharing modes: CUDA, in
37
  /// which the local variables are actually global threadlocal, and Generic, in
38
  /// which the local variables are placed in global memory if they may escape
39
  /// their declaration context.
40
  enum DataSharingMode {
41
    /// CUDA data sharing mode.
42
    DS_CUDA,
43
    /// Generic data-sharing mode.
44
    DS_Generic,
45
  };
46
47
private:
48
  /// Parallel outlined function work for workers to execute.
49
  llvm::SmallVector<llvm::Function *, 16> Work;
50
51
  /// State passed by reference to the target-directive helpers
  /// emitKernelInit() and emitKernelDeinit().
  struct EntryFunctionState {
52
    /// Source location carried with the state. NOTE(review): presumably the
    /// location of the target directive -- confirm against the users in the
    /// implementation file.
    SourceLocation Loc;
53
  };
54
55
  /// Returns the execution mode (one of the ExecutionMode enumerators).
  /// NOTE(review): presumably this reports CurrentExecutionMode -- confirm
  /// against the definition.
  ExecutionMode getExecutionMode() const;
56
57
  /// Returns the data-sharing mode (one of the DataSharingMode enumerators).
  /// NOTE(review): presumably this reports CurrentDataSharingMode -- confirm
  /// against the definition.
  DataSharingMode getDataSharingMode() const;
58
59
  /// Get barrier to synchronize all threads in a block.
60
  void syncCTAThreads(CodeGenFunction &CGF);
61
62
  /// Helper for target directive initialization.
63
  void emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF,
64
                      EntryFunctionState &EST, bool IsSPMD);
65
66
  /// Helper for target directive finalization.
67
  void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
68
                        bool IsSPMD);
69
70
  /// Helper for generic variables globalization prolog.
71
  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
72
73
  /// Helper for generic variables globalization epilog.
74
  void emitGenericVarsEpilog(CodeGenFunction &CGF);
75
76
  //
77
  // Base class overrides.
78
  //
79
80
  /// Emit outlined function specialized for the Fork-Join
81
  /// programming model for applicable target directives on the NVPTX device.
82
  /// \param D Directive to emit.
83
  /// \param ParentName Name of the function that encloses the target region.
84
  /// \param OutlinedFn Outlined function value to be defined by this call.
85
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
86
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
87
  /// An outlined function may not be an entry if, e.g. the if clause always
88
  /// evaluates to false.
  /// \param CodeGen Object containing the target statements.
89
  void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
90
                         llvm::Function *&OutlinedFn,
91
                         llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
92
                         const RegionCodeGenTy &CodeGen);
93
94
  /// Emit outlined function specialized for the Single Program
95
  /// Multiple Data programming model for applicable target directives on the
96
  /// NVPTX device.
97
  /// \param D Directive to emit.
98
  /// \param ParentName Name of the function that encloses the target region.
99
  /// \param OutlinedFn Outlined function value to be defined by this call.
100
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
101
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
102
  /// An outlined function may not be an entry if, e.g. the if clause always
103
  /// evaluates to false.
104
  /// \param CodeGen Object containing the target statements.
105
  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
106
                      llvm::Function *&OutlinedFn,
107
                      llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
108
                      const RegionCodeGenTy &CodeGen);
109
110
  /// Emit outlined function for 'target' directive on the NVPTX
111
  /// device.
112
  /// \param D Directive to emit.
113
  /// \param ParentName Name of the function that encloses the target region.
114
  /// \param OutlinedFn Outlined function value to be defined by this call.
115
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
116
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
117
  /// An outlined function may not be an entry if, e.g. the if clause always
118
  /// evaluates to false.
  /// \param CodeGen Object containing the target statements.
119
  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
120
                                  StringRef ParentName,
121
                                  llvm::Function *&OutlinedFn,
122
                                  llvm::Constant *&OutlinedFnID,
123
                                  bool IsOffloadEntry,
124
                                  const RegionCodeGenTy &CodeGen) override;
125
126
protected:
127
  /// Check if the default location must be constant.
128
  /// Constant for NVPTX for better optimization.
129
0
  bool isDefaultLocationConstant() const override { return true; }
130
131
public:
132
  explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
133
134
0
  /// Identifies this runtime as a GPU runtime: always returns true.
  bool isGPU() const override { return true; }
135
136
  /// Declare generalized virtual functions which need to be defined
137
  /// by all specializations of OpenMPGPURuntime Targets like AMDGCN
138
  /// and NVPTX.
139
140
  /// Check if the variable length declaration is delayed:
141
  bool isDelayedVariableLengthDecl(CodeGenFunction &CGF,
142
                                   const VarDecl *VD) const override;
143
144
  /// Get call to __kmpc_alloc_shared
145
  std::pair<llvm::Value *, llvm::Value *>
146
  getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override;
147
148
  /// Get call to __kmpc_free_shared
149
  void getKmpcFreeShared(
150
      CodeGenFunction &CGF,
151
      const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) override;
152
153
  /// Get the GPU warp size.
154
  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
155
156
  /// Get the id of the current thread on the GPU.
157
  llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
158
159
  /// Get the maximum number of threads in a block of the GPU.
160
  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
161
162
  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
163
  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
164
  void emitProcBindClause(CodeGenFunction &CGF,
165
                          llvm::omp::ProcBindKind ProcBind,
166
                          SourceLocation Loc) override;
167
168
  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
169
  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
170
  /// clause.
171
  /// \param NumThreads An integer value of threads.
172
  void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
173
                            SourceLocation Loc) override;
174
175
  /// This function ought to emit, in the general case, a call to the openmp
176
  /// runtime kmpc_push_num_teams. In NVPTX backend it is not needed as these
177
  /// numbers are obtained through the PTX grid and block configuration.
178
  /// \param NumTeams An integer expression of teams.
179
  /// \param ThreadLimit An integer expression of threads.
180
  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
181
                          const Expr *ThreadLimit, SourceLocation Loc) override;
182
183
  /// Emits outlined function for the specified OpenMP parallel directive
184
  /// \a D. This outlined function has type
185
  /// void(*)(kmp_int32 *ThreadID, kmp_int32 BoundID,
186
  /// struct context_vars*).
187
  /// \param CGF Reference to current CodeGenFunction.
188
  /// \param D OpenMP directive.
189
  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
190
  /// \param InnermostKind Kind of innermost directive (for simple directives it
191
  /// is a directive itself, for combined - its innermost directive).
192
  /// \param CodeGen Code generation sequence for the \a D directive.
193
  llvm::Function *emitParallelOutlinedFunction(
194
      CodeGenFunction &CGF, const OMPExecutableDirective &D,
195
      const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
196
      const RegionCodeGenTy &CodeGen) override;
197
198
  /// Emits outlined function for the specified OpenMP teams directive
199
  /// \a D. This outlined function has type
200
  /// void(*)(kmp_int32 *ThreadID, kmp_int32 BoundID,
201
  /// struct context_vars*).
202
  /// \param CGF Reference to current CodeGenFunction.
203
  /// \param D OpenMP directive.
204
  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
205
  /// \param InnermostKind Kind of innermost directive (for simple directives it
206
  /// is a directive itself, for combined - its innermost directive).
207
  /// \param CodeGen Code generation sequence for the \a D directive.
208
  llvm::Function *emitTeamsOutlinedFunction(
209
      CodeGenFunction &CGF, const OMPExecutableDirective &D,
210
      const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
211
      const RegionCodeGenTy &CodeGen) override;
212
213
  /// Emits code for teams call of the \a OutlinedFn with
214
  /// variables captured in a record which address is stored in \a
215
  /// CapturedStruct.
216
  /// \param OutlinedFn Outlined function to be run by team masters. Type of
217
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
218
  /// \param CapturedVars A pointer to the record with the references to
219
  /// variables used in \a OutlinedFn function.
220
  ///
221
  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
222
                     SourceLocation Loc, llvm::Function *OutlinedFn,
223
                     ArrayRef<llvm::Value *> CapturedVars) override;
224
225
  /// Emits code for parallel or serial call of the \a OutlinedFn with
226
  /// variables captured in a record which address is stored in \a
227
  /// CapturedStruct.
228
  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
229
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
230
  /// \param CapturedVars A pointer to the record with the references to
231
  /// variables used in \a OutlinedFn function.
232
  /// \param IfCond Condition in the associated 'if' clause, if it was
233
  /// specified, nullptr otherwise.
234
  /// \param NumThreads The value corresponding to the num_threads clause, if
235
  /// any,
236
  ///                   or nullptr.
237
  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
238
                        llvm::Function *OutlinedFn,
239
                        ArrayRef<llvm::Value *> CapturedVars,
240
                        const Expr *IfCond, llvm::Value *NumThreads) override;
241
242
  /// Emit an implicit/explicit barrier for OpenMP threads.
243
  /// \param Kind Directive for which this implicit barrier call must be
244
  /// generated. Must be OMPD_barrier for explicit barrier generation.
245
  /// \param EmitChecks true if need to emit checks for cancellation barriers.
246
  /// \param ForceSimpleCall true if simple barrier call must be emitted, false
247
  /// if runtime class decides which one to emit (simple or with cancellation
248
  /// checks).
249
  ///
250
  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
251
                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
252
                       bool ForceSimpleCall = false) override;
253
254
  /// Emits a critical region.
255
  /// \param CriticalName Name of the critical region.
256
  /// \param CriticalOpGen Generator for the statement associated with the given
257
  /// critical region.
258
  /// \param Hint Value of the 'hint' clause (optional).
259
  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
260
                          const RegionCodeGenTy &CriticalOpGen,
261
                          SourceLocation Loc,
262
                          const Expr *Hint = nullptr) override;
263
264
  /// Emit a code for reduction clause.
265
  ///
266
  /// \param Privates List of private copies for original reduction arguments.
267
  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
268
  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
269
  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
270
  /// or 'operator binop(LHS, RHS)'.
271
  /// \param Options List of options for reduction codegen:
272
  ///     WithNowait true if parent directive has also nowait clause, false
273
  ///     otherwise.
274
  ///     SimpleReduction Emit reduction operation only. Used for omp simd
275
  ///     directive on the host.
276
  ///     ReductionKind The kind of reduction to perform.
277
  void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
278
                     ArrayRef<const Expr *> Privates,
279
                     ArrayRef<const Expr *> LHSExprs,
280
                     ArrayRef<const Expr *> RHSExprs,
281
                     ArrayRef<const Expr *> ReductionOps,
282
                     ReductionOptionsTy Options) override;
283
284
  /// Translates the native parameter of outlined function if this is required
285
  /// for target.
286
  /// \param FD Field decl from captured record for the parameter.
287
  /// \param NativeParam Parameter itself.
288
  const VarDecl *translateParameter(const FieldDecl *FD,
289
                                    const VarDecl *NativeParam) const override;
290
291
  /// Gets the address of the native argument basing on the address of the
292
  /// target-specific parameter.
293
  /// \param NativeParam Parameter itself.
294
  /// \param TargetParam Corresponding target-specific parameter.
295
  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
296
                              const VarDecl *TargetParam) const override;
297
298
  /// Emits call of the outlined function with the provided arguments,
299
  /// translating these arguments to correct target-specific arguments.
300
  void emitOutlinedFunctionCall(
301
      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
302
      ArrayRef<llvm::Value *> Args = std::nullopt) const override;
303
304
  /// Emits OpenMP-specific function prolog.
305
  /// Required for device constructs.
306
  void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
307
308
  /// Gets the OpenMP-specific address of the local variable.
309
  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
310
                                    const VarDecl *VD) override;
311
312
  /// Cleans up references to the objects in finished function.
313
  ///
314
  void functionFinished(CodeGenFunction &CGF) override;
315
316
  /// Choose a default value for the dist_schedule clause.
317
  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
318
      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
319
      llvm::Value *&Chunk) const override;
320
321
  /// Choose a default value for the schedule clause.
322
  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
323
      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
324
      const Expr *&ChunkExpr) const override;
325
326
  /// Adjust some parameters for the target-based directives, like addresses of
327
  /// the variables captured by reference in lambdas.
328
  void adjustTargetSpecificDataForLambdas(
329
      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
330
331
  /// Perform check on requires decl to ensure that target architecture
332
  /// supports unified addressing
333
  void processRequiresDirective(const OMPRequiresDecl *D) override;
334
335
  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
336
  /// the predefined allocator and translates it into the corresponding address
337
  /// space.
338
  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
339
340
private:
341
  /// Track the execution mode when codegening directives within a target
342
  /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
343
  /// target region and used by containing directives such as 'parallel'
344
  /// to emit optimized code.
345
  ExecutionMode CurrentExecutionMode = EM_Unknown;
346
347
  /// Track the data sharing mode when codegening directives within a target
348
  /// region.
349
  DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;
350
351
  /// true if currently emitting code for target/teams/distribute region, false
352
  /// otherwise.
353
  bool IsInTTDRegion = false;
354
355
  /// Map between an outlined function and its wrapper.
356
  llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
357
358
  /// Emit function which wraps the outline parallel region
359
  /// and controls the parameters which are passed to this function.
360
  /// The wrapper ensures that the outlined function is called
361
  /// with the correct arguments when data is shared.
362
  llvm::Function *createParallelDataSharingWrapper(
363
      llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
364
365
  /// The data for the single globalized variable.
366
  struct MappedVarData {
367
    /// Corresponding field in the global record.
368
    llvm::Value *GlobalizedVal = nullptr;
369
    /// Corresponding address.
370
    Address PrivateAddr = Address::invalid();
371
  };
372
  /// The map of local variables to their addresses in the global memory.
373
  using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
374
  /// Set of the parameters passed by value escaping OpenMP context.
375
  using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
376
  /// Per-function globalization data; this is the mapped type of
  /// FunctionGlobalizedDecls.
  struct FunctionData {
377
    /// Local variables and their data in the global memory (see
    /// DeclToAddrMapTy).
    DeclToAddrMapTy LocalVarData;
378
    /// Parameters passed by value escaping OpenMP context (see
    /// EscapedParamsTy).
    EscapedParamsTy EscapedParameters;
379
    /// NOTE(review): presumably the variable-length declarations that escape
    /// their declaration context -- inferred from the name, confirm.
    llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
380
    /// NOTE(review): presumably the declarations for which
    /// isDelayedVariableLengthDecl() answers true -- confirm.
    llvm::SmallVector<const ValueDecl *, 4> DelayedVariableLengthDecls;
381
    /// Value pairs kept for escaped variable-length declarations.
    /// NOTE(review): the element type matches what getKmpcAllocShared()
    /// returns and getKmpcFreeShared() accepts as AddrSizePair, so these look
    /// like address/size pairs -- confirm.
    llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
382
        EscapedVariableLengthDeclsAddrs;
383
    /// Owned OMPMapVars object. NOTE(review): its exact role is not visible
    /// in this header -- confirm against the implementation file.
    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
384
  };
385
  /// Maps the function to the list of the globalized variables with their
386
  /// addresses.
387
  llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
388
  /// List of the records with the list of fields for the reductions across the
389
  /// teams. Used to build the intermediate buffer for the fast teams
390
  /// reductions.
391
  /// All the records are gathered into a union (`union.type` is created).
392
  llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
393
  /// Pair of the Non-SPMD team and all reductions variables in this team
394
  /// region.
395
  std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
396
      TeamAndReductions;
397
};
398
399
} // CodeGen namespace.
400
} // clang namespace.
401
402
#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H