Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a class for OpenMP runtime code generation.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGOpenMPRuntime.h"
14
#include "CGCXXABI.h"
15
#include "CGCleanup.h"
16
#include "CGRecordLayout.h"
17
#include "CodeGenFunction.h"
18
#include "TargetInfo.h"
19
#include "clang/AST/APValue.h"
20
#include "clang/AST/Attr.h"
21
#include "clang/AST/Decl.h"
22
#include "clang/AST/OpenMPClause.h"
23
#include "clang/AST/StmtOpenMP.h"
24
#include "clang/AST/StmtVisitor.h"
25
#include "clang/Basic/BitmaskEnum.h"
26
#include "clang/Basic/FileManager.h"
27
#include "clang/Basic/OpenMPKinds.h"
28
#include "clang/Basic/SourceManager.h"
29
#include "clang/CodeGen/ConstantInitBuilder.h"
30
#include "llvm/ADT/ArrayRef.h"
31
#include "llvm/ADT/SetOperations.h"
32
#include "llvm/ADT/SmallBitVector.h"
33
#include "llvm/ADT/StringExtras.h"
34
#include "llvm/Bitcode/BitcodeReader.h"
35
#include "llvm/IR/Constants.h"
36
#include "llvm/IR/DerivedTypes.h"
37
#include "llvm/IR/GlobalValue.h"
38
#include "llvm/IR/InstrTypes.h"
39
#include "llvm/IR/Value.h"
40
#include "llvm/Support/AtomicOrdering.h"
41
#include "llvm/Support/Format.h"
42
#include "llvm/Support/raw_ostream.h"
43
#include <cassert>
44
#include <cstdint>
45
#include <numeric>
46
#include <optional>
47
48
using namespace clang;
49
using namespace CodeGen;
50
using namespace llvm::omp;
51
52
namespace {
53
/// Base class for handling code generation inside OpenMP regions.
54
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55
public:
56
  /// Kinds of OpenMP regions used in codegen.
57
  enum CGOpenMPRegionKind {
58
    /// Region with outlined function for standalone 'parallel'
59
    /// directive.
60
    ParallelOutlinedRegion,
61
    /// Region with outlined function for standalone 'task' directive.
62
    TaskOutlinedRegion,
63
    /// Region for constructs that do not require function outlining,
64
    /// like 'for', 'sections', 'atomic' etc. directives.
65
    InlinedRegion,
66
    /// Region with outlined function for standalone 'target' directive.
67
    TargetRegion,
68
  };
69
70
  CGOpenMPRegionInfo(const CapturedStmt &CS,
71
                     const CGOpenMPRegionKind RegionKind,
72
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73
                     bool HasCancel)
74
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75
0
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79
                     bool HasCancel)
80
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81
0
        Kind(Kind), HasCancel(HasCancel) {}
82
83
  /// Get a variable or parameter for storing global thread id
84
  /// inside OpenMP construct.
85
  virtual const VarDecl *getThreadIDVariable() const = 0;
86
87
  /// Emit the captured statement body.
88
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90
  /// Get an LValue for the current ThreadID variable.
91
  /// \return LValue for thread id variable. This LValue always has type int32*.
92
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94
0
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96
0
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98
0
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100
0
  bool hasCancel() const { return HasCancel; }
101
102
0
  static bool classof(const CGCapturedStmtInfo *Info) {
103
0
    return Info->getKind() == CR_OpenMP;
104
0
  }
105
106
  ~CGOpenMPRegionInfo() override = default;
107
108
protected:
109
  CGOpenMPRegionKind RegionKind;
110
  RegionCodeGenTy CodeGen;
111
  OpenMPDirectiveKind Kind;
112
  bool HasCancel;
113
};
114
115
/// API for captured statement code generation in OpenMP constructs.
116
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117
public:
118
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119
                             const RegionCodeGenTy &CodeGen,
120
                             OpenMPDirectiveKind Kind, bool HasCancel,
121
                             StringRef HelperName)
122
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123
                           HasCancel),
124
0
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125
0
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126
0
  }
127
128
  /// Get a variable or parameter for storing global thread id
129
  /// inside OpenMP construct.
130
0
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132
  /// Get the name of the capture helper.
133
0
  StringRef getHelperName() const override { return HelperName; }
134
135
0
  static bool classof(const CGCapturedStmtInfo *Info) {
136
0
    return CGOpenMPRegionInfo::classof(Info) &&
137
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
138
0
               ParallelOutlinedRegion;
139
0
  }
140
141
private:
142
  /// A variable or parameter storing global thread id for OpenMP
143
  /// constructs.
144
  const VarDecl *ThreadIDVar;
145
  StringRef HelperName;
146
};
147
148
/// API for captured statement code generation in OpenMP constructs.
149
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150
public:
151
  class UntiedTaskActionTy final : public PrePostActionTy {
152
    bool Untied;
153
    const VarDecl *PartIDVar;
154
    const RegionCodeGenTy UntiedCodeGen;
155
    llvm::SwitchInst *UntiedSwitch = nullptr;
156
157
  public:
158
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159
                       const RegionCodeGenTy &UntiedCodeGen)
160
0
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161
0
    void Enter(CodeGenFunction &CGF) override {
162
0
      if (Untied) {
163
        // Emit task switching point.
164
0
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165
0
            CGF.GetAddrOfLocalVar(PartIDVar),
166
0
            PartIDVar->getType()->castAs<PointerType>());
167
0
        llvm::Value *Res =
168
0
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169
0
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170
0
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171
0
        CGF.EmitBlock(DoneBB);
172
0
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
173
0
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174
0
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175
0
                              CGF.Builder.GetInsertBlock());
176
0
        emitUntiedSwitch(CGF);
177
0
      }
178
0
    }
179
0
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
180
0
      if (Untied) {
181
0
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182
0
            CGF.GetAddrOfLocalVar(PartIDVar),
183
0
            PartIDVar->getType()->castAs<PointerType>());
184
0
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185
0
                              PartIdLVal);
186
0
        UntiedCodeGen(CGF);
187
0
        CodeGenFunction::JumpDest CurPoint =
188
0
            CGF.getJumpDestInCurrentScope(".untied.next.");
189
0
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
190
0
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191
0
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192
0
                              CGF.Builder.GetInsertBlock());
193
0
        CGF.EmitBranchThroughCleanup(CurPoint);
194
0
        CGF.EmitBlock(CurPoint.getBlock());
195
0
      }
196
0
    }
197
0
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198
  };
199
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200
                                 const VarDecl *ThreadIDVar,
201
                                 const RegionCodeGenTy &CodeGen,
202
                                 OpenMPDirectiveKind Kind, bool HasCancel,
203
                                 const UntiedTaskActionTy &Action)
204
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205
0
        ThreadIDVar(ThreadIDVar), Action(Action) {
206
0
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207
0
  }
208
209
  /// Get a variable or parameter for storing global thread id
210
  /// inside OpenMP construct.
211
0
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213
  /// Get an LValue for the current ThreadID variable.
214
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216
  /// Get the name of the capture helper.
217
0
  StringRef getHelperName() const override { return ".omp_outlined."; }
218
219
0
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
220
0
    Action.emitUntiedSwitch(CGF);
221
0
  }
222
223
0
  static bool classof(const CGCapturedStmtInfo *Info) {
224
0
    return CGOpenMPRegionInfo::classof(Info) &&
225
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226
0
               TaskOutlinedRegion;
227
0
  }
228
229
private:
230
  /// A variable or parameter storing global thread id for OpenMP
231
  /// constructs.
232
  const VarDecl *ThreadIDVar;
233
  /// Action for emitting code for untied tasks.
234
  const UntiedTaskActionTy &Action;
235
};
236
237
/// API for inlined captured statement code generation in OpenMP
238
/// constructs.
239
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240
public:
241
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242
                            const RegionCodeGenTy &CodeGen,
243
                            OpenMPDirectiveKind Kind, bool HasCancel)
244
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245
        OldCSI(OldCSI),
246
0
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
247
248
  // Retrieve the value of the context parameter.
249
0
  llvm::Value *getContextValue() const override {
250
0
    if (OuterRegionInfo)
251
0
      return OuterRegionInfo->getContextValue();
252
0
    llvm_unreachable("No context value for inlined OpenMP region");
253
0
  }
254
255
0
  void setContextValue(llvm::Value *V) override {
256
0
    if (OuterRegionInfo) {
257
0
      OuterRegionInfo->setContextValue(V);
258
0
      return;
259
0
    }
260
0
    llvm_unreachable("No context value for inlined OpenMP region");
261
0
  }
262
263
  /// Lookup the captured field decl for a variable.
264
0
  const FieldDecl *lookup(const VarDecl *VD) const override {
265
0
    if (OuterRegionInfo)
266
0
      return OuterRegionInfo->lookup(VD);
267
    // If there is no outer outlined region,no need to lookup in a list of
268
    // captured variables, we can use the original one.
269
0
    return nullptr;
270
0
  }
271
272
0
  FieldDecl *getThisFieldDecl() const override {
273
0
    if (OuterRegionInfo)
274
0
      return OuterRegionInfo->getThisFieldDecl();
275
0
    return nullptr;
276
0
  }
277
278
  /// Get a variable or parameter for storing global thread id
279
  /// inside OpenMP construct.
280
0
  const VarDecl *getThreadIDVariable() const override {
281
0
    if (OuterRegionInfo)
282
0
      return OuterRegionInfo->getThreadIDVariable();
283
0
    return nullptr;
284
0
  }
285
286
  /// Get an LValue for the current ThreadID variable.
287
0
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288
0
    if (OuterRegionInfo)
289
0
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290
0
    llvm_unreachable("No LValue for inlined OpenMP construct");
291
0
  }
292
293
  /// Get the name of the capture helper.
294
0
  StringRef getHelperName() const override {
295
0
    if (auto *OuterRegionInfo = getOldCSI())
296
0
      return OuterRegionInfo->getHelperName();
297
0
    llvm_unreachable("No helper name for inlined OpenMP construct");
298
0
  }
299
300
0
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
301
0
    if (OuterRegionInfo)
302
0
      OuterRegionInfo->emitUntiedSwitch(CGF);
303
0
  }
304
305
0
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307
0
  static bool classof(const CGCapturedStmtInfo *Info) {
308
0
    return CGOpenMPRegionInfo::classof(Info) &&
309
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310
0
  }
311
312
  ~CGOpenMPInlinedRegionInfo() override = default;
313
314
private:
315
  /// CodeGen info about outer OpenMP region.
316
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317
  CGOpenMPRegionInfo *OuterRegionInfo;
318
};
319
320
/// API for captured statement code generation in OpenMP target
321
/// constructs. For this captures, implicit parameters are used instead of the
322
/// captured fields. The name of the target region has to be unique in a given
323
/// application so it is provided by the client, because only the client has
324
/// the information to generate that.
325
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326
public:
327
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
329
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330
                           /*HasCancel=*/false),
331
0
        HelperName(HelperName) {}
332
333
  /// This is unused for target regions because each starts executing
334
  /// with a single thread.
335
0
  const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337
  /// Get the name of the capture helper.
338
0
  StringRef getHelperName() const override { return HelperName; }
339
340
0
  static bool classof(const CGCapturedStmtInfo *Info) {
341
0
    return CGOpenMPRegionInfo::classof(Info) &&
342
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343
0
  }
344
345
private:
346
  StringRef HelperName;
347
};
348
349
0
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350
0
  llvm_unreachable("No codegen for expressions");
351
0
}
352
/// API for generation of expressions captured in a innermost OpenMP
353
/// region.
354
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355
public:
356
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358
                                  OMPD_unknown,
359
                                  /*HasCancel=*/false),
360
0
        PrivScope(CGF) {
361
    // Make sure the globals captured in the provided statement are local by
362
    // using the privatization logic. We assume the same variable is not
363
    // captured more than once.
364
0
    for (const auto &C : CS.captures()) {
365
0
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
366
0
        continue;
367
368
0
      const VarDecl *VD = C.getCapturedVar();
369
0
      if (VD->isLocalVarDeclOrParm())
370
0
        continue;
371
372
0
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373
0
                      /*RefersToEnclosingVariableOrCapture=*/false,
374
0
                      VD->getType().getNonReferenceType(), VK_LValue,
375
0
                      C.getLocation());
376
0
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
377
0
    }
378
0
    (void)PrivScope.Privatize();
379
0
  }
380
381
  /// Lookup the captured field decl for a variable.
382
0
  const FieldDecl *lookup(const VarDecl *VD) const override {
383
0
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384
0
      return FD;
385
0
    return nullptr;
386
0
  }
387
388
  /// Emit the captured statement body.
389
0
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390
0
    llvm_unreachable("No body for expressions");
391
0
  }
392
393
  /// Get a variable or parameter for storing global thread id
394
  /// inside OpenMP construct.
395
0
  const VarDecl *getThreadIDVariable() const override {
396
0
    llvm_unreachable("No thread id for expressions");
397
0
  }
398
399
  /// Get the name of the capture helper.
400
0
  StringRef getHelperName() const override {
401
0
    llvm_unreachable("No helper name for expressions");
402
0
  }
403
404
0
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406
private:
407
  /// Private scope to capture global variables.
408
  CodeGenFunction::OMPPrivateScope PrivScope;
409
};
410
411
/// RAII for emitting code of OpenMP constructs.
412
class InlinedOpenMPRegionRAII {
413
  CodeGenFunction &CGF;
414
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415
  FieldDecl *LambdaThisCaptureField = nullptr;
416
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417
  bool NoInheritance = false;
418
419
public:
420
  /// Constructs region for combined constructs.
421
  /// \param CodeGen Code generation sequence for combined directives. Includes
422
  /// a list of functions used for code generation of implicitly inlined
423
  /// regions.
424
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425
                          OpenMPDirectiveKind Kind, bool HasCancel,
426
                          bool NoInheritance = true)
427
0
      : CGF(CGF), NoInheritance(NoInheritance) {
428
    // Start emission for the construct.
429
0
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430
0
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431
0
    if (NoInheritance) {
432
0
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
433
0
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434
0
      CGF.LambdaThisCaptureField = nullptr;
435
0
      BlockInfo = CGF.BlockInfo;
436
0
      CGF.BlockInfo = nullptr;
437
0
    }
438
0
  }
439
440
0
  ~InlinedOpenMPRegionRAII() {
441
    // Restore original CapturedStmtInfo only if we're done with code emission.
442
0
    auto *OldCSI =
443
0
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
444
0
    delete CGF.CapturedStmtInfo;
445
0
    CGF.CapturedStmtInfo = OldCSI;
446
0
    if (NoInheritance) {
447
0
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
448
0
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449
0
      CGF.BlockInfo = BlockInfo;
450
0
    }
451
0
  }
452
};
453
454
/// Values for bit flags used in the ident_t to describe the fields.
455
/// All enumeric elements are named and described in accordance with the code
456
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457
enum OpenMPLocationFlags : unsigned {
458
  /// Use trampoline for internal microtask.
459
  OMP_IDENT_IMD = 0x01,
460
  /// Use c-style ident structure.
461
  OMP_IDENT_KMPC = 0x02,
462
  /// Atomic reduction option for kmpc_reduce.
463
  OMP_ATOMIC_REDUCE = 0x10,
464
  /// Explicit 'barrier' directive.
465
  OMP_IDENT_BARRIER_EXPL = 0x20,
466
  /// Implicit barrier in code.
467
  OMP_IDENT_BARRIER_IMPL = 0x40,
468
  /// Implicit barrier in 'for' directive.
469
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470
  /// Implicit barrier in 'sections' directive.
471
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472
  /// Implicit barrier in 'single' directive.
473
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474
  /// Call of __kmp_for_static_init for static loop.
475
  OMP_IDENT_WORK_LOOP = 0x200,
476
  /// Call of __kmp_for_static_init for sections.
477
  OMP_IDENT_WORK_SECTIONS = 0x400,
478
  /// Call of __kmp_for_static_init for distribute.
479
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481
};
482
483
/// Describes ident structure that describes a source location.
484
/// All descriptions are taken from
485
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
486
/// Original structure:
487
/// typedef struct ident {
488
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
489
///                                  see above  */
490
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
491
///                                  KMP_IDENT_KMPC identifies this union
492
///                                  member  */
493
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
494
///                                  see above */
495
///#if USE_ITT_BUILD
496
///                            /*  but currently used for storing
497
///                                region-specific ITT */
498
///                            /*  contextual information. */
499
///#endif /* USE_ITT_BUILD */
500
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
501
///                                 C++  */
502
///    char const *psource;    /**< String describing the source location.
503
///                            The string is composed of semi-colon separated
504
//                             fields which describe the source file,
505
///                            the function and a pair of line numbers that
506
///                            delimit the construct.
507
///                             */
508
/// } ident_t;
509
enum IdentFieldIndex {
510
  /// might be used in Fortran
511
  IdentField_Reserved_1,
512
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
513
  IdentField_Flags,
514
  /// Not really used in Fortran any more
515
  IdentField_Reserved_2,
516
  /// Source[4] in Fortran, do not use for C++
517
  IdentField_Reserved_3,
518
  /// String describing the source location. The string is composed of
519
  /// semi-colon separated fields which describe the source file, the function
520
  /// and a pair of line numbers that delimit the construct.
521
  IdentField_PSource
522
};
523
524
/// Schedule types for 'omp for' loops (these enumerators are taken from
525
/// the enum sched_type in kmp.h).
526
enum OpenMPSchedType {
527
  /// Lower bound for default (unordered) versions.
528
  OMP_sch_lower = 32,
529
  OMP_sch_static_chunked = 33,
530
  OMP_sch_static = 34,
531
  OMP_sch_dynamic_chunked = 35,
532
  OMP_sch_guided_chunked = 36,
533
  OMP_sch_runtime = 37,
534
  OMP_sch_auto = 38,
535
  /// static with chunk adjustment (e.g., simd)
536
  OMP_sch_static_balanced_chunked = 45,
537
  /// Lower bound for 'ordered' versions.
538
  OMP_ord_lower = 64,
539
  OMP_ord_static_chunked = 65,
540
  OMP_ord_static = 66,
541
  OMP_ord_dynamic_chunked = 67,
542
  OMP_ord_guided_chunked = 68,
543
  OMP_ord_runtime = 69,
544
  OMP_ord_auto = 70,
545
  OMP_sch_default = OMP_sch_static,
546
  /// dist_schedule types
547
  OMP_dist_sch_static_chunked = 91,
548
  OMP_dist_sch_static = 92,
549
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
550
  /// Set if the monotonic schedule modifier was present.
551
  OMP_sch_modifier_monotonic = (1 << 29),
552
  /// Set if the nonmonotonic schedule modifier was present.
553
  OMP_sch_modifier_nonmonotonic = (1 << 30),
554
};
555
556
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557
/// region.
558
class CleanupTy final : public EHScopeStack::Cleanup {
559
  PrePostActionTy *Action;
560
561
public:
562
0
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563
0
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564
0
    if (!CGF.HaveInsertPoint())
565
0
      return;
566
0
    Action->Exit(CGF);
567
0
  }
568
};
569
570
} // anonymous namespace
571
572
0
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
573
0
  CodeGenFunction::RunCleanupsScope Scope(CGF);
574
0
  if (PrePostAction) {
575
0
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576
0
    Callback(CodeGen, CGF, *PrePostAction);
577
0
  } else {
578
0
    PrePostActionTy Action;
579
0
    Callback(CodeGen, CGF, Action);
580
0
  }
581
0
}
582
583
/// Check if the combiner is a call to UDR combiner and if it is so return the
584
/// UDR decl used for reduction.
585
static const OMPDeclareReductionDecl *
586
0
getReductionInit(const Expr *ReductionOp) {
587
0
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588
0
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589
0
      if (const auto *DRE =
590
0
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591
0
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592
0
          return DRD;
593
0
  return nullptr;
594
0
}
595
596
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
597
                                             const OMPDeclareReductionDecl *DRD,
598
                                             const Expr *InitOp,
599
                                             Address Private, Address Original,
600
0
                                             QualType Ty) {
601
0
  if (DRD->getInitializer()) {
602
0
    std::pair<llvm::Function *, llvm::Function *> Reduction =
603
0
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
604
0
    const auto *CE = cast<CallExpr>(InitOp);
605
0
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606
0
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607
0
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608
0
    const auto *LHSDRE =
609
0
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610
0
    const auto *RHSDRE =
611
0
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612
0
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613
0
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614
0
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615
0
    (void)PrivateScope.Privatize();
616
0
    RValue Func = RValue::get(Reduction.second);
617
0
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
618
0
    CGF.EmitIgnoredExpr(InitOp);
619
0
  } else {
620
0
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621
0
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622
0
    auto *GV = new llvm::GlobalVariable(
623
0
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624
0
        llvm::GlobalValue::PrivateLinkage, Init, Name);
625
0
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
626
0
    RValue InitRVal;
627
0
    switch (CGF.getEvaluationKind(Ty)) {
628
0
    case TEK_Scalar:
629
0
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630
0
      break;
631
0
    case TEK_Complex:
632
0
      InitRVal =
633
0
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
634
0
      break;
635
0
    case TEK_Aggregate: {
636
0
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637
0
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638
0
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639
0
                           /*IsInitializer=*/false);
640
0
      return;
641
0
    }
642
0
    }
643
0
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644
0
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645
0
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646
0
                         /*IsInitializer=*/false);
647
0
  }
648
0
}
649
650
/// Emit initialization of arrays of complex types.
651
/// \param DestAddr Address of the array.
652
/// \param Type Type of array.
653
/// \param Init Initial expression of array.
654
/// \param SrcAddr Address of the original array.
655
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
656
                                 QualType Type, bool EmitDeclareReductionInit,
657
                                 const Expr *Init,
658
                                 const OMPDeclareReductionDecl *DRD,
659
0
                                 Address SrcAddr = Address::invalid()) {
660
  // Perform element-by-element initialization.
661
0
  QualType ElementTy;
662
663
  // Drill down to the base element type on both arrays.
664
0
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665
0
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666
0
  if (DRD)
667
0
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669
0
  llvm::Value *SrcBegin = nullptr;
670
0
  if (DRD)
671
0
    SrcBegin = SrcAddr.getPointer();
672
0
  llvm::Value *DestBegin = DestAddr.getPointer();
673
  // Cast from pointer to array type to pointer to single element.
674
0
  llvm::Value *DestEnd =
675
0
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676
  // The basic structure here is a while-do loop.
677
0
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678
0
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679
0
  llvm::Value *IsEmpty =
680
0
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681
0
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683
  // Enter the loop body, making that address the current address.
684
0
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685
0
  CGF.EmitBlock(BodyBB);
686
687
0
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689
0
  llvm::PHINode *SrcElementPHI = nullptr;
690
0
  Address SrcElementCurrent = Address::invalid();
691
0
  if (DRD) {
692
0
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693
0
                                          "omp.arraycpy.srcElementPast");
694
0
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695
0
    SrcElementCurrent =
696
0
        Address(SrcElementPHI, SrcAddr.getElementType(),
697
0
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698
0
  }
699
0
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700
0
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701
0
  DestElementPHI->addIncoming(DestBegin, EntryBB);
702
0
  Address DestElementCurrent =
703
0
      Address(DestElementPHI, DestAddr.getElementType(),
704
0
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706
  // Emit copy.
707
0
  {
708
0
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
709
0
    if (EmitDeclareReductionInit) {
710
0
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711
0
                                       SrcElementCurrent, ElementTy);
712
0
    } else
713
0
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714
0
                           /*IsInitializer=*/false);
715
0
  }
716
717
0
  if (DRD) {
718
    // Shift the address forward by one element.
719
0
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720
0
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721
0
        "omp.arraycpy.dest.element");
722
0
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723
0
  }
724
725
  // Shift the address forward by one element.
726
0
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727
0
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728
0
      "omp.arraycpy.dest.element");
729
  // Check whether we've reached the end.
730
0
  llvm::Value *Done =
731
0
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732
0
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733
0
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735
  // Done.
736
0
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737
0
}
738
739
0
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740
0
  return CGF.EmitOMPSharedLValue(E);
741
0
}
742
743
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744
0
                                            const Expr *E) {
745
0
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
746
0
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
747
0
  return LValue();
748
0
}
749
750
/// Emit initialization of an array-typed private reduction copy.
/// \param N           Index of the reduction clause item being initialized.
/// \param PrivateAddr Address of the private copy to initialize.
/// \param SharedAddr  Address of the original shared variable.
/// \param DRD         User-defined 'declare reduction' decl, or null.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one is declared, or when the private copy
  // carries no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
766
767
/// Collect per-clause data for all reduction items. The four input arrays run
/// in parallel, one entry per item; bookkeeping vectors that are filled in
/// lazily later are pre-sized here.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  const size_t NumItems = Shareds.size();
  ClausesData.reserve(NumItems);
  SharedAddresses.reserve(NumItems);
  Sizes.reserve(NumItems);
  BaseDecls.reserve(NumItems);
  for (size_t Idx = 0; Idx < NumItems; ++Idx)
    ClausesData.emplace_back(Shareds[Idx], Origs[Idx], Privates[Idx],
                             ReductionOps[Idx]);
}
785
786
0
/// Emit lvalues for both the shared and the original expression of reduction
/// item N and append them to the address lists. Must be called after exactly
/// N items have been emitted.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue SharedLV = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue SharedUBLV = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(SharedLV, SharedUBLV);
  if (ClausesData[N].Shared != ClausesData[N].Ref) {
    // The original expression differs from the shared one; emit it separately
    // (lower bound first, then upper bound, matching the shared emission).
    LValue OrigLV = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue OrigUBLV = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(OrigLV, OrigUBLV);
  } else {
    // Same expression: reuse the already-emitted lvalues.
    OrigAddresses.emplace_back(SharedLV, SharedUBLV);
  }
}
800
801
0
/// Compute and record the size of reduction item N. Fixed-size items get a
/// constant byte size and no element count; VLA-typed items get both the byte
/// size and element count, and the VLA size expression is bound so the
/// variably-modified private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: record the byte size only.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, then scale by the element size.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole-object item: derive the element count from the byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count while the
  // variably-modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
834
835
/// Re-emit the variably-modified type of reduction item N using an externally
/// supplied element count \p Size (used when the size was computed in a
/// different function, e.g. the outlined region).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    // Non-VLA items must not carry a size here or in the recorded Sizes.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851
852
/// Emit initialization of the private copy for reduction item N.
/// Chooses between aggregate (array) initialization, a user-defined
/// reduction initializer, and the private variable's own initializer.
/// \param DefaultInit Callback for the default initialization path; its
///        boolean result decides whether the fallback branch still needs to
///        emit the private initializer explicitly.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed items go through element-wise aggregate initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item initialized by the user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
876
877
0
bool ReductionCodeGen::needCleanups(unsigned N) {
878
0
  QualType PrivateType = getPrivateType(N);
879
0
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
880
0
  return DTorKind != QualType::DK_none;
881
0
}
882
883
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
884
0
                                    Address PrivateAddr) {
885
0
  QualType PrivateType = getPrivateType(N);
886
0
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887
0
  if (needCleanups(N)) {
888
0
    PrivateAddr =
889
0
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890
0
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891
0
  }
892
0
}
893
894
/// Walk the pointer/reference levels of \p BaseTy, loading through each one,
/// until the type matches \p ElTy; return the resulting lvalue with its
/// element type converted to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: materialize and load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve base info / TBAA of the final lvalue while retyping its address.
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
912
913
/// Rebuild the pointer/reference chain of \p BaseTy around \p Addr: one
/// temporary is created per indirection level and chained via stores so that
/// loading through the returned (outermost) address eventually yields
/// \p Addr. If BaseTy has no such levels, \p OriginalBaseAddress is returned
/// re-pointed at \p Addr.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // New temporary for this level; link it into the previous level, or
    // remember it as the outermost one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand back
    // the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection levels: just retype the pointer onto the original base.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
941
942
0
/// Strip array sections and subscripts from \p Ref down to its base
/// DeclRefExpr. On success \p DE is set to that base expression and the
/// referenced VarDecl is returned; for any other expression form \p DE is
/// left untouched and nullptr is returned.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    // Peel nested sections first, then any subscripts beneath them.
    while (const auto *InnerOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = InnerOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *InnerASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = InnerASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    return cast<VarDecl>(DE->getDecl());
  }
  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *InnerASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = InnerASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    return cast<VarDecl>(DE->getDecl());
  }
  return nullptr;
}
961
962
/// Adjust the private copy's address for reduction item N when the reduction
/// expression is an array section/subscript: offset the private pointer by
/// the distance between the shared lvalue and its base variable, then rebuild
/// the base's pointer chain around it. For plain variables the address is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Distance (in elements) from the shared item back to its base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: record the decl, keep the address as-is.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
988
989
0
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
990
0
  const OMPDeclareReductionDecl *DRD =
991
0
      getReductionInit(ClausesData[N].ReductionOp);
992
0
  return DRD && DRD->getInitializer();
993
0
}
994
995
0
/// Load the thread-id lvalue through the pointer-typed thread-id variable of
/// this region (the variable holds a kmp_int32 *).
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1000
1001
0
/// Emit the structured block of an OpenMP region inside a terminate scope so
/// exceptions cannot escape it, bumping the profile counter for \p S first.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1015
1016
/// For task regions the thread-id variable holds the value directly (not a
/// pointer), so build a plain address lvalue for it.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1022
1023
/// Create an unnamed public field of type \p FieldTy, add it to record
/// context \p DC, and return it. Used to build the implicit records (e.g.
/// kmp_task_t payloads) emitted by the OpenMP runtime glue.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
1033
1034
/// Construct the OpenMP runtime helper: set up the kmp_critical_name type,
/// configure and initialize the OpenMPIRBuilder, and (on device compiles)
/// load the host IR's offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name is an array of 8 int32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  // Device compiles read offload entry metadata from the host IR file.
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);
}
1048
1049
0
void CGOpenMPRuntime::clear() {
1050
0
  InternalVars.clear();
1051
  // Clean non-target variable declarations possibly used only in debug info.
1052
0
  for (const auto &Data : EmittedNonTargetVariables) {
1053
0
    if (!Data.getValue().pointsToAliveValue())
1054
0
      continue;
1055
0
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1056
0
    if (!GV)
1057
0
      continue;
1058
0
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1059
0
      continue;
1060
0
    GV->eraseFromParent();
1061
0
  }
1062
0
}
1063
1064
0
/// Join the given name parts with the platform-specific separator used by the
/// OpenMPIRBuilder when mangling runtime helper names.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
1067
1068
/// Emit the combiner or initializer helper for a user-defined reduction:
///   void .omp_combiner.(Ty *omp_out, Ty *omp_in)  (or .omp_initializer.).
/// The helper privatizes \p In / \p Out to the loaded parameter addresses,
/// runs Out's default initializer for initializers that are not "call"-style,
/// and finally emits \p CombinerInitializer if provided.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // Initializer helper without a call-style initializer: run Out's own
  // non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1122
1123
/// Emit (once) the combiner and optional initializer helpers for a
/// 'declare reduction' decl and cache them in UDRMap. When called from within
/// a function (\p CGF non-null), the decl is also recorded against that
/// function so per-function state can be cleaned up later.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only "call"-style initializers pass the expression through; the
    // direct-init form is handled inside the helper via the priv decl.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1148
1149
/// Return the {combiner, initializer} function pair for \p D, emitting the
/// helpers on demand if they have not been generated yet.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  if (auto It = UDRMap.find(D); It != UDRMap.end())
    return It->second;
  // Not emitted yet: emit now and fetch the freshly cached entry.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
1157
1158
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback onto \p OMPBuilder (if non-null) that
  /// routes cancellation out of the region through Clang's cleanup machinery;
  /// the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder whose finalization stack we pushed onto; null means no-op RAII.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1202
1203
/// Outline the captured statement of a parallel/teams directive into a
/// function named via \p OutlinedHelperName. Determines whether the region
/// may be cancelled (per directive kind) so that cancellation barriers are
/// wired up correctly.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Each parallel-family directive kind carries its own hasCancel() flag.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1239
1240
0
/// Build the outlined-helper name for \p Name by appending the
/// platform-specific "omp_outlined" suffix.
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  return (Name + getName({"omp_outlined"})).str();
}
1244
1245
0
/// Convenience overload: derive the outlined-helper name from the function
/// currently being emitted in \p CGF.
std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}
1248
1249
0
/// Build the reduction-function name for \p Name by appending the
/// platform-specific "omp.reduction.reduction_func" suffix.
std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  return (Name + getName({"omp", "reduction", "reduction_func"})).str();
}
1253
1254
/// Outline the 'parallel' captured region of \p D into a helper function
/// named after the current function in \p CGF.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
1263
1264
/// Outline the 'teams' captured region of \p D into a helper function named
/// after the current function in \p CGF.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
1273
1274
/// Outline a task/taskloop region. For untied tasks, an action is installed
/// that re-enqueues the task via __kmpc_omp_task at each scheduling point and
/// reports back the number of generated task parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen run at untied-task scheduling points: re-enqueue this task via
  // __kmpc_omp_task(loc, tid, task_t*).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Each task-family directive kind carries its own hasCancel() flag.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1320
1321
/// Install the "service" insert point for the current function: a dummy
/// no-op bitcast instruction that loc/thread-id computations are inserted
/// before. Placed either at the current builder position or right after the
/// alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // The bitcast of undef is a placeholder marker, never a real value.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1336
1337
0
/// Remove the per-function "service" insert point marker, if one was set,
/// and erase its placeholder instruction from the IR.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  llvm::Instruction *InsertPt = Elem.second.ServiceInsertPt;
  if (!InsertPt)
    return;
  Elem.second.ServiceInsertPt = nullptr;
  InsertPt->eraseFromParent();
}
1345
1346
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1347
                                                  SourceLocation Loc,
1348
0
                                                  SmallString<128> &Buffer) {
1349
0
  llvm::raw_svector_ostream OS(Buffer);
1350
  // Build debug location
1351
0
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1352
0
  OS << ";" << PLoc.getFilename() << ";";
1353
0
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1354
0
    OS << FD->getQualifiedNameAsString();
1355
0
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1356
0
  return OS.str();
1357
0
}
1358
1359
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1360
                                                 SourceLocation Loc,
1361
0
                                                 unsigned Flags, bool EmitLoc) {
1362
0
  uint32_t SrcLocStrSize;
1363
0
  llvm::Constant *SrcLocStr;
1364
0
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1365
0
                       llvm::codegenoptions::NoDebugInfo) ||
1366
0
      Loc.isInvalid()) {
1367
0
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1368
0
  } else {
1369
0
    std::string FunctionName;
1370
0
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1371
0
      FunctionName = FD->getQualifiedNameAsString();
1372
0
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1373
0
    const char *FileName = PLoc.getFilename();
1374
0
    unsigned Line = PLoc.getLine();
1375
0
    unsigned Column = PLoc.getColumn();
1376
0
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1377
0
                                                Column, SrcLocStrSize);
1378
0
  }
1379
0
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1380
0
  return OMPBuilder.getOrCreateIdent(
1381
0
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1382
0
}
1383
1384
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1385
0
                                          SourceLocation Loc) {
1386
0
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1387
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1388
  // the clang invariants used below might be broken.
1389
0
  if (CGM.getLangOpts().OpenMPIRBuilder) {
1390
0
    SmallString<128> Buffer;
1391
0
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1392
0
    uint32_t SrcLocStrSize;
1393
0
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1394
0
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1395
0
    return OMPBuilder.getOrCreateThreadID(
1396
0
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1397
0
  }
1398
1399
0
  llvm::Value *ThreadID = nullptr;
1400
  // Check whether we've already cached a load of the thread id in this
1401
  // function.
1402
0
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1403
0
  if (I != OpenMPLocThreadIDMap.end()) {
1404
0
    ThreadID = I->second.ThreadID;
1405
0
    if (ThreadID != nullptr)
1406
0
      return ThreadID;
1407
0
  }
1408
  // If exceptions are enabled, do not use parameter to avoid possible crash.
1409
0
  if (auto *OMPRegionInfo =
1410
0
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1411
0
    if (OMPRegionInfo->getThreadIDVariable()) {
1412
      // Check if this an outlined function with thread id passed as argument.
1413
0
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1414
0
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1415
0
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1416
0
          !CGF.getLangOpts().CXXExceptions ||
1417
0
          CGF.Builder.GetInsertBlock() == TopBlock ||
1418
0
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1419
0
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1420
0
              TopBlock ||
1421
0
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1422
0
              CGF.Builder.GetInsertBlock()) {
1423
0
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1424
        // If value loaded in entry block, cache it and use it everywhere in
1425
        // function.
1426
0
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
1427
0
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1428
0
          Elem.second.ThreadID = ThreadID;
1429
0
        }
1430
0
        return ThreadID;
1431
0
      }
1432
0
    }
1433
0
  }
1434
1435
  // This is not an outlined function region - need to call __kmpc_int32
1436
  // kmpc_global_thread_num(ident_t *loc).
1437
  // Generate thread id value and cache this value for use across the
1438
  // function.
1439
0
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1440
0
  if (!Elem.second.ServiceInsertPt)
1441
0
    setLocThreadIdInsertPt(CGF);
1442
0
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1443
0
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1444
0
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1445
0
  llvm::CallInst *Call = CGF.Builder.CreateCall(
1446
0
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1447
0
                                            OMPRTL___kmpc_global_thread_num),
1448
0
      emitUpdateLocation(CGF, Loc));
1449
0
  Call->setCallingConv(CGF.getRuntimeCC());
1450
0
  Elem.second.ThreadID = Call;
1451
0
  return Call;
1452
0
}
1453
1454
0
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1455
0
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456
0
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1457
0
    clearLocThreadIdInsertPt(CGF);
1458
0
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
1459
0
  }
1460
0
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461
0
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
1462
0
      UDRMap.erase(D);
1463
0
    FunctionUDRMap.erase(CGF.CurFn);
1464
0
  }
1465
0
  auto I = FunctionUDMMap.find(CGF.CurFn);
1466
0
  if (I != FunctionUDMMap.end()) {
1467
0
    for(const auto *D : I->second)
1468
0
      UDMMap.erase(D);
1469
0
    FunctionUDMMap.erase(I);
1470
0
  }
1471
0
  LastprivateConditionalToTypes.erase(CGF.CurFn);
1472
0
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1473
0
}
1474
1475
0
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476
0
  return OMPBuilder.IdentPtr;
1477
0
}
1478
1479
0
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1480
0
  if (!Kmpc_MicroTy) {
1481
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482
0
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483
0
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484
0
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1485
0
  }
1486
0
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1487
0
}
1488
1489
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1490
0
convertDeviceClause(const VarDecl *VD) {
1491
0
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1492
0
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
1493
0
  if (!DevTy)
1494
0
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1495
1496
0
  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1497
0
  case OMPDeclareTargetDeclAttr::DT_Host:
1498
0
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1499
0
    break;
1500
0
  case OMPDeclareTargetDeclAttr::DT_NoHost:
1501
0
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1502
0
    break;
1503
0
  case OMPDeclareTargetDeclAttr::DT_Any:
1504
0
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1505
0
    break;
1506
0
  default:
1507
0
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1508
0
    break;
1509
0
  }
1510
0
}
1511
1512
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1513
0
convertCaptureClause(const VarDecl *VD) {
1514
0
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1515
0
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1516
0
  if (!MapType)
1517
0
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1518
0
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1519
0
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1520
0
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1521
0
    break;
1522
0
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1523
0
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1524
0
    break;
1525
0
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1526
0
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1527
0
    break;
1528
0
  default:
1529
0
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1530
0
    break;
1531
0
  }
1532
0
}
1533
1534
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536
0
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1537
1538
0
  auto FileInfoCallBack = [&]() {
1539
0
    SourceManager &SM = CGM.getContext().getSourceManager();
1540
0
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1541
1542
0
    llvm::sys::fs::UniqueID ID;
1543
0
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544
0
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1545
0
    }
1546
1547
0
    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1548
0
  };
1549
1550
0
  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1551
0
}
1552
1553
0
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554
0
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1555
1556
0
  auto LinkageForVariable = [&VD, this]() {
1557
0
    return CGM.getLLVMLinkageVarDefinition(VD);
1558
0
  };
1559
1560
0
  std::vector<llvm::GlobalVariable *> GeneratedRefs;
1561
1562
0
  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563
0
      CGM.getContext().getPointerType(VD->getType()));
1564
0
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565
0
      convertCaptureClause(VD), convertDeviceClause(VD),
1566
0
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567
0
      VD->isExternallyVisible(),
1568
0
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569
0
                                  VD->getCanonicalDecl()->getBeginLoc()),
1570
0
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571
0
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572
0
      LinkageForVariable);
1573
1574
0
  if (!addr)
1575
0
    return Address::invalid();
1576
0
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1577
0
}
1578
1579
llvm::Constant *
1580
0
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581
0
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582
0
         !CGM.getContext().getTargetInfo().isTLSSupported());
1583
  // Lookup the entry, lazily creating it if necessary.
1584
0
  std::string Suffix = getName({"cache", ""});
1585
0
  return OMPBuilder.getOrCreateInternalVariable(
1586
0
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1587
0
}
1588
1589
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590
                                                const VarDecl *VD,
1591
                                                Address VDAddr,
1592
0
                                                SourceLocation Loc) {
1593
0
  if (CGM.getLangOpts().OpenMPUseTLS &&
1594
0
      CGM.getContext().getTargetInfo().isTLSSupported())
1595
0
    return VDAddr;
1596
1597
0
  llvm::Type *VarTy = VDAddr.getElementType();
1598
0
  llvm::Value *Args[] = {
1599
0
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1600
0
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1601
0
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1602
0
      getOrCreateThreadPrivateCache(VD)};
1603
0
  return Address(
1604
0
      CGF.EmitRuntimeCall(
1605
0
          OMPBuilder.getOrCreateRuntimeFunction(
1606
0
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607
0
          Args),
1608
0
      CGF.Int8Ty, VDAddr.getAlignment());
1609
0
}
1610
1611
void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613
0
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1615
  // library.
1616
0
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617
0
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618
0
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619
0
                      OMPLoc);
1620
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621
  // to register constructor/destructor for variable.
1622
0
  llvm::Value *Args[] = {
1623
0
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1624
0
      Ctor, CopyCtor, Dtor};
1625
0
  CGF.EmitRuntimeCall(
1626
0
      OMPBuilder.getOrCreateRuntimeFunction(
1627
0
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1628
0
      Args);
1629
0
}
1630
1631
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1632
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1633
0
    bool PerformInit, CodeGenFunction *CGF) {
1634
0
  if (CGM.getLangOpts().OpenMPUseTLS &&
1635
0
      CGM.getContext().getTargetInfo().isTLSSupported())
1636
0
    return nullptr;
1637
1638
0
  VD = VD->getDefinition(CGM.getContext());
1639
0
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1640
0
    QualType ASTTy = VD->getType();
1641
1642
0
    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1643
0
    const Expr *Init = VD->getAnyInitializer();
1644
0
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1645
      // Generate function that re-emits the declaration's initializer into the
1646
      // threadprivate copy of the variable VD
1647
0
      CodeGenFunction CtorCGF(CGM);
1648
0
      FunctionArgList Args;
1649
0
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1650
0
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1651
0
                            ImplicitParamKind::Other);
1652
0
      Args.push_back(&Dst);
1653
1654
0
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1655
0
          CGM.getContext().VoidPtrTy, Args);
1656
0
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1657
0
      std::string Name = getName({"__kmpc_global_ctor_", ""});
1658
0
      llvm::Function *Fn =
1659
0
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1660
0
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1661
0
                            Args, Loc, Loc);
1662
0
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1663
0
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1664
0
          CGM.getContext().VoidPtrTy, Dst.getLocation());
1665
0
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1666
0
                  VDAddr.getAlignment());
1667
0
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1668
0
                               /*IsInitializer=*/true);
1669
0
      ArgVal = CtorCGF.EmitLoadOfScalar(
1670
0
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1671
0
          CGM.getContext().VoidPtrTy, Dst.getLocation());
1672
0
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1673
0
      CtorCGF.FinishFunction();
1674
0
      Ctor = Fn;
1675
0
    }
1676
0
    if (VD->getType().isDestructedType() != QualType::DK_none) {
1677
      // Generate function that emits destructor call for the threadprivate copy
1678
      // of the variable VD
1679
0
      CodeGenFunction DtorCGF(CGM);
1680
0
      FunctionArgList Args;
1681
0
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1682
0
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1683
0
                            ImplicitParamKind::Other);
1684
0
      Args.push_back(&Dst);
1685
1686
0
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1687
0
          CGM.getContext().VoidTy, Args);
1688
0
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1689
0
      std::string Name = getName({"__kmpc_global_dtor_", ""});
1690
0
      llvm::Function *Fn =
1691
0
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1692
0
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1693
0
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1694
0
                            Loc, Loc);
1695
      // Create a scope with an artificial location for the body of this function.
1696
0
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1697
0
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1698
0
          DtorCGF.GetAddrOfLocalVar(&Dst),
1699
0
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1700
0
      DtorCGF.emitDestroy(
1701
0
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1702
0
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1703
0
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1704
0
      DtorCGF.FinishFunction();
1705
0
      Dtor = Fn;
1706
0
    }
1707
    // Do not emit init function if it is not required.
1708
0
    if (!Ctor && !Dtor)
1709
0
      return nullptr;
1710
1711
0
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1712
0
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1713
0
                                               /*isVarArg=*/false)
1714
0
                           ->getPointerTo();
1715
    // Copying constructor for the threadprivate variable.
1716
    // Must be NULL - reserved by runtime, but currently it requires that this
1717
    // parameter is always NULL. Otherwise it fires assertion.
1718
0
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1719
0
    if (Ctor == nullptr) {
1720
0
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1721
0
                                             /*isVarArg=*/false)
1722
0
                         ->getPointerTo();
1723
0
      Ctor = llvm::Constant::getNullValue(CtorTy);
1724
0
    }
1725
0
    if (Dtor == nullptr) {
1726
0
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1727
0
                                             /*isVarArg=*/false)
1728
0
                         ->getPointerTo();
1729
0
      Dtor = llvm::Constant::getNullValue(DtorTy);
1730
0
    }
1731
0
    if (!CGF) {
1732
0
      auto *InitFunctionTy =
1733
0
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1734
0
      std::string Name = getName({"__omp_threadprivate_init_", ""});
1735
0
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1736
0
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1737
0
      CodeGenFunction InitCGF(CGM);
1738
0
      FunctionArgList ArgList;
1739
0
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1740
0
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
1741
0
                            Loc, Loc);
1742
0
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1743
0
      InitCGF.FinishFunction();
1744
0
      return InitFunction;
1745
0
    }
1746
0
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1747
0
  }
1748
0
  return nullptr;
1749
0
}
1750
1751
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1752
0
                                                llvm::GlobalValue *GV) {
1753
0
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1754
0
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1755
1756
  // We only need to handle active 'indirect' declare target functions.
1757
0
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1758
0
    return;
1759
1760
  // Get a mangled name to store the new device global in.
1761
0
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1762
0
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1763
0
  SmallString<128> Name;
1764
0
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1765
1766
  // We need to generate a new global to hold the address of the indirectly
1767
  // called device function. Doing this allows us to keep the visibility and
1768
  // linkage of the associated function unchanged while allowing the runtime to
1769
  // access its value.
1770
0
  llvm::GlobalValue *Addr = GV;
1771
0
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1772
0
    Addr = new llvm::GlobalVariable(
1773
0
        CGM.getModule(), CGM.VoidPtrTy,
1774
0
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1775
0
        nullptr, llvm::GlobalValue::NotThreadLocal,
1776
0
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1777
0
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1778
0
  }
1779
1780
0
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1781
0
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1782
0
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1783
0
      llvm::GlobalValue::WeakODRLinkage);
1784
0
}
1785
1786
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1787
                                                          QualType VarType,
1788
0
                                                          StringRef Name) {
1789
0
  std::string Suffix = getName({"artificial", ""});
1790
0
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1791
0
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1792
0
      VarLVType, Twine(Name).concat(Suffix).str());
1793
0
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1794
0
      CGM.getTarget().isTLSSupported()) {
1795
0
    GAddr->setThreadLocal(/*Val=*/true);
1796
0
    return Address(GAddr, GAddr->getValueType(),
1797
0
                   CGM.getContext().getTypeAlignInChars(VarType));
1798
0
  }
1799
0
  std::string CacheSuffix = getName({"cache", ""});
1800
0
  llvm::Value *Args[] = {
1801
0
      emitUpdateLocation(CGF, SourceLocation()),
1802
0
      getThreadID(CGF, SourceLocation()),
1803
0
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1804
0
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1805
0
                                /*isSigned=*/false),
1806
0
      OMPBuilder.getOrCreateInternalVariable(
1807
0
          CGM.VoidPtrPtrTy,
1808
0
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1809
0
  return Address(
1810
0
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1811
0
          CGF.EmitRuntimeCall(
1812
0
              OMPBuilder.getOrCreateRuntimeFunction(
1813
0
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1814
0
              Args),
1815
0
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
1816
0
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1817
0
}
1818
1819
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1820
                                   const RegionCodeGenTy &ThenGen,
1821
0
                                   const RegionCodeGenTy &ElseGen) {
1822
0
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1823
1824
  // If the condition constant folds and can be elided, try to avoid emitting
1825
  // the condition and the dead arm of the if/else.
1826
0
  bool CondConstant;
1827
0
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1828
0
    if (CondConstant)
1829
0
      ThenGen(CGF);
1830
0
    else
1831
0
      ElseGen(CGF);
1832
0
    return;
1833
0
  }
1834
1835
  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1836
  // emit the conditional branch.
1837
0
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1838
0
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1839
0
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1840
0
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1841
1842
  // Emit the 'then' code.
1843
0
  CGF.EmitBlock(ThenBlock);
1844
0
  ThenGen(CGF);
1845
0
  CGF.EmitBranch(ContBlock);
1846
  // Emit the 'else' code if present.
1847
  // There is no need to emit line number for unconditional branch.
1848
0
  (void)ApplyDebugLocation::CreateEmpty(CGF);
1849
0
  CGF.EmitBlock(ElseBlock);
1850
0
  ElseGen(CGF);
1851
  // There is no need to emit line number for unconditional branch.
1852
0
  (void)ApplyDebugLocation::CreateEmpty(CGF);
1853
0
  CGF.EmitBranch(ContBlock);
1854
  // Emit the continuation block for code after the if.
1855
0
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1856
0
}
1857
1858
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1859
                                       llvm::Function *OutlinedFn,
1860
                                       ArrayRef<llvm::Value *> CapturedVars,
1861
                                       const Expr *IfCond,
1862
0
                                       llvm::Value *NumThreads) {
1863
0
  if (!CGF.HaveInsertPoint())
1864
0
    return;
1865
0
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1866
0
  auto &M = CGM.getModule();
1867
0
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1868
0
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
1869
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1870
0
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1871
0
    llvm::Value *Args[] = {
1872
0
        RTLoc,
1873
0
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1874
0
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1875
0
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
1876
0
    RealArgs.append(std::begin(Args), std::end(Args));
1877
0
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1878
1879
0
    llvm::FunctionCallee RTLFn =
1880
0
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1881
0
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
1882
0
  };
1883
0
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1884
0
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
1885
0
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1886
0
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1887
    // Build calls:
1888
    // __kmpc_serialized_parallel(&Loc, GTid);
1889
0
    llvm::Value *Args[] = {RTLoc, ThreadID};
1890
0
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1891
0
                            M, OMPRTL___kmpc_serialized_parallel),
1892
0
                        Args);
1893
1894
    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1895
0
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1896
0
    Address ZeroAddrBound =
1897
0
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1898
0
                                         /*Name=*/".bound.zero.addr");
1899
0
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1900
0
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1901
    // ThreadId for serialized parallels is 0.
1902
0
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1903
0
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1904
0
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1905
1906
    // Ensure we do not inline the function. This is trivially true for the ones
1907
    // passed to __kmpc_fork_call but the ones called in serialized regions
1908
    // could be inlined. This is not a perfect but it is closer to the invariant
1909
    // we want, namely, every data environment starts with a new function.
1910
    // TODO: We should pass the if condition to the runtime function and do the
1911
    //       handling there. Much cleaner code.
1912
0
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1913
0
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1914
0
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1915
1916
    // __kmpc_end_serialized_parallel(&Loc, GTid);
1917
0
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1918
0
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1919
0
                            M, OMPRTL___kmpc_end_serialized_parallel),
1920
0
                        EndArgs);
1921
0
  };
1922
0
  if (IfCond) {
1923
0
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1924
0
  } else {
1925
0
    RegionCodeGenTy ThenRCG(ThenGen);
1926
0
    ThenRCG(CGF);
1927
0
  }
1928
0
}
1929
1930
// If we're inside an (outlined) parallel region, use the region info's
1931
// thread-ID variable (it is passed in a first argument of the outlined function
1932
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1933
// regular serial code region, get thread ID by calling kmp_int32
1934
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1935
// return the address of that temp.
1936
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1937
0
                                             SourceLocation Loc) {
1938
0
  if (auto *OMPRegionInfo =
1939
0
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1940
0
    if (OMPRegionInfo->getThreadIDVariable())
1941
0
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1942
1943
0
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
1944
0
  QualType Int32Ty =
1945
0
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1946
0
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1947
0
  CGF.EmitStoreOfScalar(ThreadID,
1948
0
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1949
1950
0
  return ThreadIDTemp;
1951
0
}
1952
1953
0
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1954
0
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1955
0
  std::string Name = getName({Prefix, "var"});
1956
0
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1957
0
}
1958
1959
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits EnterCallee(EnterArgs) before the region body and
/// ExitCallee(ExitArgs) after it. When \p Conditional is true, the result of
/// the enter call guards the region (if-then shape) and the user must call
/// Done() after emitting the region to close the conditional with the
/// continuation block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only enter the region when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
1997
1998
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call optionally takes a fourth "hint" argument; the exit call
  // always uses the plain three-argument form (Args).
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2027
2028
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: only the thread for which __kmpc_master returns
  // non-zero executes the region body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region opened by the action (emit the join block).
  Action.Done(CGF);
}
2050
2051
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  // A missing filter() clause defaults to thread 0 (the primary thread).
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional region opened by the action (emit the join block).
  Action.Done(CGF);
}
2079
2080
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Prefer the OpenMPIRBuilder when enabled; otherwise emit the classic
  // runtime call directly.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // An untied task may resume at a different point after the yield, so let
  // the enclosing region emit its untied-task switch state machine.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2099
2100
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Unconditional enter/exit pair: every encountering thread runs the
  // taskgroup body (no Conditional flag, hence no Done() call needed).
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2119
2120
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Cast the generic slot pointer to the variable's own memory type and
  // attach the declaration's natural alignment.
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
2134
2135
/// Emit the internal "void copy_func(void *LHS, void *RHS)" helper that
/// __kmpc_copyprivate invokes to broadcast copyprivate variables. Both
/// arguments are arrays of void* with one slot per variable; each slot pair
/// is copied using the assignment operation recorded for that variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: this helper is only referenced by the
  // __kmpc_copyprivate call emitted in this translation unit.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination slot comes from the LHS array, source from the RHS array.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2190
2191
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether THIS thread was the one that executed the single
  // region; __kmpc_copyprivate uses it to pick the broadcast source.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the thread winning __kmpc_single runs the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the helper's DestExprs position and
    // DstExprs in its SrcExprs position — looks inverted relative to the
    // helper's parameter names; confirm intent before touching either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2277
2278
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2279
                                        const RegionCodeGenTy &OrderedOpGen,
2280
0
                                        SourceLocation Loc, bool IsThreads) {
2281
0
  if (!CGF.HaveInsertPoint())
2282
0
    return;
2283
  // __kmpc_ordered(ident_t *, gtid);
2284
  // OrderedOpGen();
2285
  // __kmpc_end_ordered(ident_t *, gtid);
2286
  // Prepare arguments and build a call to __kmpc_ordered
2287
0
  if (IsThreads) {
2288
0
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2289
0
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2290
0
                              CGM.getModule(), OMPRTL___kmpc_ordered),
2291
0
                          Args,
2292
0
                          OMPBuilder.getOrCreateRuntimeFunction(
2293
0
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2294
0
                          Args);
2295
0
    OrderedOpGen.setAction(Action);
2296
0
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2297
0
    return;
2298
0
  }
2299
0
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2300
0
}
2301
2302
0
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  // Map the directive that implies the barrier to the matching ident_t flag,
  // so the runtime can attribute the barrier correctly. Anything not listed
  // is treated as a generic implicit barrier.
  switch (Kind) {
  case OMPD_for:
    return OMP_IDENT_BARRIER_IMPL_FOR;
  case OMPD_sections:
    return OMP_IDENT_BARRIER_IMPL_SECTIONS;
  case OMPD_single:
    return OMP_IDENT_BARRIER_IMPL_SINGLE;
  case OMPD_barrier:
    return OMP_IDENT_BARRIER_EXPL;
  default:
    return OMP_IDENT_BARRIER_IMPL;
  }
}
2316
2317
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  // (A doacross loop is recognized by an ordered(n) clause with a loop count.)
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit integer literal '1' as the chunk expr.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
  // Otherwise both out-parameters are left untouched (caller's defaults).
}
2334
2335
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region the barrier must observe cancellation
    // requests, so use the cancel-aware entry point.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2384
2385
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  // The message clause is optional; pass a null pointer when absent.
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  // Severity encoding: 2 = fatal, 1 = warning.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
2400
2401
/// Map the OpenMP loop schedule to the runtime enumeration.
2402
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2403
0
                                          bool Chunked, bool Ordered) {
2404
0
  switch (ScheduleKind) {
2405
0
  case OMPC_SCHEDULE_static:
2406
0
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2407
0
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
2408
0
  case OMPC_SCHEDULE_dynamic:
2409
0
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2410
0
  case OMPC_SCHEDULE_guided:
2411
0
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2412
0
  case OMPC_SCHEDULE_runtime:
2413
0
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2414
0
  case OMPC_SCHEDULE_auto:
2415
0
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
2416
0
  case OMPC_SCHEDULE_unknown:
2417
0
    assert(!Chunked && "chunk was specified but schedule kind not known");
2418
0
    return Ordered ? OMP_ord_static : OMP_sch_static;
2419
0
  }
2420
0
  llvm_unreachable("Unexpected runtime schedule");
2421
0
}
2422
2423
/// Map the OpenMP distribute schedule to the runtime enumeration.
2424
static OpenMPSchedType
2425
0
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2426
  // only static is allowed for dist_schedule
2427
0
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2428
0
}
2429
2430
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2431
0
                                         bool Chunked) const {
2432
0
  OpenMPSchedType Schedule =
2433
0
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2434
0
  return Schedule == OMP_sch_static;
2435
0
}
2436
2437
bool CGOpenMPRuntime::isStaticNonchunked(
2438
0
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2439
0
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2440
0
  return Schedule == OMP_dist_sch_static;
2441
0
}
2442
2443
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2444
0
                                      bool Chunked) const {
2445
0
  OpenMPSchedType Schedule =
2446
0
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2447
0
  return Schedule == OMP_sch_static_chunked;
2448
0
}
2449
2450
bool CGOpenMPRuntime::isStaticChunked(
2451
0
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2452
0
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2453
0
  return Schedule == OMP_dist_sch_static_chunked;
2454
0
}
2455
2456
0
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  // Every schedule other than plain static uses the dynamic dispatch
  // protocol at runtime.
  OpenMPSchedType Sched =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Sched != OMP_sch_static_chunked && "cannot be chunked here");
  return Sched != OMP_sch_static;
}
2462
2463
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2464
                                  OpenMPScheduleClauseModifier M1,
2465
0
                                  OpenMPScheduleClauseModifier M2) {
2466
0
  int Modifier = 0;
2467
0
  switch (M1) {
2468
0
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2469
0
    Modifier = OMP_sch_modifier_monotonic;
2470
0
    break;
2471
0
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2472
0
    Modifier = OMP_sch_modifier_nonmonotonic;
2473
0
    break;
2474
0
  case OMPC_SCHEDULE_MODIFIER_simd:
2475
0
    if (Schedule == OMP_sch_static_chunked)
2476
0
      Schedule = OMP_sch_static_balanced_chunked;
2477
0
    break;
2478
0
  case OMPC_SCHEDULE_MODIFIER_last:
2479
0
  case OMPC_SCHEDULE_MODIFIER_unknown:
2480
0
    break;
2481
0
  }
2482
0
  switch (M2) {
2483
0
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2484
0
    Modifier = OMP_sch_modifier_monotonic;
2485
0
    break;
2486
0
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2487
0
    Modifier = OMP_sch_modifier_nonmonotonic;
2488
0
    break;
2489
0
  case OMPC_SCHEDULE_MODIFIER_simd:
2490
0
    if (Schedule == OMP_sch_static_chunked)
2491
0
      Schedule = OMP_sch_static_balanced_chunked;
2492
0
    break;
2493
0
  case OMPC_SCHEDULE_MODIFIER_last:
2494
0
  case OMPC_SCHEDULE_MODIFIER_unknown:
2495
0
    break;
2496
0
  }
2497
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2498
  // If the static schedule kind is specified or if the ordered clause is
2499
  // specified, and if the nonmonotonic modifier is not specified, the effect is
2500
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2501
  // modifier is specified, the effect is as if the nonmonotonic modifier is
2502
  // specified.
2503
0
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2504
0
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2505
0
          Schedule == OMP_sch_static_balanced_chunked ||
2506
0
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2507
0
          Schedule == OMP_dist_sch_static_chunked ||
2508
0
          Schedule == OMP_dist_sch_static))
2509
0
      Modifier = OMP_sch_modifier_nonmonotonic;
2510
0
  }
2511
0
  return Schedule | Modifier;
2512
0
}
2513
2514
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through __kmpc_for_static_init instead, unless an
  // ordered clause forces them onto the dispatch protocol.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2547
2548
/// Emit the __kmpc_for_static_init call for a statically-scheduled
/// worksharing or distribute loop, after validating that the schedule is one
/// of the static variants and normalizing the chunk value.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // Bounds/stride are passed by address: the runtime rewrites them with this
  // thread's portion of the iteration space.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2596
2597
/// Emit the __kmpc_for_static_init call that sets up a statically scheduled
/// worksharing loop or sections region. The schedule kind/modifiers and the
/// loop-bounds addresses are taken from \p ScheduleKind and \p Values.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  // Map the schedule clause (plus chunk presence and 'ordered') onto the
  // runtime's schedule-type enumeration.
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  // Tag the ident_t with the kind of work region for the runtime/tools.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Select the __kmpc_for_static_init_{4,4u,8,8u} variant for the IV width
  // and signedness; 'false' = not the GPU distribute entry point.
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2618
2619
/// Emit the static-init runtime call for a 'distribute' construct.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  // Map the dist_schedule clause (plus chunk presence) onto the runtime's
  // schedule-type enumeration.
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  // On AMDGCN/NVPTX device compilation, request the GPU-specific distribute
  // init entry point from the OpenMPIRBuilder.
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  // 'distribute' carries no schedule modifiers; pass 'unknown' for both.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
2639
2640
/// Emit the runtime call that closes a statically scheduled worksharing
/// region (__kmpc_for_static_fini, or __kmpc_distribute_static_fini for a
/// 'distribute' directive on a GPU target device).
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  // Distribute regions on AMDGCN/NVPTX devices use the dedicated
  // distribute-fini entry point; everything else uses the common one.
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
2667
2668
/// Emit the per-iteration finalization call for a dynamically scheduled
/// ordered loop; the dispatch-fini variant is chosen by IV width/signedness.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}
2679
2680
/// Emit a call to __kmpc_dispatch_next to fetch the next chunk of a
/// dynamically scheduled loop. \p IL, \p LB, \p UB and \p ST are the
/// addresses the runtime fills in (last-iteration flag, lower/upper bound,
/// stride). Returns the runtime's kmp_int32 result converted to a bool
/// ("there is more work").
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  // Narrow the runtime's 32-bit result to the bool callers branch on.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2703
2704
/// Emit a call to __kmpc_push_num_threads to register the num_threads
/// clause value for the next parallel region.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      // The runtime expects a kmp_int32; cast whatever width the clause
      // expression produced.
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
2717
2718
/// Emit a call to __kmpc_push_proc_bind to register the proc_bind clause
/// value for the next parallel region.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
2732
2733
/// Emit an OpenMP 'flush' operation. With the OpenMPIRBuilder enabled the
/// builder emits it directly; otherwise a __kmpc_flush runtime call is
/// generated. The flushed-variable list and ordering are currently unused
/// by this lowering (the runtime call takes only the location).
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    // Nothing to emit if the current block is already terminated.
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
2746
2747
namespace {
/// Indexes of fields for type kmp_task_t. These must stay in sync with the
/// field order built by createKmpTaskTRecordDecl() below.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (First kmp_cmplrdata_t union slot in the record.)
  Data1,
  /// Task priority.
  /// (Second kmp_cmplrdata_t union slot in the record.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2772
2773
0
/// Emit the offload-entry table and its associated metadata via the
/// OpenMPIRBuilder, reporting any malformed entries as Clang diagnostics.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Callback invoked by the builder when an entry is invalid; it translates
  // the entry's (DeviceID, FileID, Line) back into a SourceLocation so the
  // diagnostic points at the offending source position.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Search the FileManager's known files for the one whose unique ID
      // matches the entry, then recover a location from its line number.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      // No usable location for link errors; report without one.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
2822
2823
0
/// Lazily build and cache the kmp_routine_entry_t function-pointer type
/// used for task entry routines. Idempotent: subsequent calls are no-ops.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    // Cache both the AST-level QualType and its lowered LLVM type.
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
2834
2835
namespace {
/// Helper describing one privatized variable in a task-based directive:
/// the original variable, its private copy, and (for firstprivate) the
/// initializer element.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for untied-task local variables: only the original decl is
  /// recorded (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression naming the original variable (null for locals).
  const Expr *OriginalRef = nullptr;
  // The original variable declaration.
  const VarDecl *Original = nullptr;
  // The task-private copy of the variable (null for locals).
  const VarDecl *PrivateCopy = nullptr;
  // Initializer element for firstprivate copies (null otherwise).
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built with the local-variable constructor,
  /// i.e. everything except Original is null.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// A private variable paired with its required alignment, used when laying
/// out the .kmp_privates.t record.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
2852
2853
0
static bool isAllocatableDecl(const VarDecl *VD) {
2854
0
  const VarDecl *CVD = VD->getCanonicalDecl();
2855
0
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2856
0
    return false;
2857
0
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2858
  // Use the default allocation.
2859
0
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2860
0
           !AA->getAllocator());
2861
0
}
2862
2863
/// Build the implicit record type '.kmp_privates.t' that holds the private
/// copies for a task. Returns nullptr when there are no private variables.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        // Allocator-managed variables are stored indirectly as well.
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate alignment attributes from the original variable so the
        // field gets the same alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
2896
2897
/// Build the implicit record type 'kmp_task_t' mirroring the runtime's task
/// descriptor. For taskloop directives the additional bound/stride/liter/
/// reductions fields are appended. The field order here must match the
/// KmpTaskTFields enum above.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);             // shareds
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(C, RD, KmpInt32Ty);              // part_id
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);          // data1
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);          // data2
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);           // lb
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);           // ub
    addFieldToRecordDecl(C, RD, KmpInt64Ty);            // st
    addFieldToRecordDecl(C, RD, KmpInt32Ty);            // liter
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);           // reductions
  }
  RD->completeDefinition();
  return RD;
}
2942
2943
/// Build the implicit record type 'kmp_task_t_with_privates' that prepends
/// the runtime task descriptor to the (optional) privates record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only present when the task has private variables.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
2959
2960
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt)
  // signature expected by the runtime.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates; Base at its first
  // field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the outlined function can update it
  // (used for untied task switching).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (index 1) only exists when the task has privates;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter, and reductions loaded
    // from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3074
3075
/// Emit the '.omp_task_destructor.' function that runs the destructors of
/// the privates record embedded in a task. It has the same
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature as the task
/// entry and registers a destroy cleanup for every privates field whose
/// type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates field (second field of the record).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields whose type requires destruction get a cleanup; the
    // destructor calls themselves are emitted by FinishFunction below.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3123
3124
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the privates record.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  // One out-parameter (T **) per privatized variable; PrivateVarsPos records
  // which argument position each variable's out-pointer occupies.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Same pointer adjustments as in createPrivatesRecordDecl: references
    // and allocator-managed locals are stored indirectly.
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This trivial address-forwarding function should always be inlined in
    // optimized builds.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Look up the out-parameter matching this field via the position map
    // built above; Privates[Counter] parallels the record's field order.
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3232
3233
/// Emit initialization for private variables in task-based directives.
3234
/// Emits the stores that initialize the private copies living inside a
/// kmp_task_t-with-privates record for a task-based directive.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block (source of
///        firstprivate values); may be invalid when no copy source is needed.
/// \param TDBase LValue of the destination task-with-privates record.
/// \param Privates Aggregated private/firstprivate/lastprivate descriptors,
///        in the same order as the fields of the privates sub-record.
/// \param ForDup true when emitting inside the task_dup helper (taskloop),
///        false when emitting at the task-creation point.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the task record is the privates sub-record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // Reinterpret the shareds block with its proper record type so fields
    // can be addressed below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates sub-record's fields in lockstep with Privates; FI must
  // advance exactly once per Pair, including skipped entries.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup helper only non-trivial constructor calls need re-running;
    // everything else was already handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialization reads from the original (shared)
        // variable, located via one of the cases below.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target-data variable: not captured, its address is a
          // local of the enclosing function.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Dup helper: read the value out of the source task's shareds,
          // restoring the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures resolve through the normal lvalue path.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the per-element source so Init can reference it.
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/aggregate firstprivate: bind the shared value and run the
          // copy-initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: default-initialize in place.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3352
3353
/// Check if duplication function is required for taskloops.
3354
static bool checkInitIsRequired(CodeGenFunction &CGF,
3355
0
                                ArrayRef<PrivateDataTy> Privates) {
3356
0
  bool InitRequired = false;
3357
0
  for (const PrivateDataTy &Pair : Privates) {
3358
0
    if (Pair.second.isLocalPrivate())
3359
0
      continue;
3360
0
    const VarDecl *VD = Pair.second.PrivateCopy;
3361
0
    const Expr *Init = VD->getAnyInitializer();
3362
0
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3363
0
                                    !CGF.isTrivialInitializer(Init));
3364
0
    if (InitRequired)
3365
0
      break;
3366
0
  }
3367
0
  return InitRequired;
3368
0
}
3369
3370
3371
/// Emit task_dup function (for initialization of
3372
/// private/firstprivate/lastprivate vars and last_iter flag)
3373
/// \code
3374
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3375
/// lastpriv) {
3376
/// // setup lastprivate flag
3377
///    task_dst->last = lastpriv;
3378
/// // could be constructor calls here...
3379
/// }
3380
/// \endcode
3381
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Argument list mirrors the runtime's task_dup entry:
  //   void dup(kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv)
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  // Create the internal-linkage helper and start emitting into it.
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Destination task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block.
    // NOTE: this inner TDBase intentionally shadows the destination TDBase
    // above — here we are loading from SrcArg, not DstArg.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize the destination's private copies (ForDup=true restricts this
  // to non-trivial constructor re-runs).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3449
3450
/// Checks if destructor function is required to be generated.
3451
/// \return true if cleanups are required, false otherwise.
3452
static bool
3453
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3454
0
                         ArrayRef<PrivateDataTy> Privates) {
3455
0
  for (const PrivateDataTy &P : Privates) {
3456
0
    if (P.second.isLocalPrivate())
3457
0
      continue;
3458
0
    QualType Ty = P.second.Original->getType().getNonReferenceType();
3459
0
    if (Ty.isDestructedType())
3460
0
      return true;
3461
0
  }
3462
0
  return false;
3463
0
}
3464
3465
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor emits the loop preheaders, headers and
/// bodies for every iterator in \p E, so code emitted while the scope is
/// alive runs once per point of the iteration space; the destructor emits
/// the matching latches and exit blocks in reverse order.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit jump destination per iterator, appended by the
  // constructor and consumed innermost-first by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // A null \p E makes the scope a no-op (no iterator modifier present).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every upper bound and privatize each iterator
    // variable and its hidden counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Second pass: open one loop per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed vs. unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3540
3541
static std::pair<llvm::Value *, llvm::Value *>
3542
0
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3543
0
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3544
0
  llvm::Value *Addr;
3545
0
  if (OASE) {
3546
0
    const Expr *Base = OASE->getBase();
3547
0
    Addr = CGF.EmitScalarExpr(Base);
3548
0
  } else {
3549
0
    Addr = CGF.EmitLValue(E).getPointer(CGF);
3550
0
  }
3551
0
  llvm::Value *SizeVal;
3552
0
  QualType Ty = E->getType();
3553
0
  if (OASE) {
3554
0
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3555
0
    for (const Expr *SE : OASE->getDimensions()) {
3556
0
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3557
0
      Sz = CGF.EmitScalarConversion(
3558
0
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3559
0
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3560
0
    }
3561
0
  } else if (const auto *ASE =
3562
0
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3563
0
    LValue UpAddrLVal =
3564
0
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3565
0
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3566
0
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3567
0
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3568
0
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3569
0
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3570
0
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3571
0
  } else {
3572
0
    SizeVal = CGF.getTypeSize(Ty);
3573
0
  }
3574
0
  return std::make_pair(Addr, SizeVal);
3575
0
}
3576
3577
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3578
0
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3579
0
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3580
0
  if (KmpTaskAffinityInfoTy.isNull()) {
3581
0
    RecordDecl *KmpAffinityInfoRD =
3582
0
        C.buildImplicitRecord("kmp_task_affinity_info_t");
3583
0
    KmpAffinityInfoRD->startDefinition();
3584
0
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3585
0
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3586
0
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3587
0
    KmpAffinityInfoRD->completeDefinition();
3588
0
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3589
0
  }
3590
0
}
3591
3592
CGOpenMPRuntime::TaskResultTy
3593
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3594
                              const OMPExecutableDirective &D,
3595
                              llvm::Function *TaskFunction, QualType SharedsTy,
3596
0
                              Address Shareds, const OMPTaskDataTy &Data) {
3597
0
  ASTContext &C = CGM.getContext();
3598
0
  llvm::SmallVector<PrivateDataTy, 4> Privates;
3599
  // Aggregate privates and sort them by the alignment.
3600
0
  const auto *I = Data.PrivateCopies.begin();
3601
0
  for (const Expr *E : Data.PrivateVars) {
3602
0
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3603
0
    Privates.emplace_back(
3604
0
        C.getDeclAlign(VD),
3605
0
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3606
0
                         /*PrivateElemInit=*/nullptr));
3607
0
    ++I;
3608
0
  }
3609
0
  I = Data.FirstprivateCopies.begin();
3610
0
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
3611
0
  for (const Expr *E : Data.FirstprivateVars) {
3612
0
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3613
0
    Privates.emplace_back(
3614
0
        C.getDeclAlign(VD),
3615
0
        PrivateHelpersTy(
3616
0
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3617
0
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3618
0
    ++I;
3619
0
    ++IElemInitRef;
3620
0
  }
3621
0
  I = Data.LastprivateCopies.begin();
3622
0
  for (const Expr *E : Data.LastprivateVars) {
3623
0
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624
0
    Privates.emplace_back(
3625
0
        C.getDeclAlign(VD),
3626
0
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627
0
                         /*PrivateElemInit=*/nullptr));
3628
0
    ++I;
3629
0
  }
3630
0
  for (const VarDecl *VD : Data.PrivateLocals) {
3631
0
    if (isAllocatableDecl(VD))
3632
0
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3633
0
    else
3634
0
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3635
0
  }
3636
0
  llvm::stable_sort(Privates,
3637
0
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
3638
0
                      return L.first > R.first;
3639
0
                    });
3640
0
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3641
  // Build type kmp_routine_entry_t (if not built yet).
3642
0
  emitKmpRoutineEntryT(KmpInt32Ty);
3643
  // Build type kmp_task_t (if not built yet).
3644
0
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3645
0
    if (SavedKmpTaskloopTQTy.isNull()) {
3646
0
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3647
0
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3648
0
    }
3649
0
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
3650
0
  } else {
3651
0
    assert((D.getDirectiveKind() == OMPD_task ||
3652
0
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3653
0
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3654
0
           "Expected taskloop, task or target directive");
3655
0
    if (SavedKmpTaskTQTy.isNull()) {
3656
0
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3657
0
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3658
0
    }
3659
0
    KmpTaskTQTy = SavedKmpTaskTQTy;
3660
0
  }
3661
0
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3662
  // Build particular struct kmp_task_t for the given task.
3663
0
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3664
0
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3665
0
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3666
0
  QualType KmpTaskTWithPrivatesPtrQTy =
3667
0
      C.getPointerType(KmpTaskTWithPrivatesQTy);
3668
0
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3669
0
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
3670
0
      KmpTaskTWithPrivatesTy->getPointerTo();
3671
0
  llvm::Value *KmpTaskTWithPrivatesTySize =
3672
0
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3673
0
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3674
3675
  // Emit initial values for private copies (if any).
3676
0
  llvm::Value *TaskPrivatesMap = nullptr;
3677
0
  llvm::Type *TaskPrivatesMapTy =
3678
0
      std::next(TaskFunction->arg_begin(), 3)->getType();
3679
0
  if (!Privates.empty()) {
3680
0
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3681
0
    TaskPrivatesMap =
3682
0
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3683
0
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3684
0
        TaskPrivatesMap, TaskPrivatesMapTy);
3685
0
  } else {
3686
0
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
3687
0
        cast<llvm::PointerType>(TaskPrivatesMapTy));
3688
0
  }
3689
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3690
  // kmp_task_t *tt);
3691
0
  llvm::Function *TaskEntry = emitProxyTaskFunction(
3692
0
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3693
0
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3694
0
      TaskPrivatesMap);
3695
3696
  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3697
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3698
  // kmp_routine_entry_t *task_entry);
3699
  // Task flags. Format is taken from
3700
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3701
  // description of kmp_tasking_flags struct.
3702
0
  enum {
3703
0
    TiedFlag = 0x1,
3704
0
    FinalFlag = 0x2,
3705
0
    DestructorsFlag = 0x8,
3706
0
    PriorityFlag = 0x20,
3707
0
    DetachableFlag = 0x40,
3708
0
  };
3709
0
  unsigned Flags = Data.Tied ? TiedFlag : 0;
3710
0
  bool NeedsCleanup = false;
3711
0
  if (!Privates.empty()) {
3712
0
    NeedsCleanup =
3713
0
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3714
0
    if (NeedsCleanup)
3715
0
      Flags = Flags | DestructorsFlag;
3716
0
  }
3717
0
  if (Data.Priority.getInt())
3718
0
    Flags = Flags | PriorityFlag;
3719
0
  if (D.hasClausesOfKind<OMPDetachClause>())
3720
0
    Flags = Flags | DetachableFlag;
3721
0
  llvm::Value *TaskFlags =
3722
0
      Data.Final.getPointer()
3723
0
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3724
0
                                     CGF.Builder.getInt32(FinalFlag),
3725
0
                                     CGF.Builder.getInt32(/*C=*/0))
3726
0
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3727
0
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3728
0
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3729
0
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3730
0
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3731
0
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3732
0
          TaskEntry, KmpRoutineEntryPtrTy)};
3733
0
  llvm::Value *NewTask;
3734
0
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
3735
    // Check if we have any device clause associated with the directive.
3736
0
    const Expr *Device = nullptr;
3737
0
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
3738
0
      Device = C->getDevice();
3739
    // Emit device ID if any otherwise use default value.
3740
0
    llvm::Value *DeviceID;
3741
0
    if (Device)
3742
0
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3743
0
                                           CGF.Int64Ty, /*isSigned=*/true);
3744
0
    else
3745
0
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3746
0
    AllocArgs.push_back(DeviceID);
3747
0
    NewTask = CGF.EmitRuntimeCall(
3748
0
        OMPBuilder.getOrCreateRuntimeFunction(
3749
0
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3750
0
        AllocArgs);
3751
0
  } else {
3752
0
    NewTask =
3753
0
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3754
0
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3755
0
                            AllocArgs);
3756
0
  }
3757
  // Emit detach clause initialization.
3758
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3759
  // task_descriptor);
3760
0
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3761
0
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3762
0
    LValue EvtLVal = CGF.EmitLValue(Evt);
3763
3764
    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3765
    // int gtid, kmp_task_t *task);
3766
0
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3767
0
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3768
0
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3769
0
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3770
0
        OMPBuilder.getOrCreateRuntimeFunction(
3771
0
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3772
0
        {Loc, Tid, NewTask});
3773
0
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3774
0
                                      Evt->getExprLoc());
3775
0
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3776
0
  }
3777
  // Process affinity clauses.
3778
0
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
3779
    // Process list of affinity data.
3780
0
    ASTContext &C = CGM.getContext();
3781
0
    Address AffinitiesArray = Address::invalid();
3782
    // Calculate number of elements to form the array of affinity data.
3783
0
    llvm::Value *NumOfElements = nullptr;
3784
0
    unsigned NumAffinities = 0;
3785
0
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3786
0
      if (const Expr *Modifier = C->getModifier()) {
3787
0
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3788
0
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3789
0
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3790
0
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3791
0
          NumOfElements =
3792
0
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3793
0
        }
3794
0
      } else {
3795
0
        NumAffinities += C->varlist_size();
3796
0
      }
3797
0
    }
3798
0
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3799
    // Fields ids in kmp_task_affinity_info record.
3800
0
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3801
3802
0
    QualType KmpTaskAffinityInfoArrayTy;
3803
0
    if (NumOfElements) {
3804
0
      NumOfElements = CGF.Builder.CreateNUWAdd(
3805
0
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3806
0
      auto *OVE = new (C) OpaqueValueExpr(
3807
0
          Loc,
3808
0
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3809
0
          VK_PRValue);
3810
0
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3811
0
                                                    RValue::get(NumOfElements));
3812
0
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3813
0
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3814
0
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3815
      // Properly emit variable-sized array.
3816
0
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3817
0
                                           ImplicitParamKind::Other);
3818
0
      CGF.EmitVarDecl(*PD);
3819
0
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3820
0
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3821
0
                                                /*isSigned=*/false);
3822
0
    } else {
3823
0
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3824
0
          KmpTaskAffinityInfoTy,
3825
0
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3826
0
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3827
0
      AffinitiesArray =
3828
0
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3829
0
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3830
0
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3831
0
                                             /*isSigned=*/false);
3832
0
    }
3833
3834
0
    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3835
    // Fill array by elements without iterators.
3836
0
    unsigned Pos = 0;
3837
0
    bool HasIterator = false;
3838
0
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3839
0
      if (C->getModifier()) {
3840
0
        HasIterator = true;
3841
0
        continue;
3842
0
      }
3843
0
      for (const Expr *E : C->varlists()) {
3844
0
        llvm::Value *Addr;
3845
0
        llvm::Value *Size;
3846
0
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3847
0
        LValue Base =
3848
0
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3849
0
                               KmpTaskAffinityInfoTy);
3850
        // affs[i].base_addr = &<Affinities[i].second>;
3851
0
        LValue BaseAddrLVal = CGF.EmitLValueForField(
3852
0
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3853
0
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3854
0
                              BaseAddrLVal);
3855
        // affs[i].len = sizeof(<Affinities[i].second>);
3856
0
        LValue LenLVal = CGF.EmitLValueForField(
3857
0
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3858
0
        CGF.EmitStoreOfScalar(Size, LenLVal);
3859
0
        ++Pos;
3860
0
      }
3861
0
    }
3862
0
    LValue PosLVal;
3863
0
    if (HasIterator) {
3864
0
      PosLVal = CGF.MakeAddrLValue(
3865
0
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3866
0
          C.getSizeType());
3867
0
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3868
0
    }
3869
    // Process elements with iterators.
3870
0
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3871
0
      const Expr *Modifier = C->getModifier();
3872
0
      if (!Modifier)
3873
0
        continue;
3874
0
      OMPIteratorGeneratorScope IteratorScope(
3875
0
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3876
0
      for (const Expr *E : C->varlists()) {
3877
0
        llvm::Value *Addr;
3878
0
        llvm::Value *Size;
3879
0
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3880
0
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3881
0
        LValue Base = CGF.MakeAddrLValue(
3882
0
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
3883
        // affs[i].base_addr = &<Affinities[i].second>;
3884
0
        LValue BaseAddrLVal = CGF.EmitLValueForField(
3885
0
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3886
0
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3887
0
                              BaseAddrLVal);
3888
        // affs[i].len = sizeof(<Affinities[i].second>);
3889
0
        LValue LenLVal = CGF.EmitLValueForField(
3890
0
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3891
0
        CGF.EmitStoreOfScalar(Size, LenLVal);
3892
0
        Idx = CGF.Builder.CreateNUWAdd(
3893
0
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3894
0
        CGF.EmitStoreOfScalar(Idx, PosLVal);
3895
0
      }
3896
0
    }
3897
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3898
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3899
    // naffins, kmp_task_affinity_info_t *affin_list);
3900
0
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3901
0
    llvm::Value *GTid = getThreadID(CGF, Loc);
3902
0
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3903
0
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
3904
    // FIXME: Emit the function and ignore its result for now unless the
3905
    // runtime function is properly implemented.
3906
0
    (void)CGF.EmitRuntimeCall(
3907
0
        OMPBuilder.getOrCreateRuntimeFunction(
3908
0
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3909
0
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3910
0
  }
3911
0
  llvm::Value *NewTaskNewTaskTTy =
3912
0
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3913
0
          NewTask, KmpTaskTWithPrivatesPtrTy);
3914
0
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
3915
0
                                               KmpTaskTWithPrivatesQTy);
3916
0
  LValue TDBase =
3917
0
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3918
  // Fill the data in the resulting kmp_task_t record.
3919
  // Copy shareds if there are any.
3920
0
  Address KmpTaskSharedsPtr = Address::invalid();
3921
0
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3922
0
    KmpTaskSharedsPtr = Address(
3923
0
        CGF.EmitLoadOfScalar(
3924
0
            CGF.EmitLValueForField(
3925
0
                TDBase,
3926
0
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3927
0
            Loc),
3928
0
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3929
0
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3930
0
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3931
0
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3932
0
  }
3933
  // Emit initial values for private copies (if any).
3934
0
  TaskResultTy Result;
3935
0
  if (!Privates.empty()) {
3936
0
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3937
0
                     SharedsTy, SharedsPtrTy, Data, Privates,
3938
0
                     /*ForDup=*/false);
3939
0
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3940
0
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3941
0
      Result.TaskDupFn = emitTaskDupFunction(
3942
0
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3943
0
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3944
0
          /*WithLastIter=*/!Data.LastprivateVars.empty());
3945
0
    }
3946
0
  }
3947
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3948
0
  enum { Priority = 0, Destructors = 1 };
3949
  // Provide pointer to function with destructors for privates.
3950
0
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3951
0
  const RecordDecl *KmpCmplrdataUD =
3952
0
      (*FI)->getType()->getAsUnionType()->getDecl();
3953
0
  if (NeedsCleanup) {
3954
0
    llvm::Value *DestructorFn = emitDestructorsFunction(
3955
0
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3956
0
        KmpTaskTWithPrivatesQTy);
3957
0
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3958
0
    LValue DestructorsLV = CGF.EmitLValueForField(
3959
0
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3960
0
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3961
0
                              DestructorFn, KmpRoutineEntryPtrTy),
3962
0
                          DestructorsLV);
3963
0
  }
3964
  // Set priority.
3965
0
  if (Data.Priority.getInt()) {
3966
0
    LValue Data2LV = CGF.EmitLValueForField(
3967
0
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3968
0
    LValue PriorityLV = CGF.EmitLValueForField(
3969
0
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3970
0
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3971
0
  }
3972
0
  Result.NewTask = NewTask;
3973
0
  Result.TaskEntry = TaskEntry;
3974
0
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3975
0
  Result.TDBase = TDBase;
3976
0
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3977
0
  return Result;
3978
0
}
3979
3980
/// Translates internal dependency kind into the runtime kind.
3981
0
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3982
0
  RTLDependenceKindTy DepKind;
3983
0
  switch (K) {
3984
0
  case OMPC_DEPEND_in:
3985
0
    DepKind = RTLDependenceKindTy::DepIn;
3986
0
    break;
3987
  // Out and InOut dependencies must use the same code.
3988
0
  case OMPC_DEPEND_out:
3989
0
  case OMPC_DEPEND_inout:
3990
0
    DepKind = RTLDependenceKindTy::DepInOut;
3991
0
    break;
3992
0
  case OMPC_DEPEND_mutexinoutset:
3993
0
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3994
0
    break;
3995
0
  case OMPC_DEPEND_inoutset:
3996
0
    DepKind = RTLDependenceKindTy::DepInOutSet;
3997
0
    break;
3998
0
  case OMPC_DEPEND_outallmemory:
3999
0
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
4000
0
    break;
4001
0
  case OMPC_DEPEND_source:
4002
0
  case OMPC_DEPEND_sink:
4003
0
  case OMPC_DEPEND_depobj:
4004
0
  case OMPC_DEPEND_inoutallmemory:
4005
0
  case OMPC_DEPEND_unknown:
4006
0
    llvm_unreachable("Unknown task dependence type");
4007
0
  }
4008
0
  return DepKind;
4009
0
}
4010
4011
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4012
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4013
0
                           QualType &FlagsTy) {
4014
0
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4015
0
  if (KmpDependInfoTy.isNull()) {
4016
0
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4017
0
    KmpDependInfoRD->startDefinition();
4018
0
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4019
0
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4020
0
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4021
0
    KmpDependInfoRD->completeDefinition();
4022
0
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4023
0
  }
4024
0
}
4025
4026
/// Returns the number of dependency entries stored in a depobj together with
/// an lvalue for the first entry of its kmp_depend_info array. The element
/// count is kept in the kmp_depend_info slot immediately *preceding* the
/// array (written by emitDepobjDependClause), so it is read from the
/// base_addr field at index -1.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // The depobj variable holds a pointer to the first visible element; load it
  // with the element type reinterpreted as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step one element back to the hidden header slot that stores the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4052
4053
/// Emits stores that fill kmp_depend_info entries of \p DependenciesArray for
/// a single 'depend' clause. \p Pos is either a host-side index (unsigned*)
/// when the element count is known at compile time, or an lvalue of a runtime
/// counter when an iterator modifier makes the count dynamic; in both cases
/// the position is advanced past each emitted entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, emit the iterator loop(s) around the
  // stores below; otherwise the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      // omp_all_memory is encoded as a zero address/length entry.
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the runtime counter and index with it.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the position: bump the host index, or emit an increment of the
    // runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4123
4124
/// For each depobj expression in a depobj-kind 'depend' clause, emits code
/// that computes the number of dependency entries it holds (accumulated into
/// a stack temporary inside the iterator scope, if any) and returns the loaded
/// per-expression totals.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Sizes are accumulated inside the iterator loop(s), so the totals
    // reflect every iteration; the loads happen after the scope closes.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Temporary accumulator: zero it, then add this depobj's count.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the final totals outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4161
4162
/// Copies the kmp_depend_info entries stored in each depobj of a depobj-kind
/// 'depend' clause into \p DependenciesArray via memcpy, advancing the runtime
/// position counter \p PosLVal by the number of entries copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Wrap the copies in the iterator loop(s) when a modifier is present.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data: byte size = element size * element count.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4198
4199
/// Emits the combined kmp_depend_info array for all 'depend' clauses of a
/// task-like directive and returns {number of elements (i32), array address}.
/// Plain dependencies get a fixed-size stack array; if any depobj or
/// iterator-modified dependency is present the total count is only known at
/// run time, so a VLA is emitted instead. The array is filled in three
/// passes: plain entries, iterator-modified entries, then depobj contents.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if every clause has an empty dependency list.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only plain (non-depobj, non-iterator) dependencies.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total = product of iterator trip counts * number of list items.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized case: total = static count + depobj + iterator counts.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as a VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a plain constant-size stack array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies, tracked with a host-side index.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Pass 2 uses a runtime counter, seeded with the count from pass 1.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4323
4324
/// Emits the dependency array for a 'depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading kmp_depend_info
/// element whose base_addr field stores the element count (consumed later by
/// getDepobjElements for 'update'/'destroy'); the returned address points
/// just past that header element.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: element count = product of iterator trip counts,
    // computed at run time.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count element; scale by the aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: allocation size is a compile-time constant (count + 1).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Start filling at index 1, past the count element: use a host index for
  // the static case, a runtime counter when iterators are involved.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real entry (one past the count element).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4410
4411
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4412
0
                                        SourceLocation Loc) {
4413
0
  ASTContext &C = CGM.getContext();
4414
0
  QualType FlagsTy;
4415
0
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4416
0
  LValue Base = CGF.EmitLoadOfPointerLValue(
4417
0
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4418
0
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4419
0
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4420
0
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4421
0
      CGF.ConvertTypeForMem(KmpDependInfoTy));
4422
0
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4423
0
      Addr.getElementType(), Addr.getPointer(),
4424
0
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4425
0
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4426
0
                                                               CGF.VoidPtrTy);
4427
0
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4428
  // Use default allocator.
4429
0
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4430
0
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4431
4432
  // _kmpc_free(gtid, addr, nullptr);
4433
0
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4434
0
                                CGM.getModule(), OMPRTL___kmpc_free),
4435
0
                            Args);
4436
0
}
4437
4438
/// Emits code for the 'update' clause of a depobj construct: loops over every
/// kmp_depend_info entry stored in the depobj and rewrites its flags field to
/// the runtime encoding of \p NewDepKind. The loop is built by hand with a
/// PHI over the element pointer.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  // End = Begin + NumDeps (one-past-the-last element).
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer; second incoming edge is added
  // below, after the loop body is emitted.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4486
4487
/// Emit code for a '#pragma omp task'-style directive: initialize the task
/// object via emitTaskInit(), then either hand it to the runtime scheduler
/// (then-branch) or, when the 'if' clause is false, run it immediately and
/// serially (else-branch).
/// \param D          The executable directive being lowered.
/// \param TaskFunction Outlined function holding the task body.
/// \param SharedsTy  Type of the record capturing shared variables.
/// \param Shareds    Address of the captured-shareds record.
/// \param IfCond     Expression of the 'if' clause, or null when absent.
/// \param Data       Collected clause data (dependences, tied-ness, nowait...).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate the kmp_task_t object and initialize its fields.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Argument slots for __kmpc_omp_task_with_deps; only filled (and used) when
  // the directive actually carries dependences.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the task through the runtime (with or without deps).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Argument slots for __kmpc_omp_taskwait_deps_51; filled only when deps
  // are present (the else-branch must still honor them before running
  // the task body inline).
  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause false): wait on deps, then run the task entry
  // directly between begin_if0/complete_if0 runtime notifications.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause, branch between the two emissions at runtime;
  // otherwise always take the then-path.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4606
4607
/// Emit code for a 'taskloop' directive: initialize the task object, store
/// the loop bounds/stride (and reduction data) into its kmp_task_t fields,
/// then invoke __kmpc_taskloop.
/// \param D          The loop directive being lowered.
/// \param TaskFunction Outlined function holding the loop body.
/// \param SharedsTy  Type of the record capturing shared variables.
/// \param Shareds    Address of the captured-shareds record.
/// \param IfCond     Expression of the 'if' clause, or null when absent.
/// \param Data       Collected clause data (schedule, reductions, ...).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The runtime takes the 'if' condition as an int; absent clause means 1.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task record from the loop's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kind encoding expected by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
4692
4693
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Builds an IR while-do loop walking LHS and RHS in lock-step and invoking
/// \p RedOpGen on each pair of elements, with LHSVar/RHSVar temporarily
/// privatized to the current element addresses.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen (used by the atomic-reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current element pointer for both arrays; the
  // back-edge incoming values are wired up after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edges come from the block current at branch time, which may differ
  // from BodyBB if RedOpGen created additional blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4775
4776
/// Emit reduction combiner. If the combiner is a simple expression emit it as
4777
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4778
/// UDR combiner function.
4779
static void emitReductionCombiner(CodeGenFunction &CGF,
4780
0
                                  const Expr *ReductionOp) {
4781
0
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4782
0
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4783
0
      if (const auto *DRE =
4784
0
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4785
0
        if (const auto *DRD =
4786
0
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4787
0
          std::pair<llvm::Function *, llvm::Function *> Reduction =
4788
0
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4789
0
          RValue Func = RValue::get(Reduction.first);
4790
0
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4791
0
          CGF.EmitIgnoredExpr(ReductionOp);
4792
0
          return;
4793
0
        }
4794
0
  CGF.EmitIgnoredExpr(ReductionOp);
4795
0
}
4796
4797
/// Emit the internal 'void reduction_func(void *lhs, void *rhs)' used by the
/// __kmpc_reduce family: both arguments are arrays of element pointers
/// (type \p ArgsElemType); each pair of elements is combined with the
/// corresponding expression from \p ReductionOps.
/// \param ReducerName Base name used to derive the function's symbol name.
/// \param ArgsElemType IR type of the pointer-array argument records.
/// \param Privates, LHSExprs, RHSExprs Parallel lists describing each
/// reduction item; variably-modified privates carry an extra size slot.
/// \return The newly created internal-linkage reduction function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map each LHS/RHS variable to the address stored in the respective array
  // slot. Idx tracks the array slot and may run ahead of I when a private is
  // variably modified (its size occupies an extra slot).
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA's opaque size expression to the value loaded from the
      // extra array slot before emitting the variably-modified type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the per-item combiners with the private mappings in effect.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4886
4887
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4888
                                                  const Expr *ReductionOp,
4889
                                                  const Expr *PrivateRef,
4890
                                                  const DeclRefExpr *LHS,
4891
0
                                                  const DeclRefExpr *RHS) {
4892
0
  if (PrivateRef->getType()->isArrayType()) {
4893
    // Emit reduction for array section.
4894
0
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4895
0
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4896
0
    EmitOMPAggregateReduction(
4897
0
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
4898
0
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4899
0
          emitReductionCombiner(CGF, ReductionOp);
4900
0
        });
4901
0
  } else {
4902
    // Emit reduction for array subscript or single variable.
4903
0
    emitReductionCombiner(CGF, ReductionOp);
4904
0
  }
4905
0
}
4906
4907
/// Emit code for the 'reduction' clause finalization: either inline combiners
/// (SimpleReduction), or the full __kmpc_reduce{_nowait} protocol with a
/// switch over the runtime's answer (case 1: locked combine, case 2: atomic
/// combine with critical-section fallback).
/// \param Privates, LHSExprs, RHSExprs, ReductionOps Parallel per-item lists.
/// \param Options Controls nowait vs. blocking and the simple-reduction path.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime protocol needed: emit every combiner inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the combiner into x = <update>(x op e) when it is a plain
      // assignment; only that shape is eligible for a simple atomic update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for non-simple updates: privatize the LHS into a
                // temp holding the current atomic value, then re-evaluate
                // the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5210
5211
/// Generates unique name for artificial threadprivate variables.
5212
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5213
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5214
0
                                      const Expr *Ref) {
5215
0
  SmallString<256> Buffer;
5216
0
  llvm::raw_svector_ostream Out(Buffer);
5217
0
  const clang::DeclRefExpr *DE;
5218
0
  const VarDecl *D = ::getBaseDecl(Ref, DE);
5219
0
  if (!D)
5220
0
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5221
0
  D = D->getCanonicalDecl();
5222
0
  std::string Name = CGM.getOpenMPRuntime().getName(
5223
0
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5224
0
  Out << Prefix << Name << "_"
5225
0
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5226
0
  return std::string(Out.str());
5227
0
}
5228
5229
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
/// \return The newly created internal-linkage initializer function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are `void *restrict` — the runtime guarantees they do not
  // alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the private copy's address from the first argument, viewed as a
  // pointer to the reduction item's type.
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  // The lambda returning false means no custom default-init hook is used.
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5294
5295
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param ReductionOp The combiner expression from the reduction clause.
/// \param LHS,RHS DeclRefExprs for the in/out and in operands referenced by
///        \p ReductionOp; they are remapped onto the function arguments below.
/// \return The newly created internal-linkage combiner function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5371
5372
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \return The finalizer function, or nullptr when the reduction item's type
///         needs no cleanups (so the caller stores a null function pointer).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor work needed — skip emitting an empty function entirely.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // The single argument is the address of the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5419
5420
// Emits the setup for a task reduction: builds an array of
// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
// shared/orig addresses, the item size, and pointers to the generated
// init/fini/comb helper functions) and passes it to
// __kmpc_taskred_modifier_init or __kmpc_taskred_init. Returns the taskgroup
// descriptor produced by the runtime, or nullptr when there is nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // Field order must match the runtime's struct layout exactly.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no destructor call is needed.
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 marks the delayed-creation (variable-sized) case.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
5545
5546
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5547
                                            SourceLocation Loc,
5548
0
                                            bool IsWorksharingReduction) {
5549
  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5550
  // is_ws, int num, void *data);
5551
0
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5552
0
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5553
0
                                                CGM.IntTy, /*isSigned=*/true);
5554
0
  llvm::Value *Args[] = {IdentTLoc, GTid,
5555
0
                         llvm::ConstantInt::get(CGM.IntTy,
5556
0
                                                IsWorksharingReduction ? 1 : 0,
5557
0
                                                /*isSigned=*/true)};
5558
0
  (void)CGF.EmitRuntimeCall(
5559
0
      OMPBuilder.getOrCreateRuntimeFunction(
5560
0
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5561
0
      Args);
5562
0
}
5563
5564
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5565
                                              SourceLocation Loc,
5566
                                              ReductionCodeGen &RCG,
5567
0
                                              unsigned N) {
5568
0
  auto Sizes = RCG.getSizes(N);
5569
  // Emit threadprivate global variable if the type is non-constant
5570
  // (Sizes.second = nullptr).
5571
0
  if (Sizes.second) {
5572
0
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5573
0
                                                     /*isSigned=*/false);
5574
0
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
5575
0
        CGF, CGM.getContext().getSizeType(),
5576
0
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5577
0
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5578
0
  }
5579
0
}
5580
5581
// Returns the address of the current thread's private copy of a task
// reduction item, via
// void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
// where tg is the taskgroup descriptor and d the shared item's address.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *SharedPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      SharedLVal.getPointer(CGF), CGM.VoidPtrTy);
  llvm::Value *CallArgs[] = {GTid, ReductionsPtr, SharedPtr};
  llvm::Value *ItemPtr = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
      CallArgs);
  // The private copy inherits the shared item's alignment.
  return Address(ItemPtr, CGF.Int8Ty, SharedLVal.getAlignment());
}
5600
5601
// Emits a taskwait: either through the OpenMPIRBuilder (when enabled and no
// dependences are present) or through a direct call to
// __kmpc_omp_taskwait_deps_51 / __kmpc_omp_taskwait.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items into an array the runtime can read.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      // NOTE(review): scope presumably bounds cleanups for temporaries created
      // for the dependence list — confirm against emitDependClause.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an OpenMP region, a taskwait is an untied-task scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5653
5654
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5655
                                           OpenMPDirectiveKind InnerKind,
5656
                                           const RegionCodeGenTy &CodeGen,
5657
0
                                           bool HasCancel) {
5658
0
  if (!CGF.HaveInsertPoint())
5659
0
    return;
5660
0
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5661
0
                                 InnerKind != OMPD_critical &&
5662
0
                                     InnerKind != OMPD_master &&
5663
0
                                     InnerKind != OMPD_masked);
5664
0
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5665
0
}
5666
5667
namespace {
/// Cancellation kinds passed as the cncl_kind argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel the innermost parallel region.
  CancelLoop = 2,      // Cancel the innermost worksharing loop.
  CancelSections = 3,  // Cancel the innermost sections construct.
  CancelTaskgroup = 4  // Cancel the current taskgroup.
};
} // anonymous namespace
5676
5677
0
/// Maps the construct named in a cancel / cancellation point directive to the
/// runtime's cancellation-kind constant. Only parallel, for, sections and
/// taskgroup are valid cancel regions.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  switch (CancelRegion) {
  case OMPD_parallel:
    return CancelParallel;
  case OMPD_for:
    return CancelLoop;
  case OMPD_sections:
    return CancelSections;
  default:
    assert(CancelRegion == OMPD_taskgroup);
    return CancelTaskgroup;
  }
}
5691
5692
// Emits a 'cancellation point' construct: calls __kmpc_cancellationpoint and,
// if it reports a pending cancellation, branches out of the construct
// (emitting a cancel barrier first for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
5731
5732
// Emits a 'cancel' construct: calls __kmpc_cancel (guarded by the optional
// if-clause condition) and, on a positive result, branches out of the
// construct, emitting a cancel barrier first for parallel regions.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual emission is factored into a lambda so it can serve as the
    // 'then' branch of an if-clause or be run unconditionally.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // No-op 'else' branch: cancellation is simply skipped.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
5777
5778
namespace {
/// Cleanup action for uses_allocators support.
/// Wraps a target region's code generation: Enter() initializes each listed
/// allocator before the region body, Exit() finalizes it afterwards. Each
/// pair is (allocator expression, allocator-traits expression).
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  // Called before the region body: create every allocator.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  // Called after the region body: destroy every allocator.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
5805
5806
// Emits the outlined function for a target region. Collects uses_allocators
// clause data (allocator + traits pairs) and attaches an action that
// initializes/finalizes those allocators around the region body, then
// delegates the actual outlining to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      // NOTE: this local 'D' shadows the directive parameter 'D' above.
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Only allocators that carry traits need init/fini handling here.
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
5826
5827
/// Emit initialization of one allocator from a 'uses_allocators' clause:
/// calls __kmpc_init_allocator with the traits array and stores the returned
/// handle into the (freshly allocated) allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // The runtime expects the gtid as a signed 'int'.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as 'void **' for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.getPointer();

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Materialize the allocator variable first, then write the handle into it.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the returned 'void *' handle to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
5860
5861
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5862
0
                                             const Expr *Allocator) {
5863
0
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5864
0
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5865
0
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5866
0
  llvm::Value *AllocatorVal =
5867
0
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5868
0
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5869
0
                                          CGF.getContext().VoidPtrTy,
5870
0
                                          Allocator->getExprLoc());
5871
0
  (void)CGF.EmitRuntimeCall(
5872
0
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5873
0
                                            OMPRTL___kmpc_destroy_allocator),
5874
0
      {ThreadId, AllocatorVal});
5875
0
}
5876
5877
/// Compute lower/upper bounds on the number of threads and teams for a target
/// region, combining directive clause information with any launch-bound style
/// attributes attached via 'ompx_attribute'.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  // Seed the bounds from the directive's clauses (num_teams, num_threads,
  // thread_limit, ...).
  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  // Tighten the bounds with CUDA launch-bounds / AMDGPU flat-work-group-size
  // attributes carried by 'ompx_attribute' clauses.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // -1 means "this attribute imposes no upper bound".
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      // Minimums only grow; maximums only shrink (values <= 0 mean
      // "unbounded so far").
      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
5912
5913
/// Create (through the OMPIRBuilder) the outlined function for a target
/// region, set target-specific attributes on it, and return it plus the
/// offload entry ID through the out-parameters.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  // Build a unique entry-point identifier from the presumed source location
  // and the parent function name.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback invoked by the OMPIRBuilder (with the chosen entry-point name)
  // to emit the outlined function body from the captured statement.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  // The builder may decide not to emit the function at all.
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  // Propagate AMDGPU waves-per-EU hints requested via 'ompx_attribute'.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
5946
5947
/// Checks if the expression is constant or does not have non-trivial function
5948
/// calls.
5949
0
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5950
  // We can skip constant expressions.
5951
  // We can skip expressions with trivial calls or simple expressions.
5952
0
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5953
0
          !E->hasNonTrivialCall(Ctx)) &&
5954
0
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5955
0
}
5956
5957
/// Return the single meaningful statement nested inside \p Body, looking
/// through compound statements and skipping trivially ignorable statements
/// (constant expressions, asm/null statements, some OpenMP directives, and
/// benign declarations). Returns nullptr when more than one meaningful child
/// is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Expressions with no effects do not count as a child.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations in this list don't generate code in the region.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and never-used locals are also ignorable.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child found, if any.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
5998
5999
/// Determine the min/max number of teams for the given target directive and
/// return the num_teams expression when one applies (nullptr otherwise).
/// MinTeamsVal == MaxTeamsVal == 0 means "let the runtime decide";
/// -1 (for plain 'target' with no nested directive) marks that no teams
/// region needs to be emitted.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the single nested directive (if any) for a
    // teams construct carrying a num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          // Constant-fold the clause value into the bounds when possible.
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: runtime chooses.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams forms: the num_teams clause (if any) is directly
    // on this directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  // All remaining kinds are not target execution directives and should have
  // been rejected by the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6130
6131
/// Emit (host side) the i32 number-of-teams value for the given target
/// directive: either the evaluated num_teams expression or the constant bound
/// computed by getNumTeamsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression lives inside the nested teams region, so it
      // must be emitted in the captured-statement context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined forms: the clause expression is directly evaluable here.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // Fixed assert message: this function deals with *teams* bounds (MinNT /
  // MaxNT), not thread bounds — the old text was a copy-paste from the
  // num-threads path.
  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
6172
6173
/// Check for a num threads constant value (its upper bound is stored in
/// \p UpperBound), or expression (stored in \p E). If the value is
/// conditional (via an if-clause), store the condition in \p CondVal. If
/// \p E, and \p CondVal respectively, are nullptr, no expression evaluation
/// is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  // Only a single nested directive (if any) can contribute clauses here.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      // Pick the if-clause applying to 'parallel' (or with no modifier).
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs with one thread.
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            // Emit captured pre-init declarations the condition refers to.
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          // NOTE(review): when UpperBound is non-zero (including the initial
          // -1 meaning "unset"), this takes the constant outright; the
          // std::min arm is only reached for UpperBound == 0, where it
          // always yields 0. The condition/operands look like they may be
          // swapped relative to "min with existing bound" — confirm intent.
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found a upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      // Expression evaluation was not requested; only the bound is needed.
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        // Emit captured pre-init declarations the expression refers to.
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  // A nested simd region executes sequentially (one thread).
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
6267
6268
/// Determine the num_threads expression (and constant upper bound) for the
/// given target directive, taking nested teams/parallel regions, thread_limit
/// clauses and if-clauses into account. With UpperBoundOnly set, only the
/// integer bound is computed and no expressions are emitted.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  // Only collect the expression when the caller wants more than the bound.
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Fold a constant thread-limiting expression into UpperBound and
  // (optionally) report the expression itself through EPtr.
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        // NOTE(review): same pattern as in getNumThreads() — when UpperBound
        // is non-zero the constant replaces it outright and std::min only
        // runs for UpperBound == 0; confirm the intended "min with existing
        // bound" semantics.
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found a upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // The region runs sequentially: bound is 1.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            // Emit pre-init declarations the thread_limit expression needs.
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested teams (non-distribute) region to reach the
      // parallel region that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested distribute may carry the parallel region one level deeper.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Handle the if-clause guarding the parallel region, if requested.
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: sequential execution.
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6427
6428
/// Emit (host side) the i32 number-of-threads value for the given target
/// directive, combining the num_threads expression, the thread_limit
/// expression and any if-clause condition.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  // -1 marks "no bound found yet".
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression is already handled.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If both the thread limit and the num threads expressions were present,
  // take the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
6484
6485
namespace {
6486
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6487
6488
// Utility to handle information from clauses associated with a given
6489
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6490
// It provides a convenient interface to obtain the information and generate
6491
// code for that information.
6492
class MappableExprsHandler {
6493
public:
6494
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
6495
0
  static unsigned getFlagMemberOffset() {
6496
0
    unsigned Offset = 0;
6497
0
    for (uint64_t Remain =
6498
0
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6499
0
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6500
0
         !(Remain & 1); Remain = Remain >> 1)
6501
0
      Offset++;
6502
0
    return Offset;
6503
0
  }
6504
6505
  /// Class that holds debugging information for a data mapping to be passed to
6506
  /// the runtime library.
6507
  class MappingExprInfo {
6508
    /// The variable declaration used for the data mapping.
6509
    const ValueDecl *MapDecl = nullptr;
6510
    /// The original expression used in the map clause, or null if there is
6511
    /// none.
6512
    const Expr *MapExpr = nullptr;
6513
6514
  public:
6515
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6516
0
        : MapDecl(MapDecl), MapExpr(MapExpr) {}
6517
6518
0
    const ValueDecl *getMapDecl() const { return MapDecl; }
6519
0
    const Expr *getMapExpr() const { return MapExpr; }
6520
  };
6521
6522
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6523
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6524
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6525
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6526
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6527
  using MapNonContiguousArrayTy =
6528
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6529
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6530
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6531
6532
  /// This structure contains combined information generated for mappable
6533
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
6534
  /// mappers, and non-contiguous information.
6535
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6536
    MapExprsArrayTy Exprs;
6537
    MapValueDeclsArrayTy Mappers;
6538
    MapValueDeclsArrayTy DevicePtrDecls;
6539
6540
    /// Append arrays in \a CurInfo.
6541
0
    void append(MapCombinedInfoTy &CurInfo) {
6542
0
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6543
0
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6544
0
                            CurInfo.DevicePtrDecls.end());
6545
0
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6546
0
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6547
0
    }
6548
  };
6549
6550
  /// Holds, for a struct being mapped, its lowest and highest mapped
  /// elements:
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping info gathered for the struct before the combined entry is
    /// emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: its field index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range.
    Address LB = Address::invalid();
    /// NOTE(review): presumably set when the highest element is an array
    /// section — confirm at the use sites.
    bool IsArraySection = false;
    /// NOTE(review): presumably set when the complete record is mapped —
    /// confirm at the use sites.
    bool HasCompleteRecord = false;
  };
6565
6566
private:
  /// Bundle describing one mappable-expression component list extracted from
  /// a clause, together with the clause's map type, modifiers and related
  /// attributes. (The previous comment here, about how a device pointer has
  /// to be returned, did not match this struct.)
  struct MapInfo {
    // Components of the mappable expression, from base to final element.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type of the originating clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion modifiers (for to/from clauses on target update).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    // Whether the mapping was implicit rather than written by the user.
    bool IsImplicit = false;
    // User-defined mapper attached to the clause, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable reference expression, if any.
    const Expr *VarRef = nullptr;
    // Whether this entry comes from a use_device_addr-style context.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6593
6594
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression the clause applies to.
    const Expr *IE = nullptr;
    /// Declaration whose entry is being deferred.
    const ValueDecl *VD = nullptr;
    /// Whether the entry originates from use_device_addr (as opposed to
    /// use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6606
6607
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and the map clause that mentions them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6637
6638
0
  /// Compute, as an llvm::Value, the number of bytes covered by the mapped
  /// expression \p E. Array shaping expressions yield pointee size times the
  /// product of all dimensions; array sections yield a length-based size
  /// (with the "[lb:]" form computed as total size minus the lower-bound
  /// offset, clamped at zero); everything else falls back to the size of the
  /// expression's (non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      // Multiply the pointee size by each (size_t-converted) dimension.
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee (pointer base) or the array
      // element type (array base).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = total size of base - (lb * element size), clamped below at 0
      // by the select so the subtraction can never wrap.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
6712
6713
  /// Return the corresponding bits for a given map clause modifier. Add
6714
  /// a flag marking the map as a pointer if requested. Add a flag marking the
6715
  /// map as the first one of a series of maps that relate to the same map
6716
  /// expression.
6717
  OpenMPOffloadMappingFlags getMapTypeBits(
6718
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6719
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6720
0
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6721
0
    OpenMPOffloadMappingFlags Bits =
6722
0
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6723
0
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6724
0
    switch (MapType) {
6725
0
    case OMPC_MAP_alloc:
6726
0
    case OMPC_MAP_release:
6727
      // alloc and release is the default behavior in the runtime library,  i.e.
6728
      // if we don't pass any bits alloc/release that is what the runtime is
6729
      // going to do. Therefore, we don't need to signal anything for these two
6730
      // type modifiers.
6731
0
      break;
6732
0
    case OMPC_MAP_to:
6733
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6734
0
      break;
6735
0
    case OMPC_MAP_from:
6736
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6737
0
      break;
6738
0
    case OMPC_MAP_tofrom:
6739
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6740
0
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6741
0
      break;
6742
0
    case OMPC_MAP_delete:
6743
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6744
0
      break;
6745
0
    case OMPC_MAP_unknown:
6746
0
      llvm_unreachable("Unexpected map type!");
6747
0
    }
6748
0
    if (AddPtrFlag)
6749
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6750
0
    if (AddIsTargetParamFlag)
6751
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6752
0
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6753
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6754
0
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6755
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6756
0
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6757
0
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6758
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6759
0
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6760
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6761
0
    if (IsNonContiguous)
6762
0
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6763
0
    return Bits;
6764
0
  }
6765
6766
  /// Return true if the provided expression is a final array section. A
6767
  /// final array section, is one whose length can't be proved to be one.
6768
0
  bool isFinalArraySectionExpression(const Expr *E) const {
6769
0
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6770
6771
    // It is not an array section and therefore not a unity-size one.
6772
0
    if (!OASE)
6773
0
      return false;
6774
6775
    // An array section with no colon always refer to a single element.
6776
0
    if (OASE->getColonLocFirst().isInvalid())
6777
0
      return false;
6778
6779
0
    const Expr *Length = OASE->getLength();
6780
6781
    // If we don't have a length we have to check if the array has size 1
6782
    // for this dimension. Also, we should always expect a length if the
6783
    // base type is pointer.
6784
0
    if (!Length) {
6785
0
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6786
0
                             OASE->getBase()->IgnoreParenImpCasts())
6787
0
                             .getCanonicalType();
6788
0
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6789
0
        return ATy->getSize().getSExtValue() != 1;
6790
      // If we don't have a constant dimension length, we have to consider
6791
      // the current section as having any size, so it is not necessarily
6792
      // unitary. If it happen to be unity size, that's user fault.
6793
0
      return true;
6794
0
    }
6795
6796
    // Check if the length evaluates to 1.
6797
0
    Expr::EvalResult Result;
6798
0
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6799
0
      return true; // Can have more that size 1.
6800
6801
0
    llvm::APSInt ConstLength = Result.Val.getInt();
6802
0
    return ConstLength.getSExtValue() != 1;
6803
0
  }
6804
6805
  /// Generate the base pointers, section pointers, sizes, map type bits, and
6806
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
6807
  /// map type, map or motion modifiers, and expression components.
6808
  /// \a IsFirstComponent should be set to true if the provided set of
6809
  /// components is the first associated with a capture.
6810
  void generateInfoForComponentList(
6811
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6812
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6813
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6814
      MapCombinedInfoTy &CombinedInfo,
6815
      MapCombinedInfoTy &StructBaseCombinedInfo,
6816
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6817
      bool IsImplicit, bool GenerateAllInfoForClauses,
6818
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6819
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6820
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6821
0
          OverlappedElements = std::nullopt) const {
6822
    // The following summarizes what has to be generated for each map and the
6823
    // types below. The generated information is expressed in this order:
6824
    // base pointer, section pointer, size, flags
6825
    // (to add to the ones that come from the map type and modifier).
6826
    //
6827
    // double d;
6828
    // int i[100];
6829
    // float *p;
6830
    // int **a = &i;
6831
    //
6832
    // struct S1 {
6833
    //   int i;
6834
    //   float f[50];
6835
    // }
6836
    // struct S2 {
6837
    //   int i;
6838
    //   float f[50];
6839
    //   S1 s;
6840
    //   double *p;
6841
    //   struct S2 *ps;
6842
    //   int &ref;
6843
    // }
6844
    // S2 s;
6845
    // S2 *ps;
6846
    //
6847
    // map(d)
6848
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6849
    //
6850
    // map(i)
6851
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6852
    //
6853
    // map(i[1:23])
6854
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6855
    //
6856
    // map(p)
6857
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6858
    //
6859
    // map(p[1:24])
6860
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6861
    // in unified shared memory mode or for local pointers
6862
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6863
    //
6864
    // map((*a)[0:3])
6865
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6866
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6867
    //
6868
    // map(**a)
6869
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6870
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6871
    //
6872
    // map(s)
6873
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6874
    //
6875
    // map(s.i)
6876
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6877
    //
6878
    // map(s.s.f)
6879
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6880
    //
6881
    // map(s.p)
6882
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6883
    //
6884
    // map(to: s.p[:22])
6885
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6886
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6887
    // &(s.p), &(s.p[0]), 22*sizeof(double),
6888
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6889
    // (*) alloc space for struct members, only this is a target parameter
6890
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
6891
    //      optimizes this entry out, same in the examples below)
6892
    // (***) map the pointee (map: to)
6893
    //
6894
    // map(to: s.ref)
6895
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6896
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6897
    // (*) alloc space for struct members, only this is a target parameter
6898
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
6899
    //      optimizes this entry out, same in the examples below)
6900
    // (***) map the pointee (map: to)
6901
    //
6902
    // map(s.ps)
6903
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6904
    //
6905
    // map(from: s.ps->s.i)
6906
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6907
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6908
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
6909
    //
6910
    // map(to: s.ps->ps)
6911
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6912
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6913
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
6914
    //
6915
    // map(s.ps->ps->ps)
6916
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6917
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6918
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6919
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6920
    //
6921
    // map(to: s.ps->ps->s.f[:22])
6922
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6923
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6924
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6925
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6926
    //
6927
    // map(ps)
6928
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6929
    //
6930
    // map(ps->i)
6931
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6932
    //
6933
    // map(ps->s.f)
6934
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6935
    //
6936
    // map(from: ps->p)
6937
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6938
    //
6939
    // map(to: ps->p[:22])
6940
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6941
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6942
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6943
    //
6944
    // map(ps->ps)
6945
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6946
    //
6947
    // map(from: ps->ps->s.i)
6948
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6949
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6950
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6951
    //
6952
    // map(from: ps->ps->ps)
6953
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6954
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6955
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6956
    //
6957
    // map(ps->ps->ps->ps)
6958
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6959
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6960
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6961
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6962
    //
6963
    // map(to: ps->ps->ps->s.f[:22])
6964
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6965
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6966
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6967
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6968
    //
6969
    // map(to: s.f[:22]) map(from: s.p[:33])
6970
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6971
    //     sizeof(double*) (**), TARGET_PARAM
6972
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6973
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6974
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6975
    // (*) allocate contiguous space needed to fit all mapped members even if
6976
    //     we allocate space for members not mapped (in this example,
6977
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
6978
    //     them as well because they fall between &s.f[0] and &s.p)
6979
    //
6980
    // map(from: s.f[:22]) map(to: ps->p[:33])
6981
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6982
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6983
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6984
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6985
    // (*) the struct this entry pertains to is the 2nd element in the list of
6986
    //     arguments, hence MEMBER_OF(2)
6987
    //
6988
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6989
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6990
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6991
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
6992
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6993
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
6994
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
6995
    // (*) the struct this entry pertains to is the 4th element in the list
6996
    //     of arguments, hence MEMBER_OF(4)
6997
6998
    // Track if the map information being generated is the first for a capture.
6999
0
    bool IsCaptureFirstInfo = IsFirstComponentList;
7000
    // When the variable is on a declare target link or in a to clause with
7001
    // unified memory, a reference is needed to hold the host/device address
7002
    // of the variable.
7003
0
    bool RequiresReference = false;
7004
7005
    // Scan the components from the base to the complete expression.
7006
0
    auto CI = Components.rbegin();
7007
0
    auto CE = Components.rend();
7008
0
    auto I = CI;
7009
7010
    // Track if the map information being generated is the first for a list of
7011
    // components.
7012
0
    bool IsExpressionFirstInfo = true;
7013
0
    bool FirstPointerInComplexData = false;
7014
0
    Address BP = Address::invalid();
7015
0
    const Expr *AssocExpr = I->getAssociatedExpression();
7016
0
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7017
0
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7018
0
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7019
7020
0
    if (isa<MemberExpr>(AssocExpr)) {
7021
      // The base is the 'this' pointer. The content of the pointer is going
7022
      // to be the base of the field being mapped.
7023
0
      BP = CGF.LoadCXXThisAddress();
7024
0
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7025
0
               (OASE &&
7026
0
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7027
0
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7028
0
    } else if (OAShE &&
7029
0
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7030
0
      BP = Address(
7031
0
          CGF.EmitScalarExpr(OAShE->getBase()),
7032
0
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7033
0
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7034
0
    } else {
7035
      // The base is the reference to the variable.
7036
      // BP = &Var.
7037
0
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7038
0
      if (const auto *VD =
7039
0
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7040
0
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7041
0
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7042
0
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7043
0
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7044
0
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7045
0
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7046
0
            RequiresReference = true;
7047
0
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7048
0
          }
7049
0
        }
7050
0
      }
7051
7052
      // If the variable is a pointer and is being dereferenced (i.e. is not
7053
      // the last component), the base has to be the pointer itself, not its
7054
      // reference. References are ignored for mapping purposes.
7055
0
      QualType Ty =
7056
0
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
7057
0
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
7058
        // No need to generate individual map information for the pointer, it
7059
        // can be associated with the combined storage if shared memory mode is
7060
        // active or the base declaration is not global variable.
7061
0
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7062
0
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7063
0
            !VD || VD->hasLocalStorage())
7064
0
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7065
0
        else
7066
0
          FirstPointerInComplexData = true;
7067
0
        ++I;
7068
0
      }
7069
0
    }
7070
7071
    // Track whether a component of the list should be marked as MEMBER_OF some
7072
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7073
    // in a component list should be marked as MEMBER_OF, all subsequent entries
7074
    // do not belong to the base struct. E.g.
7075
    // struct S2 s;
7076
    // s.ps->ps->ps->f[:]
7077
    //   (1) (2) (3) (4)
7078
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7079
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7080
    // is the pointee of ps(2) which is not member of struct s, so it should not
7081
    // be marked as such (it is still PTR_AND_OBJ).
7082
    // The variable is initialized to false so that PTR_AND_OBJ entries which
7083
    // are not struct members are not considered (e.g. array of pointers to
7084
    // data).
7085
0
    bool ShouldBeMemberOf = false;
7086
7087
    // Variable keeping track of whether or not we have encountered a component
7088
    // in the component list which is a member expression. Useful when we have a
7089
    // pointer or a final array section, in which case it is the previous
7090
    // component in the list which tells us whether we have a member expression.
7091
    // E.g. X.f[:]
7092
    // While processing the final array section "[:]" it is "f" which tells us
7093
    // whether we are dealing with a member of a declared struct.
7094
0
    const MemberExpr *EncounteredME = nullptr;
7095
7096
    // Track for the total number of dimension. Start from one for the dummy
7097
    // dimension.
7098
0
    uint64_t DimSize = 1;
7099
7100
0
    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7101
0
    bool IsPrevMemberReference = false;
7102
7103
    // We need to check if we will be encountering any MEs. If we do not
7104
    // encounter any ME expression it means we will be mapping the whole struct.
7105
    // In that case we need to skip adding an entry for the struct to the
7106
    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7107
    // list only when generating all info for clauses.
7108
0
    bool IsMappingWholeStruct = true;
7109
0
    if (!GenerateAllInfoForClauses) {
7110
0
      IsMappingWholeStruct = false;
7111
0
    } else {
7112
0
      for (auto TempI = I; TempI != CE; ++TempI) {
7113
0
        const MemberExpr *PossibleME =
7114
0
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7115
0
        if (PossibleME) {
7116
0
          IsMappingWholeStruct = false;
7117
0
          break;
7118
0
        }
7119
0
      }
7120
0
    }
7121
7122
0
    for (; I != CE; ++I) {
7123
      // If the current component is member of a struct (parent struct) mark it.
7124
0
      if (!EncounteredME) {
7125
0
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7126
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7127
        // as MEMBER_OF the parent struct.
7128
0
        if (EncounteredME) {
7129
0
          ShouldBeMemberOf = true;
7130
          // Do not emit as complex pointer if this is actually not array-like
7131
          // expression.
7132
0
          if (FirstPointerInComplexData) {
7133
0
            QualType Ty = std::prev(I)
7134
0
                              ->getAssociatedDeclaration()
7135
0
                              ->getType()
7136
0
                              .getNonReferenceType();
7137
0
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7138
0
            FirstPointerInComplexData = false;
7139
0
          }
7140
0
        }
7141
0
      }
7142
7143
0
      auto Next = std::next(I);
7144
7145
      // We need to generate the addresses and sizes if this is the last
7146
      // component, if the component is a pointer or if it is an array section
7147
      // whose length can't be proved to be one. If this is a pointer, it
7148
      // becomes the base address for the following components.
7149
7150
      // A final array section, is one whose length can't be proved to be one.
7151
      // If the map item is non-contiguous then we don't treat any array section
7152
      // as final array section.
7153
0
      bool IsFinalArraySection =
7154
0
          !IsNonContiguous &&
7155
0
          isFinalArraySectionExpression(I->getAssociatedExpression());
7156
7157
      // If we have a declaration for the mapping use that, otherwise use
7158
      // the base declaration of the map clause.
7159
0
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7160
0
                                     ? I->getAssociatedDeclaration()
7161
0
                                     : BaseDecl;
7162
0
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7163
0
                                               : MapExpr;
7164
7165
      // Get information on whether the element is a pointer. Have to do a
7166
      // special treatment for array sections given that they are built-in
7167
      // types.
7168
0
      const auto *OASE =
7169
0
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7170
0
      const auto *OAShE =
7171
0
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7172
0
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7173
0
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7174
0
      bool IsPointer =
7175
0
          OAShE ||
7176
0
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7177
0
                       .getCanonicalType()
7178
0
                       ->isAnyPointerType()) ||
7179
0
          I->getAssociatedExpression()->getType()->isAnyPointerType();
7180
0
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7181
0
                               MapDecl &&
7182
0
                               MapDecl->getType()->isLValueReferenceType();
7183
0
      bool IsNonDerefPointer = IsPointer &&
7184
0
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7185
0
                               !IsNonContiguous;
7186
7187
0
      if (OASE)
7188
0
        ++DimSize;
7189
7190
0
      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7191
0
          IsFinalArraySection) {
7192
        // If this is not the last component, we expect the pointer to be
7193
        // associated with an array expression or member expression.
7194
0
        assert((Next == CE ||
7195
0
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
7196
0
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7197
0
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7198
0
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7199
0
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7200
0
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7201
0
               "Unexpected expression");
7202
7203
0
        Address LB = Address::invalid();
7204
0
        Address LowestElem = Address::invalid();
7205
0
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7206
0
                                       const MemberExpr *E) {
7207
0
          const Expr *BaseExpr = E->getBase();
7208
          // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7209
          // scalar.
7210
0
          LValue BaseLV;
7211
0
          if (E->isArrow()) {
7212
0
            LValueBaseInfo BaseInfo;
7213
0
            TBAAAccessInfo TBAAInfo;
7214
0
            Address Addr =
7215
0
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7216
0
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
7217
0
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7218
0
          } else {
7219
0
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7220
0
          }
7221
0
          return BaseLV;
7222
0
        };
7223
0
        if (OAShE) {
7224
0
          LowestElem = LB =
7225
0
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
7226
0
                      CGF.ConvertTypeForMem(
7227
0
                          OAShE->getBase()->getType()->getPointeeType()),
7228
0
                      CGF.getContext().getTypeAlignInChars(
7229
0
                          OAShE->getBase()->getType()));
7230
0
        } else if (IsMemberReference) {
7231
0
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7232
0
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7233
0
          LowestElem = CGF.EmitLValueForFieldInitialization(
7234
0
                              BaseLVal, cast<FieldDecl>(MapDecl))
7235
0
                           .getAddress(CGF);
7236
0
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7237
0
                   .getAddress(CGF);
7238
0
        } else {
7239
0
          LowestElem = LB =
7240
0
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7241
0
                  .getAddress(CGF);
7242
0
        }
7243
7244
        // If this component is a pointer inside the base struct then we don't
7245
        // need to create any entry for it - it will be combined with the object
7246
        // it is pointing to into a single PTR_AND_OBJ entry.
7247
0
        bool IsMemberPointerOrAddr =
7248
0
            EncounteredME &&
7249
0
            (((IsPointer || ForDeviceAddr) &&
7250
0
              I->getAssociatedExpression() == EncounteredME) ||
7251
0
             (IsPrevMemberReference && !IsPointer) ||
7252
0
             (IsMemberReference && Next != CE &&
7253
0
              !Next->getAssociatedExpression()->getType()->isPointerType()));
7254
0
        if (!OverlappedElements.empty() && Next == CE) {
7255
          // Handle base element with the info for overlapped elements.
7256
0
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
7257
0
          assert(!IsPointer &&
7258
0
                 "Unexpected base element with the pointer type.");
7259
          // Mark the whole struct as the struct that requires allocation on the
7260
          // device.
7261
0
          PartialStruct.LowestElem = {0, LowestElem};
7262
0
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7263
0
              I->getAssociatedExpression()->getType());
7264
0
          Address HB = CGF.Builder.CreateConstGEP(
7265
0
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7266
0
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7267
0
              TypeSize.getQuantity() - 1);
7268
0
          PartialStruct.HighestElem = {
7269
0
              std::numeric_limits<decltype(
7270
0
                  PartialStruct.HighestElem.first)>::max(),
7271
0
              HB};
7272
0
          PartialStruct.Base = BP;
7273
0
          PartialStruct.LB = LB;
7274
0
          assert(
7275
0
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7276
0
              "Overlapped elements must be used only once for the variable.");
7277
0
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7278
          // Emit data for non-overlapped data.
7279
0
          OpenMPOffloadMappingFlags Flags =
7280
0
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7281
0
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7282
0
                             /*AddPtrFlag=*/false,
7283
0
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7284
0
          llvm::Value *Size = nullptr;
7285
          // Do bitcopy of all non-overlapped structure elements.
7286
0
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7287
0
                   Component : OverlappedElements) {
7288
0
            Address ComponentLB = Address::invalid();
7289
0
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7290
0
                 Component) {
7291
0
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7292
0
                const auto *FD = dyn_cast<FieldDecl>(VD);
7293
0
                if (FD && FD->getType()->isLValueReferenceType()) {
7294
0
                  const auto *ME =
7295
0
                      cast<MemberExpr>(MC.getAssociatedExpression());
7296
0
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7297
0
                  ComponentLB =
7298
0
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7299
0
                          .getAddress(CGF);
7300
0
                } else {
7301
0
                  ComponentLB =
7302
0
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7303
0
                          .getAddress(CGF);
7304
0
                }
7305
0
                Size = CGF.Builder.CreatePtrDiff(
7306
0
                    CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7307
0
                break;
7308
0
              }
7309
0
            }
7310
0
            assert(Size && "Failed to determine structure size");
7311
0
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7312
0
            CombinedInfo.BasePointers.push_back(BP.getPointer());
7313
0
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
7314
0
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7315
0
            CombinedInfo.Pointers.push_back(LB.getPointer());
7316
0
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7317
0
                Size, CGF.Int64Ty, /*isSigned=*/true));
7318
0
            CombinedInfo.Types.push_back(Flags);
7319
0
            CombinedInfo.Mappers.push_back(nullptr);
7320
0
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7321
0
                                                                      : 1);
7322
0
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7323
0
          }
7324
0
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7325
0
          CombinedInfo.BasePointers.push_back(BP.getPointer());
7326
0
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
7327
0
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7328
0
          CombinedInfo.Pointers.push_back(LB.getPointer());
7329
0
          Size = CGF.Builder.CreatePtrDiff(
7330
0
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7331
0
              LB.getPointer());
7332
0
          CombinedInfo.Sizes.push_back(
7333
0
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7334
0
          CombinedInfo.Types.push_back(Flags);
7335
0
          CombinedInfo.Mappers.push_back(nullptr);
7336
0
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7337
0
                                                                    : 1);
7338
0
          break;
7339
0
        }
7340
0
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7341
        // Skip adding an entry in the CurInfo of this combined entry if the
7342
        // whole struct is currently being mapped. The struct needs to be added
7343
        // in the first position before any data internal to the struct is being
7344
        // mapped.
7345
0
        if (!IsMemberPointerOrAddr ||
7346
0
            (Next == CE && MapType != OMPC_MAP_unknown)) {
7347
0
          if (!IsMappingWholeStruct) {
7348
0
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7349
0
            CombinedInfo.BasePointers.push_back(BP.getPointer());
7350
0
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
7351
0
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7352
0
            CombinedInfo.Pointers.push_back(LB.getPointer());
7353
0
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7354
0
                Size, CGF.Int64Ty, /*isSigned=*/true));
7355
0
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7356
0
                                                                      : 1);
7357
0
          } else {
7358
0
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7359
0
            StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
7360
0
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7361
0
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7362
0
            StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
7363
0
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7364
0
                Size, CGF.Int64Ty, /*isSigned=*/true));
7365
0
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7366
0
                IsNonContiguous ? DimSize : 1);
7367
0
          }
7368
7369
          // If Mapper is valid, the last component inherits the mapper.
7370
0
          bool HasMapper = Mapper && Next == CE;
7371
0
          if (!IsMappingWholeStruct)
7372
0
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7373
0
          else
7374
0
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7375
0
                                                               : nullptr);
7376
7377
          // We need to add a pointer flag for each map that comes from the
7378
          // same expression except for the first one. We also need to signal
7379
          // this map is the first one that relates with the current capture
7380
          // (there is a set of entries for each capture).
7381
0
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7382
0
              MapType, MapModifiers, MotionModifiers, IsImplicit,
7383
0
              !IsExpressionFirstInfo || RequiresReference ||
7384
0
                  FirstPointerInComplexData || IsMemberReference,
7385
0
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7386
7387
0
          if (!IsExpressionFirstInfo || IsMemberReference) {
7388
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7389
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7390
0
            if (IsPointer || (IsMemberReference && Next != CE))
7391
0
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7392
0
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7393
0
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7394
0
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7395
0
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7396
7397
0
            if (ShouldBeMemberOf) {
7398
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7399
              // should be later updated with the correct value of MEMBER_OF.
7400
0
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7401
              // From now on, all subsequent PTR_AND_OBJ entries should not be
7402
              // marked as MEMBER_OF.
7403
0
              ShouldBeMemberOf = false;
7404
0
            }
7405
0
          }
7406
7407
0
          if (!IsMappingWholeStruct)
7408
0
            CombinedInfo.Types.push_back(Flags);
7409
0
          else
7410
0
            StructBaseCombinedInfo.Types.push_back(Flags);
7411
0
        }
7412
7413
        // If we have encountered a member expression so far, keep track of the
7414
        // mapped member. If the parent is "*this", then the value declaration
7415
        // is nullptr.
7416
0
        if (EncounteredME) {
7417
0
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7418
0
          unsigned FieldIndex = FD->getFieldIndex();
7419
7420
          // Update info about the lowest and highest elements for this struct
7421
0
          if (!PartialStruct.Base.isValid()) {
7422
0
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7423
0
            if (IsFinalArraySection) {
7424
0
              Address HB =
7425
0
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7426
0
                      .getAddress(CGF);
7427
0
              PartialStruct.HighestElem = {FieldIndex, HB};
7428
0
            } else {
7429
0
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
7430
0
            }
7431
0
            PartialStruct.Base = BP;
7432
0
            PartialStruct.LB = BP;
7433
0
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
7434
0
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7435
0
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
7436
0
            if (IsFinalArraySection) {
7437
0
              Address HB =
7438
0
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7439
0
                      .getAddress(CGF);
7440
0
              PartialStruct.HighestElem = {FieldIndex, HB};
7441
0
            } else {
7442
0
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
7443
0
            }
7444
0
          }
7445
0
        }
7446
7447
        // Need to emit combined struct for array sections.
7448
0
        if (IsFinalArraySection || IsNonContiguous)
7449
0
          PartialStruct.IsArraySection = true;
7450
7451
        // If we have a final array section, we are done with this expression.
7452
0
        if (IsFinalArraySection)
7453
0
          break;
7454
7455
        // The pointer becomes the base for the next element.
7456
0
        if (Next != CE)
7457
0
          BP = IsMemberReference ? LowestElem : LB;
7458
7459
0
        IsExpressionFirstInfo = false;
7460
0
        IsCaptureFirstInfo = false;
7461
0
        FirstPointerInComplexData = false;
7462
0
        IsPrevMemberReference = IsMemberReference;
7463
0
      } else if (FirstPointerInComplexData) {
7464
0
        QualType Ty = Components.rbegin()
7465
0
                          ->getAssociatedDeclaration()
7466
0
                          ->getType()
7467
0
                          .getNonReferenceType();
7468
0
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7469
0
        FirstPointerInComplexData = false;
7470
0
      }
7471
0
    }
7472
    // If ran into the whole component - allocate the space for the whole
7473
    // record.
7474
0
    if (!EncounteredME)
7475
0
      PartialStruct.HasCompleteRecord = true;
7476
7477
0
    if (!IsNonContiguous)
7478
0
      return;
7479
7480
0
    const ASTContext &Context = CGF.getContext();
7481
7482
    // For supporting stride in array section, we need to initialize the first
7483
    // dimension size as 1, first offset as 0, and first count as 1
7484
0
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7485
0
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7486
0
    MapValuesArrayTy CurStrides;
7487
0
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7488
0
    uint64_t ElementTypeSize;
7489
7490
    // Collect Size information for each dimension and get the element size as
7491
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7492
    // should be [10, 10] and the first stride is 4 bytes.
7493
0
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7494
0
         Components) {
7495
0
      const Expr *AssocExpr = Component.getAssociatedExpression();
7496
0
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7497
7498
0
      if (!OASE)
7499
0
        continue;
7500
7501
0
      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7502
0
      auto *CAT = Context.getAsConstantArrayType(Ty);
7503
0
      auto *VAT = Context.getAsVariableArrayType(Ty);
7504
7505
      // We need all the dimension size except for the last dimension.
7506
0
      assert((VAT || CAT || &Component == &*Components.begin()) &&
7507
0
             "Should be either ConstantArray or VariableArray if not the "
7508
0
             "first Component");
7509
7510
      // Get element size if CurStrides is empty.
7511
0
      if (CurStrides.empty()) {
7512
0
        const Type *ElementType = nullptr;
7513
0
        if (CAT)
7514
0
          ElementType = CAT->getElementType().getTypePtr();
7515
0
        else if (VAT)
7516
0
          ElementType = VAT->getElementType().getTypePtr();
7517
0
        else
7518
0
          assert(&Component == &*Components.begin() &&
7519
0
                 "Only expect pointer (non CAT or VAT) when this is the "
7520
0
                 "first Component");
7521
        // If ElementType is null, then it means the base is a pointer
7522
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
7523
        // for next iteration.
7524
0
        if (ElementType) {
7525
          // For the case that having pointer as base, we need to remove one
7526
          // level of indirection.
7527
0
          if (&Component != &*Components.begin())
7528
0
            ElementType = ElementType->getPointeeOrArrayElementType();
7529
0
          ElementTypeSize =
7530
0
              Context.getTypeSizeInChars(ElementType).getQuantity();
7531
0
          CurStrides.push_back(
7532
0
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7533
0
        }
7534
0
      }
7535
      // Get dimension value except for the last dimension since we don't need
7536
      // it.
7537
0
      if (DimSizes.size() < Components.size() - 1) {
7538
0
        if (CAT)
7539
0
          DimSizes.push_back(llvm::ConstantInt::get(
7540
0
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
7541
0
        else if (VAT)
7542
0
          DimSizes.push_back(CGF.Builder.CreateIntCast(
7543
0
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7544
0
              /*IsSigned=*/false));
7545
0
      }
7546
0
    }
7547
7548
    // Skip the dummy dimension since we already have its information.
7549
0
    auto *DI = DimSizes.begin() + 1;
7550
    // Product of dimension.
7551
0
    llvm::Value *DimProd =
7552
0
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7553
7554
    // Collect info for non-contiguous. Notice that offset, count, and stride
7555
    // are only meaningful for array-section, so we insert a null for anything
7556
    // other than array-section.
7557
    // Also, the size of offset, count, and stride are not the same as
7558
    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7559
    // count, and stride are the same as the number of non-contiguous
7560
    // declaration in target update to/from clause.
7561
0
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7562
0
         Components) {
7563
0
      const Expr *AssocExpr = Component.getAssociatedExpression();
7564
7565
0
      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7566
0
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
7567
0
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7568
0
            /*isSigned=*/false);
7569
0
        CurOffsets.push_back(Offset);
7570
0
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7571
0
        CurStrides.push_back(CurStrides.back());
7572
0
        continue;
7573
0
      }
7574
7575
0
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7576
7577
0
      if (!OASE)
7578
0
        continue;
7579
7580
      // Offset
7581
0
      const Expr *OffsetExpr = OASE->getLowerBound();
7582
0
      llvm::Value *Offset = nullptr;
7583
0
      if (!OffsetExpr) {
7584
        // If offset is absent, then we just set it to zero.
7585
0
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7586
0
      } else {
7587
0
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7588
0
                                           CGF.Int64Ty,
7589
0
                                           /*isSigned=*/false);
7590
0
      }
7591
0
      CurOffsets.push_back(Offset);
7592
7593
      // Count
7594
0
      const Expr *CountExpr = OASE->getLength();
7595
0
      llvm::Value *Count = nullptr;
7596
0
      if (!CountExpr) {
7597
        // In Clang, once a high dimension is an array section, we construct all
7598
        // the lower dimension as array section, however, for case like
7599
        // arr[0:2][2], Clang construct the inner dimension as an array section
7600
        // but it actually is not in an array section form according to spec.
7601
0
        if (!OASE->getColonLocFirst().isValid() &&
7602
0
            !OASE->getColonLocSecond().isValid()) {
7603
0
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7604
0
        } else {
7605
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
7606
          // When the length is absent it defaults to ⌈(size −
7607
          // lower-bound)/stride⌉, where size is the size of the array
7608
          // dimension.
7609
0
          const Expr *StrideExpr = OASE->getStride();
7610
0
          llvm::Value *Stride =
7611
0
              StrideExpr
7612
0
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7613
0
                                              CGF.Int64Ty, /*isSigned=*/false)
7614
0
                  : nullptr;
7615
0
          if (Stride)
7616
0
            Count = CGF.Builder.CreateUDiv(
7617
0
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7618
0
          else
7619
0
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7620
0
        }
7621
0
      } else {
7622
0
        Count = CGF.EmitScalarExpr(CountExpr);
7623
0
      }
7624
0
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7625
0
      CurCounts.push_back(Count);
7626
7627
      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7628
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7629
      //              Offset      Count     Stride
7630
      //    D0          0           1         4    (int)    <- dummy dimension
7631
      //    D1          0           2         8    (2 * (1) * 4)
7632
      //    D2          1           2         20   (1 * (1 * 5) * 4)
7633
      //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7634
0
      const Expr *StrideExpr = OASE->getStride();
7635
0
      llvm::Value *Stride =
7636
0
          StrideExpr
7637
0
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7638
0
                                          CGF.Int64Ty, /*isSigned=*/false)
7639
0
              : nullptr;
7640
0
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7641
0
      if (Stride)
7642
0
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7643
0
      else
7644
0
        CurStrides.push_back(DimProd);
7645
0
      if (DI != DimSizes.end())
7646
0
        ++DI;
7647
0
    }
7648
7649
0
    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7650
0
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7651
0
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7652
0
  }
7653
7654
  /// Return the adjusted map modifiers if the declaration a capture refers to
7655
  /// appears in a first-private clause. This is expected to be used only with
7656
  /// directives that start with 'target'.
7657
  OpenMPOffloadMappingFlags
7658
0
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7659
0
    assert(Cap.capturesVariable() && "Expected capture by reference only!");
7660
7661
    // A first private variable captured by reference will use only the
7662
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
7663
    // declaration is known as first-private in this handler.
7664
0
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7665
0
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7666
0
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7667
0
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7668
0
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7669
0
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
7670
0
    }
7671
0
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7672
0
    if (I != LambdasMap.end())
7673
      // for map(to: lambda): using user specified map type.
7674
0
      return getMapTypeBits(
7675
0
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7676
0
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7677
0
          /*AddPtrFlag=*/false,
7678
0
          /*AddIsTargetParamFlag=*/false,
7679
0
          /*isNonContiguous=*/false);
7680
0
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7681
0
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7682
0
  }
7683
7684
  void getPlainLayout(const CXXRecordDecl *RD,
7685
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7686
0
                      bool AsBase) const {
7687
0
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7688
7689
0
    llvm::StructType *St =
7690
0
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7691
7692
0
    unsigned NumElements = St->getNumElements();
7693
0
    llvm::SmallVector<
7694
0
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7695
0
        RecordLayout(NumElements);
7696
7697
    // Fill bases.
7698
0
    for (const auto &I : RD->bases()) {
7699
0
      if (I.isVirtual())
7700
0
        continue;
7701
0
      const auto *Base = I.getType()->getAsCXXRecordDecl();
7702
      // Ignore empty bases.
7703
0
      if (Base->isEmpty() || CGF.getContext()
7704
0
                                 .getASTRecordLayout(Base)
7705
0
                                 .getNonVirtualSize()
7706
0
                                 .isZero())
7707
0
        continue;
7708
7709
0
      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7710
0
      RecordLayout[FieldIndex] = Base;
7711
0
    }
7712
    // Fill in virtual bases.
7713
0
    for (const auto &I : RD->vbases()) {
7714
0
      const auto *Base = I.getType()->getAsCXXRecordDecl();
7715
      // Ignore empty bases.
7716
0
      if (Base->isEmpty())
7717
0
        continue;
7718
0
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7719
0
      if (RecordLayout[FieldIndex])
7720
0
        continue;
7721
0
      RecordLayout[FieldIndex] = Base;
7722
0
    }
7723
    // Fill in all the fields.
7724
0
    assert(!RD->isUnion() && "Unexpected union.");
7725
0
    for (const auto *Field : RD->fields()) {
7726
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7727
      // will fill in later.)
7728
0
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7729
0
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7730
0
        RecordLayout[FieldIndex] = Field;
7731
0
      }
7732
0
    }
7733
0
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7734
0
             &Data : RecordLayout) {
7735
0
      if (Data.isNull())
7736
0
        continue;
7737
0
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7738
0
        getPlainLayout(Base, Layout, /*AsBase=*/true);
7739
0
      else
7740
0
        Layout.push_back(Data.get<const FieldDecl *>());
7741
0
    }
7742
0
  }
7743
7744
  /// Generate all the base pointers, section pointers, sizes, map types, and
7745
  /// mappers for the extracted mappable expressions (all included in \a
7746
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
7747
  /// pair of the relevant declaration and index where it occurs is appended to
7748
  /// the device pointers info array.
7749
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Each declaration gets one bucket per MapKind; 'Total' is only used as
    // the bucket count when the per-declaration vector is created below.
    enum MapKind { Present, Allocs, Other, Total };
    // MapVector keeps insertion order, so the emitted entries are
    // deterministic across runs.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Fill the Info map in three passes: 'map' clauses first, then 'to'
    // motion clauses, then 'from' motion clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // Only pass a variable reference along when the clause carries a
        // valid map location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // 'to' motion clauses are modeled as maps with OMPC_MAP_to.
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // 'from' motion clauses are modeled as maps with OMPC_MAP_from.
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user fault if that
    // was not mapped before. If there is no map information and the pointer is
    // a struct member, then we defer the emission of that entry until the whole
    // struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Emits the standalone RETURN_PARAM entry (zero size) for a
    // use_device_ptr/use_device_addr item that is not a struct member.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              // use_device_addr items take the address of the lvalue (or the
              // scalar value itself for non-glvalues).
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              // use_device_ptr items load the pointer value itself.
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    // Same handling for use_device_addr, but each declaration is processed at
    // most once (tracked via 'Processed').
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    // Emit the collected map information, one declaration at a time.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work on
            // the first new entry added to it i.e. make sure that when multiple
            // values are added to any of the lists, the first value added is
            // being modified by the assignments below (not the last value
            // added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
8131
8132
public:
8133
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8134
0
      : CurDir(&Dir), CGF(CGF) {
8135
    // Extract firstprivate clause information.
8136
0
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8137
0
      for (const auto *D : C->varlists())
8138
0
        FirstPrivateDecls.try_emplace(
8139
0
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8140
    // Extract implicit firstprivates from uses_allocators clauses.
8141
0
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8142
0
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8143
0
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8144
0
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8145
0
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8146
0
                                        /*Implicit=*/true);
8147
0
        else if (const auto *VD = dyn_cast<VarDecl>(
8148
0
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8149
0
                         ->getDecl()))
8150
0
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8151
0
      }
8152
0
    }
8153
    // Extract device pointer clause information.
8154
0
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8155
0
      for (auto L : C->component_lists())
8156
0
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8157
    // Extract device addr clause information.
8158
0
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8159
0
      for (auto L : C->component_lists())
8160
0
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8161
    // Extract map information.
8162
0
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8163
0
      if (C->getMapType() != OMPC_MAP_to)
8164
0
        continue;
8165
0
      for (auto L : C->component_lists()) {
8166
0
        const ValueDecl *VD = std::get<0>(L);
8167
0
        const auto *RD = VD ? VD->getType()
8168
0
                                  .getCanonicalType()
8169
0
                                  .getNonReferenceType()
8170
0
                                  ->getAsCXXRecordDecl()
8171
0
                            : nullptr;
8172
0
        if (RD && RD->isLambda())
8173
0
          LambdasMap.try_emplace(std::get<0>(L), C);
8174
0
      }
8175
0
    }
8176
0
  }
8177
8178
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause information is pre-extracted here; the
  /// mapper's clauses are consumed later via generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8181
8182
  /// Generate code for the combined entry if we have a partially mapped struct
8183
  /// and take care of the mapping flags of the arguments corresponding to
8184
  /// individual struct members.
8185
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither MEMBER_OF nor an array section needs no
    // combined parent entry; bail out early.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // NOTE(review): for complete records both bounds are set to
    // PartialStruct.LB — confirm against StructRangeInfoTy's contract.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    // The base-class special case below only applies when mapping 'this'.
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8280
8281
  /// Generate all the base pointers, section pointers, sizes, map types, and
8282
  /// mappers for the extracted mappable expressions (all included in \a
8283
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
8284
  /// pair of the relevant declaration and index where it occurs is appended to
8285
  /// the device pointers info array.
8286
  void generateAllInfo(
8287
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8288
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8289
0
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8290
0
    assert(CurDir.is<const OMPExecutableDirective *>() &&
8291
0
           "Expect a executable directive");
8292
0
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8293
0
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8294
0
                              SkipVarSet);
8295
0
  }
8296
8297
  /// Generate all the base pointers, section pointers, sizes, map types, and
8298
  /// mappers for the extracted map clauses of user-defined mapper (all included
8299
  /// in \a CombinedInfo).
8300
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8301
0
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
8302
0
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8303
0
           "Expect a declare mapper directive");
8304
0
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8305
0
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8306
0
                              OMPBuilder);
8307
0
  }
8308
8309
  /// Emit capture info for lambdas for variables captured by reference.
8310
  void generateInfoForLambdaCaptures(
8311
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8312
0
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8313
0
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8314
0
    const auto *RD = VDType->getAsCXXRecordDecl();
8315
0
    if (!RD || !RD->isLambda())
8316
0
      return;
8317
0
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8318
0
                   CGF.getContext().getDeclAlign(VD));
8319
0
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8320
0
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8321
0
    FieldDecl *ThisCapture = nullptr;
8322
0
    RD->getCaptureFields(Captures, ThisCapture);
8323
0
    if (ThisCapture) {
8324
0
      LValue ThisLVal =
8325
0
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8326
0
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8327
0
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8328
0
                                 VDLVal.getPointer(CGF));
8329
0
      CombinedInfo.Exprs.push_back(VD);
8330
0
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8331
0
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
8332
0
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8333
0
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8334
0
      CombinedInfo.Sizes.push_back(
8335
0
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8336
0
                                    CGF.Int64Ty, /*isSigned=*/true));
8337
0
      CombinedInfo.Types.push_back(
8338
0
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8339
0
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8340
0
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8341
0
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8342
0
      CombinedInfo.Mappers.push_back(nullptr);
8343
0
    }
8344
0
    for (const LambdaCapture &LC : RD->captures()) {
8345
0
      if (!LC.capturesVariable())
8346
0
        continue;
8347
0
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8348
0
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8349
0
        continue;
8350
0
      auto It = Captures.find(VD);
8351
0
      assert(It != Captures.end() && "Found lambda capture without field.");
8352
0
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8353
0
      if (LC.getCaptureKind() == LCK_ByRef) {
8354
0
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8355
0
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8356
0
                                   VDLVal.getPointer(CGF));
8357
0
        CombinedInfo.Exprs.push_back(VD);
8358
0
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8359
0
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
8360
0
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8361
0
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8362
0
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8363
0
            CGF.getTypeSize(
8364
0
                VD->getType().getCanonicalType().getNonReferenceType()),
8365
0
            CGF.Int64Ty, /*isSigned=*/true));
8366
0
      } else {
8367
0
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8368
0
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8369
0
                                   VDLVal.getPointer(CGF));
8370
0
        CombinedInfo.Exprs.push_back(VD);
8371
0
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8372
0
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
8373
0
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8374
0
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8375
0
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8376
0
      }
8377
0
      CombinedInfo.Types.push_back(
8378
0
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8379
0
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8380
0
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8381
0
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8382
0
      CombinedInfo.Mappers.push_back(nullptr);
8383
0
    }
8384
0
  }
8385
8386
  /// Set correct indices for lambdas captures.
8387
  void adjustMemberOfForLambdaCaptures(
8388
      llvm::OpenMPIRBuilder &OMPBuilder,
8389
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8390
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8391
0
      MapFlagsArrayTy &Types) const {
8392
0
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8393
      // Set correct member_of idx for all implicit lambda captures.
8394
0
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8395
0
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8396
0
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8397
0
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8398
0
        continue;
8399
0
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8400
0
      assert(BasePtr && "Unable to find base lambda address.");
8401
0
      int TgtIdx = -1;
8402
0
      for (unsigned J = I; J > 0; --J) {
8403
0
        unsigned Idx = J - 1;
8404
0
        if (Pointers[Idx] != BasePtr)
8405
0
          continue;
8406
0
        TgtIdx = Idx;
8407
0
        break;
8408
0
      }
8409
0
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
8410
      // All other current entries will be MEMBER_OF the combined entry
8411
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
8412
      // 0xFFFF in the MEMBER_OF field).
8413
0
      OpenMPOffloadMappingFlags MemberOfFlag =
8414
0
          OMPBuilder.getMemberOfFlag(TgtIdx);
8415
0
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8416
0
    }
8417
0
  }
8418
8419
  /// Generate the base pointers, section pointers, sizes, map types, and
8420
  /// mappers associated to a given capture (all included in \a CombinedInfo).
8421
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8422
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8423
0
                              StructRangeInfoTy &PartialStruct) const {
8424
0
    assert(!Cap->capturesVariableArrayType() &&
8425
0
           "Not expecting to generate map info for a variable array type!");
8426
8427
    // We need to know when we generating information for the first component
8428
0
    const ValueDecl *VD = Cap->capturesThis()
8429
0
                              ? nullptr
8430
0
                              : Cap->getCapturedVar()->getCanonicalDecl();
8431
8432
    // for map(to: lambda): skip here, processing it in
8433
    // generateDefaultMapInfo
8434
0
    if (LambdasMap.count(VD))
8435
0
      return;
8436
8437
    // If this declaration appears in a is_device_ptr clause we just have to
8438
    // pass the pointer by value. If it is a reference to a declaration, we just
8439
    // pass its value.
8440
0
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8441
0
      CombinedInfo.Exprs.push_back(VD);
8442
0
      CombinedInfo.BasePointers.emplace_back(Arg);
8443
0
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
8444
0
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8445
0
      CombinedInfo.Pointers.push_back(Arg);
8446
0
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8447
0
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8448
0
          /*isSigned=*/true));
8449
0
      CombinedInfo.Types.push_back(
8450
0
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8451
0
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8452
0
      CombinedInfo.Mappers.push_back(nullptr);
8453
0
      return;
8454
0
    }
8455
8456
0
    using MapData =
8457
0
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8458
0
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8459
0
                   const ValueDecl *, const Expr *>;
8460
0
    SmallVector<MapData, 4> DeclComponentLists;
8461
    // For member fields list in is_device_ptr, store it in
8462
    // DeclComponentLists for generating components info.
8463
0
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8464
0
    auto It = DevPointersMap.find(VD);
8465
0
    if (It != DevPointersMap.end())
8466
0
      for (const auto &MCL : It->second)
8467
0
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8468
0
                                        /*IsImpicit = */ true, nullptr,
8469
0
                                        nullptr);
8470
0
    auto I = HasDevAddrsMap.find(VD);
8471
0
    if (I != HasDevAddrsMap.end())
8472
0
      for (const auto &MCL : I->second)
8473
0
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8474
0
                                        /*IsImpicit = */ true, nullptr,
8475
0
                                        nullptr);
8476
0
    assert(CurDir.is<const OMPExecutableDirective *>() &&
8477
0
           "Expect a executable directive");
8478
0
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8479
0
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8480
0
      const auto *EI = C->getVarRefs().begin();
8481
0
      for (const auto L : C->decl_component_lists(VD)) {
8482
0
        const ValueDecl *VDecl, *Mapper;
8483
        // The Expression is not correct if the mapping is implicit
8484
0
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8485
0
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8486
0
        std::tie(VDecl, Components, Mapper) = L;
8487
0
        assert(VDecl == VD && "We got information for the wrong declaration??");
8488
0
        assert(!Components.empty() &&
8489
0
               "Not expecting declaration with no component lists.");
8490
0
        DeclComponentLists.emplace_back(Components, C->getMapType(),
8491
0
                                        C->getMapTypeModifiers(),
8492
0
                                        C->isImplicit(), Mapper, E);
8493
0
        ++EI;
8494
0
      }
8495
0
    }
8496
0
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8497
0
                                             const MapData &RHS) {
8498
0
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8499
0
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
8500
0
      bool HasPresent =
8501
0
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8502
0
      bool HasAllocs = MapType == OMPC_MAP_alloc;
8503
0
      MapModifiers = std::get<2>(RHS);
8504
0
      MapType = std::get<1>(LHS);
8505
0
      bool HasPresentR =
8506
0
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8507
0
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
8508
0
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8509
0
    });
8510
8511
    // Find overlapping elements (including the offset from the base element).
8512
0
    llvm::SmallDenseMap<
8513
0
        const MapData *,
8514
0
        llvm::SmallVector<
8515
0
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8516
0
        4>
8517
0
        OverlappedData;
8518
0
    size_t Count = 0;
8519
0
    for (const MapData &L : DeclComponentLists) {
8520
0
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8521
0
      OpenMPMapClauseKind MapType;
8522
0
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
8523
0
      bool IsImplicit;
8524
0
      const ValueDecl *Mapper;
8525
0
      const Expr *VarRef;
8526
0
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8527
0
          L;
8528
0
      ++Count;
8529
0
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8530
0
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8531
0
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8532
0
                 VarRef) = L1;
8533
0
        auto CI = Components.rbegin();
8534
0
        auto CE = Components.rend();
8535
0
        auto SI = Components1.rbegin();
8536
0
        auto SE = Components1.rend();
8537
0
        for (; CI != CE && SI != SE; ++CI, ++SI) {
8538
0
          if (CI->getAssociatedExpression()->getStmtClass() !=
8539
0
              SI->getAssociatedExpression()->getStmtClass())
8540
0
            break;
8541
          // Are we dealing with different variables/fields?
8542
0
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8543
0
            break;
8544
0
        }
8545
        // Found overlapping if, at least for one component, reached the head
8546
        // of the components list.
8547
0
        if (CI == CE || SI == SE) {
8548
          // Ignore it if it is the same component.
8549
0
          if (CI == CE && SI == SE)
8550
0
            continue;
8551
0
          const auto It = (SI == SE) ? CI : SI;
8552
          // If one component is a pointer and another one is a kind of
8553
          // dereference of this pointer (array subscript, section, dereference,
8554
          // etc.), it is not an overlapping.
8555
          // Same, if one component is a base and another component is a
8556
          // dereferenced pointer memberexpr with the same base.
8557
0
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8558
0
              (std::prev(It)->getAssociatedDeclaration() &&
8559
0
               std::prev(It)
8560
0
                   ->getAssociatedDeclaration()
8561
0
                   ->getType()
8562
0
                   ->isPointerType()) ||
8563
0
              (It->getAssociatedDeclaration() &&
8564
0
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
8565
0
               std::next(It) != CE && std::next(It) != SE))
8566
0
            continue;
8567
0
          const MapData &BaseData = CI == CE ? L : L1;
8568
0
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8569
0
              SI == SE ? Components : Components1;
8570
0
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8571
0
          OverlappedElements.getSecond().push_back(SubData);
8572
0
        }
8573
0
      }
8574
0
    }
8575
    // Sort the overlapped elements for each item.
8576
0
    llvm::SmallVector<const FieldDecl *, 4> Layout;
8577
0
    if (!OverlappedData.empty()) {
8578
0
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8579
0
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8580
0
      while (BaseType != OrigType) {
8581
0
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8582
0
        OrigType = BaseType->getPointeeOrArrayElementType();
8583
0
      }
8584
8585
0
      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8586
0
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
8587
0
      else {
8588
0
        const auto *RD = BaseType->getAsRecordDecl();
8589
0
        Layout.append(RD->field_begin(), RD->field_end());
8590
0
      }
8591
0
    }
8592
0
    for (auto &Pair : OverlappedData) {
8593
0
      llvm::stable_sort(
8594
0
          Pair.getSecond(),
8595
0
          [&Layout](
8596
0
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8597
0
              OMPClauseMappableExprCommon::MappableExprComponentListRef
8598
0
                  Second) {
8599
0
            auto CI = First.rbegin();
8600
0
            auto CE = First.rend();
8601
0
            auto SI = Second.rbegin();
8602
0
            auto SE = Second.rend();
8603
0
            for (; CI != CE && SI != SE; ++CI, ++SI) {
8604
0
              if (CI->getAssociatedExpression()->getStmtClass() !=
8605
0
                  SI->getAssociatedExpression()->getStmtClass())
8606
0
                break;
8607
              // Are we dealing with different variables/fields?
8608
0
              if (CI->getAssociatedDeclaration() !=
8609
0
                  SI->getAssociatedDeclaration())
8610
0
                break;
8611
0
            }
8612
8613
            // Lists contain the same elements.
8614
0
            if (CI == CE && SI == SE)
8615
0
              return false;
8616
8617
            // List with less elements is less than list with more elements.
8618
0
            if (CI == CE || SI == SE)
8619
0
              return CI == CE;
8620
8621
0
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8622
0
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8623
0
            if (FD1->getParent() == FD2->getParent())
8624
0
              return FD1->getFieldIndex() < FD2->getFieldIndex();
8625
0
            const auto *It =
8626
0
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8627
0
                  return FD == FD1 || FD == FD2;
8628
0
                });
8629
0
            return *It == FD1;
8630
0
          });
8631
0
    }
8632
8633
    // Associated with a capture, because the mapping flags depend on it.
8634
    // Go through all of the elements with the overlapped elements.
8635
0
    bool IsFirstComponentList = true;
8636
0
    MapCombinedInfoTy StructBaseCombinedInfo;
8637
0
    for (const auto &Pair : OverlappedData) {
8638
0
      const MapData &L = *Pair.getFirst();
8639
0
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8640
0
      OpenMPMapClauseKind MapType;
8641
0
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
8642
0
      bool IsImplicit;
8643
0
      const ValueDecl *Mapper;
8644
0
      const Expr *VarRef;
8645
0
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8646
0
          L;
8647
0
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8648
0
          OverlappedComponents = Pair.getSecond();
8649
0
      generateInfoForComponentList(
8650
0
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8651
0
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8652
0
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8653
0
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8654
0
      IsFirstComponentList = false;
8655
0
    }
8656
    // Go through other elements without overlapped elements.
8657
0
    for (const MapData &L : DeclComponentLists) {
8658
0
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8659
0
      OpenMPMapClauseKind MapType;
8660
0
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
8661
0
      bool IsImplicit;
8662
0
      const ValueDecl *Mapper;
8663
0
      const Expr *VarRef;
8664
0
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8665
0
          L;
8666
0
      auto It = OverlappedData.find(&L);
8667
0
      if (It == OverlappedData.end())
8668
0
        generateInfoForComponentList(
8669
0
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8670
0
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8671
0
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8672
0
            /*ForDeviceAddr=*/false, VD, VarRef);
8673
0
      IsFirstComponentList = false;
8674
0
    }
8675
0
  }
8676
8677
  /// Generate the default map information for a given capture \a CI,
8678
  /// record field declaration \a RI and captured value \a CV.
8679
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8680
                              const FieldDecl &RI, llvm::Value *CV,
8681
0
                              MapCombinedInfoTy &CombinedInfo) const {
8682
0
    bool IsImplicit = true;
8683
    // Do the default mapping.
8684
0
    if (CI.capturesThis()) {
8685
0
      CombinedInfo.Exprs.push_back(nullptr);
8686
0
      CombinedInfo.BasePointers.push_back(CV);
8687
0
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
8688
0
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8689
0
      CombinedInfo.Pointers.push_back(CV);
8690
0
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8691
0
      CombinedInfo.Sizes.push_back(
8692
0
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8693
0
                                    CGF.Int64Ty, /*isSigned=*/true));
8694
      // Default map type.
8695
0
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8696
0
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8697
0
    } else if (CI.capturesVariableByCopy()) {
8698
0
      const VarDecl *VD = CI.getCapturedVar();
8699
0
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8700
0
      CombinedInfo.BasePointers.push_back(CV);
8701
0
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
8702
0
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8703
0
      CombinedInfo.Pointers.push_back(CV);
8704
0
      if (!RI.getType()->isAnyPointerType()) {
8705
        // We have to signal to the runtime captures passed by value that are
8706
        // not pointers.
8707
0
        CombinedInfo.Types.push_back(
8708
0
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8709
0
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8710
0
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8711
0
      } else {
8712
        // Pointers are implicitly mapped with a zero size and no flags
8713
        // (other than first map that is added for all implicit maps).
8714
0
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8715
0
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8716
0
      }
8717
0
      auto I = FirstPrivateDecls.find(VD);
8718
0
      if (I != FirstPrivateDecls.end())
8719
0
        IsImplicit = I->getSecond();
8720
0
    } else {
8721
0
      assert(CI.capturesVariable() && "Expected captured reference.");
8722
0
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8723
0
      QualType ElementType = PtrTy->getPointeeType();
8724
0
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8725
0
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8726
      // The default map type for a scalar/complex type is 'to' because by
8727
      // default the value doesn't have to be retrieved. For an aggregate
8728
      // type, the default is 'tofrom'.
8729
0
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8730
0
      const VarDecl *VD = CI.getCapturedVar();
8731
0
      auto I = FirstPrivateDecls.find(VD);
8732
0
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8733
0
      CombinedInfo.BasePointers.push_back(CV);
8734
0
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
8735
0
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8736
0
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8737
0
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8738
0
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
8739
0
            AlignmentSource::Decl));
8740
0
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8741
0
      } else {
8742
0
        CombinedInfo.Pointers.push_back(CV);
8743
0
      }
8744
0
      if (I != FirstPrivateDecls.end())
8745
0
        IsImplicit = I->getSecond();
8746
0
    }
8747
    // Every default map produces a single argument which is a target parameter.
8748
0
    CombinedInfo.Types.back() |=
8749
0
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8750
8751
    // Add flag stating this is an implicit map.
8752
0
    if (IsImplicit)
8753
0
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8754
8755
    // No user-defined mapper for default mapping.
8756
0
    CombinedInfo.Mappers.push_back(nullptr);
8757
0
  }
8758
};
8759
} // anonymous namespace
8760
8761
// Try to extract the base declaration from a `this->x` expression if possible.
8762
0
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8763
0
  if (!E)
8764
0
    return nullptr;
8765
8766
0
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8767
0
    if (const MemberExpr *ME =
8768
0
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8769
0
      return ME->getMemberDecl();
8770
0
  return nullptr;
8771
0
}
8772
8773
/// Emit a string constant containing the names of the values mapped to the
8774
/// offloading runtime library.
8775
llvm::Constant *
8776
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8777
0
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
8778
8779
0
  uint32_t SrcLocStrSize;
8780
0
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8781
0
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8782
8783
0
  SourceLocation Loc;
8784
0
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8785
0
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8786
0
      Loc = VD->getLocation();
8787
0
    else
8788
0
      Loc = MapExprs.getMapExpr()->getExprLoc();
8789
0
  } else {
8790
0
    Loc = MapExprs.getMapDecl()->getLocation();
8791
0
  }
8792
8793
0
  std::string ExprName;
8794
0
  if (MapExprs.getMapExpr()) {
8795
0
    PrintingPolicy P(CGF.getContext().getLangOpts());
8796
0
    llvm::raw_string_ostream OS(ExprName);
8797
0
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8798
0
    OS.flush();
8799
0
  } else {
8800
0
    ExprName = MapExprs.getMapDecl()->getNameAsString();
8801
0
  }
8802
8803
0
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8804
0
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8805
0
                                         PLoc.getLine(), PLoc.getColumn(),
8806
0
                                         SrcLocStrSize);
8807
0
}
8808
8809
/// Emit the arrays used to pass the captures and map information to the
8810
/// offloading runtime library. If there is no map or capture information,
8811
/// return nullptr by reference.
8812
static void emitOffloadingArrays(
8813
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8814
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8815
0
    bool IsNonContiguous = false) {
8816
0
  CodeGenModule &CGM = CGF.CGM;
8817
8818
  // Reset the array information.
8819
0
  Info.clearArrayInfo();
8820
0
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8821
8822
0
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8823
0
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8824
0
                         CGF.AllocaInsertPt->getIterator());
8825
0
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8826
0
                          CGF.Builder.GetInsertPoint());
8827
8828
0
  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8829
0
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8830
0
  };
8831
0
  if (CGM.getCodeGenOpts().getDebugInfo() !=
8832
0
      llvm::codegenoptions::NoDebugInfo) {
8833
0
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8834
0
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8835
0
                    FillInfoMap);
8836
0
  }
8837
8838
0
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8839
0
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8840
0
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8841
0
    }
8842
0
  };
8843
8844
0
  auto CustomMapperCB = [&](unsigned int I) {
8845
0
    llvm::Value *MFunc = nullptr;
8846
0
    if (CombinedInfo.Mappers[I]) {
8847
0
      Info.HasMapper = true;
8848
0
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8849
0
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8850
0
    }
8851
0
    return MFunc;
8852
0
  };
8853
0
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8854
0
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
8855
0
                                  CustomMapperCB);
8856
0
}
8857
8858
/// Check for inner distribute directive.
8859
static const OMPExecutableDirective *
8860
0
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8861
0
  const auto *CS = D.getInnermostCapturedStmt();
8862
0
  const auto *Body =
8863
0
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8864
0
  const Stmt *ChildStmt =
8865
0
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8866
8867
0
  if (const auto *NestedDir =
8868
0
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8869
0
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8870
0
    switch (D.getDirectiveKind()) {
8871
0
    case OMPD_target:
8872
      // For now, just treat 'target teams loop' as if it's distributed.
8873
0
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8874
0
        return NestedDir;
8875
0
      if (DKind == OMPD_teams) {
8876
0
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8877
0
            /*IgnoreCaptured=*/true);
8878
0
        if (!Body)
8879
0
          return nullptr;
8880
0
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8881
0
        if (const auto *NND =
8882
0
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8883
0
          DKind = NND->getDirectiveKind();
8884
0
          if (isOpenMPDistributeDirective(DKind))
8885
0
            return NND;
8886
0
        }
8887
0
      }
8888
0
      return nullptr;
8889
0
    case OMPD_target_teams:
8890
0
      if (isOpenMPDistributeDirective(DKind))
8891
0
        return NestedDir;
8892
0
      return nullptr;
8893
0
    case OMPD_target_parallel:
8894
0
    case OMPD_target_simd:
8895
0
    case OMPD_target_parallel_for:
8896
0
    case OMPD_target_parallel_for_simd:
8897
0
      return nullptr;
8898
0
    case OMPD_target_teams_distribute:
8899
0
    case OMPD_target_teams_distribute_simd:
8900
0
    case OMPD_target_teams_distribute_parallel_for:
8901
0
    case OMPD_target_teams_distribute_parallel_for_simd:
8902
0
    case OMPD_parallel:
8903
0
    case OMPD_for:
8904
0
    case OMPD_parallel_for:
8905
0
    case OMPD_parallel_master:
8906
0
    case OMPD_parallel_sections:
8907
0
    case OMPD_for_simd:
8908
0
    case OMPD_parallel_for_simd:
8909
0
    case OMPD_cancel:
8910
0
    case OMPD_cancellation_point:
8911
0
    case OMPD_ordered:
8912
0
    case OMPD_threadprivate:
8913
0
    case OMPD_allocate:
8914
0
    case OMPD_task:
8915
0
    case OMPD_simd:
8916
0
    case OMPD_tile:
8917
0
    case OMPD_unroll:
8918
0
    case OMPD_sections:
8919
0
    case OMPD_section:
8920
0
    case OMPD_single:
8921
0
    case OMPD_master:
8922
0
    case OMPD_critical:
8923
0
    case OMPD_taskyield:
8924
0
    case OMPD_barrier:
8925
0
    case OMPD_taskwait:
8926
0
    case OMPD_taskgroup:
8927
0
    case OMPD_atomic:
8928
0
    case OMPD_flush:
8929
0
    case OMPD_depobj:
8930
0
    case OMPD_scan:
8931
0
    case OMPD_teams:
8932
0
    case OMPD_target_data:
8933
0
    case OMPD_target_exit_data:
8934
0
    case OMPD_target_enter_data:
8935
0
    case OMPD_distribute:
8936
0
    case OMPD_distribute_simd:
8937
0
    case OMPD_distribute_parallel_for:
8938
0
    case OMPD_distribute_parallel_for_simd:
8939
0
    case OMPD_teams_distribute:
8940
0
    case OMPD_teams_distribute_simd:
8941
0
    case OMPD_teams_distribute_parallel_for:
8942
0
    case OMPD_teams_distribute_parallel_for_simd:
8943
0
    case OMPD_target_update:
8944
0
    case OMPD_declare_simd:
8945
0
    case OMPD_declare_variant:
8946
0
    case OMPD_begin_declare_variant:
8947
0
    case OMPD_end_declare_variant:
8948
0
    case OMPD_declare_target:
8949
0
    case OMPD_end_declare_target:
8950
0
    case OMPD_declare_reduction:
8951
0
    case OMPD_declare_mapper:
8952
0
    case OMPD_taskloop:
8953
0
    case OMPD_taskloop_simd:
8954
0
    case OMPD_master_taskloop:
8955
0
    case OMPD_master_taskloop_simd:
8956
0
    case OMPD_parallel_master_taskloop:
8957
0
    case OMPD_parallel_master_taskloop_simd:
8958
0
    case OMPD_requires:
8959
0
    case OMPD_metadirective:
8960
0
    case OMPD_unknown:
8961
0
    default:
8962
0
      llvm_unreachable("Unexpected directive.");
8963
0
    }
8964
0
  }
8965
8966
0
  return nullptr;
8967
0
}
8968
8969
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D   The declare-mapper declaration to emit a function for.
/// \param CGF Optional enclosing function; when non-null the mapper is also
///            recorded against that function in FunctionUDMMap.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Memoize: each declare-mapper decl is lowered to at most one function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes. The six implicit
  // parameters mirror the runtime's mapper function ABI shown in \code above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>"; it is
  // TU-local, hence internal linkage.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the optimizer to work on the mapper even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the current element; its
  // back-edge incoming value is added after the body is emitted (see below),
  // because member emission may create extra blocks (LastBB).
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map types; the tofrom case falls through from
    // ToElseBB with MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, when requested, associate the decl with
  // the enclosing function for later bookkeeping.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9263
9264
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF  Function being emitted for the mapper.
/// \param Handle     Opaque runtime mapper handle (rt_mapper_handle).
/// \param Base/Begin Base and begin pointers of the mapped section.
/// \param Size       Number of array elements (not bytes).
/// \param MapType    Incoming map-type flags argument of the mapper.
/// \param MapName    Optional mapping name for debug output.
/// \param ElementSize Size in chars of one element of the mapped type.
/// \param ExitBB     Block to branch to when no init/del work is needed.
/// \param IsInit     True for the allocation path, false for deletion.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Allocation runs when mapping an array section OR a pointer-and-object
    // pair (base != begin with OMP_MAP_PTR_AND_OBJ), and only when the
    // delete bit is NOT set.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only for array sections with the delete bit set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9339
9340
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  // Fast path: the mapper function for \p D was already emitted.
  if (auto It = UDMMap.find(D); It != UDMMap.end())
    return It->second;
  // Slow path: emit it now; emitUserDefinedMapper records the result in
  // UDMMap, so a fresh lookup returns the newly created function.
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
9348
9349
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  // Zero signals "trip count unknown" to the offloading runtime.
  llvm::Value *Zero = llvm::ConstantInt::get(CGF.Int64Ty, 0);
  OpenMPDirectiveKind DKind = D.getDirectiveKind();
  const OMPExecutableDirective *LoopDir = &D;
  // Unless \p D itself is a teams+distribute kind (or target teams loop),
  // search for a nested distribute directive to take the trip count from.
  bool IsTeamsDistribute =
      isOpenMPDistributeDirective(DKind) && isOpenMPTeamsDirective(DKind);
  if (!IsTeamsDistribute && DKind != OMPD_target_teams_loop)
    LoopDir = getNestedDistributeDirective(CGM.getContext(), D);
  if (!LoopDir)
    return Zero;

  // Let the caller-provided emitter compute the number of iterations.
  const auto *LD = cast<OMPLoopDirective>(LoopDir);
  llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
  return NumIterations ? NumIterations : Zero;
}
9368
9369
static void
9370
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9371
                       const OMPExecutableDirective &D,
9372
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9373
                       bool RequiresOuterTask, const CapturedStmt &CS,
9374
0
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
9375
0
  if (OffloadingMandatory) {
9376
0
    CGF.Builder.CreateUnreachable();
9377
0
  } else {
9378
0
    if (RequiresOuterTask) {
9379
0
      CapturedVars.clear();
9380
0
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9381
0
    }
9382
0
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9383
0
                                         CapturedVars);
9384
0
  }
9385
0
}
9386
9387
static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Without a device clause, pass the "undefined device" sentinel so the
  // runtime picks the default device.
  const Expr *DeviceExpr = Device.getPointer();
  if (!DeviceExpr)
    return CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

  assert((Device.getInt() == OMPC_DEVICE_unknown ||
          Device.getInt() == OMPC_DEVICE_device_num) &&
         "Expected device_num modifier.");
  // Evaluate the clause expression and widen it to the i64 the runtime
  // interface expects.
  llvm::Value *DevVal = CGF.EmitScalarExpr(DeviceExpr);
  return CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
}
9404
9405
/// Compute the dynamic cgroup memory size for a target kernel launch.
///
/// \param D   The target directive being emitted.
/// \param CGF The current CodeGenFunction.
/// \returns An i32 with the size requested by an ompx_dyn_cgroup_mem clause,
///          or a zero constant when the clause is absent.
//
// Fix: marked `static`. This is a TU-local helper like its siblings
// (emitDeviceID, emitTargetCallFallback, emitTargetCallKernelLaunch); without
// `static` it gets external linkage and triggers -Wmissing-prototypes.
static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    // Evaluate the clause expression in its own cleanup scope.
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    // The runtime expects an unsigned 32-bit byte count.
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}
9418
9419
/// Emit the offloading kernel-launch path of a target call: collect map
/// information for all captures, materialize the offloading argument arrays,
/// and emit the __tgt_target_kernel launch (with host fallback) through the
/// OpenMPIRBuilder.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  // Walk captures, record fields, and captured values in lock-step; the three
  // sequences are parallel by construction.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      // Record which declarations were mapped via captures; `this` is
      // represented by a null entry.
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  // Publish the generated arrays to the caller through InputInfo and the
  // by-reference out-parameters.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Code generator for the actual launch; may run deferred inside an outer
  // task region, hence everything it needs is captured.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    // Callback invoked by the IR builder when the device launch fails at
    // runtime: emit the host fallback at the given insert point.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  // depend/nowait clauses require wrapping the launch in an outer task.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9595
9596
static void
9597
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9598
                   const OMPExecutableDirective &D,
9599
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600
                   bool RequiresOuterTask, const CapturedStmt &CS,
9601
0
                   bool OffloadingMandatory, CodeGenFunction &CGF) {
9602
9603
  // Notify that the host version must be executed.
9604
0
  auto &&ElseGen =
9605
0
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9606
0
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9607
0
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9608
0
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
9609
0
      };
9610
9611
0
  if (RequiresOuterTask) {
9612
0
    CodeGenFunction::OMPTargetDataInfo InputInfo;
9613
0
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9614
0
  } else {
9615
0
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9616
0
  }
9617
0
}
9618
9619
void CGOpenMPRuntime::emitTargetCall(
9620
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
9621
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9622
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9623
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9624
                                     const OMPLoopDirective &D)>
9625
0
        SizeEmitter) {
9626
0
  if (!CGF.HaveInsertPoint())
9627
0
    return;
9628
9629
0
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9630
0
                                   CGM.getLangOpts().OpenMPOffloadMandatory;
9631
9632
0
  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9633
9634
0
  const bool RequiresOuterTask =
9635
0
      D.hasClausesOfKind<OMPDependClause>() ||
9636
0
      D.hasClausesOfKind<OMPNowaitClause>() ||
9637
0
      D.hasClausesOfKind<OMPInReductionClause>() ||
9638
0
      (CGM.getLangOpts().OpenMP >= 51 &&
9639
0
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9640
0
       D.hasClausesOfKind<OMPThreadLimitClause>());
9641
0
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9642
0
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9643
0
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9644
0
                                            PrePostActionTy &) {
9645
0
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9646
0
  };
9647
0
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9648
9649
0
  CodeGenFunction::OMPTargetDataInfo InputInfo;
9650
0
  llvm::Value *MapTypesArray = nullptr;
9651
0
  llvm::Value *MapNamesArray = nullptr;
9652
9653
0
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9654
0
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
9655
0
                          OutlinedFnID, &InputInfo, &MapTypesArray,
9656
0
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9657
0
                                                       PrePostActionTy &) {
9658
0
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9659
0
                               RequiresOuterTask, CS, OffloadingMandatory,
9660
0
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
9661
0
                               MapNamesArray, SizeEmitter, CGF, CGM);
9662
0
  };
9663
9664
0
  auto &&TargetElseGen =
9665
0
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9666
0
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9667
0
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9668
0
                           CS, OffloadingMandatory, CGF);
9669
0
      };
9670
9671
  // If we have a target function ID it means that we need to support
9672
  // offloading, otherwise, just execute on the host. We need to execute on host
9673
  // regardless of the conditional in the if clause if, e.g., the user do not
9674
  // specify target triples.
9675
0
  if (OutlinedFnID) {
9676
0
    if (IfCond) {
9677
0
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9678
0
    } else {
9679
0
      RegionCodeGenTy ThenRCG(TargetThenGen);
9680
0
      ThenRCG(CGF);
9681
0
    }
9682
0
  } else {
9683
0
    RegionCodeGenTy ElseRCG(TargetElseGen);
9684
0
    ElseRCG(CGF);
9685
0
  }
9686
0
}
9687
9688
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9689
0
                                                    StringRef ParentName) {
9690
0
  if (!S)
9691
0
    return;
9692
9693
  // Codegen OMP target directives that offload compute to the device.
9694
0
  bool RequiresDeviceCodegen =
9695
0
      isa<OMPExecutableDirective>(S) &&
9696
0
      isOpenMPTargetExecutionDirective(
9697
0
          cast<OMPExecutableDirective>(S)->getDirectiveKind());
9698
9699
0
  if (RequiresDeviceCodegen) {
9700
0
    const auto &E = *cast<OMPExecutableDirective>(S);
9701
9702
0
    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9703
0
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9704
9705
    // Is this a target region that should not be emitted as an entry point? If
9706
    // so just signal we are done with this target region.
9707
0
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9708
0
      return;
9709
9710
0
    switch (E.getDirectiveKind()) {
9711
0
    case OMPD_target:
9712
0
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9713
0
                                                   cast<OMPTargetDirective>(E));
9714
0
      break;
9715
0
    case OMPD_target_parallel:
9716
0
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9717
0
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9718
0
      break;
9719
0
    case OMPD_target_teams:
9720
0
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9721
0
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9722
0
      break;
9723
0
    case OMPD_target_teams_distribute:
9724
0
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9725
0
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9726
0
      break;
9727
0
    case OMPD_target_teams_distribute_simd:
9728
0
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9729
0
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9730
0
      break;
9731
0
    case OMPD_target_parallel_for:
9732
0
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9733
0
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9734
0
      break;
9735
0
    case OMPD_target_parallel_for_simd:
9736
0
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9737
0
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9738
0
      break;
9739
0
    case OMPD_target_simd:
9740
0
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9741
0
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9742
0
      break;
9743
0
    case OMPD_target_teams_distribute_parallel_for:
9744
0
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9745
0
          CGM, ParentName,
9746
0
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9747
0
      break;
9748
0
    case OMPD_target_teams_distribute_parallel_for_simd:
9749
0
      CodeGenFunction::
9750
0
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9751
0
              CGM, ParentName,
9752
0
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9753
0
      break;
9754
0
    case OMPD_target_teams_loop:
9755
0
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9756
0
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9757
0
      break;
9758
0
    case OMPD_target_parallel_loop:
9759
0
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9760
0
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9761
0
      break;
9762
0
    case OMPD_parallel:
9763
0
    case OMPD_for:
9764
0
    case OMPD_parallel_for:
9765
0
    case OMPD_parallel_master:
9766
0
    case OMPD_parallel_sections:
9767
0
    case OMPD_for_simd:
9768
0
    case OMPD_parallel_for_simd:
9769
0
    case OMPD_cancel:
9770
0
    case OMPD_cancellation_point:
9771
0
    case OMPD_ordered:
9772
0
    case OMPD_threadprivate:
9773
0
    case OMPD_allocate:
9774
0
    case OMPD_task:
9775
0
    case OMPD_simd:
9776
0
    case OMPD_tile:
9777
0
    case OMPD_unroll:
9778
0
    case OMPD_sections:
9779
0
    case OMPD_section:
9780
0
    case OMPD_single:
9781
0
    case OMPD_master:
9782
0
    case OMPD_critical:
9783
0
    case OMPD_taskyield:
9784
0
    case OMPD_barrier:
9785
0
    case OMPD_taskwait:
9786
0
    case OMPD_taskgroup:
9787
0
    case OMPD_atomic:
9788
0
    case OMPD_flush:
9789
0
    case OMPD_depobj:
9790
0
    case OMPD_scan:
9791
0
    case OMPD_teams:
9792
0
    case OMPD_target_data:
9793
0
    case OMPD_target_exit_data:
9794
0
    case OMPD_target_enter_data:
9795
0
    case OMPD_distribute:
9796
0
    case OMPD_distribute_simd:
9797
0
    case OMPD_distribute_parallel_for:
9798
0
    case OMPD_distribute_parallel_for_simd:
9799
0
    case OMPD_teams_distribute:
9800
0
    case OMPD_teams_distribute_simd:
9801
0
    case OMPD_teams_distribute_parallel_for:
9802
0
    case OMPD_teams_distribute_parallel_for_simd:
9803
0
    case OMPD_target_update:
9804
0
    case OMPD_declare_simd:
9805
0
    case OMPD_declare_variant:
9806
0
    case OMPD_begin_declare_variant:
9807
0
    case OMPD_end_declare_variant:
9808
0
    case OMPD_declare_target:
9809
0
    case OMPD_end_declare_target:
9810
0
    case OMPD_declare_reduction:
9811
0
    case OMPD_declare_mapper:
9812
0
    case OMPD_taskloop:
9813
0
    case OMPD_taskloop_simd:
9814
0
    case OMPD_master_taskloop:
9815
0
    case OMPD_master_taskloop_simd:
9816
0
    case OMPD_parallel_master_taskloop:
9817
0
    case OMPD_parallel_master_taskloop_simd:
9818
0
    case OMPD_requires:
9819
0
    case OMPD_metadirective:
9820
0
    case OMPD_unknown:
9821
0
    default:
9822
0
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9823
0
    }
9824
0
    return;
9825
0
  }
9826
9827
0
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9828
0
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9829
0
      return;
9830
9831
0
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9832
0
    return;
9833
0
  }
9834
9835
  // If this is a lambda function, look into its body.
9836
0
  if (const auto *L = dyn_cast<LambdaExpr>(S))
9837
0
    S = L->getBody();
9838
9839
  // Keep looking for target regions recursively.
9840
0
  for (const Stmt *II : S->children())
9841
0
    scanForTargetRegionsFunctions(II, ParentName);
9842
0
}
9843
9844
0
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9845
0
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9846
0
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
9847
0
  if (!DevTy)
9848
0
    return false;
9849
  // Do not emit device_type(nohost) functions for the host.
9850
0
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9851
0
    return true;
9852
  // Do not emit device_type(host) functions for the device.
9853
0
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9854
0
    return true;
9855
0
  return false;
9856
0
}
9857
9858
0
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9859
  // If emitting code for the host, we do not process FD here. Instead we do
9860
  // the normal code generation.
9861
0
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9862
0
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9863
0
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9864
0
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
9865
0
        return true;
9866
0
    return false;
9867
0
  }
9868
9869
0
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9870
  // Try to detect target regions in the function.
9871
0
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9872
0
    StringRef Name = CGM.getMangledName(GD);
9873
0
    scanForTargetRegionsFunctions(FD->getBody(), Name);
9874
0
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9875
0
                                CGM.getLangOpts().OpenMPIsTargetDevice))
9876
0
      return true;
9877
0
  }
9878
9879
  // Do not to emit function if it is not marked as declare target.
9880
0
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9881
0
         AlreadyEmittedTargetDecls.count(VD) == 0;
9882
0
}
9883
9884
0
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9885
0
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9886
0
                              CGM.getLangOpts().OpenMPIsTargetDevice))
9887
0
    return true;
9888
9889
0
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9890
0
    return false;
9891
9892
  // Check if there are Ctors/Dtors in this declaration and look for target
9893
  // regions in it. We use the complete variant to produce the kernel name
9894
  // mangling.
9895
0
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9896
0
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9897
0
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9898
0
      StringRef ParentName =
9899
0
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9900
0
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9901
0
    }
9902
0
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9903
0
      StringRef ParentName =
9904
0
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9905
0
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9906
0
    }
9907
0
  }
9908
9909
  // Do not to emit variable if it is not marked as declare target.
9910
0
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9911
0
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9912
0
          cast<VarDecl>(GD.getDecl()));
9913
0
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9914
0
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9915
0
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9916
0
       HasRequiresUnifiedSharedMemory)) {
9917
0
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9918
0
    return true;
9919
0
  }
9920
0
  return false;
9921
0
}
9922
9923
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9924
0
                                                   llvm::Constant *Addr) {
9925
0
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9926
0
      !CGM.getLangOpts().OpenMPIsTargetDevice)
9927
0
    return;
9928
9929
0
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9930
0
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9931
9932
  // If this is an 'extern' declaration we defer to the canonical definition and
9933
  // do not emit an offloading entry.
9934
0
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9935
0
      VD->hasExternalStorage())
9936
0
    return;
9937
9938
0
  if (!Res) {
9939
0
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9940
      // Register non-target variables being emitted in device code (debug info
9941
      // may cause this).
9942
0
      StringRef VarName = CGM.getMangledName(VD);
9943
0
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
9944
0
    }
9945
0
    return;
9946
0
  }
9947
9948
0
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9949
0
  auto LinkageForVariable = [&VD, this]() {
9950
0
    return CGM.getLLVMLinkageVarDefinition(VD);
9951
0
  };
9952
9953
0
  std::vector<llvm::GlobalVariable *> GeneratedRefs;
9954
0
  OMPBuilder.registerTargetGlobalVariable(
9955
0
      convertCaptureClause(VD), convertDeviceClause(VD),
9956
0
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9957
0
      VD->isExternallyVisible(),
9958
0
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9959
0
                                  VD->getCanonicalDecl()->getBeginLoc()),
9960
0
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9961
0
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9962
0
      CGM.getTypes().ConvertTypeForMem(
9963
0
          CGM.getContext().getPointerType(VD->getType())),
9964
0
      Addr);
9965
9966
0
  for (auto *ref : GeneratedRefs)
9967
0
    CGM.addCompilerUsedGlobal(ref);
9968
0
}
9969
9970
0
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9971
0
  if (isa<FunctionDecl>(GD.getDecl()) ||
9972
0
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
9973
0
    return emitTargetFunctions(GD);
9974
9975
0
  return emitTargetGlobalVariable(GD);
9976
0
}
9977
9978
0
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9979
0
  for (const VarDecl *VD : DeferredGlobalVariables) {
9980
0
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9981
0
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9982
0
    if (!Res)
9983
0
      continue;
9984
0
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9985
0
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9986
0
        !HasRequiresUnifiedSharedMemory) {
9987
0
      CGM.EmitGlobal(VD);
9988
0
    } else {
9989
0
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9990
0
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9991
0
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9992
0
               HasRequiresUnifiedSharedMemory)) &&
9993
0
             "Expected link clause or to clause with unified memory.");
9994
0
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9995
0
    }
9996
0
  }
9997
0
}
9998
9999
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10000
0
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10001
0
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10002
0
         " Expected target-based directive.");
10003
0
}
10004
10005
0
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10006
0
  for (const OMPClause *Clause : D->clauselists()) {
10007
0
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10008
0
      HasRequiresUnifiedSharedMemory = true;
10009
0
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10010
0
    } else if (const auto *AC =
10011
0
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10012
0
      switch (AC->getAtomicDefaultMemOrderKind()) {
10013
0
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10014
0
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10015
0
        break;
10016
0
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10017
0
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10018
0
        break;
10019
0
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10020
0
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10021
0
        break;
10022
0
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10023
0
        break;
10024
0
      }
10025
0
    }
10026
0
  }
10027
0
}
10028
10029
0
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10030
0
  return RequiresAtomicOrdering;
10031
0
}
10032
10033
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10034
0
                                                       LangAS &AS) {
10035
0
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10036
0
    return false;
10037
0
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10038
0
  switch(A->getAllocatorType()) {
10039
0
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
10040
0
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10041
  // Not supported, fallback to the default mem space.
10042
0
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10043
0
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10044
0
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10045
0
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10046
0
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10047
0
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
10048
0
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10049
0
    AS = LangAS::Default;
10050
0
    return true;
10051
0
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10052
0
    llvm_unreachable("Expected predefined allocator for the variables with the "
10053
0
                     "static storage.");
10054
0
  }
10055
0
  return false;
10056
0
}
10057
10058
0
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10059
0
  return HasRequiresUnifiedSharedMemory;
10060
0
}
10061
10062
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10063
    CodeGenModule &CGM)
10064
0
    : CGM(CGM) {
10065
0
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10066
0
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10067
0
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10068
0
  }
10069
0
}
10070
10071
0
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10072
0
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
10073
0
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10074
0
}
10075
10076
0
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10077
0
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10078
0
    return true;
10079
10080
0
  const auto *D = cast<FunctionDecl>(GD.getDecl());
10081
  // Do not to emit function if it is marked as declare target as it was already
10082
  // emitted.
10083
0
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10084
0
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10085
0
      if (auto *F = dyn_cast_or_null<llvm::Function>(
10086
0
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
10087
0
        return !F->isDeclaration();
10088
0
      return false;
10089
0
    }
10090
0
    return true;
10091
0
  }
10092
10093
0
  return !AlreadyEmittedTargetDecls.insert(D).second;
10094
0
}
10095
10096
0
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10097
  // If we don't have entries or if we are emitting code for the device, we
10098
  // don't need to do anything.
10099
0
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10100
0
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10101
0
      (OMPBuilder.OffloadInfoManager.empty() &&
10102
0
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10103
0
    return nullptr;
10104
10105
  // Create and register the function that handles the requires directives.
10106
0
  ASTContext &C = CGM.getContext();
10107
10108
0
  llvm::Function *RequiresRegFn;
10109
0
  {
10110
0
    CodeGenFunction CGF(CGM);
10111
0
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10112
0
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10113
0
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
10114
0
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10115
0
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10116
    // TODO: check for other requires clauses.
10117
    // The requires directive takes effect only when a target region is
10118
    // present in the compilation unit. Otherwise it is ignored and not
10119
    // passed to the runtime. This avoids the runtime from throwing an error
10120
    // for mismatching requires clauses across compilation units that don't
10121
    // contain at least 1 target region.
10122
0
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10123
0
            !OMPBuilder.OffloadInfoManager.empty()) &&
10124
0
           "Target or declare target region expected.");
10125
0
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10126
0
                            CGM.getModule(), OMPRTL___tgt_register_requires),
10127
0
                        llvm::ConstantInt::get(
10128
0
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
10129
0
    CGF.FinishFunction();
10130
0
  }
10131
0
  return RequiresRegFn;
10132
0
}
10133
10134
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10135
                                    const OMPExecutableDirective &D,
10136
                                    SourceLocation Loc,
10137
                                    llvm::Function *OutlinedFn,
10138
0
                                    ArrayRef<llvm::Value *> CapturedVars) {
10139
0
  if (!CGF.HaveInsertPoint())
10140
0
    return;
10141
10142
0
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10143
0
  CodeGenFunction::RunCleanupsScope Scope(CGF);
10144
10145
  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10146
0
  llvm::Value *Args[] = {
10147
0
      RTLoc,
10148
0
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10149
0
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10150
0
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
10151
0
  RealArgs.append(std::begin(Args), std::end(Args));
10152
0
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10153
10154
0
  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10155
0
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
10156
0
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
10157
0
}
10158
10159
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10160
                                         const Expr *NumTeams,
10161
                                         const Expr *ThreadLimit,
10162
0
                                         SourceLocation Loc) {
10163
0
  if (!CGF.HaveInsertPoint())
10164
0
    return;
10165
10166
0
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10167
10168
0
  llvm::Value *NumTeamsVal =
10169
0
      NumTeams
10170
0
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10171
0
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10172
0
          : CGF.Builder.getInt32(0);
10173
10174
0
  llvm::Value *ThreadLimitVal =
10175
0
      ThreadLimit
10176
0
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10177
0
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10178
0
          : CGF.Builder.getInt32(0);
10179
10180
  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10181
0
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10182
0
                                     ThreadLimitVal};
10183
0
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10184
0
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10185
0
                      PushNumTeamsArgs);
10186
0
}
10187
10188
void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10189
                                            const Expr *ThreadLimit,
10190
0
                                            SourceLocation Loc) {
10191
0
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10192
0
  llvm::Value *ThreadLimitVal =
10193
0
      ThreadLimit
10194
0
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10195
0
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10196
0
          : CGF.Builder.getInt32(0);
10197
10198
  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10199
0
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10200
0
                                    ThreadLimitVal};
10201
0
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10202
0
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10203
0
                      ThreadLimitArgs);
10204
0
}
10205
10206
void CGOpenMPRuntime::emitTargetDataCalls(
10207
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10208
    const Expr *Device, const RegionCodeGenTy &CodeGen,
10209
0
    CGOpenMPRuntime::TargetDataInfo &Info) {
10210
0
  if (!CGF.HaveInsertPoint())
10211
0
    return;
10212
10213
  // Action used to replace the default codegen action and turn privatization
10214
  // off.
10215
0
  PrePostActionTy NoPrivAction;
10216
10217
0
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10218
10219
0
  llvm::Value *IfCondVal = nullptr;
10220
0
  if (IfCond)
10221
0
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10222
10223
  // Emit device ID if any.
10224
0
  llvm::Value *DeviceID = nullptr;
10225
0
  if (Device) {
10226
0
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10227
0
                                         CGF.Int64Ty, /*isSigned=*/true);
10228
0
  } else {
10229
0
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10230
0
  }
10231
10232
  // Fill up the arrays with all the mapped variables.
10233
0
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10234
0
  auto GenMapInfoCB =
10235
0
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10236
0
    CGF.Builder.restoreIP(CodeGenIP);
10237
    // Get map clause information.
10238
0
    MappableExprsHandler MEHandler(D, CGF);
10239
0
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10240
10241
0
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10242
0
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10243
0
    };
10244
0
    if (CGM.getCodeGenOpts().getDebugInfo() !=
10245
0
        llvm::codegenoptions::NoDebugInfo) {
10246
0
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10247
0
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10248
0
                      FillInfoMap);
10249
0
    }
10250
10251
0
    return CombinedInfo;
10252
0
  };
10253
0
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10254
0
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10255
0
    CGF.Builder.restoreIP(CodeGenIP);
10256
0
    switch (BodyGenType) {
10257
0
    case BodyGenTy::Priv:
10258
0
      if (!Info.CaptureDeviceAddrMap.empty())
10259
0
        CodeGen(CGF);
10260
0
      break;
10261
0
    case BodyGenTy::DupNoPriv:
10262
0
      if (!Info.CaptureDeviceAddrMap.empty()) {
10263
0
        CodeGen.setAction(NoPrivAction);
10264
0
        CodeGen(CGF);
10265
0
      }
10266
0
      break;
10267
0
    case BodyGenTy::NoPriv:
10268
0
      if (Info.CaptureDeviceAddrMap.empty()) {
10269
0
        CodeGen.setAction(NoPrivAction);
10270
0
        CodeGen(CGF);
10271
0
      }
10272
0
      break;
10273
0
    }
10274
0
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
10275
0
                         CGF.Builder.GetInsertPoint());
10276
0
  };
10277
10278
0
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10279
0
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10280
0
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10281
0
    }
10282
0
  };
10283
10284
0
  auto CustomMapperCB = [&](unsigned int I) {
10285
0
    llvm::Value *MFunc = nullptr;
10286
0
    if (CombinedInfo.Mappers[I]) {
10287
0
      Info.HasMapper = true;
10288
0
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10289
0
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10290
0
    }
10291
0
    return MFunc;
10292
0
  };
10293
10294
  // Source location for the ident struct
10295
0
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296
10297
0
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10298
0
                         CGF.AllocaInsertPt->getIterator());
10299
0
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10300
0
                          CGF.Builder.GetInsertPoint());
10301
0
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10302
0
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10303
0
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10304
0
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10305
0
}
10306
10307
/// Emit the runtime call for a standalone target-data directive
/// ('target enter data', 'target exit data', or 'target update').
/// Builds the offloading arrays from the directive's map clauses, then
/// calls the matching __tgt_target_data_* runtime entry, optionally
/// wrapped in an outer task (for depend/nowait) or guarded by an 'if'
/// clause.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo / MapTypesArray / MapNamesArray are captured by reference in
  // ThenGen and filled in later by TargetThenGen, so they must outlive both
  // lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_* entries below.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are listed explicitly (rather than
    // folded into 'default') so that adding a new directive kind produces
    // a -Wswitch warning here, forcing a conscious decision.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // depend/nowait clauses require the data-motion call to run inside an
    // implicitly generated task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // 'if' clause present: emit the runtime call only on the 'then' branch;
    // the 'else' branch intentionally does nothing.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10488
10489
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
/// The enumerators map to the one-letter codes emitted by
/// mangleVectorParameters below.
enum ParamKindTy {
  Linear,     // linear, no modifier ('l').
  LinearRef,  // linear with 'ref' modifier ('R').
  LinearUVal, // linear with 'uval' modifier ('U').
  LinearVal,  // linear reference parameter without ref/uval modifier ('L').
  Uniform,    // uniform clause parameter ('u').
  Vector,     // default: a regular vector parameter ('v').
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  // Constant linear step; or, when HasVarStride is set, the position of
  // the parameter that carries the stride at runtime.
  llvm::APSInt StrideOrArg;
  // Alignment from the 'aligned' clause (explicit value or the OpenMP
  // default SIMD alignment); empty when the parameter is not aligned.
  llvm::APSInt Alignment;
  // True when the linear step is another function parameter rather than
  // a compile-time constant.
  bool HasVarStride = false;
};
} // namespace
10507
10508
static unsigned evaluateCDTSize(const FunctionDecl *FD,
10509
0
                                ArrayRef<ParamAttrTy> ParamAttrs) {
10510
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10511
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10512
  // of that clause. The VLEN value must be power of 2.
10513
  // In other case the notion of the function`s "characteristic data type" (CDT)
10514
  // is used to compute the vector length.
10515
  // CDT is defined in the following order:
10516
  //   a) For non-void function, the CDT is the return type.
10517
  //   b) If the function has any non-uniform, non-linear parameters, then the
10518
  //   CDT is the type of the first such parameter.
10519
  //   c) If the CDT determined by a) or b) above is struct, union, or class
10520
  //   type which is pass-by-value (except for the type that maps to the
10521
  //   built-in complex data type), the characteristic data type is int.
10522
  //   d) If none of the above three cases is applicable, the CDT is int.
10523
  // The VLEN is then determined based on the CDT and the size of vector
10524
  // register of that ISA for which current vector version is generated. The
10525
  // VLEN is computed using the formula below:
10526
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10527
  // where vector register size specified in section 3.2.1 Registers and the
10528
  // Stack Frame of original AMD64 ABI document.
10529
0
  QualType RetType = FD->getReturnType();
10530
0
  if (RetType.isNull())
10531
0
    return 0;
10532
0
  ASTContext &C = FD->getASTContext();
10533
0
  QualType CDT;
10534
0
  if (!RetType.isNull() && !RetType->isVoidType()) {
10535
0
    CDT = RetType;
10536
0
  } else {
10537
0
    unsigned Offset = 0;
10538
0
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10539
0
      if (ParamAttrs[Offset].Kind == Vector)
10540
0
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10541
0
      ++Offset;
10542
0
    }
10543
0
    if (CDT.isNull()) {
10544
0
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10545
0
        if (ParamAttrs[I + Offset].Kind == Vector) {
10546
0
          CDT = FD->getParamDecl(I)->getType();
10547
0
          break;
10548
0
        }
10549
0
      }
10550
0
    }
10551
0
  }
10552
0
  if (CDT.isNull())
10553
0
    CDT = C.IntTy;
10554
0
  CDT = CDT->getCanonicalTypeUnqualified();
10555
0
  if (CDT->isRecordType() || CDT->isUnionType())
10556
0
    CDT = C.IntTy;
10557
0
  return C.getTypeSize(CDT);
10558
0
}
10559
10560
/// Mangle the parameter part of the vector function name according to
10561
/// their OpenMP classification. The mangling function is defined in
10562
/// section 4.5 of the AAVFABI(2021Q1).
10563
0
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10564
0
  SmallString<256> Buffer;
10565
0
  llvm::raw_svector_ostream Out(Buffer);
10566
0
  for (const auto &ParamAttr : ParamAttrs) {
10567
0
    switch (ParamAttr.Kind) {
10568
0
    case Linear:
10569
0
      Out << 'l';
10570
0
      break;
10571
0
    case LinearRef:
10572
0
      Out << 'R';
10573
0
      break;
10574
0
    case LinearUVal:
10575
0
      Out << 'U';
10576
0
      break;
10577
0
    case LinearVal:
10578
0
      Out << 'L';
10579
0
      break;
10580
0
    case Uniform:
10581
0
      Out << 'u';
10582
0
      break;
10583
0
    case Vector:
10584
0
      Out << 'v';
10585
0
      break;
10586
0
    }
10587
0
    if (ParamAttr.HasVarStride)
10588
0
      Out << "s" << ParamAttr.StrideOrArg;
10589
0
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10590
0
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10591
      // Don't print the step value if it is not present or if it is
10592
      // equal to 1.
10593
0
      if (ParamAttr.StrideOrArg < 0)
10594
0
        Out << 'n' << -ParamAttr.StrideOrArg;
10595
0
      else if (ParamAttr.StrideOrArg != 1)
10596
0
        Out << ParamAttr.StrideOrArg;
10597
0
    }
10598
10599
0
    if (!!ParamAttr.Alignment)
10600
0
      Out << 'a' << ParamAttr.Alignment;
10601
0
  }
10602
10603
0
  return std::string(Out.str());
10604
0
}
10605
10606
static void
10607
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10608
                           const llvm::APSInt &VLENVal,
10609
                           ArrayRef<ParamAttrTy> ParamAttrs,
10610
0
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
10611
0
  struct ISADataTy {
10612
0
    char ISA;
10613
0
    unsigned VecRegSize;
10614
0
  };
10615
0
  ISADataTy ISAData[] = {
10616
0
      {
10617
0
          'b', 128
10618
0
      }, // SSE
10619
0
      {
10620
0
          'c', 256
10621
0
      }, // AVX
10622
0
      {
10623
0
          'd', 256
10624
0
      }, // AVX2
10625
0
      {
10626
0
          'e', 512
10627
0
      }, // AVX512
10628
0
  };
10629
0
  llvm::SmallVector<char, 2> Masked;
10630
0
  switch (State) {
10631
0
  case OMPDeclareSimdDeclAttr::BS_Undefined:
10632
0
    Masked.push_back('N');
10633
0
    Masked.push_back('M');
10634
0
    break;
10635
0
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10636
0
    Masked.push_back('N');
10637
0
    break;
10638
0
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10639
0
    Masked.push_back('M');
10640
0
    break;
10641
0
  }
10642
0
  for (char Mask : Masked) {
10643
0
    for (const ISADataTy &Data : ISAData) {
10644
0
      SmallString<256> Buffer;
10645
0
      llvm::raw_svector_ostream Out(Buffer);
10646
0
      Out << "_ZGV" << Data.ISA << Mask;
10647
0
      if (!VLENVal) {
10648
0
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10649
0
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
10650
0
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10651
0
      } else {
10652
0
        Out << VLENVal;
10653
0
      }
10654
0
      Out << mangleVectorParameters(ParamAttrs);
10655
0
      Out << '_' << Fn->getName();
10656
0
      Fn->addFnAttr(Out.str());
10657
0
    }
10658
0
  }
10659
0
}
10660
10661
// This are the Functions that are needed to mangle the name of the
10662
// vector functions generated by the compiler, according to the rules
10663
// defined in the "Vector Function ABI specifications for AArch64",
10664
// available at
10665
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10666
10667
/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10668
0
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10669
0
  QT = QT.getCanonicalType();
10670
10671
0
  if (QT->isVoidType())
10672
0
    return false;
10673
10674
0
  if (Kind == ParamKindTy::Uniform)
10675
0
    return false;
10676
10677
0
  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10678
0
    return false;
10679
10680
0
  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10681
0
      !QT->isReferenceType())
10682
0
    return false;
10683
10684
0
  return true;
10685
0
}
10686
10687
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10688
0
static bool getAArch64PBV(QualType QT, ASTContext &C) {
10689
0
  QT = QT.getCanonicalType();
10690
0
  unsigned Size = C.getTypeSize(QT);
10691
10692
  // Only scalars and complex within 16 bytes wide set PVB to true.
10693
0
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10694
0
    return false;
10695
10696
0
  if (QT->isFloatingType())
10697
0
    return true;
10698
10699
0
  if (QT->isIntegerType())
10700
0
    return true;
10701
10702
0
  if (QT->isPointerType())
10703
0
    return true;
10704
10705
  // TODO: Add support for complex types (section 3.1.2, item 2).
10706
10707
0
  return false;
10708
0
}
10709
10710
/// Computes the lane size (LS) of a return type or of an input parameter,
10711
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10712
/// TODO: Add support for references, section 3.2.1, item 1.
10713
0
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10714
0
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10715
0
    QualType PTy = QT.getCanonicalType()->getPointeeType();
10716
0
    if (getAArch64PBV(PTy, C))
10717
0
      return C.getTypeSize(PTy);
10718
0
  }
10719
0
  if (getAArch64PBV(QT, C))
10720
0
    return C.getTypeSize(QT);
10721
10722
0
  return C.getTypeSize(C.getUIntPtrType());
10723
0
}
10724
10725
// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
//
// Returns the tuple (NDS, WDS, OutputBecomesInput), where the last
// element is true when the non-void return value is not pass-by-value
// but does map to a vector, i.e. the output vector must also be mangled
// as an input.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  // Collect the lane size of the return value (if any) and of every
  // parameter; NDS/WDS are the min/max over this list.
  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
10761
10762
// Function used to add the attribute. The parameter `VLEN` is
10763
// templated to allow the use of "x" when targeting scalable functions
10764
// for SVE.
10765
template <typename T>
10766
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10767
                                 char ISA, StringRef ParSeq,
10768
                                 StringRef MangledName, bool OutputBecomesInput,
10769
0
                                 llvm::Function *Fn) {
10770
0
  SmallString<256> Buffer;
10771
0
  llvm::raw_svector_ostream Out(Buffer);
10772
0
  Out << Prefix << ISA << LMask << VLEN;
10773
0
  if (OutputBecomesInput)
10774
0
    Out << "v";
10775
0
  Out << ParSeq << "_" << MangledName;
10776
0
  Fn->addFnAttr(Out.str());
10777
0
}
Unexecuted instantiation: CGOpenMPRuntime.cpp:void addAArch64VectorName<unsigned int>(unsigned int, llvm::StringRef, llvm::StringRef, char, llvm::StringRef, llvm::StringRef, bool, llvm::Function*)
Unexecuted instantiation: CGOpenMPRuntime.cpp:void addAArch64VectorName<char const*>(char const*, llvm::StringRef, llvm::StringRef, char, llvm::StringRef, llvm::StringRef, bool, llvm::Function*)
Unexecuted instantiation: CGOpenMPRuntime.cpp:void addAArch64VectorName<int>(int, llvm::StringRef, llvm::StringRef, char, llvm::StringRef, llvm::StringRef, bool, llvm::Function*)
10778
10779
// Helper function to generate the Advanced SIMD names depending on
10780
// the value of the NDS when simdlen is not present.
10781
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10782
                                      StringRef Prefix, char ISA,
10783
                                      StringRef ParSeq, StringRef MangledName,
10784
                                      bool OutputBecomesInput,
10785
0
                                      llvm::Function *Fn) {
10786
0
  switch (NDS) {
10787
0
  case 8:
10788
0
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10789
0
                         OutputBecomesInput, Fn);
10790
0
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10791
0
                         OutputBecomesInput, Fn);
10792
0
    break;
10793
0
  case 16:
10794
0
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10795
0
                         OutputBecomesInput, Fn);
10796
0
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10797
0
                         OutputBecomesInput, Fn);
10798
0
    break;
10799
0
  case 32:
10800
0
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10801
0
                         OutputBecomesInput, Fn);
10802
0
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10803
0
                         OutputBecomesInput, Fn);
10804
0
    break;
10805
0
  case 64:
10806
0
  case 128:
10807
0
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10808
0
                         OutputBecomesInput, Fn);
10809
0
    break;
10810
0
  default:
10811
0
    llvm_unreachable("Scalar type is too wide.");
10812
0
  }
10813
0
}
10814
10815
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN value of the 'simdlen' clause, 0 when absent.
/// \param State    value of the '[not]inbranch' clause.
/// \param ISA      's' for SVE, 'n' for Advanced SIMD (NEON).
/// \param SLoc     location used for any warning diagnostics.
///
/// Invalid simdlen values produce a warning and suppress the attributes
/// rather than erroring out.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total vector width (UserVLEN * WDS) at most 2048 bits and a
  // multiple of 128 bits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable VLEN, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10923
10924
/// Attach the target-specific 'declare simd' vector-variant attributes to
/// \p Fn, one set per OMPDeclareSimdDeclAttr found on any redeclaration of
/// \p FD. For every attribute this classifies each parameter (uniform /
/// aligned / linear with its step) into a ParamAttrTy before delegating to
/// the x86 or AArch64 mangler.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Walk the whole redeclaration chain: 'declare simd' may appear on any
  // of the declarations.
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For member functions, position 0 (keyed by FD itself) stands for the
    // implicit 'this' parameter.
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One slot per parameter (plus 'this'), default-initialized to Vector.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI walks the per-clause alignment expressions in lockstep with the
      // aligned() parameter list.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Explicit alignment value if given, otherwise the OpenMP default
        // SIMD alignment for the parameter's type (in bytes).
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI/MI walk the step and modifier lists in lockstep with linears().
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: record the position of the parameter that
            // carries the stride at runtime.
            // NOTE(review): cast<> asserts and never returns null, so this
            // if-condition is always true for a DeclRefExpr and would assert
            // on any other node; dyn_cast<> looks intended — verify.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific mangler; other targets get no
      // 'declare simd' attributes.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Prefer SVE when available; otherwise fall back to Advanced SIMD.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11076
11077
namespace {
/// Cleanup action for doacross support.
///
/// Runs the runtime finalization call (the function passed as \p RTLFn; see
/// emitDoacrossInit, which pushes this cleanup with __kmpc_doacross_fini)
/// when the enclosing region is left. The two call arguments are copied into
/// the cleanup object because the cleanup may fire after the caller's
/// argument array has gone out of scope.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments of the finalization runtime call.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means this position is unreachable; emit nothing.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11101
11102
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Emit __kmpc_doacross_init for a doacross loop nest (one entry per loop in
  // NumIterations) and schedule the matching __kmpc_doacross_fini call as a
  // cleanup that runs at region exit.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    // Cache the record type so later doacross loops reuse it.
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Array of kmp_dim, one element per loop of the doacross nest.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  // Zero-initialize 'dims' first, so the 'lo' field of every dimension is 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The runtime expects kmp_int64; convert the iteration count if needed.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup calling __kmpc_doacross_fini(loc, gtid) on both normal and
  // EH exit paths of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
11172
11173
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  // Shared lowering for doacross dependences expressed either via an
  // OMPDependClause or an OMPDoacrossClause (the template parameter T): the
  // loop-counter values from the clause are stored into a temporary kmp_int64
  // array which is then passed to __kmpc_doacross_post (source) or
  // __kmpc_doacross_wait (sink).
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Counters are communicated to the runtime as kmp_int64.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // OMPDoacrossKind abstracts the source/sink query over both clause types.
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
Unexecuted instantiation: CGOpenMPRuntime.cpp:void EmitDoacrossOrdered<clang::OMPDependClause>(clang::CodeGen::CodeGenFunction&, clang::CodeGen::CodeGenModule&, clang::OMPDependClause const*, llvm::Value*, llvm::Value*)
Unexecuted instantiation: CGOpenMPRuntime.cpp:void EmitDoacrossOrdered<clang::OMPDoacrossClause>(clang::CodeGen::CodeGenFunction&, clang::CodeGen::CodeGenModule&, clang::OMPDoacrossClause const*, llvm::Value*, llvm::Value*)
11207
11208
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11209
0
                                          const OMPDependClause *C) {
11210
0
  return EmitDoacrossOrdered<OMPDependClause>(
11211
0
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11212
0
      getThreadID(CGF, C->getBeginLoc()));
11213
0
}
11214
11215
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11216
0
                                          const OMPDoacrossClause *C) {
11217
0
  return EmitDoacrossOrdered<OMPDoacrossClause>(
11218
0
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11219
0
      getThreadID(CGF, C->getBeginLoc()));
11220
0
}
11221
11222
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11223
                               llvm::FunctionCallee Callee,
11224
0
                               ArrayRef<llvm::Value *> Args) const {
11225
0
  assert(Loc.isValid() && "Outlined function call location must be valid.");
11226
0
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11227
11228
0
  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11229
0
    if (Fn->doesNotThrow()) {
11230
0
      CGF.EmitNounwindRuntimeCall(Fn, Args);
11231
0
      return;
11232
0
    }
11233
0
  }
11234
0
  CGF.EmitRuntimeCall(Callee, Args);
11235
0
}
11236
11237
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Thin forwarder: an OpenMP outlined function is invoked exactly like any
  // other runtime call here (see emitCall for the nounwind handling).
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11242
11243
0
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Remember that at least one declare-target function body was emitted.
  const auto *FD = dyn_cast<FunctionDecl>(D);
  if (FD && OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
    HasEmittedDeclareTargetRegion = true;
}
11248
11249
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // In this implementation the target parameter is not consulted: the address
  // of the native parameter's local storage is returned directly.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11254
11255
/// Return allocator value from expression, or return a null allocator (default
11256
/// when no allocator specified).
11257
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11258
0
                                    const Expr *Allocator) {
11259
0
  llvm::Value *AllocVal;
11260
0
  if (Allocator) {
11261
0
    AllocVal = CGF.EmitScalarExpr(Allocator);
11262
    // According to the standard, the original allocator type is a enum
11263
    // (integer). Convert to pointer type, if required.
11264
0
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11265
0
                                        CGF.getContext().VoidPtrTy,
11266
0
                                        Allocator->getExprLoc());
11267
0
  } else {
11268
    // If no allocator specified, it defaults to the null allocator.
11269
0
    AllocVal = llvm::Constant::getNullValue(
11270
0
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11271
0
  }
11272
0
  return AllocVal;
11273
0
}
11274
11275
/// Return the alignment from an allocate directive if present.
11276
0
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11277
0
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11278
11279
0
  if (!AllocateAlignment)
11280
0
    return nullptr;
11281
11282
0
  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11283
0
}
11284
11285
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  // Resolve the storage to use for local variable VD, handling two special
  // cases that may combine:
  //  * VD is a local of an untied task: pre-registered addresses are looked
  //    up in UntiedLocalVarsStack for the current function.
  //  * VD has an 'omp allocate' attribute: memory comes from the OpenMP
  //    runtime (__kmpc_alloc / __kmpc_aligned_alloc) and is released with
  //    __kmpc_free via a pushed cleanup.
  // Returns Address::invalid() when neither case applies (presumably the
  // caller then falls back to ordinary automatic storage — confirm against
  // callers).
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    // __kmpc_aligned_alloc takes an extra alignment argument right after the
    // thread id; __kmpc_alloc does not.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw runtime pointer to a pointer to VD's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied-task locals, publish the allocated pointer through the
    // pre-registered slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location is stored as its raw encoding so the cleanup object
      // stays trivially copyable.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        // Unreachable insertion point: nothing to free.
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // For untied tasks the "real" address registered earlier wins; otherwise
    // wrap the freshly allocated pointer.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11384
11385
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11386
0
                                             const VarDecl *VD) const {
11387
0
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11388
0
  if (It == FunctionToUntiedTaskStackMap.end())
11389
0
    return false;
11390
0
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
11391
0
}
11392
11393
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  // Collect every decl named in the directive's nontemporal clauses into a
  // fresh set on NontemporalDeclsStack; the destructor pops the set, so it is
  // live only while S is being emitted.
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Not a plain variable reference: must be a member access on the
        // current class' 'this' (explicit or implicit).
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11418
11419
0
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  // Pop only what the constructor actually pushed.
  if (NeedToPush)
    CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
11424
11425
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  // Nothing to record when there are no untied-task locals.
  if (NeedToPush) {
    CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
    // Associate the current function with the slot about to be pushed (its
    // index is the pre-push stack size).
    RT.FunctionToUntiedTaskStackMap.try_emplace(
        CGF.CurFn, RT.UntiedLocalVarsStack.size());
    RT.UntiedLocalVarsStack.push_back(LocalVars);
  }
}
11436
11437
0
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  // Pop only what the constructor actually pushed.
  if (NeedToPush)
    CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
11442
11443
0
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11444
0
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11445
11446
0
  return llvm::any_of(
11447
0
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
11448
0
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11449
0
}
11450
11451
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11452
    const OMPExecutableDirective &S,
11453
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11454
0
    const {
11455
0
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11456
  // Vars in target/task regions must be excluded completely.
11457
0
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11458
0
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
11459
0
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11460
0
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11461
0
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11462
0
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
11463
0
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11464
0
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11465
0
    }
11466
0
  }
11467
  // Exclude vars in private clauses.
11468
0
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11469
0
    for (const Expr *Ref : C->varlists()) {
11470
0
      if (!Ref->getType()->isScalarType())
11471
0
        continue;
11472
0
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11473
0
      if (!DRE)
11474
0
        continue;
11475
0
      NeedToCheckForLPCs.insert(DRE->getDecl());
11476
0
    }
11477
0
  }
11478
0
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11479
0
    for (const Expr *Ref : C->varlists()) {
11480
0
      if (!Ref->getType()->isScalarType())
11481
0
        continue;
11482
0
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11483
0
      if (!DRE)
11484
0
        continue;
11485
0
      NeedToCheckForLPCs.insert(DRE->getDecl());
11486
0
    }
11487
0
  }
11488
0
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11489
0
    for (const Expr *Ref : C->varlists()) {
11490
0
      if (!Ref->getType()->isScalarType())
11491
0
        continue;
11492
0
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11493
0
      if (!DRE)
11494
0
        continue;
11495
0
      NeedToCheckForLPCs.insert(DRE->getDecl());
11496
0
    }
11497
0
  }
11498
0
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11499
0
    for (const Expr *Ref : C->varlists()) {
11500
0
      if (!Ref->getType()->isScalarType())
11501
0
        continue;
11502
0
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11503
0
      if (!DRE)
11504
0
        continue;
11505
0
      NeedToCheckForLPCs.insert(DRE->getDecl());
11506
0
    }
11507
0
  }
11508
0
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11509
0
    for (const Expr *Ref : C->varlists()) {
11510
0
      if (!Ref->getType()->isScalarType())
11511
0
        continue;
11512
0
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11513
0
      if (!DRE)
11514
0
        continue;
11515
0
      NeedToCheckForLPCs.insert(DRE->getDecl());
11516
0
    }
11517
0
  }
11518
0
  for (const Decl *VD : NeedToCheckForLPCs) {
11519
0
    for (const LastprivateConditionalData &Data :
11520
0
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11521
0
      if (Data.DeclToUniqueName.count(VD) > 0) {
11522
0
        if (!Data.Disabled)
11523
0
          NeedToAddForLPCsAsDisabled.insert(VD);
11524
0
        break;
11525
0
      }
11526
0
    }
11527
0
  }
11528
0
}
11529
11530
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push a frame only for OpenMP >= 5.0 and only when S carries at least
      // one 'lastprivate(conditional: ...)' clause.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate decl to a unique "pl_cond" name; the
    // name is used later to create the internal tracking globals (see
    // emitLastprivateConditionalUpdate).
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the function this frame belongs
  // to.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11561
11562
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  // "Disable" form of the RAII (see disable()): pushes a frame marked
  // Disabled for every decl whose lastprivate-conditional analysis must be
  // switched off inside the region S.
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; only membership in
    // DeclToUniqueName plus the Disabled flag is consulted.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11580
11581
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the constructor overload that disables lastprivate
  // conditional analysis inside the region S.
  return LastprivateConditionalRAII(CGF, S);
}
11586
11587
0
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  // Nothing was ever pushed for OpenMP < 5.0.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop exactly the kind of frame the matching constructor pushed, verifying
  // the Disabled flag agrees with the recorded action.
  auto &Stack = CGM.getOpenMPRuntime().LastprivateConditionalStack;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(Stack.back().Disabled && "Expected list of disabled private vars.");
    Stack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(!Stack.back().Disabled &&
           "Expected list of lastprivate conditional vars.");
    Stack.pop_back();
  }
}
11602
11603
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  // Create (or reuse, per function and per decl) a temporary of the implicit
  // record { <VD's type> value; char Fired; }, reset the 'Fired' byte to 0,
  // and return the address of the value field to be used as VD's private
  // copy.
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time VD is seen in this function: build the record type and the
    // backing temporary, then cache everything for subsequent calls.
    // (The "lasprivate" spelling is a pre-existing internal record name and
    // is deliberately left unchanged.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the previously created type, fields and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the 'Fired' marker byte to zero.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11637
11638
namespace {
11639
/// Checks if the lastprivate conditional variable is referenced in LHS.
11640
class LastprivateConditionalRefChecker final
11641
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11642
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11643
  const Expr *FoundE = nullptr;
11644
  const Decl *FoundD = nullptr;
11645
  StringRef UniqueDeclName;
11646
  LValue IVLVal;
11647
  llvm::Function *FoundFn = nullptr;
11648
  SourceLocation Loc;
11649
11650
public:
11651
0
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
11652
0
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11653
0
         llvm::reverse(LPM)) {
11654
0
      auto It = D.DeclToUniqueName.find(E->getDecl());
11655
0
      if (It == D.DeclToUniqueName.end())
11656
0
        continue;
11657
0
      if (D.Disabled)
11658
0
        return false;
11659
0
      FoundE = E;
11660
0
      FoundD = E->getDecl()->getCanonicalDecl();
11661
0
      UniqueDeclName = It->second;
11662
0
      IVLVal = D.IVLVal;
11663
0
      FoundFn = D.Fn;
11664
0
      break;
11665
0
    }
11666
0
    return FoundE == E;
11667
0
  }
11668
0
  bool VisitMemberExpr(const MemberExpr *E) {
11669
0
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11670
0
      return false;
11671
0
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11672
0
         llvm::reverse(LPM)) {
11673
0
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11674
0
      if (It == D.DeclToUniqueName.end())
11675
0
        continue;
11676
0
      if (D.Disabled)
11677
0
        return false;
11678
0
      FoundE = E;
11679
0
      FoundD = E->getMemberDecl()->getCanonicalDecl();
11680
0
      UniqueDeclName = It->second;
11681
0
      IVLVal = D.IVLVal;
11682
0
      FoundFn = D.Fn;
11683
0
      break;
11684
0
    }
11685
0
    return FoundE == E;
11686
0
  }
11687
0
  bool VisitStmt(const Stmt *S) {
11688
0
    for (const Stmt *Child : S->children()) {
11689
0
      if (!Child)
11690
0
        continue;
11691
0
      if (const auto *E = dyn_cast<Expr>(Child))
11692
0
        if (!E->isGLValue())
11693
0
          continue;
11694
0
      if (Visit(Child))
11695
0
        return true;
11696
0
    }
11697
0
    return false;
11698
0
  }
11699
  explicit LastprivateConditionalRefChecker(
11700
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11701
0
      : LPM(LPM) {}
11702
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11703
0
  getFoundData() const {
11704
0
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11705
0
  }
11706
};
11707
} // namespace
11708
11709
/// Emits the guarded update of the global copy of a lastprivate conditional
/// variable: inside a critical section (or unguarded in simd-only mode),
/// if the current iteration number is >= the last recorded one, both the
/// iteration number and the private value \p LVal are stored into internal
/// global variables derived from \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // Note: IVVal is loaded outside the lambda and captured by value so that
  // the same loaded iteration value is used inside the guarded region.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Signedness of the IV decides which integer comparison to emit.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // The unique name doubles as the critical section name, so updates of
    // different variables do not serialize against each other.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11794
11795
/// If \p LHS references a lastprivate conditional variable of an enclosing
/// region, emits the bookkeeping for its update: either the full global
/// update (when the reference is in the function that registered the
/// variable) or an atomic store to the private copy's 'Fired' flag (for
/// references inside inner outlined regions).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional is an OpenMP 5.0 feature; nothing to do if no
  // regions are registered.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct that
    // carries the 'Fired' flag next to the value.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic, volatile store so the outer region reliably observes the flag
    // set by any thread of the inner region.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11838
11839
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11840
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
11841
0
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11842
0
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11843
0
    return;
11844
0
  auto Range = llvm::reverse(LastprivateConditionalStack);
11845
0
  auto It = llvm::find_if(
11846
0
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11847
0
  if (It == Range.end() || It->Fn != CGF.CurFn)
11848
0
    return;
11849
0
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11850
0
  assert(LPCI != LastprivateConditionalToTypes.end() &&
11851
0
         "Lastprivates must be registered already.");
11852
0
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11853
0
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11854
0
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11855
0
  for (const auto &Pair : It->DeclToUniqueName) {
11856
0
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11857
0
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11858
0
      continue;
11859
0
    auto I = LPCI->getSecond().find(Pair.first);
11860
0
    assert(I != LPCI->getSecond().end() &&
11861
0
           "Lastprivate must be rehistered already.");
11862
    // bool Cmp = priv_a.Fired != 0;
11863
0
    LValue BaseLVal = std::get<3>(I->getSecond());
11864
0
    LValue FiredLVal =
11865
0
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11866
0
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11867
0
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11868
0
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11869
0
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11870
    // if (Cmp) {
11871
0
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11872
0
    CGF.EmitBlock(ThenBB);
11873
0
    Address Addr = CGF.GetAddrOfLocalVar(VD);
11874
0
    LValue LVal;
11875
0
    if (VD->getType()->isReferenceType())
11876
0
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11877
0
                                           AlignmentSource::Decl);
11878
0
    else
11879
0
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11880
0
                                AlignmentSource::Decl);
11881
0
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11882
0
                                     D.getBeginLoc());
11883
0
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11884
0
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11885
    // }
11886
0
  }
11887
0
}
11888
11889
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11890
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11891
0
    SourceLocation Loc) {
11892
0
  if (CGF.getLangOpts().OpenMP < 50)
11893
0
    return;
11894
0
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11895
0
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11896
0
         "Unknown lastprivate conditional variable.");
11897
0
  StringRef UniqueName = It->second;
11898
0
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11899
  // The variable was not updated in the region - exit.
11900
0
  if (!GV)
11901
0
    return;
11902
0
  LValue LPLVal = CGF.MakeAddrLValue(
11903
0
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11904
0
      PrivLVal.getType().getNonReferenceType());
11905
0
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11906
0
  CGF.EmitStoreOfScalar(Res, PrivLVal);
11907
0
}
11908
11909
/// Parallel region outlining is not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11915
11916
/// Teams region outlining is not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11922
11923
/// Task outlining is not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11930
11931
/// Parallel region calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11939
11940
/// Critical regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11946
11947
/// Master regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11952
11953
/// Masked regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11959
11960
/// Taskyield is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11964
11965
/// Taskgroup regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11970
11971
/// Single regions (incl. copyprivate) are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11978
11979
/// Ordered regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11985
11986
/// Barriers are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11993
11994
/// Dynamic worksharing-loop initialization is not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12000
12001
/// Static worksharing-loop initialization is not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12006
12007
/// Distribute-loop initialization is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12012
12013
/// Ordered-iteration bookkeeping is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12019
12020
/// Static worksharing-loop finalization is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12025
12026
/// Dynamic-loop chunk fetching is not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12033
12034
/// The num_threads clause is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12039
12040
/// The proc_bind clause is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12045
12046
/// Threadprivate variables are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12052
12053
/// Threadprivate variable definitions are not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12058
12059
/// Artificial threadprivate variables are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12063
12064
/// Flush is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12070
12071
/// Task calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12079
12080
/// Taskloop calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12086
12087
/// In SIMD-only mode only "simple" reductions (no runtime involvement) are
/// allowed; the actual codegen is delegated to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12095
12096
/// Task reduction initialization is not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12101
12102
/// Task reduction finalization is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12107
12108
/// Task reduction fixups are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12114
12115
/// Task reduction items are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12121
12122
/// Taskwait is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12127
12128
/// Cancellation points are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12133
12134
/// Cancel is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12139
12140
/// Target region outlining is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12146
12147
/// Target calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12156
12157
0
/// Device-side emission of target functions is not supported in SIMD-only
/// mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12160
12161
0
/// Device-side emission of target global variables is not supported in
/// SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12164
12165
0
/// No global ever requires special target handling in SIMD-only mode, so
/// always report that the declaration was not handled here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12168
12169
/// Teams calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12176
12177
/// The num_teams/thread_limit clauses are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12183
12184
/// Target data regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12190
12191
/// Stand-alone target data directives are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12196
12197
/// Doacross initialization is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12202
12203
/// Doacross ordering via the depend clause is not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12207
12208
/// Doacross ordering via the doacross clause is not supported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12212
12213
/// Parameter translation for outlined regions is not supported in SIMD-only
/// mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12218
12219
/// Parameter address mapping for outlined regions is not supported in
/// SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}