/src/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
Line | Count | Source |
1 | | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This contains code to emit OpenMP nodes as LLVM code. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "CGCleanup.h" |
14 | | #include "CGOpenMPRuntime.h" |
15 | | #include "CodeGenFunction.h" |
16 | | #include "CodeGenModule.h" |
17 | | #include "TargetInfo.h" |
18 | | #include "clang/AST/ASTContext.h" |
19 | | #include "clang/AST/Attr.h" |
20 | | #include "clang/AST/DeclOpenMP.h" |
21 | | #include "clang/AST/OpenMPClause.h" |
22 | | #include "clang/AST/Stmt.h" |
23 | | #include "clang/AST/StmtOpenMP.h" |
24 | | #include "clang/AST/StmtVisitor.h" |
25 | | #include "clang/Basic/OpenMPKinds.h" |
26 | | #include "clang/Basic/PrettyStackTrace.h" |
27 | | #include "llvm/ADT/SmallSet.h" |
28 | | #include "llvm/BinaryFormat/Dwarf.h" |
29 | | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
30 | | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
31 | | #include "llvm/IR/Constants.h" |
32 | | #include "llvm/IR/DebugInfoMetadata.h" |
33 | | #include "llvm/IR/Instructions.h" |
34 | | #include "llvm/IR/IntrinsicInst.h" |
35 | | #include "llvm/IR/Metadata.h" |
36 | | #include "llvm/Support/AtomicOrdering.h" |
37 | | #include <optional> |
38 | | using namespace clang; |
39 | | using namespace CodeGen; |
40 | | using namespace llvm::omp; |
41 | | |
42 | | static const VarDecl *getBaseDecl(const Expr *Ref); |
43 | | |
44 | | namespace { |
45 | | /// Lexical scope for OpenMP executable constructs that handles correct codegen |
46 | | /// for captured expressions. |
47 | | class OMPLexicalScope : public CodeGenFunction::LexicalScope { |
48 | 0 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
49 | 0 | for (const auto *C : S.clauses()) { |
50 | 0 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
51 | 0 | if (const auto *PreInit = |
52 | 0 | cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { |
53 | 0 | for (const auto *I : PreInit->decls()) { |
54 | 0 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
55 | 0 | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
56 | 0 | } else { |
57 | 0 | CodeGenFunction::AutoVarEmission Emission = |
58 | 0 | CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); |
59 | 0 | CGF.EmitAutoVarCleanups(Emission); |
60 | 0 | } |
61 | 0 | } |
62 | 0 | } |
63 | 0 | } |
64 | 0 | } |
65 | 0 | } |
66 | | CodeGenFunction::OMPPrivateScope InlinedShareds; |
67 | | |
68 | 0 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
69 | 0 | return CGF.LambdaCaptureFields.lookup(VD) || |
70 | 0 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
71 | 0 | (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && |
72 | 0 | cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); |
73 | 0 | } |
74 | | |
75 | | public: |
76 | | OMPLexicalScope( |
77 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
78 | | const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt, |
79 | | const bool EmitPreInitStmt = true) |
80 | | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
81 | 0 | InlinedShareds(CGF) { |
82 | 0 | if (EmitPreInitStmt) |
83 | 0 | emitPreInitStmt(CGF, S); |
84 | 0 | if (!CapturedRegion) |
85 | 0 | return; |
86 | 0 | assert(S.hasAssociatedStmt() && |
87 | 0 | "Expected associated statement for inlined directive."); |
88 | 0 | const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); |
89 | 0 | for (const auto &C : CS->captures()) { |
90 | 0 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
91 | 0 | auto *VD = C.getCapturedVar(); |
92 | 0 | assert(VD == VD->getCanonicalDecl() && |
93 | 0 | "Canonical decl must be captured."); |
94 | 0 | DeclRefExpr DRE( |
95 | 0 | CGF.getContext(), const_cast<VarDecl *>(VD), |
96 | 0 | isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && |
97 | 0 | InlinedShareds.isGlobalVarCaptured(VD)), |
98 | 0 | VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); |
99 | 0 | InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); |
100 | 0 | } |
101 | 0 | } |
102 | 0 | (void)InlinedShareds.Privatize(); |
103 | 0 | } |
104 | | }; |
105 | | |
106 | | /// Lexical scope for the OpenMP parallel construct that handles correct codegen |
107 | | /// for captured expressions. |
108 | | class OMPParallelScope final : public OMPLexicalScope { |
109 | 0 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
110 | 0 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
111 | 0 | return !(isOpenMPTargetExecutionDirective(Kind) || |
112 | 0 | isOpenMPLoopBoundSharingDirective(Kind)) && |
113 | 0 | isOpenMPParallelDirective(Kind); |
114 | 0 | } |
115 | | |
116 | | public: |
117 | | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
118 | | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
119 | 0 | EmitPreInitStmt(S)) {} |
120 | | }; |
121 | | |
122 | | /// Lexical scope for the OpenMP teams construct that handles correct codegen |
123 | | /// for captured expressions. |
124 | | class OMPTeamsScope final : public OMPLexicalScope { |
125 | 0 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
126 | 0 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
127 | 0 | return !isOpenMPTargetExecutionDirective(Kind) && |
128 | 0 | isOpenMPTeamsDirective(Kind); |
129 | 0 | } |
130 | | |
131 | | public: |
132 | | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
133 | | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
134 | 0 | EmitPreInitStmt(S)) {} |
135 | | }; |
136 | | |
137 | | /// Private scope for OpenMP loop-based directives that supports capturing |
138 | | /// expressions used in the loop statement. |
139 | | class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { |
140 | 0 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { |
141 | 0 | const DeclStmt *PreInits; |
142 | 0 | CodeGenFunction::OMPMapVars PreCondVars; |
143 | 0 | if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { |
144 | 0 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
145 | 0 | for (const auto *E : LD->counters()) { |
146 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
147 | 0 | EmittedAsPrivate.insert(VD->getCanonicalDecl()); |
148 | 0 | (void)PreCondVars.setVarAddr( |
149 | 0 | CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); |
150 | 0 | } |
151 | | // Mark private vars as undefs. |
152 | 0 | for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) { |
153 | 0 | for (const Expr *IRef : C->varlists()) { |
154 | 0 | const auto *OrigVD = |
155 | 0 | cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
156 | 0 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
157 | 0 | QualType OrigVDTy = OrigVD->getType().getNonReferenceType(); |
158 | 0 | (void)PreCondVars.setVarAddr( |
159 | 0 | CGF, OrigVD, |
160 | 0 | Address(llvm::UndefValue::get(CGF.ConvertTypeForMem( |
161 | 0 | CGF.getContext().getPointerType(OrigVDTy))), |
162 | 0 | CGF.ConvertTypeForMem(OrigVDTy), |
163 | 0 | CGF.getContext().getDeclAlign(OrigVD))); |
164 | 0 | } |
165 | 0 | } |
166 | 0 | } |
167 | 0 | (void)PreCondVars.apply(CGF); |
168 | | // Emit init, __range and __end variables for C++ range loops. |
169 | 0 | (void)OMPLoopBasedDirective::doForAllLoops( |
170 | 0 | LD->getInnermostCapturedStmt()->getCapturedStmt(), |
171 | 0 | /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(), |
172 | 0 | [&CGF](unsigned Cnt, const Stmt *CurStmt) { |
173 | 0 | if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) { |
174 | 0 | if (const Stmt *Init = CXXFor->getInit()) |
175 | 0 | CGF.EmitStmt(Init); |
176 | 0 | CGF.EmitStmt(CXXFor->getRangeStmt()); |
177 | 0 | CGF.EmitStmt(CXXFor->getEndStmt()); |
178 | 0 | } |
179 | 0 | return false; |
180 | 0 | }); |
181 | 0 | PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); |
182 | 0 | } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { |
183 | 0 | PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); |
184 | 0 | } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) { |
185 | 0 | PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits()); |
186 | 0 | } else { |
187 | 0 | llvm_unreachable("Unknown loop-based directive kind."); |
188 | 0 | } |
189 | 0 | if (PreInits) { |
190 | 0 | for (const auto *I : PreInits->decls()) |
191 | 0 | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
192 | 0 | } |
193 | 0 | PreCondVars.restore(CGF); |
194 | 0 | } |
195 | | |
196 | | public: |
197 | | OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) |
198 | 0 | : CodeGenFunction::RunCleanupsScope(CGF) { |
199 | 0 | emitPreInitStmt(CGF, S); |
200 | 0 | } |
201 | | }; |
202 | | |
203 | | class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { |
204 | | CodeGenFunction::OMPPrivateScope InlinedShareds; |
205 | | |
206 | 0 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
207 | 0 | return CGF.LambdaCaptureFields.lookup(VD) || |
208 | 0 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
209 | 0 | (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && |
210 | 0 | cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); |
211 | 0 | } |
212 | | |
213 | | public: |
214 | | OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
215 | | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
216 | 0 | InlinedShareds(CGF) { |
217 | 0 | for (const auto *C : S.clauses()) { |
218 | 0 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
219 | 0 | if (const auto *PreInit = |
220 | 0 | cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { |
221 | 0 | for (const auto *I : PreInit->decls()) { |
222 | 0 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
223 | 0 | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
224 | 0 | } else { |
225 | 0 | CodeGenFunction::AutoVarEmission Emission = |
226 | 0 | CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); |
227 | 0 | CGF.EmitAutoVarCleanups(Emission); |
228 | 0 | } |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { |
232 | 0 | for (const Expr *E : UDP->varlists()) { |
233 | 0 | const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
234 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
235 | 0 | CGF.EmitVarDecl(*OED); |
236 | 0 | } |
237 | 0 | } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { |
238 | 0 | for (const Expr *E : UDP->varlists()) { |
239 | 0 | const Decl *D = getBaseDecl(E); |
240 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
241 | 0 | CGF.EmitVarDecl(*OED); |
242 | 0 | } |
243 | 0 | } |
244 | 0 | } |
245 | 0 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) |
246 | 0 | CGF.EmitOMPPrivateClause(S, InlinedShareds); |
247 | 0 | if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { |
248 | 0 | if (const Expr *E = TG->getReductionRef()) |
249 | 0 | CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); |
250 | 0 | } |
251 | | // Temp copy arrays for inscan reductions should not be emitted as they are |
252 | | // not used in simd-only mode. |
253 | 0 | llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps; |
254 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
255 | 0 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
256 | 0 | continue; |
257 | 0 | for (const Expr *E : C->copy_array_temps()) |
258 | 0 | CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl()); |
259 | 0 | } |
260 | 0 | const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); |
261 | 0 | while (CS) { |
262 | 0 | for (auto &C : CS->captures()) { |
263 | 0 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
264 | 0 | auto *VD = C.getCapturedVar(); |
265 | 0 | if (CopyArrayTemps.contains(VD)) |
266 | 0 | continue; |
267 | 0 | assert(VD == VD->getCanonicalDecl() && |
268 | 0 | "Canonical decl must be captured."); |
269 | 0 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
270 | 0 | isCapturedVar(CGF, VD) || |
271 | 0 | (CGF.CapturedStmtInfo && |
272 | 0 | InlinedShareds.isGlobalVarCaptured(VD)), |
273 | 0 | VD->getType().getNonReferenceType(), VK_LValue, |
274 | 0 | C.getLocation()); |
275 | 0 | InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); |
276 | 0 | } |
277 | 0 | } |
278 | 0 | CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); |
279 | 0 | } |
280 | 0 | (void)InlinedShareds.Privatize(); |
281 | 0 | } |
282 | | }; |
283 | | |
284 | | } // namespace |
285 | | |
286 | | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
287 | | const OMPExecutableDirective &S, |
288 | | const RegionCodeGenTy &CodeGen); |
289 | | |
290 | 0 | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
291 | 0 | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { |
292 | 0 | if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { |
293 | 0 | OrigVD = OrigVD->getCanonicalDecl(); |
294 | 0 | bool IsCaptured = |
295 | 0 | LambdaCaptureFields.lookup(OrigVD) || |
296 | 0 | (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || |
297 | 0 | (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); |
298 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
299 | 0 | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
300 | 0 | return EmitLValue(&DRE); |
301 | 0 | } |
302 | 0 | } |
303 | 0 | return EmitLValue(E); |
304 | 0 | } |
305 | | |
306 | 0 | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
307 | 0 | ASTContext &C = getContext(); |
308 | 0 | llvm::Value *Size = nullptr; |
309 | 0 | auto SizeInChars = C.getTypeSizeInChars(Ty); |
310 | 0 | if (SizeInChars.isZero()) { |
311 | | // getTypeSizeInChars() returns 0 for a VLA. |
312 | 0 | while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { |
313 | 0 | VlaSizePair VlaSize = getVLASize(VAT); |
314 | 0 | Ty = VlaSize.Type; |
315 | 0 | Size = |
316 | 0 | Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts; |
317 | 0 | } |
318 | 0 | SizeInChars = C.getTypeSizeInChars(Ty); |
319 | 0 | if (SizeInChars.isZero()) |
320 | 0 | return llvm::ConstantInt::get(SizeTy, /*V=*/0); |
321 | 0 | return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); |
322 | 0 | } |
323 | 0 | return CGM.getSize(SizeInChars); |
324 | 0 | } |
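
For reference, getTypeSize above peels off nested VLA dimensions and folds their extents together with NUW multiplies before scaling by the element size. A user-level sketch of the computation it corresponds to (hypothetical example code, not part of this file):

    // Hypothetical user code: capturing the VLA `a` needs its dynamic size,
    // and getTypeSize(Ty) emits the equivalent of n * m * sizeof(double).
    void work(int n, int m) {
      double a[n][m];
    #pragma omp parallel firstprivate(a)
      { a[0][0] = 1.0; }
    }
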
325 | | |
326 | | void CodeGenFunction::GenerateOpenMPCapturedVars( |
327 | 0 | const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { |
328 | 0 | const RecordDecl *RD = S.getCapturedRecordDecl(); |
329 | 0 | auto CurField = RD->field_begin(); |
330 | 0 | auto CurCap = S.captures().begin(); |
331 | 0 | for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), |
332 | 0 | E = S.capture_init_end(); |
333 | 0 | I != E; ++I, ++CurField, ++CurCap) { |
334 | 0 | if (CurField->hasCapturedVLAType()) { |
335 | 0 | const VariableArrayType *VAT = CurField->getCapturedVLAType(); |
336 | 0 | llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; |
337 | 0 | CapturedVars.push_back(Val); |
338 | 0 | } else if (CurCap->capturesThis()) { |
339 | 0 | CapturedVars.push_back(CXXThisValue); |
340 | 0 | } else if (CurCap->capturesVariableByCopy()) { |
341 | 0 | llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); |
342 | | |
343 | | // If the field is not a pointer, we need to save the actual value |
344 | | // and load it as a void pointer. |
345 | 0 | if (!CurField->getType()->isAnyPointerType()) { |
346 | 0 | ASTContext &Ctx = getContext(); |
347 | 0 | Address DstAddr = CreateMemTemp( |
348 | 0 | Ctx.getUIntPtrType(), |
349 | 0 | Twine(CurCap->getCapturedVar()->getName(), ".casted")); |
350 | 0 | LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); |
351 | |
352 | 0 | llvm::Value *SrcAddrVal = EmitScalarConversion( |
353 | 0 | DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), |
354 | 0 | Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); |
355 | 0 | LValue SrcLV = |
356 | 0 | MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); |
357 | | |
358 | | // Store the value using the source type pointer. |
359 | 0 | EmitStoreThroughLValue(RValue::get(CV), SrcLV); |
360 | | |
361 | | // Load the value using the destination type pointer. |
362 | 0 | CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); |
363 | 0 | } |
364 | 0 | CapturedVars.push_back(CV); |
365 | 0 | } else { |
366 | 0 | assert(CurCap->capturesVariable() && "Expected capture by reference."); |
367 | 0 | CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer()); |
368 | 0 | } |
369 | 0 | } |
370 | 0 | } |
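
The uintptr round-trip performed above for by-copy captures of non-pointer values can be pictured in source form as follows. This is only a sketch of what is actually emitted as IR; the function and variable names are illustrative:

    #include <cstdint>

    void pass_by_copy(float captured) {
      uintptr_t casted;               // the ".casted" temporary (DstAddr)
      *(float *)&casted = captured;   // store through the source type (SrcLV)
      uintptr_t arg = casted;         // reload as uintptr (DstLV)
      (void)arg;                      // `arg` is what reaches the runtime
    }
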
371 | | |
372 | | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
373 | | QualType DstType, StringRef Name, |
374 | 0 | LValue AddrLV) { |
375 | 0 | ASTContext &Ctx = CGF.getContext(); |
376 | |
377 | 0 | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
378 | 0 | AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), |
379 | 0 | Ctx.getPointerType(DstType), Loc); |
380 | 0 | Address TmpAddr = |
381 | 0 | CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF); |
382 | 0 | return TmpAddr; |
383 | 0 | } |
384 | | |
385 | 0 | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
386 | 0 | if (T->isLValueReferenceType()) |
387 | 0 | return C.getLValueReferenceType( |
388 | 0 | getCanonicalParamType(C, T.getNonReferenceType()), |
389 | 0 | /*SpelledAsLValue=*/false); |
390 | 0 | if (T->isPointerType()) |
391 | 0 | return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); |
392 | 0 | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
393 | 0 | if (const auto *VLA = dyn_cast<VariableArrayType>(A)) |
394 | 0 | return getCanonicalParamType(C, VLA->getElementType()); |
395 | 0 | if (!A->isVariablyModifiedType()) |
396 | 0 | return C.getCanonicalType(T); |
397 | 0 | } |
398 | 0 | return C.getCanonicalParamType(T); |
399 | 0 | } |
400 | | |
401 | | namespace { |
402 | | /// Contains required data for proper outlined function codegen. |
403 | | struct FunctionOptions { |
404 | | /// Captured statement for which the function is generated. |
405 | | const CapturedStmt *S = nullptr; |
406 | | /// true if cast to/from UIntPtr is required for variables captured by |
407 | | /// value. |
408 | | const bool UIntPtrCastRequired = true; |
409 | | /// true if only casted arguments must be registered as local args or VLA |
410 | | /// sizes. |
411 | | const bool RegisterCastedArgsOnly = false; |
412 | | /// Name of the generated function. |
413 | | const StringRef FunctionName; |
414 | | /// Location of the non-debug version of the outlined function. |
415 | | SourceLocation Loc; |
416 | | explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, |
417 | | bool RegisterCastedArgsOnly, StringRef FunctionName, |
418 | | SourceLocation Loc) |
419 | | : S(S), UIntPtrCastRequired(UIntPtrCastRequired), |
420 | | RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), |
421 | 0 | FunctionName(FunctionName), Loc(Loc) {} |
422 | | }; |
423 | | } // namespace |
424 | | |
425 | | static llvm::Function *emitOutlinedFunctionPrologue( |
426 | | CodeGenFunction &CGF, FunctionArgList &Args, |
427 | | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> |
428 | | &LocalAddrs, |
429 | | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> |
430 | | &VLASizes, |
431 | 0 | llvm::Value *&CXXThisValue, const FunctionOptions &FO) { |
432 | 0 | const CapturedDecl *CD = FO.S->getCapturedDecl(); |
433 | 0 | const RecordDecl *RD = FO.S->getCapturedRecordDecl(); |
434 | 0 | assert(CD->hasBody() && "missing CapturedDecl body"); |
435 | | |
436 | 0 | CXXThisValue = nullptr; |
437 | | // Build the argument list. |
438 | 0 | CodeGenModule &CGM = CGF.CGM; |
439 | 0 | ASTContext &Ctx = CGM.getContext(); |
440 | 0 | FunctionArgList TargetArgs; |
441 | 0 | Args.append(CD->param_begin(), |
442 | 0 | std::next(CD->param_begin(), CD->getContextParamPosition())); |
443 | 0 | TargetArgs.append( |
444 | 0 | CD->param_begin(), |
445 | 0 | std::next(CD->param_begin(), CD->getContextParamPosition())); |
446 | 0 | auto I = FO.S->captures().begin(); |
447 | 0 | FunctionDecl *DebugFunctionDecl = nullptr; |
448 | 0 | if (!FO.UIntPtrCastRequired) { |
449 | 0 | FunctionProtoType::ExtProtoInfo EPI; |
450 | 0 | QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI); |
451 | 0 | DebugFunctionDecl = FunctionDecl::Create( |
452 | 0 | Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), |
453 | 0 | SourceLocation(), DeclarationName(), FunctionTy, |
454 | 0 | Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, |
455 | 0 | /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, |
456 | 0 | /*hasWrittenPrototype=*/false); |
457 | 0 | } |
458 | 0 | for (const FieldDecl *FD : RD->fields()) { |
459 | 0 | QualType ArgType = FD->getType(); |
460 | 0 | IdentifierInfo *II = nullptr; |
461 | 0 | VarDecl *CapVar = nullptr; |
462 | | |
463 | | // If this is a capture by copy and the type is not a pointer, the outlined |
464 | | // function argument type should be uintptr and the value properly cast to |
465 | | // uintptr. This is necessary given that the runtime library is only able to |
466 | | // deal with pointers. VLA type sizes are passed to the outlined |
467 | | // function in the same way. |
468 | 0 | if (FO.UIntPtrCastRequired && |
469 | 0 | ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || |
470 | 0 | I->capturesVariableArrayType())) |
471 | 0 | ArgType = Ctx.getUIntPtrType(); |
472 | |
473 | 0 | if (I->capturesVariable() || I->capturesVariableByCopy()) { |
474 | 0 | CapVar = I->getCapturedVar(); |
475 | 0 | II = CapVar->getIdentifier(); |
476 | 0 | } else if (I->capturesThis()) { |
477 | 0 | II = &Ctx.Idents.get("this"); |
478 | 0 | } else { |
479 | 0 | assert(I->capturesVariableArrayType()); |
480 | 0 | II = &Ctx.Idents.get("vla"); |
481 | 0 | } |
482 | 0 | if (ArgType->isVariablyModifiedType()) |
483 | 0 | ArgType = getCanonicalParamType(Ctx, ArgType); |
484 | 0 | VarDecl *Arg; |
485 | 0 | if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) { |
486 | 0 | Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), |
487 | 0 | II, ArgType, |
488 | 0 | ImplicitParamKind::ThreadPrivateVar); |
489 | 0 | } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) { |
490 | 0 | Arg = ParmVarDecl::Create( |
491 | 0 | Ctx, DebugFunctionDecl, |
492 | 0 | CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), |
493 | 0 | CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, |
494 | 0 | /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); |
495 | 0 | } else { |
496 | 0 | Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), |
497 | 0 | II, ArgType, ImplicitParamKind::Other); |
498 | 0 | } |
499 | 0 | Args.emplace_back(Arg); |
500 | | // Do not cast arguments if we emit function with non-original types. |
501 | 0 | TargetArgs.emplace_back( |
502 | 0 | FO.UIntPtrCastRequired |
503 | 0 | ? Arg |
504 | 0 | : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); |
505 | 0 | ++I; |
506 | 0 | } |
507 | 0 | Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1), |
508 | 0 | CD->param_end()); |
509 | 0 | TargetArgs.append( |
510 | 0 | std::next(CD->param_begin(), CD->getContextParamPosition() + 1), |
511 | 0 | CD->param_end()); |
512 | | |
513 | | // Create the function declaration. |
514 | 0 | const CGFunctionInfo &FuncInfo = |
515 | 0 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); |
516 | 0 | llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); |
517 | |
518 | 0 | auto *F = |
519 | 0 | llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, |
520 | 0 | FO.FunctionName, &CGM.getModule()); |
521 | 0 | CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); |
522 | 0 | if (CD->isNothrow()) |
523 | 0 | F->setDoesNotThrow(); |
524 | 0 | F->setDoesNotRecurse(); |
525 | | |
526 | | // Always inline the outlined function if optimizations are enabled. |
527 | 0 | if (CGM.getCodeGenOpts().OptimizationLevel != 0) { |
528 | 0 | F->removeFnAttr(llvm::Attribute::NoInline); |
529 | 0 | F->addFnAttr(llvm::Attribute::AlwaysInline); |
530 | 0 | } |
531 | | |
532 | | // Generate the function. |
533 | 0 | CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, |
534 | 0 | FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), |
535 | 0 | FO.UIntPtrCastRequired ? FO.Loc |
536 | 0 | : CD->getBody()->getBeginLoc()); |
537 | 0 | unsigned Cnt = CD->getContextParamPosition(); |
538 | 0 | I = FO.S->captures().begin(); |
539 | 0 | for (const FieldDecl *FD : RD->fields()) { |
540 | | // Do not map arguments if we emit function with non-original types. |
541 | 0 | Address LocalAddr(Address::invalid()); |
542 | 0 | if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { |
543 | 0 | LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], |
544 | 0 | TargetArgs[Cnt]); |
545 | 0 | } else { |
546 | 0 | LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); |
547 | 0 | } |
548 | | // If we are capturing a pointer by copy we don't need to do anything; just |
549 | | // use the value that we get from the arguments. |
550 | 0 | if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { |
551 | 0 | const VarDecl *CurVD = I->getCapturedVar(); |
552 | 0 | if (!FO.RegisterCastedArgsOnly) |
553 | 0 | LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); |
554 | 0 | ++Cnt; |
555 | 0 | ++I; |
556 | 0 | continue; |
557 | 0 | } |
558 | | |
559 | 0 | LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), |
560 | 0 | AlignmentSource::Decl); |
561 | 0 | if (FD->hasCapturedVLAType()) { |
562 | 0 | if (FO.UIntPtrCastRequired) { |
563 | 0 | ArgLVal = CGF.MakeAddrLValue( |
564 | 0 | castValueFromUintptr(CGF, I->getLocation(), FD->getType(), |
565 | 0 | Args[Cnt]->getName(), ArgLVal), |
566 | 0 | FD->getType(), AlignmentSource::Decl); |
567 | 0 | } |
568 | 0 | llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); |
569 | 0 | const VariableArrayType *VAT = FD->getCapturedVLAType(); |
570 | 0 | VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); |
571 | 0 | } else if (I->capturesVariable()) { |
572 | 0 | const VarDecl *Var = I->getCapturedVar(); |
573 | 0 | QualType VarTy = Var->getType(); |
574 | 0 | Address ArgAddr = ArgLVal.getAddress(CGF); |
575 | 0 | if (ArgLVal.getType()->isLValueReferenceType()) { |
576 | 0 | ArgAddr = CGF.EmitLoadOfReference(ArgLVal); |
577 | 0 | } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { |
578 | 0 | assert(ArgLVal.getType()->isPointerType()); |
579 | 0 | ArgAddr = CGF.EmitLoadOfPointer( |
580 | 0 | ArgAddr, ArgLVal.getType()->castAs<PointerType>()); |
581 | 0 | } |
582 | 0 | if (!FO.RegisterCastedArgsOnly) { |
583 | 0 | LocalAddrs.insert( |
584 | 0 | {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}}); |
585 | 0 | } |
586 | 0 | } else if (I->capturesVariableByCopy()) { |
587 | 0 | assert(!FD->getType()->isAnyPointerType() && |
588 | 0 | "Not expecting a captured pointer."); |
589 | 0 | const VarDecl *Var = I->getCapturedVar(); |
590 | 0 | LocalAddrs.insert({Args[Cnt], |
591 | 0 | {Var, FO.UIntPtrCastRequired |
592 | 0 | ? castValueFromUintptr( |
593 | 0 | CGF, I->getLocation(), FD->getType(), |
594 | 0 | Args[Cnt]->getName(), ArgLVal) |
595 | 0 | : ArgLVal.getAddress(CGF)}}); |
596 | 0 | } else { |
597 | | // If 'this' is captured, load it into CXXThisValue. |
598 | 0 | assert(I->capturesThis()); |
599 | 0 | CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); |
600 | 0 | LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); |
601 | 0 | } |
602 | 0 | ++Cnt; |
603 | 0 | ++I; |
604 | 0 | } |
605 | |
606 | 0 | return F; |
607 | 0 | } |
608 | | |
609 | | llvm::Function * |
610 | | CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, |
611 | 0 | SourceLocation Loc) { |
612 | 0 | assert( |
613 | 0 | CapturedStmtInfo && |
614 | 0 | "CapturedStmtInfo should be set when generating the captured function"); |
615 | 0 | const CapturedDecl *CD = S.getCapturedDecl(); |
616 | | // Build the argument list. |
617 | 0 | bool NeedWrapperFunction = |
618 | 0 | getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo(); |
619 | 0 | FunctionArgList Args; |
620 | 0 | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; |
621 | 0 | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; |
622 | 0 | SmallString<256> Buffer; |
623 | 0 | llvm::raw_svector_ostream Out(Buffer); |
624 | 0 | Out << CapturedStmtInfo->getHelperName(); |
625 | 0 | if (NeedWrapperFunction) |
626 | 0 | Out << "_debug__"; |
627 | 0 | FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, |
628 | 0 | Out.str(), Loc); |
629 | 0 | llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, |
630 | 0 | VLASizes, CXXThisValue, FO); |
631 | 0 | CodeGenFunction::OMPPrivateScope LocalScope(*this); |
632 | 0 | for (const auto &LocalAddrPair : LocalAddrs) { |
633 | 0 | if (LocalAddrPair.second.first) { |
634 | 0 | LocalScope.addPrivate(LocalAddrPair.second.first, |
635 | 0 | LocalAddrPair.second.second); |
636 | 0 | } |
637 | 0 | } |
638 | 0 | (void)LocalScope.Privatize(); |
639 | 0 | for (const auto &VLASizePair : VLASizes) |
640 | 0 | VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; |
641 | 0 | PGO.assignRegionCounters(GlobalDecl(CD), F); |
642 | 0 | CapturedStmtInfo->EmitBody(*this, CD->getBody()); |
643 | 0 | (void)LocalScope.ForceCleanup(); |
644 | 0 | FinishFunction(CD->getBodyRBrace()); |
645 | 0 | if (!NeedWrapperFunction) |
646 | 0 | return F; |
647 | | |
648 | 0 | FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, |
649 | 0 | /*RegisterCastedArgsOnly=*/true, |
650 | 0 | CapturedStmtInfo->getHelperName(), Loc); |
651 | 0 | CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); |
652 | 0 | WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; |
653 | 0 | Args.clear(); |
654 | 0 | LocalAddrs.clear(); |
655 | 0 | VLASizes.clear(); |
656 | 0 | llvm::Function *WrapperF = |
657 | 0 | emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, |
658 | 0 | WrapperCGF.CXXThisValue, WrapperFO); |
659 | 0 | llvm::SmallVector<llvm::Value *, 4> CallArgs; |
660 | 0 | auto *PI = F->arg_begin(); |
661 | 0 | for (const auto *Arg : Args) { |
662 | 0 | llvm::Value *CallArg; |
663 | 0 | auto I = LocalAddrs.find(Arg); |
664 | 0 | if (I != LocalAddrs.end()) { |
665 | 0 | LValue LV = WrapperCGF.MakeAddrLValue( |
666 | 0 | I->second.second, |
667 | 0 | I->second.first ? I->second.first->getType() : Arg->getType(), |
668 | 0 | AlignmentSource::Decl); |
669 | 0 | if (LV.getType()->isAnyComplexType()) |
670 | 0 | LV.setAddress(LV.getAddress(WrapperCGF).withElementType(PI->getType())); |
671 | 0 | CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
672 | 0 | } else { |
673 | 0 | auto EI = VLASizes.find(Arg); |
674 | 0 | if (EI != VLASizes.end()) { |
675 | 0 | CallArg = EI->second.second; |
676 | 0 | } else { |
677 | 0 | LValue LV = |
678 | 0 | WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), |
679 | 0 | Arg->getType(), AlignmentSource::Decl); |
680 | 0 | CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
681 | 0 | } |
682 | 0 | } |
683 | 0 | CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); |
684 | 0 | ++PI; |
685 | 0 | } |
686 | 0 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); |
687 | 0 | WrapperCGF.FinishFunction(); |
688 | 0 | return WrapperF; |
689 | 0 | } |
690 | | |
691 | | //===----------------------------------------------------------------------===// |
692 | | // OpenMP Directive Emission |
693 | | //===----------------------------------------------------------------------===// |
694 | | void CodeGenFunction::EmitOMPAggregateAssign( |
695 | | Address DestAddr, Address SrcAddr, QualType OriginalType, |
696 | 0 | const llvm::function_ref<void(Address, Address)> CopyGen) { |
697 | | // Perform element-by-element initialization. |
698 | 0 | QualType ElementTy; |
699 | | |
700 | | // Drill down to the base element type on both arrays. |
701 | 0 | const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); |
702 | 0 | llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); |
703 | 0 | SrcAddr = SrcAddr.withElementType(DestAddr.getElementType()); |
704 | |
705 | 0 | llvm::Value *SrcBegin = SrcAddr.getPointer(); |
706 | 0 | llvm::Value *DestBegin = DestAddr.getPointer(); |
707 | | // Cast from pointer to array type to pointer to single element. |
708 | 0 | llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(), |
709 | 0 | DestBegin, NumElements); |
710 | | |
711 | | // The basic structure here is a while-do loop. |
712 | 0 | llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); |
713 | 0 | llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); |
714 | 0 | llvm::Value *IsEmpty = |
715 | 0 | Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); |
716 | 0 | Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
717 | | |
718 | | // Enter the loop body, making that address the current address. |
719 | 0 | llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); |
720 | 0 | EmitBlock(BodyBB); |
721 | |
722 | 0 | CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); |
723 | |
724 | 0 | llvm::PHINode *SrcElementPHI = |
725 | 0 | Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); |
726 | 0 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
727 | 0 | Address SrcElementCurrent = |
728 | 0 | Address(SrcElementPHI, SrcAddr.getElementType(), |
729 | 0 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
730 | |
731 | 0 | llvm::PHINode *DestElementPHI = Builder.CreatePHI( |
732 | 0 | DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
733 | 0 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
734 | 0 | Address DestElementCurrent = |
735 | 0 | Address(DestElementPHI, DestAddr.getElementType(), |
736 | 0 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
737 | | |
738 | | // Emit copy. |
739 | 0 | CopyGen(DestElementCurrent, SrcElementCurrent); |
740 | | |
741 | | // Shift the address forward by one element. |
742 | 0 | llvm::Value *DestElementNext = |
743 | 0 | Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI, |
744 | 0 | /*Idx0=*/1, "omp.arraycpy.dest.element"); |
745 | 0 | llvm::Value *SrcElementNext = |
746 | 0 | Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI, |
747 | 0 | /*Idx0=*/1, "omp.arraycpy.src.element"); |
748 | | // Check whether we've reached the end. |
749 | 0 | llvm::Value *Done = |
750 | 0 | Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
751 | 0 | Builder.CreateCondBr(Done, DoneBB, BodyBB); |
752 | 0 | DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); |
753 | 0 | SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); |
754 | | |
755 | | // Done. |
756 | 0 | EmitBlock(DoneBB, /*IsFinished=*/true); |
757 | 0 | } |
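
The control flow built above is an emptiness check followed by a do-while body. Rendered as C++ for clarity (a sketch of the emitted structure, not literal output; the emitted block labels are noted in comments):

    #include <cstddef>

    void arraycpy(double *dst, const double *src, std::size_t n) {
      double *end = dst + n;          // DestEnd
      if (dst == end) return;         // omp.arraycpy.isempty -> done
      do {                            // omp.arraycpy.body; PHIs carry dst/src
        *dst = *src;                  // CopyGen for the current element
        ++dst; ++src;                 // shift forward by one element
      } while (dst != end);           // omp.arraycpy.done
    }
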
758 | | |
759 | | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
760 | | Address SrcAddr, const VarDecl *DestVD, |
761 | 0 | const VarDecl *SrcVD, const Expr *Copy) { |
762 | 0 | if (OriginalType->isArrayType()) { |
763 | 0 | const auto *BO = dyn_cast<BinaryOperator>(Copy); |
764 | 0 | if (BO && BO->getOpcode() == BO_Assign) { |
765 | | // Perform simple memcpy for simple copying. |
766 | 0 | LValue Dest = MakeAddrLValue(DestAddr, OriginalType); |
767 | 0 | LValue Src = MakeAddrLValue(SrcAddr, OriginalType); |
768 | 0 | EmitAggregateAssign(Dest, Src, OriginalType); |
769 | 0 | } else { |
770 | | // For arrays with complex element types perform element by element |
771 | | // copying. |
772 | 0 | EmitOMPAggregateAssign( |
773 | 0 | DestAddr, SrcAddr, OriginalType, |
774 | 0 | [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
775 | | // We are working with a single array element, so we have to remap |
776 | | // destination and source variables to corresponding array |
777 | | // elements. |
778 | 0 | CodeGenFunction::OMPPrivateScope Remap(*this); |
779 | 0 | Remap.addPrivate(DestVD, DestElement); |
780 | 0 | Remap.addPrivate(SrcVD, SrcElement); |
781 | 0 | (void)Remap.Privatize(); |
782 | 0 | EmitIgnoredExpr(Copy); |
783 | 0 | }); |
784 | 0 | } |
785 | 0 | } else { |
786 | | // Remap pseudo source variable to private copy. |
787 | 0 | CodeGenFunction::OMPPrivateScope Remap(*this); |
788 | 0 | Remap.addPrivate(SrcVD, SrcAddr); |
789 | 0 | Remap.addPrivate(DestVD, DestAddr); |
790 | 0 | (void)Remap.Privatize(); |
791 | | // Emit copying of the whole variable. |
792 | 0 | EmitIgnoredExpr(Copy); |
793 | 0 | } |
794 | 0 | } |
795 | | |
796 | | bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, |
797 | 0 | OMPPrivateScope &PrivateScope) { |
798 | 0 | if (!HaveInsertPoint()) |
799 | 0 | return false; |
800 | 0 | bool DeviceConstTarget = |
801 | 0 | getLangOpts().OpenMPIsTargetDevice && |
802 | 0 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
803 | 0 | bool FirstprivateIsLastprivate = false; |
804 | 0 | llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; |
805 | 0 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
806 | 0 | for (const auto *D : C->varlists()) |
807 | 0 | Lastprivates.try_emplace( |
808 | 0 | cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), |
809 | 0 | C->getKind()); |
810 | 0 | } |
811 | 0 | llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; |
812 | 0 | llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; |
813 | 0 | getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); |
814 | | // Force emission of the firstprivate copy if the directive does not emit |
815 | | // an outlined function, like omp for, omp simd, omp distribute, etc. |
816 | 0 | bool MustEmitFirstprivateCopy = |
817 | 0 | CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; |
818 | 0 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
819 | 0 | const auto *IRef = C->varlist_begin(); |
820 | 0 | const auto *InitsRef = C->inits().begin(); |
821 | 0 | for (const Expr *IInit : C->private_copies()) { |
822 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
823 | 0 | bool ThisFirstprivateIsLastprivate = |
824 | 0 | Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; |
825 | 0 | const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); |
826 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
827 | 0 | if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && |
828 | 0 | !FD->getType()->isReferenceType() && |
829 | 0 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
830 | 0 | EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
831 | 0 | ++IRef; |
832 | 0 | ++InitsRef; |
833 | 0 | continue; |
834 | 0 | } |
835 | | // Do not emit a copy for firstprivate constant variables that are |
836 | | // captured by reference in target regions. |
837 | 0 | if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && |
838 | 0 | FD && FD->getType()->isReferenceType() && |
839 | 0 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
840 | 0 | EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
841 | 0 | ++IRef; |
842 | 0 | ++InitsRef; |
843 | 0 | continue; |
844 | 0 | } |
845 | 0 | FirstprivateIsLastprivate = |
846 | 0 | FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; |
847 | 0 | if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { |
848 | 0 | const auto *VDInit = |
849 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); |
850 | 0 | bool IsRegistered; |
851 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
852 | 0 | /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, |
853 | 0 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
854 | 0 | LValue OriginalLVal; |
855 | 0 | if (!FD) { |
856 | | // Check if the firstprivate variable is just a constant value. |
857 | 0 | ConstantEmission CE = tryEmitAsConstant(&DRE); |
858 | 0 | if (CE && !CE.isReference()) { |
859 | | // Constant value, no need to create a copy. |
860 | 0 | ++IRef; |
861 | 0 | ++InitsRef; |
862 | 0 | continue; |
863 | 0 | } |
864 | 0 | if (CE && CE.isReference()) { |
865 | 0 | OriginalLVal = CE.getReferenceLValue(*this, &DRE); |
866 | 0 | } else { |
867 | 0 | assert(!CE && "Expected non-constant firstprivate."); |
868 | 0 | OriginalLVal = EmitLValue(&DRE); |
869 | 0 | } |
870 | 0 | } else { |
871 | 0 | OriginalLVal = EmitLValue(&DRE); |
872 | 0 | } |
873 | 0 | QualType Type = VD->getType(); |
874 | 0 | if (Type->isArrayType()) { |
875 | | // Emit VarDecl with copy init for arrays. |
876 | | // Get the address of the original variable captured in the current |
877 | | // captured region. |
878 | 0 | AutoVarEmission Emission = EmitAutoVarAlloca(*VD); |
879 | 0 | const Expr *Init = VD->getInit(); |
880 | 0 | if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) { |
881 | | // Perform simple memcpy. |
882 | 0 | LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type); |
883 | 0 | EmitAggregateAssign(Dest, OriginalLVal, Type); |
884 | 0 | } else { |
885 | 0 | EmitOMPAggregateAssign( |
886 | 0 | Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this), |
887 | 0 | Type, |
888 | 0 | [this, VDInit, Init](Address DestElement, Address SrcElement) { |
889 | | // Clean up any temporaries needed by the |
890 | | // initialization. |
891 | 0 | RunCleanupsScope InitScope(*this); |
892 | | // Emit initialization for single element. |
893 | 0 | setAddrOfLocalVar(VDInit, SrcElement); |
894 | 0 | EmitAnyExprToMem(Init, DestElement, |
895 | 0 | Init->getType().getQualifiers(), |
896 | 0 | /*IsInitializer*/ false); |
897 | 0 | LocalDeclMap.erase(VDInit); |
898 | 0 | }); |
899 | 0 | } |
900 | 0 | EmitAutoVarCleanups(Emission); |
901 | 0 | IsRegistered = |
902 | 0 | PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress()); |
903 | 0 | } else { |
904 | 0 | Address OriginalAddr = OriginalLVal.getAddress(*this); |
905 | | // Emit private VarDecl with copy init. |
906 | | // Remap temp VDInit variable to the address of the original |
907 | | // variable (for proper handling of captured global variables). |
908 | 0 | setAddrOfLocalVar(VDInit, OriginalAddr); |
909 | 0 | EmitDecl(*VD); |
910 | 0 | LocalDeclMap.erase(VDInit); |
911 | 0 | Address VDAddr = GetAddrOfLocalVar(VD); |
912 | 0 | if (ThisFirstprivateIsLastprivate && |
913 | 0 | Lastprivates[OrigVD->getCanonicalDecl()] == |
914 | 0 | OMPC_LASTPRIVATE_conditional) { |
915 | | // Create/init special variable for lastprivate conditionals. |
916 | 0 | llvm::Value *V = |
917 | 0 | EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(), |
918 | 0 | AlignmentSource::Decl), |
919 | 0 | (*IRef)->getExprLoc()); |
920 | 0 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
921 | 0 | *this, OrigVD); |
922 | 0 | EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(), |
923 | 0 | AlignmentSource::Decl)); |
924 | 0 | LocalDeclMap.erase(VD); |
925 | 0 | setAddrOfLocalVar(VD, VDAddr); |
926 | 0 | } |
927 | 0 | IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr); |
928 | 0 | } |
929 | 0 | assert(IsRegistered && |
930 | 0 | "firstprivate var already registered as private"); |
931 | | // Silence the warning about unused variable. |
932 | 0 | (void)IsRegistered; |
933 | 0 | } |
934 | 0 | ++IRef; |
935 | 0 | ++InitsRef; |
936 | 0 | } |
937 | 0 | } |
938 | 0 | return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); |
939 | 0 | } |
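
At the source level, the machinery above implements the usual firstprivate semantics; a hypothetical example (not from this file):

    // Each thread receives its own copy of `v`, initialized from the original
    // before the region runs; writes inside the region do not reach the
    // caller's `v`. Array types take the element-by-element path seen above.
    void bump(int v) {
    #pragma omp parallel firstprivate(v)
      { v += 1; }
    }
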
940 | | |
941 | | void CodeGenFunction::EmitOMPPrivateClause( |
942 | | const OMPExecutableDirective &D, |
943 | 0 | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
944 | 0 | if (!HaveInsertPoint()) |
945 | 0 | return; |
946 | 0 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
947 | 0 | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
948 | 0 | auto IRef = C->varlist_begin(); |
949 | 0 | for (const Expr *IInit : C->private_copies()) { |
950 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
951 | 0 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
952 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
953 | 0 | EmitDecl(*VD); |
954 | | // Emit private VarDecl with copy init. |
955 | 0 | bool IsRegistered = |
956 | 0 | PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD)); |
957 | 0 | assert(IsRegistered && "private var already registered as private"); |
958 | | // Silence the warning about unused variable. |
959 | 0 | (void)IsRegistered; |
960 | 0 | } |
961 | 0 | ++IRef; |
962 | 0 | } |
963 | 0 | } |
964 | 0 | } |
965 | | |
966 | 0 | bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { |
967 | 0 | if (!HaveInsertPoint()) |
968 | 0 | return false; |
969 | | // threadprivate_var1 = master_threadprivate_var1; |
970 | | // operator=(threadprivate_var2, master_threadprivate_var2); |
971 | | // ... |
972 | | // __kmpc_barrier(&loc, global_tid); |
973 | 0 | llvm::DenseSet<const VarDecl *> CopiedVars; |
974 | 0 | llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; |
975 | 0 | for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { |
976 | 0 | auto IRef = C->varlist_begin(); |
977 | 0 | auto ISrcRef = C->source_exprs().begin(); |
978 | 0 | auto IDestRef = C->destination_exprs().begin(); |
979 | 0 | for (const Expr *AssignOp : C->assignment_ops()) { |
980 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
981 | 0 | QualType Type = VD->getType(); |
982 | 0 | if (CopiedVars.insert(VD->getCanonicalDecl()).second) { |
983 | | // Get the address of the master variable. If we are emitting code with |
984 | | // TLS support, the address is passed from the master as a field in the |
985 | | // captured declaration. |
986 | 0 | Address MasterAddr = Address::invalid(); |
987 | 0 | if (getLangOpts().OpenMPUseTLS && |
988 | 0 | getContext().getTargetInfo().isTLSSupported()) { |
989 | 0 | assert(CapturedStmtInfo->lookup(VD) && |
990 | 0 | "Copyin threadprivates should have been captured!"); |
991 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, |
992 | 0 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
993 | 0 | MasterAddr = EmitLValue(&DRE).getAddress(*this); |
994 | 0 | LocalDeclMap.erase(VD); |
995 | 0 | } else { |
996 | 0 | MasterAddr = |
997 | 0 | Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) |
998 | 0 | : CGM.GetAddrOfGlobal(VD), |
999 | 0 | CGM.getTypes().ConvertTypeForMem(VD->getType()), |
1000 | 0 | getContext().getDeclAlign(VD)); |
1001 | 0 | } |
1002 | | // Get the address of the threadprivate variable. |
1003 | 0 | Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); |
1004 | 0 | if (CopiedVars.size() == 1) { |
1005 | | // First, check if the current thread is the master thread. If it is, no |
1006 | | // need to copy data. |
1007 | 0 | CopyBegin = createBasicBlock("copyin.not.master"); |
1008 | 0 | CopyEnd = createBasicBlock("copyin.not.master.end"); |
1009 | | // TODO: Avoid ptrtoint conversion. |
1010 | 0 | auto *MasterAddrInt = |
1011 | 0 | Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy); |
1012 | 0 | auto *PrivateAddrInt = |
1013 | 0 | Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy); |
1014 | 0 | Builder.CreateCondBr( |
1015 | 0 | Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin, |
1016 | 0 | CopyEnd); |
1017 | 0 | EmitBlock(CopyBegin); |
1018 | 0 | } |
1019 | 0 | const auto *SrcVD = |
1020 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); |
1021 | 0 | const auto *DestVD = |
1022 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1023 | 0 | EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); |
1024 | 0 | } |
1025 | 0 | ++IRef; |
1026 | 0 | ++ISrcRef; |
1027 | 0 | ++IDestRef; |
1028 | 0 | } |
1029 | 0 | } |
1030 | 0 | if (CopyEnd) { |
1031 | | // Exit the copying procedure for non-master threads. |
1032 | 0 | EmitBlock(CopyEnd, /*IsFinished=*/true); |
1033 | 0 | return true; |
1034 | 0 | } |
1035 | 0 | return false; |
1036 | 0 | } |
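
The pseudo-code in the comment at the top of this function corresponds to user code like the following hypothetical example (not from this file):

    int tp;
    #pragma omp threadprivate(tp)

    void broadcast() {
      tp = 42;                        // the master thread sets its copy
    #pragma omp parallel copyin(tp)
      {
        // On entry, each worker's threadprivate `tp` is assigned from the
        // master's copy, and the __kmpc_barrier emitted above keeps threads
        // from reading `tp` before the copy completes: all see tp == 42.
      }
    }
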
1037 | | |
1038 | | bool CodeGenFunction::EmitOMPLastprivateClauseInit( |
1039 | 0 | const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { |
1040 | 0 | if (!HaveInsertPoint()) |
1041 | 0 | return false; |
1042 | 0 | bool HasAtLeastOneLastprivate = false; |
1043 | 0 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
1044 | 0 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
1045 | 0 | const auto *LoopDirective = cast<OMPLoopDirective>(&D); |
1046 | 0 | for (const Expr *C : LoopDirective->counters()) { |
1047 | 0 | SIMDLCVs.insert( |
1048 | 0 | cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); |
1049 | 0 | } |
1050 | 0 | } |
1051 | 0 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1052 | 0 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1053 | 0 | HasAtLeastOneLastprivate = true; |
1054 | 0 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
1055 | 0 | !getLangOpts().OpenMPSimd) |
1056 | 0 | break; |
1057 | 0 | const auto *IRef = C->varlist_begin(); |
1058 | 0 | const auto *IDestRef = C->destination_exprs().begin(); |
1059 | 0 | for (const Expr *IInit : C->private_copies()) { |
1060 | | // Keep the address of the original variable for future update at the end |
1061 | | // of the loop. |
1062 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1063 | | // Taskloops do not require additional initialization; it is done in the |
1064 | | // runtime support library. |
1065 | 0 | if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { |
1066 | 0 | const auto *DestVD = |
1067 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1068 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1069 | | /*RefersToEnclosingVariableOrCapture=*/ |
1070 | 0 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
1071 | 0 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1072 | 0 | PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this)); |
1073 | | // Check if the variable is also a firstprivate: in this case IInit is |
1074 | | // not generated. Initialization of this variable will happen in codegen |
1075 | | // for 'firstprivate' clause. |
1076 | 0 | if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { |
1077 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
1078 | 0 | Address VDAddr = Address::invalid(); |
1079 | 0 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) { |
1080 | 0 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
1081 | 0 | *this, OrigVD); |
1082 | 0 | setAddrOfLocalVar(VD, VDAddr); |
1083 | 0 | } else { |
1084 | | // Emit private VarDecl with copy init. |
1085 | 0 | EmitDecl(*VD); |
1086 | 0 | VDAddr = GetAddrOfLocalVar(VD); |
1087 | 0 | } |
1088 | 0 | bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr); |
1089 | 0 | assert(IsRegistered && |
1090 | 0 | "lastprivate var already registered as private"); |
1091 | 0 | (void)IsRegistered; |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | ++IRef; |
1095 | 0 | ++IDestRef; |
1096 | 0 | } |
1097 | 0 | } |
1098 | 0 | return HasAtLeastOneLastprivate; |
1099 | 0 | } |
1100 | | |
1101 | | void CodeGenFunction::EmitOMPLastprivateClauseFinal( |
1102 | | const OMPExecutableDirective &D, bool NoFinals, |
1103 | 0 | llvm::Value *IsLastIterCond) { |
1104 | 0 | if (!HaveInsertPoint()) |
1105 | 0 | return; |
1106 | | // Emit following code: |
1107 | | // if (<IsLastIterCond>) { |
1108 | | // orig_var1 = private_orig_var1; |
1109 | | // ... |
1110 | | // orig_varn = private_orig_varn; |
1111 | | // } |
1112 | 0 | llvm::BasicBlock *ThenBB = nullptr; |
1113 | 0 | llvm::BasicBlock *DoneBB = nullptr; |
1114 | 0 | if (IsLastIterCond) { |
1115 | | // Emit an implicit barrier if at least one lastprivate conditional is |
1116 | | // found and this is not simd mode. |
1117 | 0 | if (!getLangOpts().OpenMPSimd && |
1118 | 0 | llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(), |
1119 | 0 | [](const OMPLastprivateClause *C) { |
1120 | 0 | return C->getKind() == OMPC_LASTPRIVATE_conditional; |
1121 | 0 | })) { |
1122 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), |
1123 | 0 | OMPD_unknown, |
1124 | 0 | /*EmitChecks=*/false, |
1125 | 0 | /*ForceSimpleCall=*/true); |
1126 | 0 | } |
1127 | 0 | ThenBB = createBasicBlock(".omp.lastprivate.then"); |
1128 | 0 | DoneBB = createBasicBlock(".omp.lastprivate.done"); |
1129 | 0 | Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); |
1130 | 0 | EmitBlock(ThenBB); |
1131 | 0 | } |
1132 | 0 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1133 | 0 | llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; |
1134 | 0 | if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { |
1135 | 0 | auto IC = LoopDirective->counters().begin(); |
1136 | 0 | for (const Expr *F : LoopDirective->finals()) { |
1137 | 0 | const auto *D = |
1138 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); |
1139 | 0 | if (NoFinals) |
1140 | 0 | AlreadyEmittedVars.insert(D); |
1141 | 0 | else |
1142 | 0 | LoopCountersAndUpdates[D] = F; |
1143 | 0 | ++IC; |
1144 | 0 | } |
1145 | 0 | } |
1146 | 0 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1147 | 0 | auto IRef = C->varlist_begin(); |
1148 | 0 | auto ISrcRef = C->source_exprs().begin(); |
1149 | 0 | auto IDestRef = C->destination_exprs().begin(); |
1150 | 0 | for (const Expr *AssignOp : C->assignment_ops()) { |
1151 | 0 | const auto *PrivateVD = |
1152 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1153 | 0 | QualType Type = PrivateVD->getType(); |
1154 | 0 | const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); |
1155 | 0 | if (AlreadyEmittedVars.insert(CanonicalVD).second) { |
1156 | | // If the lastprivate variable is a loop control variable for a loop-based |
1157 | | // directive, update its value before copying it back to the original |
1158 | | // variable. |
1159 | 0 | if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) |
1160 | 0 | EmitIgnoredExpr(FinalExpr); |
1161 | 0 | const auto *SrcVD = |
1162 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); |
1163 | 0 | const auto *DestVD = |
1164 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1165 | | // Get the address of the private variable. |
1166 | 0 | Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); |
1167 | 0 | if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) |
1168 | 0 | PrivateAddr = Address( |
1169 | 0 | Builder.CreateLoad(PrivateAddr), |
1170 | 0 | CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()), |
1171 | 0 | CGM.getNaturalTypeAlignment(RefTy->getPointeeType())); |
1172 | | // Store the last value to the private copy in the last iteration. |
1173 | 0 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) |
1174 | 0 | CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( |
1175 | 0 | *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, |
1176 | 0 | (*IRef)->getExprLoc()); |
1177 | | // Get the address of the original variable. |
1178 | 0 | Address OriginalAddr = GetAddrOfLocalVar(DestVD); |
1179 | 0 | EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); |
1180 | 0 | } |
1181 | 0 | ++IRef; |
1182 | 0 | ++ISrcRef; |
1183 | 0 | ++IDestRef; |
1184 | 0 | } |
1185 | 0 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
1186 | 0 | EmitIgnoredExpr(PostUpdate); |
1187 | 0 | } |
1188 | 0 | if (IsLastIterCond) |
1189 | 0 | EmitBlock(DoneBB, /*IsFinished=*/true); |
1190 | 0 | } |
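
For reference, the `if (<IsLastIterCond>) { orig = private; }` pattern sketched in the comment above implements standard lastprivate semantics; a hypothetical example (not from this file):

    int last() {
      int x = 0;
    #pragma omp parallel for lastprivate(x)
      for (int i = 0; i < 10; ++i)
        x = i;                        // each thread writes its private copy
      return x;                       // the i == 9 copy is written back: 9
    }
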
1191 | | |
1192 | | void CodeGenFunction::EmitOMPReductionClauseInit( |
1193 | | const OMPExecutableDirective &D, |
1194 | 0 | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1195 | 0 | if (!HaveInsertPoint()) |
1196 | 0 | return; |
1197 | 0 | SmallVector<const Expr *, 4> Shareds; |
1198 | 0 | SmallVector<const Expr *, 4> Privates; |
1199 | 0 | SmallVector<const Expr *, 4> ReductionOps; |
1200 | 0 | SmallVector<const Expr *, 4> LHSs; |
1201 | 0 | SmallVector<const Expr *, 4> RHSs; |
1202 | 0 | OMPTaskDataTy Data; |
1203 | 0 | SmallVector<const Expr *, 4> TaskLHSs; |
1204 | 0 | SmallVector<const Expr *, 4> TaskRHSs; |
1205 | 0 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1206 | 0 | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1207 | 0 | continue; |
1208 | 0 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
1209 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
1210 | 0 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1211 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1212 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1213 | 0 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1214 | 0 | Data.ReductionVars.append(C->privates().begin(), C->privates().end()); |
1215 | 0 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
1216 | 0 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
1217 | 0 | Data.ReductionOps.append(C->reduction_ops().begin(), |
1218 | 0 | C->reduction_ops().end()); |
1219 | 0 | TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1220 | 0 | TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1221 | 0 | } |
1222 | 0 | } |
1223 | 0 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1224 | 0 | unsigned Count = 0; |
1225 | 0 | auto *ILHS = LHSs.begin(); |
1226 | 0 | auto *IRHS = RHSs.begin(); |
1227 | 0 | auto *IPriv = Privates.begin(); |
1228 | 0 | for (const Expr *IRef : Shareds) { |
1229 | 0 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); |
1230 | | // Emit private VarDecl with reduction init. |
1231 | 0 | RedCG.emitSharedOrigLValue(*this, Count); |
1232 | 0 | RedCG.emitAggregateType(*this, Count); |
1233 | 0 | AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); |
1234 | 0 | RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), |
1235 | 0 | RedCG.getSharedLValue(Count).getAddress(*this), |
1236 | 0 | [&Emission](CodeGenFunction &CGF) { |
1237 | 0 | CGF.EmitAutoVarInit(Emission); |
1238 | 0 | return true; |
1239 | 0 | }); |
1240 | 0 | EmitAutoVarCleanups(Emission); |
1241 | 0 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1242 | 0 | *this, Count, Emission.getAllocatedAddress()); |
1243 | 0 | bool IsRegistered = |
1244 | 0 | PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr); |
1245 | 0 | assert(IsRegistered && "private var already registered as private"); |
1246 | | // Silence the warning about unused variable. |
1247 | 0 | (void)IsRegistered; |
1248 | |
1249 | 0 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
1250 | 0 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
1251 | 0 | QualType Type = PrivateVD->getType(); |
1252 | 0 | bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); |
1253 | 0 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { |
1254 | | // Store the address of the original variable associated with the LHS |
1255 | | // implicit variable. |
1256 | 0 | PrivateScope.addPrivate(LHSVD, |
1257 | 0 | RedCG.getSharedLValue(Count).getAddress(*this)); |
1258 | 0 | PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD)); |
1259 | 0 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || |
1260 | 0 | isa<ArraySubscriptExpr>(IRef)) { |
1261 | | // Store the address of the original variable associated with the LHS |
1262 | | // implicit variable. |
1263 | 0 | PrivateScope.addPrivate(LHSVD, |
1264 | 0 | RedCG.getSharedLValue(Count).getAddress(*this)); |
1265 | 0 | PrivateScope.addPrivate(RHSVD, |
1266 | 0 | GetAddrOfLocalVar(PrivateVD).withElementType( |
1267 | 0 | ConvertTypeForMem(RHSVD->getType()))); |
1268 | 0 | } else { |
1269 | 0 | QualType Type = PrivateVD->getType(); |
1270 | 0 | bool IsArray = getContext().getAsArrayType(Type) != nullptr; |
1271 | 0 | Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); |
1272 | | // Store the address of the original variable associated with the LHS |
1273 | | // implicit variable. |
1274 | 0 | if (IsArray) { |
1275 | 0 | OriginalAddr = |
1276 | 0 | OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType())); |
1277 | 0 | } |
1278 | 0 | PrivateScope.addPrivate(LHSVD, OriginalAddr); |
1279 | 0 | PrivateScope.addPrivate( |
1280 | 0 | RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType( |
1281 | 0 | ConvertTypeForMem(RHSVD->getType())) |
1282 | 0 | : GetAddrOfLocalVar(PrivateVD)); |
1283 | 0 | } |
1284 | 0 | ++ILHS; |
1285 | 0 | ++IRHS; |
1286 | 0 | ++IPriv; |
1287 | 0 | ++Count; |
1288 | 0 | } |
1289 | 0 | if (!Data.ReductionVars.empty()) { |
1290 | 0 | Data.IsReductionWithTaskMod = true; |
1291 | 0 | Data.IsWorksharingReduction = |
1292 | 0 | isOpenMPWorksharingDirective(D.getDirectiveKind()); |
1293 | 0 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1294 | 0 | *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); |
1295 | 0 | const Expr *TaskRedRef = nullptr; |
1296 | 0 | switch (D.getDirectiveKind()) { |
1297 | 0 | case OMPD_parallel: |
1298 | 0 | TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); |
1299 | 0 | break; |
1300 | 0 | case OMPD_for: |
1301 | 0 | TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); |
1302 | 0 | break; |
1303 | 0 | case OMPD_sections: |
1304 | 0 | TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); |
1305 | 0 | break; |
1306 | 0 | case OMPD_parallel_for: |
1307 | 0 | TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); |
1308 | 0 | break; |
1309 | 0 | case OMPD_parallel_master: |
1310 | 0 | TaskRedRef = |
1311 | 0 | cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); |
1312 | 0 | break; |
1313 | 0 | case OMPD_parallel_sections: |
1314 | 0 | TaskRedRef = |
1315 | 0 | cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); |
1316 | 0 | break; |
1317 | 0 | case OMPD_target_parallel: |
1318 | 0 | TaskRedRef = |
1319 | 0 | cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); |
1320 | 0 | break; |
1321 | 0 | case OMPD_target_parallel_for: |
1322 | 0 | TaskRedRef = |
1323 | 0 | cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); |
1324 | 0 | break; |
1325 | 0 | case OMPD_distribute_parallel_for: |
1326 | 0 | TaskRedRef = |
1327 | 0 | cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); |
1328 | 0 | break; |
1329 | 0 | case OMPD_teams_distribute_parallel_for: |
1330 | 0 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) |
1331 | 0 | .getTaskReductionRefExpr(); |
1332 | 0 | break; |
1333 | 0 | case OMPD_target_teams_distribute_parallel_for: |
1334 | 0 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) |
1335 | 0 | .getTaskReductionRefExpr(); |
1336 | 0 | break; |
1337 | 0 | case OMPD_simd: |
1338 | 0 | case OMPD_for_simd: |
1339 | 0 | case OMPD_section: |
1340 | 0 | case OMPD_single: |
1341 | 0 | case OMPD_master: |
1342 | 0 | case OMPD_critical: |
1343 | 0 | case OMPD_parallel_for_simd: |
1344 | 0 | case OMPD_task: |
1345 | 0 | case OMPD_taskyield: |
1346 | 0 | case OMPD_error: |
1347 | 0 | case OMPD_barrier: |
1348 | 0 | case OMPD_taskwait: |
1349 | 0 | case OMPD_taskgroup: |
1350 | 0 | case OMPD_flush: |
1351 | 0 | case OMPD_depobj: |
1352 | 0 | case OMPD_scan: |
1353 | 0 | case OMPD_ordered: |
1354 | 0 | case OMPD_atomic: |
1355 | 0 | case OMPD_teams: |
1356 | 0 | case OMPD_target: |
1357 | 0 | case OMPD_cancellation_point: |
1358 | 0 | case OMPD_cancel: |
1359 | 0 | case OMPD_target_data: |
1360 | 0 | case OMPD_target_enter_data: |
1361 | 0 | case OMPD_target_exit_data: |
1362 | 0 | case OMPD_taskloop: |
1363 | 0 | case OMPD_taskloop_simd: |
1364 | 0 | case OMPD_master_taskloop: |
1365 | 0 | case OMPD_master_taskloop_simd: |
1366 | 0 | case OMPD_parallel_master_taskloop: |
1367 | 0 | case OMPD_parallel_master_taskloop_simd: |
1368 | 0 | case OMPD_distribute: |
1369 | 0 | case OMPD_target_update: |
1370 | 0 | case OMPD_distribute_parallel_for_simd: |
1371 | 0 | case OMPD_distribute_simd: |
1372 | 0 | case OMPD_target_parallel_for_simd: |
1373 | 0 | case OMPD_target_simd: |
1374 | 0 | case OMPD_teams_distribute: |
1375 | 0 | case OMPD_teams_distribute_simd: |
1376 | 0 | case OMPD_teams_distribute_parallel_for_simd: |
1377 | 0 | case OMPD_target_teams: |
1378 | 0 | case OMPD_target_teams_distribute: |
1379 | 0 | case OMPD_target_teams_distribute_parallel_for_simd: |
1380 | 0 | case OMPD_target_teams_distribute_simd: |
1381 | 0 | case OMPD_declare_target: |
1382 | 0 | case OMPD_end_declare_target: |
1383 | 0 | case OMPD_threadprivate: |
1384 | 0 | case OMPD_allocate: |
1385 | 0 | case OMPD_declare_reduction: |
1386 | 0 | case OMPD_declare_mapper: |
1387 | 0 | case OMPD_declare_simd: |
1388 | 0 | case OMPD_requires: |
1389 | 0 | case OMPD_declare_variant: |
1390 | 0 | case OMPD_begin_declare_variant: |
1391 | 0 | case OMPD_end_declare_variant: |
1392 | 0 | case OMPD_unknown: |
1393 | 0 | default: |
1394 | 0 | llvm_unreachable("Unexpected directive with task reductions.");
1395 | 0 | } |
1396 | | |
1397 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); |
1398 | 0 | EmitVarDecl(*VD); |
1399 | 0 | EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), |
1400 | 0 | /*Volatile=*/false, TaskRedRef->getType()); |
1401 | 0 | } |
1402 | 0 | } |
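
A hedged sketch of the OpenMP 5.0 task reduction modifier that reaches the
OMPC_REDUCTION_task branch above; sum, a, and n are assumptions. The
descriptor produced by emitTaskReductionInit is stored into the directive's
task-reduction reference variable at the end of the function:

    int sum = 0;
    #pragma omp parallel reduction(task, +: sum)
    {
      #pragma omp for
      for (int i = 0; i < n; ++i) {
        #pragma omp task in_reduction(+: sum) // consumes the descriptor
        sum += a[i];
      }
    }
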
1403 | | |
1404 | | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1405 | 0 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1406 | 0 | if (!HaveInsertPoint()) |
1407 | 0 | return; |
1408 | 0 | llvm::SmallVector<const Expr *, 8> Privates; |
1409 | 0 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1410 | 0 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1411 | 0 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1412 | 0 | bool HasAtLeastOneReduction = false; |
1413 | 0 | bool IsReductionWithTaskMod = false; |
1414 | 0 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1415 | | // Do not emit for inscan reductions. |
1416 | 0 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1417 | 0 | continue; |
1418 | 0 | HasAtLeastOneReduction = true; |
1419 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
1420 | 0 | LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1421 | 0 | RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1422 | 0 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1423 | 0 | IsReductionWithTaskMod = |
1424 | 0 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1425 | 0 | } |
1426 | 0 | if (HasAtLeastOneReduction) { |
1427 | 0 | if (IsReductionWithTaskMod) { |
1428 | 0 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1429 | 0 | *this, D.getBeginLoc(), |
1430 | 0 | isOpenMPWorksharingDirective(D.getDirectiveKind())); |
1431 | 0 | } |
1432 | 0 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1433 | 0 | isOpenMPParallelDirective(D.getDirectiveKind()) || |
1434 | 0 | ReductionKind == OMPD_simd; |
1435 | 0 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1436 | | // Emit a nowait reduction if the nowait clause is present or the directive
1437 | | // is a parallel directive (it always has an implicit barrier).
1438 | 0 | CGM.getOpenMPRuntime().emitReduction( |
1439 | 0 | *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1440 | 0 | {WithNowait, SimpleReduction, ReductionKind}); |
1441 | 0 | } |
1442 | 0 | } |
1443 | | |
1444 | | static void emitPostUpdateForReductionClause( |
1445 | | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1446 | 0 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1447 | 0 | if (!CGF.HaveInsertPoint()) |
1448 | 0 | return; |
1449 | 0 | llvm::BasicBlock *DoneBB = nullptr; |
1450 | 0 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1451 | 0 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1452 | 0 | if (!DoneBB) { |
1453 | 0 | if (llvm::Value *Cond = CondGen(CGF)) { |
1454 | | // When the first post-update expression is found, emit the conditional
1455 | | // block if one was requested.
1456 | 0 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); |
1457 | 0 | DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); |
1458 | 0 | CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
1459 | 0 | CGF.EmitBlock(ThenBB); |
1460 | 0 | } |
1461 | 0 | } |
1462 | 0 | CGF.EmitIgnoredExpr(PostUpdate); |
1463 | 0 | } |
1464 | 0 | } |
1465 | 0 | if (DoneBB) |
1466 | 0 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
1467 | 0 | } |
1468 | | |
1469 | | namespace { |
1470 | | /// Codegen lambda for appending distribute lower and upper bounds to outlined |
1471 | | /// parallel function. This is necessary for combined constructs such as |
1472 | | /// 'distribute parallel for'.
1473 | | typedef llvm::function_ref<void(CodeGenFunction &, |
1474 | | const OMPExecutableDirective &, |
1475 | | llvm::SmallVectorImpl<llvm::Value *> &)> |
1476 | | CodeGenBoundParametersTy; |
1477 | | } // anonymous namespace |
1478 | | |
1479 | | static void |
1480 | | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1481 | 0 | const OMPExecutableDirective &S) { |
1482 | 0 | if (CGF.getLangOpts().OpenMP < 50) |
1483 | 0 | return; |
1484 | 0 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1485 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1486 | 0 | for (const Expr *Ref : C->varlists()) { |
1487 | 0 | if (!Ref->getType()->isScalarType()) |
1488 | 0 | continue; |
1489 | 0 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1490 | 0 | if (!DRE) |
1491 | 0 | continue; |
1492 | 0 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1493 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1494 | 0 | } |
1495 | 0 | } |
1496 | 0 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1497 | 0 | for (const Expr *Ref : C->varlists()) { |
1498 | 0 | if (!Ref->getType()->isScalarType()) |
1499 | 0 | continue; |
1500 | 0 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1501 | 0 | if (!DRE) |
1502 | 0 | continue; |
1503 | 0 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1504 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1505 | 0 | } |
1506 | 0 | } |
1507 | 0 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1508 | 0 | for (const Expr *Ref : C->varlists()) { |
1509 | 0 | if (!Ref->getType()->isScalarType()) |
1510 | 0 | continue; |
1511 | 0 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1512 | 0 | if (!DRE) |
1513 | 0 | continue; |
1514 | 0 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1515 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1516 | 0 | } |
1517 | 0 | } |
1518 | | // Privates should not be analyzed since they are not captured at all.
1519 | | // Task reductions may be skipped - tasks are ignored.
1520 | | // Firstprivates do not return a value but may be passed by reference - no
1521 | | // need to check for an updated lastprivate conditional.
1522 | 0 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1523 | 0 | for (const Expr *Ref : C->varlists()) { |
1524 | 0 | if (!Ref->getType()->isScalarType()) |
1525 | 0 | continue; |
1526 | 0 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1527 | 0 | if (!DRE) |
1528 | 0 | continue; |
1529 | 0 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1530 | 0 | } |
1531 | 0 | } |
1532 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1533 | 0 | CGF, S, PrivateDecls); |
1534 | 0 | } |
1535 | | |
1536 | | static void emitCommonOMPParallelDirective( |
1537 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1538 | | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1539 | 0 | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1540 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1541 | 0 | llvm::Value *NumThreads = nullptr; |
1542 | 0 | llvm::Function *OutlinedFn = |
1543 | 0 | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1544 | 0 | CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, |
1545 | 0 | CodeGen); |
1546 | 0 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1547 | 0 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1548 | 0 | NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1549 | 0 | /*IgnoreResultAssign=*/true); |
1550 | 0 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1551 | 0 | CGF, NumThreads, NumThreadsClause->getBeginLoc()); |
1552 | 0 | } |
1553 | 0 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1554 | 0 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1555 | 0 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1556 | 0 | CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); |
1557 | 0 | } |
1558 | 0 | const Expr *IfCond = nullptr; |
1559 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1560 | 0 | if (C->getNameModifier() == OMPD_unknown || |
1561 | 0 | C->getNameModifier() == OMPD_parallel) { |
1562 | 0 | IfCond = C->getCondition(); |
1563 | 0 | break; |
1564 | 0 | } |
1565 | 0 | } |
1566 | |
1567 | 0 | OMPParallelScope Scope(CGF, S); |
1568 | 0 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
1569 | | // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk's
1570 | | // lower and upper bounds with the pragma 'for' chunking mechanism.
1571 | | // The following lambda takes care of appending the lower and upper bound
1572 | | // parameters when necessary.
1573 | 0 | CodeGenBoundParameters(CGF, S, CapturedVars); |
1574 | 0 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
1575 | 0 | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, |
1576 | 0 | CapturedVars, IfCond, NumThreads); |
1577 | 0 | } |
1578 | | |
1579 | 0 | static bool isAllocatableDecl(const VarDecl *VD) { |
1580 | 0 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1581 | 0 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1582 | 0 | return false; |
1583 | 0 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1584 | | // Use the default allocation. |
1585 | 0 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1586 | 0 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1587 | 0 | !AA->getAllocator()); |
1588 | 0 | } |
1589 | | |
1590 | | static void emitEmptyBoundParameters(CodeGenFunction &, |
1591 | | const OMPExecutableDirective &, |
1592 | 0 | llvm::SmallVectorImpl<llvm::Value *> &) {} |
1593 | | |
1594 | | static void emitOMPCopyinClause(CodeGenFunction &CGF, |
1595 | 0 | const OMPExecutableDirective &S) { |
1596 | 0 | bool Copyins = CGF.EmitOMPCopyinClause(S); |
1597 | 0 | if (Copyins) { |
1598 | | // Emit an implicit barrier to synchronize the threads and avoid data races
1599 | | // when propagating the master thread's values of threadprivate variables to
1600 | | // the local instances of those variables in all other implicit threads.
1601 | 0 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1602 | 0 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
1603 | 0 | /*ForceSimpleCall=*/true); |
1604 | 0 | } |
1605 | 0 | } |
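
A minimal copyin sketch (tp is an assumed threadprivate global). The barrier
emitted above keeps the other implicit threads from reading their local
instances before the master's value has been propagated:

    int tp;
    #pragma omp threadprivate(tp)

    void f() {
      tp = 42; // set in the primary thread
      #pragma omp parallel copyin(tp)
      { /* every implicit thread starts with tp == 42 */ }
    }
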
1606 | | |
1607 | | Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( |
1608 | 0 | CodeGenFunction &CGF, const VarDecl *VD) { |
1609 | 0 | CodeGenModule &CGM = CGF.CGM; |
1610 | 0 | auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1611 | |
1612 | 0 | if (!VD) |
1613 | 0 | return Address::invalid(); |
1614 | 0 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1615 | 0 | if (!isAllocatableDecl(CVD)) |
1616 | 0 | return Address::invalid(); |
1617 | 0 | llvm::Value *Size; |
1618 | 0 | CharUnits Align = CGM.getContext().getDeclAlign(CVD); |
1619 | 0 | if (CVD->getType()->isVariablyModifiedType()) { |
1620 | 0 | Size = CGF.getTypeSize(CVD->getType()); |
1621 | | // Align the size: ((size + align - 1) / align) * align |
1622 | 0 | Size = CGF.Builder.CreateNUWAdd( |
1623 | 0 | Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); |
1624 | 0 | Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); |
1625 | 0 | Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); |
1626 | 0 | } else { |
1627 | 0 | CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); |
1628 | 0 | Size = CGM.getSize(Sz.alignTo(Align)); |
1629 | 0 | } |
1630 | |
1631 | 0 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1632 | 0 | assert(AA->getAllocator() && |
1633 | 0 | "Expected allocator expression for non-default allocator."); |
1634 | 0 | llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); |
1635 | | // According to the standard, the original allocator type is an enum (integer).
1636 | | // Convert it to a pointer type, if required.
1637 | 0 | if (Allocator->getType()->isIntegerTy()) |
1638 | 0 | Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); |
1639 | 0 | else if (Allocator->getType()->isPointerTy()) |
1640 | 0 | Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, |
1641 | 0 | CGM.VoidPtrTy); |
1642 | |
1643 | 0 | llvm::Value *Addr = OMPBuilder.createOMPAlloc( |
1644 | 0 | CGF.Builder, Size, Allocator, |
1645 | 0 | getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); |
1646 | 0 | llvm::CallInst *FreeCI = |
1647 | 0 | OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator); |
1648 | |
|
1649 | 0 | CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); |
1650 | 0 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1651 | 0 | Addr, |
1652 | 0 | CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), |
1653 | 0 | getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); |
1654 | 0 | return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); |
1655 | 0 | } |
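
A sketch of a local declaration that isAllocatableDecl accepts and that this
helper lowers through createOMPAlloc/createOMPFree; a variably modified type
takes the runtime size-alignment computation above. The function name is an
assumption; omp_high_bw_mem_alloc is one of the predefined allocators:

    #include <omp.h>

    void g(int n) {
      double v[n]; // variably modified: size computed at runtime
      #pragma omp allocate(v) allocator(omp_high_bw_mem_alloc)
      // ... uses of v; the matching free is registered as a cleanup ...
    }
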
1656 | | |
1657 | | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1658 | | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1659 | 0 | SourceLocation Loc) { |
1660 | 0 | CodeGenModule &CGM = CGF.CGM; |
1661 | 0 | if (CGM.getLangOpts().OpenMPUseTLS && |
1662 | 0 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1663 | 0 | return VDAddr; |
1664 | | |
1665 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1666 | |
1667 | 0 | llvm::Type *VarTy = VDAddr.getElementType(); |
1668 | 0 | llvm::Value *Data = |
1669 | 0 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); |
1670 | 0 | llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); |
1671 | 0 | std::string Suffix = getNameWithSeparators({"cache", ""}); |
1672 | 0 | llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); |
1673 | |
1674 | 0 | llvm::CallInst *ThreadPrivateCacheCall = |
1675 | 0 | OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); |
1676 | |
1677 | 0 | return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment()); |
1678 | 0 | } |
1679 | | |
1680 | | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1681 | 0 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1682 | 0 | SmallString<128> Buffer; |
1683 | 0 | llvm::raw_svector_ostream OS(Buffer); |
1684 | 0 | StringRef Sep = FirstSeparator; |
1685 | 0 | for (StringRef Part : Parts) { |
1686 | 0 | OS << Sep << Part; |
1687 | 0 | Sep = Separator; |
1688 | 0 | } |
1689 | 0 | return OS.str().str(); |
1690 | 0 | } |
1691 | | |
1692 | | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
1693 | | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1694 | 0 | InsertPointTy CodeGenIP, Twine RegionName) { |
1695 | 0 | CGBuilderTy &Builder = CGF.Builder; |
1696 | 0 | Builder.restoreIP(CodeGenIP); |
1697 | 0 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1698 | 0 | "." + RegionName + ".after"); |
1699 | |
1700 | 0 | { |
1701 | 0 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1702 | 0 | CGF.EmitStmt(RegionBodyStmt); |
1703 | 0 | } |
1704 | |
1705 | 0 | if (Builder.saveIP().isSet()) |
1706 | 0 | Builder.CreateBr(FiniBB); |
1707 | 0 | } |
1708 | | |
1709 | | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1710 | | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1711 | 0 | InsertPointTy CodeGenIP, Twine RegionName) { |
1712 | 0 | CGBuilderTy &Builder = CGF.Builder; |
1713 | 0 | Builder.restoreIP(CodeGenIP); |
1714 | 0 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1715 | 0 | "." + RegionName + ".after"); |
1716 | |
1717 | 0 | { |
1718 | 0 | OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1719 | 0 | CGF.EmitStmt(RegionBodyStmt); |
1720 | 0 | } |
1721 | |
1722 | 0 | if (Builder.saveIP().isSet()) |
1723 | 0 | Builder.CreateBr(FiniBB); |
1724 | 0 | } |
1725 | | |
1726 | 0 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1727 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1728 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1729 | | // Check if we have any if clause associated with the directive. |
1730 | 0 | llvm::Value *IfCond = nullptr; |
1731 | 0 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1732 | 0 | IfCond = EmitScalarExpr(C->getCondition(), |
1733 | 0 | /*IgnoreResultAssign=*/true); |
1734 | |
1735 | 0 | llvm::Value *NumThreads = nullptr; |
1736 | 0 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1737 | 0 | NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1738 | 0 | /*IgnoreResultAssign=*/true); |
1739 | |
1740 | 0 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1741 | 0 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1742 | 0 | ProcBind = ProcBindClause->getProcBindKind(); |
1743 | |
1744 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1745 | | |
1746 | | // The cleanup callback that finalizes all variables at the given location,
1747 | | // thus calling destructors etc.
1748 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
1749 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
1750 | 0 | }; |
1751 | | |
1752 | | // Privatization callback that performs appropriate action for |
1753 | | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1754 | | // |
1755 | | // TODO: This defaults to shared right now. |
1756 | 0 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1757 | 0 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1758 | | // The next line is appropriate only for variables (Val) with the |
1759 | | // data-sharing attribute "shared". |
1760 | 0 | ReplVal = &Val; |
1761 | |
1762 | 0 | return CodeGenIP; |
1763 | 0 | }; |
1764 | |
1765 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1766 | 0 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1767 | |
1768 | 0 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
1769 | 0 | InsertPointTy CodeGenIP) { |
1770 | 0 | OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1771 | 0 | *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); |
1772 | 0 | }; |
1773 | |
1774 | 0 | CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1775 | 0 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1776 | 0 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1777 | 0 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1778 | 0 | Builder.restoreIP( |
1779 | 0 | OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, |
1780 | 0 | IfCond, NumThreads, ProcBind, S.hasCancel())); |
1781 | 0 | return; |
1782 | 0 | } |
1783 | | |
1784 | | // Emit parallel region as a standalone region. |
1785 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1786 | 0 | Action.Enter(CGF); |
1787 | 0 | OMPPrivateScope PrivateScope(CGF); |
1788 | 0 | emitOMPCopyinClause(CGF, S); |
1789 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
1790 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
1791 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
1792 | 0 | (void)PrivateScope.Privatize(); |
1793 | 0 | CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); |
1794 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
1795 | 0 | }; |
1796 | 0 | { |
1797 | 0 | auto LPCRegion = |
1798 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
1799 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, |
1800 | 0 | emitEmptyBoundParameters); |
1801 | 0 | emitPostUpdateForReductionClause(*this, S, |
1802 | 0 | [](CodeGenFunction &) { return nullptr; }); |
1803 | 0 | } |
1804 | | // Check for outer lastprivate conditional update. |
1805 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
1806 | 0 | } |
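
The clauses handled on both paths above correspond to user code along these
lines (a hedged sketch; work and n are assumptions). With the OpenMPIRBuilder
enabled the values feed OMPBuilder.createParallel; otherwise they are
forwarded to the runtime through emitNumThreadsClause, emitProcBindClause,
and emitParallelCall:

    #pragma omp parallel if(parallel: n > 1000) num_threads(8) proc_bind(close)
    {
      work();
    }
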
1807 | | |
1808 | 0 | void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { |
1809 | 0 | EmitStmt(S.getIfStmt()); |
1810 | 0 | } |
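
Sema has already resolved a metadirective to a single variant and exposes it
as an if-statement, which is why the emission above is a one-liner. An
illustrative sketch (the context selectors are assumptions):

    #pragma omp metadirective \
        when(device = {arch("nvptx")} : target teams distribute parallel for) \
        default(parallel for)
    for (int i = 0; i < n; ++i)
      c[i] = a[i] + b[i];
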
1811 | | |
1812 | | namespace { |
1813 | | /// RAII to handle scopes for loop transformation directives. |
1814 | | class OMPTransformDirectiveScopeRAII { |
1815 | | OMPLoopScope *Scope = nullptr; |
1816 | | CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; |
1817 | | CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; |
1818 | | |
1819 | | OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = |
1820 | | delete; |
1821 | | OMPTransformDirectiveScopeRAII & |
1822 | | operator=(const OMPTransformDirectiveScopeRAII &) = delete; |
1823 | | |
1824 | | public: |
1825 | 0 | OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { |
1826 | 0 | if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { |
1827 | 0 | Scope = new OMPLoopScope(CGF, *Dir); |
1828 | 0 | CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); |
1829 | 0 | CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); |
1830 | 0 | } |
1831 | 0 | } |
1832 | 0 | ~OMPTransformDirectiveScopeRAII() { |
1833 | 0 | if (!Scope) |
1834 | 0 | return; |
1835 | 0 | delete CapInfoRAII; |
1836 | 0 | delete CGSI; |
1837 | 0 | delete Scope; |
1838 | 0 | } |
1839 | | }; |
1840 | | } // namespace |
1841 | | |
1842 | | static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1843 | 0 | int MaxLevel, int Level = 0) { |
1844 | 0 | assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); |
1845 | 0 | const Stmt *SimplifiedS = S->IgnoreContainers(); |
1846 | 0 | if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { |
1847 | 0 | PrettyStackTraceLoc CrashInfo( |
1848 | 0 | CGF.getContext().getSourceManager(), CS->getLBracLoc(), |
1849 | 0 | "LLVM IR generation of compound statement ('{}')"); |
1850 | | |
1851 | | // Keep track of the current cleanup stack depth, including debug scopes. |
1852 | 0 | CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); |
1853 | 0 | for (const Stmt *CurStmt : CS->body()) |
1854 | 0 | emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); |
1855 | 0 | return; |
1856 | 0 | } |
1857 | 0 | if (SimplifiedS == NextLoop) { |
1858 | 0 | if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS)) |
1859 | 0 | SimplifiedS = Dir->getTransformedStmt(); |
1860 | 0 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) |
1861 | 0 | SimplifiedS = CanonLoop->getLoopStmt(); |
1862 | 0 | if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { |
1863 | 0 | S = For->getBody(); |
1864 | 0 | } else { |
1865 | 0 | assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1866 | 0 | "Expected canonical for loop or range-based for loop."); |
1867 | 0 | const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); |
1868 | 0 | CGF.EmitStmt(CXXFor->getLoopVarStmt()); |
1869 | 0 | S = CXXFor->getBody(); |
1870 | 0 | } |
1871 | 0 | if (Level + 1 < MaxLevel) { |
1872 | 0 | NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( |
1873 | 0 | S, /*TryImperfectlyNestedLoops=*/true); |
1874 | 0 | emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); |
1875 | 0 | return; |
1876 | 0 | } |
1877 | 0 | } |
1878 | 0 | CGF.EmitStmt(S); |
1879 | 0 | } |
1880 | | |
1881 | | void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, |
1882 | 0 | JumpDest LoopExit) { |
1883 | 0 | RunCleanupsScope BodyScope(*this); |
1884 | | // Update the counter values for the current iteration.
1885 | 0 | for (const Expr *UE : D.updates()) |
1886 | 0 | EmitIgnoredExpr(UE); |
1887 | | // Update the linear variables. |
1888 | | // In distribute directives only loop counters may be marked as linear, so
1889 | | // there is no need to generate the code for them.
1890 | 0 | if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { |
1891 | 0 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1892 | 0 | for (const Expr *UE : C->updates()) |
1893 | 0 | EmitIgnoredExpr(UE); |
1894 | 0 | } |
1895 | 0 | } |
1896 | | |
1897 | | // On a continue in the body, jump to the end. |
1898 | 0 | JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); |
1899 | 0 | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
1900 | 0 | for (const Expr *E : D.finals_conditions()) { |
1901 | 0 | if (!E) |
1902 | 0 | continue; |
1903 | | // Check that the loop counter in a non-rectangular nest fits into the
1904 | | // iteration space.
1905 | 0 | llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); |
1906 | 0 | EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), |
1907 | 0 | getProfileCount(D.getBody())); |
1908 | 0 | EmitBlock(NextBB); |
1909 | 0 | } |
1910 | |
1911 | 0 | OMPPrivateScope InscanScope(*this); |
1912 | 0 | EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); |
1913 | 0 | bool IsInscanRegion = InscanScope.Privatize(); |
1914 | 0 | if (IsInscanRegion) { |
1915 | | // Need to remember the blocks before and after the scan directive
1916 | | // to dispatch them correctly depending on the clause used in
1917 | | // this directive, inclusive or exclusive. For an inclusive scan the natural
1918 | | // order of the blocks is used; for the exclusive clause the blocks must be
1919 | | // executed in reverse order.
1920 | 0 | OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); |
1921 | 0 | OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); |
1922 | | // No need to allocate the inscan exit block; in simd mode it is selected in
1923 | | // the codegen for the scan directive.
1924 | 0 | if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) |
1925 | 0 | OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); |
1926 | 0 | OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); |
1927 | 0 | EmitBranch(OMPScanDispatch); |
1928 | 0 | EmitBlock(OMPBeforeScanBlock); |
1929 | 0 | } |
1930 | | |
1931 | | // Emit loop variables for C++ range loops. |
1932 | 0 | const Stmt *Body = |
1933 | 0 | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
1934 | | // Emit loop body. |
1935 | 0 | emitBody(*this, Body, |
1936 | 0 | OMPLoopBasedDirective::tryToFindNextInnerLoop( |
1937 | 0 | Body, /*TryImperfectlyNestedLoops=*/true), |
1938 | 0 | D.getLoopsNumber()); |
1939 | | |
1940 | | // Jump to the dispatcher at the end of the loop body. |
1941 | 0 | if (IsInscanRegion) |
1942 | 0 | EmitBranch(OMPScanExitBlock); |
1943 | | |
1944 | | // The end (updates/cleanups). |
1945 | 0 | EmitBlock(Continue.getBlock()); |
1946 | 0 | BreakContinueStack.pop_back(); |
1947 | 0 | } |
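
A minimal inclusive-scan sketch exercising the dispatch blocks created above
(arrays a and b are assumptions): statements before the scan directive land
in omp.before.scan.bb, statements after it in omp.after.scan.bb:

    int x = 0;
    #pragma omp simd reduction(inscan, +: x)
    for (int i = 0; i < n; ++i) {
      x += a[i]; // input phase
      #pragma omp scan inclusive(x)
      b[i] = x;  // scan phase
    }
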
1948 | | |
1949 | | using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; |
1950 | | |
1951 | | /// Emit a captured statement and return the function as well as its captured |
1952 | | /// closure context. |
1953 | | static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, |
1954 | 0 | const CapturedStmt *S) { |
1955 | 0 | LValue CapStruct = ParentCGF.InitCapturedStruct(*S); |
1956 | 0 | CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); |
1957 | 0 | std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = |
1958 | 0 | std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); |
1959 | 0 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); |
1960 | 0 | llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); |
1961 | |
1962 | 0 | return {F, CapStruct.getPointer(ParentCGF)}; |
1963 | 0 | } |
1964 | | |
1965 | | /// Emit a call to a previously captured closure. |
1966 | | static llvm::CallInst * |
1967 | | emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, |
1968 | 0 | llvm::ArrayRef<llvm::Value *> Args) { |
1969 | | // Append the closure context to the argument list.
1970 | 0 | SmallVector<llvm::Value *> EffectiveArgs; |
1971 | 0 | EffectiveArgs.reserve(Args.size() + 1); |
1972 | 0 | llvm::append_range(EffectiveArgs, Args); |
1973 | 0 | EffectiveArgs.push_back(Cap.second); |
1974 | |
1975 | 0 | return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); |
1976 | 0 | } |
1977 | | |
1978 | | llvm::CanonicalLoopInfo * |
1979 | 0 | CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { |
1980 | 0 | assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); |
1981 | | |
1982 | | // The caller is processing the loop-associated directive containing the \p
1983 | | // Depth loops nested in \p S. Put the previous pending loop-associated
1984 | | // directive on the stack. If the current loop-associated directive is a loop
1985 | | // transformation directive, it will push its generated loops onto the stack
1986 | | // such that, together with the loops left here, they form the combined loop
1987 | | // nest for the parent loop-associated directive.
1988 | 0 | int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; |
1989 | 0 | ExpectedOMPLoopDepth = Depth; |
1990 | |
1991 | 0 | EmitStmt(S); |
1992 | 0 | assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); |
1993 | | |
1994 | | // The last added loop is the outermost one. |
1995 | 0 | llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); |
1996 | | |
1997 | | // Pop the \p Depth loops requested by the call from that stack and restore |
1998 | | // the previous context. |
1999 | 0 | OMPLoopNestStack.pop_back_n(Depth); |
2000 | 0 | ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; |
2001 | |
2002 | 0 | return Result; |
2003 | 0 | } |
2004 | | |
2005 | 0 | void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { |
2006 | 0 | const Stmt *SyntacticalLoop = S->getLoopStmt(); |
2007 | 0 | if (!getLangOpts().OpenMPIRBuilder) { |
2008 | | // Ignore if OpenMPIRBuilder is not enabled. |
2009 | 0 | EmitStmt(SyntacticalLoop); |
2010 | 0 | return; |
2011 | 0 | } |
2012 | | |
2013 | 0 | LexicalScope ForScope(*this, S->getSourceRange()); |
2014 | | |
2015 | | // Emit init statements. The Distance/LoopVar functions may reference the
2016 | | // variables these statements declare.
2017 | 0 | const Stmt *BodyStmt; |
2018 | 0 | if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { |
2019 | 0 | if (const Stmt *InitStmt = For->getInit()) |
2020 | 0 | EmitStmt(InitStmt); |
2021 | 0 | BodyStmt = For->getBody(); |
2022 | 0 | } else if (const auto *RangeFor = |
2023 | 0 | dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { |
2024 | 0 | if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) |
2025 | 0 | EmitStmt(RangeStmt); |
2026 | 0 | if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) |
2027 | 0 | EmitStmt(BeginStmt); |
2028 | 0 | if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) |
2029 | 0 | EmitStmt(EndStmt); |
2030 | 0 | if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) |
2031 | 0 | EmitStmt(LoopVarStmt); |
2032 | 0 | BodyStmt = RangeFor->getBody(); |
2033 | 0 | } else |
2034 | 0 | llvm_unreachable("Expected for-stmt or range-based for-stmt"); |
2035 | | |
2036 | | // Emit closure for later use. By-value captures will be captured here. |
2037 | 0 | const CapturedStmt *DistanceFunc = S->getDistanceFunc(); |
2038 | 0 | EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); |
2039 | 0 | const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); |
2040 | 0 | EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); |
2041 | | |
2042 | | // Call the distance function to get the number of iterations of the loop to |
2043 | | // come. |
2044 | 0 | QualType LogicalTy = DistanceFunc->getCapturedDecl() |
2045 | 0 | ->getParam(0) |
2046 | 0 | ->getType() |
2047 | 0 | .getNonReferenceType(); |
2048 | 0 | Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); |
2049 | 0 | emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); |
2050 | 0 | llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); |
2051 | | |
2052 | | // Emit the loop structure. |
2053 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2054 | 0 | auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, |
2055 | 0 | llvm::Value *IndVar) { |
2056 | 0 | Builder.restoreIP(CodeGenIP); |
2057 | | |
2058 | | // Emit the loop body: Convert the logical iteration number to the loop |
2059 | | // variable and emit the body. |
2060 | 0 | const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); |
2061 | 0 | LValue LCVal = EmitLValue(LoopVarRef); |
2062 | 0 | Address LoopVarAddress = LCVal.getAddress(*this); |
2063 | 0 | emitCapturedStmtCall(*this, LoopVarClosure, |
2064 | 0 | {LoopVarAddress.getPointer(), IndVar}); |
2065 | |
2066 | 0 | RunCleanupsScope BodyScope(*this); |
2067 | 0 | EmitStmt(BodyStmt); |
2068 | 0 | }; |
2069 | 0 | llvm::CanonicalLoopInfo *CL = |
2070 | 0 | OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); |
2071 | | |
2072 | | // Finish up the loop. |
2073 | 0 | Builder.restoreIP(CL->getAfterIP()); |
2074 | 0 | ForScope.ForceCleanup(); |
2075 | | |
2076 | | // Remember the CanonicalLoopInfo for parent AST nodes consuming it. |
2077 | 0 | OMPLoopNestStack.push_back(CL); |
2078 | 0 | } |
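
Range-based for loops are one reason this wrapper exists: the Distance
closure computes the trip count and the LoopVar closure maps the logical
induction value back to the user's loop variable. A sketch, assuming the
OpenMPIRBuilder path is enabled:

    #include <vector>

    void h(std::vector<int> &v) {
    #pragma omp for
      for (int &x : v) // emitted as an OMPCanonicalLoop; the range/begin/end/
        x *= 2;        // loopvar statements above are evaluated first
    }
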
2079 | | |
2080 | | void CodeGenFunction::EmitOMPInnerLoop( |
2081 | | const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
2082 | | const Expr *IncExpr, |
2083 | | const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
2084 | 0 | const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
2085 | 0 | auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); |
2086 | | |
2087 | | // Start the loop with a block that tests the condition. |
2088 | 0 | auto CondBlock = createBasicBlock("omp.inner.for.cond"); |
2089 | 0 | EmitBlock(CondBlock); |
2090 | 0 | const SourceRange R = S.getSourceRange(); |
2091 | | |
2092 | | // If attributes are attached, push the basic block onto the loop stack with them.
2093 | 0 | const auto &OMPED = cast<OMPExecutableDirective>(S); |
2094 | 0 | const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
2095 | 0 | const Stmt *SS = ICS->getCapturedStmt(); |
2096 | 0 | const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); |
2097 | 0 | OMPLoopNestStack.clear(); |
2098 | 0 | if (AS) |
2099 | 0 | LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), |
2100 | 0 | AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), |
2101 | 0 | SourceLocToDebugLoc(R.getEnd())); |
2102 | 0 | else |
2103 | 0 | LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), |
2104 | 0 | SourceLocToDebugLoc(R.getEnd())); |
2105 | | |
2106 | | // If there are any cleanups between here and the loop-exit scope, |
2107 | | // create a block to stage a loop exit along. |
2108 | 0 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2109 | 0 | if (RequiresCleanup) |
2110 | 0 | ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); |
2111 | |
2112 | 0 | llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); |
2113 | | |
2114 | | // Emit condition. |
2115 | 0 | EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); |
2116 | 0 | if (ExitBlock != LoopExit.getBlock()) { |
2117 | 0 | EmitBlock(ExitBlock); |
2118 | 0 | EmitBranchThroughCleanup(LoopExit); |
2119 | 0 | } |
2120 | |
2121 | 0 | EmitBlock(LoopBody); |
2122 | 0 | incrementProfileCounter(&S); |
2123 | | |
2124 | | // Create a block for the increment. |
2125 | 0 | JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); |
2126 | 0 | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
2127 | |
2128 | 0 | BodyGen(*this); |
2129 | | |
2130 | | // Emit "IV = IV + 1" and a back-edge to the condition block. |
2131 | 0 | EmitBlock(Continue.getBlock()); |
2132 | 0 | EmitIgnoredExpr(IncExpr); |
2133 | 0 | PostIncGen(*this); |
2134 | 0 | BreakContinueStack.pop_back(); |
2135 | 0 | EmitBranch(CondBlock); |
2136 | 0 | LoopStack.pop(); |
2137 | | // Emit the fall-through block. |
2138 | 0 | EmitBlock(LoopExit.getBlock()); |
2139 | 0 | } |
2140 | | |
2141 | 0 | bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { |
2142 | 0 | if (!HaveInsertPoint()) |
2143 | 0 | return false; |
2144 | | // Emit inits for the linear variables. |
2145 | 0 | bool HasLinears = false; |
2146 | 0 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2147 | 0 | for (const Expr *Init : C->inits()) { |
2148 | 0 | HasLinears = true; |
2149 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); |
2150 | 0 | if (const auto *Ref = |
2151 | 0 | dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { |
2152 | 0 | AutoVarEmission Emission = EmitAutoVarAlloca(*VD); |
2153 | 0 | const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); |
2154 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2155 | 0 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
2156 | 0 | VD->getInit()->getType(), VK_LValue, |
2157 | 0 | VD->getInit()->getExprLoc()); |
2158 | 0 | EmitExprAsInit( |
2159 | 0 | &DRE, VD, |
2160 | 0 | MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), |
2161 | 0 | /*capturedByInit=*/false); |
2162 | 0 | EmitAutoVarCleanups(Emission); |
2163 | 0 | } else { |
2164 | 0 | EmitVarDecl(*VD); |
2165 | 0 | } |
2166 | 0 | } |
2167 | | // Emit the linear steps for the linear clauses. |
2168 | | // If a step is not constant, it is pre-calculated before the loop. |
2169 | 0 | if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) |
2170 | 0 | if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { |
2171 | 0 | EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); |
2172 | | // Emit calculation of the linear step. |
2173 | 0 | EmitIgnoredExpr(CS); |
2174 | 0 | } |
2175 | 0 | } |
2176 | 0 | return HasLinears; |
2177 | 0 | } |
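
A linear-clause sketch (identifiers assumed). The inits emitted above
snapshot the variable's value before the loop, and a non-constant step would
be pre-computed into the save variable handled at the end of this function:

    int j = 0;
    #pragma omp simd linear(j: 2)
    for (int i = 0; i < n; ++i) {
      b[j] = a[i];
      j += 2; // must advance by the declared step each iteration
    }
    // afterwards j == 2 * n, the final value restored by
    // EmitOMPLinearClauseFinal below
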
2178 | | |
2179 | | void CodeGenFunction::EmitOMPLinearClauseFinal( |
2180 | | const OMPLoopDirective &D, |
2181 | 0 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2182 | 0 | if (!HaveInsertPoint()) |
2183 | 0 | return; |
2184 | 0 | llvm::BasicBlock *DoneBB = nullptr; |
2185 | | // Emit the final values of the linear variables. |
2186 | 0 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2187 | 0 | auto IC = C->varlist_begin(); |
2188 | 0 | for (const Expr *F : C->finals()) { |
2189 | 0 | if (!DoneBB) { |
2190 | 0 | if (llvm::Value *Cond = CondGen(*this)) { |
2191 | | // When the first post-update expression is found, emit the conditional
2192 | | // block if one was requested.
2193 | 0 | llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); |
2194 | 0 | DoneBB = createBasicBlock(".omp.linear.pu.done"); |
2195 | 0 | Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
2196 | 0 | EmitBlock(ThenBB); |
2197 | 0 | } |
2198 | 0 | } |
2199 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); |
2200 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2201 | 0 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
2202 | 0 | (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); |
2203 | 0 | Address OrigAddr = EmitLValue(&DRE).getAddress(*this); |
2204 | 0 | CodeGenFunction::OMPPrivateScope VarScope(*this); |
2205 | 0 | VarScope.addPrivate(OrigVD, OrigAddr); |
2206 | 0 | (void)VarScope.Privatize(); |
2207 | 0 | EmitIgnoredExpr(F); |
2208 | 0 | ++IC; |
2209 | 0 | } |
2210 | 0 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
2211 | 0 | EmitIgnoredExpr(PostUpdate); |
2212 | 0 | } |
2213 | 0 | if (DoneBB) |
2214 | 0 | EmitBlock(DoneBB, /*IsFinished=*/true); |
2215 | 0 | } |
2216 | | |
2217 | | static void emitAlignedClause(CodeGenFunction &CGF, |
2218 | 0 | const OMPExecutableDirective &D) { |
2219 | 0 | if (!CGF.HaveInsertPoint()) |
2220 | 0 | return; |
2221 | 0 | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2222 | 0 | llvm::APInt ClauseAlignment(64, 0); |
2223 | 0 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2224 | 0 | auto *AlignmentCI = |
2225 | 0 | cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
2226 | 0 | ClauseAlignment = AlignmentCI->getValue(); |
2227 | 0 | } |
2228 | 0 | for (const Expr *E : Clause->varlists()) { |
2229 | 0 | llvm::APInt Alignment(ClauseAlignment); |
2230 | 0 | if (Alignment == 0) { |
2231 | | // OpenMP [2.8.1, Description] |
2232 | | // If no optional parameter is specified, implementation-defined default |
2233 | | // alignments for SIMD instructions on the target platforms are assumed. |
2234 | 0 | Alignment = |
2235 | 0 | CGF.getContext() |
2236 | 0 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2237 | 0 | E->getType()->getPointeeType())) |
2238 | 0 | .getQuantity(); |
2239 | 0 | } |
2240 | 0 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2241 | 0 | "alignment is not power of 2"); |
2242 | 0 | if (Alignment != 0) { |
2243 | 0 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2244 | 0 | CGF.emitAlignmentAssumption( |
2245 | 0 | PtrValue, E, /*No second loc needed*/ SourceLocation(), |
2246 | 0 | llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); |
2247 | 0 | } |
2248 | 0 | } |
2249 | 0 | } |
2250 | 0 | } |
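
The alignment assumptions emitted above come from clauses like the following
sketch (the pointers and the 64-byte figure are assumptions; the value must
be a power of two, and omitting it selects the target's default SIMD
alignment, as the code above shows):

    void axpy(int n, float *x, float *y) {
    #pragma omp simd aligned(x, y: 64)
      for (int i = 0; i < n; ++i)
        y[i] += 2.0f * x[i];
    }
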
2251 | | |
2252 | | void CodeGenFunction::EmitOMPPrivateLoopCounters( |
2253 | 0 | const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { |
2254 | 0 | if (!HaveInsertPoint()) |
2255 | 0 | return; |
2256 | 0 | auto I = S.private_counters().begin(); |
2257 | 0 | for (const Expr *E : S.counters()) { |
2258 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2259 | 0 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); |
2260 | | // Emit var without initialization. |
2261 | 0 | AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); |
2262 | 0 | EmitAutoVarCleanups(VarEmission); |
2263 | 0 | LocalDeclMap.erase(PrivateVD); |
2264 | 0 | (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress()); |
2265 | 0 | if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || |
2266 | 0 | VD->hasGlobalStorage()) { |
2267 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), |
2268 | 0 | LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), |
2269 | 0 | E->getType(), VK_LValue, E->getExprLoc()); |
2270 | 0 | (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this)); |
2271 | 0 | } else { |
2272 | 0 | (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress()); |
2273 | 0 | } |
2274 | 0 | ++I; |
2275 | 0 | } |
2276 | | // Privatize extra loop counters used in loops for ordered(n) clauses. |
2277 | 0 | for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
2278 | 0 | if (!C->getNumForLoops()) |
2279 | 0 | continue; |
2280 | 0 | for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); |
2281 | 0 | I < E; ++I) { |
2282 | 0 | const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); |
2283 | 0 | const auto *VD = cast<VarDecl>(DRE->getDecl()); |
2284 | | // Override only those variables that can be captured to avoid re-emission |
2285 | | // of the variables declared within the loops. |
2286 | 0 | if (DRE->refersToEnclosingVariableOrCapture()) { |
2287 | 0 | (void)LoopScope.addPrivate( |
2288 | 0 | VD, CreateMemTemp(DRE->getType(), VD->getName())); |
2289 | 0 | } |
2290 | 0 | } |
2291 | 0 | } |
2292 | 0 | } |
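
The extra counters privatized by the last loop above come from doacross
nests; a hedged ordered(2) sketch (a, n, and m are assumptions) in which j is
such an additional counter:

    #pragma omp for ordered(2)
    for (int i = 1; i < n; ++i)
      for (int j = 1; j < m; ++j) {
    #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
        a[i][j] = a[i - 1][j] + a[i][j - 1];
    #pragma omp ordered depend(source)
      }
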
2293 | | |
2294 | | static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2295 | | const Expr *Cond, llvm::BasicBlock *TrueBlock, |
2296 | 0 | llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { |
2297 | 0 | if (!CGF.HaveInsertPoint()) |
2298 | 0 | return; |
2299 | 0 | { |
2300 | 0 | CodeGenFunction::OMPPrivateScope PreCondScope(CGF); |
2301 | 0 | CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); |
2302 | 0 | (void)PreCondScope.Privatize(); |
2303 | | // Get initial values of real counters. |
2304 | 0 | for (const Expr *I : S.inits()) { |
2305 | 0 | CGF.EmitIgnoredExpr(I); |
2306 | 0 | } |
2307 | 0 | } |
2308 | | // Create temp loop control variables with their init values to support |
2309 | | // non-rectangular loops. |
2310 | 0 | CodeGenFunction::OMPMapVars PreCondVars; |
2311 | 0 | for (const Expr *E : S.dependent_counters()) { |
2312 | 0 | if (!E) |
2313 | 0 | continue; |
2314 | 0 | assert(!E->getType().getNonReferenceType()->isRecordType() && |
2315 | 0 | "dependent counter must not be an iterator."); |
2316 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2317 | 0 | Address CounterAddr = |
2318 | 0 | CGF.CreateMemTemp(VD->getType().getNonReferenceType()); |
2319 | 0 | (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); |
2320 | 0 | } |
2321 | 0 | (void)PreCondVars.apply(CGF); |
2322 | 0 | for (const Expr *E : S.dependent_inits()) { |
2323 | 0 | if (!E) |
2324 | 0 | continue; |
2325 | 0 | CGF.EmitIgnoredExpr(E); |
2326 | 0 | } |
2327 | | // Check that the loop is executed at least once.
2328 | 0 | CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); |
2329 | 0 | PreCondVars.restore(CGF); |
2330 | 0 | } |
2331 | | |
2332 | | void CodeGenFunction::EmitOMPLinearClause( |
2333 | 0 | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2334 | 0 | if (!HaveInsertPoint()) |
2335 | 0 | return; |
2336 | 0 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2337 | 0 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
2338 | 0 | const auto *LoopDirective = cast<OMPLoopDirective>(&D); |
2339 | 0 | for (const Expr *C : LoopDirective->counters()) { |
2340 | 0 | SIMDLCVs.insert( |
2341 | 0 | cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); |
2342 | 0 | } |
2343 | 0 | } |
2344 | 0 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2345 | 0 | auto CurPrivate = C->privates().begin(); |
2346 | 0 | for (const Expr *E : C->varlists()) { |
2347 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2348 | 0 | const auto *PrivateVD = |
2349 | 0 | cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); |
2350 | 0 | if (!SIMDLCVs.count(VD->getCanonicalDecl())) { |
2351 | | // Emit private VarDecl with copy init. |
2352 | 0 | EmitVarDecl(*PrivateVD); |
2353 | 0 | bool IsRegistered = |
2354 | 0 | PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); |
2355 | 0 | assert(IsRegistered && "linear var already registered as private"); |
2356 | | // Silence the warning about unused variable. |
2357 | 0 | (void)IsRegistered; |
2358 | 0 | } else { |
2359 | 0 | EmitVarDecl(*PrivateVD); |
2360 | 0 | } |
2361 | 0 | ++CurPrivate; |
2362 | 0 | } |
2363 | 0 | } |
2364 | 0 | } |
2365 | | |
2366 | | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2367 | 0 | const OMPExecutableDirective &D) { |
2368 | 0 | if (!CGF.HaveInsertPoint()) |
2369 | 0 | return; |
2370 | 0 | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2371 | 0 | RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), |
2372 | 0 | /*ignoreResult=*/true); |
2373 | 0 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2374 | 0 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2375 | | // In the presence of a finite 'safelen', it may be unsafe to mark all
2376 | | // the memory instructions parallel, because loop-carried
2377 | | // dependences at a distance of 'safelen' iterations are possible.
2378 | 0 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2379 | 0 | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2380 | 0 | RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), |
2381 | 0 | /*ignoreResult=*/true); |
2382 | 0 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2383 | 0 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2384 | | // In the presence of a finite 'safelen', it may be unsafe to mark |
2385 | | // all the memory instructions parallel, because loop-carried |
2386 | | // dependences at a distance of 'safelen' iterations are possible. |
2387 | 0 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2388 | 0 | } |
2389 | 0 | } |
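
// Illustrative sketch (hypothetical user code, not part of this file): with
// a finite 'safelen', only iterations up to 8 apart are known independent,
// so the loop may be vectorized with width <= 8 but must not be marked
// fully parallel -- the setParallel(false) case handled above.
void shift_add(float *a, int n) {
#pragma omp simd safelen(8)
  for (int i = 0; i < n - 8; ++i)
    a[i + 8] += a[i]; // loop-carried dependence at distance 8
}
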
2390 | | |
2391 | 0 | void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { |
2392 | | // Walk clauses and process safelen/simdlen and order clauses. |
2393 | 0 | LoopStack.setParallel(/*Enable=*/true); |
2394 | 0 | LoopStack.setVectorizeEnable(); |
2395 | 0 | emitSimdlenSafelenClause(*this, D); |
2396 | 0 | if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2397 | 0 | if (C->getKind() == OMPC_ORDER_concurrent) |
2398 | 0 | LoopStack.setParallel(/*Enable=*/true); |
2399 | 0 | if ((D.getDirectiveKind() == OMPD_simd || |
2400 | 0 | (getLangOpts().OpenMPSimd && |
2401 | 0 | isOpenMPSimdDirective(D.getDirectiveKind()))) && |
2402 | 0 | llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), |
2403 | 0 | [](const OMPReductionClause *C) { |
2404 | 0 | return C->getModifier() == OMPC_REDUCTION_inscan; |
2405 | 0 | })) |
2406 | | // Disable parallel access in case of prefix sum. |
2407 | 0 | LoopStack.setParallel(/*Enable=*/false); |
2408 | 0 | } |
2409 | | |
2410 | | void CodeGenFunction::EmitOMPSimdFinal( |
2411 | | const OMPLoopDirective &D, |
2412 | 0 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2413 | 0 | if (!HaveInsertPoint()) |
2414 | 0 | return; |
2415 | 0 | llvm::BasicBlock *DoneBB = nullptr; |
2416 | 0 | auto IC = D.counters().begin(); |
2417 | 0 | auto IPC = D.private_counters().begin(); |
2418 | 0 | for (const Expr *F : D.finals()) { |
2419 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); |
2420 | 0 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); |
2421 | 0 | const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); |
2422 | 0 | if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || |
2423 | 0 | OrigVD->hasGlobalStorage() || CED) { |
2424 | 0 | if (!DoneBB) { |
2425 | 0 | if (llvm::Value *Cond = CondGen(*this)) { |
2426 | | // Lazily emit the conditional block (if one was requested) once the |
2427 | | // first counter that needs a final update is found. |
2428 | 0 | llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); |
2429 | 0 | DoneBB = createBasicBlock(".omp.final.done"); |
2430 | 0 | Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
2431 | 0 | EmitBlock(ThenBB); |
2432 | 0 | } |
2433 | 0 | } |
2434 | 0 | Address OrigAddr = Address::invalid(); |
2435 | 0 | if (CED) { |
2436 | 0 | OrigAddr = |
2437 | 0 | EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); |
2438 | 0 | } else { |
2439 | 0 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), |
2440 | 0 | /*RefersToEnclosingVariableOrCapture=*/false, |
2441 | 0 | (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); |
2442 | 0 | OrigAddr = EmitLValue(&DRE).getAddress(*this); |
2443 | 0 | } |
2444 | 0 | OMPPrivateScope VarScope(*this); |
2445 | 0 | VarScope.addPrivate(OrigVD, OrigAddr); |
2446 | 0 | (void)VarScope.Privatize(); |
2447 | 0 | EmitIgnoredExpr(F); |
2448 | 0 | } |
2449 | 0 | ++IC; |
2450 | 0 | ++IPC; |
2451 | 0 | } |
2452 | 0 | if (DoneBB) |
2453 | 0 | EmitBlock(DoneBB, /*IsFinished=*/true); |
2454 | 0 | } |
2455 | | |
2456 | | static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, |
2457 | | const OMPLoopDirective &S, |
2458 | 0 | CodeGenFunction::JumpDest LoopExit) { |
2459 | 0 | CGF.EmitOMPLoopBody(S, LoopExit); |
2460 | 0 | CGF.EmitStopPoint(&S); |
2461 | 0 | } |
2462 | | |
2463 | | /// Emit a helper variable and return the corresponding lvalue. |
2464 | | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2465 | 0 | const DeclRefExpr *Helper) { |
2466 | 0 | auto VDecl = cast<VarDecl>(Helper->getDecl()); |
2467 | 0 | CGF.EmitVarDecl(*VDecl); |
2468 | 0 | return CGF.EmitLValue(Helper); |
2469 | 0 | } |
2470 | | |
2471 | | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2472 | | const RegionCodeGenTy &SimdInitGen, |
2473 | 0 | const RegionCodeGenTy &BodyCodeGen) { |
2474 | 0 | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2475 | 0 | PrePostActionTy &) { |
2476 | 0 | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2477 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2478 | 0 | SimdInitGen(CGF); |
2479 | |
2480 | 0 | BodyCodeGen(CGF); |
2481 | 0 | }; |
2482 | 0 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2483 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2484 | 0 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2485 | |
2486 | 0 | BodyCodeGen(CGF); |
2487 | 0 | }; |
2488 | 0 | const Expr *IfCond = nullptr; |
2489 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
2490 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2491 | 0 | if (CGF.getLangOpts().OpenMP >= 50 && |
2492 | 0 | (C->getNameModifier() == OMPD_unknown || |
2493 | 0 | C->getNameModifier() == OMPD_simd)) { |
2494 | 0 | IfCond = C->getCondition(); |
2495 | 0 | break; |
2496 | 0 | } |
2497 | 0 | } |
2498 | 0 | } |
2499 | 0 | if (IfCond) { |
2500 | 0 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2501 | 0 | } else { |
2502 | 0 | RegionCodeGenTy ThenRCG(ThenGen); |
2503 | 0 | ThenRCG(CGF); |
2504 | 0 | } |
2505 | 0 | } |
2506 | | |
2507 | | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2508 | 0 | PrePostActionTy &Action) { |
2509 | 0 | Action.Enter(CGF); |
2510 | 0 | assert(isOpenMPSimdDirective(S.getDirectiveKind()) && |
2511 | 0 | "Expected simd directive"); |
2512 | 0 | OMPLoopScope PreInitScope(CGF, S); |
2513 | | // if (PreCond) { |
2514 | | // for (IV in 0..LastIteration) BODY; |
2515 | | // <Final counter/linear vars updates>; |
2516 | | // } |
2517 | | // |
2518 | 0 | if (isOpenMPDistributeDirective(S.getDirectiveKind()) || |
2519 | 0 | isOpenMPWorksharingDirective(S.getDirectiveKind()) || |
2520 | 0 | isOpenMPTaskLoopDirective(S.getDirectiveKind())) { |
2521 | 0 | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); |
2522 | 0 | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); |
2523 | 0 | } |
2524 | | |
2525 | | // Emit: if (PreCond) - begin. |
2526 | | // If the condition constant folds and can be elided, avoid emitting the |
2527 | | // whole loop. |
2528 | 0 | bool CondConstant; |
2529 | 0 | llvm::BasicBlock *ContBlock = nullptr; |
2530 | 0 | if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
2531 | 0 | if (!CondConstant) |
2532 | 0 | return; |
2533 | 0 | } else { |
2534 | 0 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); |
2535 | 0 | ContBlock = CGF.createBasicBlock("simd.if.end"); |
2536 | 0 | emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, |
2537 | 0 | CGF.getProfileCount(&S)); |
2538 | 0 | CGF.EmitBlock(ThenBlock); |
2539 | 0 | CGF.incrementProfileCounter(&S); |
2540 | 0 | } |
2541 | | |
2542 | | // Emit the loop iteration variable. |
2543 | 0 | const Expr *IVExpr = S.getIterationVariable(); |
2544 | 0 | const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); |
2545 | 0 | CGF.EmitVarDecl(*IVDecl); |
2546 | 0 | CGF.EmitIgnoredExpr(S.getInit()); |
2547 | | |
2548 | | // Emit the iterations count variable. |
2549 | | // If it is not a variable, Sema decided to calculate iterations count on |
2550 | | // each iteration (e.g., it is foldable into a constant). |
2551 | 0 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
2552 | 0 | CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
2553 | | // Emit calculation of the iterations count. |
2554 | 0 | CGF.EmitIgnoredExpr(S.getCalcLastIteration()); |
2555 | 0 | } |
2556 | |
2557 | 0 | emitAlignedClause(CGF, S); |
2558 | 0 | (void)CGF.EmitOMPLinearClauseInit(S); |
2559 | 0 | { |
2560 | 0 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2561 | 0 | CGF.EmitOMPPrivateClause(S, LoopScope); |
2562 | 0 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2563 | 0 | CGF.EmitOMPLinearClause(S, LoopScope); |
2564 | 0 | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
2565 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2566 | 0 | CGF, S, CGF.EmitLValue(S.getIterationVariable())); |
2567 | 0 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
2568 | 0 | (void)LoopScope.Privatize(); |
2569 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
2570 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
2571 | |
2572 | 0 | emitCommonSimdLoop( |
2573 | 0 | CGF, S, |
2574 | 0 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2575 | 0 | CGF.EmitOMPSimdInit(S); |
2576 | 0 | }, |
2577 | 0 | [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2578 | 0 | CGF.EmitOMPInnerLoop( |
2579 | 0 | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
2580 | 0 | [&S](CodeGenFunction &CGF) { |
2581 | 0 | emitOMPLoopBodyWithStopPoint(CGF, S, |
2582 | 0 | CodeGenFunction::JumpDest()); |
2583 | 0 | }, |
2584 | 0 | [](CodeGenFunction &) {}); |
2585 | 0 | }); |
2586 | 0 | CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2587 | | // Emit final copy of the lastprivate variables at the end of loops. |
2588 | 0 | if (HasLastprivateClause) |
2589 | 0 | CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); |
2590 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); |
2591 | 0 | emitPostUpdateForReductionClause(CGF, S, |
2592 | 0 | [](CodeGenFunction &) { return nullptr; }); |
2593 | 0 | LoopScope.restoreMap(); |
2594 | 0 | CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2595 | 0 | } |
2596 | | // Emit: if (PreCond) - end. |
2597 | 0 | if (ContBlock) { |
2598 | 0 | CGF.EmitBranch(ContBlock); |
2599 | 0 | CGF.EmitBlock(ContBlock, true); |
2600 | 0 | } |
2601 | 0 | } |
2602 | | |
2603 | 0 | static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { |
2604 | | // Check for unsupported clauses |
2605 | 0 | for (OMPClause *C : S.clauses()) { |
2606 | |     // Currently only the order, simdlen, safelen and aligned clauses are supported. |
2607 | 0 | if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) || |
2608 | 0 | isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C))) |
2609 | 0 | return false; |
2610 | 0 | } |
2611 | | |
2612 | | // Check if we have a statement with the ordered directive. |
2613 | | // Visit the statement hierarchy to find a compound statement |
2614 | | // with an ordered directive in it. |
2615 | 0 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) { |
2616 | 0 | if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { |
2617 | 0 | for (const Stmt *SubStmt : SyntacticalLoop->children()) { |
2618 | 0 | if (!SubStmt) |
2619 | 0 | continue; |
2620 | 0 | if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) { |
2621 | 0 | for (const Stmt *CSSubStmt : CS->children()) { |
2622 | 0 | if (!CSSubStmt) |
2623 | 0 | continue; |
2624 | 0 | if (isa<OMPOrderedDirective>(CSSubStmt)) { |
2625 | 0 | return false; |
2626 | 0 | } |
2627 | 0 | } |
2628 | 0 | } |
2629 | 0 | } |
2630 | 0 | } |
2631 | 0 | } |
2632 | 0 | return true; |
2633 | 0 | } |
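
// Illustrative sketch (hypothetical user code): the predicate above rejects
// this loop because the compound body of the canonical loop contains an
// 'ordered' directive, so codegen falls back to the classic path.
void ordered_body(float *a, int n) {
#pragma omp simd
  for (int i = 0; i < n; ++i) {
#pragma omp ordered simd
    { a[i] *= 2.0f; }
  }
}
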
2634 | | static llvm::MapVector<llvm::Value *, llvm::Value *> |
2635 | 0 | GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { |
2636 | 0 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; |
2637 | 0 | for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { |
2638 | 0 | llvm::APInt ClauseAlignment(64, 0); |
2639 | 0 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2640 | 0 | auto *AlignmentCI = |
2641 | 0 | cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
2642 | 0 | ClauseAlignment = AlignmentCI->getValue(); |
2643 | 0 | } |
2644 | 0 | for (const Expr *E : Clause->varlists()) { |
2645 | 0 | llvm::APInt Alignment(ClauseAlignment); |
2646 | 0 | if (Alignment == 0) { |
2647 | | // OpenMP [2.8.1, Description] |
2648 | | // If no optional parameter is specified, implementation-defined default |
2649 | | // alignments for SIMD instructions on the target platforms are assumed. |
2650 | 0 | Alignment = |
2651 | 0 | CGF.getContext() |
2652 | 0 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2653 | 0 | E->getType()->getPointeeType())) |
2654 | 0 | .getQuantity(); |
2655 | 0 | } |
2656 | 0 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2657 | 0 | "alignment is not power of 2"); |
2658 | 0 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2659 | 0 | AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); |
2660 | 0 | } |
2661 | 0 | } |
2662 | 0 | return AlignedVars; |
2663 | 0 | } |
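
// Illustrative sketch (hypothetical user code): an 'aligned' clause with an
// explicit alignment; GetAlignedMapping records the pointer together with
// the constant 32 so alignment assumptions can be emitted for it.
void scale(float *x, int n) {
#pragma omp simd aligned(x : 32)
  for (int i = 0; i < n; ++i)
    x[i] *= 2.0f;
}
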
2664 | | |
2665 | 0 | void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2666 | 0 | bool UseOMPIRBuilder = |
2667 | 0 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
2668 | 0 | if (UseOMPIRBuilder) { |
2669 | 0 | auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, |
2670 | 0 | PrePostActionTy &) { |
2671 | | // Use the OpenMPIRBuilder if enabled. |
2672 | 0 | if (UseOMPIRBuilder) { |
2673 | 0 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = |
2674 | 0 | GetAlignedMapping(S, CGF); |
2675 | | // Emit the associated statement and get its loop representation. |
2676 | 0 | const Stmt *Inner = S.getRawStmt(); |
2677 | 0 | llvm::CanonicalLoopInfo *CLI = |
2678 | 0 | EmitOMPCollapsedCanonicalLoopNest(Inner, 1); |
2679 | |
2680 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = |
2681 | 0 | CGM.getOpenMPRuntime().getOMPBuilder(); |
2682 | | // Add SIMD specific metadata |
2683 | 0 | llvm::ConstantInt *Simdlen = nullptr; |
2684 | 0 | if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { |
2685 | 0 | RValue Len = |
2686 | 0 | this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), |
2687 | 0 | /*ignoreResult=*/true); |
2688 | 0 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2689 | 0 | Simdlen = Val; |
2690 | 0 | } |
2691 | 0 | llvm::ConstantInt *Safelen = nullptr; |
2692 | 0 | if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { |
2693 | 0 | RValue Len = |
2694 | 0 | this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), |
2695 | 0 | /*ignoreResult=*/true); |
2696 | 0 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2697 | 0 | Safelen = Val; |
2698 | 0 | } |
2699 | 0 | llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; |
2700 | 0 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
2701 | 0 | if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) { |
2702 | 0 | Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; |
2703 | 0 | } |
2704 | 0 | } |
2705 | | // Add simd metadata to the collapsed loop. Do not generate another |
2706 | | // loop for the if clause; support for the if clause is handled earlier. |
2707 | 0 | OMPBuilder.applySimd(CLI, AlignedVars, |
2708 | 0 | /*IfCond*/ nullptr, Order, Simdlen, Safelen); |
2709 | 0 | return; |
2710 | 0 | } |
2711 | 0 | }; |
2712 | 0 | { |
2713 | 0 | auto LPCRegion = |
2714 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2715 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2716 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, |
2717 | 0 | CodeGenIRBuilder); |
2718 | 0 | } |
2719 | 0 | return; |
2720 | 0 | } |
2721 | | |
2722 | 0 | ParentLoopDirectiveForScanRegion ScanRegion(*this, S); |
2723 | 0 | OMPFirstScanLoop = true; |
2724 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2725 | 0 | emitOMPSimdRegion(CGF, S, Action); |
2726 | 0 | }; |
2727 | 0 | { |
2728 | 0 | auto LPCRegion = |
2729 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2730 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2731 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
2732 | 0 | } |
2733 | | // Check for outer lastprivate conditional update. |
2734 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
2735 | 0 | } |
2736 | | |
2737 | 0 | void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { |
2738 | | // Emit the de-sugared statement. |
2739 | 0 | OMPTransformDirectiveScopeRAII TileScope(*this, &S); |
2740 | 0 | EmitStmt(S.getTransformedStmt()); |
2741 | 0 | } |
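
// Illustrative sketch (hypothetical user code): 'tile' is handled entirely
// by emitting the de-sugared loop nest that Sema attached to the directive,
// so no tiling logic lives in CodeGen itself.
void blur(float (*c)[64]) {
#pragma omp tile sizes(4, 4)
  for (int i = 0; i < 64; ++i)
    for (int j = 0; j < 64; ++j)
      c[i][j] += 1.0f;
}
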
2742 | | |
2743 | 0 | void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { |
2744 | 0 | bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; |
2745 | |
2746 | 0 | if (UseOMPIRBuilder) { |
2747 | 0 | auto DL = SourceLocToDebugLoc(S.getBeginLoc()); |
2748 | 0 | const Stmt *Inner = S.getRawStmt(); |
2749 | | |
2750 | | // Consume nested loop. Clear the entire remaining loop stack because a |
2751 | | // fully unrolled loop is non-transformable. For partial unrolling the |
2752 | | // generated outer loop is pushed back to the stack. |
2753 | 0 | llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); |
2754 | 0 | OMPLoopNestStack.clear(); |
2755 | |
2756 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2757 | |
2758 | 0 | bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; |
2759 | 0 | llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; |
2760 | |
2761 | 0 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2762 | 0 | assert(ExpectedOMPLoopDepth == 0); |
2763 | 0 | OMPBuilder.unrollLoopFull(DL, CLI); |
2764 | 0 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2765 | 0 | uint64_t Factor = 0; |
2766 | 0 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2767 | 0 | Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); |
2768 | 0 | assert(Factor >= 1 && "Only positive factors are valid"); |
2769 | 0 | } |
2770 | 0 | OMPBuilder.unrollLoopPartial(DL, CLI, Factor, |
2771 | 0 | NeedsUnrolledCLI ? &UnrolledCLI : nullptr); |
2772 | 0 | } else { |
2773 | 0 | OMPBuilder.unrollLoopHeuristic(DL, CLI); |
2774 | 0 | } |
2775 | | |
2776 | 0 | assert((!NeedsUnrolledCLI || UnrolledCLI) && |
2777 | 0 | "NeedsUnrolledCLI implies UnrolledCLI to be set"); |
2778 | 0 | if (UnrolledCLI) |
2779 | 0 | OMPLoopNestStack.push_back(UnrolledCLI); |
2780 | |
2781 | 0 | return; |
2782 | 0 | } |
2783 | | |
2784 | | // This function is only called if the unrolled loop is not consumed by any |
2785 | | // other loop-associated construct. Such a loop-associated construct will have |
2786 | | // used the transformed AST. |
2787 | | |
2788 | | // Set the unroll metadata for the next emitted loop. |
2789 | 0 | LoopStack.setUnrollState(LoopAttributes::Enable); |
2790 | |
2791 | 0 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2792 | 0 | LoopStack.setUnrollState(LoopAttributes::Full); |
2793 | 0 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2794 | 0 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2795 | 0 | uint64_t Factor = |
2796 | 0 | FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); |
2797 | 0 | assert(Factor >= 1 && "Only positive factors are valid"); |
2798 | 0 | LoopStack.setUnrollCount(Factor); |
2799 | 0 | } |
2800 | 0 | } |
2801 | | |
2802 | 0 | EmitStmt(S.getAssociatedStmt()); |
2803 | 0 | } |
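
// Illustrative sketch (hypothetical user code) of the paths above: 'full'
// requires a constant trip count, 'partial(4)' may hand the generated outer
// loop to an enclosing loop-associated construct (NeedsUnrolledCLI), and a
// bare 'unroll' leaves the factor to the heuristic.
void fill(int *a) {
#pragma omp unroll partial(4)
  for (int i = 0; i < 128; ++i)
    a[i] = i;
}
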
2804 | | |
2805 | | void CodeGenFunction::EmitOMPOuterLoop( |
2806 | | bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2807 | | CodeGenFunction::OMPPrivateScope &LoopScope, |
2808 | | const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2809 | | const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2810 | 0 | const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { |
2811 | 0 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2812 | |
2813 | 0 | const Expr *IVExpr = S.getIterationVariable(); |
2814 | 0 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2815 | 0 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2816 | |
2817 | 0 | JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); |
2818 | | |
2819 | | // Start the loop with a block that tests the condition. |
2820 | 0 | llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); |
2821 | 0 | EmitBlock(CondBlock); |
2822 | 0 | const SourceRange R = S.getSourceRange(); |
2823 | 0 | OMPLoopNestStack.clear(); |
2824 | 0 | LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), |
2825 | 0 | SourceLocToDebugLoc(R.getEnd())); |
2826 | |
2827 | 0 | llvm::Value *BoolCondVal = nullptr; |
2828 | 0 | if (!DynamicOrOrdered) { |
2829 | | // UB = min(UB, GlobalUB) or |
2830 | | // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. |
2831 | | // 'distribute parallel for') |
2832 | 0 | EmitIgnoredExpr(LoopArgs.EUB); |
2833 | | // IV = LB |
2834 | 0 | EmitIgnoredExpr(LoopArgs.Init); |
2835 | | // IV < UB |
2836 | 0 | BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); |
2837 | 0 | } else { |
2838 | 0 | BoolCondVal = |
2839 | 0 | RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, |
2840 | 0 | LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); |
2841 | 0 | } |
2842 | | |
2843 | | // If there are any cleanups between here and the loop-exit scope, |
2844 | | // create a block to stage a loop exit along. |
2845 | 0 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2846 | 0 | if (LoopScope.requiresCleanups()) |
2847 | 0 | ExitBlock = createBasicBlock("omp.dispatch.cleanup"); |
2848 | |
2849 | 0 | llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); |
2850 | 0 | Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); |
2851 | 0 | if (ExitBlock != LoopExit.getBlock()) { |
2852 | 0 | EmitBlock(ExitBlock); |
2853 | 0 | EmitBranchThroughCleanup(LoopExit); |
2854 | 0 | } |
2855 | 0 | EmitBlock(LoopBody); |
2856 | | |
2857 | | // Emit "IV = LB" (in case of static schedule, we have already calculated new |
2858 | | // LB for loop condition and emitted it above). |
2859 | 0 | if (DynamicOrOrdered) |
2860 | 0 | EmitIgnoredExpr(LoopArgs.Init); |
2861 | | |
2862 | | // Create a block for the increment. |
2863 | 0 | JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); |
2864 | 0 | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
2865 | |
2866 | 0 | emitCommonSimdLoop( |
2867 | 0 | *this, S, |
2868 | 0 | [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
2869 | | // Generate !llvm.loop.parallel metadata for loads and stores for loops |
2870 | | // with dynamic/guided scheduling and without ordered clause. |
2871 | 0 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
2872 | 0 | CGF.LoopStack.setParallel(!IsMonotonic); |
2873 | 0 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
2874 | 0 | if (C->getKind() == OMPC_ORDER_concurrent) |
2875 | 0 | CGF.LoopStack.setParallel(/*Enable=*/true); |
2876 | 0 | } else { |
2877 | 0 | CGF.EmitOMPSimdInit(S); |
2878 | 0 | } |
2879 | 0 | }, |
2880 | 0 | [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
2881 | 0 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2882 | 0 | SourceLocation Loc = S.getBeginLoc(); |
2883 | | // when 'distribute' is not combined with a 'for': |
2884 | | // while (idx <= UB) { BODY; ++idx; } |
2885 | | // when 'distribute' is combined with a 'for' |
2886 | | // (e.g. 'distribute parallel for') |
2887 | | // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } |
2888 | 0 | CGF.EmitOMPInnerLoop( |
2889 | 0 | S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, |
2890 | 0 | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
2891 | 0 | CodeGenLoop(CGF, S, LoopExit); |
2892 | 0 | }, |
2893 | 0 | [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { |
2894 | 0 | CodeGenOrdered(CGF, Loc, IVSize, IVSigned); |
2895 | 0 | }); |
2896 | 0 | }); |
2897 | |
2898 | 0 | EmitBlock(Continue.getBlock()); |
2899 | 0 | BreakContinueStack.pop_back(); |
2900 | 0 | if (!DynamicOrOrdered) { |
2901 | | // Emit "LB = LB + Stride", "UB = UB + Stride". |
2902 | 0 | EmitIgnoredExpr(LoopArgs.NextLB); |
2903 | 0 | EmitIgnoredExpr(LoopArgs.NextUB); |
2904 | 0 | } |
2905 | |
2906 | 0 | EmitBranch(CondBlock); |
2907 | 0 | OMPLoopNestStack.clear(); |
2908 | 0 | LoopStack.pop(); |
2909 | | // Emit the fall-through block. |
2910 | 0 | EmitBlock(LoopExit.getBlock()); |
2911 | | |
2912 | | // Tell the runtime we are done. |
2913 | 0 | auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { |
2914 | 0 | if (!DynamicOrOrdered) |
2915 | 0 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
2916 | 0 | S.getDirectiveKind()); |
2917 | 0 | }; |
2918 | 0 | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
2919 | 0 | } |
2920 | | |
2921 | | void CodeGenFunction::EmitOMPForOuterLoop( |
2922 | | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
2923 | | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
2924 | | const OMPLoopArguments &LoopArgs, |
2925 | 0 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2926 | 0 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2927 | | |
2928 | | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
2929 | 0 | const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); |
2930 | |
2931 | 0 | assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, |
2932 | 0 | LoopArgs.Chunk != nullptr)) && |
2933 | 0 | "static non-chunked schedule does not need outer loop"); |
2934 | | |
2935 | | // Emit outer loop. |
2936 | | // |
2937 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2938 | | // When schedule(dynamic,chunk_size) is specified, the iterations are |
2939 | | // distributed to threads in the team in chunks as the threads request them. |
2940 | | // Each thread executes a chunk of iterations, then requests another chunk, |
2941 | | // until no chunks remain to be distributed. Each chunk contains chunk_size |
2942 | | // iterations, except for the last chunk to be distributed, which may have |
2943 | | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
2944 | | // |
2945 | | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
2946 | | // to threads in the team in chunks as the executing threads request them. |
2947 | | // Each thread executes a chunk of iterations, then requests another chunk, |
2948 | | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
2949 | | // each chunk is proportional to the number of unassigned iterations divided |
2950 | | // by the number of threads in the team, decreasing to 1. For a chunk_size |
2951 | | // with value k (greater than 1), the size of each chunk is determined in the |
2952 | | // same way, with the restriction that the chunks do not contain fewer than k |
2953 | | // iterations (except for the last chunk to be assigned, which may have fewer |
2954 | | // than k iterations). |
2955 | | // |
2956 | | // When schedule(auto) is specified, the decision regarding scheduling is |
2957 | | // delegated to the compiler and/or runtime system. The programmer gives the |
2958 | | // implementation the freedom to choose any possible mapping of iterations to |
2959 | | // threads in the team. |
2960 | | // |
2961 | | // When schedule(runtime) is specified, the decision regarding scheduling is |
2962 | | // deferred until run time, and the schedule and chunk size are taken from the |
2963 | | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
2964 | | // implementation defined. |
2965 | | // |
2966 | | // while(__kmpc_dispatch_next(&LB, &UB)) { |
2967 | | // idx = LB; |
2968 | | // while (idx <= UB) { BODY; ++idx; |
2969 | | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
2970 | | // } // inner loop |
2971 | | // } |
2972 | | // |
2973 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2974 | | // When schedule(static, chunk_size) is specified, iterations are divided into |
2975 | | // chunks of size chunk_size, and the chunks are assigned to the threads in |
2976 | | // the team in a round-robin fashion in the order of the thread number. |
2977 | | // |
2978 | | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
2979 | | // while (idx <= UB) { BODY; ++idx; } // inner loop |
2980 | | // LB = LB + ST; |
2981 | | // UB = UB + ST; |
2982 | | // } |
2983 | | // |
2984 | | |
2985 | 0 | const Expr *IVExpr = S.getIterationVariable(); |
2986 | 0 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2987 | 0 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2988 | |
2989 | 0 | if (DynamicOrOrdered) { |
2990 | 0 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
2991 | 0 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
2992 | 0 | llvm::Value *LBVal = DispatchBounds.first; |
2993 | 0 | llvm::Value *UBVal = DispatchBounds.second; |
2994 | 0 | CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal, |
2995 | 0 | LoopArgs.Chunk}; |
2996 | 0 | RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, |
2997 | 0 | IVSigned, Ordered, DispatchRTInputValues); |
2998 | 0 | } else { |
2999 | 0 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3000 | 0 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
3001 | 0 | LoopArgs.ST, LoopArgs.Chunk); |
3002 | 0 | RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), |
3003 | 0 | ScheduleKind, StaticInit); |
3004 | 0 | } |
3005 | |
3006 | 0 | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
3007 | 0 | const unsigned IVSize, |
3008 | 0 | const bool IVSigned) { |
3009 | 0 | if (Ordered) { |
3010 | 0 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
3011 | 0 | IVSigned); |
3012 | 0 | } |
3013 | 0 | }; |
3014 | |
3015 | 0 | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
3016 | 0 | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
3017 | 0 | OuterLoopArgs.IncExpr = S.getInc(); |
3018 | 0 | OuterLoopArgs.Init = S.getInit(); |
3019 | 0 | OuterLoopArgs.Cond = S.getCond(); |
3020 | 0 | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
3021 | 0 | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
3022 | 0 | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, |
3023 | 0 | emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
3024 | 0 | } |
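
// Illustrative sketch (hypothetical user code) of a loop that takes this
// outer-loop path: a dynamic schedule makes DynamicOrOrdered true, so each
// thread repeatedly requests [LB..UB] chunks via __kmpc_dispatch_next.
void saxpy(float *y, const float *x, float a, int n) {
#pragma omp parallel for schedule(dynamic, 4)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}
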
3025 | | |
3026 | | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
3027 | 0 | const unsigned IVSize, const bool IVSigned) {} |
3028 | | |
3029 | | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
3030 | | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
3031 | | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
3032 | 0 | const CodeGenLoopTy &CodeGenLoopContent) { |
3033 | |
3034 | 0 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3035 | | |
3036 | | // Emit outer loop. |
3037 | | // Same behavior as an OMPForOuterLoop, except that the schedule cannot |
3038 | | // be dynamic. |
3039 | | // |
3040 | |
3041 | 0 | const Expr *IVExpr = S.getIterationVariable(); |
3042 | 0 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
3043 | 0 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3044 | |
3045 | 0 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3046 | 0 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
3047 | 0 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
3048 | 0 | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); |
3049 | | |
3050 | | // for combined 'distribute' and 'for' the increment expression of distribute |
3051 | | // is stored in DistInc. For 'distribute' alone, it is in Inc. |
3052 | 0 | Expr *IncExpr; |
3053 | 0 | if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) |
3054 | 0 | IncExpr = S.getDistInc(); |
3055 | 0 | else |
3056 | 0 | IncExpr = S.getInc(); |
3057 | | |
3058 | | // This routine is shared by 'omp distribute parallel for' and |
3059 | | // 'omp distribute': select the right EUB expression depending on the |
3060 | | // directive. |
3061 | 0 | OMPLoopArguments OuterLoopArgs; |
3062 | 0 | OuterLoopArgs.LB = LoopArgs.LB; |
3063 | 0 | OuterLoopArgs.UB = LoopArgs.UB; |
3064 | 0 | OuterLoopArgs.ST = LoopArgs.ST; |
3065 | 0 | OuterLoopArgs.IL = LoopArgs.IL; |
3066 | 0 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
3067 | 0 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3068 | 0 | ? S.getCombinedEnsureUpperBound() |
3069 | 0 | : S.getEnsureUpperBound(); |
3070 | 0 | OuterLoopArgs.IncExpr = IncExpr; |
3071 | 0 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3072 | 0 | ? S.getCombinedInit() |
3073 | 0 | : S.getInit(); |
3074 | 0 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3075 | 0 | ? S.getCombinedCond() |
3076 | 0 | : S.getCond(); |
3077 | 0 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3078 | 0 | ? S.getCombinedNextLowerBound() |
3079 | 0 | : S.getNextLowerBound(); |
3080 | 0 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3081 | 0 | ? S.getCombinedNextUpperBound() |
3082 | 0 | : S.getNextUpperBound(); |
3083 | |
3084 | 0 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
3085 | 0 | LoopScope, OuterLoopArgs, CodeGenLoopContent, |
3086 | 0 | emitEmptyOrdered); |
3087 | 0 | } |
3088 | | |
3089 | | static std::pair<LValue, LValue> |
3090 | | emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, |
3091 | 0 | const OMPExecutableDirective &S) { |
3092 | 0 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); |
3093 | 0 | LValue LB = |
3094 | 0 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); |
3095 | 0 | LValue UB = |
3096 | 0 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); |
3097 | | |
3098 | | // When composing 'distribute' with 'for' (e.g. as in 'distribute |
3099 | | // parallel for') we need to use the 'distribute' |
3100 | | // chunk lower and upper bounds rather than the whole loop iteration |
3101 | | // space. These are parameters to the outlined function for 'parallel' |
3102 | | // and we copy the bounds of the previous schedule into |
3103 | | // the current ones. |
3104 | 0 | LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); |
3105 | 0 | LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); |
3106 | 0 | llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( |
3107 | 0 | PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); |
3108 | 0 | PrevLBVal = CGF.EmitScalarConversion( |
3109 | 0 | PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), |
3110 | 0 | LS.getIterationVariable()->getType(), |
3111 | 0 | LS.getPrevLowerBoundVariable()->getExprLoc()); |
3112 | 0 | llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( |
3113 | 0 | PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); |
3114 | 0 | PrevUBVal = CGF.EmitScalarConversion( |
3115 | 0 | PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), |
3116 | 0 | LS.getIterationVariable()->getType(), |
3117 | 0 | LS.getPrevUpperBoundVariable()->getExprLoc()); |
3118 | |
3119 | 0 | CGF.EmitStoreOfScalar(PrevLBVal, LB); |
3120 | 0 | CGF.EmitStoreOfScalar(PrevUBVal, UB); |
3121 | |
3122 | 0 | return {LB, UB}; |
3123 | 0 | } |
3124 | | |
3125 | | /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then |
3126 | | /// we need to use the LB and UB expressions generated by the worksharing |
3127 | | /// code generation support, whereas in non-combined situations we would |
3128 | | /// just emit 0 and the LastIteration expression. |
3129 | | /// This function is necessary due to the difference between the LB and UB |
3130 | | /// types for the RT emission routines for 'for_static_init' and |
3131 | | /// 'for_dispatch_init'. |
3132 | | static std::pair<llvm::Value *, llvm::Value *> |
3133 | | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
3134 | | const OMPExecutableDirective &S, |
3135 | 0 | Address LB, Address UB) { |
3136 | 0 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); |
3137 | 0 | const Expr *IVExpr = LS.getIterationVariable(); |
3138 | | // When implementing a dynamic schedule for a 'for' combined with a |
3139 | | // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop |
3140 | | // is not normalized, as each team only executes its own assigned |
3141 | | // distribute chunk. |
3142 | 0 | QualType IteratorTy = IVExpr->getType(); |
3143 | 0 | llvm::Value *LBVal = |
3144 | 0 | CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
3145 | 0 | llvm::Value *UBVal = |
3146 | 0 | CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
3147 | 0 | return {LBVal, UBVal}; |
3148 | 0 | } |
3149 | | |
3150 | | static void emitDistributeParallelForDistributeInnerBoundParams( |
3151 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3152 | 0 | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
3153 | 0 | const auto &Dir = cast<OMPLoopDirective>(S); |
3154 | 0 | LValue LB = |
3155 | 0 | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); |
3156 | 0 | llvm::Value *LBCast = |
3157 | 0 | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), |
3158 | 0 | CGF.SizeTy, /*isSigned=*/false); |
3159 | 0 | CapturedVars.push_back(LBCast); |
3160 | 0 | LValue UB = |
3161 | 0 | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); |
3162 | |
3163 | 0 | llvm::Value *UBCast = |
3164 | 0 | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), |
3165 | 0 | CGF.SizeTy, /*isSigned=*/false); |
3166 | 0 | CapturedVars.push_back(UBCast); |
3167 | 0 | } |
3168 | | |
3169 | | static void |
3170 | | emitInnerParallelForWhenCombined(CodeGenFunction &CGF, |
3171 | | const OMPLoopDirective &S, |
3172 | 0 | CodeGenFunction::JumpDest LoopExit) { |
3173 | 0 | auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, |
3174 | 0 | PrePostActionTy &Action) { |
3175 | 0 | Action.Enter(CGF); |
3176 | 0 | bool HasCancel = false; |
3177 | 0 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
3178 | 0 | if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) |
3179 | 0 | HasCancel = D->hasCancel(); |
3180 | 0 | else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) |
3181 | 0 | HasCancel = D->hasCancel(); |
3182 | 0 | else if (const auto *D = |
3183 | 0 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) |
3184 | 0 | HasCancel = D->hasCancel(); |
3185 | 0 | } |
3186 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3187 | 0 | HasCancel); |
3188 | 0 | CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), |
3189 | 0 | emitDistributeParallelForInnerBounds, |
3190 | 0 | emitDistributeParallelForDispatchBounds); |
3191 | 0 | }; |
3192 | |
3193 | 0 | emitCommonOMPParallelDirective( |
3194 | 0 | CGF, S, |
3195 | 0 | isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for, |
3196 | 0 | CGInlinedWorksharingLoop, |
3197 | 0 | emitDistributeParallelForDistributeInnerBoundParams); |
3198 | 0 | } |
3199 | | |
3200 | | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
3201 | 0 | const OMPDistributeParallelForDirective &S) { |
3202 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3203 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
3204 | 0 | S.getDistInc()); |
3205 | 0 | }; |
3206 | 0 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3207 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3208 | 0 | } |
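
// Illustrative sketch (hypothetical user code): 'distribute' splits the
// iteration space across teams, and the inner 'parallel for' emitted by
// emitInnerParallelForWhenCombined re-splits each team's chunk across its
// threads using the previous bounds (PrevLB/PrevUB) described above.
void add_one(float *a, int n) {
#pragma omp teams distribute parallel for
  for (int i = 0; i < n; ++i)
    a[i] += 1.0f;
}
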
3209 | | |
3210 | | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
3211 | 0 | const OMPDistributeParallelForSimdDirective &S) { |
3212 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3213 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
3214 | 0 | S.getDistInc()); |
3215 | 0 | }; |
3216 | 0 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3217 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3218 | 0 | } |
3219 | | |
3220 | | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
3221 | 0 | const OMPDistributeSimdDirective &S) { |
3222 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3223 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
3224 | 0 | }; |
3225 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3226 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3227 | 0 | } |
3228 | | |
3229 | | void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( |
3230 | 0 | CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { |
3231 | | // Emit SPMD target simd region as a standalone region. |
3232 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3233 | 0 | emitOMPSimdRegion(CGF, S, Action); |
3234 | 0 | }; |
3235 | 0 | llvm::Function *Fn; |
3236 | 0 | llvm::Constant *Addr; |
3237 | | // Emit target region as a standalone region. |
3238 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
3239 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
3240 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
3241 | 0 | } |
3242 | | |
3243 | | void CodeGenFunction::EmitOMPTargetSimdDirective( |
3244 | 0 | const OMPTargetSimdDirective &S) { |
3245 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3246 | 0 | emitOMPSimdRegion(CGF, S, Action); |
3247 | 0 | }; |
3248 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
3249 | 0 | } |
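
// Illustrative sketch (hypothetical user code): the same emitOMPSimdRegion
// callback serves both the device-side outlining above and the host
// fallback emitted here.
void dbl(float *a, int n) {
#pragma omp target simd map(tofrom : a[0:n])
  for (int i = 0; i < n; ++i)
    a[i] *= 2.0f;
}
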
3250 | | |
3251 | | namespace { |
3252 | | struct ScheduleKindModifiersTy { |
3253 | | OpenMPScheduleClauseKind Kind; |
3254 | | OpenMPScheduleClauseModifier M1; |
3255 | | OpenMPScheduleClauseModifier M2; |
3256 | | ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, |
3257 | | OpenMPScheduleClauseModifier M1, |
3258 | | OpenMPScheduleClauseModifier M2) |
3259 | 0 | : Kind(Kind), M1(M1), M2(M2) {} |
3260 | | }; |
3261 | | } // namespace |
3262 | | |
3263 | | bool CodeGenFunction::EmitOMPWorksharingLoop( |
3264 | | const OMPLoopDirective &S, Expr *EUB, |
3265 | | const CodeGenLoopBoundsTy &CodeGenLoopBounds, |
3266 | 0 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
3267 | | // Emit the loop iteration variable. |
3268 | 0 | const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); |
3269 | 0 | const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); |
3270 | 0 | EmitVarDecl(*IVDecl); |
3271 | | |
3272 | | // Emit the iterations count variable. |
3273 | | // If it is not a variable, Sema decided to calculate iterations count on each |
3274 | | // iteration (e.g., it is foldable into a constant). |
3275 | 0 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
3276 | 0 | EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
3277 | | // Emit calculation of the iterations count. |
3278 | 0 | EmitIgnoredExpr(S.getCalcLastIteration()); |
3279 | 0 | } |
3280 | |
3281 | 0 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3282 | |
3283 | 0 | bool HasLastprivateClause; |
3284 | | // Check pre-condition. |
3285 | 0 | { |
3286 | 0 | OMPLoopScope PreInitScope(*this, S); |
3287 | | // Skip the entire loop if we don't meet the precondition. |
3288 | | // If the condition constant folds and can be elided, avoid emitting the |
3289 | | // whole loop. |
3290 | 0 | bool CondConstant; |
3291 | 0 | llvm::BasicBlock *ContBlock = nullptr; |
3292 | 0 | if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
3293 | 0 | if (!CondConstant) |
3294 | 0 | return false; |
3295 | 0 | } else { |
3296 | 0 | llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); |
3297 | 0 | ContBlock = createBasicBlock("omp.precond.end"); |
3298 | 0 | emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, |
3299 | 0 | getProfileCount(&S)); |
3300 | 0 | EmitBlock(ThenBlock); |
3301 | 0 | incrementProfileCounter(&S); |
3302 | 0 | } |
3303 | | |
3304 | 0 | RunCleanupsScope DoacrossCleanupScope(*this); |
3305 | 0 | bool Ordered = false; |
3306 | 0 | if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { |
3307 | 0 | if (OrderedClause->getNumForLoops()) |
3308 | 0 | RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); |
3309 | 0 | else |
3310 | 0 | Ordered = true; |
3311 | 0 | } |
3312 | |
3313 | 0 | llvm::DenseSet<const Expr *> EmittedFinals; |
3314 | 0 | emitAlignedClause(*this, S); |
3315 | 0 | bool HasLinears = EmitOMPLinearClauseInit(S); |
3316 | | // Emit helper vars inits. |
3317 | |
3318 | 0 | std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); |
3319 | 0 | LValue LB = Bounds.first; |
3320 | 0 | LValue UB = Bounds.second; |
3321 | 0 | LValue ST = |
3322 | 0 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); |
3323 | 0 | LValue IL = |
3324 | 0 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); |
3325 | | |
3326 | | // Emit 'then' code. |
3327 | 0 | { |
3328 | 0 | OMPPrivateScope LoopScope(*this); |
3329 | 0 | if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { |
3330 | | // Emit implicit barrier to synchronize threads and avoid data races on |
3331 | | // initialization of firstprivate variables and post-update of |
3332 | | // lastprivate variables. |
3333 | 0 | CGM.getOpenMPRuntime().emitBarrierCall( |
3334 | 0 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3335 | 0 | /*ForceSimpleCall=*/true); |
3336 | 0 | } |
3337 | 0 | EmitOMPPrivateClause(S, LoopScope); |
3338 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
3339 | 0 | *this, S, EmitLValue(S.getIterationVariable())); |
3340 | 0 | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
3341 | 0 | EmitOMPReductionClauseInit(S, LoopScope); |
3342 | 0 | EmitOMPPrivateLoopCounters(S, LoopScope); |
3343 | 0 | EmitOMPLinearClause(S, LoopScope); |
3344 | 0 | (void)LoopScope.Privatize(); |
3345 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
3346 | 0 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
3347 | | |
3348 | | // Detect the loop schedule kind and chunk. |
3349 | 0 | const Expr *ChunkExpr = nullptr; |
3350 | 0 | OpenMPScheduleTy ScheduleKind; |
3351 | 0 | if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { |
3352 | 0 | ScheduleKind.Schedule = C->getScheduleKind(); |
3353 | 0 | ScheduleKind.M1 = C->getFirstScheduleModifier(); |
3354 | 0 | ScheduleKind.M2 = C->getSecondScheduleModifier(); |
3355 | 0 | ChunkExpr = C->getChunkSize(); |
3356 | 0 | } else { |
3357 | | // Default behaviour for schedule clause. |
3358 | 0 | CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( |
3359 | 0 | *this, S, ScheduleKind.Schedule, ChunkExpr); |
3360 | 0 | } |
3361 | 0 | bool HasChunkSizeOne = false; |
3362 | 0 | llvm::Value *Chunk = nullptr; |
3363 | 0 | if (ChunkExpr) { |
3364 | 0 | Chunk = EmitScalarExpr(ChunkExpr); |
3365 | 0 | Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), |
3366 | 0 | S.getIterationVariable()->getType(), |
3367 | 0 | S.getBeginLoc()); |
3368 | 0 | Expr::EvalResult Result; |
3369 | 0 | if (ChunkExpr->EvaluateAsInt(Result, getContext())) { |
3370 | 0 | llvm::APSInt EvaluatedChunk = Result.Val.getInt(); |
3371 | 0 | HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); |
3372 | 0 | } |
3373 | 0 | } |
3374 | 0 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
3375 | 0 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3376 | | // OpenMP 4.5, 2.7.1 Loop Construct, Description. |
3377 | | // If the static schedule kind is specified or if the ordered clause is |
3378 | | // specified, and if no monotonic modifier is specified, the effect will |
3379 | | // be as if the monotonic modifier was specified. |
3380 | 0 | bool StaticChunkedOne = |
3381 | 0 | RT.isStaticChunked(ScheduleKind.Schedule, |
3382 | 0 | /* Chunked */ Chunk != nullptr) && |
3383 | 0 | HasChunkSizeOne && |
3384 | 0 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
3385 | 0 | bool IsMonotonic = |
3386 | 0 | Ordered || |
3387 | 0 | (ScheduleKind.Schedule == OMPC_SCHEDULE_static && |
3388 | 0 | !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || |
3389 | 0 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || |
3390 | 0 | ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || |
3391 | 0 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; |
3392 | 0 | if ((RT.isStaticNonchunked(ScheduleKind.Schedule, |
3393 | 0 | /* Chunked */ Chunk != nullptr) || |
3394 | 0 | StaticChunkedOne) && |
3395 | 0 | !Ordered) { |
3396 | 0 | JumpDest LoopExit = |
3397 | 0 | getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
3398 | 0 | emitCommonSimdLoop( |
3399 | 0 | *this, S, |
3400 | 0 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3401 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3402 | 0 | CGF.EmitOMPSimdInit(S); |
3403 | 0 | } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
3404 | 0 | if (C->getKind() == OMPC_ORDER_concurrent) |
3405 | 0 | CGF.LoopStack.setParallel(/*Enable=*/true); |
3406 | 0 | } |
3407 | 0 | }, |
3408 | 0 | [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, |
3409 | 0 | &S, ScheduleKind, LoopExit, |
3410 | 0 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
3411 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3412 | | // When no chunk_size is specified, the iteration space is divided |
3413 | | // into chunks that are approximately equal in size, and at most |
3414 | | // one chunk is distributed to each thread. Note that the size of |
3415 | | // the chunks is unspecified in this case. |
3416 | 0 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3417 | 0 | IVSize, IVSigned, Ordered, IL.getAddress(CGF), |
3418 | 0 | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), |
3419 | 0 | StaticChunkedOne ? Chunk : nullptr); |
3420 | 0 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
3421 | 0 | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, |
3422 | 0 | StaticInit); |
3423 | | // UB = min(UB, GlobalUB); |
3424 | 0 | if (!StaticChunkedOne) |
3425 | 0 | CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); |
3426 | | // IV = LB; |
3427 | 0 | CGF.EmitIgnoredExpr(S.getInit()); |
3428 | | // For unchunked static schedule generate: |
3429 | | // |
3430 | | // while (idx <= UB) { |
3431 | | // BODY; |
3432 | | // ++idx; |
3433 | | // } |
3434 | | // |
3435 | | // For static schedule with chunk one: |
3436 | | // |
3437 | | // while (IV <= PrevUB) { |
3438 | | // BODY; |
3439 | | // IV += ST; |
3440 | | // } |
3441 | 0 | CGF.EmitOMPInnerLoop( |
3442 | 0 | S, LoopScope.requiresCleanups(), |
3443 | 0 | StaticChunkedOne ? S.getCombinedParForInDistCond() |
3444 | 0 | : S.getCond(), |
3445 | 0 | StaticChunkedOne ? S.getDistInc() : S.getInc(), |
3446 | 0 | [&S, LoopExit](CodeGenFunction &CGF) { |
3447 | 0 | emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); |
3448 | 0 | }, |
3449 | 0 | [](CodeGenFunction &) {}); |
3450 | 0 | }); |
3451 | 0 | EmitBlock(LoopExit.getBlock()); |
3452 | | // Tell the runtime we are done. |
3453 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
3454 | 0 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
3455 | 0 | S.getDirectiveKind()); |
3456 | 0 | }; |
3457 | 0 | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
3458 | 0 | } else { |
3459 | | // Emit the outer loop, which requests its work chunk [LB..UB] from |
3460 | | // runtime and runs the inner loop to process it. |
3461 | 0 | const OMPLoopArguments LoopArguments( |
3462 | 0 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
3463 | 0 | IL.getAddress(*this), Chunk, EUB); |
3464 | 0 | EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, |
3465 | 0 | LoopArguments, CGDispatchBounds); |
3466 | 0 | } |
3467 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3468 | 0 | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3469 | 0 | return CGF.Builder.CreateIsNotNull( |
3470 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3471 | 0 | }); |
3472 | 0 | } |
3473 | 0 | EmitOMPReductionClauseFinal( |
3474 | 0 | S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) |
3475 | 0 | ? /*Parallel and Simd*/ OMPD_parallel_for_simd |
3476 | 0 | : /*Parallel only*/ OMPD_parallel); |
3477 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
3478 | 0 | emitPostUpdateForReductionClause( |
3479 | 0 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
3480 | 0 | return CGF.Builder.CreateIsNotNull( |
3481 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3482 | 0 | }); |
3483 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
3484 | 0 | if (HasLastprivateClause) |
3485 | 0 | EmitOMPLastprivateClauseFinal( |
3486 | 0 | S, isOpenMPSimdDirective(S.getDirectiveKind()), |
3487 | 0 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
3488 | 0 | LoopScope.restoreMap(); |
3489 | 0 | EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3490 | 0 | return CGF.Builder.CreateIsNotNull( |
3491 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3492 | 0 | }); |
3493 | 0 | } |
3494 | 0 | DoacrossCleanupScope.ForceCleanup(); |
3495 | | // We're now done with the loop, so jump to the continuation block. |
3496 | 0 | if (ContBlock) { |
3497 | 0 | EmitBranch(ContBlock); |
3498 | 0 | EmitBlock(ContBlock, /*IsFinished=*/true); |
3499 | 0 | } |
3500 | 0 | } |
3501 | 0 | return HasLastprivateClause; |
3502 | 0 | } |
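
// Illustrative sketch (hypothetical user code) of the static non-chunked
// fast path above: no 'ordered' clause and schedule(static) mean at most
// one chunk per thread, so a single static init/fini pair suffices and no
// outer dispatch loop is emitted.
void init_array(int *a, int n) {
#pragma omp parallel for schedule(static)
  for (int i = 0; i < n; ++i)
    a[i] = i;
}
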
3503 | | |
3504 | | /// The following two functions generate expressions for the loop lower |
3505 | | /// and upper bounds in the case of a static or dynamic (dispatch) schedule |
3506 | | /// of the associated 'for' or 'distribute' loop. |
3507 | | static std::pair<LValue, LValue> |
3508 | 0 | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3509 | 0 | const auto &LS = cast<OMPLoopDirective>(S); |
3510 | 0 | LValue LB = |
3511 | 0 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); |
3512 | 0 | LValue UB = |
3513 | 0 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); |
3514 | 0 | return {LB, UB}; |
3515 | 0 | } |
3516 | | |
3517 | | /// When dealing with dispatch schedules (e.g. dynamic, guided), we do not |
3518 | | /// consider the lower and upper bound expressions generated by the |
3519 | | /// worksharing loop support, but instead use 0 and the iteration space size |
3520 | | /// as constants. |
3521 | | static std::pair<llvm::Value *, llvm::Value *> |
3522 | | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3523 | 0 | Address LB, Address UB) { |
3524 | 0 | const auto &LS = cast<OMPLoopDirective>(S); |
3525 | 0 | const Expr *IVExpr = LS.getIterationVariable(); |
3526 | 0 | const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); |
3527 | 0 | llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); |
3528 | 0 | llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); |
3529 | 0 | return {LBVal, UBVal}; |
3530 | 0 | } |
3531 | | |
3532 | | /// Emits internal temp array declarations for the directive with inscan |
3533 | | /// reductions. |
3534 | | /// The code is the following: |
3535 | | /// \code |
3536 | | /// size num_iters = <num_iters>; |
3537 | | /// <type> buffer[num_iters]; |
3538 | | /// \endcode |
3539 | | static void emitScanBasedDirectiveDecls( |
3540 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3541 | 0 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3542 | 0 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3543 | 0 | NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); |
3544 | 0 | SmallVector<const Expr *, 4> Shareds; |
3545 | 0 | SmallVector<const Expr *, 4> Privates; |
3546 | 0 | SmallVector<const Expr *, 4> ReductionOps; |
3547 | 0 | SmallVector<const Expr *, 4> CopyArrayTemps; |
3548 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3549 | 0 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3550 | 0 | "Only inscan reductions are expected."); |
3551 | 0 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
3552 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
3553 | 0 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
3554 | 0 | CopyArrayTemps.append(C->copy_array_temps().begin(), |
3555 | 0 | C->copy_array_temps().end()); |
3556 | 0 | } |
3557 | 0 | { |
3558 | | // Emit a buffer for each reduction variable.
3559 | | // ReductionCodeGen is required to correctly emit the code for array
3560 | | // reductions.
3561 | 0 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
3562 | 0 | unsigned Count = 0; |
3563 | 0 | auto *ITA = CopyArrayTemps.begin(); |
3564 | 0 | for (const Expr *IRef : Privates) { |
3565 | 0 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
3566 | | // Emit variably modified arrays, used for array/array-section
3567 | | // reductions.
3568 | 0 | if (PrivateVD->getType()->isVariablyModifiedType()) { |
3569 | 0 | RedCG.emitSharedOrigLValue(CGF, Count); |
3570 | 0 | RedCG.emitAggregateType(CGF, Count); |
3571 | 0 | } |
3572 | 0 | CodeGenFunction::OpaqueValueMapping DimMapping( |
3573 | 0 | CGF, |
3574 | 0 | cast<OpaqueValueExpr>( |
3575 | 0 | cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) |
3576 | 0 | ->getSizeExpr()), |
3577 | 0 | RValue::get(OMPScanNumIterations)); |
3578 | | // Emit temp buffer. |
3579 | 0 | CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); |
3580 | 0 | ++ITA; |
3581 | 0 | ++Count; |
3582 | 0 | } |
3583 | 0 | } |
3584 | 0 | } |
3585 | | |
3586 | | /// Copies the final inscan reduction values to the original variables.
3587 | | /// The code is the following: |
3588 | | /// \code |
3589 | | /// <orig_var> = buffer[num_iters-1]; |
3590 | | /// \endcode |
3591 | | static void emitScanBasedDirectiveFinals( |
3592 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3593 | 0 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3594 | 0 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3595 | 0 | NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); |
3596 | 0 | SmallVector<const Expr *, 4> Shareds; |
3597 | 0 | SmallVector<const Expr *, 4> LHSs; |
3598 | 0 | SmallVector<const Expr *, 4> RHSs; |
3599 | 0 | SmallVector<const Expr *, 4> Privates; |
3600 | 0 | SmallVector<const Expr *, 4> CopyOps; |
3601 | 0 | SmallVector<const Expr *, 4> CopyArrayElems; |
3602 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3603 | 0 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3604 | 0 | "Only inscan reductions are expected."); |
3605 | 0 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
3606 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3607 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3608 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
3609 | 0 | CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); |
3610 | 0 | CopyArrayElems.append(C->copy_array_elems().begin(), |
3611 | 0 | C->copy_array_elems().end()); |
3612 | 0 | } |
3613 | | // Copy the last element of the temp buffer to the original variable:
3614 | | // LHS = TMP[LastIter];
3615 | 0 | llvm::Value *OMPLast = CGF.Builder.CreateNSWSub( |
3616 | 0 | OMPScanNumIterations, |
3617 | 0 | llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false)); |
3618 | 0 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
3619 | 0 | const Expr *PrivateExpr = Privates[I]; |
3620 | 0 | const Expr *OrigExpr = Shareds[I]; |
3621 | 0 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
3622 | 0 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3623 | 0 | CGF, |
3624 | 0 | cast<OpaqueValueExpr>( |
3625 | 0 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
3626 | 0 | RValue::get(OMPLast)); |
3627 | 0 | LValue DestLVal = CGF.EmitLValue(OrigExpr); |
3628 | 0 | LValue SrcLVal = CGF.EmitLValue(CopyArrayElem); |
3629 | 0 | CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF), |
3630 | 0 | SrcLVal.getAddress(CGF), |
3631 | 0 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
3632 | 0 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
3633 | 0 | CopyOps[I]); |
3634 | 0 | } |
3635 | 0 | } |
3636 | | |
3637 | | /// Emits the code for the directive with inscan reductions. |
3638 | | /// The code is the following: |
3639 | | /// \code |
3640 | | /// #pragma omp ... |
3641 | | /// for (i: 0..<num_iters>) { |
3642 | | /// <input phase>; |
3643 | | /// buffer[i] = red; |
3644 | | /// } |
3645 | | /// #pragma omp master // in parallel region |
3646 | | /// for (int k = 0; k != ceil(log2(num_iters)); ++k) |
3647 | | /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3648 | | /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3649 | | /// #pragma omp barrier // in parallel region |
3650 | | /// #pragma omp ... |
3651 | | /// for (i: 0..<num_iters>) {
3652 | | /// red = InclusiveScan ? buffer[i] : buffer[i-1]; |
3653 | | /// <scan phase>; |
3654 | | /// } |
3655 | | /// \endcode |
3656 | | static void emitScanBasedDirective( |
3657 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3658 | | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, |
3659 | | llvm::function_ref<void(CodeGenFunction &)> FirstGen, |
3660 | 0 | llvm::function_ref<void(CodeGenFunction &)> SecondGen) { |
3661 | 0 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3662 | 0 | NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); |
3663 | 0 | SmallVector<const Expr *, 4> Privates; |
3664 | 0 | SmallVector<const Expr *, 4> ReductionOps; |
3665 | 0 | SmallVector<const Expr *, 4> LHSs; |
3666 | 0 | SmallVector<const Expr *, 4> RHSs; |
3667 | 0 | SmallVector<const Expr *, 4> CopyArrayElems; |
3668 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3669 | 0 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3670 | 0 | "Only inscan reductions are expected."); |
3671 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
3672 | 0 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
3673 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3674 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3675 | 0 | CopyArrayElems.append(C->copy_array_elems().begin(), |
3676 | 0 | C->copy_array_elems().end()); |
3677 | 0 | } |
3678 | 0 | CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); |
3679 | 0 | { |
3680 | | // Emit loop with input phase: |
3681 | | // #pragma omp ... |
3682 | | // for (i: 0..<num_iters>) { |
3683 | | // <input phase>; |
3684 | | // buffer[i] = red; |
3685 | | // } |
3686 | 0 | CGF.OMPFirstScanLoop = true; |
3687 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3688 | 0 | FirstGen(CGF); |
3689 | 0 | } |
3690 | | // #pragma omp barrier // in parallel region |
3691 | 0 | auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, |
3692 | 0 | &ReductionOps, |
3693 | 0 | &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3694 | 0 | Action.Enter(CGF); |
3695 | | // Emit prefix reduction: |
3696 | | // #pragma omp master // in parallel region |
3697 | | // for (int k = 0; k != ceil(log2(n)); ++k)
3698 | 0 | llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); |
3699 | 0 | llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); |
3700 | 0 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); |
3701 | 0 | llvm::Function *F = |
3702 | 0 | CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); |
3703 | 0 | llvm::Value *Arg = |
3704 | 0 | CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); |
3705 | 0 | llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); |
3706 | 0 | F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); |
3707 | 0 | LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); |
3708 | 0 | LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); |
3709 | 0 | llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( |
3710 | 0 | OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); |
3711 | 0 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); |
3712 | 0 | CGF.EmitBlock(LoopBB); |
3713 | 0 | auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); |
3714 | | // size pow2k = 1; |
3715 | 0 | auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); |
3716 | 0 | Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); |
3717 | 0 | Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); |
3718 | | // for (size i = n - 1; i >= pow2k; --i)
3719 | | // tmp[i] op= tmp[i-pow2k]; |
3720 | 0 | llvm::BasicBlock *InnerLoopBB = |
3721 | 0 | CGF.createBasicBlock("omp.inner.log.scan.body"); |
3722 | 0 | llvm::BasicBlock *InnerExitBB = |
3723 | 0 | CGF.createBasicBlock("omp.inner.log.scan.exit"); |
3724 | 0 | llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); |
3725 | 0 | CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); |
3726 | 0 | CGF.EmitBlock(InnerLoopBB); |
3727 | 0 | auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); |
3728 | 0 | IVal->addIncoming(NMin1, LoopBB); |
3729 | 0 | { |
3730 | 0 | CodeGenFunction::OMPPrivateScope PrivScope(CGF); |
3731 | 0 | auto *ILHS = LHSs.begin(); |
3732 | 0 | auto *IRHS = RHSs.begin(); |
3733 | 0 | for (const Expr *CopyArrayElem : CopyArrayElems) { |
3734 | 0 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
3735 | 0 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
3736 | 0 | Address LHSAddr = Address::invalid(); |
3737 | 0 | { |
3738 | 0 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3739 | 0 | CGF, |
3740 | 0 | cast<OpaqueValueExpr>( |
3741 | 0 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
3742 | 0 | RValue::get(IVal)); |
3743 | 0 | LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); |
3744 | 0 | } |
3745 | 0 | PrivScope.addPrivate(LHSVD, LHSAddr); |
3746 | 0 | Address RHSAddr = Address::invalid(); |
3747 | 0 | { |
3748 | 0 | llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); |
3749 | 0 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3750 | 0 | CGF, |
3751 | 0 | cast<OpaqueValueExpr>( |
3752 | 0 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
3753 | 0 | RValue::get(OffsetIVal)); |
3754 | 0 | RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); |
3755 | 0 | } |
3756 | 0 | PrivScope.addPrivate(RHSVD, RHSAddr); |
3757 | 0 | ++ILHS; |
3758 | 0 | ++IRHS; |
3759 | 0 | } |
3760 | 0 | PrivScope.Privatize(); |
3761 | 0 | CGF.CGM.getOpenMPRuntime().emitReduction( |
3762 | 0 | CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
3763 | 0 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); |
3764 | 0 | } |
3765 | 0 | llvm::Value *NextIVal = |
3766 | 0 | CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); |
3767 | 0 | IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); |
3768 | 0 | CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); |
3769 | 0 | CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); |
3770 | 0 | CGF.EmitBlock(InnerExitBB); |
3771 | 0 | llvm::Value *Next = |
3772 | 0 | CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); |
3773 | 0 | Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); |
3774 | | // pow2k <<= 1; |
3775 | 0 | llvm::Value *NextPow2K = |
3776 | 0 | CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); |
3777 | 0 | Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); |
3778 | 0 | llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); |
3779 | 0 | CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); |
3780 | 0 | auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); |
3781 | 0 | CGF.EmitBlock(ExitBB); |
3782 | 0 | }; |
3783 | 0 | if (isOpenMPParallelDirective(S.getDirectiveKind())) { |
3784 | 0 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); |
3785 | 0 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
3786 | 0 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3787 | 0 | /*ForceSimpleCall=*/true); |
3788 | 0 | } else { |
3789 | 0 | RegionCodeGenTy RCG(CodeGen); |
3790 | 0 | RCG(CGF); |
3791 | 0 | } |
3792 | |
3793 | 0 | CGF.OMPFirstScanLoop = false; |
3794 | 0 | SecondGen(CGF); |
3795 | 0 | } |
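 | | // For reference, a source-level construct lowered by the scan-based
 | | // codegen above might look like this (all names are illustrative):
 | | // \code
 | | //   int red = 0;
 | | //   #pragma omp for reduction(inscan, +: red)
 | | //   for (int i = 0; i < n; ++i) {
 | | //     red += in[i];                    // <input phase>
 | | //     #pragma omp scan inclusive(red)
 | | //     out[i] = red;                    // <scan phase>
 | | //   }
 | | // \endcode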
3796 | | |
3797 | | static bool emitWorksharingDirective(CodeGenFunction &CGF, |
3798 | | const OMPLoopDirective &S, |
3799 | 0 | bool HasCancel) { |
3800 | 0 | bool HasLastprivates; |
3801 | 0 | if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
3802 | 0 | [](const OMPReductionClause *C) { |
3803 | 0 | return C->getModifier() == OMPC_REDUCTION_inscan; |
3804 | 0 | })) { |
3805 | 0 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
3806 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3807 | 0 | OMPLoopScope LoopScope(CGF, S); |
3808 | 0 | return CGF.EmitScalarExpr(S.getNumIterations()); |
3809 | 0 | }; |
3810 | 0 | const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { |
3811 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3812 | 0 | CGF, S.getDirectiveKind(), HasCancel); |
3813 | 0 | (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3814 | 0 | emitForLoopBounds, |
3815 | 0 | emitDispatchForLoopBounds); |
3816 | | // Emit an implicit barrier at the end. |
3817 | 0 | CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), |
3818 | 0 | OMPD_for); |
3819 | 0 | }; |
3820 | 0 | const auto &&SecondGen = [&S, HasCancel, |
3821 | 0 | &HasLastprivates](CodeGenFunction &CGF) { |
3822 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3823 | 0 | CGF, S.getDirectiveKind(), HasCancel); |
3824 | 0 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3825 | 0 | emitForLoopBounds, |
3826 | 0 | emitDispatchForLoopBounds); |
3827 | 0 | }; |
3828 | 0 | if (!isOpenMPParallelDirective(S.getDirectiveKind())) |
3829 | 0 | emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); |
3830 | 0 | emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); |
3831 | 0 | if (!isOpenMPParallelDirective(S.getDirectiveKind())) |
3832 | 0 | emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); |
3833 | 0 | } else { |
3834 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3835 | 0 | HasCancel); |
3836 | 0 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3837 | 0 | emitForLoopBounds, |
3838 | 0 | emitDispatchForLoopBounds); |
3839 | 0 | } |
3840 | 0 | return HasLastprivates; |
3841 | 0 | } |
3842 | | |
3843 | 0 | static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { |
3844 | 0 | if (S.hasCancel()) |
3845 | 0 | return false; |
3846 | 0 | for (OMPClause *C : S.clauses()) { |
3847 | 0 | if (isa<OMPNowaitClause>(C)) |
3848 | 0 | continue; |
3849 | | |
3850 | 0 | if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { |
3851 | 0 | if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3852 | 0 | return false; |
3853 | 0 | if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3854 | 0 | return false; |
3855 | 0 | switch (SC->getScheduleKind()) { |
3856 | 0 | case OMPC_SCHEDULE_auto: |
3857 | 0 | case OMPC_SCHEDULE_dynamic: |
3858 | 0 | case OMPC_SCHEDULE_runtime: |
3859 | 0 | case OMPC_SCHEDULE_guided: |
3860 | 0 | case OMPC_SCHEDULE_static: |
3861 | 0 | continue; |
3862 | 0 | case OMPC_SCHEDULE_unknown: |
3863 | 0 | return false; |
3864 | 0 | } |
3865 | 0 | } |
3866 | | |
3867 | 0 | return false; |
3868 | 0 | } |
3869 | | |
3870 | 0 | return true; |
3871 | 0 | } |
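 | | // For example (illustrative only), the first directive below satisfies
 | | // the checks above, while the schedule modifier on the second forces the
 | | // classic codegen path:
 | | // \code
 | | //   #pragma omp for schedule(guided) nowait         // supported
 | | //   #pragma omp for schedule(monotonic: dynamic)    // falls back
 | | // \endcode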
3872 | | |
3873 | | static llvm::omp::ScheduleKind |
3874 | 0 | convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { |
3875 | 0 | switch (ScheduleClauseKind) { |
3876 | 0 | case OMPC_SCHEDULE_unknown: |
3877 | 0 | return llvm::omp::OMP_SCHEDULE_Default; |
3878 | 0 | case OMPC_SCHEDULE_auto: |
3879 | 0 | return llvm::omp::OMP_SCHEDULE_Auto; |
3880 | 0 | case OMPC_SCHEDULE_dynamic: |
3881 | 0 | return llvm::omp::OMP_SCHEDULE_Dynamic; |
3882 | 0 | case OMPC_SCHEDULE_guided: |
3883 | 0 | return llvm::omp::OMP_SCHEDULE_Guided; |
3884 | 0 | case OMPC_SCHEDULE_runtime: |
3885 | 0 | return llvm::omp::OMP_SCHEDULE_Runtime; |
3886 | 0 | case OMPC_SCHEDULE_static: |
3887 | 0 | return llvm::omp::OMP_SCHEDULE_Static; |
3888 | 0 | } |
3889 | 0 | llvm_unreachable("Unhandled schedule kind"); |
3890 | 0 | } |
3891 | | |
3892 | 0 | void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
3893 | 0 | bool HasLastprivates = false; |
3894 | 0 | bool UseOMPIRBuilder = |
3895 | 0 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
3896 | 0 | auto &&CodeGen = [this, &S, &HasLastprivates, |
3897 | 0 | UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { |
3898 | | // Use the OpenMPIRBuilder if enabled. |
3899 | 0 | if (UseOMPIRBuilder) { |
3900 | 0 | bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); |
3901 | |
3902 | 0 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; |
3903 | 0 | llvm::Value *ChunkSize = nullptr; |
3904 | 0 | if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { |
3905 | 0 | SchedKind = |
3906 | 0 | convertClauseKindToSchedKind(SchedClause->getScheduleKind()); |
3907 | 0 | if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) |
3908 | 0 | ChunkSize = EmitScalarExpr(ChunkSizeExpr); |
3909 | 0 | } |
3910 | | |
3911 | | // Emit the associated statement and get its loop representation. |
3912 | 0 | const Stmt *Inner = S.getRawStmt(); |
3913 | 0 | llvm::CanonicalLoopInfo *CLI = |
3914 | 0 | EmitOMPCollapsedCanonicalLoopNest(Inner, 1); |
3915 | |
3916 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = |
3917 | 0 | CGM.getOpenMPRuntime().getOMPBuilder(); |
3918 | 0 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
3919 | 0 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
3920 | 0 | OMPBuilder.applyWorkshareLoop( |
3921 | 0 | Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, |
3922 | 0 | SchedKind, ChunkSize, /*HasSimdModifier=*/false, |
3923 | 0 | /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, |
3924 | 0 | /*HasOrderedClause=*/false); |
3925 | 0 | return; |
3926 | 0 | } |
3927 | | |
3928 | 0 | HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); |
3929 | 0 | }; |
3930 | 0 | { |
3931 | 0 | auto LPCRegion = |
3932 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3933 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3934 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, |
3935 | 0 | S.hasCancel()); |
3936 | 0 | } |
3937 | |
3938 | 0 | if (!UseOMPIRBuilder) { |
3939 | | // Emit an implicit barrier at the end. |
3940 | 0 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3941 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3942 | 0 | } |
3943 | | // Check for outer lastprivate conditional update. |
3944 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
3945 | 0 | } |
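 | | // A minimal sketch of an input this function handles; when the
 | | // OpenMPIRBuilder is enabled (via -fopenmp-enable-irbuilder, assuming the
 | | // current spelling of that flag), the schedule kind and chunk expression
 | | // below feed applyWorkshareLoop directly (names are illustrative):
 | | // \code
 | | //   #pragma omp for schedule(guided, 8)
 | | //   for (int i = 0; i < n; ++i) work(i);
 | | // \endcode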
3946 | | |
3947 | 0 | void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
3948 | 0 | bool HasLastprivates = false; |
3949 | 0 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3950 | 0 | PrePostActionTy &) { |
3951 | 0 | HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3952 | 0 | }; |
3953 | 0 | { |
3954 | 0 | auto LPCRegion = |
3955 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3956 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3957 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3958 | 0 | } |
3959 | | |
3960 | | // Emit an implicit barrier at the end. |
3961 | 0 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3962 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3963 | | // Check for outer lastprivate conditional update. |
3964 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
3965 | 0 | } |
3966 | | |
3967 | | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
3968 | | const Twine &Name, |
3969 | 0 | llvm::Value *Init = nullptr) { |
3970 | 0 | LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); |
3971 | 0 | if (Init) |
3972 | 0 | CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); |
3973 | 0 | return LVal; |
3974 | 0 | } |
3975 | | |
3976 | 0 | void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { |
3977 | 0 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
3978 | 0 | const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); |
3979 | 0 | bool HasLastprivates = false; |
3980 | 0 | auto &&CodeGen = [&S, CapturedStmt, CS, |
3981 | 0 | &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { |
3982 | 0 | const ASTContext &C = CGF.getContext(); |
3983 | 0 | QualType KmpInt32Ty = |
3984 | 0 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
3985 | | // Emit initializations of the helper variables.
3986 | 0 | LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", |
3987 | 0 | CGF.Builder.getInt32(0)); |
3988 | 0 | llvm::ConstantInt *GlobalUBVal = CS != nullptr |
3989 | 0 | ? CGF.Builder.getInt32(CS->size() - 1) |
3990 | 0 | : CGF.Builder.getInt32(0); |
3991 | 0 | LValue UB = |
3992 | 0 | createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); |
3993 | 0 | LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", |
3994 | 0 | CGF.Builder.getInt32(1)); |
3995 | 0 | LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", |
3996 | 0 | CGF.Builder.getInt32(0)); |
3997 | | // Loop counter. |
3998 | 0 | LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); |
3999 | 0 | OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4000 | 0 | CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); |
4001 | 0 | OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4002 | 0 | CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
4003 | | // Generate condition for loop. |
4004 | 0 | BinaryOperator *Cond = BinaryOperator::Create( |
4005 | 0 | C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, |
4006 | 0 | S.getBeginLoc(), FPOptionsOverride()); |
4007 | | // Increment for loop counter. |
4008 | 0 | UnaryOperator *Inc = UnaryOperator::Create( |
4009 | 0 | C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, |
4010 | 0 | S.getBeginLoc(), true, FPOptionsOverride()); |
4011 | 0 | auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
4012 | | // Iterate through all sections and emit a switch construct: |
4013 | | // switch (IV) { |
4014 | | // case 0: |
4015 | | // <SectionStmt[0]>; |
4016 | | // break; |
4017 | | // ... |
4018 | | // case <NumSection> - 1: |
4019 | | // <SectionStmt[<NumSection> - 1]>; |
4020 | | // break; |
4021 | | // } |
4022 | | // .omp.sections.exit: |
4023 | 0 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); |
4024 | 0 | llvm::SwitchInst *SwitchStmt = |
4025 | 0 | CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), |
4026 | 0 | ExitBB, CS == nullptr ? 1 : CS->size()); |
4027 | 0 | if (CS) { |
4028 | 0 | unsigned CaseNumber = 0; |
4029 | 0 | for (const Stmt *SubStmt : CS->children()) { |
4030 | 0 | auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); |
4031 | 0 | CGF.EmitBlock(CaseBB); |
4032 | 0 | SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); |
4033 | 0 | CGF.EmitStmt(SubStmt); |
4034 | 0 | CGF.EmitBranch(ExitBB); |
4035 | 0 | ++CaseNumber; |
4036 | 0 | } |
4037 | 0 | } else { |
4038 | 0 | llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); |
4039 | 0 | CGF.EmitBlock(CaseBB); |
4040 | 0 | SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); |
4041 | 0 | CGF.EmitStmt(CapturedStmt); |
4042 | 0 | CGF.EmitBranch(ExitBB); |
4043 | 0 | } |
4044 | 0 | CGF.EmitBlock(ExitBB, /*IsFinished=*/true); |
4045 | 0 | }; |
4046 | |
4047 | 0 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
4048 | 0 | if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { |
4049 | | // Emit implicit barrier to synchronize threads and avoid data races on |
4050 | | // initialization of firstprivate variables and post-update of lastprivate |
4051 | | // variables. |
4052 | 0 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
4053 | 0 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
4054 | 0 | /*ForceSimpleCall=*/true); |
4055 | 0 | } |
4056 | 0 | CGF.EmitOMPPrivateClause(S, LoopScope); |
4057 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); |
4058 | 0 | HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
4059 | 0 | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
4060 | 0 | (void)LoopScope.Privatize(); |
4061 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
4062 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
4063 | | |
4064 | | // Emit static non-chunked loop. |
4065 | 0 | OpenMPScheduleTy ScheduleKind; |
4066 | 0 | ScheduleKind.Schedule = OMPC_SCHEDULE_static; |
4067 | 0 | CGOpenMPRuntime::StaticRTInput StaticInit( |
4068 | 0 | /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), |
4069 | 0 | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); |
4070 | 0 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
4071 | 0 | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); |
4072 | | // UB = min(UB, GlobalUB); |
4073 | 0 | llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); |
4074 | 0 | llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( |
4075 | 0 | CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); |
4076 | 0 | CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); |
4077 | | // IV = LB; |
4078 | 0 | CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); |
4079 | | // while (idx <= UB) { BODY; ++idx; } |
4080 | 0 | CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, |
4081 | 0 | [](CodeGenFunction &) {}); |
4082 | | // Tell the runtime we are done. |
4083 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
4084 | 0 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
4085 | 0 | S.getDirectiveKind()); |
4086 | 0 | }; |
4087 | 0 | CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); |
4088 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4089 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
4090 | 0 | emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { |
4091 | 0 | return CGF.Builder.CreateIsNotNull( |
4092 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
4093 | 0 | }); |
4094 | | |
4095 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
4096 | 0 | if (HasLastprivates) |
4097 | 0 | CGF.EmitOMPLastprivateClauseFinal( |
4098 | 0 | S, /*NoFinals=*/false, |
4099 | 0 | CGF.Builder.CreateIsNotNull( |
4100 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); |
4101 | 0 | }; |
4102 | |
4103 | 0 | bool HasCancel = false; |
4104 | 0 | if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) |
4105 | 0 | HasCancel = OSD->hasCancel(); |
4106 | 0 | else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) |
4107 | 0 | HasCancel = OPSD->hasCancel(); |
4108 | 0 | OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); |
4109 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, |
4110 | 0 | HasCancel); |
4111 | | // Emit barrier for lastprivates only if the 'sections' directive has a
4112 | | // 'nowait' clause. Otherwise the barrier will be generated by the codegen
4113 | | // for the directive.
4114 | 0 | if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { |
4115 | | // Emit implicit barrier to synchronize threads and avoid data races on
4116 | | // the copy-back of lastprivate variables.
4117 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
4118 | 0 | OMPD_unknown); |
4119 | 0 | } |
4120 | 0 | } |
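 | | // Conceptually (a simplified sketch, not the exact emitted IR), a
 | | // directive such as
 | | // \code
 | | //   #pragma omp sections
 | | //   {
 | | //     #pragma omp section
 | | //     a();
 | | //     #pragma omp section
 | | //     b();
 | | //   }
 | | // \endcode
 | | // is lowered to a statically scheduled loop over IV in [LB, UB] whose
 | | // body is 'switch (IV) { case 0: a(); break; case 1: b(); break; }'.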
4121 | | |
4122 | 0 | void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
4123 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4124 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4125 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4126 | 0 | using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
4127 | |
4128 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
4129 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4130 | 0 | }; |
4131 | |
4132 | 0 | const CapturedStmt *ICS = S.getInnermostCapturedStmt(); |
4133 | 0 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
4134 | 0 | const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); |
4135 | 0 | llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; |
4136 | 0 | if (CS) { |
4137 | 0 | for (const Stmt *SubStmt : CS->children()) { |
4138 | 0 | auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, |
4139 | 0 | InsertPointTy CodeGenIP) { |
4140 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4141 | 0 | *this, SubStmt, AllocaIP, CodeGenIP, "section"); |
4142 | 0 | }; |
4143 | 0 | SectionCBVector.push_back(SectionCB); |
4144 | 0 | } |
4145 | 0 | } else { |
4146 | 0 | auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, |
4147 | 0 | InsertPointTy CodeGenIP) { |
4148 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4149 | 0 | *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); |
4150 | 0 | }; |
4151 | 0 | SectionCBVector.push_back(SectionCB); |
4152 | 0 | } |
4153 | | |
4154 | | // Privatization callback that performs the appropriate action for
4155 | | // shared/private/firstprivate/lastprivate/copyin/... variables.
4156 | | // |
4157 | | // TODO: This defaults to shared right now. |
4158 | 0 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
4159 | 0 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
4160 | | // The next line is appropriate only for variables (Val) with the |
4161 | | // data-sharing attribute "shared". |
4162 | 0 | ReplVal = &Val; |
4163 | |
4164 | 0 | return CodeGenIP; |
4165 | 0 | }; |
4166 | |
4167 | 0 | CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); |
4168 | 0 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
4169 | 0 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
4170 | 0 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
4171 | 0 | Builder.restoreIP(OMPBuilder.createSections( |
4172 | 0 | Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), |
4173 | 0 | S.getSingleClause<OMPNowaitClause>())); |
4174 | 0 | return; |
4175 | 0 | } |
4176 | 0 | { |
4177 | 0 | auto LPCRegion = |
4178 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4179 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4180 | 0 | EmitSections(S); |
4181 | 0 | } |
4182 | | // Emit an implicit barrier at the end. |
4183 | 0 | if (!S.getSingleClause<OMPNowaitClause>()) { |
4184 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
4185 | 0 | OMPD_sections); |
4186 | 0 | } |
4187 | | // Check for outer lastprivate conditional update. |
4188 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4189 | 0 | } |
4190 | | |
4191 | 0 | void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
4192 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4193 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4194 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4195 | |
4196 | 0 | const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); |
4197 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
4198 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4199 | 0 | }; |
4200 | |
4201 | 0 | auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, |
4202 | 0 | InsertPointTy CodeGenIP) { |
4203 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4204 | 0 | *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); |
4205 | 0 | }; |
4206 | |
4207 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4208 | 0 | EmitStopPoint(&S); |
4209 | 0 | Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); |
4210 | |
4211 | 0 | return; |
4212 | 0 | } |
4213 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4214 | 0 | EmitStopPoint(&S); |
4215 | 0 | EmitStmt(S.getAssociatedStmt()); |
4216 | 0 | } |
4217 | | |
4218 | 0 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
4219 | 0 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
4220 | 0 | llvm::SmallVector<const Expr *, 8> DestExprs; |
4221 | 0 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
4222 | 0 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
4223 | | // Check if there are any 'copyprivate' clauses associated with this |
4224 | | // 'single' construct. |
4225 | | // Build a list of copyprivate variables along with helper expressions |
4226 | | // (<source>, <destination>, <destination>=<source> expressions).
4227 | 0 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
4228 | 0 | CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); |
4229 | 0 | DestExprs.append(C->destination_exprs().begin(), |
4230 | 0 | C->destination_exprs().end()); |
4231 | 0 | SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); |
4232 | 0 | AssignmentOps.append(C->assignment_ops().begin(), |
4233 | 0 | C->assignment_ops().end()); |
4234 | 0 | } |
4235 | | // Emit code for the 'single' region along with the 'copyprivate' clauses.
4236 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4237 | 0 | Action.Enter(CGF); |
4238 | 0 | OMPPrivateScope SingleScope(CGF); |
4239 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); |
4240 | 0 | CGF.EmitOMPPrivateClause(S, SingleScope); |
4241 | 0 | (void)SingleScope.Privatize(); |
4242 | 0 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4243 | 0 | }; |
4244 | 0 | { |
4245 | 0 | auto LPCRegion = |
4246 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4247 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4248 | 0 | CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), |
4249 | 0 | CopyprivateVars, DestExprs, |
4250 | 0 | SrcExprs, AssignmentOps); |
4251 | 0 | } |
4252 | | // Emit an implicit barrier at the end (to avoid data races on firstprivate
4253 | | // init) unless a 'nowait' or a 'copyprivate' clause was specified.
4254 | 0 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { |
4255 | 0 | CGM.getOpenMPRuntime().emitBarrierCall( |
4256 | 0 | *this, S.getBeginLoc(), |
4257 | 0 | S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); |
4258 | 0 | } |
4259 | | // Check for outer lastprivate conditional update. |
4260 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4261 | 0 | } |
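 | | // For illustration, a 'single' with 'copyprivate' that exercises the
 | | // broadcast path above (names are hypothetical):
 | | // \code
 | | //   int token;
 | | //   #pragma omp single copyprivate(token)
 | | //   token = produce();
 | | //   // afterwards every thread sees the value written inside the region
 | | // \endcode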
4262 | | |
4263 | 0 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4264 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4265 | 0 | Action.Enter(CGF); |
4266 | 0 | CGF.EmitStmt(S.getRawStmt()); |
4267 | 0 | }; |
4268 | 0 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); |
4269 | 0 | } |
4270 | | |
4271 | 0 | void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
4272 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4273 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4274 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4275 | |
4276 | 0 | const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
4277 | |
4278 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
4279 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4280 | 0 | }; |
4281 | |
4282 | 0 | auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, |
4283 | 0 | InsertPointTy CodeGenIP) { |
4284 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4285 | 0 | *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); |
4286 | 0 | }; |
4287 | |
4288 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4289 | 0 | EmitStopPoint(&S); |
4290 | 0 | Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); |
4291 | |
4292 | 0 | return; |
4293 | 0 | } |
4294 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4295 | 0 | EmitStopPoint(&S); |
4296 | 0 | emitMaster(*this, S); |
4297 | 0 | } |
4298 | | |
4299 | 0 | static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4300 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4301 | 0 | Action.Enter(CGF); |
4302 | 0 | CGF.EmitStmt(S.getRawStmt()); |
4303 | 0 | }; |
4304 | 0 | Expr *Filter = nullptr; |
4305 | 0 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4306 | 0 | Filter = FilterClause->getThreadID(); |
4307 | 0 | CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), |
4308 | 0 | Filter); |
4309 | 0 | } |
4310 | | |
4311 | 0 | void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { |
4312 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4313 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4314 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4315 | |
4316 | 0 | const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); |
4317 | 0 | const Expr *Filter = nullptr; |
4318 | 0 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4319 | 0 | Filter = FilterClause->getThreadID(); |
4320 | 0 | llvm::Value *FilterVal = Filter |
4321 | 0 | ? EmitScalarExpr(Filter, CGM.Int32Ty) |
4322 | 0 | : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); |
4323 | |
4324 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
4325 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4326 | 0 | }; |
4327 | |
4328 | 0 | auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, |
4329 | 0 | InsertPointTy CodeGenIP) { |
4330 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4331 | 0 | *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); |
4332 | 0 | }; |
4333 | |
|
4334 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4335 | 0 | EmitStopPoint(&S); |
4336 | 0 | Builder.restoreIP( |
4337 | 0 | OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); |
4338 | |
4339 | 0 | return; |
4340 | 0 | } |
4341 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4342 | 0 | EmitStopPoint(&S); |
4343 | 0 | emitMasked(*this, S); |
4344 | 0 | } |
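 | | // For illustration, a 'masked' construct whose 'filter' expression
 | | // becomes FilterVal above; without a 'filter' clause the filter defaults
 | | // to thread 0:
 | | // \code
 | | //   #pragma omp masked filter(2)
 | | //   { work_done_by_thread_two(); }   // name is hypothetical
 | | // \endcode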
4345 | | |
4346 | 0 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
4347 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4348 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4349 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4350 | |
4351 | 0 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
4352 | 0 | const Expr *Hint = nullptr; |
4353 | 0 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4354 | 0 | Hint = HintClause->getHint(); |
4355 | | |
4356 | | // TODO: This is slightly different from what's currently being done in |
4357 | | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
4358 | | // about typing is final. |
4359 | 0 | llvm::Value *HintInst = nullptr; |
4360 | 0 | if (Hint) |
4361 | 0 | HintInst = |
4362 | 0 | Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); |
4363 | |
4364 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
4365 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4366 | 0 | }; |
4367 | |
4368 | 0 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
4369 | 0 | InsertPointTy CodeGenIP) { |
4370 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4371 | 0 | *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); |
4372 | 0 | }; |
4373 | |
|
4374 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4375 | 0 | EmitStopPoint(&S); |
4376 | 0 | Builder.restoreIP(OMPBuilder.createCritical( |
4377 | 0 | Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), |
4378 | 0 | HintInst)); |
4379 | |
4380 | 0 | return; |
4381 | 0 | } |
4382 | | |
4383 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4384 | 0 | Action.Enter(CGF); |
4385 | 0 | CGF.EmitStmt(S.getAssociatedStmt()); |
4386 | 0 | }; |
4387 | 0 | const Expr *Hint = nullptr; |
4388 | 0 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4389 | 0 | Hint = HintClause->getHint(); |
4390 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
4391 | 0 | EmitStopPoint(&S); |
4392 | 0 | CGM.getOpenMPRuntime().emitCriticalRegion(*this, |
4393 | 0 | S.getDirectiveName().getAsString(), |
4394 | 0 | CodeGen, S.getBeginLoc(), Hint); |
4395 | 0 | } |
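 | | // For illustration, a named 'critical' with a 'hint' clause; the hint
 | | // expression is the value cast to Int32 above (the omp_sync_hint_*
 | | // constants come from <omp.h>):
 | | // \code
 | | //   #pragma omp critical(update) hint(omp_sync_hint_contended)
 | | //   ++counter;   // name is hypothetical
 | | // \endcode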
4396 | | |
4397 | | void CodeGenFunction::EmitOMPParallelForDirective( |
4398 | 0 | const OMPParallelForDirective &S) { |
4399 | | // Emit directive as a combined directive that consists of two implicit |
4400 | | // directives: 'parallel' with 'for' directive. |
4401 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4402 | 0 | Action.Enter(CGF); |
4403 | 0 | emitOMPCopyinClause(CGF, S); |
4404 | 0 | (void)emitWorksharingDirective(CGF, S, S.hasCancel()); |
4405 | 0 | }; |
4406 | 0 | { |
4407 | 0 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4408 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4409 | 0 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4410 | 0 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4411 | 0 | OMPLoopScope LoopScope(CGF, S); |
4412 | 0 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4413 | 0 | }; |
4414 | 0 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4415 | 0 | [](const OMPReductionClause *C) { |
4416 | 0 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4417 | 0 | }); |
4418 | 0 | if (IsInscan) |
4419 | 0 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4420 | 0 | auto LPCRegion = |
4421 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4422 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
4423 | 0 | emitEmptyBoundParameters); |
4424 | 0 | if (IsInscan) |
4425 | 0 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4426 | 0 | } |
4427 | | // Check for outer lastprivate conditional update. |
4428 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4429 | 0 | } |
4430 | | |
4431 | | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
4432 | 0 | const OMPParallelForSimdDirective &S) { |
4433 | | // Emit directive as a combined directive that consists of two implicit |
4434 | | // directives: 'parallel' with 'for simd' directive.
4435 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4436 | 0 | Action.Enter(CGF); |
4437 | 0 | emitOMPCopyinClause(CGF, S); |
4438 | 0 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4439 | 0 | }; |
4440 | 0 | { |
4441 | 0 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4442 | 0 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4443 | 0 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4444 | 0 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4445 | 0 | OMPLoopScope LoopScope(CGF, S); |
4446 | 0 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4447 | 0 | }; |
4448 | 0 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4449 | 0 | [](const OMPReductionClause *C) { |
4450 | 0 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4451 | 0 | }); |
4452 | 0 | if (IsInscan) |
4453 | 0 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4454 | 0 | auto LPCRegion = |
4455 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4456 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, |
4457 | 0 | emitEmptyBoundParameters); |
4458 | 0 | if (IsInscan) |
4459 | 0 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4460 | 0 | } |
4461 | | // Check for outer lastprivate conditional update. |
4462 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4463 | 0 | } |
4464 | | |
4465 | | void CodeGenFunction::EmitOMPParallelMasterDirective( |
4466 | 0 | const OMPParallelMasterDirective &S) { |
4467 | | // Emit directive as a combined directive that consists of two implicit |
4468 | | // directives: 'parallel' with 'master' directive. |
4469 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4470 | 0 | Action.Enter(CGF); |
4471 | 0 | OMPPrivateScope PrivateScope(CGF); |
4472 | 0 | emitOMPCopyinClause(CGF, S); |
4473 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4474 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4475 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4476 | 0 | (void)PrivateScope.Privatize(); |
4477 | 0 | emitMaster(CGF, S); |
4478 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4479 | 0 | }; |
4480 | 0 | { |
4481 | 0 | auto LPCRegion = |
4482 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4483 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, |
4484 | 0 | emitEmptyBoundParameters); |
4485 | 0 | emitPostUpdateForReductionClause(*this, S, |
4486 | 0 | [](CodeGenFunction &) { return nullptr; }); |
4487 | 0 | } |
4488 | | // Check for outer lastprivate conditional update. |
4489 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4490 | 0 | } |
4491 | | |
4492 | | void CodeGenFunction::EmitOMPParallelMaskedDirective( |
4493 | 0 | const OMPParallelMaskedDirective &S) { |
4494 | | // Emit directive as a combined directive that consists of two implicit |
4495 | | // directives: 'parallel' with 'masked' directive. |
4496 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4497 | 0 | Action.Enter(CGF); |
4498 | 0 | OMPPrivateScope PrivateScope(CGF); |
4499 | 0 | emitOMPCopyinClause(CGF, S); |
4500 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4501 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4502 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4503 | 0 | (void)PrivateScope.Privatize(); |
4504 | 0 | emitMasked(CGF, S); |
4505 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4506 | 0 | }; |
4507 | 0 | { |
4508 | 0 | auto LPCRegion = |
4509 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4510 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, |
4511 | 0 | emitEmptyBoundParameters); |
4512 | 0 | emitPostUpdateForReductionClause(*this, S, |
4513 | 0 | [](CodeGenFunction &) { return nullptr; }); |
4514 | 0 | } |
4515 | | // Check for outer lastprivate conditional update. |
4516 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4517 | 0 | } |
4518 | | |
4519 | | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
4520 | 0 | const OMPParallelSectionsDirective &S) { |
4521 | | // Emit directive as a combined directive that consists of two implicit |
4522 | | // directives: 'parallel' with 'sections' directive. |
4523 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4524 | 0 | Action.Enter(CGF); |
4525 | 0 | emitOMPCopyinClause(CGF, S); |
4526 | 0 | CGF.EmitSections(S); |
4527 | 0 | }; |
4528 | 0 | { |
4529 | 0 | auto LPCRegion = |
4530 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4531 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, |
4532 | 0 | emitEmptyBoundParameters); |
4533 | 0 | } |
4534 | | // Check for outer lastprivate conditional update. |
4535 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
4536 | 0 | } |
4537 | | |
4538 | | namespace { |
4539 | | /// Get the list of variables declared in the context of the untied tasks. |
4540 | | class CheckVarsEscapingUntiedTaskDeclContext final |
4541 | | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
4542 | | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
4543 | | |
4544 | | public: |
4545 | 0 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
4546 | 0 | virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
4547 | 0 | void VisitDeclStmt(const DeclStmt *S) { |
4548 | 0 | if (!S) |
4549 | 0 | return; |
4550 | | // Need to privatize only local vars, static locals can be processed as is. |
4551 | 0 | for (const Decl *D : S->decls()) { |
4552 | 0 | if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) |
4553 | 0 | if (VD->hasLocalStorage()) |
4554 | 0 | PrivateDecls.push_back(VD); |
4555 | 0 | } |
4556 | 0 | } |
4557 | 0 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} |
4558 | 0 | void VisitCapturedStmt(const CapturedStmt *) {} |
4559 | 0 | void VisitLambdaExpr(const LambdaExpr *) {} |
4560 | 0 | void VisitBlockExpr(const BlockExpr *) {} |
4561 | 0 | void VisitStmt(const Stmt *S) { |
4562 | 0 | if (!S) |
4563 | 0 | return; |
4564 | 0 | for (const Stmt *Child : S->children()) |
4565 | 0 | if (Child) |
4566 | 0 | Visit(Child); |
4567 | 0 | } |
4568 | | |
4569 | | /// Returns the list of variables to be privatized.
4570 | 0 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
4571 | | }; |
4572 | | } // anonymous namespace |
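 | | // For illustration: in an untied task such as the one below (hypothetical
 | | // code), 'tmp' may be used after the task resumes on a different thread,
 | | // so the visitor above collects it for privatization in the task frame:
 | | // \code
 | | //   #pragma omp task untied
 | | //   {
 | | //     int tmp = compute();
 | | //     #pragma omp taskyield   // the task may resume on another thread
 | | //     consume(tmp);
 | | //   }
 | | // \endcode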
4573 | | |
4574 | | static void buildDependences(const OMPExecutableDirective &S, |
4575 | 0 | OMPTaskDataTy &Data) { |
4576 | | |
4577 | | // Look for 'omp_all_memory' and add it first.
4578 | 0 | bool OmpAllMemory = false; |
4579 | 0 | if (llvm::any_of( |
4580 | 0 | S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) { |
4581 | 0 | return C->getDependencyKind() == OMPC_DEPEND_outallmemory || |
4582 | 0 | C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; |
4583 | 0 | })) { |
4584 | 0 | OmpAllMemory = true; |
4585 | | // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are |
4586 | | // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to |
4587 | | // simplify. |
4588 | 0 | OMPTaskDataTy::DependData &DD = |
4589 | 0 | Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory, |
4590 | 0 | /*IteratorExpr=*/nullptr); |
4591 | | // Add a nullptr Expr to simplify the codegen in emitDependData. |
4592 | 0 | DD.DepExprs.push_back(nullptr); |
4593 | 0 | } |
4594 | | // Add remaining dependences skipping any 'out' or 'inout' if they are |
4595 | | // overridden by 'omp_all_memory'. |
4596 | 0 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4597 | 0 | OpenMPDependClauseKind Kind = C->getDependencyKind(); |
4598 | 0 | if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) |
4599 | 0 | continue; |
4600 | 0 | if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) |
4601 | 0 | continue; |
4602 | 0 | OMPTaskDataTy::DependData &DD = |
4603 | 0 | Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
4604 | 0 | DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
4605 | 0 | } |
4606 | 0 | } |
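 | | // For illustration, given the (hypothetical) clauses
 | | // \code
 | | //   #pragma omp task depend(inout: omp_all_memory) depend(in: a) \
 | | //                    depend(out: b)
 | | // \endcode
 | | // a single 'outallmemory' dependence is recorded first, the 'in: a'
 | | // dependence is kept, and the 'out: b' dependence is dropped because
 | | // 'omp_all_memory' already covers it.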
4607 | | |
4608 | | void CodeGenFunction::EmitOMPTaskBasedDirective( |
4609 | | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
4610 | | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
4611 | 0 | OMPTaskDataTy &Data) { |
4612 | | // Emit outlined function for task construct. |
4613 | 0 | const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); |
4614 | 0 | auto I = CS->getCapturedDecl()->param_begin(); |
4615 | 0 | auto PartId = std::next(I); |
4616 | 0 | auto TaskT = std::next(I, 4); |
4617 | | // Check if the task is final |
4618 | 0 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
4619 | | // If the condition constant folds and can be elided, try to avoid emitting |
4620 | | // the condition and the dead arm of the if/else. |
4621 | 0 | const Expr *Cond = Clause->getCondition(); |
4622 | 0 | bool CondConstant; |
4623 | 0 | if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) |
4624 | 0 | Data.Final.setInt(CondConstant); |
4625 | 0 | else |
4626 | 0 | Data.Final.setPointer(EvaluateExprAsBool(Cond)); |
4627 | 0 | } else { |
4628 | | // By default the task is not final. |
4629 | 0 | Data.Final.setInt(/*IntVal=*/false); |
4630 | 0 | } |
4631 | | // Check if the task has 'priority' clause. |
4632 | 0 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
4633 | 0 | const Expr *Prio = Clause->getPriority(); |
4634 | 0 | Data.Priority.setInt(/*IntVal=*/true); |
4635 | 0 | Data.Priority.setPointer(EmitScalarConversion( |
4636 | 0 | EmitScalarExpr(Prio), Prio->getType(), |
4637 | 0 | getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
4638 | 0 | Prio->getExprLoc())); |
4639 | 0 | } |
4640 | | // The first function argument for tasks is a thread id, the second one is a
4641 | | // part id (0 for tied tasks, >=0 for untied tasks).
4642 | 0 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
4643 | | // Get list of private variables. |
4644 | 0 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
4645 | 0 | auto IRef = C->varlist_begin(); |
4646 | 0 | for (const Expr *IInit : C->private_copies()) { |
4647 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4648 | 0 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4649 | 0 | Data.PrivateVars.push_back(*IRef); |
4650 | 0 | Data.PrivateCopies.push_back(IInit); |
4651 | 0 | } |
4652 | 0 | ++IRef; |
4653 | 0 | } |
4654 | 0 | } |
4655 | 0 | EmittedAsPrivate.clear(); |
4656 | | // Get list of firstprivate variables. |
4657 | 0 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4658 | 0 | auto IRef = C->varlist_begin(); |
4659 | 0 | auto IElemInitRef = C->inits().begin(); |
4660 | 0 | for (const Expr *IInit : C->private_copies()) { |
4661 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4662 | 0 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4663 | 0 | Data.FirstprivateVars.push_back(*IRef); |
4664 | 0 | Data.FirstprivateCopies.push_back(IInit); |
4665 | 0 | Data.FirstprivateInits.push_back(*IElemInitRef); |
4666 | 0 | } |
4667 | 0 | ++IRef; |
4668 | 0 | ++IElemInitRef; |
4669 | 0 | } |
4670 | 0 | } |
4671 | | // Get list of lastprivate variables (for taskloops). |
4672 | 0 | llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
4673 | 0 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
4674 | 0 | auto IRef = C->varlist_begin(); |
4675 | 0 | auto ID = C->destination_exprs().begin(); |
4676 | 0 | for (const Expr *IInit : C->private_copies()) { |
4677 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4678 | 0 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4679 | 0 | Data.LastprivateVars.push_back(*IRef); |
4680 | 0 | Data.LastprivateCopies.push_back(IInit); |
4681 | 0 | } |
4682 | 0 | LastprivateDstsOrigs.insert( |
4683 | 0 | std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), |
4684 | 0 | cast<DeclRefExpr>(*IRef))); |
4685 | 0 | ++IRef; |
4686 | 0 | ++ID; |
4687 | 0 | } |
4688 | 0 | } |
4689 | 0 | SmallVector<const Expr *, 4> LHSs; |
4690 | 0 | SmallVector<const Expr *, 4> RHSs; |
4691 | 0 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
4692 | 0 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
4693 | 0 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
4694 | 0 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
4695 | 0 | Data.ReductionOps.append(C->reduction_ops().begin(), |
4696 | 0 | C->reduction_ops().end()); |
4697 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
4698 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
4699 | 0 | } |
4700 | 0 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
4701 | 0 | *this, S.getBeginLoc(), LHSs, RHSs, Data); |
4702 | | // Build list of dependences. |
4703 | 0 | buildDependences(S, Data); |
4704 | | // Get list of local vars for untied tasks. |
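| | // Illustrative case this handles ('init'/'use' are placeholder names):
| | //   #pragma omp task untied
| | //   {
| | //     int local = init();
| | //     #pragma omp taskyield // task may resume on another thread
| | //     use(local);
| | //   }
| | // 'local' escapes across a task scheduling point, so it is collected
| | // into Data.PrivateLocals to survive the task switch.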
4705 | 0 | if (!Data.Tied) { |
4706 | 0 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
4707 | 0 | Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4708 | 0 | Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), |
4709 | 0 | Checker.getPrivateDecls().end()); |
4710 | 0 | } |
4711 | 0 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
4712 | 0 | CapturedRegion](CodeGenFunction &CGF, |
4713 | 0 | PrePostActionTy &Action) { |
4714 | 0 | llvm::MapVector<CanonicalDeclPtr<const VarDecl>, |
4715 | 0 | std::pair<Address, Address>> |
4716 | 0 | UntiedLocalVars; |
4717 | | // Set proper addresses for generated private copies. |
4718 | 0 | OMPPrivateScope Scope(CGF); |
4719 | | // Generate debug info for variables present in the shared clause.
4720 | 0 | if (auto *DI = CGF.getDebugInfo()) { |
4721 | 0 | llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = |
4722 | 0 | CGF.CapturedStmtInfo->getCaptureFields(); |
4723 | 0 | llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); |
4724 | 0 | if (CaptureFields.size() && ContextValue) { |
4725 | 0 | unsigned CharWidth = CGF.getContext().getCharWidth(); |
4726 | | // The shared variables are packed together as members of a structure,
4727 | | // so the address of each shared variable can be computed by adding its
4728 | | // offset (within the record) to the base address of the record. For
4729 | | // each shared variable, a debug intrinsic llvm.dbg.declare is
4730 | | // generated with an appropriate expression (DIExpression).
4731 | | // Ex: |
4732 | | // %12 = load %struct.anon*, %struct.anon** %__context.addr.i |
4733 | | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4734 | | // metadata !svar1, |
4735 | | // metadata !DIExpression(DW_OP_deref)) |
4736 | | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4737 | | // metadata !svar2, |
4738 | | // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) |
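| | // E.g. for '#pragma omp task shared(svar1, svar2)' (illustrative), svar1
| | // and svar2 become fields of %struct.anon, here at offsets 0 and 8.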
4739 | 0 | for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { |
4740 | 0 | const VarDecl *SharedVar = It->first; |
4741 | 0 | RecordDecl *CaptureRecord = It->second->getParent(); |
4742 | 0 | const ASTRecordLayout &Layout = |
4743 | 0 | CGF.getContext().getASTRecordLayout(CaptureRecord); |
4744 | 0 | unsigned Offset = |
4745 | 0 | Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; |
4746 | 0 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4747 | 0 | (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, |
4748 | 0 | CGF.Builder, false); |
4749 | 0 | llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); |
4750 | | // Get the dbg.declare call we just created and update its
4751 | | // DIExpression to add the offset to the base address.
4752 | 0 | if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) { |
4753 | 0 | SmallVector<uint64_t, 8> Ops; |
4754 | | // Add the offset to the base address if it is non-zero.
4755 | 0 | if (Offset) { |
4756 | 0 | Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); |
4757 | 0 | Ops.push_back(Offset); |
4758 | 0 | } |
4759 | 0 | Ops.push_back(llvm::dwarf::DW_OP_deref); |
4760 | 0 | auto &Ctx = DDI->getContext(); |
4761 | 0 | llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops); |
4762 | 0 | Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr)); |
4763 | 0 | } |
4764 | 0 | } |
4765 | 0 | } |
4766 | 0 | } |
4767 | 0 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
4768 | 0 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
4769 | 0 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
4770 | 0 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
4771 | 0 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
4772 | 0 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
4773 | 0 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
4774 | 0 | CS->getCapturedDecl()->getParam(PrivatesParam))); |
4775 | | // Map privates. |
4776 | 0 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
4777 | 0 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4778 | 0 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
4779 | 0 | CallArgs.push_back(PrivatesPtr); |
4780 | 0 | ParamTypes.push_back(PrivatesPtr->getType()); |
4781 | 0 | for (const Expr *E : Data.PrivateVars) { |
4782 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4783 | 0 | Address PrivatePtr = CGF.CreateMemTemp( |
4784 | 0 | CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); |
4785 | 0 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4786 | 0 | CallArgs.push_back(PrivatePtr.getPointer()); |
4787 | 0 | ParamTypes.push_back(PrivatePtr.getType()); |
4788 | 0 | } |
4789 | 0 | for (const Expr *E : Data.FirstprivateVars) { |
4790 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4791 | 0 | Address PrivatePtr = |
4792 | 0 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
4793 | 0 | ".firstpriv.ptr.addr"); |
4794 | 0 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4795 | 0 | FirstprivatePtrs.emplace_back(VD, PrivatePtr); |
4796 | 0 | CallArgs.push_back(PrivatePtr.getPointer()); |
4797 | 0 | ParamTypes.push_back(PrivatePtr.getType()); |
4798 | 0 | } |
4799 | 0 | for (const Expr *E : Data.LastprivateVars) { |
4800 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4801 | 0 | Address PrivatePtr = |
4802 | 0 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
4803 | 0 | ".lastpriv.ptr.addr"); |
4804 | 0 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4805 | 0 | CallArgs.push_back(PrivatePtr.getPointer()); |
4806 | 0 | ParamTypes.push_back(PrivatePtr.getType()); |
4807 | 0 | } |
4808 | 0 | for (const VarDecl *VD : Data.PrivateLocals) { |
4809 | 0 | QualType Ty = VD->getType().getNonReferenceType(); |
4810 | 0 | if (VD->getType()->isLValueReferenceType()) |
4811 | 0 | Ty = CGF.getContext().getPointerType(Ty); |
4812 | 0 | if (isAllocatableDecl(VD)) |
4813 | 0 | Ty = CGF.getContext().getPointerType(Ty); |
4814 | 0 | Address PrivatePtr = CGF.CreateMemTemp( |
4815 | 0 | CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); |
4816 | 0 | auto Result = UntiedLocalVars.insert( |
4817 | 0 | std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); |
4818 | | // If the key already exists, update the entry in place.
4819 | 0 | if (!Result.second)
4820 | 0 | *Result.first = std::make_pair( |
4821 | 0 | VD, std::make_pair(PrivatePtr, Address::invalid())); |
4822 | 0 | CallArgs.push_back(PrivatePtr.getPointer()); |
4823 | 0 | ParamTypes.push_back(PrivatePtr.getType()); |
4824 | 0 | } |
4825 | 0 | auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), |
4826 | 0 | ParamTypes, /*isVarArg=*/false); |
4827 | 0 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4828 | 0 | CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
4829 | 0 | for (const auto &Pair : LastprivateDstsOrigs) { |
4830 | 0 | const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); |
4831 | 0 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
4832 | | /*RefersToEnclosingVariableOrCapture=*/ |
4833 | 0 | CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, |
4834 | 0 | Pair.second->getType(), VK_LValue, |
4835 | 0 | Pair.second->getExprLoc()); |
4836 | 0 | Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF)); |
4837 | 0 | } |
4838 | 0 | for (const auto &Pair : PrivatePtrs) { |
4839 | 0 | Address Replacement = Address( |
4840 | 0 | CGF.Builder.CreateLoad(Pair.second), |
4841 | 0 | CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), |
4842 | 0 | CGF.getContext().getDeclAlign(Pair.first)); |
4843 | 0 | Scope.addPrivate(Pair.first, Replacement); |
4844 | 0 | if (auto *DI = CGF.getDebugInfo()) |
4845 | 0 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4846 | 0 | (void)DI->EmitDeclareOfAutoVariable( |
4847 | 0 | Pair.first, Pair.second.getPointer(), CGF.Builder, |
4848 | 0 | /*UsePointerValue*/ true); |
4849 | 0 | } |
4850 | | // Adjust mapping for internal locals by mapping actual memory instead of |
4851 | | // a pointer to this memory. |
4852 | 0 | for (auto &Pair : UntiedLocalVars) { |
4853 | 0 | QualType VDType = Pair.first->getType().getNonReferenceType(); |
4854 | 0 | if (Pair.first->getType()->isLValueReferenceType()) |
4855 | 0 | VDType = CGF.getContext().getPointerType(VDType); |
4856 | 0 | if (isAllocatableDecl(Pair.first)) { |
4857 | 0 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4858 | 0 | Address Replacement( |
4859 | 0 | Ptr, |
4860 | 0 | CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)), |
4861 | 0 | CGF.getPointerAlign()); |
4862 | 0 | Pair.second.first = Replacement; |
4863 | 0 | Ptr = CGF.Builder.CreateLoad(Replacement); |
4864 | 0 | Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType), |
4865 | 0 | CGF.getContext().getDeclAlign(Pair.first)); |
4866 | 0 | Pair.second.second = Replacement; |
4867 | 0 | } else { |
4868 | 0 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4869 | 0 | Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType), |
4870 | 0 | CGF.getContext().getDeclAlign(Pair.first)); |
4871 | 0 | Pair.second.first = Replacement; |
4872 | 0 | } |
4873 | 0 | } |
4874 | 0 | } |
4875 | 0 | if (Data.Reductions) { |
4876 | 0 | OMPPrivateScope FirstprivateScope(CGF); |
4877 | 0 | for (const auto &Pair : FirstprivatePtrs) { |
4878 | 0 | Address Replacement( |
4879 | 0 | CGF.Builder.CreateLoad(Pair.second), |
4880 | 0 | CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), |
4881 | 0 | CGF.getContext().getDeclAlign(Pair.first)); |
4882 | 0 | FirstprivateScope.addPrivate(Pair.first, Replacement); |
4883 | 0 | } |
4884 | 0 | (void)FirstprivateScope.Privatize(); |
4885 | 0 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
4886 | 0 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
4887 | 0 | Data.ReductionCopies, Data.ReductionOps); |
4888 | 0 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
4889 | 0 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); |
4890 | 0 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
4891 | 0 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4892 | 0 | RedCG.emitAggregateType(CGF, Cnt); |
4893 | | // FIXME: This must be removed once the runtime library is fixed.
4894 | | // Emit required threadprivate variables for |
4895 | | // initializer/combiner/finalizer. |
4896 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4897 | 0 | RedCG, Cnt); |
4898 | 0 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4899 | 0 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4900 | 0 | Replacement = |
4901 | 0 | Address(CGF.EmitScalarConversion( |
4902 | 0 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4903 | 0 | CGF.getContext().getPointerType( |
4904 | 0 | Data.ReductionCopies[Cnt]->getType()), |
4905 | 0 | Data.ReductionCopies[Cnt]->getExprLoc()), |
4906 | 0 | CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()), |
4907 | 0 | Replacement.getAlignment()); |
4908 | 0 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4909 | 0 | Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
4910 | 0 | } |
4911 | 0 | } |
4912 | | // Privatize all private variables except for in_reduction items. |
4913 | 0 | (void)Scope.Privatize(); |
4914 | 0 | SmallVector<const Expr *, 4> InRedVars; |
4915 | 0 | SmallVector<const Expr *, 4> InRedPrivs; |
4916 | 0 | SmallVector<const Expr *, 4> InRedOps; |
4917 | 0 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
4918 | 0 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
4919 | 0 | auto IPriv = C->privates().begin(); |
4920 | 0 | auto IRed = C->reduction_ops().begin(); |
4921 | 0 | auto ITD = C->taskgroup_descriptors().begin(); |
4922 | 0 | for (const Expr *Ref : C->varlists()) { |
4923 | 0 | InRedVars.emplace_back(Ref); |
4924 | 0 | InRedPrivs.emplace_back(*IPriv); |
4925 | 0 | InRedOps.emplace_back(*IRed); |
4926 | 0 | TaskgroupDescriptors.emplace_back(*ITD); |
4927 | 0 | std::advance(IPriv, 1); |
4928 | 0 | std::advance(IRed, 1); |
4929 | 0 | std::advance(ITD, 1); |
4930 | 0 | } |
4931 | 0 | } |
4932 | | // Privatize in_reduction items here, because taskgroup descriptors must be |
4933 | | // privatized earlier. |
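| | // Illustrative source pattern for this path ('f' is a placeholder):
| | //   #pragma omp taskgroup task_reduction(+: x)
| | //   {
| | //   #pragma omp task in_reduction(+: x)
| | //     x += f();
| | //   }
| | // The inner task locates its private copy of x through the enclosing
| | // taskgroup's reduction descriptor.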
4934 | 0 | OMPPrivateScope InRedScope(CGF); |
4935 | 0 | if (!InRedVars.empty()) { |
4936 | 0 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
4937 | 0 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
4938 | 0 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4939 | 0 | RedCG.emitAggregateType(CGF, Cnt); |
4940 | | // The taskgroup descriptor variable is always implicitly firstprivate
4941 | | // and is privatized already during processing of the firstprivates.
4942 | | // FIXME: This must be removed once the runtime library is fixed.
4943 | | // Emit required threadprivate variables for |
4944 | | // initializer/combiner/finalizer. |
4945 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4946 | 0 | RedCG, Cnt); |
4947 | 0 | llvm::Value *ReductionsPtr; |
4948 | 0 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
4949 | 0 | ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), |
4950 | 0 | TRExpr->getExprLoc()); |
4951 | 0 | } else { |
4952 | 0 | ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4953 | 0 | } |
4954 | 0 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4955 | 0 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4956 | 0 | Replacement = Address( |
4957 | 0 | CGF.EmitScalarConversion( |
4958 | 0 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4959 | 0 | CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), |
4960 | 0 | InRedPrivs[Cnt]->getExprLoc()), |
4961 | 0 | CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), |
4962 | 0 | Replacement.getAlignment()); |
4963 | 0 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4964 | 0 | InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
4965 | 0 | } |
4966 | 0 | } |
4967 | 0 | (void)InRedScope.Privatize(); |
4968 | |
4969 | 0 | CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, |
4970 | 0 | UntiedLocalVars); |
4971 | 0 | Action.Enter(CGF); |
4972 | 0 | BodyGen(CGF); |
4973 | 0 | }; |
4974 | 0 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
4975 | 0 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, |
4976 | 0 | Data.NumberOfParts); |
4977 | 0 | OMPLexicalScope Scope(*this, S, std::nullopt, |
4978 | 0 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
4979 | 0 | !isOpenMPSimdDirective(S.getDirectiveKind())); |
4980 | 0 | TaskGen(*this, OutlinedFn, Data); |
4981 | 0 | } |
4982 | | |
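| | // Usage sketch (illustrative): for N captured target items, the caller
| | // below requests 'void *[N]' arrays for base pointers, pointers, and
| | // mappers, and an 'int64_t [N]' array for sizes, so each offload array
| | // becomes an implicit firstprivate of the target task.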
4983 | | static ImplicitParamDecl * |
4984 | | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
4985 | | QualType Ty, CapturedDecl *CD, |
4986 | 0 | SourceLocation Loc) { |
4987 | 0 | auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4988 | 0 | ImplicitParamKind::Other); |
4989 | 0 | auto *OrigRef = DeclRefExpr::Create( |
4990 | 0 | C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, |
4991 | 0 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
4992 | 0 | auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4993 | 0 | ImplicitParamKind::Other); |
4994 | 0 | auto *PrivateRef = DeclRefExpr::Create( |
4995 | 0 | C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, |
4996 | 0 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
4997 | 0 | QualType ElemType = C.getBaseElementType(Ty); |
4998 | 0 | auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, |
4999 | 0 | ImplicitParamKind::Other); |
5000 | 0 | auto *InitRef = DeclRefExpr::Create( |
5001 | 0 | C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
5002 | 0 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
5003 | 0 | PrivateVD->setInitStyle(VarDecl::CInit); |
5004 | 0 | PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, |
5005 | 0 | InitRef, /*BasePath=*/nullptr, |
5006 | 0 | VK_PRValue, FPOptionsOverride())); |
5007 | 0 | Data.FirstprivateVars.emplace_back(OrigRef); |
5008 | 0 | Data.FirstprivateCopies.emplace_back(PrivateRef); |
5009 | 0 | Data.FirstprivateInits.emplace_back(InitRef); |
5010 | 0 | return OrigVD; |
5011 | 0 | } |
5012 | | |
5013 | | void CodeGenFunction::EmitOMPTargetTaskBasedDirective( |
5014 | | const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, |
5015 | 0 | OMPTargetDataInfo &InputInfo) { |
5016 | | // Emit outlined function for task construct. |
5017 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
5018 | 0 | Address CapturedStruct = GenerateCapturedStmtArgument(*CS); |
5019 | 0 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
5020 | 0 | auto I = CS->getCapturedDecl()->param_begin(); |
5021 | 0 | auto PartId = std::next(I); |
5022 | 0 | auto TaskT = std::next(I, 4); |
5023 | 0 | OMPTaskDataTy Data; |
5024 | | // The task is not final. |
5025 | 0 | Data.Final.setInt(/*IntVal=*/false); |
5026 | | // Get list of firstprivate variables. |
5027 | 0 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
5028 | 0 | auto IRef = C->varlist_begin(); |
5029 | 0 | auto IElemInitRef = C->inits().begin(); |
5030 | 0 | for (auto *IInit : C->private_copies()) { |
5031 | 0 | Data.FirstprivateVars.push_back(*IRef); |
5032 | 0 | Data.FirstprivateCopies.push_back(IInit); |
5033 | 0 | Data.FirstprivateInits.push_back(*IElemInitRef); |
5034 | 0 | ++IRef; |
5035 | 0 | ++IElemInitRef; |
5036 | 0 | } |
5037 | 0 | } |
5038 | 0 | SmallVector<const Expr *, 4> LHSs; |
5039 | 0 | SmallVector<const Expr *, 4> RHSs; |
5040 | 0 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5041 | 0 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
5042 | 0 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
5043 | 0 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
5044 | 0 | Data.ReductionOps.append(C->reduction_ops().begin(), |
5045 | 0 | C->reduction_ops().end()); |
5046 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
5047 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
5048 | 0 | } |
5049 | 0 | OMPPrivateScope TargetScope(*this); |
5050 | 0 | VarDecl *BPVD = nullptr; |
5051 | 0 | VarDecl *PVD = nullptr; |
5052 | 0 | VarDecl *SVD = nullptr; |
5053 | 0 | VarDecl *MVD = nullptr; |
5054 | 0 | if (InputInfo.NumberOfTargetItems > 0) { |
5055 | 0 | auto *CD = CapturedDecl::Create( |
5056 | 0 | getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
5057 | 0 | llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
5058 | 0 | QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
5059 | 0 | getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal, |
5060 | 0 | /*IndexTypeQuals=*/0); |
5061 | 0 | BPVD = createImplicitFirstprivateForType( |
5062 | 0 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5063 | 0 | PVD = createImplicitFirstprivateForType( |
5064 | 0 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5065 | 0 | QualType SizesType = getContext().getConstantArrayType( |
5066 | 0 | getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
5067 | 0 | ArrSize, nullptr, ArraySizeModifier::Normal, |
5068 | 0 | /*IndexTypeQuals=*/0); |
5069 | 0 | SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, |
5070 | 0 | S.getBeginLoc()); |
5071 | 0 | TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray); |
5072 | 0 | TargetScope.addPrivate(PVD, InputInfo.PointersArray); |
5073 | 0 | TargetScope.addPrivate(SVD, InputInfo.SizesArray); |
5074 | | // If there is no user-defined mapper, the mapper array will be nullptr. In |
5075 | | // this case, we don't need to privatize it. |
5076 | 0 | if (!isa_and_nonnull<llvm::ConstantPointerNull>( |
5077 | 0 | InputInfo.MappersArray.getPointer())) { |
5078 | 0 | MVD = createImplicitFirstprivateForType( |
5079 | 0 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5080 | 0 | TargetScope.addPrivate(MVD, InputInfo.MappersArray); |
5081 | 0 | } |
5082 | 0 | } |
5083 | 0 | (void)TargetScope.Privatize(); |
5084 | 0 | buildDependences(S, Data); |
5085 | 0 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, |
5086 | 0 | &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5087 | | // Set proper addresses for generated private copies. |
5088 | 0 | OMPPrivateScope Scope(CGF); |
5089 | 0 | if (!Data.FirstprivateVars.empty()) { |
5090 | 0 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
5091 | 0 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
5092 | 0 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
5093 | 0 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
5094 | 0 | CS->getCapturedDecl()->getParam(PrivatesParam))); |
5095 | | // Map privates. |
5096 | 0 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
5097 | 0 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
5098 | 0 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
5099 | 0 | CallArgs.push_back(PrivatesPtr); |
5100 | 0 | ParamTypes.push_back(PrivatesPtr->getType()); |
5101 | 0 | for (const Expr *E : Data.FirstprivateVars) { |
5102 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
5103 | 0 | Address PrivatePtr = |
5104 | 0 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
5105 | 0 | ".firstpriv.ptr.addr"); |
5106 | 0 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
5107 | 0 | CallArgs.push_back(PrivatePtr.getPointer()); |
5108 | 0 | ParamTypes.push_back(PrivatePtr.getType()); |
5109 | 0 | } |
5110 | 0 | auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), |
5111 | 0 | ParamTypes, /*isVarArg=*/false); |
5112 | 0 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
5113 | 0 | CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
5114 | 0 | for (const auto &Pair : PrivatePtrs) { |
5115 | 0 | Address Replacement( |
5116 | 0 | CGF.Builder.CreateLoad(Pair.second), |
5117 | 0 | CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), |
5118 | 0 | CGF.getContext().getDeclAlign(Pair.first)); |
5119 | 0 | Scope.addPrivate(Pair.first, Replacement); |
5120 | 0 | } |
5121 | 0 | } |
5122 | 0 | CGF.processInReduction(S, Data, CGF, CS, Scope); |
5123 | 0 | if (InputInfo.NumberOfTargetItems > 0) { |
5124 | 0 | InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( |
5125 | 0 | CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); |
5126 | 0 | InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
5127 | 0 | CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); |
5128 | 0 | InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
5129 | 0 | CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); |
5130 | | // If MVD is nullptr, the mapper array is not privatized.
5131 | 0 | if (MVD) |
5132 | 0 | InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( |
5133 | 0 | CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); |
5134 | 0 | } |
5135 | |
5136 | 0 | Action.Enter(CGF); |
5137 | 0 | OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
5138 | 0 | auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
5139 | 0 | if (CGF.CGM.getLangOpts().OpenMP >= 51 && |
5140 | 0 | needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) { |
5141 | | // Emit __kmpc_set_thread_limit() to set the thread_limit for the task |
5142 | | // enclosing this target region. This will indirectly set the thread_limit |
5143 | | // for every applicable construct within the target region.
5144 | 0 | CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( |
5145 | 0 | CGF, TL->getThreadLimit(), S.getBeginLoc()); |
5146 | 0 | } |
5147 | 0 | BodyGen(CGF); |
5148 | 0 | }; |
5149 | 0 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5150 | 0 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, |
5151 | 0 | Data.NumberOfParts); |
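| | // The nowait clause is modeled as the task's 'if' condition below: with
| | // nowait the target task may be deferred (if(1)); without it the task is
| | // undeferred (if(0)), so the encountering thread waits for the target
| | // region to finish.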
5152 | 0 | llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); |
5153 | 0 | IntegerLiteral IfCond(getContext(), TrueOrFalse, |
5154 | 0 | getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
5155 | 0 | SourceLocation()); |
5156 | 0 | CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, |
5157 | 0 | SharedsTy, CapturedStruct, &IfCond, Data); |
5158 | 0 | } |
5159 | | |
5160 | | void CodeGenFunction::processInReduction(const OMPExecutableDirective &S, |
5161 | | OMPTaskDataTy &Data, |
5162 | | CodeGenFunction &CGF, |
5163 | | const CapturedStmt *CS, |
5164 | 0 | OMPPrivateScope &Scope) { |
5165 | 0 | if (Data.Reductions) { |
5166 | 0 | OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind(); |
5167 | 0 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
5168 | 0 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
5169 | 0 | Data.ReductionCopies, Data.ReductionOps); |
5170 | 0 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
5171 | 0 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4))); |
5172 | 0 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
5173 | 0 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
5174 | 0 | RedCG.emitAggregateType(CGF, Cnt); |
5175 | | // FIXME: This must be removed once the runtime library is fixed.
5176 | | // Emit required threadprivate variables for |
5177 | | // initializer/combiner/finalizer. |
5178 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
5179 | 0 | RedCG, Cnt); |
5180 | 0 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5181 | 0 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
5182 | 0 | Replacement = |
5183 | 0 | Address(CGF.EmitScalarConversion( |
5184 | 0 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
5185 | 0 | CGF.getContext().getPointerType( |
5186 | 0 | Data.ReductionCopies[Cnt]->getType()), |
5187 | 0 | Data.ReductionCopies[Cnt]->getExprLoc()), |
5188 | 0 | CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()), |
5189 | 0 | Replacement.getAlignment()); |
5190 | 0 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
5191 | 0 | Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
5192 | 0 | } |
5193 | 0 | } |
5194 | 0 | (void)Scope.Privatize(); |
5195 | 0 | SmallVector<const Expr *, 4> InRedVars; |
5196 | 0 | SmallVector<const Expr *, 4> InRedPrivs; |
5197 | 0 | SmallVector<const Expr *, 4> InRedOps; |
5198 | 0 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
5199 | 0 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5200 | 0 | auto IPriv = C->privates().begin(); |
5201 | 0 | auto IRed = C->reduction_ops().begin(); |
5202 | 0 | auto ITD = C->taskgroup_descriptors().begin(); |
5203 | 0 | for (const Expr *Ref : C->varlists()) { |
5204 | 0 | InRedVars.emplace_back(Ref); |
5205 | 0 | InRedPrivs.emplace_back(*IPriv); |
5206 | 0 | InRedOps.emplace_back(*IRed); |
5207 | 0 | TaskgroupDescriptors.emplace_back(*ITD); |
5208 | 0 | std::advance(IPriv, 1); |
5209 | 0 | std::advance(IRed, 1); |
5210 | 0 | std::advance(ITD, 1); |
5211 | 0 | } |
5212 | 0 | } |
5213 | 0 | OMPPrivateScope InRedScope(CGF); |
5214 | 0 | if (!InRedVars.empty()) { |
5215 | 0 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
5216 | 0 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
5217 | 0 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
5218 | 0 | RedCG.emitAggregateType(CGF, Cnt); |
5219 | | // FIXME: This must be removed once the runtime library is fixed.
5220 | | // Emit required threadprivate variables for |
5221 | | // initializer/combiner/finalizer. |
5222 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
5223 | 0 | RedCG, Cnt); |
5224 | 0 | llvm::Value *ReductionsPtr; |
5225 | 0 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5226 | 0 | ReductionsPtr = |
5227 | 0 | CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc()); |
5228 | 0 | } else { |
5229 | 0 | ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
5230 | 0 | } |
5231 | 0 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5232 | 0 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
5233 | 0 | Replacement = Address( |
5234 | 0 | CGF.EmitScalarConversion( |
5235 | 0 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
5236 | 0 | CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), |
5237 | 0 | InRedPrivs[Cnt]->getExprLoc()), |
5238 | 0 | CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), |
5239 | 0 | Replacement.getAlignment()); |
5240 | 0 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
5241 | 0 | InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
5242 | 0 | } |
5243 | 0 | } |
5244 | 0 | (void)InRedScope.Privatize(); |
5245 | 0 | } |
5246 | | |
5247 | 0 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
5248 | | // Emit outlined function for task construct. |
5249 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
5250 | 0 | Address CapturedStruct = GenerateCapturedStmtArgument(*CS); |
5251 | 0 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
5252 | 0 | const Expr *IfCond = nullptr; |
5253 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
5254 | 0 | if (C->getNameModifier() == OMPD_unknown || |
5255 | 0 | C->getNameModifier() == OMPD_task) { |
5256 | 0 | IfCond = C->getCondition(); |
5257 | 0 | break; |
5258 | 0 | } |
5259 | 0 | } |
5260 | |
5261 | 0 | OMPTaskDataTy Data; |
5262 | | // Check if we should emit a tied or an untied task.
5263 | 0 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
5264 | 0 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
5265 | 0 | CGF.EmitStmt(CS->getCapturedStmt()); |
5266 | 0 | }; |
5267 | 0 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
5268 | 0 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
5269 | 0 | const OMPTaskDataTy &Data) { |
5270 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, |
5271 | 0 | SharedsTy, CapturedStruct, IfCond, |
5272 | 0 | Data); |
5273 | 0 | }; |
5274 | 0 | auto LPCRegion = |
5275 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
5276 | 0 | EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); |
5277 | 0 | } |
5278 | | |
5279 | | void CodeGenFunction::EmitOMPTaskyieldDirective( |
5280 | 0 | const OMPTaskyieldDirective &S) { |
5281 | 0 | CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); |
5282 | 0 | } |
5283 | | |
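| | // Example directive reaching this lowering (illustrative):
| | //   #pragma omp error severity(warning) message("bad configuration")
| | // With no severity clause the error defaults to fatal, as encoded below.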
5284 | 0 | void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { |
5285 | 0 | const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); |
5286 | 0 | Expr *ME = MC ? MC->getMessageString() : nullptr; |
5287 | 0 | const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); |
5288 | 0 | bool IsFatal = false; |
5289 | 0 | if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) |
5290 | 0 | IsFatal = true; |
5291 | 0 | CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); |
5292 | 0 | } |
5293 | | |
5294 | 0 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
5295 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); |
5296 | 0 | } |
5297 | | |
5298 | 0 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
5299 | 0 | OMPTaskDataTy Data; |
5300 | | // Build list of dependences.
5301 | 0 | buildDependences(S, Data); |
5302 | 0 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
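| | // E.g. '#pragma omp taskwait depend(in: x) nowait' (OpenMP 5.x,
| | // illustrative) reaches this call with one dependence and
| | // HasNowaitClause set.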
5303 | 0 | CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); |
5304 | 0 | } |
5305 | | |
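| | // The OpenMPIRBuilder lowering below is used only for a bare
| | // '#pragma omp taskgroup' with no clauses (e.g. no task_reduction).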
5306 | 0 | bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { |
5307 | 0 | return T.clauses().empty(); |
5308 | 0 | } |
5309 | | |
5310 | | void CodeGenFunction::EmitOMPTaskgroupDirective( |
5311 | 0 | const OMPTaskgroupDirective &S) { |
5312 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5313 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) { |
5314 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5315 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5316 | 0 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5317 | 0 | AllocaInsertPt->getIterator()); |
5318 | |
5319 | 0 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
5320 | 0 | InsertPointTy CodeGenIP) { |
5321 | 0 | Builder.restoreIP(CodeGenIP); |
5322 | 0 | EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
5323 | 0 | }; |
5324 | 0 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5325 | 0 | if (!CapturedStmtInfo) |
5326 | 0 | CapturedStmtInfo = &CapStmtInfo; |
5327 | 0 | Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); |
5328 | 0 | return; |
5329 | 0 | } |
5330 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5331 | 0 | Action.Enter(CGF); |
5332 | 0 | if (const Expr *E = S.getReductionRef()) { |
5333 | 0 | SmallVector<const Expr *, 4> LHSs; |
5334 | 0 | SmallVector<const Expr *, 4> RHSs; |
5335 | 0 | OMPTaskDataTy Data; |
5336 | 0 | for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { |
5337 | 0 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
5338 | 0 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
5339 | 0 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
5340 | 0 | Data.ReductionOps.append(C->reduction_ops().begin(), |
5341 | 0 | C->reduction_ops().end()); |
5342 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
5343 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
5344 | 0 | } |
5345 | 0 | llvm::Value *ReductionDesc = |
5346 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), |
5347 | 0 | LHSs, RHSs, Data); |
5348 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
5349 | 0 | CGF.EmitVarDecl(*VD); |
5350 | 0 | CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), |
5351 | 0 | /*Volatile=*/false, E->getType()); |
5352 | 0 | } |
5353 | 0 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
5354 | 0 | }; |
5355 | 0 | CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); |
5356 | 0 | } |
5357 | | |
5358 | 0 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
5359 | 0 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
5360 | 0 | ? llvm::AtomicOrdering::NotAtomic |
5361 | 0 | : llvm::AtomicOrdering::AcquireRelease; |
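| | // Illustrative: a bare '#pragma omp flush' has no OMPFlushClause and
| | // takes the acquire-release path above, while '#pragma omp flush(x)'
| | // carries the clause with a variable list and takes the NotAtomic path.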
5362 | 0 | CGM.getOpenMPRuntime().emitFlush( |
5363 | 0 | *this, |
5364 | 0 | [&S]() -> ArrayRef<const Expr *> { |
5365 | 0 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
5366 | 0 | return llvm::ArrayRef(FlushClause->varlist_begin(), |
5367 | 0 | FlushClause->varlist_end()); |
5368 | 0 | return std::nullopt; |
5369 | 0 | }(), |
5370 | 0 | S.getBeginLoc(), AO); |
5371 | 0 | } |
5372 | | |
5373 | 0 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
5374 | 0 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
5375 | 0 | LValue DOLVal = EmitLValue(DO->getDepobj()); |
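| | // Example forms dispatched below (illustrative):
| | //   #pragma omp depobj(o) depend(inout: x)  // initialize
| | //   #pragma omp depobj(o) update(in)        // change dependence kind
| | //   #pragma omp depobj(o) destroy           // destroy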
5376 | 0 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
5377 | 0 | OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), |
5378 | 0 | DC->getModifier()); |
5379 | 0 | Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); |
5380 | 0 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
5381 | 0 | *this, Dependencies, DC->getBeginLoc()); |
5382 | 0 | EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); |
5383 | 0 | return; |
5384 | 0 | } |
5385 | 0 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
5386 | 0 | CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); |
5387 | 0 | return; |
5388 | 0 | } |
5389 | 0 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
5390 | 0 | CGM.getOpenMPRuntime().emitUpdateClause( |
5391 | 0 | *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); |
5392 | 0 | return; |
5393 | 0 | } |
5394 | 0 | } |
5395 | | |
5396 | 0 | void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { |
5397 | 0 | if (!OMPParentLoopDirectiveForScan) |
5398 | 0 | return; |
5399 | 0 | const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; |
5400 | 0 | bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); |
5401 | 0 | SmallVector<const Expr *, 4> Shareds; |
5402 | 0 | SmallVector<const Expr *, 4> Privates; |
5403 | 0 | SmallVector<const Expr *, 4> LHSs; |
5404 | 0 | SmallVector<const Expr *, 4> RHSs; |
5405 | 0 | SmallVector<const Expr *, 4> ReductionOps; |
5406 | 0 | SmallVector<const Expr *, 4> CopyOps; |
5407 | 0 | SmallVector<const Expr *, 4> CopyArrayTemps; |
5408 | 0 | SmallVector<const Expr *, 4> CopyArrayElems; |
5409 | 0 | for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { |
5410 | 0 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
5411 | 0 | continue; |
5412 | 0 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
5413 | 0 | Privates.append(C->privates().begin(), C->privates().end()); |
5414 | 0 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
5415 | 0 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
5416 | 0 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
5417 | 0 | CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); |
5418 | 0 | CopyArrayTemps.append(C->copy_array_temps().begin(), |
5419 | 0 | C->copy_array_temps().end()); |
5420 | 0 | CopyArrayElems.append(C->copy_array_elems().begin(), |
5421 | 0 | C->copy_array_elems().end()); |
5422 | 0 | } |
5423 | 0 | if (ParentDir.getDirectiveKind() == OMPD_simd || |
5424 | 0 | (getLangOpts().OpenMPSimd && |
5425 | 0 | isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { |
5426 | | // For the simd directive, and for simd-based directives in simd-only
5427 | | // mode, use the following codegen:
5428 | | // int x = 0; |
5429 | | // #pragma omp simd reduction(inscan, +: x) |
5430 | | // for (..) { |
5431 | | // <first part> |
5432 | | // #pragma omp scan inclusive(x) |
5433 | | // <second part> |
5434 | | // } |
5435 | | // is transformed to: |
5436 | | // int x = 0; |
5437 | | // for (..) { |
5438 | | // int x_priv = 0; |
5439 | | // <first part> |
5440 | | // x = x_priv + x; |
5441 | | // x_priv = x; |
5442 | | // <second part> |
5443 | | // } |
5444 | | // and |
5445 | | // int x = 0; |
5446 | | // #pragma omp simd reduction(inscan, +: x) |
5447 | | // for (..) { |
5448 | | // <first part> |
5449 | | // #pragma omp scan exclusive(x) |
5450 | | // <second part> |
5451 | | // } |
5452 | | // to |
5453 | | // int x = 0; |
5454 | | // for (..) { |
5455 | | // int x_priv = 0; |
5456 | | // <second part> |
5457 | | // int temp = x; |
5458 | | // x = x_priv + x; |
5459 | | // x_priv = temp; |
5460 | | // <first part> |
5461 | | // } |
5462 | 0 | llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); |
5463 | 0 | EmitBranch(IsInclusive |
5464 | 0 | ? OMPScanReduce |
5465 | 0 | : BreakContinueStack.back().ContinueBlock.getBlock()); |
5466 | 0 | EmitBlock(OMPScanDispatch); |
5467 | 0 | { |
5468 | | // New scope for correct construction/destruction of temp variables for |
5469 | | // exclusive scan. |
5470 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
5471 | 0 | EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); |
5472 | 0 | EmitBlock(OMPScanReduce); |
5473 | 0 | if (!IsInclusive) { |
5474 | | // Create a temp var and copy the LHS value into it:
5475 | | // TMP = LHS;
5476 | 0 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5477 | 0 | const Expr *PrivateExpr = Privates[I]; |
5478 | 0 | const Expr *TempExpr = CopyArrayTemps[I]; |
5479 | 0 | EmitAutoVarDecl( |
5480 | 0 | *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); |
5481 | 0 | LValue DestLVal = EmitLValue(TempExpr); |
5482 | 0 | LValue SrcLVal = EmitLValue(LHSs[I]); |
5483 | 0 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
5484 | 0 | SrcLVal.getAddress(*this), |
5485 | 0 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
5486 | 0 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
5487 | 0 | CopyOps[I]); |
5488 | 0 | } |
5489 | 0 | } |
5490 | 0 | CGM.getOpenMPRuntime().emitReduction( |
5491 | 0 | *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
5492 | 0 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); |
5493 | 0 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5494 | 0 | const Expr *PrivateExpr = Privates[I]; |
5495 | 0 | LValue DestLVal; |
5496 | 0 | LValue SrcLVal; |
5497 | 0 | if (IsInclusive) { |
5498 | 0 | DestLVal = EmitLValue(RHSs[I]); |
5499 | 0 | SrcLVal = EmitLValue(LHSs[I]); |
5500 | 0 | } else { |
5501 | 0 | const Expr *TempExpr = CopyArrayTemps[I]; |
5502 | 0 | DestLVal = EmitLValue(RHSs[I]); |
5503 | 0 | SrcLVal = EmitLValue(TempExpr); |
5504 | 0 | } |
5505 | 0 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
5506 | 0 | SrcLVal.getAddress(*this), |
5507 | 0 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
5508 | 0 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
5509 | 0 | CopyOps[I]); |
5510 | 0 | } |
5511 | 0 | } |
5512 | 0 | EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); |
5513 | 0 | OMPScanExitBlock = IsInclusive |
5514 | 0 | ? BreakContinueStack.back().ContinueBlock.getBlock() |
5515 | 0 | : OMPScanReduce; |
5516 | 0 | EmitBlock(OMPAfterScanBlock); |
5517 | 0 | return; |
5518 | 0 | } |
5519 | 0 | if (!IsInclusive) { |
5520 | 0 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
5521 | 0 | EmitBlock(OMPScanExitBlock); |
5522 | 0 | } |
5523 | 0 | if (OMPFirstScanLoop) { |
5524 | | // Emit buffer[i] = red; at the end of the input phase. |
5525 | 0 | const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) |
5526 | 0 | .getIterationVariable() |
5527 | 0 | ->IgnoreParenImpCasts(); |
5528 | 0 | LValue IdxLVal = EmitLValue(IVExpr); |
5529 | 0 | llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); |
5530 | 0 | IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); |
5531 | 0 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5532 | 0 | const Expr *PrivateExpr = Privates[I]; |
5533 | 0 | const Expr *OrigExpr = Shareds[I]; |
5534 | 0 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5535 | 0 | OpaqueValueMapping IdxMapping( |
5536 | 0 | *this, |
5537 | 0 | cast<OpaqueValueExpr>( |
5538 | 0 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
5539 | 0 | RValue::get(IdxVal)); |
5540 | 0 | LValue DestLVal = EmitLValue(CopyArrayElem); |
5541 | 0 | LValue SrcLVal = EmitLValue(OrigExpr); |
5542 | 0 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
5543 | 0 | SrcLVal.getAddress(*this), |
5544 | 0 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
5545 | 0 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
5546 | 0 | CopyOps[I]); |
5547 | 0 | } |
5548 | 0 | } |
5549 | 0 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
5550 | 0 | if (IsInclusive) { |
5551 | 0 | EmitBlock(OMPScanExitBlock); |
5552 | 0 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
5553 | 0 | } |
5554 | 0 | EmitBlock(OMPScanDispatch); |
5555 | 0 | if (!OMPFirstScanLoop) { |
5556 | | // Emit red = buffer[i]; at the entrance to the scan phase. |
5557 | 0 | const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) |
5558 | 0 | .getIterationVariable() |
5559 | 0 | ->IgnoreParenImpCasts(); |
5560 | 0 | LValue IdxLVal = EmitLValue(IVExpr); |
5561 | 0 | llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); |
5562 | 0 | IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); |
5563 | 0 | llvm::BasicBlock *ExclusiveExitBB = nullptr; |
5564 | 0 | if (!IsInclusive) { |
5565 | 0 | llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); |
5566 | 0 | ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); |
5567 | 0 | llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); |
5568 | 0 | Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); |
5569 | 0 | EmitBlock(ContBB); |
5570 | | // Use the (idx - 1) iteration for the exclusive scan.
5571 | 0 | IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); |
5572 | 0 | } |
5573 | 0 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5574 | 0 | const Expr *PrivateExpr = Privates[I]; |
5575 | 0 | const Expr *OrigExpr = Shareds[I]; |
5576 | 0 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5577 | 0 | OpaqueValueMapping IdxMapping( |
5578 | 0 | *this, |
5579 | 0 | cast<OpaqueValueExpr>( |
5580 | 0 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
5581 | 0 | RValue::get(IdxVal)); |
5582 | 0 | LValue SrcLVal = EmitLValue(CopyArrayElem); |
5583 | 0 | LValue DestLVal = EmitLValue(OrigExpr); |
5584 | 0 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
5585 | 0 | SrcLVal.getAddress(*this), |
5586 | 0 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
5587 | 0 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
5588 | 0 | CopyOps[I]); |
5589 | 0 | } |
5590 | 0 | if (!IsInclusive) { |
5591 | 0 | EmitBlock(ExclusiveExitBB); |
5592 | 0 | } |
5593 | 0 | } |
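| | // In the first (input-phase) loop an inclusive scan runs the code before
| | // the scan directive and an exclusive scan the code after it; the second
| | // (scan-phase) loop runs the other half.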
5594 | 0 | EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock |
5595 | 0 | : OMPAfterScanBlock); |
5596 | 0 | EmitBlock(OMPAfterScanBlock); |
5597 | 0 | } |
5598 | | |
5599 | | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
5600 | | const CodeGenLoopTy &CodeGenLoop, |
5601 | 0 | Expr *IncExpr) { |
5602 | | // Emit the loop iteration variable. |
5603 | 0 | const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); |
5604 | 0 | const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); |
5605 | 0 | EmitVarDecl(*IVDecl); |
5606 | | |
5607 | | // Emit the iteration count variable.
5608 | | // If it is not a variable, Sema decided to calculate the iteration
5609 | | // count on each iteration (e.g., it is foldable into a constant).
5610 | 0 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
5611 | 0 | EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
5612 | | // Emit calculation of the iterations count. |
5613 | 0 | EmitIgnoredExpr(S.getCalcLastIteration()); |
5614 | 0 | } |
5615 | |
5616 | 0 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
5617 | |
5618 | 0 | bool HasLastprivateClause = false; |
5619 | | // Check pre-condition. |
5620 | 0 | { |
5621 | 0 | OMPLoopScope PreInitScope(*this, S); |
5622 | | // Skip the entire loop if we don't meet the precondition. |
5623 | | // If the condition constant folds and can be elided, avoid emitting the |
5624 | | // whole loop. |
5625 | 0 | bool CondConstant; |
5626 | 0 | llvm::BasicBlock *ContBlock = nullptr; |
5627 | 0 | if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
5628 | 0 | if (!CondConstant) |
5629 | 0 | return; |
5630 | 0 | } else { |
5631 | 0 | llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); |
5632 | 0 | ContBlock = createBasicBlock("omp.precond.end"); |
5633 | 0 | emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, |
5634 | 0 | getProfileCount(&S)); |
5635 | 0 | EmitBlock(ThenBlock); |
5636 | 0 | incrementProfileCounter(&S); |
5637 | 0 | } |
5638 | | |
5639 | 0 | emitAlignedClause(*this, S); |
5640 | | // Emit 'then' code. |
5641 | 0 | { |
5642 | | // Emit helper vars inits. |
5643 | |
5644 | 0 | LValue LB = EmitOMPHelperVar( |
5645 | 0 | *this, cast<DeclRefExpr>( |
5646 | 0 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5647 | 0 | ? S.getCombinedLowerBoundVariable() |
5648 | 0 | : S.getLowerBoundVariable()))); |
5649 | 0 | LValue UB = EmitOMPHelperVar( |
5650 | 0 | *this, cast<DeclRefExpr>( |
5651 | 0 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5652 | 0 | ? S.getCombinedUpperBoundVariable() |
5653 | 0 | : S.getUpperBoundVariable()))); |
5654 | 0 | LValue ST = |
5655 | 0 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); |
5656 | 0 | LValue IL = |
5657 | 0 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); |
5658 | |
5659 | 0 | OMPPrivateScope LoopScope(*this); |
5660 | 0 | if (EmitOMPFirstprivateClause(S, LoopScope)) { |
5661 | | // Emit implicit barrier to synchronize threads and avoid data races |
5662 | | // on initialization of firstprivate variables and post-update of |
5663 | | // lastprivate variables. |
5664 | 0 | CGM.getOpenMPRuntime().emitBarrierCall( |
5665 | 0 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
5666 | 0 | /*ForceSimpleCall=*/true); |
5667 | 0 | } |
5668 | 0 | EmitOMPPrivateClause(S, LoopScope); |
5669 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5670 | 0 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5671 | 0 | !isOpenMPTeamsDirective(S.getDirectiveKind())) |
5672 | 0 | EmitOMPReductionClauseInit(S, LoopScope); |
5673 | 0 | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
5674 | 0 | EmitOMPPrivateLoopCounters(S, LoopScope); |
5675 | 0 | (void)LoopScope.Privatize(); |
5676 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
5677 | 0 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
5678 | | |
5679 | | // Detect the distribute schedule kind and chunk. |
5680 | 0 | llvm::Value *Chunk = nullptr; |
5681 | 0 | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
5682 | 0 | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
5683 | 0 | ScheduleKind = C->getDistScheduleKind(); |
5684 | 0 | if (const Expr *Ch = C->getChunkSize()) { |
5685 | 0 | Chunk = EmitScalarExpr(Ch); |
5686 | 0 | Chunk = EmitScalarConversion(Chunk, Ch->getType(), |
5687 | 0 | S.getIterationVariable()->getType(), |
5688 | 0 | S.getBeginLoc()); |
5689 | 0 | } |
5690 | 0 | } else { |
5691 | | // Default behaviour for dist_schedule clause. |
5692 | 0 | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
5693 | 0 | *this, S, ScheduleKind, Chunk); |
5694 | 0 | } |
5695 | 0 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
5696 | 0 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
5697 | | |
5698 | | // OpenMP [2.10.8, distribute Construct, Description] |
5699 | | // If dist_schedule is specified, kind must be static. If specified, |
5700 | | // iterations are divided into chunks of size chunk_size, chunks are |
5701 | | // assigned to the teams of the league in a round-robin fashion in the |
5702 | | // order of the team number. When no chunk_size is specified, the |
5703 | | // iteration space is divided into chunks that are approximately equal |
5704 | | // in size, and at most one chunk is distributed to each team of the |
5705 | | // league. The size of the chunks is unspecified in this case. |
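| | // E.g. '#pragma omp distribute dist_schedule(static, 4)' (illustrative)
| | // hands out chunks of four iterations to the teams in a round-robin
| | // fashion, while plain dist_schedule(static) gives each team at most one
| | // roughly equal chunk.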
5706 | 0 | bool StaticChunked = |
5707 | 0 | RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && |
5708 | 0 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
5709 | 0 | if (RT.isStaticNonchunked(ScheduleKind, |
5710 | 0 | /* Chunked */ Chunk != nullptr) || |
5711 | 0 | StaticChunked) { |
5712 | 0 | CGOpenMPRuntime::StaticRTInput StaticInit( |
5713 | 0 | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), |
5714 | 0 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
5715 | 0 | StaticChunked ? Chunk : nullptr); |
5716 | 0 | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, |
5717 | 0 | StaticInit); |
5718 | 0 | JumpDest LoopExit = |
5719 | 0 | getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
5720 | | // UB = min(UB, GlobalUB); |
5721 | 0 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5722 | 0 | ? S.getCombinedEnsureUpperBound() |
5723 | 0 | : S.getEnsureUpperBound()); |
5724 | | // IV = LB; |
5725 | 0 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5726 | 0 | ? S.getCombinedInit() |
5727 | 0 | : S.getInit()); |
5728 | |
5729 | 0 | const Expr *Cond = |
5730 | 0 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5731 | 0 | ? S.getCombinedCond() |
5732 | 0 | : S.getCond(); |
5733 | |
5734 | 0 | if (StaticChunked) |
5735 | 0 | Cond = S.getCombinedDistCond(); |
5736 | | |
5737 | | // For static unchunked schedules, generate:
5738 | | // |
5739 | | // 1. For distribute alone, codegen |
5740 | | // while (idx <= UB) { |
5741 | | // BODY; |
5742 | | // ++idx; |
5743 | | // } |
5744 | | // |
5745 | | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
5746 | | // while (idx <= UB) { |
5747 | | // <CodeGen rest of pragma>(LB, UB); |
5748 | | // idx += ST; |
5749 | | // } |
5750 | | // |
5751 | | // For a static chunked schedule, generate:
5752 | | // |
5753 | | // while (IV <= GlobalUB) { |
5754 | | // <CodeGen rest of pragma>(LB, UB); |
5755 | | // LB += ST; |
5756 | | // UB += ST; |
5757 | | // UB = min(UB, GlobalUB); |
5758 | | // IV = LB; |
5759 | | // } |
5760 | | // |
5761 | 0 | emitCommonSimdLoop( |
5762 | 0 | *this, S, |
5763 | 0 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5764 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
5765 | 0 | CGF.EmitOMPSimdInit(S); |
5766 | 0 | }, |
5767 | 0 | [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
5768 | 0 | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
5769 | 0 | CGF.EmitOMPInnerLoop( |
5770 | 0 | S, LoopScope.requiresCleanups(), Cond, IncExpr, |
5771 | 0 | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
5772 | 0 | CodeGenLoop(CGF, S, LoopExit); |
5773 | 0 | }, |
5774 | 0 | [&S, StaticChunked](CodeGenFunction &CGF) { |
5775 | 0 | if (StaticChunked) { |
5776 | 0 | CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); |
5777 | 0 | CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); |
5778 | 0 | CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); |
5779 | 0 | CGF.EmitIgnoredExpr(S.getCombinedInit()); |
5780 | 0 | } |
5781 | 0 | }); |
5782 | 0 | }); |
5783 | 0 | EmitBlock(LoopExit.getBlock()); |
5784 | | // Tell the runtime we are done. |
5785 | 0 | RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); |
5786 | 0 | } else { |
5787 | | // Emit the outer loop, which requests its work chunk [LB..UB] from |
5788 | | // the runtime and runs the inner loop to process it.
5789 | 0 | const OMPLoopArguments LoopArguments = { |
5790 | 0 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
5791 | 0 | IL.getAddress(*this), Chunk}; |
5792 | 0 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, |
5793 | 0 | CodeGenLoop); |
5794 | 0 | } |
5795 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
5796 | 0 | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
5797 | 0 | return CGF.Builder.CreateIsNotNull( |
5798 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5799 | 0 | }); |
5800 | 0 | } |
5801 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5802 | 0 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5803 | 0 | !isOpenMPTeamsDirective(S.getDirectiveKind())) { |
5804 | 0 | EmitOMPReductionClauseFinal(S, OMPD_simd); |
5805 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
5806 | 0 | emitPostUpdateForReductionClause( |
5807 | 0 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
5808 | 0 | return CGF.Builder.CreateIsNotNull( |
5809 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5810 | 0 | }); |
5811 | 0 | } |
5812 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
5813 | 0 | if (HasLastprivateClause) { |
5814 | 0 | EmitOMPLastprivateClauseFinal( |
5815 | 0 | S, /*NoFinals=*/false, |
5816 | 0 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
5817 | 0 | } |
5818 | 0 | } |
5819 | | |
5820 | | // We're now done with the loop, so jump to the continuation block. |
5821 | 0 | if (ContBlock) { |
5822 | 0 | EmitBranch(ContBlock); |
5823 | 0 | EmitBlock(ContBlock, true); |
5824 | 0 | } |
5825 | 0 | } |
5826 | 0 | } |
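For illustration, a hypothetical construct that exercises the chunk-size-one schedule described in the comment block above (N, a, b, and c are placeholder names, not taken from this file):

  // Hypothetical usage sketch: with dist_schedule(static, 1) each team owns a
  // single-iteration chunk per round, so the combined LB/UB are advanced by
  // the stride and re-clamped against GlobalUB on every outer iteration,
  // matching the pseudocode above.
  #pragma omp target teams distribute parallel for dist_schedule(static, 1)
  for (int i = 0; i < N; ++i)
    a[i] = b[i] + c[i];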
5827 | | |
5828 | | void CodeGenFunction::EmitOMPDistributeDirective( |
5829 | 0 | const OMPDistributeDirective &S) { |
5830 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5831 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
5832 | 0 | }; |
5833 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5834 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
5835 | 0 | } |
5836 | | |
5837 | | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
5838 | | const CapturedStmt *S, |
5839 | 0 | SourceLocation Loc) { |
5840 | 0 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
5841 | 0 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5842 | 0 | CGF.CapturedStmtInfo = &CapStmtInfo; |
5843 | 0 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); |
5844 | 0 | Fn->setDoesNotRecurse(); |
5845 | 0 | return Fn; |
5846 | 0 | } |
5847 | | |
5848 | | template <typename T> |
5849 | | static void emitRestoreIP(CodeGenFunction &CGF, const T *C, |
5850 | | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, |
5851 | 0 | llvm::OpenMPIRBuilder &OMPBuilder) { |
5852 | 0 |
5853 | 0 | unsigned NumLoops = C->getNumLoops(); |
5854 | 0 | QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( |
5855 | 0 | /*DestWidth=*/64, /*Signed=*/1); |
5856 | 0 | llvm::SmallVector<llvm::Value *> StoreValues; |
5857 | 0 | for (unsigned I = 0; I < NumLoops; I++) { |
5858 | 0 | const Expr *CounterVal = C->getLoopData(I); |
5859 | 0 | assert(CounterVal); |
5860 | 0 | llvm::Value *StoreValue = CGF.EmitScalarConversion( |
5861 | 0 | CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, |
5862 | 0 | CounterVal->getExprLoc()); |
5863 | 0 | StoreValues.emplace_back(StoreValue); |
5864 | 0 | } |
5865 | 0 | OMPDoacrossKind<T> ODK; |
5866 | 0 | bool IsDependSource = ODK.isSource(C); |
5867 | 0 | CGF.Builder.restoreIP( |
5868 | 0 | OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops, |
5869 | 0 | StoreValues, ".cnt.addr", IsDependSource)); |
5870 | 0 | }
Unexecuted instantiation: CGStmtOpenMP.cpp:void emitRestoreIP<clang::OMPDependClause>(clang::CodeGen::CodeGenFunction&, clang::OMPDependClause const*, llvm::IRBuilderBase::InsertPoint, llvm::OpenMPIRBuilder&)
Unexecuted instantiation: CGStmtOpenMP.cpp:void emitRestoreIP<clang::OMPDoacrossClause>(clang::CodeGen::CodeGenFunction&, clang::OMPDoacrossClause const*, llvm::IRBuilderBase::InsertPoint, llvm::OpenMPIRBuilder&)
5871 | | |
5872 | 0 | void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
5873 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
5874 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5875 | 0 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5876 | 0 |
5877 | 0 | if (S.hasClausesOfKind<OMPDependClause>() || |
5878 | 0 | S.hasClausesOfKind<OMPDoacrossClause>()) { |
5879 | | // The ordered directive with depend clause. |
5880 | 0 | assert(!S.hasAssociatedStmt() && "No associated statement must be in " |
5881 | 0 | "ordered depend|doacross construct."); |
5882 | 0 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5883 | 0 | AllocaInsertPt->getIterator()); |
5884 | 0 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5885 | 0 | emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); |
5886 | 0 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5887 | 0 | emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); |
5888 | 0 | } else { |
5889 | | // The ordered directive with a threads or simd clause, or without a clause.
5890 | | // Without a clause, it behaves as if the threads clause were specified.
5891 | 0 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5892 | 0 |
5893 | 0 | auto FiniCB = [this](InsertPointTy IP) { |
5894 | 0 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
5895 | 0 | }; |
5896 | 0 |
5897 | 0 | auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, |
5898 | 0 | InsertPointTy CodeGenIP) { |
5899 | 0 | Builder.restoreIP(CodeGenIP); |
5900 | 0 |
5901 | 0 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5902 | 0 | if (C) { |
5903 | 0 | llvm::BasicBlock *FiniBB = splitBBWithSuffix( |
5904 | 0 | Builder, /*CreateBranch=*/false, ".ordered.after"); |
5905 | 0 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
5906 | 0 | GenerateOpenMPCapturedVars(*CS, CapturedVars); |
5907 | 0 | llvm::Function *OutlinedFn = |
5908 | 0 | emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); |
5909 | 0 | assert(S.getBeginLoc().isValid() && |
5910 | 0 | "Outlined function call location must be valid."); |
5911 | 0 | ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc()); |
5912 | 0 | OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB, |
5913 | 0 | OutlinedFn, CapturedVars); |
5914 | 0 | } else { |
5915 | 0 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
5916 | 0 | *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); |
5917 | 0 | } |
5918 | 0 | }; |
5919 | 0 |
5920 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5921 | 0 | Builder.restoreIP( |
5922 | 0 | OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C)); |
5923 | 0 | } |
5924 | 0 | return; |
5925 | 0 | } |
5926 | | |
5927 | 0 | if (S.hasClausesOfKind<OMPDependClause>()) { |
5928 | 0 | assert(!S.hasAssociatedStmt() && |
5929 | 0 | "No associated statement must be in ordered depend construct."); |
5930 | 0 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5931 | 0 | CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
5932 | 0 | return; |
5933 | 0 | } |
5934 | 0 | if (S.hasClausesOfKind<OMPDoacrossClause>()) { |
5935 | 0 | assert(!S.hasAssociatedStmt() && |
5936 | 0 | "No associated statement must be in ordered doacross construct."); |
5937 | 0 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5938 | 0 | CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
5939 | 0 | return; |
5940 | 0 | } |
5941 | 0 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5942 | 0 | auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, |
5943 | 0 | PrePostActionTy &Action) { |
5944 | 0 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5945 | 0 | if (C) { |
5946 | 0 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
5947 | 0 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
5948 | 0 | llvm::Function *OutlinedFn = |
5949 | 0 | emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); |
5950 | 0 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), |
5951 | 0 | OutlinedFn, CapturedVars); |
5952 | 0 | } else { |
5953 | 0 | Action.Enter(CGF); |
5954 | 0 | CGF.EmitStmt(CS->getCapturedStmt()); |
5955 | 0 | } |
5956 | 0 | }; |
5957 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5958 | 0 | CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); |
5959 | 0 | } |
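For illustration, a hypothetical doacross loop that reaches the depend/doacross branch of EmitOMPOrderedDirective above (n and a are placeholders):

  // Hypothetical usage sketch: each depend(sink)/depend(source) pair below is
  // lowered through the doacross handling above; an 'ordered simd' or plain
  // 'ordered' region would instead take the threads/simd branch.
  #pragma omp for ordered(1)
  for (int i = 1; i < n; ++i) {
    #pragma omp ordered depend(sink : i - 1)
    a[i] = a[i - 1] + 1; // loop-carried dependence synchronized via doacross
    #pragma omp ordered depend(source)
  }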
5960 | | |
5961 | | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
5962 | | QualType SrcType, QualType DestType, |
5963 | 0 | SourceLocation Loc) { |
5964 | 0 | assert(CGF.hasScalarEvaluationKind(DestType) && |
5965 | 0 | "DestType must have scalar evaluation kind."); |
5966 | 0 | assert(!Val.isAggregate() && "Must be a scalar or complex."); |
5967 | 0 | return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, |
5968 | 0 | DestType, Loc) |
5969 | 0 | : CGF.EmitComplexToScalarConversion( |
5970 | 0 | Val.getComplexVal(), SrcType, DestType, Loc); |
5971 | 0 | } |
5972 | | |
5973 | | static CodeGenFunction::ComplexPairTy |
5974 | | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
5975 | 0 | QualType DestType, SourceLocation Loc) { |
5976 | 0 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
5977 | 0 | "DestType must have complex evaluation kind."); |
5978 | 0 | CodeGenFunction::ComplexPairTy ComplexVal; |
5979 | 0 | if (Val.isScalar()) { |
5980 | | // Convert the input element to the element type of the complex. |
5981 | 0 | QualType DestElementType = |
5982 | 0 | DestType->castAs<ComplexType>()->getElementType(); |
5983 | 0 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
5984 | 0 | Val.getScalarVal(), SrcType, DestElementType, Loc); |
5985 | 0 | ComplexVal = CodeGenFunction::ComplexPairTy( |
5986 | 0 | ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); |
5987 | 0 | } else { |
5988 | 0 | assert(Val.isComplex() && "Must be a scalar or complex."); |
5989 | 0 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
5990 | 0 | QualType DestElementType = |
5991 | 0 | DestType->castAs<ComplexType>()->getElementType(); |
5992 | 0 | ComplexVal.first = CGF.EmitScalarConversion( |
5993 | 0 | Val.getComplexVal().first, SrcElementType, DestElementType, Loc); |
5994 | 0 | ComplexVal.second = CGF.EmitScalarConversion( |
5995 | 0 | Val.getComplexVal().second, SrcElementType, DestElementType, Loc); |
5996 | 0 | } |
5997 | 0 | return ComplexVal; |
5998 | 0 | } |
5999 | | |
6000 | | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6001 | 0 | LValue LVal, RValue RVal) { |
6002 | 0 | if (LVal.isGlobalReg()) |
6003 | 0 | CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); |
6004 | 0 | else |
6005 | 0 | CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); |
6006 | 0 | } |
6007 | | |
6008 | | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
6009 | | llvm::AtomicOrdering AO, LValue LVal, |
6010 | 0 | SourceLocation Loc) { |
6011 | 0 | if (LVal.isGlobalReg()) |
6012 | 0 | return CGF.EmitLoadOfLValue(LVal, Loc); |
6013 | 0 | return CGF.EmitAtomicLoad( |
6014 | 0 | LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), |
6015 | 0 | LVal.isVolatile()); |
6016 | 0 | } |
6017 | | |
6018 | | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
6019 | 0 | QualType RValTy, SourceLocation Loc) { |
6020 | 0 | switch (getEvaluationKind(LVal.getType())) { |
6021 | 0 | case TEK_Scalar: |
6022 | 0 | EmitStoreThroughLValue(RValue::get(convertToScalarValue( |
6023 | 0 | *this, RVal, RValTy, LVal.getType(), Loc)), |
6024 | 0 | LVal); |
6025 | 0 | break; |
6026 | 0 | case TEK_Complex: |
6027 | 0 | EmitStoreOfComplex( |
6028 | 0 | convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, |
6029 | 0 | /*isInit=*/false); |
6030 | 0 | break; |
6031 | 0 | case TEK_Aggregate: |
6032 | 0 | llvm_unreachable("Must be a scalar or complex."); |
6033 | 0 | } |
6034 | 0 | } |
6035 | | |
6036 | | static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6037 | | const Expr *X, const Expr *V, |
6038 | 0 | SourceLocation Loc) { |
6039 | | // v = x; |
6040 | 0 | assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); |
6041 | 0 | assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); |
6042 | 0 | LValue XLValue = CGF.EmitLValue(X); |
6043 | 0 | LValue VLValue = CGF.EmitLValue(V); |
6044 | 0 | RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc); |
6045 | | // OpenMP, 2.17.7, atomic Construct |
6046 | | // If the read or capture clause is specified and the acquire, acq_rel, or |
6047 | | // seq_cst clause is specified then the strong flush on exit from the atomic |
6048 | | // operation is also an acquire flush. |
6049 | 0 | switch (AO) { |
6050 | 0 | case llvm::AtomicOrdering::Acquire: |
6051 | 0 | case llvm::AtomicOrdering::AcquireRelease: |
6052 | 0 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6053 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, |
6054 | 0 | llvm::AtomicOrdering::Acquire); |
6055 | 0 | break; |
6056 | 0 | case llvm::AtomicOrdering::Monotonic: |
6057 | 0 | case llvm::AtomicOrdering::Release: |
6058 | 0 | break; |
6059 | 0 | case llvm::AtomicOrdering::NotAtomic: |
6060 | 0 | case llvm::AtomicOrdering::Unordered: |
6061 | 0 | llvm_unreachable("Unexpected ordering."); |
6062 | 0 | } |
6063 | 0 | CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); |
6064 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); |
6065 | 0 | } |
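A hypothetical construct lowered by emitOMPAtomicReadExpr (v and x are placeholders):

  // Hypothetical usage sketch: emits an atomic load of x followed by a plain
  // store to v; since seq_cst is specified, the exit flush above is emitted
  // with acquire ordering.
  #pragma omp atomic read seq_cst
  v = x;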
6066 | | |
6067 | | static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, |
6068 | | llvm::AtomicOrdering AO, const Expr *X, |
6069 | 0 | const Expr *E, SourceLocation Loc) { |
6070 | | // x = expr; |
6071 | 0 | assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); |
6072 | 0 | emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); |
6073 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); |
6074 | | // OpenMP, 2.17.7, atomic Construct |
6075 | | // If the write, update, or capture clause is specified and the release, |
6076 | | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6077 | | // the atomic operation is also a release flush. |
6078 | 0 | switch (AO) { |
6079 | 0 | case llvm::AtomicOrdering::Release: |
6080 | 0 | case llvm::AtomicOrdering::AcquireRelease: |
6081 | 0 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6082 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, |
6083 | 0 | llvm::AtomicOrdering::Release); |
6084 | 0 | break; |
6085 | 0 | case llvm::AtomicOrdering::Acquire: |
6086 | 0 | case llvm::AtomicOrdering::Monotonic: |
6087 | 0 | break; |
6088 | 0 | case llvm::AtomicOrdering::NotAtomic: |
6089 | 0 | case llvm::AtomicOrdering::Unordered: |
6090 | 0 | llvm_unreachable("Unexpected ordering."); |
6091 | 0 | } |
6092 | 0 | } |
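A hypothetical construct lowered by emitOMPAtomicWriteExpr (x and expr are placeholders):

  // Hypothetical usage sketch: emits an atomic store to x; because release is
  // specified, the entry flush above is emitted with release ordering.
  #pragma omp atomic write release
  x = expr;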
6093 | | |
6094 | | static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, |
6095 | | RValue Update, |
6096 | | BinaryOperatorKind BO, |
6097 | | llvm::AtomicOrdering AO, |
6098 | 0 | bool IsXLHSInRHSPart) { |
6099 | 0 | ASTContext &Context = CGF.getContext(); |
6100 | | // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
6101 | | // for the 'x' expression is simple, and atomics are allowed for the given
6102 | | // type on the target platform.
6103 | 0 | if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() || |
6104 | 0 | (!isa<llvm::ConstantInt>(Update.getScalarVal()) && |
6105 | 0 | (Update.getScalarVal()->getType() != |
6106 | 0 | X.getAddress(CGF).getElementType())) || |
6107 | 0 | !Context.getTargetInfo().hasBuiltinAtomic( |
6108 | 0 | Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) |
6109 | 0 | return std::make_pair(false, RValue::get(nullptr)); |
6110 | | |
6111 | 0 | auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) { |
6112 | 0 | if (T->isIntegerTy()) |
6113 | 0 | return true; |
6114 | | |
6115 | 0 | if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub)) |
6116 | 0 | return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T)); |
6117 | | |
6118 | 0 | return false; |
6119 | 0 | }; |
6120 | 0 |
6121 | 0 | if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) || |
6122 | 0 | !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO)) |
6123 | 0 | return std::make_pair(false, RValue::get(nullptr)); |
6124 | | |
6125 | 0 | bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy(); |
6126 | 0 | llvm::AtomicRMWInst::BinOp RMWOp; |
6127 | 0 | switch (BO) { |
6128 | 0 | case BO_Add: |
6129 | 0 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd; |
6130 | 0 | break; |
6131 | 0 | case BO_Sub: |
6132 | 0 | if (!IsXLHSInRHSPart) |
6133 | 0 | return std::make_pair(false, RValue::get(nullptr)); |
6134 | 0 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub; |
6135 | 0 | break; |
6136 | 0 | case BO_And: |
6137 | 0 | RMWOp = llvm::AtomicRMWInst::And; |
6138 | 0 | break; |
6139 | 0 | case BO_Or: |
6140 | 0 | RMWOp = llvm::AtomicRMWInst::Or; |
6141 | 0 | break; |
6142 | 0 | case BO_Xor: |
6143 | 0 | RMWOp = llvm::AtomicRMWInst::Xor; |
6144 | 0 | break; |
6145 | 0 | case BO_LT: |
6146 | 0 | if (IsInteger) |
6147 | 0 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6148 | 0 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min |
6149 | 0 | : llvm::AtomicRMWInst::Max) |
6150 | 0 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin |
6151 | 0 | : llvm::AtomicRMWInst::UMax); |
6152 | 0 | else |
6153 | 0 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin |
6154 | 0 | : llvm::AtomicRMWInst::FMax; |
6155 | 0 | break; |
6156 | 0 | case BO_GT: |
6157 | 0 | if (IsInteger) |
6158 | 0 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6159 | 0 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max |
6160 | 0 | : llvm::AtomicRMWInst::Min) |
6161 | 0 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax |
6162 | 0 | : llvm::AtomicRMWInst::UMin); |
6163 | 0 | else |
6164 | 0 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax |
6165 | 0 | : llvm::AtomicRMWInst::FMin; |
6166 | 0 | break; |
6167 | 0 | case BO_Assign: |
6168 | 0 | RMWOp = llvm::AtomicRMWInst::Xchg; |
6169 | 0 | break; |
6170 | 0 | case BO_Mul: |
6171 | 0 | case BO_Div: |
6172 | 0 | case BO_Rem: |
6173 | 0 | case BO_Shl: |
6174 | 0 | case BO_Shr: |
6175 | 0 | case BO_LAnd: |
6176 | 0 | case BO_LOr: |
6177 | 0 | return std::make_pair(false, RValue::get(nullptr)); |
6178 | 0 | case BO_PtrMemD: |
6179 | 0 | case BO_PtrMemI: |
6180 | 0 | case BO_LE: |
6181 | 0 | case BO_GE: |
6182 | 0 | case BO_EQ: |
6183 | 0 | case BO_NE: |
6184 | 0 | case BO_Cmp: |
6185 | 0 | case BO_AddAssign: |
6186 | 0 | case BO_SubAssign: |
6187 | 0 | case BO_AndAssign: |
6188 | 0 | case BO_OrAssign: |
6189 | 0 | case BO_XorAssign: |
6190 | 0 | case BO_MulAssign: |
6191 | 0 | case BO_DivAssign: |
6192 | 0 | case BO_RemAssign: |
6193 | 0 | case BO_ShlAssign: |
6194 | 0 | case BO_ShrAssign: |
6195 | 0 | case BO_Comma: |
6196 | 0 | llvm_unreachable("Unsupported atomic update operation"); |
6197 | 0 | } |
6198 | 0 | llvm::Value *UpdateVal = Update.getScalarVal(); |
6199 | 0 | if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { |
6200 | 0 | if (IsInteger) |
6201 | 0 | UpdateVal = CGF.Builder.CreateIntCast( |
6202 | 0 | IC, X.getAddress(CGF).getElementType(), |
6203 | 0 | X.getType()->hasSignedIntegerRepresentation()); |
6204 | 0 | else |
6205 | 0 | UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC, |
6206 | 0 | X.getAddress(CGF).getElementType()); |
6207 | 0 | } |
6208 | 0 | llvm::Value *Res = |
6209 | 0 | CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO); |
6210 | 0 | return std::make_pair(true, RValue::get(Res)); |
6211 | 0 | } |
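Hypothetical update forms showing how the operator mapping above applies (x is a placeholder int, expr a placeholder expression):

  #pragma omp atomic update
  x += 1;        // BO_Add on an integer: lowered to 'atomicrmw add'
  #pragma omp atomic update
  x = expr - x;  // BO_Sub with x on the RHS: rejected above, CAS fallback
  #pragma omp atomic update
  x *= 2;        // BO_Mul is never an atomicrmw: also takes the CAS fallback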
6212 | | |
6213 | | std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( |
6214 | | LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, |
6215 | | llvm::AtomicOrdering AO, SourceLocation Loc, |
6216 | 0 | const llvm::function_ref<RValue(RValue)> CommonGen) { |
6217 | | // Update expressions are allowed to have the following forms: |
6218 | | // x binop= expr; -> xrval binop expr;
6219 | | // x++, ++x -> xrval + 1;
6220 | | // x--, --x -> xrval - 1;
6221 | | // x = x binop expr; -> xrval binop expr;
6222 | | // x = expr Op x; -> expr binop xrval;
6223 | 0 | auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); |
6224 | 0 | if (!Res.first) { |
6225 | 0 | if (X.isGlobalReg()) { |
6226 | | // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop |
6227 | | // 'xrval'. |
6228 | 0 | EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); |
6229 | 0 | } else { |
6230 | | // Perform compare-and-swap procedure. |
6231 | 0 | EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); |
6232 | 0 | } |
6233 | 0 | } |
6234 | 0 | return Res; |
6235 | 0 | } |
6236 | | |
6237 | | static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, |
6238 | | llvm::AtomicOrdering AO, const Expr *X, |
6239 | | const Expr *E, const Expr *UE, |
6240 | 0 | bool IsXLHSInRHSPart, SourceLocation Loc) { |
6241 | 0 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6242 | 0 | "Update expr in 'atomic update' must be a binary operator."); |
6243 | 0 | const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); |
6244 | | // Update expressions are allowed to have the following forms: |
6245 | | // x binop= expr; -> xrval binop expr;
6246 | | // x++, ++x -> xrval + 1;
6247 | | // x--, --x -> xrval - 1;
6248 | | // x = x binop expr; -> xrval binop expr;
6249 | | // x = expr Op x; -> expr binop xrval;
6250 | 0 | assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); |
6251 | 0 | LValue XLValue = CGF.EmitLValue(X); |
6252 | 0 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6253 | 0 | const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); |
6254 | 0 | const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); |
6255 | 0 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6256 | 0 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6257 | 0 | auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { |
6258 | 0 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6259 | 0 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6260 | 0 | return CGF.EmitAnyExpr(UE); |
6261 | 0 | }; |
6262 | 0 | (void)CGF.EmitOMPAtomicSimpleUpdateExpr( |
6263 | 0 | XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); |
6264 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); |
6265 | | // OpenMP, 2.17.7, atomic Construct |
6266 | | // If the write, update, or capture clause is specified and the release, |
6267 | | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6268 | | // the atomic operation is also a release flush. |
6269 | 0 | switch (AO) { |
6270 | 0 | case llvm::AtomicOrdering::Release: |
6271 | 0 | case llvm::AtomicOrdering::AcquireRelease: |
6272 | 0 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6273 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, |
6274 | 0 | llvm::AtomicOrdering::Release); |
6275 | 0 | break; |
6276 | 0 | case llvm::AtomicOrdering::Acquire: |
6277 | 0 | case llvm::AtomicOrdering::Monotonic: |
6278 | 0 | break; |
6279 | 0 | case llvm::AtomicOrdering::NotAtomic: |
6280 | 0 | case llvm::AtomicOrdering::Unordered: |
6281 | 0 | llvm_unreachable("Unexpected ordering."); |
6282 | 0 | } |
6283 | 0 | } |
6284 | | |
6285 | | static RValue convertToType(CodeGenFunction &CGF, RValue Value, |
6286 | | QualType SourceType, QualType ResType, |
6287 | 0 | SourceLocation Loc) { |
6288 | 0 | switch (CGF.getEvaluationKind(ResType)) { |
6289 | 0 | case TEK_Scalar: |
6290 | 0 | return RValue::get( |
6291 | 0 | convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); |
6292 | 0 | case TEK_Complex: { |
6293 | 0 | auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); |
6294 | 0 | return RValue::getComplex(Res.first, Res.second); |
6295 | 0 | } |
6296 | 0 | case TEK_Aggregate: |
6297 | 0 | break; |
6298 | 0 | } |
6299 | 0 | llvm_unreachable("Must be a scalar or complex."); |
6300 | 0 | } |
6301 | | |
6302 | | static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, |
6303 | | llvm::AtomicOrdering AO, |
6304 | | bool IsPostfixUpdate, const Expr *V, |
6305 | | const Expr *X, const Expr *E, |
6306 | | const Expr *UE, bool IsXLHSInRHSPart, |
6307 | 0 | SourceLocation Loc) { |
6308 | 0 | assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); |
6309 | 0 | assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); |
6310 | 0 | RValue NewVVal; |
6311 | 0 | LValue VLValue = CGF.EmitLValue(V); |
6312 | 0 | LValue XLValue = CGF.EmitLValue(X); |
6313 | 0 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6314 | 0 | QualType NewVValType; |
6315 | 0 | if (UE) { |
6316 | | // 'x' is updated with some additional value. |
6317 | 0 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6318 | 0 | "Update expr in 'atomic capture' must be a binary operator."); |
6319 | 0 | const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); |
6320 | | // Update expressions are allowed to have the following forms: |
6321 | | // x binop= expr; -> xrval binop expr;
6322 | | // x++, ++x -> xrval + 1;
6323 | | // x--, --x -> xrval - 1;
6324 | | // x = x binop expr; -> xrval binop expr;
6325 | | // x = expr Op x; -> expr binop xrval;
6326 | 0 | const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); |
6327 | 0 | const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); |
6328 | 0 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6329 | 0 | NewVValType = XRValExpr->getType(); |
6330 | 0 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6331 | 0 | auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, |
6332 | 0 | IsPostfixUpdate](RValue XRValue) { |
6333 | 0 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6334 | 0 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6335 | 0 | RValue Res = CGF.EmitAnyExpr(UE); |
6336 | 0 | NewVVal = IsPostfixUpdate ? XRValue : Res; |
6337 | 0 | return Res; |
6338 | 0 | }; |
6339 | 0 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6340 | 0 | XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); |
6341 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); |
6342 | 0 | if (Res.first) { |
6343 | | // 'atomicrmw' instruction was generated. |
6344 | 0 | if (IsPostfixUpdate) { |
6345 | | // Use old value from 'atomicrmw'. |
6346 | 0 | NewVVal = Res.second; |
6347 | 0 | } else { |
6348 | | // 'atomicrmw' does not provide new value, so evaluate it using old |
6349 | | // value of 'x'. |
6350 | 0 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6351 | 0 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); |
6352 | 0 | NewVVal = CGF.EmitAnyExpr(UE); |
6353 | 0 | } |
6354 | 0 | } |
6355 | 0 | } else { |
6356 | | // 'x' is simply rewritten with some 'expr'. |
6357 | 0 | NewVValType = X->getType().getNonReferenceType(); |
6358 | 0 | ExprRValue = convertToType(CGF, ExprRValue, E->getType(), |
6359 | 0 | X->getType().getNonReferenceType(), Loc); |
6360 | 0 | auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { |
6361 | 0 | NewVVal = XRValue; |
6362 | 0 | return ExprRValue; |
6363 | 0 | }; |
6364 | | // Try to perform atomicrmw xchg, otherwise simple exchange. |
6365 | 0 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6366 | 0 | XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, |
6367 | 0 | Loc, Gen); |
6368 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); |
6369 | 0 | if (Res.first) { |
6370 | | // 'atomicrmw' instruction was generated. |
6371 | 0 | NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; |
6372 | 0 | } |
6373 | 0 | } |
6374 | | // Emit post-update store to 'v' of old/new 'x' value. |
6375 | 0 | CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); |
6376 | 0 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); |
6377 | | // OpenMP 5.1 removes the required flush for the capture clause.
6378 | 0 | if (CGF.CGM.getLangOpts().OpenMP < 51) { |
6379 | | // OpenMP, 2.17.7, atomic Construct |
6380 | | // If the write, update, or capture clause is specified and the release, |
6381 | | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6382 | | // the atomic operation is also a release flush. |
6383 | | // If the read or capture clause is specified and the acquire, acq_rel, or |
6384 | | // seq_cst clause is specified then the strong flush on exit from the atomic |
6385 | | // operation is also an acquire flush. |
6386 | 0 | switch (AO) { |
6387 | 0 | case llvm::AtomicOrdering::Release: |
6388 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, |
6389 | 0 | llvm::AtomicOrdering::Release); |
6390 | 0 | break; |
6391 | 0 | case llvm::AtomicOrdering::Acquire: |
6392 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, |
6393 | 0 | llvm::AtomicOrdering::Acquire); |
6394 | 0 | break; |
6395 | 0 | case llvm::AtomicOrdering::AcquireRelease: |
6396 | 0 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6397 | 0 | CGF.CGM.getOpenMPRuntime().emitFlush( |
6398 | 0 | CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease); |
6399 | 0 | break; |
6400 | 0 | case llvm::AtomicOrdering::Monotonic: |
6401 | 0 | break; |
6402 | 0 | case llvm::AtomicOrdering::NotAtomic: |
6403 | 0 | case llvm::AtomicOrdering::Unordered: |
6404 | 0 | llvm_unreachable("Unexpected ordering."); |
6405 | 0 | } |
6406 | 0 | } |
6407 | 0 | } |
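Hypothetical capture forms handled above (v, x, and expr are placeholders):

  #pragma omp atomic capture
  v = x++;              // postfix: v receives the old value of x
  #pragma omp atomic capture
  { x += expr; v = x; } // update-then-read: v receives the new value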
6408 | | |
6409 | | static void emitOMPAtomicCompareExpr( |
6410 | | CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, |
6411 | | const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, |
6412 | | const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, |
6413 | 0 | SourceLocation Loc) { |
6414 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = |
6415 | 0 | CGF.CGM.getOpenMPRuntime().getOMPBuilder(); |
6416 | 0 |
6417 | 0 | OMPAtomicCompareOp Op; |
6418 | 0 | assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator"); |
6419 | 0 | switch (cast<BinaryOperator>(CE)->getOpcode()) { |
6420 | 0 | case BO_EQ: |
6421 | 0 | Op = OMPAtomicCompareOp::EQ; |
6422 | 0 | break; |
6423 | 0 | case BO_LT: |
6424 | 0 | Op = OMPAtomicCompareOp::MIN; |
6425 | 0 | break; |
6426 | 0 | case BO_GT: |
6427 | 0 | Op = OMPAtomicCompareOp::MAX; |
6428 | 0 | break; |
6429 | 0 | default: |
6430 | 0 | llvm_unreachable("unsupported atomic compare binary operator"); |
6431 | 0 | } |
6432 | | |
6433 | 0 | LValue XLVal = CGF.EmitLValue(X); |
6434 | 0 | Address XAddr = XLVal.getAddress(CGF); |
6435 | 0 |
6436 | 0 | auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) { |
6437 | 0 | if (X->getType() == E->getType()) |
6438 | 0 | return CGF.EmitScalarExpr(E); |
6439 | 0 | const Expr *NewE = E->IgnoreImplicitAsWritten(); |
6440 | 0 | llvm::Value *V = CGF.EmitScalarExpr(NewE); |
6441 | 0 | if (NewE->getType() == X->getType()) |
6442 | 0 | return V; |
6443 | 0 | return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc); |
6444 | 0 | }; |
6445 | 0 |
6446 | 0 | llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E); |
6447 | 0 | llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr; |
6448 | 0 | if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal)) |
6449 | 0 | EVal = CGF.Builder.CreateIntCast( |
6450 | 0 | CI, XLVal.getAddress(CGF).getElementType(), |
6451 | 0 | E->getType()->hasSignedIntegerRepresentation()); |
6452 | 0 | if (DVal) |
6453 | 0 | if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal)) |
6454 | 0 | DVal = CGF.Builder.CreateIntCast( |
6455 | 0 | CI, XLVal.getAddress(CGF).getElementType(), |
6456 | 0 | D->getType()->hasSignedIntegerRepresentation()); |
6457 | 0 |
6458 | 0 | llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ |
6459 | 0 | XAddr.getPointer(), XAddr.getElementType(), |
6460 | 0 | X->getType()->hasSignedIntegerRepresentation(), |
6461 | 0 | X->getType().isVolatileQualified()}; |
6462 | 0 | llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal; |
6463 | 0 | if (V) { |
6464 | 0 | LValue LV = CGF.EmitLValue(V); |
6465 | 0 | Address Addr = LV.getAddress(CGF); |
6466 | 0 | VOpVal = {Addr.getPointer(), Addr.getElementType(), |
6467 | 0 | V->getType()->hasSignedIntegerRepresentation(), |
6468 | 0 | V->getType().isVolatileQualified()}; |
6469 | 0 | } |
6470 | 0 | if (R) { |
6471 | 0 | LValue LV = CGF.EmitLValue(R); |
6472 | 0 | Address Addr = LV.getAddress(CGF); |
6473 | 0 | ROpVal = {Addr.getPointer(), Addr.getElementType(), |
6474 | 0 | R->getType()->hasSignedIntegerRepresentation(), |
6475 | 0 | R->getType().isVolatileQualified()}; |
6476 | 0 | } |
6477 | 0 |
6478 | 0 | if (FailAO == llvm::AtomicOrdering::NotAtomic) { |
6479 | | // fail clause was not mentioned on the
6480 | | // "#pragma omp atomic compare" construct. |
6481 | 0 | CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare( |
6482 | 0 | CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr, |
6483 | 0 | IsPostfixUpdate, IsFailOnly)); |
6484 | 0 | } else |
6485 | 0 | CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare( |
6486 | 0 | CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr, |
6487 | 0 | IsPostfixUpdate, IsFailOnly, FailAO)); |
6488 | 0 | } |
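Hypothetical compare forms showing the operator selection above (x, e, and d are placeholders):

  #pragma omp atomic compare
  x = x < e ? e : x;     // BO_LT conditional update: handled as MIN/MAX
  #pragma omp atomic compare
  if (x == e) { x = d; } // BO_EQ: lowered to a compare-and-exchange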
6489 | | |
6490 | | static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
6491 | | llvm::AtomicOrdering AO, |
6492 | | llvm::AtomicOrdering FailAO, bool IsPostfixUpdate, |
6493 | | const Expr *X, const Expr *V, const Expr *R, |
6494 | | const Expr *E, const Expr *UE, const Expr *D, |
6495 | | const Expr *CE, bool IsXLHSInRHSPart, |
6496 | 0 | bool IsFailOnly, SourceLocation Loc) { |
6497 | 0 | switch (Kind) { |
6498 | 0 | case OMPC_read: |
6499 | 0 | emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); |
6500 | 0 | break; |
6501 | 0 | case OMPC_write: |
6502 | 0 | emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); |
6503 | 0 | break; |
6504 | 0 | case OMPC_unknown: |
6505 | 0 | case OMPC_update: |
6506 | 0 | emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); |
6507 | 0 | break; |
6508 | 0 | case OMPC_capture: |
6509 | 0 | emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, |
6510 | 0 | IsXLHSInRHSPart, Loc); |
6511 | 0 | break; |
6512 | 0 | case OMPC_compare: { |
6513 | 0 | emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE, |
6514 | 0 | IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc); |
6515 | 0 | break; |
6516 | 0 | } |
6517 | 0 | default: |
6518 | 0 | llvm_unreachable("Clause is not allowed in 'omp atomic'."); |
6519 | 0 | } |
6520 | 0 | } |
6521 | | |
6522 | 0 | void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
6523 | 0 | llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; |
6524 | | // Fail Memory Clause Ordering. |
6525 | 0 | llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic; |
6526 | 0 | bool MemOrderingSpecified = false; |
6527 | 0 | if (S.getSingleClause<OMPSeqCstClause>()) { |
6528 | 0 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
6529 | 0 | MemOrderingSpecified = true; |
6530 | 0 | } else if (S.getSingleClause<OMPAcqRelClause>()) { |
6531 | 0 | AO = llvm::AtomicOrdering::AcquireRelease; |
6532 | 0 | MemOrderingSpecified = true; |
6533 | 0 | } else if (S.getSingleClause<OMPAcquireClause>()) { |
6534 | 0 | AO = llvm::AtomicOrdering::Acquire; |
6535 | 0 | MemOrderingSpecified = true; |
6536 | 0 | } else if (S.getSingleClause<OMPReleaseClause>()) { |
6537 | 0 | AO = llvm::AtomicOrdering::Release; |
6538 | 0 | MemOrderingSpecified = true; |
6539 | 0 | } else if (S.getSingleClause<OMPRelaxedClause>()) { |
6540 | 0 | AO = llvm::AtomicOrdering::Monotonic; |
6541 | 0 | MemOrderingSpecified = true; |
6542 | 0 | } |
6543 | 0 | llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered; |
6544 | 0 | OpenMPClauseKind Kind = OMPC_unknown; |
6545 | 0 | for (const OMPClause *C : S.clauses()) { |
6546 | | // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
6547 | | // if it is first). |
6548 | 0 | OpenMPClauseKind K = C->getClauseKind(); |
6549 | 0 | if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire || |
6550 | 0 | K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint) |
6551 | 0 | continue; |
6552 | 0 | Kind = K; |
6553 | 0 | KindsEncountered.insert(K); |
6554 | 0 | } |
6555 | | // We just need to correct Kind here. No need to set a bool saying it is |
6556 | | // actually compare capture because we can tell from whether V and R are |
6557 | | // nullptr. |
6558 | 0 | if (KindsEncountered.contains(OMPC_compare) && |
6559 | 0 | KindsEncountered.contains(OMPC_capture)) |
6560 | 0 | Kind = OMPC_compare; |
6561 | 0 | if (!MemOrderingSpecified) { |
6562 | 0 | llvm::AtomicOrdering DefaultOrder = |
6563 | 0 | CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6564 | 0 | if (DefaultOrder == llvm::AtomicOrdering::Monotonic || |
6565 | 0 | DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || |
6566 | 0 | (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && |
6567 | 0 | Kind == OMPC_capture)) { |
6568 | 0 | AO = DefaultOrder; |
6569 | 0 | } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { |
6570 | 0 | if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { |
6571 | 0 | AO = llvm::AtomicOrdering::Release; |
6572 | 0 | } else if (Kind == OMPC_read) { |
6573 | 0 | assert(Kind == OMPC_read && "Unexpected atomic kind."); |
6574 | 0 | AO = llvm::AtomicOrdering::Acquire; |
6575 | 0 | } |
6576 | 0 | } |
6577 | 0 | } |
6578 | | |
6579 | 0 | if (KindsEncountered.contains(OMPC_compare) && |
6580 | 0 | KindsEncountered.contains(OMPC_fail)) { |
6581 | 0 | Kind = OMPC_compare; |
6582 | 0 | const auto *FailClause = S.getSingleClause<OMPFailClause>(); |
6583 | 0 | if (FailClause) { |
6584 | 0 | OpenMPClauseKind FailParameter = FailClause->getFailParameter(); |
6585 | 0 | if (FailParameter == llvm::omp::OMPC_relaxed) |
6586 | 0 | FailAO = llvm::AtomicOrdering::Monotonic; |
6587 | 0 | else if (FailParameter == llvm::omp::OMPC_acquire) |
6588 | 0 | FailAO = llvm::AtomicOrdering::Acquire; |
6589 | 0 | else if (FailParameter == llvm::omp::OMPC_seq_cst) |
6590 | 0 | FailAO = llvm::AtomicOrdering::SequentiallyConsistent; |
6591 | 0 | } |
6592 | 0 | } |
6593 | 0 |
6594 | 0 | LexicalScope Scope(*this, S.getSourceRange()); |
6595 | 0 | EmitStopPoint(S.getAssociatedStmt()); |
6596 | 0 | emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(), |
6597 | 0 | S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(), |
6598 | 0 | S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(), |
6599 | 0 | S.isFailOnly(), S.getBeginLoc()); |
6600 | 0 | } |
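A hypothetical directive tying the clause handling above together (v, x, e, and d are placeholders):

  // Hypothetical usage sketch: 'compare' plus 'capture' collapses Kind to
  // OMPC_compare above (non-null V/R identifies the capture variant), and
  // fail(acquire) sets FailAO to Acquire as the failure ordering.
  #pragma omp atomic compare capture fail(acquire)
  { v = x; if (x == e) { x = d; } }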
6601 | | |
6602 | | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
6603 | | const OMPExecutableDirective &S, |
6604 | 0 | const RegionCodeGenTy &CodeGen) { |
6605 | 0 | assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); |
6606 | 0 | CodeGenModule &CGM = CGF.CGM; |
6607 | | |
6608 | | // On device emit this construct as inlined code. |
6609 | 0 | if (CGM.getLangOpts().OpenMPIsTargetDevice) { |
6610 | 0 | OMPLexicalScope Scope(CGF, S, OMPD_target); |
6611 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective( |
6612 | 0 | CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6613 | 0 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
6614 | 0 | }); |
6615 | 0 | return; |
6616 | 0 | } |
6617 | | |
6618 | 0 | auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
6619 | 0 | llvm::Function *Fn = nullptr; |
6620 | 0 | llvm::Constant *FnID = nullptr; |
6621 | 0 |
6622 | 0 | const Expr *IfCond = nullptr; |
6623 | | // Check for the (at most one) if clause associated with the target region.
6624 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
6625 | 0 | if (C->getNameModifier() == OMPD_unknown || |
6626 | 0 | C->getNameModifier() == OMPD_target) { |
6627 | 0 | IfCond = C->getCondition(); |
6628 | 0 | break; |
6629 | 0 | } |
6630 | 0 | } |
6631 | | |
6632 | | // Check if we have any device clause associated with the directive. |
6633 | 0 | llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( |
6634 | 0 | nullptr, OMPC_DEVICE_unknown); |
6635 | 0 | if (auto *C = S.getSingleClause<OMPDeviceClause>()) |
6636 | 0 | Device.setPointerAndInt(C->getDevice(), C->getModifier()); |
6637 | | |
6638 | | // Check if we have an if clause whose conditional always evaluates to false |
6639 | | // or if we do not have any targets specified. If so the target region is not |
6640 | | // an offload entry point. |
6641 | 0 | bool IsOffloadEntry = true; |
6642 | 0 | if (IfCond) { |
6643 | 0 | bool Val; |
6644 | 0 | if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) |
6645 | 0 | IsOffloadEntry = false; |
6646 | 0 | } |
6647 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
6648 | 0 | IsOffloadEntry = false; |
6649 | 0 |
6650 | 0 | if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { |
6651 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
6652 | 0 | DiagnosticsEngine::Error, |
6653 | 0 | "No offloading entry generated while offloading is mandatory."); |
6654 | 0 | CGM.getDiags().Report(DiagID); |
6655 | 0 | } |
6656 | 0 |
6657 | 0 | assert(CGF.CurFuncDecl && "No parent declaration for target region!"); |
6658 | 0 | StringRef ParentName; |
6659 | | // In case we have Ctors/Dtors we use the complete type variant to produce |
6660 | | // the mangling of the device outlined kernel. |
6661 | 0 | if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) |
6662 | 0 | ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); |
6663 | 0 | else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) |
6664 | 0 | ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); |
6665 | 0 | else |
6666 | 0 | ParentName = |
6667 | 0 | CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); |
6668 | | |
6669 | | // Emit target region as a standalone region. |
6670 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, |
6671 | 0 | IsOffloadEntry, CodeGen); |
6672 | 0 | OMPLexicalScope Scope(CGF, S, OMPD_task); |
6673 | 0 | auto &&SizeEmitter = |
6674 | 0 | [IsOffloadEntry](CodeGenFunction &CGF, |
6675 | 0 | const OMPLoopDirective &D) -> llvm::Value * { |
6676 | 0 | if (IsOffloadEntry) { |
6677 | 0 | OMPLoopScope LoopScope(CGF, D); // Keep the loop scope alive while emitting below.
6678 | | // Emit calculation of the iterations count. |
6679 | 0 | llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); |
6680 | 0 | NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, |
6681 | 0 | /*isSigned=*/false); |
6682 | 0 | return NumIterations; |
6683 | 0 | } |
6684 | 0 | return nullptr; |
6685 | 0 | }; |
6686 | 0 | CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, |
6687 | 0 | SizeEmitter); |
6688 | 0 | } |
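A hypothetical target region showing the clauses consumed above (n and a are placeholders):

  // Hypothetical usage sketch: the if clause becomes IfCond and the device
  // clause fills the Device pointer/modifier pair; a constant-false condition
  // (or an empty target-triple list) keeps the region from becoming an
  // offload entry point.
  #pragma omp target if(target : n > 1024) device(1) map(tofrom : a[0 : n])
  {
    for (int i = 0; i < n; ++i)
      a[i] += 1;
  }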
6689 | | |
6690 | | static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, |
6691 | 0 | PrePostActionTy &Action) { |
6692 | 0 | Action.Enter(CGF); |
6693 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6694 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6695 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6696 | 0 | (void)PrivateScope.Privatize(); |
6697 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6698 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6699 | 0 |
6700 | 0 | CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); |
6701 | 0 | CGF.EnsureInsertPoint(); |
6702 | 0 | } |
6703 | | |
6704 | | void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
6705 | | StringRef ParentName, |
6706 | 0 | const OMPTargetDirective &S) { |
6707 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6708 | 0 | emitTargetRegion(CGF, S, Action); |
6709 | 0 | }; |
6710 | 0 | llvm::Function *Fn; |
6711 | 0 | llvm::Constant *Addr; |
6712 | | // Emit target region as a standalone region. |
6713 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6714 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6715 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
6716 | 0 | } |
6717 | | |
6718 | 0 | void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { |
6719 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6720 | 0 | emitTargetRegion(CGF, S, Action); |
6721 | 0 | }; |
6722 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6723 | 0 | } |
6724 | | |
6725 | | static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, |
6726 | | const OMPExecutableDirective &S, |
6727 | | OpenMPDirectiveKind InnermostKind, |
6728 | 0 | const RegionCodeGenTy &CodeGen) { |
6729 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); |
6730 | 0 | llvm::Function *OutlinedFn = |
6731 | 0 | CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( |
6732 | 0 | CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, |
6733 | 0 | CodeGen); |
6734 | 0 |
6735 | 0 | const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); |
6736 | 0 | const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
6737 | 0 | if (NT || TL) { |
6738 | 0 | const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; |
6739 | 0 | const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; |
6740 | 0 |
6741 | 0 | CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, |
6742 | 0 | S.getBeginLoc()); |
6743 | 0 | } |
6744 | 0 |
6745 | 0 | OMPTeamsScope Scope(CGF, S); |
6746 | 0 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6747 | 0 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
6748 | 0 | CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, |
6749 | 0 | CapturedVars); |
6750 | 0 | } |
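A hypothetical teams region showing the clauses consumed above:

  // Hypothetical usage sketch: num_teams and thread_limit populate NT and TL
  // and are forwarded to emitNumTeamsClause before emitTeamsCall invokes the
  // outlined region.
  #pragma omp target
  #pragma omp teams num_teams(8) thread_limit(64)
  {
    // runs in at most 8 teams, each limited to 64 threads
  }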
6751 | | |
6752 | 0 | void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |
6753 | | // Emit teams region as a standalone region. |
6754 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6755 | 0 | Action.Enter(CGF); |
6756 | 0 | OMPPrivateScope PrivateScope(CGF); |
6757 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6758 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6759 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6760 | 0 | (void)PrivateScope.Privatize(); |
6761 | 0 | CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); |
6762 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6763 | 0 | }; |
6764 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6765 | 0 | emitPostUpdateForReductionClause(*this, S, |
6766 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6767 | 0 | } |
6768 | | |
6769 | | static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6770 | 0 | const OMPTargetTeamsDirective &S) { |
6771 | 0 | auto *CS = S.getCapturedStmt(OMPD_teams); |
6772 | 0 | Action.Enter(CGF); |
6773 | | // Emit teams region as a standalone region. |
6774 | 0 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6775 | 0 | Action.Enter(CGF); |
6776 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6777 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6778 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6779 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6780 | 0 | (void)PrivateScope.Privatize(); |
6781 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6782 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6783 | 0 | CGF.EmitStmt(CS->getCapturedStmt()); |
6784 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6785 | 0 | }; |
6786 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); |
6787 | 0 | emitPostUpdateForReductionClause(CGF, S, |
6788 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6789 | 0 | } |
6790 | | |
6791 | | void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( |
6792 | | CodeGenModule &CGM, StringRef ParentName, |
6793 | 0 | const OMPTargetTeamsDirective &S) { |
6794 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6795 | 0 | emitTargetTeamsRegion(CGF, Action, S); |
6796 | 0 | }; |
6797 | 0 | llvm::Function *Fn; |
6798 | 0 | llvm::Constant *Addr; |
6799 | | // Emit target region as a standalone region. |
6800 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6801 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6802 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
6803 | 0 | } |
6804 | | |
6805 | | void CodeGenFunction::EmitOMPTargetTeamsDirective( |
6806 | 0 | const OMPTargetTeamsDirective &S) { |
6807 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6808 | 0 | emitTargetTeamsRegion(CGF, Action, S); |
6809 | 0 | }; |
6810 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6811 | 0 | } |
6812 | | |
6813 | | static void |
6814 | | emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6815 | 0 | const OMPTargetTeamsDistributeDirective &S) { |
6816 | 0 | Action.Enter(CGF); |
6817 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6818 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6819 | 0 | }; |
6820 | | |
6821 | | // Emit teams region as a standalone region. |
6822 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6823 | 0 | PrePostActionTy &Action) { |
6824 | 0 | Action.Enter(CGF); |
6825 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6826 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6827 | 0 | (void)PrivateScope.Privatize(); |
6828 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6829 | 0 | CodeGenDistribute); |
6830 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6831 | 0 | }; |
6832 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); |
6833 | 0 | emitPostUpdateForReductionClause(CGF, S, |
6834 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6835 | 0 | } |
6836 | | |
6837 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( |
6838 | | CodeGenModule &CGM, StringRef ParentName, |
6839 | 0 | const OMPTargetTeamsDistributeDirective &S) { |
6840 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6841 | 0 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6842 | 0 | }; |
6843 | 0 | llvm::Function *Fn; |
6844 | 0 | llvm::Constant *Addr; |
6845 | | // Emit target region as a standalone region. |
6846 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6847 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6848 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
6849 | 0 | } |
6850 | | |
6851 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( |
6852 | 0 | const OMPTargetTeamsDistributeDirective &S) { |
6853 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6854 | 0 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6855 | 0 | }; |
6856 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6857 | 0 | } |
6858 | | |
6859 | | static void emitTargetTeamsDistributeSimdRegion( |
6860 | | CodeGenFunction &CGF, PrePostActionTy &Action, |
6861 | 0 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6862 | 0 | Action.Enter(CGF); |
6863 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6864 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6865 | 0 | }; |
6866 | | |
6867 | | // Emit teams region as a standalone region. |
6868 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6869 | 0 | PrePostActionTy &Action) { |
6870 | 0 | Action.Enter(CGF); |
6871 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6872 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6873 | 0 | (void)PrivateScope.Privatize(); |
6874 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6875 | 0 | CodeGenDistribute); |
6876 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6877 | 0 | }; |
6878 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); |
6879 | 0 | emitPostUpdateForReductionClause(CGF, S, |
6880 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6881 | 0 | } |
6882 | | |
6883 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( |
6884 | | CodeGenModule &CGM, StringRef ParentName, |
6885 | 0 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6886 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6887 | 0 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6888 | 0 | }; |
6889 | 0 | llvm::Function *Fn; |
6890 | 0 | llvm::Constant *Addr; |
6891 | | // Emit target region as a standalone region. |
6892 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6893 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6894 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
6895 | 0 | } |
6896 | | |
6897 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( |
6898 | 0 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6899 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6900 | 0 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6901 | 0 | }; |
6902 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6903 | 0 | } |
6904 | | |
6905 | | void CodeGenFunction::EmitOMPTeamsDistributeDirective( |
6906 | 0 | const OMPTeamsDistributeDirective &S) { |
6907 | 0 |
6908 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6909 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6910 | 0 | }; |
6911 | | |
6912 | | // Emit teams region as a standalone region. |
6913 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6914 | 0 | PrePostActionTy &Action) { |
6915 | 0 | Action.Enter(CGF); |
6916 | 0 | OMPPrivateScope PrivateScope(CGF); |
6917 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6918 | 0 | (void)PrivateScope.Privatize(); |
6919 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6920 | 0 | CodeGenDistribute); |
6921 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6922 | 0 | }; |
6923 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6924 | 0 | emitPostUpdateForReductionClause(*this, S, |
6925 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6926 | 0 | } |
6927 | | |
6928 | | void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( |
6929 | 0 | const OMPTeamsDistributeSimdDirective &S) { |
6930 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6931 | 0 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6932 | 0 | }; |
6933 | | |
6934 | | // Emit teams region as a standalone region. |
6935 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6936 | 0 | PrePostActionTy &Action) { |
6937 | 0 | Action.Enter(CGF); |
6938 | 0 | OMPPrivateScope PrivateScope(CGF); |
6939 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6940 | 0 | (void)PrivateScope.Privatize(); |
6941 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, |
6942 | 0 | CodeGenDistribute); |
6943 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6944 | 0 | }; |
6945 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); |
6946 | 0 | emitPostUpdateForReductionClause(*this, S, |
6947 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6948 | 0 | } |
6949 | | |
6950 | | void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( |
6951 | 0 | const OMPTeamsDistributeParallelForDirective &S) { |
6952 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6953 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
6954 | 0 | S.getDistInc()); |
6955 | 0 | }; |
6956 | | |
6957 | | // Emit teams region as a standalone region. |
6958 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6959 | 0 | PrePostActionTy &Action) { |
6960 | 0 | Action.Enter(CGF); |
6961 | 0 | OMPPrivateScope PrivateScope(CGF); |
6962 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6963 | 0 | (void)PrivateScope.Privatize(); |
6964 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6965 | 0 | CodeGenDistribute); |
6966 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6967 | 0 | }; |
6968 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); |
6969 | 0 | emitPostUpdateForReductionClause(*this, S, |
6970 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6971 | 0 | } |
6972 | | |
6973 | | void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( |
6974 | 0 | const OMPTeamsDistributeParallelForSimdDirective &S) { |
6975 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6976 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
6977 | 0 | S.getDistInc()); |
6978 | 0 | }; |
6979 | | |
6980 | | // Emit teams region as a standalone region. |
6981 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6982 | 0 | PrePostActionTy &Action) { |
6983 | 0 | Action.Enter(CGF); |
6984 | 0 | OMPPrivateScope PrivateScope(CGF); |
6985 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6986 | 0 | (void)PrivateScope.Privatize(); |
6987 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
6988 | 0 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
6989 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6990 | 0 | }; |
6991 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, |
6992 | 0 | CodeGen); |
6993 | 0 | emitPostUpdateForReductionClause(*this, S, |
6994 | 0 | [](CodeGenFunction &) { return nullptr; }); |
6995 | 0 | } |
6996 | | |
6997 | 0 | void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { |
6998 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
6999 | 0 | llvm::Value *Device = nullptr; |
7000 | 0 | llvm::Value *NumDependences = nullptr; |
7001 | 0 | llvm::Value *DependenceList = nullptr; |
7002 | |
7003 | 0 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7004 | 0 | Device = EmitScalarExpr(C->getDevice()); |
7005 | | |
7006 | |   // Build the list of dependences and emit them. |
7007 | 0 | OMPTaskDataTy Data; |
7008 | 0 | buildDependences(S, Data); |
7009 | 0 | if (!Data.Dependences.empty()) { |
7010 | 0 | Address DependenciesArray = Address::invalid(); |
7011 | 0 | std::tie(NumDependences, DependenciesArray) = |
7012 | 0 | CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences, |
7013 | 0 | S.getBeginLoc()); |
7014 | 0 | DependenceList = DependenciesArray.getPointer(); |
7015 | 0 | } |
7016 | 0 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
7017 | |
7018 | 0 | assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() || |
7019 | 0 | S.getSingleClause<OMPDestroyClause>() || |
7020 | 0 | S.getSingleClause<OMPUseClause>())) && |
7021 | 0 |          "The nowait clause must be combined with an init, use, or destroy clause."); |
7022 | | |
7023 | 0 | if (const auto *C = S.getSingleClause<OMPInitClause>()) { |
7024 | 0 | llvm::Value *InteropvarPtr = |
7025 | 0 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7026 | 0 | llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown; |
7027 | 0 | if (C->getIsTarget()) { |
7028 | 0 | InteropType = llvm::omp::OMPInteropType::Target; |
7029 | 0 | } else { |
7030 | 0 | assert(C->getIsTargetSync() && "Expected interop-type target/targetsync"); |
7031 | 0 | InteropType = llvm::omp::OMPInteropType::TargetSync; |
7032 | 0 | } |
7033 | 0 | OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device, |
7034 | 0 | NumDependences, DependenceList, |
7035 | 0 | Data.HasNowaitClause); |
7036 | 0 | } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) { |
7037 | 0 | llvm::Value *InteropvarPtr = |
7038 | 0 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7039 | 0 | OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, |
7040 | 0 | NumDependences, DependenceList, |
7041 | 0 | Data.HasNowaitClause); |
7042 | 0 | } else if (const auto *C = S.getSingleClause<OMPUseClause>()) { |
7043 | 0 | llvm::Value *InteropvarPtr = |
7044 | 0 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7045 | 0 | OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, |
7046 | 0 | NumDependences, DependenceList, |
7047 | 0 | Data.HasNowaitClause); |
7048 | 0 | } |
7049 | 0 | } |
7050 | | |
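     | | // For orientation, the three interop clause forms handled above, as a |
     | | // hypothetical sketch ('obj' and 'dev' are illustrative names; |
     | | // omp_interop_t and omp_interop_none come from <omp.h>): |
     | | // |
     | | //   omp_interop_t obj = omp_interop_none; |
     | | //   #pragma omp interop init(targetsync : obj) device(dev) |
     | | //   /* hand 'obj' to a foreign runtime */ |
     | | //   #pragma omp interop use(obj) |
     | | //   #pragma omp interop destroy(obj) |
     | | // |
     | | // Each clause maps onto the corresponding createOMPInterop* call above. |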
7051 | | static void emitTargetTeamsDistributeParallelForRegion( |
7052 | | CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, |
7053 | 0 | PrePostActionTy &Action) { |
7054 | 0 | Action.Enter(CGF); |
7055 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7056 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7057 | 0 | S.getDistInc()); |
7058 | 0 | }; |
7059 | | |
7060 | | // Emit teams region as a standalone region. |
7061 | 0 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7062 | 0 | PrePostActionTy &Action) { |
7063 | 0 | Action.Enter(CGF); |
7064 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7065 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7066 | 0 | (void)PrivateScope.Privatize(); |
7067 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7068 | 0 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7069 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7070 | 0 | }; |
7071 | |
7072 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
7073 | 0 | CodeGenTeams); |
7074 | 0 | emitPostUpdateForReductionClause(CGF, S, |
7075 | 0 | [](CodeGenFunction &) { return nullptr; }); |
7076 | 0 | } |
7077 | | |
7078 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( |
7079 | | CodeGenModule &CGM, StringRef ParentName, |
7080 | 0 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7081 | | // Emit SPMD target teams distribute parallel for region as a standalone |
7082 | | // region. |
7083 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7084 | 0 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7085 | 0 | }; |
7086 | 0 | llvm::Function *Fn; |
7087 | 0 | llvm::Constant *Addr; |
7088 | | // Emit target region as a standalone region. |
7089 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7090 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7091 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
7092 | 0 | } |
7093 | | |
7094 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( |
7095 | 0 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7096 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7097 | 0 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7098 | 0 | }; |
7099 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7100 | 0 | } |
7101 | | |
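     | | // A hypothetical source pattern for the combined construct lowered above |
     | | // (a sketch; names are illustrative): |
     | | // |
     | | //   void saxpy(float *y, const float *x, float a, int n) { |
     | | //   #pragma omp target teams distribute parallel for \ |
     | | //       map(tofrom : y[0:n]) map(to : x[0:n]) |
     | | //     for (int i = 0; i < n; ++i) |
     | | //       y[i] += a * x[i]; |
     | | //   } |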
7102 | | static void emitTargetTeamsDistributeParallelForSimdRegion( |
7103 | | CodeGenFunction &CGF, |
7104 | | const OMPTargetTeamsDistributeParallelForSimdDirective &S, |
7105 | 0 | PrePostActionTy &Action) { |
7106 | 0 | Action.Enter(CGF); |
7107 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7108 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7109 | 0 | S.getDistInc()); |
7110 | 0 | }; |
7111 | | |
7112 | | // Emit teams region as a standalone region. |
7113 | 0 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7114 | 0 | PrePostActionTy &Action) { |
7115 | 0 | Action.Enter(CGF); |
7116 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7117 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7118 | 0 | (void)PrivateScope.Privatize(); |
7119 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7120 | 0 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7121 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7122 | 0 | }; |
7123 | |
7124 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, |
7125 | 0 | CodeGenTeams); |
7126 | 0 | emitPostUpdateForReductionClause(CGF, S, |
7127 | 0 | [](CodeGenFunction &) { return nullptr; }); |
7128 | 0 | } |
7129 | | |
7130 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( |
7131 | | CodeGenModule &CGM, StringRef ParentName, |
7132 | 0 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7133 | | // Emit SPMD target teams distribute parallel for simd region as a standalone |
7134 | | // region. |
7135 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7136 | 0 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7137 | 0 | }; |
7138 | 0 | llvm::Function *Fn; |
7139 | 0 | llvm::Constant *Addr; |
7140 | | // Emit target region as a standalone region. |
7141 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7142 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7143 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
7144 | 0 | } |
7145 | | |
7146 | | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( |
7147 | 0 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7148 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7149 | 0 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7150 | 0 | }; |
7151 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7152 | 0 | } |
7153 | | |
7154 | | void CodeGenFunction::EmitOMPCancellationPointDirective( |
7155 | 0 | const OMPCancellationPointDirective &S) { |
7156 | 0 | CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), |
7157 | 0 | S.getCancelRegion()); |
7158 | 0 | } |
7159 | | |
7160 | 0 | void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { |
7161 | 0 | const Expr *IfCond = nullptr; |
7162 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7163 | 0 | if (C->getNameModifier() == OMPD_unknown || |
7164 | 0 | C->getNameModifier() == OMPD_cancel) { |
7165 | 0 | IfCond = C->getCondition(); |
7166 | 0 | break; |
7167 | 0 | } |
7168 | 0 | } |
7169 | 0 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
7170 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7171 | | // TODO: This check is necessary as we only generate `omp parallel` through |
7172 | | // the OpenMPIRBuilder for now. |
7173 | 0 | if (S.getCancelRegion() == OMPD_parallel || |
7174 | 0 | S.getCancelRegion() == OMPD_sections || |
7175 | 0 | S.getCancelRegion() == OMPD_section) { |
7176 | 0 | llvm::Value *IfCondition = nullptr; |
7177 | 0 | if (IfCond) |
7178 | 0 | IfCondition = EmitScalarExpr(IfCond, |
7179 | 0 | /*IgnoreResultAssign=*/true); |
7180 | 0 | return Builder.restoreIP( |
7181 | 0 | OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); |
7182 | 0 | } |
7183 | 0 | } |
7184 | | |
7185 | 0 | CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, |
7186 | 0 | S.getCancelRegion()); |
7187 | 0 | } |
7188 | | |
7189 | | CodeGenFunction::JumpDest |
7190 | 0 | CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { |
7191 | 0 | if (Kind == OMPD_parallel || Kind == OMPD_task || |
7192 | 0 | Kind == OMPD_target_parallel || Kind == OMPD_taskloop || |
7193 | 0 | Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) |
7194 | 0 | return ReturnBlock; |
7195 | 0 | assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || |
7196 | 0 | Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || |
7197 | 0 | Kind == OMPD_distribute_parallel_for || |
7198 | 0 | Kind == OMPD_target_parallel_for || |
7199 | 0 | Kind == OMPD_teams_distribute_parallel_for || |
7200 | 0 | Kind == OMPD_target_teams_distribute_parallel_for); |
7201 | 0 | return OMPCancelStack.getExitBlock(); |
7202 | 0 | } |
7203 | | |
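     | | // For orientation, a hypothetical cancellation pattern covered by the |
     | | // functions above (a sketch; names are illustrative): |
     | | // |
     | | //   #pragma omp parallel |
     | | //   { |
     | | //     #pragma omp cancellation point parallel |
     | | //     if (had_error()) { |
     | | //       #pragma omp cancel parallel |
     | | //     } |
     | | //   } |
     | | // |
     | | // getOMPCancelDestination picks the branch target: the return block for |
     | | // parallel- and task-like regions, and the cancel-stack exit block for |
     | | // worksharing regions. |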
7204 | | void CodeGenFunction::EmitOMPUseDevicePtrClause( |
7205 | | const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, |
7206 | | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7207 | 0 | CaptureDeviceAddrMap) { |
7208 | 0 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7209 | 0 | for (const Expr *OrigVarIt : C.varlists()) { |
7210 | 0 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl()); |
7211 | 0 | if (!Processed.insert(OrigVD).second) |
7212 | 0 | continue; |
7213 | | |
7214 | | // In order to identify the right initializer we need to match the |
7215 | |     // declaration used by the mapping logic. In some cases we may get an |
7216 | |     // OMPCapturedExprDecl that refers to the original declaration. |
7217 | 0 | const ValueDecl *MatchingVD = OrigVD; |
7218 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { |
7219 | |       // OMPCapturedExprDecls are used to privatize fields of the current |
7220 | |       // structure. |
7221 | 0 | const auto *ME = cast<MemberExpr>(OED->getInit()); |
7222 | 0 | assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) && |
7223 | 0 | "Base should be the current struct!"); |
7224 | 0 | MatchingVD = ME->getMemberDecl(); |
7225 | 0 | } |
7226 | | |
7227 | | // If we don't have information about the current list item, move on to |
7228 | | // the next one. |
7229 | 0 | auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); |
7230 | 0 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7231 | 0 | continue; |
7232 | | |
7233 | 0 | llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); |
7234 | | |
7235 | | // Return the address of the private variable. |
7236 | 0 | bool IsRegistered = PrivateScope.addPrivate( |
7237 | 0 | OrigVD, |
7238 | 0 | Address(InitAddrIt->second, Ty, |
7239 | 0 | getContext().getTypeAlignInChars(getContext().VoidPtrTy))); |
7240 | 0 | assert(IsRegistered && "firstprivate var already registered as private"); |
7241 | | // Silence the warning about unused variable. |
7242 | 0 | (void)IsRegistered; |
7243 | 0 | } |
7244 | 0 | } |
7245 | | |
7246 | 0 | static const VarDecl *getBaseDecl(const Expr *Ref) { |
7247 | 0 | const Expr *Base = Ref->IgnoreParenImpCasts(); |
7248 | 0 | while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) |
7249 | 0 | Base = OASE->getBase()->IgnoreParenImpCasts(); |
7250 | 0 | while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) |
7251 | 0 | Base = ASE->getBase()->IgnoreParenImpCasts(); |
7252 | 0 | return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); |
7253 | 0 | } |
7254 | | |
7255 | | void CodeGenFunction::EmitOMPUseDeviceAddrClause( |
7256 | | const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, |
7257 | | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7258 | 0 | CaptureDeviceAddrMap) { |
7259 | 0 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7260 | 0 | for (const Expr *Ref : C.varlists()) { |
7261 | 0 | const VarDecl *OrigVD = getBaseDecl(Ref); |
7262 | 0 | if (!Processed.insert(OrigVD).second) |
7263 | 0 | continue; |
7264 | | // In order to identify the right initializer we need to match the |
7265 | |     // declaration used by the mapping logic. In some cases we may get an |
7266 | |     // OMPCapturedExprDecl that refers to the original declaration. |
7267 | 0 | const ValueDecl *MatchingVD = OrigVD; |
7268 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { |
7269 | |       // OMPCapturedExprDecls are used to privatize fields of the current |
7270 | |       // structure. |
7271 | 0 | const auto *ME = cast<MemberExpr>(OED->getInit()); |
7272 | 0 | assert(isa<CXXThisExpr>(ME->getBase()) && |
7273 | 0 | "Base should be the current struct!"); |
7274 | 0 | MatchingVD = ME->getMemberDecl(); |
7275 | 0 | } |
7276 | | |
7277 | | // If we don't have information about the current list item, move on to |
7278 | | // the next one. |
7279 | 0 | auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); |
7280 | 0 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7281 | 0 | continue; |
7282 | | |
7283 | 0 | llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); |
7284 | |
7285 | 0 | Address PrivAddr = |
7286 | 0 | Address(InitAddrIt->second, Ty, |
7287 | 0 | getContext().getTypeAlignInChars(getContext().VoidPtrTy)); |
7288 | |     // For decl-refs and variable-length arrays we need to load the pointer |
7289 | |     // for correct mapping, since the pointer to the data was passed to the runtime. |
7290 | 0 | if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || |
7291 | 0 | MatchingVD->getType()->isArrayType()) { |
7292 | 0 | QualType PtrTy = getContext().getPointerType( |
7293 | 0 | OrigVD->getType().getNonReferenceType()); |
7294 | 0 | PrivAddr = |
7295 | 0 | EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)), |
7296 | 0 | PtrTy->castAs<PointerType>()); |
7297 | 0 | } |
7298 | |
7299 | 0 | (void)PrivateScope.addPrivate(OrigVD, PrivAddr); |
7300 | 0 | } |
7301 | 0 | } |
7302 | | |
7303 | | // Generate the instructions for the '#pragma omp target data' directive. |
7304 | | void CodeGenFunction::EmitOMPTargetDataDirective( |
7305 | 0 | const OMPTargetDataDirective &S) { |
7306 | 0 | CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, |
7307 | 0 | /*SeparateBeginEndCalls=*/true); |
7308 | | |
7309 | | // Create a pre/post action to signal the privatization of the device pointer. |
7310 | | // This action can be replaced by the OpenMP runtime code generation to |
7311 | | // deactivate privatization. |
7312 | 0 | bool PrivatizeDevicePointers = false; |
7313 | 0 | class DevicePointerPrivActionTy : public PrePostActionTy { |
7314 | 0 | bool &PrivatizeDevicePointers; |
7315 | |
7316 | 0 | public: |
7317 | 0 | explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) |
7318 | 0 | : PrivatizeDevicePointers(PrivatizeDevicePointers) {} |
7319 | 0 | void Enter(CodeGenFunction &CGF) override { |
7320 | 0 | PrivatizeDevicePointers = true; |
7321 | 0 | } |
7322 | 0 | }; |
7323 | 0 | DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); |
7324 | |
7325 | 0 | auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7326 | 0 | auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7327 | 0 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
7328 | 0 | }; |
7329 | | |
7330 | | // Codegen that selects whether to generate the privatization code or not. |
7331 | 0 | auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7332 | 0 | RegionCodeGenTy RCG(InnermostCodeGen); |
7333 | 0 | PrivatizeDevicePointers = false; |
7334 | | |
7335 | | // Call the pre-action to change the status of PrivatizeDevicePointers if |
7336 | | // needed. |
7337 | 0 | Action.Enter(CGF); |
7338 | |
7339 | 0 | if (PrivatizeDevicePointers) { |
7340 | 0 | OMPPrivateScope PrivateScope(CGF); |
7341 | | // Emit all instances of the use_device_ptr clause. |
7342 | 0 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7343 | 0 | CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, |
7344 | 0 | Info.CaptureDeviceAddrMap); |
7345 | 0 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7346 | 0 | CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, |
7347 | 0 | Info.CaptureDeviceAddrMap); |
7348 | 0 | (void)PrivateScope.Privatize(); |
7349 | 0 | RCG(CGF); |
7350 | 0 | } else { |
7351 | | // If we don't have target devices, don't bother emitting the data |
7352 | | // mapping code. |
7353 | 0 | std::optional<OpenMPDirectiveKind> CaptureRegion; |
7354 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7355 | | // Emit helper decls of the use_device_ptr/use_device_addr clauses. |
7356 | 0 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7357 | 0 | for (const Expr *E : C->varlists()) { |
7358 | 0 | const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
7359 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
7360 | 0 | CGF.EmitVarDecl(*OED); |
7361 | 0 | } |
7362 | 0 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7363 | 0 | for (const Expr *E : C->varlists()) { |
7364 | 0 | const Decl *D = getBaseDecl(E); |
7365 | 0 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
7366 | 0 | CGF.EmitVarDecl(*OED); |
7367 | 0 | } |
7368 | 0 | } else { |
7369 | 0 | CaptureRegion = OMPD_unknown; |
7370 | 0 | } |
7371 | |
7372 | 0 | OMPLexicalScope Scope(CGF, S, CaptureRegion); |
7373 | 0 | RCG(CGF); |
7374 | 0 | } |
7375 | 0 | }; |
7376 | | |
7377 | | // Forward the provided action to the privatization codegen. |
7378 | 0 | RegionCodeGenTy PrivRCG(PrivCodeGen); |
7379 | 0 | PrivRCG.setAction(Action); |
7380 | | |
7381 | |     // Although the body of the region is emitted as an inlined directive, we |
7382 | |     // don't use an inline scope, because changes to the references inside the |
7383 | |     // region are expected to be visible outside; therefore we do not privatize them. |
7384 | 0 | OMPLexicalScope Scope(CGF, S); |
7385 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, |
7386 | 0 | PrivRCG); |
7387 | 0 | }; |
7388 | |
7389 | 0 | RegionCodeGenTy RCG(CodeGen); |
7390 | | |
7391 | | // If we don't have target devices, don't bother emitting the data mapping |
7392 | | // code. |
7393 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7394 | 0 | RCG(*this); |
7395 | 0 | return; |
7396 | 0 | } |
7397 | | |
7398 | | // Check if we have any if clause associated with the directive. |
7399 | 0 | const Expr *IfCond = nullptr; |
7400 | 0 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7401 | 0 | IfCond = C->getCondition(); |
7402 | | |
7403 | | // Check if we have any device clause associated with the directive. |
7404 | 0 | const Expr *Device = nullptr; |
7405 | 0 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7406 | 0 | Device = C->getDevice(); |
7407 | | |
7408 | | // Set the action to signal privatization of device pointers. |
7409 | 0 | RCG.setAction(PrivAction); |
7410 | | |
7411 | | // Emit region code. |
7412 | 0 | CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, |
7413 | 0 | Info); |
7414 | 0 | } |
7415 | | |
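     | | // A hypothetical use of the use_device_ptr handling above; inside the |
     | | // region the privatized 'p' holds the device address produced by the |
     | | // mapping runtime (a sketch; 'call_foreign_kernel' is an assumed |
     | | // external function): |
     | | // |
     | | //   void update(float *p, int n) { |
     | | //   #pragma omp target data map(tofrom : p[0:n]) use_device_ptr(p) |
     | | //     call_foreign_kernel(p, n); |
     | | //   } |
     | | // |
     | | // use_device_addr behaves analogously for variables rather than pointers. |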
7416 | | void CodeGenFunction::EmitOMPTargetEnterDataDirective( |
7417 | 0 | const OMPTargetEnterDataDirective &S) { |
7418 | | // If we don't have target devices, don't bother emitting the data mapping |
7419 | | // code. |
7420 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7421 | 0 | return; |
7422 | | |
7423 | | // Check if we have any if clause associated with the directive. |
7424 | 0 | const Expr *IfCond = nullptr; |
7425 | 0 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7426 | 0 | IfCond = C->getCondition(); |
7427 | | |
7428 | | // Check if we have any device clause associated with the directive. |
7429 | 0 | const Expr *Device = nullptr; |
7430 | 0 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7431 | 0 | Device = C->getDevice(); |
7432 | |
7433 | 0 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7434 | 0 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7435 | 0 | } |
7436 | | |
7437 | | void CodeGenFunction::EmitOMPTargetExitDataDirective( |
7438 | 0 | const OMPTargetExitDataDirective &S) { |
7439 | | // If we don't have target devices, don't bother emitting the data mapping |
7440 | | // code. |
7441 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7442 | 0 | return; |
7443 | | |
7444 | | // Check if we have any if clause associated with the directive. |
7445 | 0 | const Expr *IfCond = nullptr; |
7446 | 0 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7447 | 0 | IfCond = C->getCondition(); |
7448 | | |
7449 | | // Check if we have any device clause associated with the directive. |
7450 | 0 | const Expr *Device = nullptr; |
7451 | 0 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7452 | 0 | Device = C->getDevice(); |
7453 | |
7454 | 0 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7455 | 0 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7456 | 0 | } |
7457 | | |
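     | | // A hypothetical enter/exit pairing for the two standalone directives |
     | | // above (a sketch; names are illustrative): |
     | | // |
     | | //   #pragma omp target enter data map(to : a[0:n]) device(0) |
     | | //   /* several target regions reuse the mapping of 'a' */ |
     | | //   #pragma omp target exit data map(from : a[0:n]) device(0) |
     | | // |
     | | // Both directives share the emitTargetDataStandAloneCall lowering and |
     | | // are skipped entirely when no offload targets are configured. |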
7458 | | static void emitTargetParallelRegion(CodeGenFunction &CGF, |
7459 | | const OMPTargetParallelDirective &S, |
7460 | 0 | PrePostActionTy &Action) { |
7461 | | // Get the captured statement associated with the 'parallel' region. |
7462 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
7463 | 0 | Action.Enter(CGF); |
7464 | 0 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7465 | 0 | Action.Enter(CGF); |
7466 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7467 | 0 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
7468 | 0 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
7469 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7470 | 0 | (void)PrivateScope.Privatize(); |
7471 | 0 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
7472 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
7473 | | // TODO: Add support for clauses. |
7474 | 0 | CGF.EmitStmt(CS->getCapturedStmt()); |
7475 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
7476 | 0 | }; |
7477 | 0 | emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, |
7478 | 0 | emitEmptyBoundParameters); |
7479 | 0 | emitPostUpdateForReductionClause(CGF, S, |
7480 | 0 | [](CodeGenFunction &) { return nullptr; }); |
7481 | 0 | } |
7482 | | |
7483 | | void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( |
7484 | | CodeGenModule &CGM, StringRef ParentName, |
7485 | 0 | const OMPTargetParallelDirective &S) { |
7486 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7487 | 0 | emitTargetParallelRegion(CGF, S, Action); |
7488 | 0 | }; |
7489 | 0 | llvm::Function *Fn; |
7490 | 0 | llvm::Constant *Addr; |
7491 | | // Emit target region as a standalone region. |
7492 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7493 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7494 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
7495 | 0 | } |
7496 | | |
7497 | | void CodeGenFunction::EmitOMPTargetParallelDirective( |
7498 | 0 | const OMPTargetParallelDirective &S) { |
7499 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7500 | 0 | emitTargetParallelRegion(CGF, S, Action); |
7501 | 0 | }; |
7502 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7503 | 0 | } |
7504 | | |
7505 | | static void emitTargetParallelForRegion(CodeGenFunction &CGF, |
7506 | | const OMPTargetParallelForDirective &S, |
7507 | 0 | PrePostActionTy &Action) { |
7508 | 0 | Action.Enter(CGF); |
7509 | | // Emit directive as a combined directive that consists of two implicit |
7510 | | // directives: 'parallel' with 'for' directive. |
7511 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7512 | 0 | Action.Enter(CGF); |
7513 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7514 | 0 | CGF, OMPD_target_parallel_for, S.hasCancel()); |
7515 | 0 | CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, |
7516 | 0 | emitDispatchForLoopBounds); |
7517 | 0 | }; |
7518 | 0 | emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, |
7519 | 0 | emitEmptyBoundParameters); |
7520 | 0 | } |
7521 | | |
7522 | | void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( |
7523 | | CodeGenModule &CGM, StringRef ParentName, |
7524 | 0 | const OMPTargetParallelForDirective &S) { |
7525 | | // Emit SPMD target parallel for region as a standalone region. |
7526 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7527 | 0 | emitTargetParallelForRegion(CGF, S, Action); |
7528 | 0 | }; |
7529 | 0 | llvm::Function *Fn; |
7530 | 0 | llvm::Constant *Addr; |
7531 | | // Emit target region as a standalone region. |
7532 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7533 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7534 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
7535 | 0 | } |
7536 | | |
7537 | | void CodeGenFunction::EmitOMPTargetParallelForDirective( |
7538 | 0 | const OMPTargetParallelForDirective &S) { |
7539 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7540 | 0 | emitTargetParallelForRegion(CGF, S, Action); |
7541 | 0 | }; |
7542 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7543 | 0 | } |
7544 | | |
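     | | // A hypothetical pattern for the combined construct lowered above; the |
     | | // OMPCancelStackRAII is needed because 'for' regions are cancellable |
     | | // (a sketch; names are illustrative): |
     | | // |
     | | //   void scale(float *a, int n, float f) { |
     | | //   #pragma omp target parallel for map(tofrom : a[0:n]) |
     | | //     for (int i = 0; i < n; ++i) |
     | | //       a[i] *= f; |
     | | //   } |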
7545 | | static void |
7546 | | emitTargetParallelForSimdRegion(CodeGenFunction &CGF, |
7547 | | const OMPTargetParallelForSimdDirective &S, |
7548 | 0 | PrePostActionTy &Action) { |
7549 | 0 | Action.Enter(CGF); |
7550 | | // Emit directive as a combined directive that consists of two implicit |
7551 | | // directives: 'parallel' with 'for' directive. |
7552 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7553 | 0 | Action.Enter(CGF); |
7554 | 0 | CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, |
7555 | 0 | emitDispatchForLoopBounds); |
7556 | 0 | }; |
7557 | 0 | emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, |
7558 | 0 | emitEmptyBoundParameters); |
7559 | 0 | } |
7560 | | |
7561 | | void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( |
7562 | | CodeGenModule &CGM, StringRef ParentName, |
7563 | 0 | const OMPTargetParallelForSimdDirective &S) { |
7564 | | // Emit SPMD target parallel for region as a standalone region. |
7565 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7566 | 0 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7567 | 0 | }; |
7568 | 0 | llvm::Function *Fn; |
7569 | 0 | llvm::Constant *Addr; |
7570 | | // Emit target region as a standalone region. |
7571 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7572 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7573 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
7574 | 0 | } |
7575 | | |
7576 | | void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( |
7577 | 0 | const OMPTargetParallelForSimdDirective &S) { |
7578 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7579 | 0 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7580 | 0 | }; |
7581 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7582 | 0 | } |
7583 | | |
7584 | | /// Map a helper variable to the address of the corresponding implicit parameter. |
7585 | | static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, |
7586 | | const ImplicitParamDecl *PVD, |
7587 | 0 | CodeGenFunction::OMPPrivateScope &Privates) { |
7588 | 0 | const auto *VDecl = cast<VarDecl>(Helper->getDecl()); |
7589 | 0 | Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD)); |
7590 | 0 | } |
7591 | | |
7592 | 0 | void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { |
7593 | 0 | assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); |
7594 | | // Emit outlined function for task construct. |
7595 | 0 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); |
7596 | 0 | Address CapturedStruct = Address::invalid(); |
7597 | 0 | { |
7598 | 0 | OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7599 | 0 | CapturedStruct = GenerateCapturedStmtArgument(*CS); |
7600 | 0 | } |
7601 | 0 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
7602 | 0 | const Expr *IfCond = nullptr; |
7603 | 0 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7604 | 0 | if (C->getNameModifier() == OMPD_unknown || |
7605 | 0 | C->getNameModifier() == OMPD_taskloop) { |
7606 | 0 | IfCond = C->getCondition(); |
7607 | 0 | break; |
7608 | 0 | } |
7609 | 0 | } |
7610 | |
7611 | 0 | OMPTaskDataTy Data; |
7612 | | // Check if taskloop must be emitted without taskgroup. |
7613 | 0 | Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); |
7614 | | // TODO: Check if we should emit tied or untied task. |
7615 | 0 | Data.Tied = true; |
7616 | | // Set scheduling for taskloop |
7617 | 0 | if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { |
7618 | | // grainsize clause |
7619 | 0 | Data.Schedule.setInt(/*IntVal=*/false); |
7620 | 0 | Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); |
7621 | 0 | } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { |
7622 | | // num_tasks clause |
7623 | 0 | Data.Schedule.setInt(/*IntVal=*/true); |
7624 | 0 | Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); |
7625 | 0 | } |
7626 | |
7627 | 0 | auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { |
7628 | | // if (PreCond) { |
7629 | | // for (IV in 0..LastIteration) BODY; |
7630 | | // <Final counter/linear vars updates>; |
7631 | | // } |
7632 | | // |
7633 | | |
7634 | | // Emit: if (PreCond) - begin. |
7635 | | // If the condition constant folds and can be elided, avoid emitting the |
7636 | | // whole loop. |
7637 | 0 | bool CondConstant; |
7638 | 0 | llvm::BasicBlock *ContBlock = nullptr; |
7639 | 0 | OMPLoopScope PreInitScope(CGF, S); |
7640 | 0 | if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
7641 | 0 | if (!CondConstant) |
7642 | 0 | return; |
7643 | 0 | } else { |
7644 | 0 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); |
7645 | 0 | ContBlock = CGF.createBasicBlock("taskloop.if.end"); |
7646 | 0 | emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, |
7647 | 0 | CGF.getProfileCount(&S)); |
7648 | 0 | CGF.EmitBlock(ThenBlock); |
7649 | 0 | CGF.incrementProfileCounter(&S); |
7650 | 0 | } |
7651 | | |
7652 | 0 | (void)CGF.EmitOMPLinearClauseInit(S); |
7653 | |
7654 | 0 | OMPPrivateScope LoopScope(CGF); |
7655 | | // Emit helper vars inits. |
7656 | 0 | enum { LowerBound = 5, UpperBound, Stride, LastIter }; |
7657 | 0 | auto *I = CS->getCapturedDecl()->param_begin(); |
7658 | 0 | auto *LBP = std::next(I, LowerBound); |
7659 | 0 | auto *UBP = std::next(I, UpperBound); |
7660 | 0 | auto *STP = std::next(I, Stride); |
7661 | 0 | auto *LIP = std::next(I, LastIter); |
7662 | 0 | mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, |
7663 | 0 | LoopScope); |
7664 | 0 | mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, |
7665 | 0 | LoopScope); |
7666 | 0 | mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); |
7667 | 0 | mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, |
7668 | 0 | LoopScope); |
7669 | 0 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7670 | 0 | CGF.EmitOMPLinearClause(S, LoopScope); |
7671 | 0 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
7672 | 0 | (void)LoopScope.Privatize(); |
7673 | | // Emit the loop iteration variable. |
7674 | 0 | const Expr *IVExpr = S.getIterationVariable(); |
7675 | 0 | const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); |
7676 | 0 | CGF.EmitVarDecl(*IVDecl); |
7677 | 0 | CGF.EmitIgnoredExpr(S.getInit()); |
7678 | | |
7679 | |     // Emit the iteration count variable. |
7680 | |     // If it is not a variable, Sema decided to calculate the iteration count |
7681 | |     // on each iteration (e.g., it is foldable into a constant). |
7682 | 0 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
7683 | 0 | CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
7684 | | // Emit calculation of the iterations count. |
7685 | 0 | CGF.EmitIgnoredExpr(S.getCalcLastIteration()); |
7686 | 0 | } |
7687 | |
7688 | 0 | { |
7689 | 0 | OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7690 | 0 | emitCommonSimdLoop( |
7691 | 0 | CGF, S, |
7692 | 0 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7693 | 0 | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
7694 | 0 | CGF.EmitOMPSimdInit(S); |
7695 | 0 | }, |
7696 | 0 | [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
7697 | 0 | CGF.EmitOMPInnerLoop( |
7698 | 0 | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
7699 | 0 | [&S](CodeGenFunction &CGF) { |
7700 | 0 | emitOMPLoopBodyWithStopPoint(CGF, S, |
7701 | 0 | CodeGenFunction::JumpDest()); |
7702 | 0 | }, |
7703 | 0 | [](CodeGenFunction &) {}); |
7704 | 0 | }); |
7705 | 0 | } |
7706 | | // Emit: if (PreCond) - end. |
7707 | 0 | if (ContBlock) { |
7708 | 0 | CGF.EmitBranch(ContBlock); |
7709 | 0 | CGF.EmitBlock(ContBlock, true); |
7710 | 0 | } |
7711 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
7712 | 0 | if (HasLastprivateClause) { |
7713 | 0 | CGF.EmitOMPLastprivateClauseFinal( |
7714 | 0 | S, isOpenMPSimdDirective(S.getDirectiveKind()), |
7715 | 0 | CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( |
7716 | 0 | CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, |
7717 | 0 | (*LIP)->getType(), S.getBeginLoc()))); |
7718 | 0 | } |
7719 | 0 | LoopScope.restoreMap(); |
7720 | 0 | CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { |
7721 | 0 | return CGF.Builder.CreateIsNotNull( |
7722 | 0 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, |
7723 | 0 | (*LIP)->getType(), S.getBeginLoc())); |
7724 | 0 | }); |
7725 | 0 | }; |
7726 | 0 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
7727 | 0 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
7728 | 0 | const OMPTaskDataTy &Data) { |
7729 | 0 | auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, |
7730 | 0 | &Data](CodeGenFunction &CGF, PrePostActionTy &) { |
7731 | 0 | OMPLoopScope PreInitScope(CGF, S); |
7732 | 0 | CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, |
7733 | 0 | OutlinedFn, SharedsTy, |
7734 | 0 | CapturedStruct, IfCond, Data); |
7735 | 0 | }; |
7736 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, |
7737 | 0 | CodeGen); |
7738 | 0 | }; |
7739 | 0 | if (Data.Nogroup) { |
7740 | 0 | EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); |
7741 | 0 | } else { |
7742 | 0 | CGM.getOpenMPRuntime().emitTaskgroupRegion( |
7743 | 0 | *this, |
7744 | 0 | [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, |
7745 | 0 | PrePostActionTy &Action) { |
7746 | 0 | Action.Enter(CGF); |
7747 | 0 | CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, |
7748 | 0 | Data); |
7749 | 0 | }, |
7750 | 0 | S.getBeginLoc()); |
7751 | 0 | } |
7752 | 0 | } |
7753 | | |
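     | | // For orientation, the taskloop scheduling clauses picked up above, as a |
     | | // hypothetical sketch ('work' and 'n' are illustrative names): |
     | | // |
     | | //   #pragma omp taskloop grainsize(64) // roughly 64 iterations per task |
     | | //   for (int i = 0; i < n; ++i) work(i); |
     | | // |
     | | //   #pragma omp taskloop num_tasks(8) nogroup // about 8 tasks, and no |
     | | //   for (int i = 0; i < n; ++i) work(i);      // implicit taskgroup |
     | | // |
     | | // Without 'nogroup' the loop is wrapped in an implicit taskgroup region, |
     | | // as the Data.Nogroup branch above shows. |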
7754 | 0 | void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { |
7755 | 0 | auto LPCRegion = |
7756 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7757 | 0 | EmitOMPTaskLoopBasedDirective(S); |
7758 | 0 | } |
7759 | | |
7760 | | void CodeGenFunction::EmitOMPTaskLoopSimdDirective( |
7761 | 0 | const OMPTaskLoopSimdDirective &S) { |
7762 | 0 | auto LPCRegion = |
7763 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7764 | 0 | OMPLexicalScope Scope(*this, S); |
7765 | 0 | EmitOMPTaskLoopBasedDirective(S); |
7766 | 0 | } |
7767 | | |
7768 | | void CodeGenFunction::EmitOMPMasterTaskLoopDirective( |
7769 | 0 | const OMPMasterTaskLoopDirective &S) { |
7770 | 0 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7771 | 0 | Action.Enter(CGF); |
7772 | 0 | EmitOMPTaskLoopBasedDirective(S); |
7773 | 0 | }; |
7774 | 0 | auto LPCRegion = |
7775 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7776 | 0 | OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); |
7777 | 0 | CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); |
7778 | 0 | } |
7779 | | |
7780 | | void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( |
7781 | 0 | const OMPMasterTaskLoopSimdDirective &S) { |
7782 | 0 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7783 | 0 | Action.Enter(CGF); |
7784 | 0 | EmitOMPTaskLoopBasedDirective(S); |
7785 | 0 | }; |
7786 | 0 | auto LPCRegion = |
7787 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7788 | 0 | OMPLexicalScope Scope(*this, S); |
7789 | 0 | CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); |
7790 | 0 | } |
7791 | | |
7792 | | void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( |
7793 | 0 | const OMPParallelMasterTaskLoopDirective &S) { |
7794 | 0 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7795 | 0 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7796 | 0 | PrePostActionTy &Action) { |
7797 | 0 | Action.Enter(CGF); |
7798 | 0 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7799 | 0 | }; |
7800 | 0 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7801 | 0 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, |
7802 | 0 | S.getBeginLoc()); |
7803 | 0 | }; |
7804 | 0 | auto LPCRegion = |
7805 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7806 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, |
7807 | 0 | emitEmptyBoundParameters); |
7808 | 0 | } |
7809 | | |
7810 | | void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( |
7811 | 0 | const OMPParallelMasterTaskLoopSimdDirective &S) { |
7812 | 0 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7813 | 0 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7814 | 0 | PrePostActionTy &Action) { |
7815 | 0 | Action.Enter(CGF); |
7816 | 0 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7817 | 0 | }; |
7818 | 0 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7819 | 0 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, |
7820 | 0 | S.getBeginLoc()); |
7821 | 0 | }; |
7822 | 0 | auto LPCRegion = |
7823 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7824 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, |
7825 | 0 | emitEmptyBoundParameters); |
7826 | 0 | } |
7827 | | |
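     | | // A hypothetical sketch of the combined forms handled above: only the |
     | | // master thread of the enclosing parallel region runs the task generator |
     | | // (names are illustrative): |
     | | // |
     | | //   #pragma omp parallel master taskloop grainsize(32) |
     | | //   for (int i = 0; i < n; ++i) work(i); |
     | | // |
     | | // The master region wraps EmitOMPTaskLoopBasedDirective; the generated |
     | | // tasks are then executed by the whole team. |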
7828 | | // Generate the instructions for the '#pragma omp target update' directive. |
7829 | | void CodeGenFunction::EmitOMPTargetUpdateDirective( |
7830 | 0 | const OMPTargetUpdateDirective &S) { |
7831 | | // If we don't have target devices, don't bother emitting the data mapping |
7832 | | // code. |
7833 | 0 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7834 | 0 | return; |
7835 | | |
7836 | | // Check if we have any if clause associated with the directive. |
7837 | 0 | const Expr *IfCond = nullptr; |
7838 | 0 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7839 | 0 | IfCond = C->getCondition(); |
7840 | | |
7841 | | // Check if we have any device clause associated with the directive. |
7842 | 0 | const Expr *Device = nullptr; |
7843 | 0 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7844 | 0 | Device = C->getDevice(); |
7845 | |
7846 | 0 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7847 | 0 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7848 | 0 | } |
7849 | | |
7850 | | void CodeGenFunction::EmitOMPGenericLoopDirective( |
7851 | 0 | const OMPGenericLoopDirective &S) { |
7852 | | // Unimplemented, just inline the underlying statement for now. |
7853 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7854 | | // Emit the loop iteration variable. |
7855 | 0 | const Stmt *CS = |
7856 | 0 | cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); |
7857 | 0 | const auto *ForS = dyn_cast<ForStmt>(CS); |
7858 | 0 | if (ForS && !isa<DeclStmt>(ForS->getInit())) { |
7859 | 0 | OMPPrivateScope LoopScope(CGF); |
7860 | 0 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7861 | 0 | (void)LoopScope.Privatize(); |
7862 | 0 | CGF.EmitStmt(CS); |
7863 | 0 | LoopScope.restoreMap(); |
7864 | 0 | } else { |
7865 | 0 | CGF.EmitStmt(CS); |
7866 | 0 | } |
7867 | 0 | }; |
7868 | 0 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
7869 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); |
7870 | 0 | } |
7871 | | |
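     | | // A hypothetical 'loop' directive that reaches the inlined fallback above |
     | | // (a sketch; names are illustrative): |
     | | // |
     | | //   #pragma omp loop bind(thread) |
     | | //   for (int i = 0; i < n; ++i) |
     | | //     a[i] = f(i); |
     | | // |
     | | // As noted above, codegen currently just inlines the loop, privatizing |
     | | // its counters when the init statement is not a declaration. |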
7872 | | void CodeGenFunction::EmitOMPParallelGenericLoopDirective( |
7873 | 0 | const OMPLoopDirective &S) { |
7874 | |   // Emit the combined directive as if its constituent constructs are |
7875 | |   // 'parallel' and 'for'. |
7876 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7877 | 0 | Action.Enter(CGF); |
7878 | 0 | emitOMPCopyinClause(CGF, S); |
7879 | 0 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
7880 | 0 | }; |
7881 | 0 | { |
7882 | 0 | auto LPCRegion = |
7883 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7884 | 0 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
7885 | 0 | emitEmptyBoundParameters); |
7886 | 0 | } |
7887 | | // Check for outer lastprivate conditional update. |
7888 | 0 | checkForLastprivateConditionalUpdate(*this, S); |
7889 | 0 | } |
7890 | | |
7891 | | void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( |
7892 | 0 | const OMPTeamsGenericLoopDirective &S) { |
7893 | |   // To be consistent with the current behavior of 'target teams loop', emit |
7894 | |   // 'teams loop' as if its constituent constructs are 'distribute', |
7895 | |   // 'parallel', and 'for'. |
7896 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7897 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7898 | 0 | S.getDistInc()); |
7899 | 0 | }; |
7900 | | |
7901 | | // Emit teams region as a standalone region. |
7902 | 0 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7903 | 0 | PrePostActionTy &Action) { |
7904 | 0 | Action.Enter(CGF); |
7905 | 0 | OMPPrivateScope PrivateScope(CGF); |
7906 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7907 | 0 | (void)PrivateScope.Privatize(); |
7908 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
7909 | 0 | CodeGenDistribute); |
7910 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7911 | 0 | }; |
7912 | 0 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); |
7913 | 0 | emitPostUpdateForReductionClause(*this, S, |
7914 | 0 | [](CodeGenFunction &) { return nullptr; }); |
7915 | 0 | } |
7916 | | |
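     | | // A hypothetical 'teams loop' that, as described above, is emitted as if |
     | | // written 'teams distribute parallel for' (a sketch; names are |
     | | // illustrative): |
     | | // |
     | | //   #pragma omp teams loop reduction(+ : sum) |
     | | //   for (int i = 0; i < n; ++i) |
     | | //     sum += a[i]; |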
7917 | | static void |
7918 | | emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, |
7919 | | const OMPTargetTeamsGenericLoopDirective &S, |
7920 | 0 | PrePostActionTy &Action) { |
7921 | 0 | Action.Enter(CGF); |
7922 | |   // Emit 'teams loop' as if its constituent constructs are 'distribute', |
7923 | |   // 'parallel', and 'for'. |
7924 | 0 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7925 | 0 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7926 | 0 | S.getDistInc()); |
7927 | 0 | }; |
7928 | | |
7929 | | // Emit teams region as a standalone region. |
7930 | 0 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7931 | 0 | PrePostActionTy &Action) { |
7932 | 0 | Action.Enter(CGF); |
7933 | 0 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7934 | 0 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7935 | 0 | (void)PrivateScope.Privatize(); |
7936 | 0 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7937 | 0 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7938 | 0 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7939 | 0 | }; |
7940 | |
7941 | 0 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
7942 | 0 | CodeGenTeams); |
7943 | 0 | emitPostUpdateForReductionClause(CGF, S, |
7944 | 0 | [](CodeGenFunction &) { return nullptr; }); |
7945 | 0 | } |
7946 | | |
7947 | | /// Emit combined directive 'target teams loop' as if its constituent |
7948 | | /// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. |
7949 | | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( |
7950 | 0 | const OMPTargetTeamsGenericLoopDirective &S) { |
7951 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7952 | 0 | emitTargetTeamsGenericLoopRegion(CGF, S, Action); |
7953 | 0 | }; |
7954 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7955 | 0 | } |
7956 | | |
7957 | | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( |
7958 | | CodeGenModule &CGM, StringRef ParentName, |
7959 | 0 | const OMPTargetTeamsGenericLoopDirective &S) { |
7960 | | // Emit SPMD target parallel loop region as a standalone region. |
7961 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7962 | 0 | emitTargetTeamsGenericLoopRegion(CGF, S, Action); |
7963 | 0 | }; |
7964 | 0 | llvm::Function *Fn; |
7965 | 0 | llvm::Constant *Addr; |
7966 | | // Emit target region as a standalone region. |
7967 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7968 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7969 | 0 | assert(Fn && Addr && |
7970 | 0 | "Target device function emission failed for 'target teams loop'."); |
7971 | 0 | } |
7972 | | |
7973 | | static void emitTargetParallelGenericLoopRegion( |
7974 | | CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, |
7975 | 0 | PrePostActionTy &Action) { |
7976 | 0 | Action.Enter(CGF); |
7977 | | // Emit as 'parallel for'. |
7978 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7979 | 0 | Action.Enter(CGF); |
7980 | 0 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7981 | 0 | CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); |
7982 | 0 | CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, |
7983 | 0 | emitDispatchForLoopBounds); |
7984 | 0 | }; |
7985 | 0 | emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, |
7986 | 0 | emitEmptyBoundParameters); |
7987 | 0 | } |
7988 | | |
7989 | | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( |
7990 | | CodeGenModule &CGM, StringRef ParentName, |
7991 | 0 | const OMPTargetParallelGenericLoopDirective &S) { |
7992 | | // Emit target parallel loop region as a standalone region. |
7993 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7994 | 0 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
7995 | 0 | }; |
7996 | 0 | llvm::Function *Fn; |
7997 | 0 | llvm::Constant *Addr; |
7998 | | // Emit target region as a standalone region. |
7999 | 0 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8000 | 0 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
8001 | 0 | assert(Fn && Addr && "Target device function emission failed."); |
8002 | 0 | } |
8003 | | |
8004 | | /// Emit combined directive 'target parallel loop' as if its constituent |
8005 | | /// constructs are 'target', 'parallel', and 'for'. |
8006 | | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( |
8007 | 0 | const OMPTargetParallelGenericLoopDirective &S) { |
8008 | 0 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8009 | 0 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8010 | 0 | }; |
8011 | 0 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
8012 | 0 | } |
8013 | | |
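     | | // A hypothetical 'target parallel loop', lowered above as if written |
     | | // 'target parallel for' (a sketch; names are illustrative): |
     | | // |
     | | //   #pragma omp target parallel loop map(tofrom : a[0:n]) |
     | | //   for (int i = 0; i < n; ++i) |
     | | //     a[i] = g(a[i]); |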
8014 | | void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
8015 | 0 | const OMPExecutableDirective &D) { |
8016 | 0 | if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { |
8017 | 0 | EmitOMPScanDirective(*SD); |
8018 | 0 | return; |
8019 | 0 | } |
8020 | 0 | if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
8021 | 0 | return; |
8022 | 0 | auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8023 | 0 | OMPPrivateScope GlobalsScope(CGF); |
8024 | 0 | if (isOpenMPTaskingDirective(D.getDirectiveKind())) { |
8025 | | // Capture global firstprivates to avoid crash. |
8026 | 0 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
8027 | 0 | for (const Expr *Ref : C->varlists()) { |
8028 | 0 |         const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
8029 | 0 | if (!DRE) |
8030 | 0 | continue; |
8031 | 0 | const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); |
8032 | 0 | if (!VD || VD->hasLocalStorage()) |
8033 | 0 | continue; |
8034 | 0 | if (!CGF.LocalDeclMap.count(VD)) { |
8035 | 0 | LValue GlobLVal = CGF.EmitLValue(Ref); |
8036 | 0 | GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF)); |
8037 | 0 | } |
8038 | 0 | } |
8039 | 0 | } |
8040 | 0 | } |
8041 | 0 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
8042 | 0 | (void)GlobalsScope.Privatize(); |
8043 | 0 | ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); |
8044 | 0 | emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); |
8045 | 0 | } else { |
8046 | 0 | if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { |
8047 | 0 | for (const Expr *E : LD->counters()) { |
8048 | 0 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
8049 | 0 | if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { |
8050 | 0 | LValue GlobLVal = CGF.EmitLValue(E); |
8051 | 0 | GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF)); |
8052 | 0 | } |
8053 | 0 | if (isa<OMPCapturedExprDecl>(VD)) { |
8054 | | // Emit only those that were not explicitly referenced in clauses. |
8055 | 0 | if (!CGF.LocalDeclMap.count(VD)) |
8056 | 0 | CGF.EmitVarDecl(*VD); |
8057 | 0 | } |
8058 | 0 | } |
8059 | 0 | for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
8060 | 0 | if (!C->getNumForLoops()) |
8061 | 0 | continue; |
8062 | 0 | for (unsigned I = LD->getLoopsNumber(), |
8063 | 0 | E = C->getLoopNumIterations().size(); |
8064 | 0 | I < E; ++I) { |
8065 | 0 | if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
8066 | 0 | cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { |
8067 | | // Emit only those that were not explicitly referenced in clauses. |
8068 | 0 | if (!CGF.LocalDeclMap.count(VD)) |
8069 | 0 | CGF.EmitVarDecl(*VD); |
8070 | 0 | } |
8071 | 0 | } |
8072 | 0 | } |
8073 | 0 | } |
8074 | 0 | (void)GlobalsScope.Privatize(); |
8075 | 0 | CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); |
8076 | 0 | } |
8077 | 0 | }; |
8078 | 0 | if (D.getDirectiveKind() == OMPD_atomic || |
8079 | 0 | D.getDirectiveKind() == OMPD_critical || |
8080 | 0 | D.getDirectiveKind() == OMPD_section || |
8081 | 0 | D.getDirectiveKind() == OMPD_master || |
8082 | 0 | D.getDirectiveKind() == OMPD_masked || |
8083 | 0 | D.getDirectiveKind() == OMPD_unroll) { |
8084 | 0 | EmitStmt(D.getAssociatedStmt()); |
8085 | 0 | } else { |
8086 | 0 | auto LPCRegion = |
8087 | 0 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); |
8088 | 0 | OMPSimdLexicalScope Scope(*this, D); |
8089 | 0 | CGM.getOpenMPRuntime().emitInlinedDirective( |
8090 | 0 | *this, |
8091 | 0 | isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd |
8092 | 0 | : D.getDirectiveKind(), |
8093 | 0 | CodeGen); |
8094 | 0 | } |
8095 | | // Check for outer lastprivate conditional update. |
8096 | 0 | checkForLastprivateConditionalUpdate(*this, D); |
8097 | 0 | } |