Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
Line
Count
Source
1
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit Builtin calls as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "ABIInfo.h"
14
#include "CGCUDARuntime.h"
15
#include "CGCXXABI.h"
16
#include "CGObjCRuntime.h"
17
#include "CGOpenCLRuntime.h"
18
#include "CGRecordLayout.h"
19
#include "CodeGenFunction.h"
20
#include "CodeGenModule.h"
21
#include "ConstantEmitter.h"
22
#include "PatternInit.h"
23
#include "TargetInfo.h"
24
#include "clang/AST/ASTContext.h"
25
#include "clang/AST/Attr.h"
26
#include "clang/AST/Decl.h"
27
#include "clang/AST/OSLog.h"
28
#include "clang/AST/OperationKinds.h"
29
#include "clang/Basic/TargetBuiltins.h"
30
#include "clang/Basic/TargetInfo.h"
31
#include "clang/Basic/TargetOptions.h"
32
#include "clang/CodeGen/CGFunctionInfo.h"
33
#include "clang/Frontend/FrontendDiagnostic.h"
34
#include "llvm/ADT/APFloat.h"
35
#include "llvm/ADT/APInt.h"
36
#include "llvm/ADT/FloatingPointMode.h"
37
#include "llvm/ADT/SmallPtrSet.h"
38
#include "llvm/ADT/StringExtras.h"
39
#include "llvm/Analysis/ValueTracking.h"
40
#include "llvm/IR/DataLayout.h"
41
#include "llvm/IR/InlineAsm.h"
42
#include "llvm/IR/Intrinsics.h"
43
#include "llvm/IR/IntrinsicsAArch64.h"
44
#include "llvm/IR/IntrinsicsAMDGPU.h"
45
#include "llvm/IR/IntrinsicsARM.h"
46
#include "llvm/IR/IntrinsicsBPF.h"
47
#include "llvm/IR/IntrinsicsHexagon.h"
48
#include "llvm/IR/IntrinsicsNVPTX.h"
49
#include "llvm/IR/IntrinsicsPowerPC.h"
50
#include "llvm/IR/IntrinsicsR600.h"
51
#include "llvm/IR/IntrinsicsRISCV.h"
52
#include "llvm/IR/IntrinsicsS390.h"
53
#include "llvm/IR/IntrinsicsVE.h"
54
#include "llvm/IR/IntrinsicsWebAssembly.h"
55
#include "llvm/IR/IntrinsicsX86.h"
56
#include "llvm/IR/MDBuilder.h"
57
#include "llvm/IR/MatrixBuilder.h"
58
#include "llvm/Support/ConvertUTF.h"
59
#include "llvm/Support/MathExtras.h"
60
#include "llvm/Support/ScopedPrinter.h"
61
#include "llvm/TargetParser/AArch64TargetParser.h"
62
#include "llvm/TargetParser/X86TargetParser.h"
63
#include <optional>
64
#include <sstream>
65
66
using namespace clang;
67
using namespace CodeGen;
68
using namespace llvm;
69
70
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
71
0
                             Align AlignmentInBytes) {
72
0
  ConstantInt *Byte;
73
0
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
74
0
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
75
    // Nothing to initialize.
76
0
    return;
77
0
  case LangOptions::TrivialAutoVarInitKind::Zero:
78
0
    Byte = CGF.Builder.getInt8(0x00);
79
0
    break;
80
0
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
81
0
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
82
0
    Byte = llvm::dyn_cast<llvm::ConstantInt>(
83
0
        initializationPatternFor(CGF.CGM, Int8));
84
0
    break;
85
0
  }
86
0
  }
87
0
  if (CGF.CGM.stopAutoInit())
88
0
    return;
89
0
  auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
90
0
  I->addAnnotationMetadata("auto-init");
91
0
}
92
93
/// getBuiltinLibFunction - Given a builtin id for a function like
94
/// "__builtin_fabsf", return a Function* for "fabsf".
95
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
96
0
                                                     unsigned BuiltinID) {
97
0
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
98
99
  // Get the name, skip over the __builtin_ prefix (if necessary).
100
0
  StringRef Name;
101
0
  GlobalDecl D(FD);
102
103
  // TODO: This list should be expanded or refactored after all GCC-compatible
104
  // std libcall builtins are implemented.
105
0
  static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
106
0
      {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
107
0
      {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
108
0
      {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
109
0
      {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
110
0
      {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
111
0
      {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
112
0
      {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
113
0
      {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
114
0
      {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
115
0
      {Builtin::BI__builtin_printf, "__printfieee128"},
116
0
      {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
117
0
      {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
118
0
      {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
119
0
      {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
120
0
      {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
121
0
      {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
122
0
      {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
123
0
      {Builtin::BI__builtin_scanf, "__scanfieee128"},
124
0
      {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
125
0
      {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
126
0
      {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
127
0
      {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
128
0
      {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
129
0
  };
130
131
  // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
132
  // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
133
  // if it is 64-bit 'long double' mode.
134
0
  static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
135
0
      {Builtin::BI__builtin_frexpl, "frexp"},
136
0
      {Builtin::BI__builtin_ldexpl, "ldexp"},
137
0
      {Builtin::BI__builtin_modfl, "modf"},
138
0
  };
139
140
  // If the builtin has been declared explicitly with an assembler label,
141
  // use the mangled name. This differs from the plain label on platforms
142
  // that prefix labels.
143
0
  if (FD->hasAttr<AsmLabelAttr>())
144
0
    Name = getMangledName(D);
145
0
  else {
146
    // TODO: This mutation should also be applied to targets other than PPC
147
    // once the backend supports IEEE 128-bit style libcalls.
148
0
    if (getTriple().isPPC64() &&
149
0
        &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
150
0
        F128Builtins.contains(BuiltinID))
151
0
      Name = F128Builtins[BuiltinID];
152
0
    else if (getTriple().isOSAIX() &&
153
0
             &getTarget().getLongDoubleFormat() ==
154
0
                 &llvm::APFloat::IEEEdouble() &&
155
0
             AIXLongDouble64Builtins.contains(BuiltinID))
156
0
      Name = AIXLongDouble64Builtins[BuiltinID];
157
0
    else
158
0
      Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
159
0
  }
160
161
0
  llvm::FunctionType *Ty =
162
0
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
163
164
0
  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
165
0
}
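A hedged illustration of the redirection table above: on a PPC64 target whose long double is IEEE quad (binary128), the printf-family builtins resolve to the ieee128-suffixed library entry points listed in F128Builtins; the caller's source is unchanged. The function name below is illustrative only.

#include <cstdio>

void report(long double v) {
  // With IEEE-quad long double on PPC64, this call is emitted against
  // __printfieee128 (per F128Builtins); on other targets it stays printf.
  __builtin_printf("Value: %Lf\n", v);
}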
166
167
/// Emit the conversions required to turn the given value into an
168
/// integer of the given size.
169
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
170
0
                        QualType T, llvm::IntegerType *IntType) {
171
0
  V = CGF.EmitToMemory(V, T);
172
173
0
  if (V->getType()->isPointerTy())
174
0
    return CGF.Builder.CreatePtrToInt(V, IntType);
175
176
0
  assert(V->getType() == IntType);
177
0
  return V;
178
0
}
179
180
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
181
0
                          QualType T, llvm::Type *ResultType) {
182
0
  V = CGF.EmitFromMemory(V, T);
183
184
0
  if (ResultType->isPointerTy())
185
0
    return CGF.Builder.CreateIntToPtr(V, ResultType);
186
187
0
  assert(V->getType() == ResultType);
188
0
  return V;
189
0
}
190
191
0
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
192
0
  ASTContext &Ctx = CGF.getContext();
193
0
  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
194
0
  unsigned Bytes = Ptr.getElementType()->isPointerTy()
195
0
                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
196
0
                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
197
0
  unsigned Align = Ptr.getAlignment().getQuantity();
198
0
  if (Align % Bytes != 0) {
199
0
    DiagnosticsEngine &Diags = CGF.CGM.getDiags();
200
0
    Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
201
    // Force address to be at least naturally-aligned.
202
0
    return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
203
0
  }
204
0
  return Ptr;
205
0
}
206
207
/// Utility to insert an atomic instruction based on Intrinsic::ID
208
/// and the expression node.
209
static Value *MakeBinaryAtomicValue(
210
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
211
0
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
212
213
0
  QualType T = E->getType();
214
0
  assert(E->getArg(0)->getType()->isPointerType());
215
0
  assert(CGF.getContext().hasSameUnqualifiedType(T,
216
0
                                  E->getArg(0)->getType()->getPointeeType()));
217
0
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
218
219
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
220
221
0
  llvm::IntegerType *IntType = llvm::IntegerType::get(
222
0
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
223
224
0
  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
225
0
  llvm::Type *ValueType = Val->getType();
226
0
  Val = EmitToInt(CGF, Val, T, IntType);
227
228
0
  llvm::Value *Result =
229
0
      CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
230
0
  return EmitFromInt(CGF, Result, T, ValueType);
231
0
}
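As a minimal sketch of what this helper lowers, a GCC-style __sync builtin becomes a single sequentially consistent atomicrmw on the pointee type; the wrapper name is hypothetical.

#include <cstdint>

uint32_t fetch_add_u32(uint32_t *p, uint32_t v) {
  // Emitted through MakeBinaryAtomicValue(Kind = Add): a single
  // 'atomicrmw add ptr %p, i32 %v seq_cst', returning the pre-add value.
  return __sync_fetch_and_add(p, v);
}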
232
233
0
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
234
0
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
235
0
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
236
237
0
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
238
0
  LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
239
0
  LV.setNontemporal(true);
240
0
  CGF.EmitStoreOfScalar(Val, LV, false);
241
0
  return nullptr;
242
0
}
243
244
0
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
245
0
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
246
247
0
  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
248
0
  LV.setNontemporal(true);
249
0
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
250
0
}
251
252
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
253
                               llvm::AtomicRMWInst::BinOp Kind,
254
0
                               const CallExpr *E) {
255
0
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
256
0
}
257
258
/// Utility to insert an atomic instruction based on Intrinsic::ID and
259
/// the expression node, where the return value is the result of the
260
/// operation.
261
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
262
                                   llvm::AtomicRMWInst::BinOp Kind,
263
                                   const CallExpr *E,
264
                                   Instruction::BinaryOps Op,
265
0
                                   bool Invert = false) {
266
0
  QualType T = E->getType();
267
0
  assert(E->getArg(0)->getType()->isPointerType());
268
0
  assert(CGF.getContext().hasSameUnqualifiedType(T,
269
0
                                  E->getArg(0)->getType()->getPointeeType()));
270
0
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
271
272
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
273
274
0
  llvm::IntegerType *IntType = llvm::IntegerType::get(
275
0
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
276
277
0
  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
278
0
  llvm::Type *ValueType = Val->getType();
279
0
  Val = EmitToInt(CGF, Val, T, IntType);
280
281
0
  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
282
0
      Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
283
0
  Result = CGF.Builder.CreateBinOp(Op, Result, Val);
284
0
  if (Invert)
285
0
    Result =
286
0
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
287
0
                                llvm::ConstantInt::getAllOnesValue(IntType));
288
0
  Result = EmitFromInt(CGF, Result, T, ValueType);
289
0
  return RValue::get(Result);
290
0
}
291
292
/// Utility to insert an atomic cmpxchg instruction.
293
///
294
/// @param CGF The current codegen function.
295
/// @param E   Builtin call expression to convert to cmpxchg.
296
///            arg0 - address to operate on
297
///            arg1 - value to compare with
298
///            arg2 - new value
299
/// @param ReturnBool Specifies whether to return success flag of
300
///                   cmpxchg result or the old value.
301
///
302
/// @returns result of cmpxchg, according to ReturnBool
303
///
304
/// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
305
/// the function EmitAtomicCmpXchgForMSIntrin instead.
306
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
307
0
                                     bool ReturnBool) {
308
0
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
309
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
310
311
0
  llvm::IntegerType *IntType = llvm::IntegerType::get(
312
0
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
313
314
0
  Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
315
0
  llvm::Type *ValueType = Cmp->getType();
316
0
  Cmp = EmitToInt(CGF, Cmp, T, IntType);
317
0
  Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
318
319
0
  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
320
0
      DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
321
0
      llvm::AtomicOrdering::SequentiallyConsistent);
322
0
  if (ReturnBool)
323
    // Extract boolean success flag and zext it to int.
324
0
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
325
0
                                  CGF.ConvertType(E->getType()));
326
0
  else
327
    // Extract old value and emit it using the same type as compare value.
328
0
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
329
0
                       ValueType);
330
0
}
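A short sketch of the two flavours this helper serves (argument order: address, expected value, new value); ReturnBool selects between the cmpxchg success flag and the old value. Function names are illustrative.

#include <cstdint>

bool try_swap(uint64_t *p, uint64_t expected, uint64_t desired) {
  // ReturnBool == true: zext of the cmpxchg success bit to the result type.
  return __sync_bool_compare_and_swap(p, expected, desired);
}

uint64_t old_value(uint64_t *p, uint64_t expected, uint64_t desired) {
  // ReturnBool == false: the previous memory contents, in the original type.
  return __sync_val_compare_and_swap(p, expected, desired);
}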
331
332
/// This function should be invoked to emit atomic cmpxchg for Microsoft's
333
/// _InterlockedCompareExchange* intrinsics which have the following signature:
334
/// T _InterlockedCompareExchange(T volatile *Destination,
335
///                               T Exchange,
336
///                               T Comparand);
337
///
338
/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
339
/// cmpxchg *Destination, Comparand, Exchange.
340
/// So we need to swap Comparand and Exchange when invoking
341
/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
342
/// function MakeAtomicCmpXchgValue since it expects the arguments to be
343
/// already swapped.
344
345
static
346
Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
347
0
    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
348
0
  assert(E->getArg(0)->getType()->isPointerType());
349
0
  assert(CGF.getContext().hasSameUnqualifiedType(
350
0
      E->getType(), E->getArg(0)->getType()->getPointeeType()));
351
0
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
352
0
                                                 E->getArg(1)->getType()));
353
0
  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
354
0
                                                 E->getArg(2)->getType()));
355
356
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
357
358
0
  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
359
0
  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
360
361
  // For Release ordering, the failure ordering should be Monotonic.
362
0
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
363
0
                         AtomicOrdering::Monotonic :
364
0
                         SuccessOrdering;
365
366
  // The atomic instruction is marked volatile for consistency with MSVC. This
367
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
368
  // _Interlocked* operations in the future, we will have to remove the volatile
369
  // marker.
370
0
  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
371
0
      DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
372
0
  Result->setVolatile(true);
373
0
  return CGF.Builder.CreateExtractValue(Result, 0);
374
0
}
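A sketch of the operand swap described above, assuming an MSVC-style environment where <intrin.h> declares the intrinsic; the wrapper name is illustrative.

#include <intrin.h>   // MSVC / clang-cl declaration of the intrinsic

long previous_value(long volatile *dst, long exchange, long comparand) {
  // MSVC order: (Destination, Exchange, Comparand); the helper above emits
  // cmpxchg(Destination, Comparand, Exchange), i.e. with the last two swapped.
  return _InterlockedCompareExchange(dst, exchange, comparand);
}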
375
376
// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
377
// prototyped like this:
378
//
379
// unsigned char _InterlockedCompareExchange128...(
380
//     __int64 volatile * _Destination,
381
//     __int64 _ExchangeHigh,
382
//     __int64 _ExchangeLow,
383
//     __int64 * _ComparandResult);
384
//
385
// Note that Destination is assumed to be at least 16-byte aligned, despite
386
// being typed int64.
387
388
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
389
                                              const CallExpr *E,
390
0
                                              AtomicOrdering SuccessOrdering) {
391
0
  assert(E->getNumArgs() == 4);
392
0
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
393
0
  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
394
0
  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
395
0
  Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
396
397
0
  assert(DestPtr->getType()->isPointerTy());
398
0
  assert(!ExchangeHigh->getType()->isPointerTy());
399
0
  assert(!ExchangeLow->getType()->isPointerTy());
400
401
  // For Release ordering, the failure ordering should be Monotonic.
402
0
  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
403
0
                             ? AtomicOrdering::Monotonic
404
0
                             : SuccessOrdering;
405
406
  // Convert to i128 pointers and values. Alignment is also overridden for
407
  // destination pointer.
408
0
  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
409
0
  Address DestAddr(DestPtr, Int128Ty,
410
0
                   CGF.getContext().toCharUnitsFromBits(128));
411
0
  ComparandAddr = ComparandAddr.withElementType(Int128Ty);
412
413
  // (((i128)hi) << 64) | ((i128)lo)
414
0
  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
415
0
  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
416
0
  ExchangeHigh =
417
0
      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
418
0
  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
419
420
  // Load the comparand for the instruction.
421
0
  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
422
423
0
  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
424
0
                                              SuccessOrdering, FailureOrdering);
425
426
  // The atomic instruction is marked volatile for consistency with MSVC. This
427
  // blocks the few atomics optimizations that LLVM has. If we want to optimize
428
  // _Interlocked* operations in the future, we will have to remove the volatile
429
  // marker.
430
0
  CXI->setVolatile(true);
431
432
  // Store the result as an outparameter.
433
0
  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
434
0
                          ComparandAddr);
435
436
  // Get the success boolean and zero extend it to i8.
437
0
  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
438
0
  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
439
0
}
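The high/low recombination above is simply (hi << 64) | lo in i128; a hedged equivalent in plain C++, assuming the unsigned __int128 extension (available in Clang/GCC on 64-bit targets):

unsigned __int128 combine128(unsigned long long hi, unsigned long long lo) {
  // Mirrors the ZExt / Shl 64 / Or sequence above: (((i128)hi) << 64) | ((i128)lo).
  return ((unsigned __int128)hi << 64) | lo;
}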
440
441
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
442
0
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
443
0
  assert(E->getArg(0)->getType()->isPointerType());
444
445
0
  auto *IntTy = CGF.ConvertType(E->getType());
446
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
447
0
  auto *Result = CGF.Builder.CreateAtomicRMW(
448
0
      AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
449
0
  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
450
0
}
451
452
static Value *EmitAtomicDecrementValue(
453
    CodeGenFunction &CGF, const CallExpr *E,
454
0
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
455
0
  assert(E->getArg(0)->getType()->isPointerType());
456
457
0
  auto *IntTy = CGF.ConvertType(E->getType());
458
0
  Address DestAddr = CheckAtomicAlignment(CGF, E);
459
0
  auto *Result = CGF.Builder.CreateAtomicRMW(
460
0
      AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
461
0
  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
462
0
}
463
464
// Build a plain volatile load.
465
0
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
466
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
467
0
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
468
0
  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
469
0
  llvm::Type *ITy =
470
0
      llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
471
0
  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
472
0
  Load->setVolatile(true);
473
0
  return Load;
474
0
}
475
476
// Build a plain volatile store.
477
0
static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
478
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
479
0
  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
480
0
  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
481
0
  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
482
0
  llvm::StoreInst *Store =
483
0
      CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
484
0
  Store->setVolatile(true);
485
0
  return Store;
486
0
}
487
488
// Emit a simple mangled intrinsic that has 1 argument and a return type
489
// matching the argument type. Depending on mode, this may be a constrained
490
// floating-point intrinsic.
491
static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
492
                                const CallExpr *E, unsigned IntrinsicID,
493
0
                                unsigned ConstrainedIntrinsicID) {
494
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
495
496
0
  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
497
0
  if (CGF.Builder.getIsFPConstrained()) {
498
0
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
499
0
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
500
0
  } else {
501
0
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
502
0
    return CGF.Builder.CreateCall(F, Src0);
503
0
  }
504
0
}
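A sketch of when each branch fires: under default floating-point semantics the plain intrinsic is used, while a strict FP environment (e.g. enabled via #pragma STDC FENV_ACCESS ON) selects the constrained variant. Function names are illustrative.

double default_mode(double x) {
  return __builtin_sqrt(x);   // default mode: lowers to llvm.sqrt.f64
}

#pragma STDC FENV_ACCESS ON   // strict FP environment from here on (sketch)
double strict_mode(double x) {
  return __builtin_sqrt(x);   // strict mode: llvm.experimental.constrained.sqrt.f64
}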
505
506
// Emit an intrinsic that has 2 operands of the same type as its result.
507
// Depending on mode, this may be a constrained floating-point intrinsic.
508
static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
509
                                const CallExpr *E, unsigned IntrinsicID,
510
0
                                unsigned ConstrainedIntrinsicID) {
511
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
512
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
513
514
0
  if (CGF.Builder.getIsFPConstrained()) {
515
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
516
0
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
517
0
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
518
0
  } else {
519
0
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
520
0
    return CGF.Builder.CreateCall(F, { Src0, Src1 });
521
0
  }
522
0
}
523
524
// Like the binary case above, but the second argument's type is also mangled
// into the intrinsic name.
525
static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
526
    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
527
0
    llvm::Intrinsic::ID ConstrainedIntrinsicID) {
528
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
529
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
530
531
0
  if (CGF.Builder.getIsFPConstrained()) {
532
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
533
0
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
534
0
                                       {Src0->getType(), Src1->getType()});
535
0
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
536
0
  }
537
538
0
  Function *F =
539
0
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
540
0
  return CGF.Builder.CreateCall(F, {Src0, Src1});
541
0
}
542
543
// Emit an intrinsic that has 3 operands of the same type as its result.
544
// Depending on mode, this may be a constrained floating-point intrinsic.
545
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
546
                                 const CallExpr *E, unsigned IntrinsicID,
547
0
                                 unsigned ConstrainedIntrinsicID) {
548
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
549
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
550
0
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
551
552
0
  if (CGF.Builder.getIsFPConstrained()) {
553
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
554
0
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
555
0
    return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
556
0
  } else {
557
0
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
558
0
    return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
559
0
  }
560
0
}
561
562
// Emit an intrinsic where all operands are of the same type as the result.
563
// Depending on mode, this may be a constrained floating-point intrinsic.
564
static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
565
                                                unsigned IntrinsicID,
566
                                                unsigned ConstrainedIntrinsicID,
567
                                                llvm::Type *Ty,
568
0
                                                ArrayRef<Value *> Args) {
569
0
  Function *F;
570
0
  if (CGF.Builder.getIsFPConstrained())
571
0
    F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
572
0
  else
573
0
    F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
574
575
0
  if (CGF.Builder.getIsFPConstrained())
576
0
    return CGF.Builder.CreateConstrainedFPCall(F, Args);
577
0
  else
578
0
    return CGF.Builder.CreateCall(F, Args);
579
0
}
580
581
// Emit a simple mangled intrinsic that has 1 argument and a return type
582
// matching the argument type.
583
static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
584
                               unsigned IntrinsicID,
585
0
                               llvm::StringRef Name = "") {
586
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
587
588
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
589
0
  return CGF.Builder.CreateCall(F, Src0, Name);
590
0
}
591
592
// Emit an intrinsic that has 2 operands of the same type as its result.
593
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
594
                                const CallExpr *E,
595
0
                                unsigned IntrinsicID) {
596
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
597
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
598
599
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
600
0
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
601
0
}
602
603
// Emit an intrinsic that has 3 operands of the same type as its result.
604
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
605
                                 const CallExpr *E,
606
0
                                 unsigned IntrinsicID) {
607
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
608
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
609
0
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
610
611
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
612
0
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
613
0
}
614
615
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
616
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
617
                               const CallExpr *E,
618
0
                               unsigned IntrinsicID) {
619
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
620
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
621
622
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
623
0
  return CGF.Builder.CreateCall(F, {Src0, Src1});
624
0
}
625
626
// Emit an intrinsic that has overloaded integer result and fp operand.
627
static Value *
628
emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
629
                                        unsigned IntrinsicID,
630
0
                                        unsigned ConstrainedIntrinsicID) {
631
0
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
632
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
633
634
0
  if (CGF.Builder.getIsFPConstrained()) {
635
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
636
0
    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
637
0
                                       {ResultType, Src0->getType()});
638
0
    return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
639
0
  } else {
640
0
    Function *F =
641
0
        CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
642
0
    return CGF.Builder.CreateCall(F, Src0);
643
0
  }
644
0
}
645
646
static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
647
0
                               llvm::Intrinsic::ID IntrinsicID) {
648
0
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
649
0
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
650
651
0
  QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
652
0
  llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
653
0
  llvm::Function *F =
654
0
      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
655
0
  llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
656
657
0
  llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
658
0
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
659
0
  CGF.EmitStoreOfScalar(Exp, LV);
660
661
0
  return CGF.Builder.CreateExtractValue(Call, 0);
662
0
}
663
664
/// EmitFAbs - Emit a call to @llvm.fabs().
665
0
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
666
0
  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
667
0
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
668
0
  Call->setDoesNotAccessMemory();
669
0
  return Call;
670
0
}
671
672
/// Emit the computation of the sign bit for a floating point value. Returns
673
/// the i1 sign bit value.
674
0
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
675
0
  LLVMContext &C = CGF.CGM.getLLVMContext();
676
677
0
  llvm::Type *Ty = V->getType();
678
0
  int Width = Ty->getPrimitiveSizeInBits();
679
0
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
680
0
  V = CGF.Builder.CreateBitCast(V, IntTy);
681
0
  if (Ty->isPPC_FP128Ty()) {
682
    // We want the sign bit of the higher-order double. The bitcast we just
683
    // did works as if the double-double was stored to memory and then
684
    // read as an i128. The "store" will put the higher-order double in the
685
    // lower address in both little- and big-Endian modes, but the "load"
686
    // will treat those bits as a different part of the i128: the low bits in
687
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
688
    // we need to shift the high bits down to the low before truncating.
689
0
    Width >>= 1;
690
0
    if (CGF.getTarget().isBigEndian()) {
691
0
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
692
0
      V = CGF.Builder.CreateLShr(V, ShiftCst);
693
0
    }
694
    // We are truncating value in order to extract the higher-order
695
    // double, which we will be using to extract the sign from.
696
0
    IntTy = llvm::IntegerType::get(C, Width);
697
0
    V = CGF.Builder.CreateTrunc(V, IntTy);
698
0
  }
699
0
  Value *Zero = llvm::Constant::getNullValue(IntTy);
700
0
  return CGF.Builder.CreateICmpSLT(V, Zero);
701
0
}
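For ordinary IEEE types the computation above reduces to a bitcast plus a signed compare against zero; a hedged scalar equivalent for double, with an illustrative name:

#include <cstdint>
#include <cstring>

bool sign_bit(double d) {
  // Mirrors EmitSignBit for f64: reinterpret the bits as i64 and test "< 0".
  int64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  return bits < 0;
}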
702
703
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
704
0
                              const CallExpr *E, llvm::Constant *calleeValue) {
705
0
  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
706
0
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
707
0
}
708
709
/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
710
/// depending on IntrinsicID.
711
///
712
/// \arg CGF The current codegen function.
713
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
714
/// \arg X The first argument to the llvm.*.with.overflow.*.
715
/// \arg Y The second argument to the llvm.*.with.overflow.*.
716
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
717
/// \returns The result (i.e. sum/product) returned by the intrinsic.
718
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
719
                                          const llvm::Intrinsic::ID IntrinsicID,
720
                                          llvm::Value *X, llvm::Value *Y,
721
0
                                          llvm::Value *&Carry) {
722
  // Make sure we have integers of the same width.
723
0
  assert(X->getType() == Y->getType() &&
724
0
         "Arguments must be the same type. (Did you forget to make sure both "
725
0
         "arguments have the same integer width?)");
726
727
0
  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
728
0
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
729
0
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
730
0
  return CGF.Builder.CreateExtractValue(Tmp, 0);
731
0
}
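This is the lowering behind the checked-arithmetic builtins; a small sketch of the caller-visible behaviour, with a hypothetical wrapper name:

#include <cstdint>

bool checked_add(int32_t a, int32_t b, int32_t *out) {
  // Lowered via llvm.sadd.with.overflow.i32: element 0 of the returned pair is
  // the sum (stored through 'out'), element 1 is the Carry flag returned here.
  return __builtin_add_overflow(a, b, out);
}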
732
733
static Value *emitRangedBuiltin(CodeGenFunction &CGF,
734
                                unsigned IntrinsicID,
735
0
                                int low, int high) {
736
0
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
737
0
    llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
738
0
    Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
739
0
    llvm::Instruction *Call = CGF.Builder.CreateCall(F);
740
0
    Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
741
0
    Call->setMetadata(llvm::LLVMContext::MD_noundef,
742
0
                      llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
743
0
    return Call;
744
0
}
745
746
namespace {
747
  struct WidthAndSignedness {
748
    unsigned Width;
749
    bool Signed;
750
  };
751
}
752
753
static WidthAndSignedness
754
getIntegerWidthAndSignedness(const clang::ASTContext &context,
755
0
                             const clang::QualType Type) {
756
0
  assert(Type->isIntegerType() && "Given type is not an integer.");
757
0
  unsigned Width = Type->isBooleanType()  ? 1
758
0
                   : Type->isBitIntType() ? context.getIntWidth(Type)
759
0
                                          : context.getTypeInfo(Type).Width;
760
0
  bool Signed = Type->isSignedIntegerType();
761
0
  return {Width, Signed};
762
0
}
763
764
// Given one or more integer types, this function produces an integer type that
765
// encompasses them: any value in one of the given types could be expressed in
766
// the encompassing type.
767
static struct WidthAndSignedness
768
0
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
769
0
  assert(Types.size() > 0 && "Empty list of types.");
770
771
  // If any of the given types is signed, we must return a signed type.
772
0
  bool Signed = false;
773
0
  for (const auto &Type : Types) {
774
0
    Signed |= Type.Signed;
775
0
  }
776
777
  // The encompassing type must have a width greater than or equal to the width
778
  // of the specified types.  Additionally, if the encompassing type is signed,
779
  // its width must be strictly greater than the width of any unsigned types
780
  // given.
781
0
  unsigned Width = 0;
782
0
  for (const auto &Type : Types) {
783
0
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
784
0
    if (Width < MinWidth) {
785
0
      Width = MinWidth;
786
0
    }
787
0
  }
788
789
0
  return {Width, Signed};
790
0
}
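A worked example of the rule above: for one 32-bit unsigned and one 32-bit signed input the result must be signed, the unsigned operand then needs Width + 1 = 33 bits, so the encompassing type is {Width = 33, Signed = true}. A hypothetical standalone restatement, for illustration only:

struct WS { unsigned Width; bool Signed; };

WS encompass(WS a, WS b) {
  bool Signed = a.Signed || b.Signed;
  unsigned Wa = a.Width + (Signed && !a.Signed);
  unsigned Wb = b.Width + (Signed && !b.Signed);
  return {Wa > Wb ? Wa : Wb, Signed};   // e.g. {32, unsigned} + {32, signed} -> {33, signed}
}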
791
792
0
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
793
0
  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
794
0
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
795
0
}
796
797
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
798
/// __builtin_object_size(p, @p To) is correct
799
0
static bool areBOSTypesCompatible(int From, int To) {
800
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
801
  // Type=2 identically. Encoding this implementation detail here may make
802
  // improving __builtin_object_size difficult in the future, so it's omitted.
803
0
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
804
0
}
805
806
static llvm::Value *
807
0
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
808
0
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
809
0
}
810
811
llvm::Value *
812
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
813
                                                 llvm::IntegerType *ResType,
814
                                                 llvm::Value *EmittedE,
815
0
                                                 bool IsDynamic) {
816
0
  uint64_t ObjectSize;
817
0
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
818
0
    return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
819
0
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
820
0
}
821
822
const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberField(
823
0
    ASTContext &Ctx, const RecordDecl *RD, StringRef Name, uint64_t &Offset) {
824
0
  const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
825
0
      getLangOpts().getStrictFlexArraysLevel();
826
0
  unsigned FieldNo = 0;
827
0
  bool IsUnion = RD->isUnion();
828
829
0
  for (const Decl *D : RD->decls()) {
830
0
    if (const auto *Field = dyn_cast<FieldDecl>(D);
831
0
        Field && (Name.empty() || Field->getNameAsString() == Name) &&
832
0
        Decl::isFlexibleArrayMemberLike(
833
0
            Ctx, Field, Field->getType(), StrictFlexArraysLevel,
834
0
            /*IgnoreTemplateOrMacroSubstitution=*/true)) {
835
0
      const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
836
0
      Offset += Layout.getFieldOffset(FieldNo);
837
0
      return Field;
838
0
    }
839
840
0
    if (const auto *Record = dyn_cast<RecordDecl>(D))
841
0
      if (const FieldDecl *Field =
842
0
              FindFlexibleArrayMemberField(Ctx, Record, Name, Offset)) {
843
0
        const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
844
0
        Offset += Layout.getFieldOffset(FieldNo);
845
0
        return Field;
846
0
      }
847
848
0
    if (!IsUnion && isa<FieldDecl>(D))
849
0
      ++FieldNo;
850
0
  }
851
852
0
  return nullptr;
853
0
}
854
855
0
static unsigned CountCountedByAttrs(const RecordDecl *RD) {
856
0
  unsigned Num = 0;
857
858
0
  for (const Decl *D : RD->decls()) {
859
0
    if (const auto *FD = dyn_cast<FieldDecl>(D);
860
0
        FD && FD->hasAttr<CountedByAttr>()) {
861
0
      return ++Num;
862
0
    }
863
864
0
    if (const auto *Rec = dyn_cast<RecordDecl>(D))
865
0
      Num += CountCountedByAttrs(Rec);
866
0
  }
867
868
0
  return Num;
869
0
}
870
871
llvm::Value *
872
CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
873
0
                                             llvm::IntegerType *ResType) {
874
  // The code generated here calculates the size of a struct with a flexible
875
  // array member that uses the counted_by attribute. There are three cases
876
  // we handle:
877
  //
878
  //       struct s {
879
  //         unsigned long flags;
880
  //         int count;
881
  //         int array[] __attribute__((counted_by(count)));
882
  //       }
883
  //
884
  //   1) bdos of the flexible array itself:
885
  //
886
  //     __builtin_dynamic_object_size(p->array, 1) ==
887
  //         p->count * sizeof(*p->array)
888
  //
889
  //   2) bdos of a pointer into the flexible array:
890
  //
891
  //     __builtin_dynamic_object_size(&p->array[42], 1) ==
892
  //         (p->count - 42) * sizeof(*p->array)
893
  //
894
  //   2) bdos of the whole struct, including the flexible array:
895
  //
896
  //     __builtin_dynamic_object_size(p, 1) ==
897
  //        max(sizeof(struct s),
898
  //            offsetof(struct s, array) + p->count * sizeof(*p->array))
899
  //
900
0
  ASTContext &Ctx = getContext();
901
0
  const Expr *Base = E->IgnoreParenImpCasts();
902
0
  const Expr *Idx = nullptr;
903
904
0
  if (const auto *UO = dyn_cast<UnaryOperator>(Base);
905
0
      UO && UO->getOpcode() == UO_AddrOf) {
906
0
    Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
907
0
    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
908
0
      Base = ASE->getBase()->IgnoreParenImpCasts();
909
0
      Idx = ASE->getIdx()->IgnoreParenImpCasts();
910
911
0
      if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
912
0
        int64_t Val = IL->getValue().getSExtValue();
913
0
        if (Val < 0)
914
0
          return getDefaultBuiltinObjectSizeResult(Type, ResType);
915
916
0
        if (Val == 0)
917
          // The index is 0, so we don't need to take it into account.
918
0
          Idx = nullptr;
919
0
      }
920
0
    } else {
921
      // Potential pointer to another element in the struct.
922
0
      Base = SubExpr;
923
0
    }
924
0
  }
925
926
  // Get the flexible array member Decl.
927
0
  const RecordDecl *OuterRD = nullptr;
928
0
  std::string FAMName;
929
0
  if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
930
    // Check if \p Base is referencing the FAM itself.
931
0
    const ValueDecl *VD = ME->getMemberDecl();
932
0
    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
933
0
    FAMName = VD->getNameAsString();
934
0
  } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
935
    // Check if we're pointing to the whole struct.
936
0
    QualType Ty = DRE->getDecl()->getType();
937
0
    if (Ty->isPointerType())
938
0
      Ty = Ty->getPointeeType();
939
0
    OuterRD = Ty->getAsRecordDecl();
940
941
    // If we have a situation like this:
942
    //
943
    //     struct union_of_fams {
944
    //         int flags;
945
    //         union {
946
    //             signed char normal_field;
947
    //             struct {
948
    //                 int count1;
949
    //                 int arr1[] __counted_by(count1);
950
    //             };
951
    //             struct {
952
    //                 signed char count2;
953
    //                 int arr2[] __counted_by(count2);
954
    //             };
955
    //         };
956
    //    };
957
    //
958
    // We don't know which 'count' to use in this scenario:
959
    //
960
    //     size_t get_size(struct union_of_fams *p) {
961
    //         return __builtin_dynamic_object_size(p, 1);
962
    //     }
963
    //
964
    // Instead of calculating a wrong number, we give up.
965
0
    if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
966
0
      return nullptr;
967
0
  }
968
969
0
  if (!OuterRD)
970
0
    return nullptr;
971
972
0
  uint64_t Offset = 0;
973
0
  const FieldDecl *FAMDecl =
974
0
      FindFlexibleArrayMemberField(Ctx, OuterRD, FAMName, Offset);
975
0
  Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
976
977
0
  if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>())
978
    // No flexible array member found or it doesn't have the "counted_by"
979
    // attribute.
980
0
    return nullptr;
981
982
0
  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
983
0
  if (!CountedByFD)
984
    // Can't find the field referenced by the "counted_by" attribute.
985
0
    return nullptr;
986
987
  // Build a load of the counted_by field.
988
0
  bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
989
0
  Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
990
0
  if (!CountedByInst)
991
0
    return getDefaultBuiltinObjectSizeResult(Type, ResType);
992
993
0
  CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
994
995
  // Build a load of the index and subtract it from the count.
996
0
  Value *IdxInst = nullptr;
997
0
  if (Idx) {
998
0
    if (Idx->HasSideEffects(getContext()))
999
      // We can't have side-effects.
1000
0
      return getDefaultBuiltinObjectSizeResult(Type, ResType);
1001
1002
0
    bool IdxSigned = Idx->getType()->isSignedIntegerType();
1003
0
    IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1004
0
    IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1005
1006
    // We go ahead with the calculation here. If the index turns out to be
1007
    // negative, we'll catch it at the end.
1008
0
    CountedByInst =
1009
0
        Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1010
0
  }
1011
1012
  // Calculate how large the flexible array member is in bytes.
1013
0
  const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1014
0
  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1015
0
  llvm::Constant *ElemSize =
1016
0
      llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1017
0
  Value *FAMSize =
1018
0
      Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1019
0
  FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1020
0
  Value *Res = FAMSize;
1021
1022
0
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1023
    // The whole struct is specified in the __bdos.
1024
0
    const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1025
1026
    // Get the offset of the FAM.
1027
0
    llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1028
0
    Value *OffsetAndFAMSize =
1029
0
        Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1030
1031
    // Get the full size of the struct.
1032
0
    llvm::Constant *SizeofStruct =
1033
0
        ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1034
1035
    // max(sizeof(struct s),
1036
    //     offsetof(struct s, array) + p->count * sizeof(*p->array))
1037
0
    Res = IsSigned
1038
0
              ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1039
0
                                              OffsetAndFAMSize, SizeofStruct)
1040
0
              : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1041
0
                                              OffsetAndFAMSize, SizeofStruct);
1042
0
  }
1043
1044
  // A negative \p IdxInst or \p CountedByInst means that the index lands
1045
  // outside of the flexible array member. If that's the case, we want to
1046
  // return 0.
1047
0
  Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1048
0
  if (IdxInst)
1049
0
    Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1050
1051
0
  return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1052
0
}
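Reusing the struct from the comment at the top of this function, a hedged sketch of what the emitted size computation evaluates to at run time; the wrapper function is illustrative.

struct s {
  unsigned long flags;
  int count;
  int array[] __attribute__((counted_by(count)));
};

unsigned long sizes(struct s *p) {
  // Case 1: p->count * sizeof(*p->array)
  unsigned long fam  = __builtin_dynamic_object_size(p->array, 1);
  // Case 2: (p->count - 42) * sizeof(*p->array), selected down to 0 if negative
  unsigned long tail = __builtin_dynamic_object_size(&p->array[42], 1);
  // Case 3: max(sizeof(struct s), offsetof(struct s, array) + p->count * sizeof(*p->array))
  unsigned long all  = __builtin_dynamic_object_size(p, 1);
  return fam + tail + all;
}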
1053
1054
/// Returns a Value corresponding to the size of the given expression.
1055
/// This Value may be either of the following:
1056
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
1057
///     it)
1058
///   - A call to the @llvm.objectsize intrinsic
1059
///
1060
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1061
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1062
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1063
llvm::Value *
1064
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1065
                                       llvm::IntegerType *ResType,
1066
0
                                       llvm::Value *EmittedE, bool IsDynamic) {
1067
  // We need to reference an argument if the pointer is a parameter with the
1068
  // pass_object_size attribute.
1069
0
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1070
0
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1071
0
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1072
0
    if (Param != nullptr && PS != nullptr &&
1073
0
        areBOSTypesCompatible(PS->getType(), Type)) {
1074
0
      auto Iter = SizeArguments.find(Param);
1075
0
      assert(Iter != SizeArguments.end());
1076
1077
0
      const ImplicitParamDecl *D = Iter->second;
1078
0
      auto DIter = LocalDeclMap.find(D);
1079
0
      assert(DIter != LocalDeclMap.end());
1080
1081
0
      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1082
0
                              getContext().getSizeType(), E->getBeginLoc());
1083
0
    }
1084
0
  }
1085
1086
0
  if (IsDynamic) {
1087
    // Emit special code for a flexible array member with the "counted_by"
1088
    // attribute.
1089
0
    if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1090
0
      return V;
1091
0
  }
1092
1093
  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1094
  // evaluate E for side-effects. In either case, we shouldn't lower to
1095
  // @llvm.objectsize.
1096
0
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1097
0
    return getDefaultBuiltinObjectSizeResult(Type, ResType);
1098
1099
0
  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1100
0
  assert(Ptr->getType()->isPointerTy() &&
1101
0
         "Non-pointer passed to __builtin_object_size?");
1102
1103
0
  Function *F =
1104
0
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1105
1106
  // LLVM only supports 0 and 2, so make sure that we pass that along as a boolean.
1107
0
  Value *Min = Builder.getInt1((Type & 2) != 0);
1108
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1109
0
  Value *NullIsUnknown = Builder.getTrue();
1110
0
  Value *Dynamic = Builder.getInt1(IsDynamic);
1111
0
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1112
0
}
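A hedged example of the first case handled above: a parameter annotated with Clang's pass_object_size attribute carries its size as a hidden argument, which this function loads instead of calling @llvm.objectsize. The function name is illustrative.

#include <cstddef>

size_t reported_size(void *p __attribute__((pass_object_size(0)))) {
  // Resolved from the hidden size argument supplied at each call site.
  return __builtin_object_size(p, 0);
}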
1113
1114
namespace {
1115
/// A struct to generically describe a bit test intrinsic.
1116
struct BitTest {
1117
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1118
  enum InterlockingKind : uint8_t {
1119
    Unlocked,
1120
    Sequential,
1121
    Acquire,
1122
    Release,
1123
    NoFence
1124
  };
1125
1126
  ActionKind Action;
1127
  InterlockingKind Interlocking;
1128
  bool Is64Bit;
1129
1130
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1131
};
1132
} // namespace
1133
1134
0
BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1135
0
  switch (BuiltinID) {
1136
    // Main portable variants.
1137
0
  case Builtin::BI_bittest:
1138
0
    return {TestOnly, Unlocked, false};
1139
0
  case Builtin::BI_bittestandcomplement:
1140
0
    return {Complement, Unlocked, false};
1141
0
  case Builtin::BI_bittestandreset:
1142
0
    return {Reset, Unlocked, false};
1143
0
  case Builtin::BI_bittestandset:
1144
0
    return {Set, Unlocked, false};
1145
0
  case Builtin::BI_interlockedbittestandreset:
1146
0
    return {Reset, Sequential, false};
1147
0
  case Builtin::BI_interlockedbittestandset:
1148
0
    return {Set, Sequential, false};
1149
1150
    // X86-specific 64-bit variants.
1151
0
  case Builtin::BI_bittest64:
1152
0
    return {TestOnly, Unlocked, true};
1153
0
  case Builtin::BI_bittestandcomplement64:
1154
0
    return {Complement, Unlocked, true};
1155
0
  case Builtin::BI_bittestandreset64:
1156
0
    return {Reset, Unlocked, true};
1157
0
  case Builtin::BI_bittestandset64:
1158
0
    return {Set, Unlocked, true};
1159
0
  case Builtin::BI_interlockedbittestandreset64:
1160
0
    return {Reset, Sequential, true};
1161
0
  case Builtin::BI_interlockedbittestandset64:
1162
0
    return {Set, Sequential, true};
1163
1164
    // ARM/AArch64-specific ordering variants.
1165
0
  case Builtin::BI_interlockedbittestandset_acq:
1166
0
    return {Set, Acquire, false};
1167
0
  case Builtin::BI_interlockedbittestandset_rel:
1168
0
    return {Set, Release, false};
1169
0
  case Builtin::BI_interlockedbittestandset_nf:
1170
0
    return {Set, NoFence, false};
1171
0
  case Builtin::BI_interlockedbittestandreset_acq:
1172
0
    return {Reset, Acquire, false};
1173
0
  case Builtin::BI_interlockedbittestandreset_rel:
1174
0
    return {Reset, Release, false};
1175
0
  case Builtin::BI_interlockedbittestandreset_nf:
1176
0
    return {Reset, NoFence, false};
1177
0
  }
1178
0
  llvm_unreachable("expected only bittest intrinsics");
1179
0
}
1180
1181
0
static char bitActionToX86BTCode(BitTest::ActionKind A) {
1182
0
  switch (A) {
1183
0
  case BitTest::TestOnly:   return '\0';
1184
0
  case BitTest::Complement: return 'c';
1185
0
  case BitTest::Reset:      return 'r';
1186
0
  case BitTest::Set:        return 's';
1187
0
  }
1188
0
  llvm_unreachable("invalid action");
1189
0
}
1190
1191
static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1192
                                            BitTest BT,
1193
                                            const CallExpr *E, Value *BitBase,
1194
0
                                            Value *BitPos) {
1195
0
  char Action = bitActionToX86BTCode(BT.Action);
1196
0
  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1197
1198
  // Build the assembly.
1199
0
  SmallString<64> Asm;
1200
0
  raw_svector_ostream AsmOS(Asm);
1201
0
  if (BT.Interlocking != BitTest::Unlocked)
1202
0
    AsmOS << "lock ";
1203
0
  AsmOS << "bt";
1204
0
  if (Action)
1205
0
    AsmOS << Action;
1206
0
  AsmOS << SizeSuffix << " $2, ($1)";
1207
1208
  // Build the constraints. FIXME: We should support immediates when possible.
1209
0
  std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1210
0
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1211
0
  if (!MachineClobbers.empty()) {
1212
0
    Constraints += ',';
1213
0
    Constraints += MachineClobbers;
1214
0
  }
1215
0
  llvm::IntegerType *IntType = llvm::IntegerType::get(
1216
0
      CGF.getLLVMContext(),
1217
0
      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1218
0
  llvm::FunctionType *FTy =
1219
0
      llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1220
1221
0
  llvm::InlineAsm *IA =
1222
0
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1223
0
  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1224
0
}
1225
1226
static llvm::AtomicOrdering
1227
0
getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1228
0
  switch (I) {
1229
0
  case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
1230
0
  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1231
0
  case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
1232
0
  case BitTest::Release:    return llvm::AtomicOrdering::Release;
1233
0
  case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
1234
0
  }
1235
0
  llvm_unreachable("invalid interlocking");
1236
0
}
1237
1238
/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1239
/// bits and a bit position and read and optionally modify the bit at that
1240
/// position. The position index can be arbitrarily large, i.e. it can be larger
1241
/// than 31 or 63, so we need an indexed load in the general case.
1242
static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1243
                                         unsigned BuiltinID,
1244
0
                                         const CallExpr *E) {
1245
0
  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1246
0
  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1247
1248
0
  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1249
1250
  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1251
  // indexing operation internally. Use them if possible.
1252
0
  if (CGF.getTarget().getTriple().isX86())
1253
0
    return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1254
1255
  // Otherwise, use generic code to load one byte and test the bit. Use all but
1256
  // the bottom three bits as the array index, and the bottom three bits to form
1257
  // a mask.
1258
  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1259
0
  Value *ByteIndex = CGF.Builder.CreateAShr(
1260
0
      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1261
0
  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1262
0
  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1263
0
                                                 ByteIndex, "bittest.byteaddr"),
1264
0
                   CGF.Int8Ty, CharUnits::One());
1265
0
  Value *PosLow =
1266
0
      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1267
0
                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1268
1269
  // The updating instructions will need a mask.
1270
0
  Value *Mask = nullptr;
1271
0
  if (BT.Action != BitTest::TestOnly) {
1272
0
    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1273
0
                                 "bittest.mask");
1274
0
  }
1275
1276
  // Check the action and ordering of the interlocked intrinsics.
1277
0
  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1278
1279
0
  Value *OldByte = nullptr;
1280
0
  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1281
    // Emit a combined atomicrmw load/store operation for the interlocked
1282
    // intrinsics.
1283
0
    llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1284
0
    if (BT.Action == BitTest::Reset) {
1285
0
      Mask = CGF.Builder.CreateNot(Mask);
1286
0
      RMWOp = llvm::AtomicRMWInst::And;
1287
0
    }
1288
0
    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1289
0
  } else {
1290
    // Emit a plain load for the non-interlocked intrinsics.
1291
0
    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1292
0
    Value *NewByte = nullptr;
1293
0
    switch (BT.Action) {
1294
0
    case BitTest::TestOnly:
1295
      // Don't store anything.
1296
0
      break;
1297
0
    case BitTest::Complement:
1298
0
      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1299
0
      break;
1300
0
    case BitTest::Reset:
1301
0
      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1302
0
      break;
1303
0
    case BitTest::Set:
1304
0
      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1305
0
      break;
1306
0
    }
1307
0
    if (NewByte)
1308
0
      CGF.Builder.CreateStore(NewByte, ByteAddr);
1309
0
  }
1310
1311
  // However we loaded the old byte, either by plain load or atomicrmw, shift
1312
  // the bit into the low position and mask it to 0 or 1.
1313
0
  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1314
0
  return CGF.Builder.CreateAnd(
1315
0
      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1316
0
}
1317
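For reference, a minimal C++ sketch of what the generic (non-x86, non-interlocked) path above computes for the "set" action; the names mirror the IR value names and the helper itself is illustrative only:

#include <cstdint>

// Byte index comes from the high bits of the position, the mask from the low
// three bits; the returned value is the old state of the addressed bit.
static unsigned char referenceBitTestAndSet(unsigned char *BitBase, int64_t BitPos) {
  unsigned char *ByteAddr = BitBase + (BitPos >> 3);       // bittest.byteaddr
  unsigned char PosLow = (unsigned char)(BitPos & 0x7);
  unsigned char Mask = (unsigned char)(1u << PosLow);      // bittest.mask
  unsigned char OldByte = *ByteAddr;                       // bittest.byte (plain load)
  *ByteAddr = (unsigned char)(OldByte | Mask);             // the Set action
  return (unsigned char)((OldByte >> PosLow) & 1);         // bittest.res
}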
1318
static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1319
                                                unsigned BuiltinID,
1320
0
                                                const CallExpr *E) {
1321
0
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1322
1323
0
  SmallString<64> Asm;
1324
0
  raw_svector_ostream AsmOS(Asm);
1325
0
  llvm::IntegerType *RetType = CGF.Int32Ty;
1326
1327
0
  switch (BuiltinID) {
1328
0
  case clang::PPC::BI__builtin_ppc_ldarx:
1329
0
    AsmOS << "ldarx ";
1330
0
    RetType = CGF.Int64Ty;
1331
0
    break;
1332
0
  case clang::PPC::BI__builtin_ppc_lwarx:
1333
0
    AsmOS << "lwarx ";
1334
0
    RetType = CGF.Int32Ty;
1335
0
    break;
1336
0
  case clang::PPC::BI__builtin_ppc_lharx:
1337
0
    AsmOS << "lharx ";
1338
0
    RetType = CGF.Int16Ty;
1339
0
    break;
1340
0
  case clang::PPC::BI__builtin_ppc_lbarx:
1341
0
    AsmOS << "lbarx ";
1342
0
    RetType = CGF.Int8Ty;
1343
0
    break;
1344
0
  default:
1345
0
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1346
0
  }
1347
1348
0
  AsmOS << "$0, ${1:y}";
1349
1350
0
  std::string Constraints = "=r,*Z,~{memory}";
1351
0
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1352
0
  if (!MachineClobbers.empty()) {
1353
0
    Constraints += ',';
1354
0
    Constraints += MachineClobbers;
1355
0
  }
1356
1357
0
  llvm::Type *PtrType = CGF.UnqualPtrTy;
1358
0
  llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1359
1360
0
  llvm::InlineAsm *IA =
1361
0
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1362
0
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1363
0
  CI->addParamAttr(
1364
0
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1365
0
  return CI;
1366
0
}
1367
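As a worked example of the inline-asm construction above: for __builtin_ppc_lwarx the function emits the asm string "lwarx $0, ${1:y}" with the constraints "=r,*Z,~{memory}" (plus any target machine clobbers), returns an i32, and attaches an elementtype(i32) attribute to the pointer operand. The other cases differ only in the mnemonic and the return width.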
1368
namespace {
1369
enum class MSVCSetJmpKind {
1370
  _setjmpex,
1371
  _setjmp3,
1372
  _setjmp
1373
};
1374
}
1375
1376
/// MSVC handles setjmp a bit differently on different platforms. On every
1377
/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1378
/// parameters can be passed as variadic arguments, but we always pass none.
1379
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1380
0
                               const CallExpr *E) {
1381
0
  llvm::Value *Arg1 = nullptr;
1382
0
  llvm::Type *Arg1Ty = nullptr;
1383
0
  StringRef Name;
1384
0
  bool IsVarArg = false;
1385
0
  if (SJKind == MSVCSetJmpKind::_setjmp3) {
1386
0
    Name = "_setjmp3";
1387
0
    Arg1Ty = CGF.Int32Ty;
1388
0
    Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1389
0
    IsVarArg = true;
1390
0
  } else {
1391
0
    Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1392
0
    Arg1Ty = CGF.Int8PtrTy;
1393
0
    if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1394
0
      Arg1 = CGF.Builder.CreateCall(
1395
0
          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1396
0
    } else
1397
0
      Arg1 = CGF.Builder.CreateCall(
1398
0
          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1399
0
          llvm::ConstantInt::get(CGF.Int32Ty, 0));
1400
0
  }
1401
1402
  // Mark the call site and declaration with ReturnsTwice.
1403
0
  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1404
0
  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1405
0
      CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1406
0
      llvm::Attribute::ReturnsTwice);
1407
0
  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1408
0
      llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1409
0
      ReturnsTwiceAttr, /*Local=*/true);
1410
1411
0
  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1412
0
      CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1413
0
  llvm::Value *Args[] = {Buf, Arg1};
1414
0
  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1415
0
  CB->setAttributes(ReturnsTwiceAttr);
1416
0
  return RValue::get(CB);
1417
0
}
1418
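Roughly, the runtime calls emitted above look like the following pseudo-C (illustrative only; the helpers in angle brackets stand for the intrinsic calls built above):

//   32-bit x86:  result = _setjmp3(buf, 0);                      // vararg, extra arg count 0
//   AArch64:     result = _setjmpex(buf, <llvm.sponentry()>);    // or _setjmp for that kind
//   elsewhere:   result = _setjmpex(buf, <llvm.frameaddress(0)>);
// The callee declaration and the call site are both marked returns_twice.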
1419
// Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid
1420
// repeating code, we handle them here.
// Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid
1420
// repeating code, we handle them here.
1421
enum class CodeGenFunction::MSVCIntrin {
1422
  _BitScanForward,
1423
  _BitScanReverse,
1424
  _InterlockedAnd,
1425
  _InterlockedDecrement,
1426
  _InterlockedExchange,
1427
  _InterlockedExchangeAdd,
1428
  _InterlockedExchangeSub,
1429
  _InterlockedIncrement,
1430
  _InterlockedOr,
1431
  _InterlockedXor,
1432
  _InterlockedExchangeAdd_acq,
1433
  _InterlockedExchangeAdd_rel,
1434
  _InterlockedExchangeAdd_nf,
1435
  _InterlockedExchange_acq,
1436
  _InterlockedExchange_rel,
1437
  _InterlockedExchange_nf,
1438
  _InterlockedCompareExchange_acq,
1439
  _InterlockedCompareExchange_rel,
1440
  _InterlockedCompareExchange_nf,
1441
  _InterlockedCompareExchange128,
1442
  _InterlockedCompareExchange128_acq,
1443
  _InterlockedCompareExchange128_rel,
1444
  _InterlockedCompareExchange128_nf,
1445
  _InterlockedOr_acq,
1446
  _InterlockedOr_rel,
1447
  _InterlockedOr_nf,
1448
  _InterlockedXor_acq,
1449
  _InterlockedXor_rel,
1450
  _InterlockedXor_nf,
1451
  _InterlockedAnd_acq,
1452
  _InterlockedAnd_rel,
1453
  _InterlockedAnd_nf,
1454
  _InterlockedIncrement_acq,
1455
  _InterlockedIncrement_rel,
1456
  _InterlockedIncrement_nf,
1457
  _InterlockedDecrement_acq,
1458
  _InterlockedDecrement_rel,
1459
  _InterlockedDecrement_nf,
1460
  __fastfail,
1461
};
1462
1463
static std::optional<CodeGenFunction::MSVCIntrin>
1464
0
translateArmToMsvcIntrin(unsigned BuiltinID) {
1465
0
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1466
0
  switch (BuiltinID) {
1467
0
  default:
1468
0
    return std::nullopt;
1469
0
  case clang::ARM::BI_BitScanForward:
1470
0
  case clang::ARM::BI_BitScanForward64:
1471
0
    return MSVCIntrin::_BitScanForward;
1472
0
  case clang::ARM::BI_BitScanReverse:
1473
0
  case clang::ARM::BI_BitScanReverse64:
1474
0
    return MSVCIntrin::_BitScanReverse;
1475
0
  case clang::ARM::BI_InterlockedAnd64:
1476
0
    return MSVCIntrin::_InterlockedAnd;
1477
0
  case clang::ARM::BI_InterlockedExchange64:
1478
0
    return MSVCIntrin::_InterlockedExchange;
1479
0
  case clang::ARM::BI_InterlockedExchangeAdd64:
1480
0
    return MSVCIntrin::_InterlockedExchangeAdd;
1481
0
  case clang::ARM::BI_InterlockedExchangeSub64:
1482
0
    return MSVCIntrin::_InterlockedExchangeSub;
1483
0
  case clang::ARM::BI_InterlockedOr64:
1484
0
    return MSVCIntrin::_InterlockedOr;
1485
0
  case clang::ARM::BI_InterlockedXor64:
1486
0
    return MSVCIntrin::_InterlockedXor;
1487
0
  case clang::ARM::BI_InterlockedDecrement64:
1488
0
    return MSVCIntrin::_InterlockedDecrement;
1489
0
  case clang::ARM::BI_InterlockedIncrement64:
1490
0
    return MSVCIntrin::_InterlockedIncrement;
1491
0
  case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1492
0
  case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1493
0
  case clang::ARM::BI_InterlockedExchangeAdd_acq:
1494
0
  case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1495
0
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
1496
0
  case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1497
0
  case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1498
0
  case clang::ARM::BI_InterlockedExchangeAdd_rel:
1499
0
  case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1500
0
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
1501
0
  case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1502
0
  case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1503
0
  case clang::ARM::BI_InterlockedExchangeAdd_nf:
1504
0
  case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1505
0
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
1506
0
  case clang::ARM::BI_InterlockedExchange8_acq:
1507
0
  case clang::ARM::BI_InterlockedExchange16_acq:
1508
0
  case clang::ARM::BI_InterlockedExchange_acq:
1509
0
  case clang::ARM::BI_InterlockedExchange64_acq:
1510
0
    return MSVCIntrin::_InterlockedExchange_acq;
1511
0
  case clang::ARM::BI_InterlockedExchange8_rel:
1512
0
  case clang::ARM::BI_InterlockedExchange16_rel:
1513
0
  case clang::ARM::BI_InterlockedExchange_rel:
1514
0
  case clang::ARM::BI_InterlockedExchange64_rel:
1515
0
    return MSVCIntrin::_InterlockedExchange_rel;
1516
0
  case clang::ARM::BI_InterlockedExchange8_nf:
1517
0
  case clang::ARM::BI_InterlockedExchange16_nf:
1518
0
  case clang::ARM::BI_InterlockedExchange_nf:
1519
0
  case clang::ARM::BI_InterlockedExchange64_nf:
1520
0
    return MSVCIntrin::_InterlockedExchange_nf;
1521
0
  case clang::ARM::BI_InterlockedCompareExchange8_acq:
1522
0
  case clang::ARM::BI_InterlockedCompareExchange16_acq:
1523
0
  case clang::ARM::BI_InterlockedCompareExchange_acq:
1524
0
  case clang::ARM::BI_InterlockedCompareExchange64_acq:
1525
0
    return MSVCIntrin::_InterlockedCompareExchange_acq;
1526
0
  case clang::ARM::BI_InterlockedCompareExchange8_rel:
1527
0
  case clang::ARM::BI_InterlockedCompareExchange16_rel:
1528
0
  case clang::ARM::BI_InterlockedCompareExchange_rel:
1529
0
  case clang::ARM::BI_InterlockedCompareExchange64_rel:
1530
0
    return MSVCIntrin::_InterlockedCompareExchange_rel;
1531
0
  case clang::ARM::BI_InterlockedCompareExchange8_nf:
1532
0
  case clang::ARM::BI_InterlockedCompareExchange16_nf:
1533
0
  case clang::ARM::BI_InterlockedCompareExchange_nf:
1534
0
  case clang::ARM::BI_InterlockedCompareExchange64_nf:
1535
0
    return MSVCIntrin::_InterlockedCompareExchange_nf;
1536
0
  case clang::ARM::BI_InterlockedOr8_acq:
1537
0
  case clang::ARM::BI_InterlockedOr16_acq:
1538
0
  case clang::ARM::BI_InterlockedOr_acq:
1539
0
  case clang::ARM::BI_InterlockedOr64_acq:
1540
0
    return MSVCIntrin::_InterlockedOr_acq;
1541
0
  case clang::ARM::BI_InterlockedOr8_rel:
1542
0
  case clang::ARM::BI_InterlockedOr16_rel:
1543
0
  case clang::ARM::BI_InterlockedOr_rel:
1544
0
  case clang::ARM::BI_InterlockedOr64_rel:
1545
0
    return MSVCIntrin::_InterlockedOr_rel;
1546
0
  case clang::ARM::BI_InterlockedOr8_nf:
1547
0
  case clang::ARM::BI_InterlockedOr16_nf:
1548
0
  case clang::ARM::BI_InterlockedOr_nf:
1549
0
  case clang::ARM::BI_InterlockedOr64_nf:
1550
0
    return MSVCIntrin::_InterlockedOr_nf;
1551
0
  case clang::ARM::BI_InterlockedXor8_acq:
1552
0
  case clang::ARM::BI_InterlockedXor16_acq:
1553
0
  case clang::ARM::BI_InterlockedXor_acq:
1554
0
  case clang::ARM::BI_InterlockedXor64_acq:
1555
0
    return MSVCIntrin::_InterlockedXor_acq;
1556
0
  case clang::ARM::BI_InterlockedXor8_rel:
1557
0
  case clang::ARM::BI_InterlockedXor16_rel:
1558
0
  case clang::ARM::BI_InterlockedXor_rel:
1559
0
  case clang::ARM::BI_InterlockedXor64_rel:
1560
0
    return MSVCIntrin::_InterlockedXor_rel;
1561
0
  case clang::ARM::BI_InterlockedXor8_nf:
1562
0
  case clang::ARM::BI_InterlockedXor16_nf:
1563
0
  case clang::ARM::BI_InterlockedXor_nf:
1564
0
  case clang::ARM::BI_InterlockedXor64_nf:
1565
0
    return MSVCIntrin::_InterlockedXor_nf;
1566
0
  case clang::ARM::BI_InterlockedAnd8_acq:
1567
0
  case clang::ARM::BI_InterlockedAnd16_acq:
1568
0
  case clang::ARM::BI_InterlockedAnd_acq:
1569
0
  case clang::ARM::BI_InterlockedAnd64_acq:
1570
0
    return MSVCIntrin::_InterlockedAnd_acq;
1571
0
  case clang::ARM::BI_InterlockedAnd8_rel:
1572
0
  case clang::ARM::BI_InterlockedAnd16_rel:
1573
0
  case clang::ARM::BI_InterlockedAnd_rel:
1574
0
  case clang::ARM::BI_InterlockedAnd64_rel:
1575
0
    return MSVCIntrin::_InterlockedAnd_rel;
1576
0
  case clang::ARM::BI_InterlockedAnd8_nf:
1577
0
  case clang::ARM::BI_InterlockedAnd16_nf:
1578
0
  case clang::ARM::BI_InterlockedAnd_nf:
1579
0
  case clang::ARM::BI_InterlockedAnd64_nf:
1580
0
    return MSVCIntrin::_InterlockedAnd_nf;
1581
0
  case clang::ARM::BI_InterlockedIncrement16_acq:
1582
0
  case clang::ARM::BI_InterlockedIncrement_acq:
1583
0
  case clang::ARM::BI_InterlockedIncrement64_acq:
1584
0
    return MSVCIntrin::_InterlockedIncrement_acq;
1585
0
  case clang::ARM::BI_InterlockedIncrement16_rel:
1586
0
  case clang::ARM::BI_InterlockedIncrement_rel:
1587
0
  case clang::ARM::BI_InterlockedIncrement64_rel:
1588
0
    return MSVCIntrin::_InterlockedIncrement_rel;
1589
0
  case clang::ARM::BI_InterlockedIncrement16_nf:
1590
0
  case clang::ARM::BI_InterlockedIncrement_nf:
1591
0
  case clang::ARM::BI_InterlockedIncrement64_nf:
1592
0
    return MSVCIntrin::_InterlockedIncrement_nf;
1593
0
  case clang::ARM::BI_InterlockedDecrement16_acq:
1594
0
  case clang::ARM::BI_InterlockedDecrement_acq:
1595
0
  case clang::ARM::BI_InterlockedDecrement64_acq:
1596
0
    return MSVCIntrin::_InterlockedDecrement_acq;
1597
0
  case clang::ARM::BI_InterlockedDecrement16_rel:
1598
0
  case clang::ARM::BI_InterlockedDecrement_rel:
1599
0
  case clang::ARM::BI_InterlockedDecrement64_rel:
1600
0
    return MSVCIntrin::_InterlockedDecrement_rel;
1601
0
  case clang::ARM::BI_InterlockedDecrement16_nf:
1602
0
  case clang::ARM::BI_InterlockedDecrement_nf:
1603
0
  case clang::ARM::BI_InterlockedDecrement64_nf:
1604
0
    return MSVCIntrin::_InterlockedDecrement_nf;
1605
0
  }
1606
0
  llvm_unreachable("must return from switch");
1607
0
}
1608
1609
static std::optional<CodeGenFunction::MSVCIntrin>
1610
0
translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1611
0
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1612
0
  switch (BuiltinID) {
1613
0
  default:
1614
0
    return std::nullopt;
1615
0
  case clang::AArch64::BI_BitScanForward:
1616
0
  case clang::AArch64::BI_BitScanForward64:
1617
0
    return MSVCIntrin::_BitScanForward;
1618
0
  case clang::AArch64::BI_BitScanReverse:
1619
0
  case clang::AArch64::BI_BitScanReverse64:
1620
0
    return MSVCIntrin::_BitScanReverse;
1621
0
  case clang::AArch64::BI_InterlockedAnd64:
1622
0
    return MSVCIntrin::_InterlockedAnd;
1623
0
  case clang::AArch64::BI_InterlockedExchange64:
1624
0
    return MSVCIntrin::_InterlockedExchange;
1625
0
  case clang::AArch64::BI_InterlockedExchangeAdd64:
1626
0
    return MSVCIntrin::_InterlockedExchangeAdd;
1627
0
  case clang::AArch64::BI_InterlockedExchangeSub64:
1628
0
    return MSVCIntrin::_InterlockedExchangeSub;
1629
0
  case clang::AArch64::BI_InterlockedOr64:
1630
0
    return MSVCIntrin::_InterlockedOr;
1631
0
  case clang::AArch64::BI_InterlockedXor64:
1632
0
    return MSVCIntrin::_InterlockedXor;
1633
0
  case clang::AArch64::BI_InterlockedDecrement64:
1634
0
    return MSVCIntrin::_InterlockedDecrement;
1635
0
  case clang::AArch64::BI_InterlockedIncrement64:
1636
0
    return MSVCIntrin::_InterlockedIncrement;
1637
0
  case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1638
0
  case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1639
0
  case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1640
0
  case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1641
0
    return MSVCIntrin::_InterlockedExchangeAdd_acq;
1642
0
  case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1643
0
  case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1644
0
  case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1645
0
  case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1646
0
    return MSVCIntrin::_InterlockedExchangeAdd_rel;
1647
0
  case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1648
0
  case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1649
0
  case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1650
0
  case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1651
0
    return MSVCIntrin::_InterlockedExchangeAdd_nf;
1652
0
  case clang::AArch64::BI_InterlockedExchange8_acq:
1653
0
  case clang::AArch64::BI_InterlockedExchange16_acq:
1654
0
  case clang::AArch64::BI_InterlockedExchange_acq:
1655
0
  case clang::AArch64::BI_InterlockedExchange64_acq:
1656
0
    return MSVCIntrin::_InterlockedExchange_acq;
1657
0
  case clang::AArch64::BI_InterlockedExchange8_rel:
1658
0
  case clang::AArch64::BI_InterlockedExchange16_rel:
1659
0
  case clang::AArch64::BI_InterlockedExchange_rel:
1660
0
  case clang::AArch64::BI_InterlockedExchange64_rel:
1661
0
    return MSVCIntrin::_InterlockedExchange_rel;
1662
0
  case clang::AArch64::BI_InterlockedExchange8_nf:
1663
0
  case clang::AArch64::BI_InterlockedExchange16_nf:
1664
0
  case clang::AArch64::BI_InterlockedExchange_nf:
1665
0
  case clang::AArch64::BI_InterlockedExchange64_nf:
1666
0
    return MSVCIntrin::_InterlockedExchange_nf;
1667
0
  case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1668
0
  case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1669
0
  case clang::AArch64::BI_InterlockedCompareExchange_acq:
1670
0
  case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1671
0
    return MSVCIntrin::_InterlockedCompareExchange_acq;
1672
0
  case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1673
0
  case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1674
0
  case clang::AArch64::BI_InterlockedCompareExchange_rel:
1675
0
  case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1676
0
    return MSVCIntrin::_InterlockedCompareExchange_rel;
1677
0
  case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1678
0
  case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1679
0
  case clang::AArch64::BI_InterlockedCompareExchange_nf:
1680
0
  case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1681
0
    return MSVCIntrin::_InterlockedCompareExchange_nf;
1682
0
  case clang::AArch64::BI_InterlockedCompareExchange128:
1683
0
    return MSVCIntrin::_InterlockedCompareExchange128;
1684
0
  case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1685
0
    return MSVCIntrin::_InterlockedCompareExchange128_acq;
1686
0
  case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1687
0
    return MSVCIntrin::_InterlockedCompareExchange128_nf;
1688
0
  case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1689
0
    return MSVCIntrin::_InterlockedCompareExchange128_rel;
1690
0
  case clang::AArch64::BI_InterlockedOr8_acq:
1691
0
  case clang::AArch64::BI_InterlockedOr16_acq:
1692
0
  case clang::AArch64::BI_InterlockedOr_acq:
1693
0
  case clang::AArch64::BI_InterlockedOr64_acq:
1694
0
    return MSVCIntrin::_InterlockedOr_acq;
1695
0
  case clang::AArch64::BI_InterlockedOr8_rel:
1696
0
  case clang::AArch64::BI_InterlockedOr16_rel:
1697
0
  case clang::AArch64::BI_InterlockedOr_rel:
1698
0
  case clang::AArch64::BI_InterlockedOr64_rel:
1699
0
    return MSVCIntrin::_InterlockedOr_rel;
1700
0
  case clang::AArch64::BI_InterlockedOr8_nf:
1701
0
  case clang::AArch64::BI_InterlockedOr16_nf:
1702
0
  case clang::AArch64::BI_InterlockedOr_nf:
1703
0
  case clang::AArch64::BI_InterlockedOr64_nf:
1704
0
    return MSVCIntrin::_InterlockedOr_nf;
1705
0
  case clang::AArch64::BI_InterlockedXor8_acq:
1706
0
  case clang::AArch64::BI_InterlockedXor16_acq:
1707
0
  case clang::AArch64::BI_InterlockedXor_acq:
1708
0
  case clang::AArch64::BI_InterlockedXor64_acq:
1709
0
    return MSVCIntrin::_InterlockedXor_acq;
1710
0
  case clang::AArch64::BI_InterlockedXor8_rel:
1711
0
  case clang::AArch64::BI_InterlockedXor16_rel:
1712
0
  case clang::AArch64::BI_InterlockedXor_rel:
1713
0
  case clang::AArch64::BI_InterlockedXor64_rel:
1714
0
    return MSVCIntrin::_InterlockedXor_rel;
1715
0
  case clang::AArch64::BI_InterlockedXor8_nf:
1716
0
  case clang::AArch64::BI_InterlockedXor16_nf:
1717
0
  case clang::AArch64::BI_InterlockedXor_nf:
1718
0
  case clang::AArch64::BI_InterlockedXor64_nf:
1719
0
    return MSVCIntrin::_InterlockedXor_nf;
1720
0
  case clang::AArch64::BI_InterlockedAnd8_acq:
1721
0
  case clang::AArch64::BI_InterlockedAnd16_acq:
1722
0
  case clang::AArch64::BI_InterlockedAnd_acq:
1723
0
  case clang::AArch64::BI_InterlockedAnd64_acq:
1724
0
    return MSVCIntrin::_InterlockedAnd_acq;
1725
0
  case clang::AArch64::BI_InterlockedAnd8_rel:
1726
0
  case clang::AArch64::BI_InterlockedAnd16_rel:
1727
0
  case clang::AArch64::BI_InterlockedAnd_rel:
1728
0
  case clang::AArch64::BI_InterlockedAnd64_rel:
1729
0
    return MSVCIntrin::_InterlockedAnd_rel;
1730
0
  case clang::AArch64::BI_InterlockedAnd8_nf:
1731
0
  case clang::AArch64::BI_InterlockedAnd16_nf:
1732
0
  case clang::AArch64::BI_InterlockedAnd_nf:
1733
0
  case clang::AArch64::BI_InterlockedAnd64_nf:
1734
0
    return MSVCIntrin::_InterlockedAnd_nf;
1735
0
  case clang::AArch64::BI_InterlockedIncrement16_acq:
1736
0
  case clang::AArch64::BI_InterlockedIncrement_acq:
1737
0
  case clang::AArch64::BI_InterlockedIncrement64_acq:
1738
0
    return MSVCIntrin::_InterlockedIncrement_acq;
1739
0
  case clang::AArch64::BI_InterlockedIncrement16_rel:
1740
0
  case clang::AArch64::BI_InterlockedIncrement_rel:
1741
0
  case clang::AArch64::BI_InterlockedIncrement64_rel:
1742
0
    return MSVCIntrin::_InterlockedIncrement_rel;
1743
0
  case clang::AArch64::BI_InterlockedIncrement16_nf:
1744
0
  case clang::AArch64::BI_InterlockedIncrement_nf:
1745
0
  case clang::AArch64::BI_InterlockedIncrement64_nf:
1746
0
    return MSVCIntrin::_InterlockedIncrement_nf;
1747
0
  case clang::AArch64::BI_InterlockedDecrement16_acq:
1748
0
  case clang::AArch64::BI_InterlockedDecrement_acq:
1749
0
  case clang::AArch64::BI_InterlockedDecrement64_acq:
1750
0
    return MSVCIntrin::_InterlockedDecrement_acq;
1751
0
  case clang::AArch64::BI_InterlockedDecrement16_rel:
1752
0
  case clang::AArch64::BI_InterlockedDecrement_rel:
1753
0
  case clang::AArch64::BI_InterlockedDecrement64_rel:
1754
0
    return MSVCIntrin::_InterlockedDecrement_rel;
1755
0
  case clang::AArch64::BI_InterlockedDecrement16_nf:
1756
0
  case clang::AArch64::BI_InterlockedDecrement_nf:
1757
0
  case clang::AArch64::BI_InterlockedDecrement64_nf:
1758
0
    return MSVCIntrin::_InterlockedDecrement_nf;
1759
0
  }
1760
0
  llvm_unreachable("must return from switch");
1761
0
}
1762
1763
static std::optional<CodeGenFunction::MSVCIntrin>
1764
0
translateX86ToMsvcIntrin(unsigned BuiltinID) {
1765
0
  using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1766
0
  switch (BuiltinID) {
1767
0
  default:
1768
0
    return std::nullopt;
1769
0
  case clang::X86::BI_BitScanForward:
1770
0
  case clang::X86::BI_BitScanForward64:
1771
0
    return MSVCIntrin::_BitScanForward;
1772
0
  case clang::X86::BI_BitScanReverse:
1773
0
  case clang::X86::BI_BitScanReverse64:
1774
0
    return MSVCIntrin::_BitScanReverse;
1775
0
  case clang::X86::BI_InterlockedAnd64:
1776
0
    return MSVCIntrin::_InterlockedAnd;
1777
0
  case clang::X86::BI_InterlockedCompareExchange128:
1778
0
    return MSVCIntrin::_InterlockedCompareExchange128;
1779
0
  case clang::X86::BI_InterlockedExchange64:
1780
0
    return MSVCIntrin::_InterlockedExchange;
1781
0
  case clang::X86::BI_InterlockedExchangeAdd64:
1782
0
    return MSVCIntrin::_InterlockedExchangeAdd;
1783
0
  case clang::X86::BI_InterlockedExchangeSub64:
1784
0
    return MSVCIntrin::_InterlockedExchangeSub;
1785
0
  case clang::X86::BI_InterlockedOr64:
1786
0
    return MSVCIntrin::_InterlockedOr;
1787
0
  case clang::X86::BI_InterlockedXor64:
1788
0
    return MSVCIntrin::_InterlockedXor;
1789
0
  case clang::X86::BI_InterlockedDecrement64:
1790
0
    return MSVCIntrin::_InterlockedDecrement;
1791
0
  case clang::X86::BI_InterlockedIncrement64:
1792
0
    return MSVCIntrin::_InterlockedIncrement;
1793
0
  }
1794
0
  llvm_unreachable("must return from switch");
1795
0
}
1796
1797
// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1798
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1799
0
                                            const CallExpr *E) {
1800
0
  switch (BuiltinID) {
1801
0
  case MSVCIntrin::_BitScanForward:
1802
0
  case MSVCIntrin::_BitScanReverse: {
1803
0
    Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1804
0
    Value *ArgValue = EmitScalarExpr(E->getArg(1));
1805
1806
0
    llvm::Type *ArgType = ArgValue->getType();
1807
0
    llvm::Type *IndexType = IndexAddress.getElementType();
1808
0
    llvm::Type *ResultType = ConvertType(E->getType());
1809
1810
0
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1811
0
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
1812
0
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1813
1814
0
    BasicBlock *Begin = Builder.GetInsertBlock();
1815
0
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1816
0
    Builder.SetInsertPoint(End);
1817
0
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1818
1819
0
    Builder.SetInsertPoint(Begin);
1820
0
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1821
0
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1822
0
    Builder.CreateCondBr(IsZero, End, NotZero);
1823
0
    Result->addIncoming(ResZero, Begin);
1824
1825
0
    Builder.SetInsertPoint(NotZero);
1826
1827
0
    if (BuiltinID == MSVCIntrin::_BitScanForward) {
1828
0
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1829
0
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1830
0
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1831
0
      Builder.CreateStore(ZeroCount, IndexAddress, false);
1832
0
    } else {
1833
0
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1834
0
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1835
1836
0
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1837
0
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1838
0
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1839
0
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1840
0
      Builder.CreateStore(Index, IndexAddress, false);
1841
0
    }
1842
0
    Builder.CreateBr(End);
1843
0
    Result->addIncoming(ResOne, NotZero);
1844
1845
0
    Builder.SetInsertPoint(End);
1846
0
    return Result;
1847
0
  }
1848
0
  case MSVCIntrin::_InterlockedAnd:
1849
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1850
0
  case MSVCIntrin::_InterlockedExchange:
1851
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1852
0
  case MSVCIntrin::_InterlockedExchangeAdd:
1853
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1854
0
  case MSVCIntrin::_InterlockedExchangeSub:
1855
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1856
0
  case MSVCIntrin::_InterlockedOr:
1857
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1858
0
  case MSVCIntrin::_InterlockedXor:
1859
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1860
0
  case MSVCIntrin::_InterlockedExchangeAdd_acq:
1861
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1862
0
                                 AtomicOrdering::Acquire);
1863
0
  case MSVCIntrin::_InterlockedExchangeAdd_rel:
1864
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1865
0
                                 AtomicOrdering::Release);
1866
0
  case MSVCIntrin::_InterlockedExchangeAdd_nf:
1867
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1868
0
                                 AtomicOrdering::Monotonic);
1869
0
  case MSVCIntrin::_InterlockedExchange_acq:
1870
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1871
0
                                 AtomicOrdering::Acquire);
1872
0
  case MSVCIntrin::_InterlockedExchange_rel:
1873
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1874
0
                                 AtomicOrdering::Release);
1875
0
  case MSVCIntrin::_InterlockedExchange_nf:
1876
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1877
0
                                 AtomicOrdering::Monotonic);
1878
0
  case MSVCIntrin::_InterlockedCompareExchange_acq:
1879
0
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1880
0
  case MSVCIntrin::_InterlockedCompareExchange_rel:
1881
0
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1882
0
  case MSVCIntrin::_InterlockedCompareExchange_nf:
1883
0
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1884
0
  case MSVCIntrin::_InterlockedCompareExchange128:
1885
0
    return EmitAtomicCmpXchg128ForMSIntrin(
1886
0
        *this, E, AtomicOrdering::SequentiallyConsistent);
1887
0
  case MSVCIntrin::_InterlockedCompareExchange128_acq:
1888
0
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1889
0
  case MSVCIntrin::_InterlockedCompareExchange128_rel:
1890
0
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1891
0
  case MSVCIntrin::_InterlockedCompareExchange128_nf:
1892
0
    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1893
0
  case MSVCIntrin::_InterlockedOr_acq:
1894
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1895
0
                                 AtomicOrdering::Acquire);
1896
0
  case MSVCIntrin::_InterlockedOr_rel:
1897
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1898
0
                                 AtomicOrdering::Release);
1899
0
  case MSVCIntrin::_InterlockedOr_nf:
1900
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1901
0
                                 AtomicOrdering::Monotonic);
1902
0
  case MSVCIntrin::_InterlockedXor_acq:
1903
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1904
0
                                 AtomicOrdering::Acquire);
1905
0
  case MSVCIntrin::_InterlockedXor_rel:
1906
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1907
0
                                 AtomicOrdering::Release);
1908
0
  case MSVCIntrin::_InterlockedXor_nf:
1909
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1910
0
                                 AtomicOrdering::Monotonic);
1911
0
  case MSVCIntrin::_InterlockedAnd_acq:
1912
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1913
0
                                 AtomicOrdering::Acquire);
1914
0
  case MSVCIntrin::_InterlockedAnd_rel:
1915
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1916
0
                                 AtomicOrdering::Release);
1917
0
  case MSVCIntrin::_InterlockedAnd_nf:
1918
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1919
0
                                 AtomicOrdering::Monotonic);
1920
0
  case MSVCIntrin::_InterlockedIncrement_acq:
1921
0
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1922
0
  case MSVCIntrin::_InterlockedIncrement_rel:
1923
0
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1924
0
  case MSVCIntrin::_InterlockedIncrement_nf:
1925
0
    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1926
0
  case MSVCIntrin::_InterlockedDecrement_acq:
1927
0
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1928
0
  case MSVCIntrin::_InterlockedDecrement_rel:
1929
0
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1930
0
  case MSVCIntrin::_InterlockedDecrement_nf:
1931
0
    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1932
1933
0
  case MSVCIntrin::_InterlockedDecrement:
1934
0
    return EmitAtomicDecrementValue(*this, E);
1935
0
  case MSVCIntrin::_InterlockedIncrement:
1936
0
    return EmitAtomicIncrementValue(*this, E);
1937
1938
0
  case MSVCIntrin::__fastfail: {
1939
    // Request immediate process termination from the kernel. The instruction
1940
    // sequences to do this are documented on MSDN:
1941
    // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1942
0
    llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1943
0
    StringRef Asm, Constraints;
1944
0
    switch (ISA) {
1945
0
    default:
1946
0
      ErrorUnsupported(E, "__fastfail call for this architecture");
1947
0
      break;
1948
0
    case llvm::Triple::x86:
1949
0
    case llvm::Triple::x86_64:
1950
0
      Asm = "int $$0x29";
1951
0
      Constraints = "{cx}";
1952
0
      break;
1953
0
    case llvm::Triple::thumb:
1954
0
      Asm = "udf #251";
1955
0
      Constraints = "{r0}";
1956
0
      break;
1957
0
    case llvm::Triple::aarch64:
1958
0
      Asm = "brk #0xF003";
1959
0
      Constraints = "{w0}";
1960
0
    }
1961
0
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1962
0
    llvm::InlineAsm *IA =
1963
0
        llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1964
0
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1965
0
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
1966
0
        llvm::Attribute::NoReturn);
1967
0
    llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1968
0
    CI->setAttributes(NoReturnAttr);
1969
0
    return CI;
1970
0
  }
1971
0
  }
1972
0
  llvm_unreachable("Incorrect MSVC intrinsic!");
1973
0
}
1974
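A minimal C++ reference model of the _BitScanForward case above (illustrative only; the real code emits an llvm.cttz call plus the branch and PHI structure shown):

#include <cstdint>

static unsigned char referenceBitScanForward(unsigned long *Index, uint32_t Mask) {
  if (Mask == 0)
    return 0;                  // the "bitscan_end" path with the zero PHI value
  unsigned long Pos = 0;       // loop stands in for llvm.cttz(Mask, /*poison on zero*/ true)
  while ((Mask & 1u) == 0) {
    Mask >>= 1;
    ++Pos;
  }
  *Index = Pos;                // stored through the index pointer argument
  return 1;
}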
1975
namespace {
1976
// ARC cleanup for __builtin_os_log_format
1977
struct CallObjCArcUse final : EHScopeStack::Cleanup {
1978
0
  CallObjCArcUse(llvm::Value *object) : object(object) {}
1979
  llvm::Value *object;
1980
1981
0
  void Emit(CodeGenFunction &CGF, Flags flags) override {
1982
0
    CGF.EmitARCIntrinsicUse(object);
1983
0
  }
1984
};
1985
}
1986
1987
Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1988
0
                                                 BuiltinCheckKind Kind) {
1989
0
  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1990
0
          && "Unsupported builtin check kind");
1991
1992
0
  Value *ArgValue = EmitScalarExpr(E);
1993
0
  if (!SanOpts.has(SanitizerKind::Builtin))
1994
0
    return ArgValue;
1995
1996
0
  SanitizerScope SanScope(this);
1997
0
  Value *Cond = Builder.CreateICmpNE(
1998
0
      ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1999
0
  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2000
0
            SanitizerHandler::InvalidBuiltin,
2001
0
            {EmitCheckSourceLocation(E->getExprLoc()),
2002
0
             llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2003
0
            std::nullopt);
2004
0
  return ArgValue;
2005
0
}
2006
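A small usage example of when the check above fires, assuming the translation unit is built with -fsanitize=builtin (otherwise the argument is returned unchecked):

unsigned N = 0;
int LeadingZeros = __builtin_clz(N);  // zero input is undefined; the emitted check reports it at runtime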
2007
0
static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2008
0
  return CGF.Builder.CreateBinaryIntrinsic(
2009
0
      Intrinsic::abs, ArgValue,
2010
0
      ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2011
0
}
2012
2013
static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2014
0
                                     bool SanitizeOverflow) {
2015
0
  Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2016
2017
  // Try to eliminate overflow check.
2018
0
  if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2019
0
    if (!VCI->isMinSignedValue())
2020
0
      return EmitAbs(CGF, ArgValue, true);
2021
0
  }
2022
2023
0
  CodeGenFunction::SanitizerScope SanScope(&CGF);
2024
2025
0
  Constant *Zero = Constant::getNullValue(ArgValue->getType());
2026
0
  Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2027
0
      Intrinsic::ssub_with_overflow, Zero, ArgValue);
2028
0
  Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2029
0
  Value *NotOverflow = CGF.Builder.CreateNot(
2030
0
      CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2031
2032
  // TODO: support -ftrapv-handler.
2033
0
  if (SanitizeOverflow) {
2034
0
    CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2035
0
                  SanitizerHandler::NegateOverflow,
2036
0
                  {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2037
0
                   CGF.EmitCheckTypeDescriptor(E->getType())},
2038
0
                  {ArgValue});
2039
0
  } else
2040
0
    CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2041
2042
0
  Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2043
0
  return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2044
0
}
2045
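A C++ sketch of what the overflow-checked abs above computes (illustrative only; on the overflowing input the real lowering calls the overflow sanitizer handler or emits a trap instead of returning):

#include <climits>

static int referenceCheckedAbs(int ArgValue, bool &Overflowed) {
  // ssub_with_overflow(0, x) overflows exactly when x == INT_MIN.
  Overflowed = (ArgValue == INT_MIN);
  if (Overflowed)
    return ArgValue;
  return ArgValue < 0 ? -ArgValue : ArgValue;  // select on the "abscond" comparison
}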
2046
/// Get the argument type for arguments to os_log_helper.
2047
0
static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2048
0
  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2049
0
  return C.getCanonicalType(UnsignedTy);
2050
0
}
2051
2052
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2053
    const analyze_os_log::OSLogBufferLayout &Layout,
2054
0
    CharUnits BufferAlignment) {
2055
0
  ASTContext &Ctx = getContext();
2056
2057
0
  llvm::SmallString<64> Name;
2058
0
  {
2059
0
    raw_svector_ostream OS(Name);
2060
0
    OS << "__os_log_helper";
2061
0
    OS << "_" << BufferAlignment.getQuantity();
2062
0
    OS << "_" << int(Layout.getSummaryByte());
2063
0
    OS << "_" << int(Layout.getNumArgsByte());
2064
0
    for (const auto &Item : Layout.Items)
2065
0
      OS << "_" << int(Item.getSizeByte()) << "_"
2066
0
         << int(Item.getDescriptorByte());
2067
0
  }
2068
2069
0
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
2070
0
    return F;
2071
2072
0
  llvm::SmallVector<QualType, 4> ArgTys;
2073
0
  FunctionArgList Args;
2074
0
  Args.push_back(ImplicitParamDecl::Create(
2075
0
      Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2076
0
      ImplicitParamKind::Other));
2077
0
  ArgTys.emplace_back(Ctx.VoidPtrTy);
2078
2079
0
  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2080
0
    char Size = Layout.Items[I].getSizeByte();
2081
0
    if (!Size)
2082
0
      continue;
2083
2084
0
    QualType ArgTy = getOSLogArgType(Ctx, Size);
2085
0
    Args.push_back(ImplicitParamDecl::Create(
2086
0
        Ctx, nullptr, SourceLocation(),
2087
0
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2088
0
        ImplicitParamKind::Other));
2089
0
    ArgTys.emplace_back(ArgTy);
2090
0
  }
2091
2092
0
  QualType ReturnTy = Ctx.VoidTy;
2093
2094
  // The helper function has linkonce_odr linkage to enable the linker to merge
2095
  // identical functions. To ensure the merging always happens, 'noinline' is
2096
  // attached to the function when compiling with -Oz.
2097
0
  const CGFunctionInfo &FI =
2098
0
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2099
0
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2100
0
  llvm::Function *Fn = llvm::Function::Create(
2101
0
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2102
0
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2103
0
  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2104
0
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2105
0
  Fn->setDoesNotThrow();
2106
2107
  // Attach 'noinline' at -Oz.
2108
0
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
2109
0
    Fn->addFnAttr(llvm::Attribute::NoInline);
2110
2111
0
  auto NL = ApplyDebugLocation::CreateEmpty(*this);
2112
0
  StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2113
2114
  // Create a scope with an artificial location for the body of this function.
2115
0
  auto AL = ApplyDebugLocation::CreateArtificial(*this);
2116
2117
0
  CharUnits Offset;
2118
0
  Address BufAddr =
2119
0
      Address(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Int8Ty,
2120
0
              BufferAlignment);
2121
0
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2122
0
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2123
0
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2124
0
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2125
2126
0
  unsigned I = 1;
2127
0
  for (const auto &Item : Layout.Items) {
2128
0
    Builder.CreateStore(
2129
0
        Builder.getInt8(Item.getDescriptorByte()),
2130
0
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2131
0
    Builder.CreateStore(
2132
0
        Builder.getInt8(Item.getSizeByte()),
2133
0
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2134
2135
0
    CharUnits Size = Item.size();
2136
0
    if (!Size.getQuantity())
2137
0
      continue;
2138
2139
0
    Address Arg = GetAddrOfLocalVar(Args[I]);
2140
0
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2141
0
    Addr = Addr.withElementType(Arg.getElementType());
2142
0
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2143
0
    Offset += Size;
2144
0
    ++I;
2145
0
  }
2146
2147
0
  FinishFunction();
2148
2149
0
  return Fn;
2150
0
}
2151
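The helper generated above writes the buffer in a fixed byte layout. A self-contained C++ sketch of that layout (illustrative only; the struct and function names are invented, and the comments follow the GEP labels used above):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct OSLogItemSketch { uint8_t Descriptor; uint8_t Size; const void *Data; };

static void referenceFillOSLogBuffer(uint8_t *Buf, uint8_t Summary, uint8_t NumArgs,
                                     const std::vector<OSLogItemSketch> &Items) {
  size_t Offset = 0;
  Buf[Offset++] = Summary;                             // "summary"
  Buf[Offset++] = NumArgs;                             // "numArgs"
  for (const OSLogItemSketch &Item : Items) {
    Buf[Offset++] = Item.Descriptor;                   // "argDescriptor"
    Buf[Offset++] = Item.Size;                         // "argSize"
    std::memcpy(Buf + Offset, Item.Data, Item.Size);   // "argData"
    Offset += Item.Size;                               // items with Size 0 carry no data
  }
}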
2152
0
RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2153
0
  assert(E.getNumArgs() >= 2 &&
2154
0
         "__builtin_os_log_format takes at least 2 arguments");
2155
0
  ASTContext &Ctx = getContext();
2156
0
  analyze_os_log::OSLogBufferLayout Layout;
2157
0
  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2158
0
  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2159
0
  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2160
2161
  // Ignore argument 1, the format string. It is not currently used.
2162
0
  CallArgList Args;
2163
0
  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
2164
2165
0
  for (const auto &Item : Layout.Items) {
2166
0
    int Size = Item.getSizeByte();
2167
0
    if (!Size)
2168
0
      continue;
2169
2170
0
    llvm::Value *ArgVal;
2171
2172
0
    if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2173
0
      uint64_t Val = 0;
2174
0
      for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2175
0
        Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2176
0
      ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2177
0
    } else if (const Expr *TheExpr = Item.getExpr()) {
2178
0
      ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2179
2180
      // If a temporary object that requires destruction after the full
2181
      // expression is passed, push a lifetime-extended cleanup to extend its
2182
      // lifetime to the end of the enclosing block scope.
2183
0
      auto LifetimeExtendObject = [&](const Expr *E) {
2184
0
        E = E->IgnoreParenCasts();
2185
        // Extend lifetimes of objects returned by function calls and message
2186
        // sends.
2187
2188
        // FIXME: We should do this in other cases in which temporaries are
2189
        //        created including arguments of non-ARC types (e.g., C++
2190
        //        temporaries).
2191
0
        if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2192
0
          return true;
2193
0
        return false;
2194
0
      };
2195
2196
0
      if (TheExpr->getType()->isObjCRetainableType() &&
2197
0
          getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2198
0
        assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2199
0
               "Only scalar can be a ObjC retainable type");
2200
0
        if (!isa<Constant>(ArgVal)) {
2201
0
          CleanupKind Cleanup = getARCCleanupKind();
2202
0
          QualType Ty = TheExpr->getType();
2203
0
          Address Alloca = Address::invalid();
2204
0
          Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2205
0
          ArgVal = EmitARCRetain(Ty, ArgVal);
2206
0
          Builder.CreateStore(ArgVal, Addr);
2207
0
          pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2208
0
                                      CodeGenFunction::destroyARCStrongPrecise,
2209
0
                                      Cleanup & EHCleanup);
2210
2211
          // Push a clang.arc.use call to ensure the ARC optimizer knows that
2212
          // the argument has to be kept alive.
2213
0
          if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2214
0
            pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2215
0
        }
2216
0
      }
2217
0
    } else {
2218
0
      ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2219
0
    }
2220
2221
0
    unsigned ArgValSize =
2222
0
        CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2223
0
    llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2224
0
                                                     ArgValSize);
2225
0
    ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2226
0
    CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2227
    // If ArgVal has type x86_fp80, zero-extend ArgVal.
2228
0
    ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2229
0
    Args.add(RValue::get(ArgVal), ArgTy);
2230
0
  }
2231
2232
0
  const CGFunctionInfo &FI =
2233
0
      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2234
0
  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2235
0
      Layout, BufAddr.getAlignment());
2236
0
  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2237
0
  return RValue::get(BufAddr.getPointer());
2238
0
}
2239
2240
static bool isSpecialUnsignedMultiplySignedResult(
2241
    unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2242
0
    WidthAndSignedness ResultInfo) {
2243
0
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2244
0
         Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2245
0
         !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2246
0
}
2247
2248
static RValue EmitCheckedUnsignedMultiplySignedResult(
2249
    CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2250
    const clang::Expr *Op2, WidthAndSignedness Op2Info,
2251
    const clang::Expr *ResultArg, QualType ResultQTy,
2252
0
    WidthAndSignedness ResultInfo) {
2253
0
  assert(isSpecialUnsignedMultiplySignedResult(
2254
0
             Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2255
0
         "Cannot specialize this multiply");
2256
2257
0
  llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2258
0
  llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2259
2260
0
  llvm::Value *HasOverflow;
2261
0
  llvm::Value *Result = EmitOverflowIntrinsic(
2262
0
      CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2263
2264
  // The intrinsic call will detect overflow when the value is > UINT_MAX;
2265
  // however, since the original builtin had a signed result, we also need to
2266
  // report an overflow when the result is greater than INT_MAX.
2267
0
  auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2268
0
  llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2269
2270
0
  llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2271
0
  HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2272
2273
0
  bool isVolatile =
2274
0
      ResultArg->getType()->getPointeeType().isVolatileQualified();
2275
0
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2276
0
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2277
0
                          isVolatile);
2278
0
  return RValue::get(HasOverflow);
2279
0
}
2280
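A C++ sketch of the special case handled above, i.e. __builtin_mul_overflow with two 32-bit unsigned operands and a 32-bit signed result (illustrative only):

#include <climits>
#include <cstdint>

static bool referenceUnsignedMulSignedResult(uint32_t A, uint32_t B, int32_t *Res) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;
  *Res = (int32_t)(uint32_t)Wide;      // the low 32 bits are stored, as above
  // Overflow if the unsigned multiply itself overflowed, or if the value does
  // not fit in the signed result, i.e. it is greater than INT_MAX.
  return Wide > (uint64_t)UINT32_MAX || Wide > (uint64_t)INT32_MAX;
}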
2281
/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2282
static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2283
                                       WidthAndSignedness Op1Info,
2284
                                       WidthAndSignedness Op2Info,
2285
0
                                       WidthAndSignedness ResultInfo) {
2286
0
  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2287
0
         std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2288
0
         Op1Info.Signed != Op2Info.Signed;
2289
0
}
2290
2291
/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2292
/// the generic checked-binop irgen.
2293
static RValue
2294
EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2295
                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
2296
                             WidthAndSignedness Op2Info,
2297
                             const clang::Expr *ResultArg, QualType ResultQTy,
2298
0
                             WidthAndSignedness ResultInfo) {
2299
0
  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2300
0
                                    Op2Info, ResultInfo) &&
2301
0
         "Not a mixed-sign multipliction we can specialize");
2302
2303
  // Emit the signed and unsigned operands.
2304
0
  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2305
0
  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2306
0
  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2307
0
  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2308
0
  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2309
0
  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2310
2311
  // One of the operands may be smaller than the other. If so, [s|z]ext it.
2312
0
  if (SignedOpWidth < UnsignedOpWidth)
2313
0
    Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2314
0
  if (UnsignedOpWidth < SignedOpWidth)
2315
0
    Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2316
2317
0
  llvm::Type *OpTy = Signed->getType();
2318
0
  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2319
0
  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2320
0
  llvm::Type *ResTy = ResultPtr.getElementType();
2321
0
  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2322
2323
  // Take the absolute value of the signed operand.
2324
0
  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2325
0
  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2326
0
  llvm::Value *AbsSigned =
2327
0
      CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2328
2329
  // Perform a checked unsigned multiplication.
2330
0
  llvm::Value *UnsignedOverflow;
2331
0
  llvm::Value *UnsignedResult =
2332
0
      EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2333
0
                            Unsigned, UnsignedOverflow);
2334
2335
0
  llvm::Value *Overflow, *Result;
2336
0
  if (ResultInfo.Signed) {
2337
    // Signed overflow occurs if the result is greater than INT_MAX or less
2338
    // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2339
0
    auto IntMax =
2340
0
        llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2341
0
    llvm::Value *MaxResult =
2342
0
        CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2343
0
                              CGF.Builder.CreateZExt(IsNegative, OpTy));
2344
0
    llvm::Value *SignedOverflow =
2345
0
        CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2346
0
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2347
2348
    // Prepare the signed result (possibly by negating it).
2349
0
    llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2350
0
    llvm::Value *SignedResult =
2351
0
        CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2352
0
    Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2353
0
  } else {
2354
    // Unsigned overflow occurs if the true result is negative or greater than UINT_MAX.
2355
0
    llvm::Value *Underflow = CGF.Builder.CreateAnd(
2356
0
        IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2357
0
    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2358
0
    if (ResultInfo.Width < OpWidth) {
2359
0
      auto IntMax =
2360
0
          llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2361
0
      llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2362
0
          UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2363
0
      Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2364
0
    }
2365
2366
    // Negate the product if it would be negative in infinite precision.
2367
0
    Result = CGF.Builder.CreateSelect(
2368
0
        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2369
2370
0
    Result = CGF.Builder.CreateTrunc(Result, ResTy);
2371
0
  }
2372
0
  assert(Overflow && Result && "Missing overflow or result");
2373
2374
0
  bool isVolatile =
2375
0
      ResultArg->getType()->getPointeeType().isVolatileQualified();
2376
0
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2377
0
                          isVolatile);
2378
0
  return RValue::get(Overflow);
2379
0
}
2380
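A C++ sketch of the mixed-sign specialization above for the int32_t * uint32_t -> int32_t case (illustrative only; the real code also handles differing operand widths and unsigned results):

#include <climits>
#include <cstdint>

static bool referenceMixedSignMul(int32_t S, uint32_t U, int32_t *Res) {
  bool IsNegative = S < 0;
  uint32_t AbsSigned = IsNegative ? 0u - (uint32_t)S : (uint32_t)S;
  uint64_t Wide = (uint64_t)AbsSigned * (uint64_t)U;
  bool UnsignedOverflow = Wide > UINT32_MAX;     // the umul_with_overflow flag
  uint32_t UnsignedResult = (uint32_t)Wide;
  // |Result| may be at most INT_MAX when positive and INT_MAX + 1 when negative.
  uint32_t MaxResult = (uint32_t)INT32_MAX + (IsNegative ? 1u : 0u);
  bool Overflow = UnsignedOverflow || UnsignedResult > MaxResult;
  uint32_t Bits = IsNegative ? 0u - UnsignedResult : UnsignedResult;
  *Res = (int32_t)Bits;                          // two's complement reinterpretation
  return Overflow;
}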
2381
static bool
2382
TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2383
0
                              llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2384
0
  if (const auto *Arr = Ctx.getAsArrayType(Ty))
2385
0
    Ty = Ctx.getBaseElementType(Arr);
2386
2387
0
  const auto *Record = Ty->getAsCXXRecordDecl();
2388
0
  if (!Record)
2389
0
    return false;
2390
2391
  // We've already checked this type, or are in the process of checking it.
2392
0
  if (!Seen.insert(Record).second)
2393
0
    return false;
2394
2395
0
  assert(Record->hasDefinition() &&
2396
0
         "Incomplete types should already be diagnosed");
2397
2398
0
  if (Record->isDynamicClass())
2399
0
    return true;
2400
2401
0
  for (FieldDecl *F : Record->fields()) {
2402
0
    if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2403
0
      return true;
2404
0
  }
2405
0
  return false;
2406
0
}
2407
2408
/// Determine if the specified type requires laundering by checking if it is a
2409
/// dynamic class type or contains a subobject which is a dynamic class type.
2410
0
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2411
0
  if (!CGM.getCodeGenOpts().StrictVTablePointers)
2412
0
    return false;
2413
0
  llvm::SmallPtrSet<const Decl *, 16> Seen;
2414
0
  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2415
0
}
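// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] Under -fstrict-vtable-pointers the helpers above decide
//  whether __builtin_launder must be honored: laundering is required for a
//  dynamic class or anything containing one, and is a no-op otherwise.
struct DemoBase { virtual ~DemoBase() {} };   // dynamic class: requires laundering
struct DemoHolder { DemoBase b; int tag; };   // contains one:  requires laundering
struct DemoPlain { int x; };                  // no vptr:       launder is a no-op
// Typical use after storage reuse via placement new:
//   DemoBase *p = __builtin_launder(reinterpret_cast<DemoBase *>(buffer));
// [end sketch]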
2416
2417
0
RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2418
0
  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2419
0
  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2420
2421
  // The builtin's shift arg may have a different type than the source arg and
2422
  // result, but the LLVM intrinsic uses the same type for all values.
2423
0
  llvm::Type *Ty = Src->getType();
2424
0
  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2425
2426
  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2427
0
  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2428
0
  Function *F = CGM.getIntrinsic(IID, Ty);
2429
0
  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2430
0
}
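// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] emitRotate passes the source value as both data operands of
//  the funnel-shift intrinsic, so for example:
static unsigned demo_rotl(unsigned x, unsigned n) {
  return __builtin_rotateleft32(x, n); // -> call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
}
// [end sketch]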
2431
2432
// Map math builtins for long-double to f128 version.
2433
0
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2434
0
  switch (BuiltinID) {
2435
0
#define MUTATE_LDBL(func) \
2436
0
  case Builtin::BI__builtin_##func##l: \
2437
0
    return Builtin::BI__builtin_##func##f128;
2438
0
  MUTATE_LDBL(sqrt)
2439
0
  MUTATE_LDBL(cbrt)
2440
0
  MUTATE_LDBL(fabs)
2441
0
  MUTATE_LDBL(log)
2442
0
  MUTATE_LDBL(log2)
2443
0
  MUTATE_LDBL(log10)
2444
0
  MUTATE_LDBL(log1p)
2445
0
  MUTATE_LDBL(logb)
2446
0
  MUTATE_LDBL(exp)
2447
0
  MUTATE_LDBL(exp2)
2448
0
  MUTATE_LDBL(expm1)
2449
0
  MUTATE_LDBL(fdim)
2450
0
  MUTATE_LDBL(hypot)
2451
0
  MUTATE_LDBL(ilogb)
2452
0
  MUTATE_LDBL(pow)
2453
0
  MUTATE_LDBL(fmin)
2454
0
  MUTATE_LDBL(fmax)
2455
0
  MUTATE_LDBL(ceil)
2456
0
  MUTATE_LDBL(trunc)
2457
0
  MUTATE_LDBL(rint)
2458
0
  MUTATE_LDBL(nearbyint)
2459
0
  MUTATE_LDBL(round)
2460
0
  MUTATE_LDBL(floor)
2461
0
  MUTATE_LDBL(lround)
2462
0
  MUTATE_LDBL(llround)
2463
0
  MUTATE_LDBL(lrint)
2464
0
  MUTATE_LDBL(llrint)
2465
0
  MUTATE_LDBL(fmod)
2466
0
  MUTATE_LDBL(modf)
2467
0
  MUTATE_LDBL(nan)
2468
0
  MUTATE_LDBL(nans)
2469
0
  MUTATE_LDBL(inf)
2470
0
  MUTATE_LDBL(fma)
2471
0
  MUTATE_LDBL(sin)
2472
0
  MUTATE_LDBL(cos)
2473
0
  MUTATE_LDBL(tan)
2474
0
  MUTATE_LDBL(sinh)
2475
0
  MUTATE_LDBL(cosh)
2476
0
  MUTATE_LDBL(tanh)
2477
0
  MUTATE_LDBL(asin)
2478
0
  MUTATE_LDBL(acos)
2479
0
  MUTATE_LDBL(atan)
2480
0
  MUTATE_LDBL(asinh)
2481
0
  MUTATE_LDBL(acosh)
2482
0
  MUTATE_LDBL(atanh)
2483
0
  MUTATE_LDBL(atan2)
2484
0
  MUTATE_LDBL(erf)
2485
0
  MUTATE_LDBL(erfc)
2486
0
  MUTATE_LDBL(ldexp)
2487
0
  MUTATE_LDBL(frexp)
2488
0
  MUTATE_LDBL(huge_val)
2489
0
  MUTATE_LDBL(copysign)
2490
0
  MUTATE_LDBL(nextafter)
2491
0
  MUTATE_LDBL(nexttoward)
2492
0
  MUTATE_LDBL(remainder)
2493
0
  MUTATE_LDBL(remquo)
2494
0
  MUTATE_LDBL(scalbln)
2495
0
  MUTATE_LDBL(scalbn)
2496
0
  MUTATE_LDBL(tgamma)
2497
0
  MUTATE_LDBL(lgamma)
2498
0
#undef MUTATE_LDBL
2499
0
  default:
2500
0
    return BuiltinID;
2501
0
  }
2502
0
}
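// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] On a target whose long double is IEEE quad (the PPC64 check
//  in EmitBuiltinExpr below), MUTATE_LDBL(sqrt) remaps the builtin ID, so
static long double demo_sqrtl(long double x) {
  return __builtin_sqrtl(x);   // emitted as if __builtin_sqrtf128 had been called
}
// [end sketch]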
2503
2504
static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2505
0
                               Value *V) {
2506
0
  if (CGF.Builder.getIsFPConstrained() &&
2507
0
      CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2508
0
    if (Value *Result =
2509
0
            CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2510
0
      return Result;
2511
0
  }
2512
0
  return nullptr;
2513
0
}
2514
2515
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2516
0
                                              const FunctionDecl *FD) {
2517
0
  auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2518
0
  auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2519
0
  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2520
2521
0
  SmallVector<Value *, 16> Args;
2522
0
  for (auto &&FormalTy : FnTy->params())
2523
0
    Args.push_back(llvm::PoisonValue::get(FormalTy));
2524
2525
0
  return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2526
0
}
2527
2528
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2529
                                        const CallExpr *E,
2530
0
                                        ReturnValueSlot ReturnValue) {
2531
0
  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2532
  // See if we can constant fold this builtin.  If so, don't emit it at all.
2533
  // TODO: Extend this handling to all builtin calls that we can constant-fold.
2534
0
  Expr::EvalResult Result;
2535
0
  if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2536
0
      !Result.hasSideEffects()) {
2537
0
    if (Result.Val.isInt())
2538
0
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2539
0
                                                Result.Val.getInt()));
2540
0
    if (Result.Val.isFloat())
2541
0
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2542
0
                                               Result.Val.getFloat()));
2543
0
  }
2544
2545
  // If current long-double semantics is IEEE 128-bit, replace math builtins
2546
  // of long-double with f128 equivalent.
2547
  // TODO: This mutation should also be applied to targets other than PPC,
2548
  // after the backend supports IEEE 128-bit style libcalls.
2549
0
  if (getTarget().getTriple().isPPC64() &&
2550
0
      &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2551
0
    BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2552
2553
  // If the builtin has been declared explicitly with an assembler label,
2554
  // disable the specialized emitting below. Ideally we should communicate the
2555
  // rename in IR, or at least avoid generating the intrinsic calls that are
2556
  // likely to get lowered to the renamed library functions.
2557
0
  const unsigned BuiltinIDIfNoAsmLabel =
2558
0
      FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2559
2560
0
  std::optional<bool> ErrnoOverriden;
2561
  // ErrnoOverriden is true if math-errno is overridden via the
2562
  // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2563
  // which implies math-errno.
2564
0
  if (E->hasStoredFPFeatures()) {
2565
0
    FPOptionsOverride OP = E->getFPFeatures();
2566
0
    if (OP.hasMathErrnoOverride())
2567
0
      ErrnoOverriden = OP.getMathErrnoOverride();
2568
0
  }
2569
  // True if '__attribute__((optnone))' is used. This attribute overrides
2570
  // fast-math which implies math-errno.
2571
0
  bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2572
2573
  // True if we are compiling with optimization enabled and errno has been disabled
2574
  // using the '#pragma float_control(precise, off)', and
2575
  // attribute opt-none hasn't been seen.
2576
0
  bool ErrnoOverridenToFalseWithOpt =
2577
0
       ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2578
0
       CGM.getCodeGenOpts().OptimizationLevel != 0;
2579
2580
  // There are LLVM math intrinsics/instructions corresponding to math library
2581
  // functions, except that the LLVM op will never set errno while the math library
2582
  // might. Also, math builtins have the same semantics as their math library
2583
  // twins. Thus, we can transform math library and builtin calls to their
2584
  // LLVM counterparts if the call is marked 'const' (known to never set errno).
2585
  // In case FP exceptions are enabled, the experimental versions of the
2586
  // intrinsics model those.
2587
0
  bool ConstAlways =
2588
0
      getContext().BuiltinInfo.isConst(BuiltinID);
2589
2590
  // There's a special case with the fma builtins where they are always const
2591
  // if the target environment is GNU or the target OS is Windows and we're
2592
  // targeting the MSVCRT.dll environment.
2593
  // FIXME: This list can become outdated. Need to find a way to get it some
2594
  // other way.
2595
0
  switch (BuiltinID) {
2596
0
  case Builtin::BI__builtin_fma:
2597
0
  case Builtin::BI__builtin_fmaf:
2598
0
  case Builtin::BI__builtin_fmal:
2599
0
  case Builtin::BIfma:
2600
0
  case Builtin::BIfmaf:
2601
0
  case Builtin::BIfmal: {
2602
0
    auto &Trip = CGM.getTriple();
2603
0
    if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2604
0
      ConstAlways = true;
2605
0
    break;
2606
0
  }
2607
0
  default:
2608
0
    break;
2609
0
  }
2610
2611
0
  bool ConstWithoutErrnoAndExceptions =
2612
0
      getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2613
0
  bool ConstWithoutExceptions =
2614
0
      getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2615
2616
  // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2617
  // disabled.
2618
  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2619
  // or attributes that affect math-errno should prevent or allow math
2620
  // intrinsics to be generated. Intrinsics are generated:
2621
  //   1- In fast-math mode, unless math-errno is overridden
2622
  //      via '#pragma float_control(precise, on)', or via an
2623
  //      '__attribute__((optnone))'.
2624
  //   2- If math-errno was enabled on the command line but overridden
2625
  //      to false via '#pragma float_control(precise, off)' and
2626
  //      '__attribute__((optnone))' hasn't been used.
2627
  //   3- If we are compiling with optimization and errno has been disabled
2628
  //      via '#pragma float_control(precise, off)', and
2629
  //      '__attribute__((optnone))' hasn't been used.
2630
2631
0
  bool ConstWithoutErrnoOrExceptions =
2632
0
      ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2633
0
  bool GenerateIntrinsics =
2634
0
      (ConstAlways && !OptNone) ||
2635
0
      (!getLangOpts().MathErrno &&
2636
0
       !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2637
0
  if (!GenerateIntrinsics) {
2638
0
    GenerateIntrinsics =
2639
0
        ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2640
0
    if (!GenerateIntrinsics)
2641
0
      GenerateIntrinsics =
2642
0
          ConstWithoutErrnoOrExceptions &&
2643
0
          (!getLangOpts().MathErrno &&
2644
0
           !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2645
0
    if (!GenerateIntrinsics)
2646
0
      GenerateIntrinsics =
2647
0
          ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2648
0
  }
2649
0
  if (GenerateIntrinsics) {
2650
0
    switch (BuiltinIDIfNoAsmLabel) {
2651
0
    case Builtin::BIceil:
2652
0
    case Builtin::BIceilf:
2653
0
    case Builtin::BIceill:
2654
0
    case Builtin::BI__builtin_ceil:
2655
0
    case Builtin::BI__builtin_ceilf:
2656
0
    case Builtin::BI__builtin_ceilf16:
2657
0
    case Builtin::BI__builtin_ceill:
2658
0
    case Builtin::BI__builtin_ceilf128:
2659
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2660
0
                                   Intrinsic::ceil,
2661
0
                                   Intrinsic::experimental_constrained_ceil));
2662
2663
0
    case Builtin::BIcopysign:
2664
0
    case Builtin::BIcopysignf:
2665
0
    case Builtin::BIcopysignl:
2666
0
    case Builtin::BI__builtin_copysign:
2667
0
    case Builtin::BI__builtin_copysignf:
2668
0
    case Builtin::BI__builtin_copysignf16:
2669
0
    case Builtin::BI__builtin_copysignl:
2670
0
    case Builtin::BI__builtin_copysignf128:
2671
0
      return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2672
2673
0
    case Builtin::BIcos:
2674
0
    case Builtin::BIcosf:
2675
0
    case Builtin::BIcosl:
2676
0
    case Builtin::BI__builtin_cos:
2677
0
    case Builtin::BI__builtin_cosf:
2678
0
    case Builtin::BI__builtin_cosf16:
2679
0
    case Builtin::BI__builtin_cosl:
2680
0
    case Builtin::BI__builtin_cosf128:
2681
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2682
0
                                   Intrinsic::cos,
2683
0
                                   Intrinsic::experimental_constrained_cos));
2684
2685
0
    case Builtin::BIexp:
2686
0
    case Builtin::BIexpf:
2687
0
    case Builtin::BIexpl:
2688
0
    case Builtin::BI__builtin_exp:
2689
0
    case Builtin::BI__builtin_expf:
2690
0
    case Builtin::BI__builtin_expf16:
2691
0
    case Builtin::BI__builtin_expl:
2692
0
    case Builtin::BI__builtin_expf128:
2693
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2694
0
                                   Intrinsic::exp,
2695
0
                                   Intrinsic::experimental_constrained_exp));
2696
2697
0
    case Builtin::BIexp2:
2698
0
    case Builtin::BIexp2f:
2699
0
    case Builtin::BIexp2l:
2700
0
    case Builtin::BI__builtin_exp2:
2701
0
    case Builtin::BI__builtin_exp2f:
2702
0
    case Builtin::BI__builtin_exp2f16:
2703
0
    case Builtin::BI__builtin_exp2l:
2704
0
    case Builtin::BI__builtin_exp2f128:
2705
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2706
0
                                   Intrinsic::exp2,
2707
0
                                   Intrinsic::experimental_constrained_exp2));
2708
0
    case Builtin::BI__builtin_exp10:
2709
0
    case Builtin::BI__builtin_exp10f:
2710
0
    case Builtin::BI__builtin_exp10f16:
2711
0
    case Builtin::BI__builtin_exp10l:
2712
0
    case Builtin::BI__builtin_exp10f128: {
2713
      // TODO: strictfp support
2714
0
      if (Builder.getIsFPConstrained())
2715
0
        break;
2716
0
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2717
0
    }
2718
0
    case Builtin::BIfabs:
2719
0
    case Builtin::BIfabsf:
2720
0
    case Builtin::BIfabsl:
2721
0
    case Builtin::BI__builtin_fabs:
2722
0
    case Builtin::BI__builtin_fabsf:
2723
0
    case Builtin::BI__builtin_fabsf16:
2724
0
    case Builtin::BI__builtin_fabsl:
2725
0
    case Builtin::BI__builtin_fabsf128:
2726
0
      return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2727
2728
0
    case Builtin::BIfloor:
2729
0
    case Builtin::BIfloorf:
2730
0
    case Builtin::BIfloorl:
2731
0
    case Builtin::BI__builtin_floor:
2732
0
    case Builtin::BI__builtin_floorf:
2733
0
    case Builtin::BI__builtin_floorf16:
2734
0
    case Builtin::BI__builtin_floorl:
2735
0
    case Builtin::BI__builtin_floorf128:
2736
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2737
0
                                   Intrinsic::floor,
2738
0
                                   Intrinsic::experimental_constrained_floor));
2739
2740
0
    case Builtin::BIfma:
2741
0
    case Builtin::BIfmaf:
2742
0
    case Builtin::BIfmal:
2743
0
    case Builtin::BI__builtin_fma:
2744
0
    case Builtin::BI__builtin_fmaf:
2745
0
    case Builtin::BI__builtin_fmaf16:
2746
0
    case Builtin::BI__builtin_fmal:
2747
0
    case Builtin::BI__builtin_fmaf128:
2748
0
      return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2749
0
                                   Intrinsic::fma,
2750
0
                                   Intrinsic::experimental_constrained_fma));
2751
2752
0
    case Builtin::BIfmax:
2753
0
    case Builtin::BIfmaxf:
2754
0
    case Builtin::BIfmaxl:
2755
0
    case Builtin::BI__builtin_fmax:
2756
0
    case Builtin::BI__builtin_fmaxf:
2757
0
    case Builtin::BI__builtin_fmaxf16:
2758
0
    case Builtin::BI__builtin_fmaxl:
2759
0
    case Builtin::BI__builtin_fmaxf128:
2760
0
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2761
0
                                   Intrinsic::maxnum,
2762
0
                                   Intrinsic::experimental_constrained_maxnum));
2763
2764
0
    case Builtin::BIfmin:
2765
0
    case Builtin::BIfminf:
2766
0
    case Builtin::BIfminl:
2767
0
    case Builtin::BI__builtin_fmin:
2768
0
    case Builtin::BI__builtin_fminf:
2769
0
    case Builtin::BI__builtin_fminf16:
2770
0
    case Builtin::BI__builtin_fminl:
2771
0
    case Builtin::BI__builtin_fminf128:
2772
0
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2773
0
                                   Intrinsic::minnum,
2774
0
                                   Intrinsic::experimental_constrained_minnum));
2775
2776
    // fmod() is a special-case. It maps to the frem instruction rather than an
2777
    // LLVM intrinsic.
2778
0
    case Builtin::BIfmod:
2779
0
    case Builtin::BIfmodf:
2780
0
    case Builtin::BIfmodl:
2781
0
    case Builtin::BI__builtin_fmod:
2782
0
    case Builtin::BI__builtin_fmodf:
2783
0
    case Builtin::BI__builtin_fmodf16:
2784
0
    case Builtin::BI__builtin_fmodl:
2785
0
    case Builtin::BI__builtin_fmodf128: {
2786
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2787
0
      Value *Arg1 = EmitScalarExpr(E->getArg(0));
2788
0
      Value *Arg2 = EmitScalarExpr(E->getArg(1));
2789
0
      return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2790
0
    }
2791
2792
0
    case Builtin::BIlog:
2793
0
    case Builtin::BIlogf:
2794
0
    case Builtin::BIlogl:
2795
0
    case Builtin::BI__builtin_log:
2796
0
    case Builtin::BI__builtin_logf:
2797
0
    case Builtin::BI__builtin_logf16:
2798
0
    case Builtin::BI__builtin_logl:
2799
0
    case Builtin::BI__builtin_logf128:
2800
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2801
0
                                   Intrinsic::log,
2802
0
                                   Intrinsic::experimental_constrained_log));
2803
2804
0
    case Builtin::BIlog10:
2805
0
    case Builtin::BIlog10f:
2806
0
    case Builtin::BIlog10l:
2807
0
    case Builtin::BI__builtin_log10:
2808
0
    case Builtin::BI__builtin_log10f:
2809
0
    case Builtin::BI__builtin_log10f16:
2810
0
    case Builtin::BI__builtin_log10l:
2811
0
    case Builtin::BI__builtin_log10f128:
2812
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2813
0
                                   Intrinsic::log10,
2814
0
                                   Intrinsic::experimental_constrained_log10));
2815
2816
0
    case Builtin::BIlog2:
2817
0
    case Builtin::BIlog2f:
2818
0
    case Builtin::BIlog2l:
2819
0
    case Builtin::BI__builtin_log2:
2820
0
    case Builtin::BI__builtin_log2f:
2821
0
    case Builtin::BI__builtin_log2f16:
2822
0
    case Builtin::BI__builtin_log2l:
2823
0
    case Builtin::BI__builtin_log2f128:
2824
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2825
0
                                   Intrinsic::log2,
2826
0
                                   Intrinsic::experimental_constrained_log2));
2827
2828
0
    case Builtin::BInearbyint:
2829
0
    case Builtin::BInearbyintf:
2830
0
    case Builtin::BInearbyintl:
2831
0
    case Builtin::BI__builtin_nearbyint:
2832
0
    case Builtin::BI__builtin_nearbyintf:
2833
0
    case Builtin::BI__builtin_nearbyintl:
2834
0
    case Builtin::BI__builtin_nearbyintf128:
2835
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2836
0
                                Intrinsic::nearbyint,
2837
0
                                Intrinsic::experimental_constrained_nearbyint));
2838
2839
0
    case Builtin::BIpow:
2840
0
    case Builtin::BIpowf:
2841
0
    case Builtin::BIpowl:
2842
0
    case Builtin::BI__builtin_pow:
2843
0
    case Builtin::BI__builtin_powf:
2844
0
    case Builtin::BI__builtin_powf16:
2845
0
    case Builtin::BI__builtin_powl:
2846
0
    case Builtin::BI__builtin_powf128:
2847
0
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2848
0
                                   Intrinsic::pow,
2849
0
                                   Intrinsic::experimental_constrained_pow));
2850
2851
0
    case Builtin::BIrint:
2852
0
    case Builtin::BIrintf:
2853
0
    case Builtin::BIrintl:
2854
0
    case Builtin::BI__builtin_rint:
2855
0
    case Builtin::BI__builtin_rintf:
2856
0
    case Builtin::BI__builtin_rintf16:
2857
0
    case Builtin::BI__builtin_rintl:
2858
0
    case Builtin::BI__builtin_rintf128:
2859
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2860
0
                                   Intrinsic::rint,
2861
0
                                   Intrinsic::experimental_constrained_rint));
2862
2863
0
    case Builtin::BIround:
2864
0
    case Builtin::BIroundf:
2865
0
    case Builtin::BIroundl:
2866
0
    case Builtin::BI__builtin_round:
2867
0
    case Builtin::BI__builtin_roundf:
2868
0
    case Builtin::BI__builtin_roundf16:
2869
0
    case Builtin::BI__builtin_roundl:
2870
0
    case Builtin::BI__builtin_roundf128:
2871
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2872
0
                                   Intrinsic::round,
2873
0
                                   Intrinsic::experimental_constrained_round));
2874
2875
0
    case Builtin::BIroundeven:
2876
0
    case Builtin::BIroundevenf:
2877
0
    case Builtin::BIroundevenl:
2878
0
    case Builtin::BI__builtin_roundeven:
2879
0
    case Builtin::BI__builtin_roundevenf:
2880
0
    case Builtin::BI__builtin_roundevenf16:
2881
0
    case Builtin::BI__builtin_roundevenl:
2882
0
    case Builtin::BI__builtin_roundevenf128:
2883
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2884
0
                                   Intrinsic::roundeven,
2885
0
                                   Intrinsic::experimental_constrained_roundeven));
2886
2887
0
    case Builtin::BIsin:
2888
0
    case Builtin::BIsinf:
2889
0
    case Builtin::BIsinl:
2890
0
    case Builtin::BI__builtin_sin:
2891
0
    case Builtin::BI__builtin_sinf:
2892
0
    case Builtin::BI__builtin_sinf16:
2893
0
    case Builtin::BI__builtin_sinl:
2894
0
    case Builtin::BI__builtin_sinf128:
2895
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2896
0
                                   Intrinsic::sin,
2897
0
                                   Intrinsic::experimental_constrained_sin));
2898
2899
0
    case Builtin::BIsqrt:
2900
0
    case Builtin::BIsqrtf:
2901
0
    case Builtin::BIsqrtl:
2902
0
    case Builtin::BI__builtin_sqrt:
2903
0
    case Builtin::BI__builtin_sqrtf:
2904
0
    case Builtin::BI__builtin_sqrtf16:
2905
0
    case Builtin::BI__builtin_sqrtl:
2906
0
    case Builtin::BI__builtin_sqrtf128:
2907
0
    case Builtin::BI__builtin_elementwise_sqrt: {
2908
0
      llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2909
0
          *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2910
0
      SetSqrtFPAccuracy(Call);
2911
0
      return RValue::get(Call);
2912
0
    }
2913
0
    case Builtin::BItrunc:
2914
0
    case Builtin::BItruncf:
2915
0
    case Builtin::BItruncl:
2916
0
    case Builtin::BI__builtin_trunc:
2917
0
    case Builtin::BI__builtin_truncf:
2918
0
    case Builtin::BI__builtin_truncf16:
2919
0
    case Builtin::BI__builtin_truncl:
2920
0
    case Builtin::BI__builtin_truncf128:
2921
0
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2922
0
                                   Intrinsic::trunc,
2923
0
                                   Intrinsic::experimental_constrained_trunc));
2924
2925
0
    case Builtin::BIlround:
2926
0
    case Builtin::BIlroundf:
2927
0
    case Builtin::BIlroundl:
2928
0
    case Builtin::BI__builtin_lround:
2929
0
    case Builtin::BI__builtin_lroundf:
2930
0
    case Builtin::BI__builtin_lroundl:
2931
0
    case Builtin::BI__builtin_lroundf128:
2932
0
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2933
0
          *this, E, Intrinsic::lround,
2934
0
          Intrinsic::experimental_constrained_lround));
2935
2936
0
    case Builtin::BIllround:
2937
0
    case Builtin::BIllroundf:
2938
0
    case Builtin::BIllroundl:
2939
0
    case Builtin::BI__builtin_llround:
2940
0
    case Builtin::BI__builtin_llroundf:
2941
0
    case Builtin::BI__builtin_llroundl:
2942
0
    case Builtin::BI__builtin_llroundf128:
2943
0
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2944
0
          *this, E, Intrinsic::llround,
2945
0
          Intrinsic::experimental_constrained_llround));
2946
2947
0
    case Builtin::BIlrint:
2948
0
    case Builtin::BIlrintf:
2949
0
    case Builtin::BIlrintl:
2950
0
    case Builtin::BI__builtin_lrint:
2951
0
    case Builtin::BI__builtin_lrintf:
2952
0
    case Builtin::BI__builtin_lrintl:
2953
0
    case Builtin::BI__builtin_lrintf128:
2954
0
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2955
0
          *this, E, Intrinsic::lrint,
2956
0
          Intrinsic::experimental_constrained_lrint));
2957
2958
0
    case Builtin::BIllrint:
2959
0
    case Builtin::BIllrintf:
2960
0
    case Builtin::BIllrintl:
2961
0
    case Builtin::BI__builtin_llrint:
2962
0
    case Builtin::BI__builtin_llrintf:
2963
0
    case Builtin::BI__builtin_llrintl:
2964
0
    case Builtin::BI__builtin_llrintf128:
2965
0
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2966
0
          *this, E, Intrinsic::llrint,
2967
0
          Intrinsic::experimental_constrained_llrint));
2968
0
    case Builtin::BI__builtin_ldexp:
2969
0
    case Builtin::BI__builtin_ldexpf:
2970
0
    case Builtin::BI__builtin_ldexpl:
2971
0
    case Builtin::BI__builtin_ldexpf16:
2972
0
    case Builtin::BI__builtin_ldexpf128: {
2973
0
      return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
2974
0
          *this, E, Intrinsic::ldexp,
2975
0
          Intrinsic::experimental_constrained_ldexp));
2976
0
    }
2977
0
    default:
2978
0
      break;
2979
0
    }
2980
0
  }
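// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] The GenerateIntrinsics block above is what makes, e.g.,
//  sqrt calls become llvm.sqrt when math-errno is known to be irrelevant:
static double demo_sqrt(double x) {
  return __builtin_sqrt(x);   // llvm.sqrt.f64 under -fno-math-errno; otherwise a
                              // libcall that may set errno is emitted instead
}
// [end sketch]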
2981
2982
  // Check NonnullAttribute/NullabilityArg and Alignment.
2983
0
  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
2984
0
                          unsigned ParmNum) {
2985
0
    Value *Val = A.getPointer();
2986
0
    EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
2987
0
                        ParmNum);
2988
2989
0
    if (SanOpts.has(SanitizerKind::Alignment)) {
2990
0
      SanitizerSet SkippedChecks;
2991
0
      SkippedChecks.set(SanitizerKind::All);
2992
0
      SkippedChecks.clear(SanitizerKind::Alignment);
2993
0
      SourceLocation Loc = Arg->getExprLoc();
2994
      // Strip an implicit cast.
2995
0
      if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
2996
0
        if (CE->getCastKind() == CK_BitCast)
2997
0
          Arg = CE->getSubExpr();
2998
0
      EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
2999
0
                    SkippedChecks);
3000
0
    }
3001
0
  };
3002
3003
0
  switch (BuiltinIDIfNoAsmLabel) {
3004
0
  default: break;
3005
0
  case Builtin::BI__builtin___CFStringMakeConstantString:
3006
0
  case Builtin::BI__builtin___NSStringMakeConstantString:
3007
0
    return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3008
0
  case Builtin::BI__builtin_stdarg_start:
3009
0
  case Builtin::BI__builtin_va_start:
3010
0
  case Builtin::BI__va_start:
3011
0
  case Builtin::BI__builtin_va_end:
3012
0
    EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3013
0
                       ? EmitScalarExpr(E->getArg(0))
3014
0
                       : EmitVAListRef(E->getArg(0)).getPointer(),
3015
0
                   BuiltinID != Builtin::BI__builtin_va_end);
3016
0
    return RValue::get(nullptr);
3017
0
  case Builtin::BI__builtin_va_copy: {
3018
0
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
3019
0
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
3020
0
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
3021
0
    return RValue::get(nullptr);
3022
0
  }
3023
0
  case Builtin::BIabs:
3024
0
  case Builtin::BIlabs:
3025
0
  case Builtin::BIllabs:
3026
0
  case Builtin::BI__builtin_abs:
3027
0
  case Builtin::BI__builtin_labs:
3028
0
  case Builtin::BI__builtin_llabs: {
3029
0
    bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3030
3031
0
    Value *Result;
3032
0
    switch (getLangOpts().getSignedOverflowBehavior()) {
3033
0
    case LangOptions::SOB_Defined:
3034
0
      Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3035
0
      break;
3036
0
    case LangOptions::SOB_Undefined:
3037
0
      if (!SanitizeOverflow) {
3038
0
        Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3039
0
        break;
3040
0
      }
3041
0
      [[fallthrough]];
3042
0
    case LangOptions::SOB_Trapping:
3043
      // TODO: Somehow handle the corner case when the address of abs is taken.
3044
0
      Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3045
0
      break;
3046
0
    }
3047
0
    return RValue::get(Result);
3048
0
  }
3049
0
  case Builtin::BI__builtin_complex: {
3050
0
    Value *Real = EmitScalarExpr(E->getArg(0));
3051
0
    Value *Imag = EmitScalarExpr(E->getArg(1));
3052
0
    return RValue::getComplex({Real, Imag});
3053
0
  }
3054
0
  case Builtin::BI__builtin_conj:
3055
0
  case Builtin::BI__builtin_conjf:
3056
0
  case Builtin::BI__builtin_conjl:
3057
0
  case Builtin::BIconj:
3058
0
  case Builtin::BIconjf:
3059
0
  case Builtin::BIconjl: {
3060
0
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3061
0
    Value *Real = ComplexVal.first;
3062
0
    Value *Imag = ComplexVal.second;
3063
0
    Imag = Builder.CreateFNeg(Imag, "neg");
3064
0
    return RValue::getComplex(std::make_pair(Real, Imag));
3065
0
  }
3066
0
  case Builtin::BI__builtin_creal:
3067
0
  case Builtin::BI__builtin_crealf:
3068
0
  case Builtin::BI__builtin_creall:
3069
0
  case Builtin::BIcreal:
3070
0
  case Builtin::BIcrealf:
3071
0
  case Builtin::BIcreall: {
3072
0
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3073
0
    return RValue::get(ComplexVal.first);
3074
0
  }
3075
3076
0
  case Builtin::BI__builtin_preserve_access_index: {
3077
    // Only enable the preserved access index region when debuginfo
3078
    // is available as debuginfo is needed to preserve user-level
3079
    // access pattern.
3080
0
    if (!getDebugInfo()) {
3081
0
      CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3082
0
      return RValue::get(EmitScalarExpr(E->getArg(0)));
3083
0
    }
3084
3085
    // Nested builtin_preserve_access_index() not supported
3086
0
    if (IsInPreservedAIRegion) {
3087
0
      CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3088
0
      return RValue::get(EmitScalarExpr(E->getArg(0)));
3089
0
    }
3090
3091
0
    IsInPreservedAIRegion = true;
3092
0
    Value *Res = EmitScalarExpr(E->getArg(0));
3093
0
    IsInPreservedAIRegion = false;
3094
0
    return RValue::get(Res);
3095
0
  }
3096
3097
0
  case Builtin::BI__builtin_cimag:
3098
0
  case Builtin::BI__builtin_cimagf:
3099
0
  case Builtin::BI__builtin_cimagl:
3100
0
  case Builtin::BIcimag:
3101
0
  case Builtin::BIcimagf:
3102
0
  case Builtin::BIcimagl: {
3103
0
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3104
0
    return RValue::get(ComplexVal.second);
3105
0
  }
3106
3107
0
  case Builtin::BI__builtin_clrsb:
3108
0
  case Builtin::BI__builtin_clrsbl:
3109
0
  case Builtin::BI__builtin_clrsbll: {
3110
    // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3111
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3112
3113
0
    llvm::Type *ArgType = ArgValue->getType();
3114
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3115
3116
0
    llvm::Type *ResultType = ConvertType(E->getType());
3117
0
    Value *Zero = llvm::Constant::getNullValue(ArgType);
3118
0
    Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3119
0
    Value *Inverse = Builder.CreateNot(ArgValue, "not");
3120
0
    Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3121
0
    Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3122
0
    Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3123
0
    Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3124
0
                                   "cast");
3125
0
    return RValue::get(Result);
3126
0
  }
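// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] clrsb counts the redundant sign bits below the sign bit,
//  which is exactly the ctlz(x < 0 ? ~x : x) - 1 expansion emitted above:
static int demo_clrsb(void) {
  return __builtin_clrsb(1);   // 30 for a 32-bit int: ctlz(1) == 31, minus 1
}
// [end sketch]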
3127
0
  case Builtin::BI__builtin_ctzs:
3128
0
  case Builtin::BI__builtin_ctz:
3129
0
  case Builtin::BI__builtin_ctzl:
3130
0
  case Builtin::BI__builtin_ctzll: {
3131
0
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3132
3133
0
    llvm::Type *ArgType = ArgValue->getType();
3134
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3135
3136
0
    llvm::Type *ResultType = ConvertType(E->getType());
3137
0
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3138
0
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3139
0
    if (Result->getType() != ResultType)
3140
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3141
0
                                     "cast");
3142
0
    return RValue::get(Result);
3143
0
  }
3144
0
  case Builtin::BI__builtin_clzs:
3145
0
  case Builtin::BI__builtin_clz:
3146
0
  case Builtin::BI__builtin_clzl:
3147
0
  case Builtin::BI__builtin_clzll: {
3148
0
    Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3149
3150
0
    llvm::Type *ArgType = ArgValue->getType();
3151
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3152
3153
0
    llvm::Type *ResultType = ConvertType(E->getType());
3154
0
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
3155
0
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3156
0
    if (Result->getType() != ResultType)
3157
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3158
0
                                     "cast");
3159
0
    return RValue::get(Result);
3160
0
  }
3161
0
  case Builtin::BI__builtin_ffs:
3162
0
  case Builtin::BI__builtin_ffsl:
3163
0
  case Builtin::BI__builtin_ffsll: {
3164
    // ffs(x) -> x ? cttz(x) + 1 : 0
3165
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3166
3167
0
    llvm::Type *ArgType = ArgValue->getType();
3168
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3169
3170
0
    llvm::Type *ResultType = ConvertType(E->getType());
3171
0
    Value *Tmp =
3172
0
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3173
0
                          llvm::ConstantInt::get(ArgType, 1));
3174
0
    Value *Zero = llvm::Constant::getNullValue(ArgType);
3175
0
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3176
0
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3177
0
    if (Result->getType() != ResultType)
3178
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3179
0
                                     "cast");
3180
0
    return RValue::get(Result);
3181
0
  }
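// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] The select-over-cttz expansion above implements the usual
//  ffs() contract: the 1-based index of the least significant set bit, or 0.
static int demo_ffs(void) {
  return __builtin_ffs(12);   // 12 == 0b1100, lowest set bit is bit 2 -> returns 3
}
// [end sketch]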
3182
0
  case Builtin::BI__builtin_parity:
3183
0
  case Builtin::BI__builtin_parityl:
3184
0
  case Builtin::BI__builtin_parityll: {
3185
    // parity(x) -> ctpop(x) & 1
3186
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3187
3188
0
    llvm::Type *ArgType = ArgValue->getType();
3189
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3190
3191
0
    llvm::Type *ResultType = ConvertType(E->getType());
3192
0
    Value *Tmp = Builder.CreateCall(F, ArgValue);
3193
0
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3194
0
    if (Result->getType() != ResultType)
3195
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3196
0
                                     "cast");
3197
0
    return RValue::get(Result);
3198
0
  }
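// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] Parity is the low bit of the population count, matching the
//  ctpop-and-1 sequence above:
static int demo_parity(void) {
  return __builtin_parity(7u);   // three bits set -> odd parity -> returns 1
}
// [end sketch]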
3199
0
  case Builtin::BI__lzcnt16:
3200
0
  case Builtin::BI__lzcnt:
3201
0
  case Builtin::BI__lzcnt64: {
3202
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3203
3204
0
    llvm::Type *ArgType = ArgValue->getType();
3205
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3206
3207
0
    llvm::Type *ResultType = ConvertType(E->getType());
3208
0
    Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3209
0
    if (Result->getType() != ResultType)
3210
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3211
0
                                     "cast");
3212
0
    return RValue::get(Result);
3213
0
  }
3214
0
  case Builtin::BI__popcnt16:
3215
0
  case Builtin::BI__popcnt:
3216
0
  case Builtin::BI__popcnt64:
3217
0
  case Builtin::BI__builtin_popcount:
3218
0
  case Builtin::BI__builtin_popcountl:
3219
0
  case Builtin::BI__builtin_popcountll: {
3220
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3221
3222
0
    llvm::Type *ArgType = ArgValue->getType();
3223
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3224
3225
0
    llvm::Type *ResultType = ConvertType(E->getType());
3226
0
    Value *Result = Builder.CreateCall(F, ArgValue);
3227
0
    if (Result->getType() != ResultType)
3228
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3229
0
                                     "cast");
3230
0
    return RValue::get(Result);
3231
0
  }
3232
0
  case Builtin::BI__builtin_unpredictable: {
3233
    // Always return the argument of __builtin_unpredictable. LLVM does not
3234
    // handle this builtin. Metadata for this builtin should be added directly
3235
    // to instructions such as branches or switches that use it.
3236
0
    return RValue::get(EmitScalarExpr(E->getArg(0)));
3237
0
  }
3238
0
  case Builtin::BI__builtin_expect: {
3239
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3240
0
    llvm::Type *ArgType = ArgValue->getType();
3241
3242
0
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3243
    // Don't generate llvm.expect on -O0 as the backend won't use it for
3244
    // anything.
3245
    // Note, we still IRGen ExpectedValue because it could have side-effects.
3246
0
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3247
0
      return RValue::get(ArgValue);
3248
3249
0
    Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3250
0
    Value *Result =
3251
0
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3252
0
    return RValue::get(Result);
3253
0
  }
3254
0
  case Builtin::BI__builtin_expect_with_probability: {
3255
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3256
0
    llvm::Type *ArgType = ArgValue->getType();
3257
3258
0
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3259
0
    llvm::APFloat Probability(0.0);
3260
0
    const Expr *ProbArg = E->getArg(2);
3261
0
    bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3262
0
    assert(EvalSucceed && "probability should be able to evaluate as float");
3263
0
    (void)EvalSucceed;
3264
0
    bool LoseInfo = false;
3265
0
    Probability.convert(llvm::APFloat::IEEEdouble(),
3266
0
                        llvm::RoundingMode::Dynamic, &LoseInfo);
3267
0
    llvm::Type *Ty = ConvertType(ProbArg->getType());
3268
0
    Constant *Confidence = ConstantFP::get(Ty, Probability);
3269
    // Don't generate llvm.expect.with.probability on -O0 as the backend
3270
    // won't use it for anything.
3271
    // Note, we still IRGen ExpectedValue because it could have side-effects.
3272
0
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3273
0
      return RValue::get(ArgValue);
3274
3275
0
    Function *FnExpect =
3276
0
        CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3277
0
    Value *Result = Builder.CreateCall(
3278
0
        FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3279
0
    return RValue::get(Result);
3280
0
  }
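// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] The third argument must fold to a floating-point constant
//  (the EvaluateAsFloat assertion above); a typical use weights an error path:
static int demo_expect(int err) {
  if (__builtin_expect_with_probability(err != 0, 0, 0.999))
    return -1;   // predicted taken only ~0.1% of the time
  return 0;
}
// [end sketch]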
3281
0
  case Builtin::BI__builtin_assume_aligned: {
3282
0
    const Expr *Ptr = E->getArg(0);
3283
0
    Value *PtrValue = EmitScalarExpr(Ptr);
3284
0
    Value *OffsetValue =
3285
0
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3286
3287
0
    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3288
0
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3289
0
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3290
0
      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3291
0
                                     llvm::Value::MaximumAlignment);
3292
3293
0
    emitAlignmentAssumption(PtrValue, Ptr,
3294
0
                            /*The expr loc is sufficient.*/ SourceLocation(),
3295
0
                            AlignmentCI, OffsetValue);
3296
0
    return RValue::get(PtrValue);
3297
0
  }
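// [Editorial sketch -- illustrative only, not part of CGBuiltin.cpp or its
//  coverage data] The alignment operand is clamped to llvm::Value::MaximumAlignment
//  above and becomes an alignment assumption on the returned pointer:
static float demo_sum4(const float *p) {
  const float *q = static_cast<const float *>(__builtin_assume_aligned(p, 16));
  return q[0] + q[1] + q[2] + q[3];   // optimizer may now assume 16-byte alignment
}
// [end sketch]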
3298
0
  case Builtin::BI__assume:
3299
0
  case Builtin::BI__builtin_assume: {
3300
0
    if (E->getArg(0)->HasSideEffects(getContext()))
3301
0
      return RValue::get(nullptr);
3302
3303
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3304
0
    Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3305
0
    Builder.CreateCall(FnAssume, ArgValue);
3306
0
    return RValue::get(nullptr);
3307
0
  }
3308
0
  case Builtin::BI__builtin_assume_separate_storage: {
3309
0
    const Expr *Arg0 = E->getArg(0);
3310
0
    const Expr *Arg1 = E->getArg(1);
3311
3312
0
    Value *Value0 = EmitScalarExpr(Arg0);
3313
0
    Value *Value1 = EmitScalarExpr(Arg1);
3314
3315
0
    Value *Values[] = {Value0, Value1};
3316
0
    OperandBundleDefT<Value *> OBD("separate_storage", Values);
3317
0
    Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3318
0
    return RValue::get(nullptr);
3319
0
  }
3320
0
  case Builtin::BI__arithmetic_fence: {
3321
    // Create the builtin call if FastMath is selected, and the target
3322
    // supports the builtin, otherwise just return the argument.
3323
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3324
0
    llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3325
0
    bool isArithmeticFenceEnabled =
3326
0
        FMF.allowReassoc() &&
3327
0
        getContext().getTargetInfo().checkArithmeticFenceSupported();
3328
0
    QualType ArgType = E->getArg(0)->getType();
3329
0
    if (ArgType->isComplexType()) {
3330
0
      if (isArithmeticFenceEnabled) {
3331
0
        QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3332
0
        ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3333
0
        Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3334
0
                                                    ConvertType(ElementType));
3335
0
        Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3336
0
                                                    ConvertType(ElementType));
3337
0
        return RValue::getComplex(std::make_pair(Real, Imag));
3338
0
      }
3339
0
      ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3340
0
      Value *Real = ComplexVal.first;
3341
0
      Value *Imag = ComplexVal.second;
3342
0
      return RValue::getComplex(std::make_pair(Real, Imag));
3343
0
    }
3344
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
3345
0
    if (isArithmeticFenceEnabled)
3346
0
      return RValue::get(
3347
0
          Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3348
0
    return RValue::get(ArgValue);
3349
0
  }
3350
0
  case Builtin::BI__builtin_bswap16:
3351
0
  case Builtin::BI__builtin_bswap32:
3352
0
  case Builtin::BI__builtin_bswap64:
3353
0
  case Builtin::BI_byteswap_ushort:
3354
0
  case Builtin::BI_byteswap_ulong:
3355
0
  case Builtin::BI_byteswap_uint64: {
3356
0
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3357
0
  }
3358
0
  case Builtin::BI__builtin_bitreverse8:
3359
0
  case Builtin::BI__builtin_bitreverse16:
3360
0
  case Builtin::BI__builtin_bitreverse32:
3361
0
  case Builtin::BI__builtin_bitreverse64: {
3362
0
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3363
0
  }
3364
0
  case Builtin::BI__builtin_rotateleft8:
3365
0
  case Builtin::BI__builtin_rotateleft16:
3366
0
  case Builtin::BI__builtin_rotateleft32:
3367
0
  case Builtin::BI__builtin_rotateleft64:
3368
0
  case Builtin::BI_rotl8: // Microsoft variants of rotate left
3369
0
  case Builtin::BI_rotl16:
3370
0
  case Builtin::BI_rotl:
3371
0
  case Builtin::BI_lrotl:
3372
0
  case Builtin::BI_rotl64:
3373
0
    return emitRotate(E, false);
3374
3375
0
  case Builtin::BI__builtin_rotateright8:
3376
0
  case Builtin::BI__builtin_rotateright16:
3377
0
  case Builtin::BI__builtin_rotateright32:
3378
0
  case Builtin::BI__builtin_rotateright64:
3379
0
  case Builtin::BI_rotr8: // Microsoft variants of rotate right
3380
0
  case Builtin::BI_rotr16:
3381
0
  case Builtin::BI_rotr:
3382
0
  case Builtin::BI_lrotr:
3383
0
  case Builtin::BI_rotr64:
3384
0
    return emitRotate(E, true);
3385
3386
0
  case Builtin::BI__builtin_constant_p: {
3387
0
    llvm::Type *ResultType = ConvertType(E->getType());
3388
3389
0
    const Expr *Arg = E->getArg(0);
3390
0
    QualType ArgType = Arg->getType();
3391
    // FIXME: The allowance for Obj-C pointers and block pointers is historical
3392
    // and likely a mistake.
3393
0
    if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3394
0
        !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3395
      // Per the GCC documentation, only numeric constants are recognized after
3396
      // inlining.
3397
0
      return RValue::get(ConstantInt::get(ResultType, 0));
3398
3399
0
    if (Arg->HasSideEffects(getContext()))
3400
      // The argument is unevaluated, so be conservative if it might have
3401
      // side-effects.
3402
0
      return RValue::get(ConstantInt::get(ResultType, 0));
3403
3404
0
    Value *ArgValue = EmitScalarExpr(Arg);
3405
0
    if (ArgType->isObjCObjectPointerType()) {
3406
      // Convert Objective-C objects to id because we cannot distinguish between
3407
      // LLVM types for Obj-C classes as they are opaque.
3408
0
      ArgType = CGM.getContext().getObjCIdType();
3409
0
      ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3410
0
    }
3411
0
    Function *F =
3412
0
        CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3413
0
    Value *Result = Builder.CreateCall(F, ArgValue);
3414
0
    if (Result->getType() != ResultType)
3415
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3416
0
    return RValue::get(Result);
3417
0
  }
3418
0
  case Builtin::BI__builtin_dynamic_object_size:
3419
0
  case Builtin::BI__builtin_object_size: {
3420
0
    unsigned Type =
3421
0
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3422
0
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3423
3424
    // We pass this builtin onto the optimizer so that it can figure out the
3425
    // object size in more complex cases.
3426
0
    bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3427
0
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3428
0
                                             /*EmittedE=*/nullptr, IsDynamic));
3429
0
  }
3430
0
  case Builtin::BI__builtin_prefetch: {
3431
0
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3432
    // FIXME: Technically these constants should be of type 'int', yes?
3433
0
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3434
0
      llvm::ConstantInt::get(Int32Ty, 0);
3435
0
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3436
0
      llvm::ConstantInt::get(Int32Ty, 3);
3437
0
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3438
0
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3439
0
    Builder.CreateCall(F, {Address, RW, Locality, Data});
3440
0
    return RValue::get(nullptr);
3441
0
  }
3442
0
  case Builtin::BI__builtin_readcyclecounter: {
3443
0
    Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3444
0
    return RValue::get(Builder.CreateCall(F));
3445
0
  }
3446
0
  case Builtin::BI__builtin___clear_cache: {
3447
0
    Value *Begin = EmitScalarExpr(E->getArg(0));
3448
0
    Value *End = EmitScalarExpr(E->getArg(1));
3449
0
    Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3450
0
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
3451
0
  }
3452
0
  case Builtin::BI__builtin_trap:
3453
0
    EmitTrapCall(Intrinsic::trap);
3454
0
    return RValue::get(nullptr);
3455
0
  case Builtin::BI__debugbreak:
3456
0
    EmitTrapCall(Intrinsic::debugtrap);
3457
0
    return RValue::get(nullptr);
3458
0
  case Builtin::BI__builtin_unreachable: {
3459
0
    EmitUnreachable(E->getExprLoc());
3460
3461
    // We do need to preserve an insertion point.
3462
0
    EmitBlock(createBasicBlock("unreachable.cont"));
3463
3464
0
    return RValue::get(nullptr);
3465
0
  }
3466
3467
0
  case Builtin::BI__builtin_powi:
3468
0
  case Builtin::BI__builtin_powif:
3469
0
  case Builtin::BI__builtin_powil: {
3470
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3471
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3472
3473
0
    if (Builder.getIsFPConstrained()) {
3474
      // FIXME: llvm.powi has 2 mangling types,
3475
      // llvm.experimental.constrained.powi has one.
3476
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3477
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3478
0
                                     Src0->getType());
3479
0
      return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3480
0
    }
3481
3482
0
    Function *F = CGM.getIntrinsic(Intrinsic::powi,
3483
0
                                   { Src0->getType(), Src1->getType() });
3484
0
    return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3485
0
  }
3486
0
  case Builtin::BI__builtin_frexpl: {
3487
    // Linux PPC will not be adding additional PPCDoubleDouble support.
3488
    // WIP to switch default to IEEE long double. Will emit libcall for
3489
    // frexpl instead of legalizing this type in the BE.
3490
0
    if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3491
0
      break;
3492
0
    LLVM_FALLTHROUGH;
3493
0
  }
3494
0
  case Builtin::BI__builtin_frexp:
3495
0
  case Builtin::BI__builtin_frexpf:
3496
0
  case Builtin::BI__builtin_frexpf128:
3497
0
  case Builtin::BI__builtin_frexpf16:
3498
0
    return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3499
0
  case Builtin::BI__builtin_isgreater:
3500
0
  case Builtin::BI__builtin_isgreaterequal:
3501
0
  case Builtin::BI__builtin_isless:
3502
0
  case Builtin::BI__builtin_islessequal:
3503
0
  case Builtin::BI__builtin_islessgreater:
3504
0
  case Builtin::BI__builtin_isunordered: {
3505
    // Ordered comparisons: we know the arguments to these are matching scalar
3506
    // floating point values.
3507
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3508
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
3509
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
3510
3511
0
    switch (BuiltinID) {
3512
0
    default: llvm_unreachable("Unknown ordered comparison");
3513
0
    case Builtin::BI__builtin_isgreater:
3514
0
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3515
0
      break;
3516
0
    case Builtin::BI__builtin_isgreaterequal:
3517
0
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3518
0
      break;
3519
0
    case Builtin::BI__builtin_isless:
3520
0
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3521
0
      break;
3522
0
    case Builtin::BI__builtin_islessequal:
3523
0
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3524
0
      break;
3525
0
    case Builtin::BI__builtin_islessgreater:
3526
0
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3527
0
      break;
3528
0
    case Builtin::BI__builtin_isunordered:
3529
0
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3530
0
      break;
3531
0
    }
3532
    // ZExt bool to int type.
3533
0
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3534
0
  }
3535
3536
0
  case Builtin::BI__builtin_isnan: {
3537
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3538
0
    Value *V = EmitScalarExpr(E->getArg(0));
3539
0
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3540
0
      return RValue::get(Result);
3541
0
    return RValue::get(
3542
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3543
0
                           ConvertType(E->getType())));
3544
0
  }
3545
3546
0
  case Builtin::BI__builtin_issignaling: {
3547
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3548
0
    Value *V = EmitScalarExpr(E->getArg(0));
3549
0
    return RValue::get(
3550
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3551
0
                           ConvertType(E->getType())));
3552
0
  }
3553
3554
0
  case Builtin::BI__builtin_isinf: {
3555
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3556
0
    Value *V = EmitScalarExpr(E->getArg(0));
3557
0
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3558
0
      return RValue::get(Result);
3559
0
    return RValue::get(
3560
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3561
0
                           ConvertType(E->getType())));
3562
0
  }
3563
3564
0
  case Builtin::BIfinite:
3565
0
  case Builtin::BI__finite:
3566
0
  case Builtin::BIfinitef:
3567
0
  case Builtin::BI__finitef:
3568
0
  case Builtin::BIfinitel:
3569
0
  case Builtin::BI__finitel:
3570
0
  case Builtin::BI__builtin_isfinite: {
3571
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3572
0
    Value *V = EmitScalarExpr(E->getArg(0));
3573
0
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3574
0
      return RValue::get(Result);
3575
0
    return RValue::get(
3576
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3577
0
                           ConvertType(E->getType())));
3578
0
  }
3579
3580
0
  case Builtin::BI__builtin_isnormal: {
3581
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3582
0
    Value *V = EmitScalarExpr(E->getArg(0));
3583
0
    return RValue::get(
3584
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3585
0
                           ConvertType(E->getType())));
3586
0
  }
3587
3588
0
  case Builtin::BI__builtin_issubnormal: {
3589
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3590
0
    Value *V = EmitScalarExpr(E->getArg(0));
3591
0
    return RValue::get(
3592
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3593
0
                           ConvertType(E->getType())));
3594
0
  }
3595
3596
0
  case Builtin::BI__builtin_iszero: {
3597
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3598
0
    Value *V = EmitScalarExpr(E->getArg(0));
3599
0
    return RValue::get(
3600
0
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3601
0
                           ConvertType(E->getType())));
3602
0
  }
3603
3604
0
  case Builtin::BI__builtin_isfpclass: {
3605
0
    Expr::EvalResult Result;
3606
0
    if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3607
0
      break;
3608
0
    uint64_t Test = Result.Val.getInt().getLimitedValue();
3609
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3610
0
    Value *V = EmitScalarExpr(E->getArg(0));
3611
0
    return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3612
0
                                          ConvertType(E->getType())));
3613
0
  }
3614
3615
0
  case Builtin::BI__builtin_nondeterministic_value: {
3616
0
    llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3617
3618
0
    Value *Result = PoisonValue::get(Ty);
3619
0
    Result = Builder.CreateFreeze(Result);
3620
3621
0
    return RValue::get(Result);
3622
0
  }
3623
3624
0
  case Builtin::BI__builtin_elementwise_abs: {
3625
0
    Value *Result;
3626
0
    QualType QT = E->getArg(0)->getType();
3627
3628
0
    if (auto *VecTy = QT->getAs<VectorType>())
3629
0
      QT = VecTy->getElementType();
3630
0
    if (QT->isIntegerType())
3631
0
      Result = Builder.CreateBinaryIntrinsic(
3632
0
          llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3633
0
          Builder.getFalse(), nullptr, "elt.abs");
3634
0
    else
3635
0
      Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3636
3637
0
    return RValue::get(Result);
3638
0
  }
3639
3640
0
  case Builtin::BI__builtin_elementwise_ceil:
3641
0
    return RValue::get(
3642
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3643
0
  case Builtin::BI__builtin_elementwise_exp:
3644
0
    return RValue::get(
3645
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3646
0
  case Builtin::BI__builtin_elementwise_exp2:
3647
0
    return RValue::get(
3648
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3649
0
  case Builtin::BI__builtin_elementwise_log:
3650
0
    return RValue::get(
3651
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3652
0
  case Builtin::BI__builtin_elementwise_log2:
3653
0
    return RValue::get(
3654
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3655
0
  case Builtin::BI__builtin_elementwise_log10:
3656
0
    return RValue::get(
3657
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3658
0
  case Builtin::BI__builtin_elementwise_pow: {
3659
0
    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3660
0
  }
3661
0
  case Builtin::BI__builtin_elementwise_bitreverse:
3662
0
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3663
0
                                        "elt.bitreverse"));
3664
0
  case Builtin::BI__builtin_elementwise_cos:
3665
0
    return RValue::get(
3666
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3667
0
  case Builtin::BI__builtin_elementwise_floor:
3668
0
    return RValue::get(
3669
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3670
0
  case Builtin::BI__builtin_elementwise_roundeven:
3671
0
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3672
0
                                        "elt.roundeven"));
3673
0
  case Builtin::BI__builtin_elementwise_round:
3674
0
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3675
0
                                        "elt.round"));
3676
0
  case Builtin::BI__builtin_elementwise_rint:
3677
0
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3678
0
                                        "elt.rint"));
3679
0
  case Builtin::BI__builtin_elementwise_nearbyint:
3680
0
    return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3681
0
                                        "elt.nearbyint"));
3682
0
  case Builtin::BI__builtin_elementwise_sin:
3683
0
    return RValue::get(
3684
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3685
3686
0
  case Builtin::BI__builtin_elementwise_trunc:
3687
0
    return RValue::get(
3688
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3689
0
  case Builtin::BI__builtin_elementwise_canonicalize:
3690
0
    return RValue::get(
3691
0
        emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3692
0
  case Builtin::BI__builtin_elementwise_copysign:
3693
0
    return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3694
0
  case Builtin::BI__builtin_elementwise_fma:
3695
0
    return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3696
0
  case Builtin::BI__builtin_elementwise_add_sat:
3697
0
  case Builtin::BI__builtin_elementwise_sub_sat: {
3698
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3699
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3700
0
    Value *Result;
3701
0
    assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3702
0
    QualType Ty = E->getArg(0)->getType();
3703
0
    if (auto *VecTy = Ty->getAs<VectorType>())
3704
0
      Ty = VecTy->getElementType();
3705
0
    bool IsSigned = Ty->isSignedIntegerType();
3706
0
    unsigned Opc;
3707
0
    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3708
0
      Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3709
0
    else
3710
0
      Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3711
0
    Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3712
0
    return RValue::get(Result);
3713
0
  }
3714
3715
0
  case Builtin::BI__builtin_elementwise_max: {
3716
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3717
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3718
0
    Value *Result;
3719
0
    if (Op0->getType()->isIntOrIntVectorTy()) {
3720
0
      QualType Ty = E->getArg(0)->getType();
3721
0
      if (auto *VecTy = Ty->getAs<VectorType>())
3722
0
        Ty = VecTy->getElementType();
3723
0
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3724
0
                                                 ? llvm::Intrinsic::smax
3725
0
                                                 : llvm::Intrinsic::umax,
3726
0
                                             Op0, Op1, nullptr, "elt.max");
3727
0
    } else
3728
0
      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3729
0
    return RValue::get(Result);
3730
0
  }
3731
0
  case Builtin::BI__builtin_elementwise_min: {
3732
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
3733
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
3734
0
    Value *Result;
3735
0
    if (Op0->getType()->isIntOrIntVectorTy()) {
3736
0
      QualType Ty = E->getArg(0)->getType();
3737
0
      if (auto *VecTy = Ty->getAs<VectorType>())
3738
0
        Ty = VecTy->getElementType();
3739
0
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3740
0
                                                 ? llvm::Intrinsic::smin
3741
0
                                                 : llvm::Intrinsic::umin,
3742
0
                                             Op0, Op1, nullptr, "elt.min");
3743
0
    } else
3744
0
      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3745
0
    return RValue::get(Result);
3746
0
  }
3747
3748
0
  case Builtin::BI__builtin_reduce_max: {
3749
0
    auto GetIntrinsicID = [](QualType QT) {
3750
0
      if (auto *VecTy = QT->getAs<VectorType>())
3751
0
        QT = VecTy->getElementType();
3752
0
      if (QT->isSignedIntegerType())
3753
0
        return llvm::Intrinsic::vector_reduce_smax;
3754
0
      if (QT->isUnsignedIntegerType())
3755
0
        return llvm::Intrinsic::vector_reduce_umax;
3756
0
      assert(QT->isFloatingType() && "must have a float here");
3757
0
      return llvm::Intrinsic::vector_reduce_fmax;
3758
0
    };
3759
0
    return RValue::get(emitUnaryBuiltin(
3760
0
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3761
0
  }
3762
3763
0
  case Builtin::BI__builtin_reduce_min: {
3764
0
    auto GetIntrinsicID = [](QualType QT) {
3765
0
      if (auto *VecTy = QT->getAs<VectorType>())
3766
0
        QT = VecTy->getElementType();
3767
0
      if (QT->isSignedIntegerType())
3768
0
        return llvm::Intrinsic::vector_reduce_smin;
3769
0
      if (QT->isUnsignedIntegerType())
3770
0
        return llvm::Intrinsic::vector_reduce_umin;
3771
0
      assert(QT->isFloatingType() && "must have a float here");
3772
0
      return llvm::Intrinsic::vector_reduce_fmin;
3773
0
    };
3774
3775
0
    return RValue::get(emitUnaryBuiltin(
3776
0
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3777
0
  }
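Both reductions pick the intrinsic from the element type of the vector argument (smax/smin for signed, umax/umin for unsigned, fmax/fmin for floating point). A usage sketch:

  typedef unsigned uint4 __attribute__((ext_vector_type(4)));
  unsigned max_of(uint4 v) { return __builtin_reduce_max(v); } // -> llvm.vector.reduce.umax
  unsigned min_of(uint4 v) { return __builtin_reduce_min(v); } // -> llvm.vector.reduce.umin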
3778
3779
0
  case Builtin::BI__builtin_reduce_add:
3780
0
    return RValue::get(emitUnaryBuiltin(
3781
0
        *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3782
0
  case Builtin::BI__builtin_reduce_mul:
3783
0
    return RValue::get(emitUnaryBuiltin(
3784
0
        *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3785
0
  case Builtin::BI__builtin_reduce_xor:
3786
0
    return RValue::get(emitUnaryBuiltin(
3787
0
        *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3788
0
  case Builtin::BI__builtin_reduce_or:
3789
0
    return RValue::get(emitUnaryBuiltin(
3790
0
        *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3791
0
  case Builtin::BI__builtin_reduce_and:
3792
0
    return RValue::get(emitUnaryBuiltin(
3793
0
        *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3794
3795
0
  case Builtin::BI__builtin_matrix_transpose: {
3796
0
    auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3797
0
    Value *MatValue = EmitScalarExpr(E->getArg(0));
3798
0
    MatrixBuilder MB(Builder);
3799
0
    Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3800
0
                                             MatrixTy->getNumColumns());
3801
0
    return RValue::get(Result);
3802
0
  }
3803
3804
0
  case Builtin::BI__builtin_matrix_column_major_load: {
3805
0
    MatrixBuilder MB(Builder);
3806
    // Emit everything that isn't dependent on the first parameter type
3807
0
    Value *Stride = EmitScalarExpr(E->getArg(3));
3808
0
    const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3809
0
    auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3810
0
    assert(PtrTy && "arg0 must be of pointer type");
3811
0
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3812
3813
0
    Address Src = EmitPointerWithAlignment(E->getArg(0));
3814
0
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
3815
0
                        E->getArg(0)->getExprLoc(), FD, 0);
3816
0
    Value *Result = MB.CreateColumnMajorLoad(
3817
0
        Src.getElementType(), Src.getPointer(),
3818
0
        Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3819
0
        ResultTy->getNumRows(), ResultTy->getNumColumns(),
3820
0
        "matrix");
3821
0
    return RValue::get(Result);
3822
0
  }
3823
3824
0
  case Builtin::BI__builtin_matrix_column_major_store: {
3825
0
    MatrixBuilder MB(Builder);
3826
0
    Value *Matrix = EmitScalarExpr(E->getArg(0));
3827
0
    Address Dst = EmitPointerWithAlignment(E->getArg(1));
3828
0
    Value *Stride = EmitScalarExpr(E->getArg(2));
3829
3830
0
    const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3831
0
    auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3832
0
    assert(PtrTy && "arg1 must be of pointer type");
3833
0
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3834
3835
0
    EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(),
3836
0
                        E->getArg(1)->getExprLoc(), FD, 0);
3837
0
    Value *Result = MB.CreateColumnMajorStore(
3838
0
        Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()),
3839
0
        Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3840
0
    return RValue::get(Result);
3841
0
  }
3842
3843
0
  case Builtin::BI__builtin_isinf_sign: {
3844
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3845
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3846
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3847
0
    Value *Arg = EmitScalarExpr(E->getArg(0));
3848
0
    Value *AbsArg = EmitFAbs(*this, Arg);
3849
0
    Value *IsInf = Builder.CreateFCmpOEQ(
3850
0
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3851
0
    Value *IsNeg = EmitSignBit(*this, Arg);
3852
3853
0
    llvm::Type *IntTy = ConvertType(E->getType());
3854
0
    Value *Zero = Constant::getNullValue(IntTy);
3855
0
    Value *One = ConstantInt::get(IntTy, 1);
3856
0
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
3857
0
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3858
0
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3859
0
    return RValue::get(Result);
3860
0
  }
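The blocks above compute exactly the expansion given in the leading comment. The same thing written at the source level, as a sketch:

  int isinf_sign_like(double x) {
    return __builtin_fabs(x) == __builtin_inf() ? (__builtin_signbit(x) ? -1 : 1) : 0;
  }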
3861
3862
0
  case Builtin::BI__builtin_flt_rounds: {
3863
0
    Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
3864
3865
0
    llvm::Type *ResultType = ConvertType(E->getType());
3866
0
    Value *Result = Builder.CreateCall(F);
3867
0
    if (Result->getType() != ResultType)
3868
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3869
0
                                     "cast");
3870
0
    return RValue::get(Result);
3871
0
  }
3872
3873
0
  case Builtin::BI__builtin_set_flt_rounds: {
3874
0
    Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
3875
3876
0
    Value *V = EmitScalarExpr(E->getArg(0));
3877
0
    Builder.CreateCall(F, V);
3878
0
    return RValue::get(nullptr);
3879
0
  }
3880
3881
0
  case Builtin::BI__builtin_fpclassify: {
3882
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3883
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3884
0
    Value *V = EmitScalarExpr(E->getArg(5));
3885
0
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3886
3887
    // Create Result
3888
0
    BasicBlock *Begin = Builder.GetInsertBlock();
3889
0
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3890
0
    Builder.SetInsertPoint(End);
3891
0
    PHINode *Result =
3892
0
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3893
0
                        "fpclassify_result");
3894
3895
    // if (V==0) return FP_ZERO
3896
0
    Builder.SetInsertPoint(Begin);
3897
0
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3898
0
                                          "iszero");
3899
0
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3900
0
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3901
0
    Builder.CreateCondBr(IsZero, End, NotZero);
3902
0
    Result->addIncoming(ZeroLiteral, Begin);
3903
3904
    // if (V != V) return FP_NAN
3905
0
    Builder.SetInsertPoint(NotZero);
3906
0
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3907
0
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3908
0
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3909
0
    Builder.CreateCondBr(IsNan, End, NotNan);
3910
0
    Result->addIncoming(NanLiteral, NotZero);
3911
3912
    // if (fabs(V) == infinity) return FP_INFINITY
3913
0
    Builder.SetInsertPoint(NotNan);
3914
0
    Value *VAbs = EmitFAbs(*this, V);
3915
0
    Value *IsInf =
3916
0
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3917
0
                            "isinf");
3918
0
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3919
0
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3920
0
    Builder.CreateCondBr(IsInf, End, NotInf);
3921
0
    Result->addIncoming(InfLiteral, NotNan);
3922
3923
    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3924
0
    Builder.SetInsertPoint(NotInf);
3925
0
    APFloat Smallest = APFloat::getSmallestNormalized(
3926
0
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3927
0
    Value *IsNormal =
3928
0
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3929
0
                            "isnormal");
3930
0
    Value *NormalResult =
3931
0
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3932
0
                           EmitScalarExpr(E->getArg(3)));
3933
0
    Builder.CreateBr(End);
3934
0
    Result->addIncoming(NormalResult, NotInf);
3935
3936
    // return Result
3937
0
    Builder.SetInsertPoint(End);
3938
0
    return RValue::get(Result);
3939
0
  }
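A source-level sketch of the branch cascade emitted above; argument 5 is the value being classified and arguments 0 through 4 are the FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL and FP_ZERO literals, tested in the order zero, NaN, infinity, then normal vs. subnormal:

  int fpclassify_like(int nan, int inf, int norm, int subnorm, int zero, double v) {
    if (v == 0.0)             return zero;
    if (v != v)               return nan;     // unordered compare catches NaN
    double a = __builtin_fabs(v);
    if (a == __builtin_inf()) return inf;
    return a >= __DBL_MIN__ ? norm : subnorm; // smallest normalized double
  }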
3940
3941
  // An alloca will always return a pointer to the alloca (stack) address
3942
  // space. This address space need not be the same as the AST / Language
3943
  // default (e.g. in C / C++ auto vars are in the generic address space). At
3944
  // the AST level this is handled within CreateTempAlloca et al., but for the
3945
  // builtin / dynamic alloca we have to handle it here. We use an explicit cast
3946
  // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
3947
0
  case Builtin::BIalloca:
3948
0
  case Builtin::BI_alloca:
3949
0
  case Builtin::BI__builtin_alloca_uninitialized:
3950
0
  case Builtin::BI__builtin_alloca: {
3951
0
    Value *Size = EmitScalarExpr(E->getArg(0));
3952
0
    const TargetInfo &TI = getContext().getTargetInfo();
3953
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
3954
0
    const Align SuitableAlignmentInBytes =
3955
0
        CGM.getContext()
3956
0
            .toCharUnitsFromBits(TI.getSuitableAlign())
3957
0
            .getAsAlign();
3958
0
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3959
0
    AI->setAlignment(SuitableAlignmentInBytes);
3960
0
    if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
3961
0
      initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
3962
0
    LangAS AAS = getASTAllocaAddressSpace();
3963
0
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3964
0
    if (AAS != EAS) {
3965
0
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3966
0
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3967
0
                                                               EAS, Ty));
3968
0
    }
3969
0
    return RValue::get(AI);
3970
0
  }
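A usage sketch tying the pieces together: the allocation is byte-typed, aligned to the target's suitable alignment (the __BIGGEST_ALIGNMENT__ value), optionally auto-initialized, and addrspacecast back to the generic address space only on targets where the two spaces differ (AMDGPU being the usual example):

  // Illustration only; the address-space cast is invisible at the C level.
  void *scratch(unsigned n) { return __builtin_alloca(n); }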
3971
3972
0
  case Builtin::BI__builtin_alloca_with_align_uninitialized:
3973
0
  case Builtin::BI__builtin_alloca_with_align: {
3974
0
    Value *Size = EmitScalarExpr(E->getArg(0));
3975
0
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
3976
0
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
3977
0
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
3978
0
    const Align AlignmentInBytes =
3979
0
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
3980
0
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
3981
0
    AI->setAlignment(AlignmentInBytes);
3982
0
    if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
3983
0
      initializeAlloca(*this, AI, Size, AlignmentInBytes);
3984
0
    LangAS AAS = getASTAllocaAddressSpace();
3985
0
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
3986
0
    if (AAS != EAS) {
3987
0
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
3988
0
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
3989
0
                                                               EAS, Ty));
3990
0
    }
3991
0
    return RValue::get(AI);
3992
0
  }
3993
3994
0
  case Builtin::BIbzero:
3995
0
  case Builtin::BI__builtin_bzero: {
3996
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
3997
0
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
3998
0
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
3999
0
                        E->getArg(0)->getExprLoc(), FD, 0);
4000
0
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4001
0
    return RValue::get(nullptr);
4002
0
  }
4003
4004
0
  case Builtin::BIbcopy:
4005
0
  case Builtin::BI__builtin_bcopy: {
4006
0
    Address Src = EmitPointerWithAlignment(E->getArg(0));
4007
0
    Address Dest = EmitPointerWithAlignment(E->getArg(1));
4008
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4009
0
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
4010
0
                        E->getArg(0)->getExprLoc(), FD, 0);
4011
0
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(),
4012
0
                        E->getArg(1)->getExprLoc(), FD, 0);
4013
0
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
4014
0
    return RValue::get(Dest.getPointer());
4015
0
  }
4016
4017
0
  case Builtin::BImemcpy:
4018
0
  case Builtin::BI__builtin_memcpy:
4019
0
  case Builtin::BImempcpy:
4020
0
  case Builtin::BI__builtin_mempcpy: {
4021
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4022
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4023
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4024
0
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4025
0
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4026
0
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4027
0
    if (BuiltinID == Builtin::BImempcpy ||
4028
0
        BuiltinID == Builtin::BI__builtin_mempcpy)
4029
0
      return RValue::get(Builder.CreateInBoundsGEP(Dest.getElementType(),
4030
0
                                                   Dest.getPointer(), SizeVal));
4031
0
    else
4032
0
      return RValue::get(Dest.getPointer());
4033
0
  }
4034
4035
0
  case Builtin::BI__builtin_memcpy_inline: {
4036
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4037
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4038
0
    uint64_t Size =
4039
0
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4040
0
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4041
0
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4042
0
    Builder.CreateMemCpyInline(Dest, Src, Size);
4043
0
    return RValue::get(nullptr);
4044
0
  }
4045
4046
0
  case Builtin::BI__builtin_char_memchr:
4047
0
    BuiltinID = Builtin::BI__builtin_memchr;
4048
0
    break;
4049
4050
0
  case Builtin::BI__builtin___memcpy_chk: {
4051
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4052
0
    Expr::EvalResult SizeResult, DstSizeResult;
4053
0
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4054
0
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4055
0
      break;
4056
0
    llvm::APSInt Size = SizeResult.Val.getInt();
4057
0
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4058
0
    if (Size.ugt(DstSize))
4059
0
      break;
4060
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4061
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4062
0
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4063
0
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4064
0
    return RValue::get(Dest.getPointer());
4065
0
  }
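The fold above fires only when both size operands are integer constants and the copy size does not exceed the destination size; anything else falls through to the checked library call. A typical caller, as a sketch:

  // With both sizes constant and 8 <= the known object size, this lowers to a plain memcpy.
  void copy8(char *dst, const char *src) {
    __builtin___memcpy_chk(dst, src, 8, __builtin_object_size(dst, 0));
  }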
4066
4067
0
  case Builtin::BI__builtin_objc_memmove_collectable: {
4068
0
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4069
0
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4070
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4071
0
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4072
0
                                                  DestAddr, SrcAddr, SizeVal);
4073
0
    return RValue::get(DestAddr.getPointer());
4074
0
  }
4075
4076
0
  case Builtin::BI__builtin___memmove_chk: {
4077
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4078
0
    Expr::EvalResult SizeResult, DstSizeResult;
4079
0
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4080
0
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4081
0
      break;
4082
0
    llvm::APSInt Size = SizeResult.Val.getInt();
4083
0
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4084
0
    if (Size.ugt(DstSize))
4085
0
      break;
4086
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4087
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4088
0
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4089
0
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
4090
0
    return RValue::get(Dest.getPointer());
4091
0
  }
4092
4093
0
  case Builtin::BImemmove:
4094
0
  case Builtin::BI__builtin_memmove: {
4095
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4096
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
4097
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4098
0
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4099
0
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4100
0
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
4101
0
    return RValue::get(Dest.getPointer());
4102
0
  }
4103
0
  case Builtin::BImemset:
4104
0
  case Builtin::BI__builtin_memset: {
4105
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4106
0
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4107
0
                                         Builder.getInt8Ty());
4108
0
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
4109
0
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4110
0
                        E->getArg(0)->getExprLoc(), FD, 0);
4111
0
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4112
0
    return RValue::get(Dest.getPointer());
4113
0
  }
4114
0
  case Builtin::BI__builtin_memset_inline: {
4115
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4116
0
    Value *ByteVal =
4117
0
        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4118
0
    uint64_t Size =
4119
0
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4120
0
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
4121
0
                        E->getArg(0)->getExprLoc(), FD, 0);
4122
0
    Builder.CreateMemSetInline(Dest, ByteVal, Size);
4123
0
    return RValue::get(nullptr);
4124
0
  }
4125
0
  case Builtin::BI__builtin___memset_chk: {
4126
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4127
0
    Expr::EvalResult SizeResult, DstSizeResult;
4128
0
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4129
0
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4130
0
      break;
4131
0
    llvm::APSInt Size = SizeResult.Val.getInt();
4132
0
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4133
0
    if (Size.ugt(DstSize))
4134
0
      break;
4135
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
4136
0
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4137
0
                                         Builder.getInt8Ty());
4138
0
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4139
0
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4140
0
    return RValue::get(Dest.getPointer());
4141
0
  }
4142
0
  case Builtin::BI__builtin_wmemchr: {
4143
    // The MSVC runtime library does not provide a definition of wmemchr, so we
4144
    // need an inline implementation.
4145
0
    if (!getTarget().getTriple().isOSMSVCRT())
4146
0
      break;
4147
4148
0
    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4149
0
    Value *Str = EmitScalarExpr(E->getArg(0));
4150
0
    Value *Chr = EmitScalarExpr(E->getArg(1));
4151
0
    Value *Size = EmitScalarExpr(E->getArg(2));
4152
4153
0
    BasicBlock *Entry = Builder.GetInsertBlock();
4154
0
    BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4155
0
    BasicBlock *Next = createBasicBlock("wmemchr.next");
4156
0
    BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4157
0
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4158
0
    Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4159
4160
0
    EmitBlock(CmpEq);
4161
0
    PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4162
0
    StrPhi->addIncoming(Str, Entry);
4163
0
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4164
0
    SizePhi->addIncoming(Size, Entry);
4165
0
    CharUnits WCharAlign =
4166
0
        getContext().getTypeAlignInChars(getContext().WCharTy);
4167
0
    Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4168
0
    Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4169
0
    Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4170
0
    Builder.CreateCondBr(StrEqChr, Exit, Next);
4171
4172
0
    EmitBlock(Next);
4173
0
    Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4174
0
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4175
0
    Value *NextSizeEq0 =
4176
0
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4177
0
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4178
0
    StrPhi->addIncoming(NextStr, Next);
4179
0
    SizePhi->addIncoming(NextSize, Next);
4180
4181
0
    EmitBlock(Exit);
4182
0
    PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4183
0
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4184
0
    Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4185
0
    Ret->addIncoming(FoundChr, CmpEq);
4186
0
    return RValue::get(Ret);
4187
0
  }
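The Entry/CmpEq/Next/Exit blocks above implement the obvious scan loop. The same logic at the source level, as a sketch (this path is only reached on MSVC targets, whose runtime lacks wmemchr):

  #include <stddef.h>
  wchar_t *wmemchr_like(const wchar_t *s, wchar_t c, size_t n) {
    for (; n; --n, ++s)
      if (*s == c)
        return (wchar_t *)s;
    return 0;
  }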
4188
0
  case Builtin::BI__builtin_wmemcmp: {
4189
    // The MSVC runtime library does not provide a definition of wmemcmp, so we
4190
    // need an inline implementation.
4191
0
    if (!getTarget().getTriple().isOSMSVCRT())
4192
0
      break;
4193
4194
0
    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4195
4196
0
    Value *Dst = EmitScalarExpr(E->getArg(0));
4197
0
    Value *Src = EmitScalarExpr(E->getArg(1));
4198
0
    Value *Size = EmitScalarExpr(E->getArg(2));
4199
4200
0
    BasicBlock *Entry = Builder.GetInsertBlock();
4201
0
    BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4202
0
    BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4203
0
    BasicBlock *Next = createBasicBlock("wmemcmp.next");
4204
0
    BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4205
0
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4206
0
    Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4207
4208
0
    EmitBlock(CmpGT);
4209
0
    PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4210
0
    DstPhi->addIncoming(Dst, Entry);
4211
0
    PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4212
0
    SrcPhi->addIncoming(Src, Entry);
4213
0
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4214
0
    SizePhi->addIncoming(Size, Entry);
4215
0
    CharUnits WCharAlign =
4216
0
        getContext().getTypeAlignInChars(getContext().WCharTy);
4217
0
    Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4218
0
    Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4219
0
    Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4220
0
    Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4221
4222
0
    EmitBlock(CmpLT);
4223
0
    Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4224
0
    Builder.CreateCondBr(DstLtSrc, Exit, Next);
4225
4226
0
    EmitBlock(Next);
4227
0
    Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4228
0
    Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4229
0
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4230
0
    Value *NextSizeEq0 =
4231
0
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4232
0
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4233
0
    DstPhi->addIncoming(NextDst, Next);
4234
0
    SrcPhi->addIncoming(NextSrc, Next);
4235
0
    SizePhi->addIncoming(NextSize, Next);
4236
4237
0
    EmitBlock(Exit);
4238
0
    PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4239
0
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4240
0
    Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4241
0
    Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4242
0
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4243
0
    return RValue::get(Ret);
4244
0
  }
4245
0
  case Builtin::BI__builtin_dwarf_cfa: {
4246
    // The offset in bytes from the first argument to the CFA.
4247
    //
4248
    // Why on earth is this in the frontend?  Is there any reason at
4249
    // all that the backend can't reasonably determine this while
4250
    // lowering llvm.eh.dwarf.cfa()?
4251
    //
4252
    // TODO: If there's a satisfactory reason, add a target hook for
4253
    // this instead of hard-coding 0, which is correct for most targets.
4254
0
    int32_t Offset = 0;
4255
4256
0
    Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4257
0
    return RValue::get(Builder.CreateCall(F,
4258
0
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
4259
0
  }
4260
0
  case Builtin::BI__builtin_return_address: {
4261
0
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4262
0
                                                   getContext().UnsignedIntTy);
4263
0
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4264
0
    return RValue::get(Builder.CreateCall(F, Depth));
4265
0
  }
4266
0
  case Builtin::BI_ReturnAddress: {
4267
0
    Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4268
0
    return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4269
0
  }
4270
0
  case Builtin::BI__builtin_frame_address: {
4271
0
    Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4272
0
                                                   getContext().UnsignedIntTy);
4273
0
    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4274
0
    return RValue::get(Builder.CreateCall(F, Depth));
4275
0
  }
4276
0
  case Builtin::BI__builtin_extract_return_addr: {
4277
0
    Value *Address = EmitScalarExpr(E->getArg(0));
4278
0
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4279
0
    return RValue::get(Result);
4280
0
  }
4281
0
  case Builtin::BI__builtin_frob_return_addr: {
4282
0
    Value *Address = EmitScalarExpr(E->getArg(0));
4283
0
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4284
0
    return RValue::get(Result);
4285
0
  }
4286
0
  case Builtin::BI__builtin_dwarf_sp_column: {
4287
0
    llvm::IntegerType *Ty
4288
0
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
4289
0
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4290
0
    if (Column == -1) {
4291
0
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4292
0
      return RValue::get(llvm::UndefValue::get(Ty));
4293
0
    }
4294
0
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4295
0
  }
4296
0
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4297
0
    Value *Address = EmitScalarExpr(E->getArg(0));
4298
0
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4299
0
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4300
0
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4301
0
  }
4302
0
  case Builtin::BI__builtin_eh_return: {
4303
0
    Value *Int = EmitScalarExpr(E->getArg(0));
4304
0
    Value *Ptr = EmitScalarExpr(E->getArg(1));
4305
4306
0
    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4307
0
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4308
0
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4309
0
    Function *F =
4310
0
        CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4311
0
                                                    : Intrinsic::eh_return_i64);
4312
0
    Builder.CreateCall(F, {Int, Ptr});
4313
0
    Builder.CreateUnreachable();
4314
4315
    // We do need to preserve an insertion point.
4316
0
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4317
4318
0
    return RValue::get(nullptr);
4319
0
  }
4320
0
  case Builtin::BI__builtin_unwind_init: {
4321
0
    Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4322
0
    Builder.CreateCall(F);
4323
0
    return RValue::get(nullptr);
4324
0
  }
4325
0
  case Builtin::BI__builtin_extend_pointer: {
4326
    // Extends a pointer to the size of an _Unwind_Word, which is
4327
    // uint64_t on all platforms.  Generally this gets poked into a
4328
    // register and eventually used as an address, so if the
4329
    // addressing registers are wider than pointers and the platform
4330
    // doesn't implicitly ignore high-order bits when doing
4331
    // addressing, we need to make sure we zext / sext based on
4332
    // the platform's expectations.
4333
    //
4334
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4335
4336
    // Cast the pointer to intptr_t.
4337
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
4338
0
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4339
4340
    // If that's 64 bits, we're done.
4341
0
    if (IntPtrTy->getBitWidth() == 64)
4342
0
      return RValue::get(Result);
4343
4344
    // Otherwise, ask the codegen data what to do.
4345
0
    if (getTargetHooks().extendPointerWithSExt())
4346
0
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4347
0
    else
4348
0
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4349
0
  }
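Per the comment, the widening to 64 bits is zero-extended unless the target hook asks for sign extension; on a 64-bit target the ptrtoint result is already the right width and is returned as is. A sketch of the default (zext) path on an ILP32 target:

  // Illustration of the zero-extending path; a sign-extending target would go via a signed intptr type.
  unsigned long long extend_pointer_like(void *p) {
    return (unsigned long long)(__UINTPTR_TYPE__)p;
  }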
4350
0
  case Builtin::BI__builtin_setjmp: {
4351
    // Buffer is a void**.
4352
0
    Address Buf = EmitPointerWithAlignment(E->getArg(0));
4353
4354
    // Store the frame pointer to the setjmp buffer.
4355
0
    Value *FrameAddr = Builder.CreateCall(
4356
0
        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4357
0
        ConstantInt::get(Int32Ty, 0));
4358
0
    Builder.CreateStore(FrameAddr, Buf);
4359
4360
    // Store the stack pointer to the setjmp buffer.
4361
0
    Value *StackAddr = Builder.CreateStackSave();
4362
0
    assert(Buf.getPointer()->getType() == StackAddr->getType());
4363
4364
0
    Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4365
0
    Builder.CreateStore(StackAddr, StackSaveSlot);
4366
4367
    // Call LLVM's EH setjmp, which is lightweight.
4368
0
    Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4369
0
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
4370
0
  }
4371
0
  case Builtin::BI__builtin_longjmp: {
4372
0
    Value *Buf = EmitScalarExpr(E->getArg(0));
4373
4374
    // Call LLVM's EH longjmp, which is lightweight.
4375
0
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4376
4377
    // longjmp doesn't return; mark this as unreachable.
4378
0
    Builder.CreateUnreachable();
4379
4380
    // We do need to preserve an insertion point.
4381
0
    EmitBlock(createBasicBlock("longjmp.cont"));
4382
4383
0
    return RValue::get(nullptr);
4384
0
  }
4385
0
  case Builtin::BI__builtin_launder: {
4386
0
    const Expr *Arg = E->getArg(0);
4387
0
    QualType ArgTy = Arg->getType()->getPointeeType();
4388
0
    Value *Ptr = EmitScalarExpr(Arg);
4389
0
    if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4390
0
      Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4391
4392
0
    return RValue::get(Ptr);
4393
0
  }
4394
0
  case Builtin::BI__sync_fetch_and_add:
4395
0
  case Builtin::BI__sync_fetch_and_sub:
4396
0
  case Builtin::BI__sync_fetch_and_or:
4397
0
  case Builtin::BI__sync_fetch_and_and:
4398
0
  case Builtin::BI__sync_fetch_and_xor:
4399
0
  case Builtin::BI__sync_fetch_and_nand:
4400
0
  case Builtin::BI__sync_add_and_fetch:
4401
0
  case Builtin::BI__sync_sub_and_fetch:
4402
0
  case Builtin::BI__sync_and_and_fetch:
4403
0
  case Builtin::BI__sync_or_and_fetch:
4404
0
  case Builtin::BI__sync_xor_and_fetch:
4405
0
  case Builtin::BI__sync_nand_and_fetch:
4406
0
  case Builtin::BI__sync_val_compare_and_swap:
4407
0
  case Builtin::BI__sync_bool_compare_and_swap:
4408
0
  case Builtin::BI__sync_lock_test_and_set:
4409
0
  case Builtin::BI__sync_lock_release:
4410
0
  case Builtin::BI__sync_swap:
4411
0
    llvm_unreachable("Shouldn't make it through sema");
4412
0
  case Builtin::BI__sync_fetch_and_add_1:
4413
0
  case Builtin::BI__sync_fetch_and_add_2:
4414
0
  case Builtin::BI__sync_fetch_and_add_4:
4415
0
  case Builtin::BI__sync_fetch_and_add_8:
4416
0
  case Builtin::BI__sync_fetch_and_add_16:
4417
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4418
0
  case Builtin::BI__sync_fetch_and_sub_1:
4419
0
  case Builtin::BI__sync_fetch_and_sub_2:
4420
0
  case Builtin::BI__sync_fetch_and_sub_4:
4421
0
  case Builtin::BI__sync_fetch_and_sub_8:
4422
0
  case Builtin::BI__sync_fetch_and_sub_16:
4423
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4424
0
  case Builtin::BI__sync_fetch_and_or_1:
4425
0
  case Builtin::BI__sync_fetch_and_or_2:
4426
0
  case Builtin::BI__sync_fetch_and_or_4:
4427
0
  case Builtin::BI__sync_fetch_and_or_8:
4428
0
  case Builtin::BI__sync_fetch_and_or_16:
4429
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4430
0
  case Builtin::BI__sync_fetch_and_and_1:
4431
0
  case Builtin::BI__sync_fetch_and_and_2:
4432
0
  case Builtin::BI__sync_fetch_and_and_4:
4433
0
  case Builtin::BI__sync_fetch_and_and_8:
4434
0
  case Builtin::BI__sync_fetch_and_and_16:
4435
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4436
0
  case Builtin::BI__sync_fetch_and_xor_1:
4437
0
  case Builtin::BI__sync_fetch_and_xor_2:
4438
0
  case Builtin::BI__sync_fetch_and_xor_4:
4439
0
  case Builtin::BI__sync_fetch_and_xor_8:
4440
0
  case Builtin::BI__sync_fetch_and_xor_16:
4441
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4442
0
  case Builtin::BI__sync_fetch_and_nand_1:
4443
0
  case Builtin::BI__sync_fetch_and_nand_2:
4444
0
  case Builtin::BI__sync_fetch_and_nand_4:
4445
0
  case Builtin::BI__sync_fetch_and_nand_8:
4446
0
  case Builtin::BI__sync_fetch_and_nand_16:
4447
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4448
4449
  // Clang extensions: not overloaded yet.
4450
0
  case Builtin::BI__sync_fetch_and_min:
4451
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4452
0
  case Builtin::BI__sync_fetch_and_max:
4453
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4454
0
  case Builtin::BI__sync_fetch_and_umin:
4455
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4456
0
  case Builtin::BI__sync_fetch_and_umax:
4457
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4458
4459
0
  case Builtin::BI__sync_add_and_fetch_1:
4460
0
  case Builtin::BI__sync_add_and_fetch_2:
4461
0
  case Builtin::BI__sync_add_and_fetch_4:
4462
0
  case Builtin::BI__sync_add_and_fetch_8:
4463
0
  case Builtin::BI__sync_add_and_fetch_16:
4464
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4465
0
                                llvm::Instruction::Add);
4466
0
  case Builtin::BI__sync_sub_and_fetch_1:
4467
0
  case Builtin::BI__sync_sub_and_fetch_2:
4468
0
  case Builtin::BI__sync_sub_and_fetch_4:
4469
0
  case Builtin::BI__sync_sub_and_fetch_8:
4470
0
  case Builtin::BI__sync_sub_and_fetch_16:
4471
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4472
0
                                llvm::Instruction::Sub);
4473
0
  case Builtin::BI__sync_and_and_fetch_1:
4474
0
  case Builtin::BI__sync_and_and_fetch_2:
4475
0
  case Builtin::BI__sync_and_and_fetch_4:
4476
0
  case Builtin::BI__sync_and_and_fetch_8:
4477
0
  case Builtin::BI__sync_and_and_fetch_16:
4478
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4479
0
                                llvm::Instruction::And);
4480
0
  case Builtin::BI__sync_or_and_fetch_1:
4481
0
  case Builtin::BI__sync_or_and_fetch_2:
4482
0
  case Builtin::BI__sync_or_and_fetch_4:
4483
0
  case Builtin::BI__sync_or_and_fetch_8:
4484
0
  case Builtin::BI__sync_or_and_fetch_16:
4485
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4486
0
                                llvm::Instruction::Or);
4487
0
  case Builtin::BI__sync_xor_and_fetch_1:
4488
0
  case Builtin::BI__sync_xor_and_fetch_2:
4489
0
  case Builtin::BI__sync_xor_and_fetch_4:
4490
0
  case Builtin::BI__sync_xor_and_fetch_8:
4491
0
  case Builtin::BI__sync_xor_and_fetch_16:
4492
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4493
0
                                llvm::Instruction::Xor);
4494
0
  case Builtin::BI__sync_nand_and_fetch_1:
4495
0
  case Builtin::BI__sync_nand_and_fetch_2:
4496
0
  case Builtin::BI__sync_nand_and_fetch_4:
4497
0
  case Builtin::BI__sync_nand_and_fetch_8:
4498
0
  case Builtin::BI__sync_nand_and_fetch_16:
4499
0
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4500
0
                                llvm::Instruction::And, true);
4501
4502
0
  case Builtin::BI__sync_val_compare_and_swap_1:
4503
0
  case Builtin::BI__sync_val_compare_and_swap_2:
4504
0
  case Builtin::BI__sync_val_compare_and_swap_4:
4505
0
  case Builtin::BI__sync_val_compare_and_swap_8:
4506
0
  case Builtin::BI__sync_val_compare_and_swap_16:
4507
0
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4508
4509
0
  case Builtin::BI__sync_bool_compare_and_swap_1:
4510
0
  case Builtin::BI__sync_bool_compare_and_swap_2:
4511
0
  case Builtin::BI__sync_bool_compare_and_swap_4:
4512
0
  case Builtin::BI__sync_bool_compare_and_swap_8:
4513
0
  case Builtin::BI__sync_bool_compare_and_swap_16:
4514
0
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4515
4516
0
  case Builtin::BI__sync_swap_1:
4517
0
  case Builtin::BI__sync_swap_2:
4518
0
  case Builtin::BI__sync_swap_4:
4519
0
  case Builtin::BI__sync_swap_8:
4520
0
  case Builtin::BI__sync_swap_16:
4521
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4522
4523
0
  case Builtin::BI__sync_lock_test_and_set_1:
4524
0
  case Builtin::BI__sync_lock_test_and_set_2:
4525
0
  case Builtin::BI__sync_lock_test_and_set_4:
4526
0
  case Builtin::BI__sync_lock_test_and_set_8:
4527
0
  case Builtin::BI__sync_lock_test_and_set_16:
4528
0
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4529
4530
0
  case Builtin::BI__sync_lock_release_1:
4531
0
  case Builtin::BI__sync_lock_release_2:
4532
0
  case Builtin::BI__sync_lock_release_4:
4533
0
  case Builtin::BI__sync_lock_release_8:
4534
0
  case Builtin::BI__sync_lock_release_16: {
4535
0
    Address Ptr = CheckAtomicAlignment(*this, E);
4536
0
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4537
4538
0
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4539
0
                                             getContext().getTypeSize(ElTy));
4540
0
    llvm::StoreInst *Store =
4541
0
        Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4542
0
    Store->setAtomic(llvm::AtomicOrdering::Release);
4543
0
    return RValue::get(nullptr);
4544
0
  }
4545
4546
0
  case Builtin::BI__sync_synchronize: {
4547
    // We assume this is supposed to correspond to a C++0x-style
4548
    // sequentially-consistent fence (i.e. this is only usable for
4549
    // synchronization, not device I/O or anything like that). This intrinsic
4550
    // is really badly designed in the sense that in theory, there isn't
4551
    // any way to safely use it... but in practice, it mostly works
4552
    // to use it with non-atomic loads and stores to get acquire/release
4553
    // semantics.
4554
0
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4555
0
    return RValue::get(nullptr);
4556
0
  }
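Despite the caveats in the comment, the lowering itself is a single sequentially consistent fence:

  void full_barrier(void) { __sync_synchronize(); } // -> fence seq_cst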
4557
4558
0
  case Builtin::BI__builtin_nontemporal_load:
4559
0
    return RValue::get(EmitNontemporalLoad(*this, E));
4560
0
  case Builtin::BI__builtin_nontemporal_store:
4561
0
    return RValue::get(EmitNontemporalStore(*this, E));
4562
0
  case Builtin::BI__c11_atomic_is_lock_free:
4563
0
  case Builtin::BI__atomic_is_lock_free: {
4564
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4565
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4566
    // _Atomic(T) is always properly-aligned.
4567
0
    const char *LibCallName = "__atomic_is_lock_free";
4568
0
    CallArgList Args;
4569
0
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4570
0
             getContext().getSizeType());
4571
0
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4572
0
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4573
0
               getContext().VoidPtrTy);
4574
0
    else
4575
0
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4576
0
               getContext().VoidPtrTy);
4577
0
    const CGFunctionInfo &FuncInfo =
4578
0
        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4579
0
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4580
0
    llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4581
0
    return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4582
0
                    ReturnValueSlot(), Args);
4583
0
  }
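Both builtins end up in the same libatomic entry point; the GNU form forwards the object pointer while the __c11 form passes a null pointer, since _Atomic(T) objects are always suitably aligned. A sketch of the GNU form:

  bool lock_free_p(long *p) {
    return __atomic_is_lock_free(sizeof *p, p); // -> call to __atomic_is_lock_free(size, ptr)
  }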
4584
4585
0
  case Builtin::BI__atomic_test_and_set: {
4586
    // Look at the argument type to determine whether this is a volatile
4587
    // operation. The parameter type is always volatile.
4588
0
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4589
0
    bool Volatile =
4590
0
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4591
4592
0
    Address Ptr =
4593
0
        EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4594
4595
0
    Value *NewVal = Builder.getInt8(1);
4596
0
    Value *Order = EmitScalarExpr(E->getArg(1));
4597
0
    if (isa<llvm::ConstantInt>(Order)) {
4598
0
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4599
0
      AtomicRMWInst *Result = nullptr;
4600
0
      switch (ord) {
4601
0
      case 0:  // memory_order_relaxed
4602
0
      default: // invalid order
4603
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4604
0
                                         llvm::AtomicOrdering::Monotonic);
4605
0
        break;
4606
0
      case 1: // memory_order_consume
4607
0
      case 2: // memory_order_acquire
4608
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4609
0
                                         llvm::AtomicOrdering::Acquire);
4610
0
        break;
4611
0
      case 3: // memory_order_release
4612
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4613
0
                                         llvm::AtomicOrdering::Release);
4614
0
        break;
4615
0
      case 4: // memory_order_acq_rel
4616
4617
0
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4618
0
                                         llvm::AtomicOrdering::AcquireRelease);
4619
0
        break;
4620
0
      case 5: // memory_order_seq_cst
4621
0
        Result = Builder.CreateAtomicRMW(
4622
0
            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4623
0
            llvm::AtomicOrdering::SequentiallyConsistent);
4624
0
        break;
4625
0
      }
4626
0
      Result->setVolatile(Volatile);
4627
0
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4628
0
    }
4629
4630
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4631
4632
0
    llvm::BasicBlock *BBs[5] = {
4633
0
      createBasicBlock("monotonic", CurFn),
4634
0
      createBasicBlock("acquire", CurFn),
4635
0
      createBasicBlock("release", CurFn),
4636
0
      createBasicBlock("acqrel", CurFn),
4637
0
      createBasicBlock("seqcst", CurFn)
4638
0
    };
4639
0
    llvm::AtomicOrdering Orders[5] = {
4640
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4641
0
        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4642
0
        llvm::AtomicOrdering::SequentiallyConsistent};
4643
4644
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4645
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4646
4647
0
    Builder.SetInsertPoint(ContBB);
4648
0
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4649
4650
0
    for (unsigned i = 0; i < 5; ++i) {
4651
0
      Builder.SetInsertPoint(BBs[i]);
4652
0
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4653
0
                                                   Ptr, NewVal, Orders[i]);
4654
0
      RMW->setVolatile(Volatile);
4655
0
      Result->addIncoming(RMW, BBs[i]);
4656
0
      Builder.CreateBr(ContBB);
4657
0
    }
4658
4659
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
4660
0
    SI->addCase(Builder.getInt32(1), BBs[1]);
4661
0
    SI->addCase(Builder.getInt32(2), BBs[1]);
4662
0
    SI->addCase(Builder.getInt32(3), BBs[2]);
4663
0
    SI->addCase(Builder.getInt32(4), BBs[3]);
4664
0
    SI->addCase(Builder.getInt32(5), BBs[4]);
4665
4666
0
    Builder.SetInsertPoint(ContBB);
4667
0
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4668
0
  }
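When the ordering argument is a compile-time constant, the case emits a single i8 exchange with the mapped ordering; a runtime ordering instead goes through the monotonic/acquire/release/acqrel/seqcst switch built above. Typical constant-order use, as a sketch:

  bool try_lock(volatile char *flag) {
    return !__atomic_test_and_set(flag, __ATOMIC_ACQUIRE); // true if the flag was previously clear
  }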
4669
4670
0
  case Builtin::BI__atomic_clear: {
4671
0
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4672
0
    bool Volatile =
4673
0
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4674
4675
0
    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4676
0
    Ptr = Ptr.withElementType(Int8Ty);
4677
0
    Value *NewVal = Builder.getInt8(0);
4678
0
    Value *Order = EmitScalarExpr(E->getArg(1));
4679
0
    if (isa<llvm::ConstantInt>(Order)) {
4680
0
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4681
0
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4682
0
      switch (ord) {
4683
0
      case 0:  // memory_order_relaxed
4684
0
      default: // invalid order
4685
0
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4686
0
        break;
4687
0
      case 3:  // memory_order_release
4688
0
        Store->setOrdering(llvm::AtomicOrdering::Release);
4689
0
        break;
4690
0
      case 5:  // memory_order_seq_cst
4691
0
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4692
0
        break;
4693
0
      }
4694
0
      return RValue::get(nullptr);
4695
0
    }
4696
4697
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4698
4699
0
    llvm::BasicBlock *BBs[3] = {
4700
0
      createBasicBlock("monotonic", CurFn),
4701
0
      createBasicBlock("release", CurFn),
4702
0
      createBasicBlock("seqcst", CurFn)
4703
0
    };
4704
0
    llvm::AtomicOrdering Orders[3] = {
4705
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4706
0
        llvm::AtomicOrdering::SequentiallyConsistent};
4707
4708
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4709
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4710
4711
0
    for (unsigned i = 0; i < 3; ++i) {
4712
0
      Builder.SetInsertPoint(BBs[i]);
4713
0
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4714
0
      Store->setOrdering(Orders[i]);
4715
0
      Builder.CreateBr(ContBB);
4716
0
    }
4717
4718
0
    SI->addCase(Builder.getInt32(0), BBs[0]);
4719
0
    SI->addCase(Builder.getInt32(3), BBs[1]);
4720
0
    SI->addCase(Builder.getInt32(5), BBs[2]);
4721
4722
0
    Builder.SetInsertPoint(ContBB);
4723
0
    return RValue::get(nullptr);
4724
0
  }
4725
4726
0
  case Builtin::BI__atomic_thread_fence:
4727
0
  case Builtin::BI__atomic_signal_fence:
4728
0
  case Builtin::BI__c11_atomic_thread_fence:
4729
0
  case Builtin::BI__c11_atomic_signal_fence: {
4730
0
    llvm::SyncScope::ID SSID;
4731
0
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4732
0
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4733
0
      SSID = llvm::SyncScope::SingleThread;
4734
0
    else
4735
0
      SSID = llvm::SyncScope::System;
4736
0
    Value *Order = EmitScalarExpr(E->getArg(0));
4737
0
    if (isa<llvm::ConstantInt>(Order)) {
4738
0
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4739
0
      switch (ord) {
4740
0
      case 0:  // memory_order_relaxed
4741
0
      default: // invalid order
4742
0
        break;
4743
0
      case 1:  // memory_order_consume
4744
0
      case 2:  // memory_order_acquire
4745
0
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4746
0
        break;
4747
0
      case 3:  // memory_order_release
4748
0
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4749
0
        break;
4750
0
      case 4:  // memory_order_acq_rel
4751
0
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4752
0
        break;
4753
0
      case 5:  // memory_order_seq_cst
4754
0
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4755
0
        break;
4756
0
      }
4757
0
      return RValue::get(nullptr);
4758
0
    }
4759
4760
0
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4761
0
    AcquireBB = createBasicBlock("acquire", CurFn);
4762
0
    ReleaseBB = createBasicBlock("release", CurFn);
4763
0
    AcqRelBB = createBasicBlock("acqrel", CurFn);
4764
0
    SeqCstBB = createBasicBlock("seqcst", CurFn);
4765
0
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4766
4767
0
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4768
0
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4769
4770
0
    Builder.SetInsertPoint(AcquireBB);
4771
0
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4772
0
    Builder.CreateBr(ContBB);
4773
0
    SI->addCase(Builder.getInt32(1), AcquireBB);
4774
0
    SI->addCase(Builder.getInt32(2), AcquireBB);
4775
4776
0
    Builder.SetInsertPoint(ReleaseBB);
4777
0
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4778
0
    Builder.CreateBr(ContBB);
4779
0
    SI->addCase(Builder.getInt32(3), ReleaseBB);
4780
4781
0
    Builder.SetInsertPoint(AcqRelBB);
4782
0
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4783
0
    Builder.CreateBr(ContBB);
4784
0
    SI->addCase(Builder.getInt32(4), AcqRelBB);
4785
4786
0
    Builder.SetInsertPoint(SeqCstBB);
4787
0
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4788
0
    Builder.CreateBr(ContBB);
4789
0
    SI->addCase(Builder.getInt32(5), SeqCstBB);
4790
4791
0
    Builder.SetInsertPoint(ContBB);
4792
0
    return RValue::get(nullptr);
4793
0
  }
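  // For illustration, a call with a constant ordering such as
  // __atomic_thread_fence(__ATOMIC_ACQUIRE) lowers to a single instruction:
  //
  //   fence acquire
  //
  // while __atomic_signal_fence(__ATOMIC_SEQ_CST) additionally gets the
  // single-thread scope:
  //
  //   fence syncscope("singlethread") seq_cst
  //
  // A non-constant ordering falls back to the switch-based expansion above.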
4794
4795
0
  case Builtin::BI__builtin_signbit:
4796
0
  case Builtin::BI__builtin_signbitf:
4797
0
  case Builtin::BI__builtin_signbitl: {
4798
0
    return RValue::get(
4799
0
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4800
0
                           ConvertType(E->getType())));
4801
0
  }
4802
0
  case Builtin::BI__warn_memset_zero_len:
4803
0
    return RValue::getIgnored();
4804
0
  case Builtin::BI__annotation: {
4805
    // Re-encode each wide string to UTF8 and make an MDString.
4806
0
    SmallVector<Metadata *, 1> Strings;
4807
0
    for (const Expr *Arg : E->arguments()) {
4808
0
      const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4809
0
      assert(Str->getCharByteWidth() == 2);
4810
0
      StringRef WideBytes = Str->getBytes();
4811
0
      std::string StrUtf8;
4812
0
      if (!convertUTF16ToUTF8String(
4813
0
              ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4814
0
        CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4815
0
        continue;
4816
0
      }
4817
0
      Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4818
0
    }
4819
4820
    // Build an MDTuple of MDStrings and emit the intrinsic call.
4821
0
    llvm::Function *F =
4822
0
        CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4823
0
    MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4824
0
    Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4825
0
    return RValue::getIgnored();
4826
0
  }
4827
0
  case Builtin::BI__builtin_annotation: {
4828
0
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4829
0
    llvm::Function *F =
4830
0
        CGM.getIntrinsic(llvm::Intrinsic::annotation,
4831
0
                         {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4832
4833
    // Get the annotation string, go through casts. Sema requires this to be a
4834
    // non-wide string literal, potentially cast, so the cast<> is safe.
4835
0
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4836
0
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4837
0
    return RValue::get(
4838
0
        EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4839
0
  }
4840
0
  case Builtin::BI__builtin_addcb:
4841
0
  case Builtin::BI__builtin_addcs:
4842
0
  case Builtin::BI__builtin_addc:
4843
0
  case Builtin::BI__builtin_addcl:
4844
0
  case Builtin::BI__builtin_addcll:
4845
0
  case Builtin::BI__builtin_subcb:
4846
0
  case Builtin::BI__builtin_subcs:
4847
0
  case Builtin::BI__builtin_subc:
4848
0
  case Builtin::BI__builtin_subcl:
4849
0
  case Builtin::BI__builtin_subcll: {
4850
4851
    // We translate all of these builtins from expressions of the form:
4852
    //   int x = ..., y = ..., carryin = ..., carryout, result;
4853
    //   result = __builtin_addc(x, y, carryin, &carryout);
4854
    //
4855
    // to LLVM IR of the form:
4856
    //
4857
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4858
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4859
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
4860
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4861
    //                                                       i32 %carryin)
4862
    //   %result = extractvalue {i32, i1} %tmp2, 0
4863
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
4864
    //   %tmp3 = or i1 %carry1, %carry2
4865
    //   %tmp4 = zext i1 %tmp3 to i32
4866
    //   store i32 %tmp4, i32* %carryout
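    //
    // For illustration, a two-limb addition can be written in terms of these
    // builtins roughly as follows (operand and result names are hypothetical):
    //
    //   unsigned long long carry;
    //   unsigned long long lo = __builtin_addcll(a_lo, b_lo, 0, &carry);
    //   unsigned long long hi = __builtin_addcll(a_hi, b_hi, carry, &carry);
    //
    // Each call feeds the carry-out of the previous limb into the carry-in of
    // the next one.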
4867
4868
    // Scalarize our inputs.
4869
0
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
4870
0
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4871
0
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4872
0
    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4873
4874
    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4875
0
    llvm::Intrinsic::ID IntrinsicId;
4876
0
    switch (BuiltinID) {
4877
0
    default: llvm_unreachable("Unknown multiprecision builtin id.");
4878
0
    case Builtin::BI__builtin_addcb:
4879
0
    case Builtin::BI__builtin_addcs:
4880
0
    case Builtin::BI__builtin_addc:
4881
0
    case Builtin::BI__builtin_addcl:
4882
0
    case Builtin::BI__builtin_addcll:
4883
0
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4884
0
      break;
4885
0
    case Builtin::BI__builtin_subcb:
4886
0
    case Builtin::BI__builtin_subcs:
4887
0
    case Builtin::BI__builtin_subc:
4888
0
    case Builtin::BI__builtin_subcl:
4889
0
    case Builtin::BI__builtin_subcll:
4890
0
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4891
0
      break;
4892
0
    }
4893
4894
    // Construct our resulting LLVM IR expression.
4895
0
    llvm::Value *Carry1;
4896
0
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4897
0
                                              X, Y, Carry1);
4898
0
    llvm::Value *Carry2;
4899
0
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4900
0
                                              Sum1, Carryin, Carry2);
4901
0
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4902
0
                                               X->getType());
4903
0
    Builder.CreateStore(CarryOut, CarryOutPtr);
4904
0
    return RValue::get(Sum2);
4905
0
  }
4906
4907
0
  case Builtin::BI__builtin_add_overflow:
4908
0
  case Builtin::BI__builtin_sub_overflow:
4909
0
  case Builtin::BI__builtin_mul_overflow: {
4910
0
    const clang::Expr *LeftArg = E->getArg(0);
4911
0
    const clang::Expr *RightArg = E->getArg(1);
4912
0
    const clang::Expr *ResultArg = E->getArg(2);
4913
4914
0
    clang::QualType ResultQTy =
4915
0
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4916
4917
0
    WidthAndSignedness LeftInfo =
4918
0
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4919
0
    WidthAndSignedness RightInfo =
4920
0
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4921
0
    WidthAndSignedness ResultInfo =
4922
0
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4923
4924
    // Handle mixed-sign multiplication as a special case, because adding
4925
    // runtime or backend support for our generic irgen would be too expensive.
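    // For illustration, the special case applies when the two operands have
    // different signedness, e.g. (names hypothetical):
    //
    //   int A = ...; unsigned long long B = ...; long long Result;
    //   bool Overflowed = __builtin_mul_overflow(A, B, &Result);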
4926
0
    if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4927
0
      return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4928
0
                                          RightInfo, ResultArg, ResultQTy,
4929
0
                                          ResultInfo);
4930
4931
0
    if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
4932
0
                                              ResultInfo))
4933
0
      return EmitCheckedUnsignedMultiplySignedResult(
4934
0
          *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
4935
0
          ResultInfo);
4936
4937
0
    WidthAndSignedness EncompassingInfo =
4938
0
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
4939
4940
0
    llvm::Type *EncompassingLLVMTy =
4941
0
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
4942
4943
0
    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
4944
4945
0
    llvm::Intrinsic::ID IntrinsicId;
4946
0
    switch (BuiltinID) {
4947
0
    default:
4948
0
      llvm_unreachable("Unknown overflow builtin id.");
4949
0
    case Builtin::BI__builtin_add_overflow:
4950
0
      IntrinsicId = EncompassingInfo.Signed
4951
0
                        ? llvm::Intrinsic::sadd_with_overflow
4952
0
                        : llvm::Intrinsic::uadd_with_overflow;
4953
0
      break;
4954
0
    case Builtin::BI__builtin_sub_overflow:
4955
0
      IntrinsicId = EncompassingInfo.Signed
4956
0
                        ? llvm::Intrinsic::ssub_with_overflow
4957
0
                        : llvm::Intrinsic::usub_with_overflow;
4958
0
      break;
4959
0
    case Builtin::BI__builtin_mul_overflow:
4960
0
      IntrinsicId = EncompassingInfo.Signed
4961
0
                        ? llvm::Intrinsic::smul_with_overflow
4962
0
                        : llvm::Intrinsic::umul_with_overflow;
4963
0
      break;
4964
0
    }
4965
4966
0
    llvm::Value *Left = EmitScalarExpr(LeftArg);
4967
0
    llvm::Value *Right = EmitScalarExpr(RightArg);
4968
0
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);
4969
4970
    // Extend each operand to the encompassing type.
4971
0
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
4972
0
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
4973
4974
    // Perform the operation on the extended values.
4975
0
    llvm::Value *Overflow, *Result;
4976
0
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
4977
4978
0
    if (EncompassingInfo.Width > ResultInfo.Width) {
4979
      // The encompassing type is wider than the result type, so we need to
4980
      // truncate it.
4981
0
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
4982
4983
      // To see if the truncation caused an overflow, we will extend
4984
      // the result and then compare it to the original result.
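      // For example, if the wide computation produced the value 300 and the
      // result type is 8 bits, the truncated value is 44; re-extending 44
      // does not compare equal to 300, so the truncation itself is reported
      // as an overflow.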
4985
0
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
4986
0
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
4987
0
      llvm::Value *TruncationOverflow =
4988
0
          Builder.CreateICmpNE(Result, ResultTruncExt);
4989
4990
0
      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
4991
0
      Result = ResultTrunc;
4992
0
    }
4993
4994
    // Finally, store the result using the pointer.
4995
0
    bool isVolatile =
4996
0
      ResultArg->getType()->getPointeeType().isVolatileQualified();
4997
0
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
4998
4999
0
    return RValue::get(Overflow);
5000
0
  }
5001
5002
0
  case Builtin::BI__builtin_uadd_overflow:
5003
0
  case Builtin::BI__builtin_uaddl_overflow:
5004
0
  case Builtin::BI__builtin_uaddll_overflow:
5005
0
  case Builtin::BI__builtin_usub_overflow:
5006
0
  case Builtin::BI__builtin_usubl_overflow:
5007
0
  case Builtin::BI__builtin_usubll_overflow:
5008
0
  case Builtin::BI__builtin_umul_overflow:
5009
0
  case Builtin::BI__builtin_umull_overflow:
5010
0
  case Builtin::BI__builtin_umulll_overflow:
5011
0
  case Builtin::BI__builtin_sadd_overflow:
5012
0
  case Builtin::BI__builtin_saddl_overflow:
5013
0
  case Builtin::BI__builtin_saddll_overflow:
5014
0
  case Builtin::BI__builtin_ssub_overflow:
5015
0
  case Builtin::BI__builtin_ssubl_overflow:
5016
0
  case Builtin::BI__builtin_ssubll_overflow:
5017
0
  case Builtin::BI__builtin_smul_overflow:
5018
0
  case Builtin::BI__builtin_smull_overflow:
5019
0
  case Builtin::BI__builtin_smulll_overflow: {
5020
5021
    // We translate all of these builtins directly to the relevant llvm IR node.
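    //
    // For illustration, a call such as (names hypothetical)
    //
    //   int Sum;
    //   bool Overflowed = __builtin_sadd_overflow(A, B, &Sum);
    //
    // becomes a call to @llvm.sadd.with.overflow.i32: the first result is
    // stored through the pointer and the second result (the overflow bit) is
    // the value of the builtin.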
5022
5023
    // Scalarize our inputs.
5024
0
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
5025
0
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5026
0
    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5027
5028
    // Decide which of the overflow intrinsics we are lowering to:
5029
0
    llvm::Intrinsic::ID IntrinsicId;
5030
0
    switch (BuiltinID) {
5031
0
    default: llvm_unreachable("Unknown overflow builtin id.");
5032
0
    case Builtin::BI__builtin_uadd_overflow:
5033
0
    case Builtin::BI__builtin_uaddl_overflow:
5034
0
    case Builtin::BI__builtin_uaddll_overflow:
5035
0
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5036
0
      break;
5037
0
    case Builtin::BI__builtin_usub_overflow:
5038
0
    case Builtin::BI__builtin_usubl_overflow:
5039
0
    case Builtin::BI__builtin_usubll_overflow:
5040
0
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5041
0
      break;
5042
0
    case Builtin::BI__builtin_umul_overflow:
5043
0
    case Builtin::BI__builtin_umull_overflow:
5044
0
    case Builtin::BI__builtin_umulll_overflow:
5045
0
      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5046
0
      break;
5047
0
    case Builtin::BI__builtin_sadd_overflow:
5048
0
    case Builtin::BI__builtin_saddl_overflow:
5049
0
    case Builtin::BI__builtin_saddll_overflow:
5050
0
      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5051
0
      break;
5052
0
    case Builtin::BI__builtin_ssub_overflow:
5053
0
    case Builtin::BI__builtin_ssubl_overflow:
5054
0
    case Builtin::BI__builtin_ssubll_overflow:
5055
0
      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5056
0
      break;
5057
0
    case Builtin::BI__builtin_smul_overflow:
5058
0
    case Builtin::BI__builtin_smull_overflow:
5059
0
    case Builtin::BI__builtin_smulll_overflow:
5060
0
      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5061
0
      break;
5062
0
    }
5063
5064
5065
0
    llvm::Value *Carry;
5066
0
    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5067
0
    Builder.CreateStore(Sum, SumOutPtr);
5068
5069
0
    return RValue::get(Carry);
5070
0
  }
5071
0
  case Builtin::BIaddressof:
5072
0
  case Builtin::BI__addressof:
5073
0
  case Builtin::BI__builtin_addressof:
5074
0
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5075
0
  case Builtin::BI__builtin_function_start:
5076
0
    return RValue::get(CGM.GetFunctionStart(
5077
0
        E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5078
0
  case Builtin::BI__builtin_operator_new:
5079
0
    return EmitBuiltinNewDeleteCall(
5080
0
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5081
0
  case Builtin::BI__builtin_operator_delete:
5082
0
    EmitBuiltinNewDeleteCall(
5083
0
        E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5084
0
    return RValue::get(nullptr);
5085
5086
0
  case Builtin::BI__builtin_is_aligned:
5087
0
    return EmitBuiltinIsAligned(E);
5088
0
  case Builtin::BI__builtin_align_up:
5089
0
    return EmitBuiltinAlignTo(E, true);
5090
0
  case Builtin::BI__builtin_align_down:
5091
0
    return EmitBuiltinAlignTo(E, false);
5092
5093
0
  case Builtin::BI__noop:
5094
    // __noop always evaluates to an integer literal zero.
5095
0
    return RValue::get(ConstantInt::get(IntTy, 0));
5096
0
  case Builtin::BI__builtin_call_with_static_chain: {
5097
0
    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5098
0
    const Expr *Chain = E->getArg(1);
5099
0
    return EmitCall(Call->getCallee()->getType(),
5100
0
                    EmitCallee(Call->getCallee()), Call, ReturnValue,
5101
0
                    EmitScalarExpr(Chain));
5102
0
  }
5103
0
  case Builtin::BI_InterlockedExchange8:
5104
0
  case Builtin::BI_InterlockedExchange16:
5105
0
  case Builtin::BI_InterlockedExchange:
5106
0
  case Builtin::BI_InterlockedExchangePointer:
5107
0
    return RValue::get(
5108
0
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5109
0
  case Builtin::BI_InterlockedCompareExchangePointer:
5110
0
  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5111
0
    llvm::Type *RTy;
5112
0
    llvm::IntegerType *IntType = IntegerType::get(
5113
0
        getLLVMContext(), getContext().getTypeSize(E->getType()));
5114
5115
0
    Address DestAddr = CheckAtomicAlignment(*this, E);
5116
5117
0
    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5118
0
    RTy = Exchange->getType();
5119
0
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5120
5121
0
    llvm::Value *Comparand =
5122
0
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5123
5124
0
    auto Ordering =
5125
0
      BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5126
0
      AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5127
5128
0
    auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5129
0
                                              Ordering, Ordering);
5130
0
    Result->setVolatile(true);
5131
5132
0
    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5133
0
                                                                         0),
5134
0
                                              RTy));
5135
0
  }
5136
0
  case Builtin::BI_InterlockedCompareExchange8:
5137
0
  case Builtin::BI_InterlockedCompareExchange16:
5138
0
  case Builtin::BI_InterlockedCompareExchange:
5139
0
  case Builtin::BI_InterlockedCompareExchange64:
5140
0
    return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5141
0
  case Builtin::BI_InterlockedIncrement16:
5142
0
  case Builtin::BI_InterlockedIncrement:
5143
0
    return RValue::get(
5144
0
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5145
0
  case Builtin::BI_InterlockedDecrement16:
5146
0
  case Builtin::BI_InterlockedDecrement:
5147
0
    return RValue::get(
5148
0
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5149
0
  case Builtin::BI_InterlockedAnd8:
5150
0
  case Builtin::BI_InterlockedAnd16:
5151
0
  case Builtin::BI_InterlockedAnd:
5152
0
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5153
0
  case Builtin::BI_InterlockedExchangeAdd8:
5154
0
  case Builtin::BI_InterlockedExchangeAdd16:
5155
0
  case Builtin::BI_InterlockedExchangeAdd:
5156
0
    return RValue::get(
5157
0
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5158
0
  case Builtin::BI_InterlockedExchangeSub8:
5159
0
  case Builtin::BI_InterlockedExchangeSub16:
5160
0
  case Builtin::BI_InterlockedExchangeSub:
5161
0
    return RValue::get(
5162
0
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5163
0
  case Builtin::BI_InterlockedOr8:
5164
0
  case Builtin::BI_InterlockedOr16:
5165
0
  case Builtin::BI_InterlockedOr:
5166
0
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5167
0
  case Builtin::BI_InterlockedXor8:
5168
0
  case Builtin::BI_InterlockedXor16:
5169
0
  case Builtin::BI_InterlockedXor:
5170
0
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5171
5172
0
  case Builtin::BI_bittest64:
5173
0
  case Builtin::BI_bittest:
5174
0
  case Builtin::BI_bittestandcomplement64:
5175
0
  case Builtin::BI_bittestandcomplement:
5176
0
  case Builtin::BI_bittestandreset64:
5177
0
  case Builtin::BI_bittestandreset:
5178
0
  case Builtin::BI_bittestandset64:
5179
0
  case Builtin::BI_bittestandset:
5180
0
  case Builtin::BI_interlockedbittestandreset:
5181
0
  case Builtin::BI_interlockedbittestandreset64:
5182
0
  case Builtin::BI_interlockedbittestandset64:
5183
0
  case Builtin::BI_interlockedbittestandset:
5184
0
  case Builtin::BI_interlockedbittestandset_acq:
5185
0
  case Builtin::BI_interlockedbittestandset_rel:
5186
0
  case Builtin::BI_interlockedbittestandset_nf:
5187
0
  case Builtin::BI_interlockedbittestandreset_acq:
5188
0
  case Builtin::BI_interlockedbittestandreset_rel:
5189
0
  case Builtin::BI_interlockedbittestandreset_nf:
5190
0
    return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5191
5192
    // These builtins exist to emit regular volatile loads and stores not
5193
    // affected by the -fms-volatile setting.
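    // For illustration, a call like __iso_volatile_load32(p) emits a plain
    //
    //   %v = load volatile i32, ptr %p
    //
    // regardless of that setting.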
5194
0
  case Builtin::BI__iso_volatile_load8:
5195
0
  case Builtin::BI__iso_volatile_load16:
5196
0
  case Builtin::BI__iso_volatile_load32:
5197
0
  case Builtin::BI__iso_volatile_load64:
5198
0
    return RValue::get(EmitISOVolatileLoad(*this, E));
5199
0
  case Builtin::BI__iso_volatile_store8:
5200
0
  case Builtin::BI__iso_volatile_store16:
5201
0
  case Builtin::BI__iso_volatile_store32:
5202
0
  case Builtin::BI__iso_volatile_store64:
5203
0
    return RValue::get(EmitISOVolatileStore(*this, E));
5204
5205
0
  case Builtin::BI__exception_code:
5206
0
  case Builtin::BI_exception_code:
5207
0
    return RValue::get(EmitSEHExceptionCode());
5208
0
  case Builtin::BI__exception_info:
5209
0
  case Builtin::BI_exception_info:
5210
0
    return RValue::get(EmitSEHExceptionInfo());
5211
0
  case Builtin::BI__abnormal_termination:
5212
0
  case Builtin::BI_abnormal_termination:
5213
0
    return RValue::get(EmitSEHAbnormalTermination());
5214
0
  case Builtin::BI_setjmpex:
5215
0
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5216
0
        E->getArg(0)->getType()->isPointerType())
5217
0
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5218
0
    break;
5219
0
  case Builtin::BI_setjmp:
5220
0
    if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5221
0
        E->getArg(0)->getType()->isPointerType()) {
5222
0
      if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5223
0
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5224
0
      else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5225
0
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5226
0
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5227
0
    }
5228
0
    break;
5229
5230
  // C++ std:: builtins.
5231
0
  case Builtin::BImove:
5232
0
  case Builtin::BImove_if_noexcept:
5233
0
  case Builtin::BIforward:
5234
0
  case Builtin::BIforward_like:
5235
0
  case Builtin::BIas_const:
5236
0
    return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5237
0
  case Builtin::BI__GetExceptionInfo: {
5238
0
    if (llvm::GlobalVariable *GV =
5239
0
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5240
0
      return RValue::get(GV);
5241
0
    break;
5242
0
  }
5243
5244
0
  case Builtin::BI__fastfail:
5245
0
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5246
5247
0
  case Builtin::BI__builtin_coro_id:
5248
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5249
0
  case Builtin::BI__builtin_coro_promise:
5250
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5251
0
  case Builtin::BI__builtin_coro_resume:
5252
0
    EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5253
0
    return RValue::get(nullptr);
5254
0
  case Builtin::BI__builtin_coro_frame:
5255
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5256
0
  case Builtin::BI__builtin_coro_noop:
5257
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5258
0
  case Builtin::BI__builtin_coro_free:
5259
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5260
0
  case Builtin::BI__builtin_coro_destroy:
5261
0
    EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5262
0
    return RValue::get(nullptr);
5263
0
  case Builtin::BI__builtin_coro_done:
5264
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5265
0
  case Builtin::BI__builtin_coro_alloc:
5266
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5267
0
  case Builtin::BI__builtin_coro_begin:
5268
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5269
0
  case Builtin::BI__builtin_coro_end:
5270
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5271
0
  case Builtin::BI__builtin_coro_suspend:
5272
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5273
0
  case Builtin::BI__builtin_coro_size:
5274
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5275
0
  case Builtin::BI__builtin_coro_align:
5276
0
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5277
5278
  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5279
0
  case Builtin::BIread_pipe:
5280
0
  case Builtin::BIwrite_pipe: {
5281
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5282
0
          *Arg1 = EmitScalarExpr(E->getArg(1));
5283
0
    CGOpenCLRuntime OpenCLRT(CGM);
5284
0
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5285
0
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5286
5287
    // Type of the generic packet parameter.
5288
0
    unsigned GenericAS =
5289
0
        getContext().getTargetAddressSpace(LangAS::opencl_generic);
5290
0
    llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5291
5292
    // Testing which overloaded version we should generate the call for.
5293
0
    if (2U == E->getNumArgs()) {
5294
0
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5295
0
                                                             : "__write_pipe_2";
5296
      // Creating a generic function type to be able to call with any builtin or
5297
      // user defined type.
5298
0
      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5299
0
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5300
0
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5301
0
      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5302
0
      return RValue::get(
5303
0
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5304
0
                          {Arg0, BCast, PacketSize, PacketAlign}));
5305
0
    } else {
5306
0
      assert(4 == E->getNumArgs() &&
5307
0
             "Illegal number of parameters to pipe function");
5308
0
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5309
0
                                                             : "__write_pipe_4";
5310
5311
0
      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5312
0
                              Int32Ty, Int32Ty};
5313
0
      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5314
0
            *Arg3 = EmitScalarExpr(E->getArg(3));
5315
0
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5316
0
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5317
0
      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5318
      // We know the third argument is an integer type, but we may need to cast
5319
      // it to i32.
5320
0
      if (Arg2->getType() != Int32Ty)
5321
0
        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5322
0
      return RValue::get(
5323
0
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5324
0
                          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5325
0
    }
5326
0
  }
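  // For illustration, in OpenCL source (names hypothetical):
  //
  //   read_pipe(p, &val);                 // lowers to a __read_pipe_2 call
  //   read_pipe(p, rsv_id, index, &val);  // lowers to a __read_pipe_4 call
  //
  // with the packet size and alignment appended as extra i32 arguments in
  // both cases.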
5327
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5328
  // functions
5329
0
  case Builtin::BIreserve_read_pipe:
5330
0
  case Builtin::BIreserve_write_pipe:
5331
0
  case Builtin::BIwork_group_reserve_read_pipe:
5332
0
  case Builtin::BIwork_group_reserve_write_pipe:
5333
0
  case Builtin::BIsub_group_reserve_read_pipe:
5334
0
  case Builtin::BIsub_group_reserve_write_pipe: {
5335
    // Composing the mangled name for the function.
5336
0
    const char *Name;
5337
0
    if (BuiltinID == Builtin::BIreserve_read_pipe)
5338
0
      Name = "__reserve_read_pipe";
5339
0
    else if (BuiltinID == Builtin::BIreserve_write_pipe)
5340
0
      Name = "__reserve_write_pipe";
5341
0
    else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5342
0
      Name = "__work_group_reserve_read_pipe";
5343
0
    else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5344
0
      Name = "__work_group_reserve_write_pipe";
5345
0
    else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5346
0
      Name = "__sub_group_reserve_read_pipe";
5347
0
    else
5348
0
      Name = "__sub_group_reserve_write_pipe";
5349
5350
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5351
0
          *Arg1 = EmitScalarExpr(E->getArg(1));
5352
0
    llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5353
0
    CGOpenCLRuntime OpenCLRT(CGM);
5354
0
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5355
0
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5356
5357
    // Building the generic function prototype.
5358
0
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5359
0
    llvm::FunctionType *FTy = llvm::FunctionType::get(
5360
0
        ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5361
    // We know the second argument is an integer type, but we may need to cast
5362
    // it to i32.
5363
0
    if (Arg1->getType() != Int32Ty)
5364
0
      Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5365
0
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5366
0
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
5367
0
  }
5368
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5369
  // functions
5370
0
  case Builtin::BIcommit_read_pipe:
5371
0
  case Builtin::BIcommit_write_pipe:
5372
0
  case Builtin::BIwork_group_commit_read_pipe:
5373
0
  case Builtin::BIwork_group_commit_write_pipe:
5374
0
  case Builtin::BIsub_group_commit_read_pipe:
5375
0
  case Builtin::BIsub_group_commit_write_pipe: {
5376
0
    const char *Name;
5377
0
    if (BuiltinID == Builtin::BIcommit_read_pipe)
5378
0
      Name = "__commit_read_pipe";
5379
0
    else if (BuiltinID == Builtin::BIcommit_write_pipe)
5380
0
      Name = "__commit_write_pipe";
5381
0
    else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5382
0
      Name = "__work_group_commit_read_pipe";
5383
0
    else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5384
0
      Name = "__work_group_commit_write_pipe";
5385
0
    else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5386
0
      Name = "__sub_group_commit_read_pipe";
5387
0
    else
5388
0
      Name = "__sub_group_commit_write_pipe";
5389
5390
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5391
0
          *Arg1 = EmitScalarExpr(E->getArg(1));
5392
0
    CGOpenCLRuntime OpenCLRT(CGM);
5393
0
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5394
0
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5395
5396
    // Building the generic function prototype.
5397
0
    llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5398
0
    llvm::FunctionType *FTy =
5399
0
        llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5400
0
                                llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5401
5402
0
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5403
0
                                       {Arg0, Arg1, PacketSize, PacketAlign}));
5404
0
  }
5405
  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5406
0
  case Builtin::BIget_pipe_num_packets:
5407
0
  case Builtin::BIget_pipe_max_packets: {
5408
0
    const char *BaseName;
5409
0
    const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5410
0
    if (BuiltinID == Builtin::BIget_pipe_num_packets)
5411
0
      BaseName = "__get_pipe_num_packets";
5412
0
    else
5413
0
      BaseName = "__get_pipe_max_packets";
5414
0
    std::string Name = std::string(BaseName) +
5415
0
                       std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5416
5417
    // Building the generic function prototype.
5418
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
5419
0
    CGOpenCLRuntime OpenCLRT(CGM);
5420
0
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5421
0
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5422
0
    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5423
0
    llvm::FunctionType *FTy = llvm::FunctionType::get(
5424
0
        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5425
5426
0
    return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5427
0
                                       {Arg0, PacketSize, PacketAlign}));
5428
0
  }
5429
5430
  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5431
0
  case Builtin::BIto_global:
5432
0
  case Builtin::BIto_local:
5433
0
  case Builtin::BIto_private: {
5434
0
    auto Arg0 = EmitScalarExpr(E->getArg(0));
5435
0
    auto NewArgT = llvm::PointerType::get(
5436
0
        getLLVMContext(),
5437
0
        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5438
0
    auto NewRetT = llvm::PointerType::get(
5439
0
        getLLVMContext(),
5440
0
        CGM.getContext().getTargetAddressSpace(
5441
0
            E->getType()->getPointeeType().getAddressSpace()));
5442
0
    auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5443
0
    llvm::Value *NewArg;
5444
0
    if (Arg0->getType()->getPointerAddressSpace() !=
5445
0
        NewArgT->getPointerAddressSpace())
5446
0
      NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5447
0
    else
5448
0
      NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5449
0
    auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5450
0
    auto NewCall =
5451
0
        EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5452
0
    return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5453
0
      ConvertType(E->getType())));
5454
0
  }
5455
5456
  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5457
  // It contains four different overload formats specified in Table 6.13.17.1.
5458
0
  case Builtin::BIenqueue_kernel: {
5459
0
    StringRef Name; // Generated function call name
5460
0
    unsigned NumArgs = E->getNumArgs();
5461
5462
0
    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5463
0
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5464
0
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5465
5466
0
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5467
0
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5468
0
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5469
0
    llvm::Value *Range = NDRangeL.getAddress(*this).getPointer();
5470
0
    llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5471
5472
0
    if (NumArgs == 4) {
5473
      // The most basic form of the call with parameters:
5474
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5475
0
      Name = "__enqueue_kernel_basic";
5476
0
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5477
0
                              GenericVoidPtrTy};
5478
0
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5479
0
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5480
5481
0
      auto Info =
5482
0
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5483
0
      llvm::Value *Kernel =
5484
0
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5485
0
      llvm::Value *Block =
5486
0
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5487
5488
0
      AttrBuilder B(Builder.getContext());
5489
0
      B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5490
0
      llvm::AttributeList ByValAttrSet =
5491
0
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5492
5493
0
      auto RTCall =
5494
0
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5495
0
                          {Queue, Flags, Range, Kernel, Block});
5496
0
      RTCall->setAttributes(ByValAttrSet);
5497
0
      return RValue::get(RTCall);
5498
0
    }
5499
0
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5500
5501
    // Create a temporary array to hold the sizes of local pointer arguments
5502
    // for the block. \p First is the position of the first size argument.
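    // For illustration, with a hypothetical call
    //
    //   enqueue_kernel(q, flags, ndrange,
    //                  ^(local int *p0, local float *p1){ ... }, 64, 128);
    //
    // the trailing sizes 64 and 128 are stored into this temporary array and
    // its address is passed to the __enqueue_kernel_varargs runtime function.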
5503
0
    auto CreateArrayForSizeVar = [=](unsigned First)
5504
0
        -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5505
0
      llvm::APInt ArraySize(32, NumArgs - First);
5506
0
      QualType SizeArrayTy = getContext().getConstantArrayType(
5507
0
          getContext().getSizeType(), ArraySize, nullptr,
5508
0
          ArraySizeModifier::Normal,
5509
0
          /*IndexTypeQuals=*/0);
5510
0
      auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5511
0
      llvm::Value *TmpPtr = Tmp.getPointer();
5512
0
      llvm::Value *TmpSize = EmitLifetimeStart(
5513
0
          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5514
0
      llvm::Value *ElemPtr;
5515
      // Each of the following arguments specifies the size of the corresponding
5516
      // argument passed to the enqueued block.
5517
0
      auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5518
0
      for (unsigned I = First; I < NumArgs; ++I) {
5519
0
        auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5520
0
        auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5521
0
                                      {Zero, Index});
5522
0
        if (I == First)
5523
0
          ElemPtr = GEP;
5524
0
        auto *V =
5525
0
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5526
0
        Builder.CreateAlignedStore(
5527
0
            V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5528
0
      }
5529
0
      return std::tie(ElemPtr, TmpSize, TmpPtr);
5530
0
    };
5531
5532
    // Could have events and/or varargs.
5533
0
    if (E->getArg(3)->getType()->isBlockPointerType()) {
5534
      // No events passed, but has variadic arguments.
5535
0
      Name = "__enqueue_kernel_varargs";
5536
0
      auto Info =
5537
0
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5538
0
      llvm::Value *Kernel =
5539
0
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5540
0
      auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5541
0
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5542
0
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5543
5544
      // Create a vector of the arguments, as well as a constant value to
5545
      // express to the runtime the number of variadic arguments.
5546
0
      llvm::Value *const Args[] = {Queue,  Flags,
5547
0
                                   Range,  Kernel,
5548
0
                                   Block,  ConstantInt::get(IntTy, NumArgs - 4),
5549
0
                                   ElemPtr};
5550
0
      llvm::Type *const ArgTys[] = {
5551
0
          QueueTy,          IntTy, RangeTy,           GenericVoidPtrTy,
5552
0
          GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5553
5554
0
      llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5555
0
      auto Call = RValue::get(
5556
0
          EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5557
0
      if (TmpSize)
5558
0
        EmitLifetimeEnd(TmpSize, TmpPtr);
5559
0
      return Call;
5560
0
    }
5561
    // Any calls now have event arguments passed.
5562
0
    if (NumArgs >= 7) {
5563
0
      llvm::PointerType *PtrTy = llvm::PointerType::get(
5564
0
          CGM.getLLVMContext(),
5565
0
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5566
5567
0
      llvm::Value *NumEvents =
5568
0
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5569
5570
      // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth
5571
      // arguments to be null pointer constants (including the `0` literal),
5572
      // we can take that into account and emit a null pointer directly.
5573
0
      llvm::Value *EventWaitList = nullptr;
5574
0
      if (E->getArg(4)->isNullPointerConstant(
5575
0
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5576
0
        EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5577
0
      } else {
5578
0
        EventWaitList = E->getArg(4)->getType()->isArrayType()
5579
0
                        ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
5580
0
                        : EmitScalarExpr(E->getArg(4));
5581
        // Convert to generic address space.
5582
0
        EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5583
0
      }
5584
0
      llvm::Value *EventRet = nullptr;
5585
0
      if (E->getArg(5)->isNullPointerConstant(
5586
0
              getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5587
0
        EventRet = llvm::ConstantPointerNull::get(PtrTy);
5588
0
      } else {
5589
0
        EventRet =
5590
0
            Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5591
0
      }
5592
5593
0
      auto Info =
5594
0
          CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5595
0
      llvm::Value *Kernel =
5596
0
          Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5597
0
      llvm::Value *Block =
5598
0
          Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5599
5600
0
      std::vector<llvm::Type *> ArgTys = {
5601
0
          QueueTy, Int32Ty, RangeTy,          Int32Ty,
5602
0
          PtrTy,   PtrTy,   GenericVoidPtrTy, GenericVoidPtrTy};
5603
5604
0
      std::vector<llvm::Value *> Args = {Queue,     Flags,         Range,
5605
0
                                         NumEvents, EventWaitList, EventRet,
5606
0
                                         Kernel,    Block};
5607
5608
0
      if (NumArgs == 7) {
5609
        // Has events but no variadics.
5610
0
        Name = "__enqueue_kernel_basic_events";
5611
0
        llvm::FunctionType *FTy = llvm::FunctionType::get(
5612
0
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5613
0
        return RValue::get(
5614
0
            EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5615
0
                            llvm::ArrayRef<llvm::Value *>(Args)));
5616
0
      }
5617
      // Has event info and variadics.
5618
      // Pass the number of variadics to the runtime function too.
5619
0
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5620
0
      ArgTys.push_back(Int32Ty);
5621
0
      Name = "__enqueue_kernel_events_varargs";
5622
5623
0
      llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5624
0
      std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5625
0
      Args.push_back(ElemPtr);
5626
0
      ArgTys.push_back(ElemPtr->getType());
5627
5628
0
      llvm::FunctionType *FTy = llvm::FunctionType::get(
5629
0
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5630
0
      auto Call =
5631
0
          RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5632
0
                                      llvm::ArrayRef<llvm::Value *>(Args)));
5633
0
      if (TmpSize)
5634
0
        EmitLifetimeEnd(TmpSize, TmpPtr);
5635
0
      return Call;
5636
0
    }
5637
0
    [[fallthrough]];
5638
0
  }
5639
  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5640
  // parameter.
5641
0
  case Builtin::BIget_kernel_work_group_size: {
5642
0
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5643
0
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5644
0
    auto Info =
5645
0
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5646
0
    Value *Kernel =
5647
0
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5648
0
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5649
0
    return RValue::get(EmitRuntimeCall(
5650
0
        CGM.CreateRuntimeFunction(
5651
0
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5652
0
                                    false),
5653
0
            "__get_kernel_work_group_size_impl"),
5654
0
        {Kernel, Arg}));
5655
0
  }
5656
0
  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5657
0
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5658
0
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5659
0
    auto Info =
5660
0
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5661
0
    Value *Kernel =
5662
0
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5663
0
    Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5664
0
    return RValue::get(EmitRuntimeCall(
5665
0
        CGM.CreateRuntimeFunction(
5666
0
            llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5667
0
                                    false),
5668
0
            "__get_kernel_preferred_work_group_size_multiple_impl"),
5669
0
        {Kernel, Arg}));
5670
0
  }
5671
0
  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5672
0
  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5673
0
    llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5674
0
        getContext().getTargetAddressSpace(LangAS::opencl_generic));
5675
0
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5676
0
    llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
5677
0
    auto Info =
5678
0
        CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5679
0
    Value *Kernel =
5680
0
        Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5681
0
    Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5682
0
    const char *Name =
5683
0
        BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5684
0
            ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5685
0
            : "__get_kernel_sub_group_count_for_ndrange_impl";
5686
0
    return RValue::get(EmitRuntimeCall(
5687
0
        CGM.CreateRuntimeFunction(
5688
0
            llvm::FunctionType::get(
5689
0
                IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5690
0
                false),
5691
0
            Name),
5692
0
        {NDRange, Kernel, Block}));
5693
0
  }
5694
5695
0
  case Builtin::BI__builtin_store_half:
5696
0
  case Builtin::BI__builtin_store_halff: {
5697
0
    Value *Val = EmitScalarExpr(E->getArg(0));
5698
0
    Address Address = EmitPointerWithAlignment(E->getArg(1));
5699
0
    Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5700
0
    Builder.CreateStore(HalfVal, Address);
5701
0
    return RValue::get(nullptr);
5702
0
  }
5703
0
  case Builtin::BI__builtin_load_half: {
5704
0
    Address Address = EmitPointerWithAlignment(E->getArg(0));
5705
0
    Value *HalfVal = Builder.CreateLoad(Address);
5706
0
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5707
0
  }
5708
0
  case Builtin::BI__builtin_load_halff: {
5709
0
    Address Address = EmitPointerWithAlignment(E->getArg(0));
5710
0
    Value *HalfVal = Builder.CreateLoad(Address);
5711
0
    return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5712
0
  }
5713
0
  case Builtin::BIprintf:
5714
0
    if (getTarget().getTriple().isNVPTX() ||
5715
0
        getTarget().getTriple().isAMDGCN()) {
5716
0
      if (getLangOpts().OpenMPIsTargetDevice)
5717
0
        return EmitOpenMPDevicePrintfCallExpr(E);
5718
0
      if (getTarget().getTriple().isNVPTX())
5719
0
        return EmitNVPTXDevicePrintfCallExpr(E);
5720
0
      if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5721
0
        return EmitAMDGPUDevicePrintfCallExpr(E);
5722
0
    }
5723
5724
0
    break;
5725
0
  case Builtin::BI__builtin_canonicalize:
5726
0
  case Builtin::BI__builtin_canonicalizef:
5727
0
  case Builtin::BI__builtin_canonicalizef16:
5728
0
  case Builtin::BI__builtin_canonicalizel:
5729
0
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5730
5731
0
  case Builtin::BI__builtin_thread_pointer: {
5732
0
    if (!getContext().getTargetInfo().isTLSSupported())
5733
0
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5734
    // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5735
0
    break;
5736
0
  }
5737
0
  case Builtin::BI__builtin_os_log_format:
5738
0
    return emitBuiltinOSLogFormat(*E);
5739
5740
0
  case Builtin::BI__xray_customevent: {
5741
0
    if (!ShouldXRayInstrumentFunction())
5742
0
      return RValue::getIgnored();
5743
5744
0
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5745
0
            XRayInstrKind::Custom))
5746
0
      return RValue::getIgnored();
5747
5748
0
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5749
0
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5750
0
        return RValue::getIgnored();
5751
5752
0
    Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5753
0
    auto FTy = F->getFunctionType();
5754
0
    auto Arg0 = E->getArg(0);
5755
0
    auto Arg0Val = EmitScalarExpr(Arg0);
5756
0
    auto Arg0Ty = Arg0->getType();
5757
0
    auto PTy0 = FTy->getParamType(0);
5758
0
    if (PTy0 != Arg0Val->getType()) {
5759
0
      if (Arg0Ty->isArrayType())
5760
0
        Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
5761
0
      else
5762
0
        Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5763
0
    }
5764
0
    auto Arg1 = EmitScalarExpr(E->getArg(1));
5765
0
    auto PTy1 = FTy->getParamType(1);
5766
0
    if (PTy1 != Arg1->getType())
5767
0
      Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5768
0
    return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5769
0
  }
5770
5771
0
  case Builtin::BI__xray_typedevent: {
5772
    // TODO: There should be a way to always emit events even if the current
5773
    // function is not instrumented. Losing events in a stream can cripple
5774
    // a trace.
5775
0
    if (!ShouldXRayInstrumentFunction())
5776
0
      return RValue::getIgnored();
5777
5778
0
    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5779
0
            XRayInstrKind::Typed))
5780
0
      return RValue::getIgnored();
5781
5782
0
    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5783
0
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5784
0
        return RValue::getIgnored();
5785
5786
0
    Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5787
0
    auto FTy = F->getFunctionType();
5788
0
    auto Arg0 = EmitScalarExpr(E->getArg(0));
5789
0
    auto PTy0 = FTy->getParamType(0);
5790
0
    if (PTy0 != Arg0->getType())
5791
0
      Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5792
0
    auto Arg1 = E->getArg(1);
5793
0
    auto Arg1Val = EmitScalarExpr(Arg1);
5794
0
    auto Arg1Ty = Arg1->getType();
5795
0
    auto PTy1 = FTy->getParamType(1);
5796
0
    if (PTy1 != Arg1Val->getType()) {
5797
0
      if (Arg1Ty->isArrayType())
5798
0
        Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
5799
0
      else
5800
0
        Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5801
0
    }
5802
0
    auto Arg2 = EmitScalarExpr(E->getArg(2));
5803
0
    auto PTy2 = FTy->getParamType(2);
5804
0
    if (PTy2 != Arg2->getType())
5805
0
      Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5806
0
    return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5807
0
  }
5808
5809
0
  case Builtin::BI__builtin_ms_va_start:
5810
0
  case Builtin::BI__builtin_ms_va_end:
5811
0
    return RValue::get(
5812
0
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5813
0
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
5814
5815
0
  case Builtin::BI__builtin_ms_va_copy: {
5816
    // Lower this manually. We can't reliably determine whether or not any
5817
    // given va_copy() is for a Win64 va_list from the calling convention
5818
    // alone, because it's legal to do this from a System V ABI function.
5819
    // With opaque pointer types, we won't have enough information in LLVM
5820
    // IR to determine this from the argument types, either. Best to do it
5821
    // now, while we have enough information.
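    // The lowering below is therefore just a pointer-sized copy; roughly:
    //
    //   %ap.val = load ptr, ptr %src
    //   store ptr %ap.val, ptr %dest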
5822
0
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
5823
0
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5824
5825
0
    DestAddr = DestAddr.withElementType(Int8PtrTy);
5826
0
    SrcAddr = SrcAddr.withElementType(Int8PtrTy);
5827
5828
0
    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5829
0
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5830
0
  }
5831
5832
0
  case Builtin::BI__builtin_get_device_side_mangled_name: {
5833
0
    auto Name = CGM.getCUDARuntime().getDeviceSideName(
5834
0
        cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5835
0
    auto Str = CGM.GetAddrOfConstantCString(Name, "");
5836
0
    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5837
0
                               llvm::ConstantInt::get(SizeTy, 0)};
5838
0
    auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5839
0
                                                     Str.getPointer(), Zeros);
5840
0
    return RValue::get(Ptr);
5841
0
  }
5842
0
  }
5843
5844
  // If this is an alias for a lib function (e.g. __builtin_sin), emit
5845
  // the call using the normal call path, but using the unmangled
5846
  // version of the function name.
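  // For example, __builtin_sin(x) is emitted as an ordinary call to 'sin'.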
5847
0
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5848
0
    return emitLibraryCall(*this, FD, E,
5849
0
                           CGM.getBuiltinLibFunction(FD, BuiltinID));
5850
5851
  // If this is a predefined lib function (e.g. malloc), emit the call
5852
  // using exactly the normal call path.
5853
0
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5854
0
    return emitLibraryCall(*this, FD, E,
5855
0
                      cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
5856
5857
  // Check that a call to a target specific builtin has the correct target
5858
  // features.
5859
  // This check is done down here to avoid it for non-target-specific builtins;
5860
  // however, if generic builtins start to require generic target features,
5861
  // we can move this up to the beginning of the function.
5862
0
  checkTargetFeatures(E, FD);
5863
5864
0
  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
5865
0
    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
5866
5867
  // See if we have a target specific intrinsic.
5868
0
  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
5869
0
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
5870
0
  StringRef Prefix =
5871
0
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
5872
0
  if (!Prefix.empty()) {
5873
0
    IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
5874
    // NOTE: we don't need to perform a compatibility flag check here, since
5875
    // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
5876
    // filters the MS builtins via ALL_MS_LANGUAGES; those are filtered earlier.
5877
0
    if (IntrinsicID == Intrinsic::not_intrinsic)
5878
0
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
5879
0
  }
5880
5881
0
  if (IntrinsicID != Intrinsic::not_intrinsic) {
5882
0
    SmallVector<Value*, 16> Args;
5883
5884
    // Find out if any arguments are required to be integer constant
5885
    // expressions.
5886
0
    unsigned ICEArguments = 0;
5887
0
    ASTContext::GetBuiltinTypeError Error;
5888
0
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5889
0
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
5890
5891
0
    Function *F = CGM.getIntrinsic(IntrinsicID);
5892
0
    llvm::FunctionType *FTy = F->getFunctionType();
5893
5894
0
    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
5895
0
      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
5896
      // If the intrinsic arg type is different from the builtin arg type
5897
      // we need to do a bit cast.
5898
0
      llvm::Type *PTy = FTy->getParamType(i);
5899
0
      if (PTy != ArgValue->getType()) {
5900
        // XXX - vector of pointers?
5901
0
        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
5902
0
          if (PtrTy->getAddressSpace() !=
5903
0
              ArgValue->getType()->getPointerAddressSpace()) {
5904
0
            ArgValue = Builder.CreateAddrSpaceCast(
5905
0
                ArgValue, llvm::PointerType::get(getLLVMContext(),
5906
0
                                                 PtrTy->getAddressSpace()));
5907
0
          }
5908
0
        }
5909
5910
0
        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
5911
0
               "Must be able to losslessly bit cast to param");
5912
        // Cast vector type (e.g., v256i32) to x86_amx; this only happens
5913
        // in AMX intrinsics.
5914
0
        if (PTy->isX86_AMXTy())
5915
0
          ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
5916
0
                                             {ArgValue->getType()}, {ArgValue});
5917
0
        else
5918
0
          ArgValue = Builder.CreateBitCast(ArgValue, PTy);
5919
0
      }
5920
5921
0
      Args.push_back(ArgValue);
5922
0
    }
5923
5924
0
    Value *V = Builder.CreateCall(F, Args);
5925
0
    QualType BuiltinRetType = E->getType();
5926
5927
0
    llvm::Type *RetTy = VoidTy;
5928
0
    if (!BuiltinRetType->isVoidType())
5929
0
      RetTy = ConvertType(BuiltinRetType);
5930
5931
0
    if (RetTy != V->getType()) {
5932
      // XXX - vector of pointers?
5933
0
      if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
5934
0
        if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
5935
0
          V = Builder.CreateAddrSpaceCast(
5936
0
              V, llvm::PointerType::get(getLLVMContext(),
5937
0
                                        PtrTy->getAddressSpace()));
5938
0
        }
5939
0
      }
5940
5941
0
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
5942
0
             "Must be able to losslessly bit cast result type");
5943
      // Cast x86_amx to vector type (e.g., v256i32); this only happens
5944
      // in AMX intrinsics.
5945
0
      if (V->getType()->isX86_AMXTy())
5946
0
        V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
5947
0
                                    {V});
5948
0
      else
5949
0
        V = Builder.CreateBitCast(V, RetTy);
5950
0
    }
5951
5952
0
    if (RetTy->isVoidTy())
5953
0
      return RValue::get(nullptr);
5954
5955
0
    return RValue::get(V);
5956
0
  }
5957
5958
  // Some target-specific builtins can have aggregate return values, e.g.
5959
  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
5960
  // ReturnValue to be non-null, so that the target-specific emission code can
5961
  // always just emit into it.
5962
0
  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
5963
0
  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
5964
0
    Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
5965
0
    ReturnValue = ReturnValueSlot(DestPtr, false);
5966
0
  }
5967
5968
  // Now see if we can emit a target-specific builtin.
5969
0
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
5970
0
    switch (EvalKind) {
5971
0
    case TEK_Scalar:
5972
0
      if (V->getType()->isVoidTy())
5973
0
        return RValue::get(nullptr);
5974
0
      return RValue::get(V);
5975
0
    case TEK_Aggregate:
5976
0
      return RValue::getAggregate(ReturnValue.getValue(),
5977
0
                                  ReturnValue.isVolatile());
5978
0
    case TEK_Complex:
5979
0
      llvm_unreachable("No current target builtin returns complex");
5980
0
    }
5981
0
    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
5982
0
  }
5983
5984
0
  if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
5985
0
    return EmitHipStdParUnsupportedBuiltin(this, FD);
5986
5987
0
  ErrorUnsupported(E, "builtin function");
5988
5989
  // Unknown builtin, for now just dump it out and return undef.
5990
0
  return GetUndefRValue(E->getType());
5991
0
}
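
To keep the overall control flow in mind, here is a minimal standalone sketch (plain C++, not the Clang/LLVM API; every name below is a made-up illustration) of the fallback order the tail of EmitBuiltinExpr walks through above: predefined library call, generic intrinsic lookup by arch prefix and builtin name, target-specific emission, and finally the unsupported-builtin diagnostic.

// Minimal standalone sketch (all names hypothetical) of the fallback order.
#include <iostream>
#include <optional>
#include <string>

struct ToyResult { std::string How; };

// Stands in for the name-based Intrinsic::getIntrinsicForClangBuiltin lookup.
std::optional<ToyResult> lookupGenericIntrinsic(const std::string &Name) {
  if (Name == "__builtin_example_popcount")
    return ToyResult{"generic intrinsic call"};
  return std::nullopt;
}

// Stands in for the per-architecture EmitTargetBuiltinExpr hook.
std::optional<ToyResult> emitTargetSpecific(const std::string &Name) {
  if (Name.rfind("__builtin_arm_", 0) == 0)
    return ToyResult{"target-specific emission"};
  return std::nullopt;
}

ToyResult emitBuiltin(const std::string &Name, bool IsPredefinedLibFunction) {
  if (IsPredefinedLibFunction)
    return {"ordinary library call"};            // e.g. malloc
  if (auto R = lookupGenericIntrinsic(Name))     // arch prefix + name lookup
    return *R;
  if (auto R = emitTargetSpecific(Name))         // per-arch emitter
    return *R;
  return {"unsupported-builtin diagnostic"};     // error + undef result
}

int main() {
  std::cout << emitBuiltin("malloc", true).How << "\n";
  std::cout << emitBuiltin("__builtin_example_popcount", false).How << "\n";
  std::cout << emitBuiltin("__builtin_arm_nop", false).How << "\n";
  std::cout << emitBuiltin("__builtin_mystery", false).How << "\n";
}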
5992
5993
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
5994
                                        unsigned BuiltinID, const CallExpr *E,
5995
                                        ReturnValueSlot ReturnValue,
5996
0
                                        llvm::Triple::ArchType Arch) {
5997
  // When compiling in HipStdPar mode we have to be conservative in rejecting
5998
  // target specific features in the FE, and defer the possible error to the
5999
  // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6000
  // referenced by an accelerator executable function, we emit an error.
6001
  // Returning nullptr here leads to the builtin being handled in
6002
  // EmitHipStdParUnsupportedBuiltin.
6003
0
  if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6004
0
      Arch != CGF->getTarget().getTriple().getArch())
6005
0
    return nullptr;
6006
6007
0
  switch (Arch) {
6008
0
  case llvm::Triple::arm:
6009
0
  case llvm::Triple::armeb:
6010
0
  case llvm::Triple::thumb:
6011
0
  case llvm::Triple::thumbeb:
6012
0
    return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6013
0
  case llvm::Triple::aarch64:
6014
0
  case llvm::Triple::aarch64_32:
6015
0
  case llvm::Triple::aarch64_be:
6016
0
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6017
0
  case llvm::Triple::bpfeb:
6018
0
  case llvm::Triple::bpfel:
6019
0
    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6020
0
  case llvm::Triple::x86:
6021
0
  case llvm::Triple::x86_64:
6022
0
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6023
0
  case llvm::Triple::ppc:
6024
0
  case llvm::Triple::ppcle:
6025
0
  case llvm::Triple::ppc64:
6026
0
  case llvm::Triple::ppc64le:
6027
0
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6028
0
  case llvm::Triple::r600:
6029
0
  case llvm::Triple::amdgcn:
6030
0
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6031
0
  case llvm::Triple::systemz:
6032
0
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6033
0
  case llvm::Triple::nvptx:
6034
0
  case llvm::Triple::nvptx64:
6035
0
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6036
0
  case llvm::Triple::wasm32:
6037
0
  case llvm::Triple::wasm64:
6038
0
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6039
0
  case llvm::Triple::hexagon:
6040
0
    return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6041
0
  case llvm::Triple::riscv32:
6042
0
  case llvm::Triple::riscv64:
6043
0
    return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6044
0
  default:
6045
0
    return nullptr;
6046
0
  }
6047
0
}
6048
6049
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6050
                                              const CallExpr *E,
6051
0
                                              ReturnValueSlot ReturnValue) {
6052
0
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6053
0
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6054
0
    return EmitTargetArchBuiltinExpr(
6055
0
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6056
0
        ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6057
0
  }
6058
6059
0
  return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6060
0
                                   getTarget().getTriple().getArch());
6061
0
}
6062
6063
static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6064
                                          NeonTypeFlags TypeFlags,
6065
                                          bool HasLegalHalfType = true,
6066
                                          bool V1Ty = false,
6067
0
                                          bool AllowBFloatArgsAndRet = true) {
6068
0
  int IsQuad = TypeFlags.isQuad();
6069
0
  switch (TypeFlags.getEltType()) {
6070
0
  case NeonTypeFlags::Int8:
6071
0
  case NeonTypeFlags::Poly8:
6072
0
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6073
0
  case NeonTypeFlags::Int16:
6074
0
  case NeonTypeFlags::Poly16:
6075
0
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6076
0
  case NeonTypeFlags::BFloat16:
6077
0
    if (AllowBFloatArgsAndRet)
6078
0
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6079
0
    else
6080
0
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6081
0
  case NeonTypeFlags::Float16:
6082
0
    if (HasLegalHalfType)
6083
0
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6084
0
    else
6085
0
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6086
0
  case NeonTypeFlags::Int32:
6087
0
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6088
0
  case NeonTypeFlags::Int64:
6089
0
  case NeonTypeFlags::Poly64:
6090
0
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6091
0
  case NeonTypeFlags::Poly128:
6092
    // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6093
    // a lot of the i128 and f128 API is still missing,
6094
    // so we use v16i8 to represent poly128 and get it pattern matched.
6095
0
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6096
0
  case NeonTypeFlags::Float32:
6097
0
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6098
0
  case NeonTypeFlags::Float64:
6099
0
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6100
0
  }
6101
0
  llvm_unreachable("Unknown vector element type!");
6102
0
}
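
The lane counts in GetNeonType all follow one rule: a 64-bit NEON vector holds 64/EltBits lanes and a quad (128-bit) vector holds twice as many, which is what the `<< IsQuad` shifts encode. A standalone sketch of that rule (plain C++, hypothetical helper name):

// Standalone sketch of the NEON lane-count rule.
#include <cstdio>

constexpr unsigned neonLanes(unsigned EltBits, bool IsQuad) {
  return (64u / EltBits) << (IsQuad ? 1 : 0);   // d-register lanes, doubled for q
}

int main() {
  std::printf("i8  : %u / %u lanes\n", neonLanes(8, false),  neonLanes(8, true));   // 8 / 16
  std::printf("i16 : %u / %u lanes\n", neonLanes(16, false), neonLanes(16, true));  // 4 / 8
  std::printf("f32 : %u / %u lanes\n", neonLanes(32, false), neonLanes(32, true));  // 2 / 4
  std::printf("i64 : %u / %u lanes\n", neonLanes(64, false), neonLanes(64, true));  // 1 / 2
}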
6103
6104
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6105
0
                                          NeonTypeFlags IntTypeFlags) {
6106
0
  int IsQuad = IntTypeFlags.isQuad();
6107
0
  switch (IntTypeFlags.getEltType()) {
6108
0
  case NeonTypeFlags::Int16:
6109
0
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6110
0
  case NeonTypeFlags::Int32:
6111
0
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6112
0
  case NeonTypeFlags::Int64:
6113
0
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6114
0
  default:
6115
0
    llvm_unreachable("Type can't be converted to floating-point!");
6116
0
  }
6117
0
}
6118
6119
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6120
0
                                      const ElementCount &Count) {
6121
0
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
6122
0
  return Builder.CreateShuffleVector(V, V, SV, "lane");
6123
0
}
6124
6125
0
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6126
0
  ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6127
0
  return EmitNeonSplat(V, C, EC);
6128
0
}
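
EmitNeonSplat builds a shuffle whose mask repeats a single lane index, so every output lane copies the same input lane. A standalone illustration of the effect (plain C++, not the IRBuilder API):

// Plain C++ illustration of a splat shuffle: the mask is one lane index
// repeated, so every output lane copies that single input lane.
#include <array>
#include <cstddef>
#include <cstdio>

template <std::size_t N>
std::array<int, N> splatLane(const std::array<int, N> &V, unsigned Lane) {
  std::array<int, N> Out{};
  for (std::size_t i = 0; i < N; ++i)
    Out[i] = V[Lane];                    // mask = {Lane, Lane, ..., Lane}
  return Out;
}

int main() {
  std::array<int, 4> V{10, 20, 30, 40};
  for (int X : splatLane(V, 2))
    std::printf("%d ", X);               // prints: 30 30 30 30
  std::printf("\n");
}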
6129
6130
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6131
                                     const char *name,
6132
0
                                     unsigned shift, bool rightshift) {
6133
0
  unsigned j = 0;
6134
0
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6135
0
       ai != ae; ++ai, ++j) {
6136
0
    if (F->isConstrainedFPIntrinsic())
6137
0
      if (ai->getType()->isMetadataTy())
6138
0
        continue;
6139
0
    if (shift > 0 && shift == j)
6140
0
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6141
0
    else
6142
0
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6143
0
  }
6144
6145
0
  if (F->isConstrainedFPIntrinsic())
6146
0
    return Builder.CreateConstrainedFPCall(F, Ops, name);
6147
0
  else
6148
0
    return Builder.CreateCall(F, Ops, name);
6149
0
}
6150
6151
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6152
0
                                            bool neg) {
6153
0
  int SV = cast<ConstantInt>(V)->getSExtValue();
6154
0
  return ConstantInt::get(Ty, neg ? -SV : SV);
6155
0
}
6156
6157
// Right-shift a vector by a constant.
6158
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6159
                                          llvm::Type *Ty, bool usgn,
6160
0
                                          const char *name) {
6161
0
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6162
6163
0
  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6164
0
  int EltSize = VTy->getScalarSizeInBits();
6165
6166
0
  Vec = Builder.CreateBitCast(Vec, Ty);
6167
6168
  // lshr/ashr are undefined when the shift amount is equal to the vector
6169
  // element size.
6170
0
  if (ShiftAmt == EltSize) {
6171
0
    if (usgn) {
6172
      // Right-shifting an unsigned value by its size yields 0.
6173
0
      return llvm::ConstantAggregateZero::get(VTy);
6174
0
    } else {
6175
      // Right-shifting a signed value by its size is equivalent
6176
      // to a shift of size-1.
6177
0
      --ShiftAmt;
6178
0
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6179
0
    }
6180
0
  }
6181
6182
0
  Shift = EmitNeonShiftVector(Shift, Ty, false);
6183
0
  if (usgn)
6184
0
    return Builder.CreateLShr(Vec, Shift, name);
6185
0
  else
6186
0
    return Builder.CreateAShr(Vec, Shift, name);
6187
0
}
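
Because lshr/ashr by the full element width is undefined in LLVM IR, EmitNeonRShiftImm substitutes the result directly: 0 for unsigned lanes, and a shift by width-1 for signed lanes, which still produces the "all sign bits" value. A standalone demonstration of that substitution (plain C++):

// Standalone illustration: a requested right shift equal to the lane width
// (8 on 8-bit lanes) becomes 0 for unsigned lanes and >> 7 for signed lanes.
#include <cstdint>
#include <cstdio>

int main() {
  int8_t SignedLane = -100;

  uint8_t UnsignedResult = 0;                          // unsigned lane >> 8  ->  0
  int8_t  SignedResult   = (int8_t)(SignedLane >> 7);  // signed lane   >> 8  ->  >> 7

  std::printf("unsigned lane: %u\n", UnsignedResult);     // 0
  std::printf("signed lane:   %d\n", (int)SignedResult);  // -1 (all sign bits)
}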
6188
6189
enum {
6190
  AddRetType = (1 << 0),
6191
  Add1ArgType = (1 << 1),
6192
  Add2ArgTypes = (1 << 2),
6193
6194
  VectorizeRetType = (1 << 3),
6195
  VectorizeArgTypes = (1 << 4),
6196
6197
  InventFloatType = (1 << 5),
6198
  UnsignedAlts = (1 << 6),
6199
6200
  Use64BitVectors = (1 << 7),
6201
  Use128BitVectors = (1 << 8),
6202
6203
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6204
  VectorRet = AddRetType | VectorizeRetType,
6205
  VectorRetGetArgs01 =
6206
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6207
  FpCmpzModifiers =
6208
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6209
};
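
The composite values at the bottom of this enum are plain bitwise unions of the single-bit modifiers above; emitters test individual bits to decide how to build the intrinsic signature. A small standalone check of that composition (plain C++ mirror of the flags, names prefixed Toy to mark them as illustrations):

// Plain C++ mirror of the flags above, checking the composite modifiers are
// simple bitwise unions of the single-bit values.
#include <cassert>
#include <cstdint>

enum : uint64_t {
  ToyAddRetType = 1 << 0, ToyAdd1ArgType = 1 << 1, ToyAdd2ArgTypes = 1 << 2,
  ToyVectorizeRetType = 1 << 3, ToyVectorizeArgTypes = 1 << 4,
  ToyInventFloatType = 1 << 5,
  ToyVectorRet = ToyAddRetType | ToyVectorizeRetType,
  ToyFpCmpzModifiers =
      ToyAddRetType | ToyVectorizeRetType | ToyAdd1ArgType | ToyInventFloatType,
};

int main() {
  assert(ToyFpCmpzModifiers & ToyAdd1ArgType);      // one argument type appended
  assert(ToyFpCmpzModifiers & ToyInventFloatType);  // a float type is synthesized
  assert(!(ToyFpCmpzModifiers & ToyAdd2ArgTypes));  // but not two argument types
  assert((ToyVectorRet & ToyAddRetType) && (ToyVectorRet & ToyVectorizeRetType));
  return 0;
}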
6210
6211
namespace {
6212
struct ARMVectorIntrinsicInfo {
6213
  const char *NameHint;
6214
  unsigned BuiltinID;
6215
  unsigned LLVMIntrinsic;
6216
  unsigned AltLLVMIntrinsic;
6217
  uint64_t TypeModifier;
6218
6219
0
  bool operator<(unsigned RHSBuiltinID) const {
6220
0
    return BuiltinID < RHSBuiltinID;
6221
0
  }
6222
0
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6223
0
    return BuiltinID < TE.BuiltinID;
6224
0
  }
6225
};
6226
} // end anonymous namespace
6227
6228
#define NEONMAP0(NameBase) \
6229
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6230
6231
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6232
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6233
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6234
6235
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6236
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6237
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6238
      TypeModifier }
6239
6240
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6241
  NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6242
  NEONMAP0(splat_lane_v),
6243
  NEONMAP0(splat_laneq_v),
6244
  NEONMAP0(splatq_lane_v),
6245
  NEONMAP0(splatq_laneq_v),
6246
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6247
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6248
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
6249
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6250
  NEONMAP0(vadd_v),
6251
  NEONMAP0(vaddhn_v),
6252
  NEONMAP0(vaddq_v),
6253
  NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6254
  NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6255
  NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6256
  NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6257
  NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6258
  NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6259
  NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6260
  NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6261
  NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6262
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6263
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6264
  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6265
  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6266
  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6267
  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6268
  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6269
  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6270
  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6271
  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6272
  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6273
  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6274
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
6275
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6276
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6277
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6278
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
6279
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6280
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6281
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6282
  NEONMAP0(vceqz_v),
6283
  NEONMAP0(vceqzq_v),
6284
  NEONMAP0(vcgez_v),
6285
  NEONMAP0(vcgezq_v),
6286
  NEONMAP0(vcgtz_v),
6287
  NEONMAP0(vcgtzq_v),
6288
  NEONMAP0(vclez_v),
6289
  NEONMAP0(vclezq_v),
6290
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6291
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6292
  NEONMAP0(vcltz_v),
6293
  NEONMAP0(vcltzq_v),
6294
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6295
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6296
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6297
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6298
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6299
  NEONMAP0(vcvt_f16_s16),
6300
  NEONMAP0(vcvt_f16_u16),
6301
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6302
  NEONMAP0(vcvt_f32_v),
6303
  NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6304
  NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6305
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6306
  NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6307
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6308
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6309
  NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6310
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6311
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6312
  NEONMAP0(vcvt_s16_f16),
6313
  NEONMAP0(vcvt_s32_v),
6314
  NEONMAP0(vcvt_s64_v),
6315
  NEONMAP0(vcvt_u16_f16),
6316
  NEONMAP0(vcvt_u32_v),
6317
  NEONMAP0(vcvt_u64_v),
6318
  NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6319
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6320
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6321
  NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6322
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6323
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6324
  NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6325
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6326
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6327
  NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6328
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6329
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6330
  NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6331
  NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6332
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6333
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6334
  NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6335
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6336
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6337
  NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6338
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6339
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6340
  NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6341
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6342
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6343
  NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6344
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6345
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6346
  NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6347
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6348
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6349
  NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6350
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6351
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6352
  NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6353
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6354
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6355
  NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6356
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6357
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6358
  NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6359
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6360
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6361
  NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6362
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6363
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6364
  NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6365
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6366
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6367
  NEONMAP0(vcvtq_f16_s16),
6368
  NEONMAP0(vcvtq_f16_u16),
6369
  NEONMAP0(vcvtq_f32_v),
6370
  NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6371
  NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6372
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6373
  NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6374
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6375
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6376
  NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6377
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6378
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6379
  NEONMAP0(vcvtq_s16_f16),
6380
  NEONMAP0(vcvtq_s32_v),
6381
  NEONMAP0(vcvtq_s64_v),
6382
  NEONMAP0(vcvtq_u16_f16),
6383
  NEONMAP0(vcvtq_u32_v),
6384
  NEONMAP0(vcvtq_u64_v),
6385
  NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6386
  NEONMAP1(vdot_u32, arm_neon_udot, 0),
6387
  NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6388
  NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6389
  NEONMAP0(vext_v),
6390
  NEONMAP0(vextq_v),
6391
  NEONMAP0(vfma_v),
6392
  NEONMAP0(vfmaq_v),
6393
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6394
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6395
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6396
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6397
  NEONMAP0(vld1_dup_v),
6398
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
6399
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6400
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6401
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6402
  NEONMAP0(vld1q_dup_v),
6403
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6404
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6405
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6406
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6407
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6408
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6409
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
6410
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6411
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6412
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6413
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6414
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6415
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
6416
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6417
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6418
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6419
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6420
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6421
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
6422
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6423
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6424
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6425
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6426
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6427
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6428
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6429
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6430
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6431
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6432
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6433
  NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6434
  NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6435
  NEONMAP0(vmovl_v),
6436
  NEONMAP0(vmovn_v),
6437
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6438
  NEONMAP0(vmull_v),
6439
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6440
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6441
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6442
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6443
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6444
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6445
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6446
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6447
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6448
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6449
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6450
  NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6451
  NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6452
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6453
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6454
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6455
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6456
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6457
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6458
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6459
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6460
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6461
  NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6462
  NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6463
  NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6464
  NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6465
  NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6466
  NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6467
  NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6468
  NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6469
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6470
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6471
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6472
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6473
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6474
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6475
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6476
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6477
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6478
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6479
  NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6480
  NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6481
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6482
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6483
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6484
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6485
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6486
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6487
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6488
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6489
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6490
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6491
  NEONMAP0(vrndi_v),
6492
  NEONMAP0(vrndiq_v),
6493
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6494
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6495
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6496
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6497
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6498
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6499
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6500
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6501
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6502
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6503
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6504
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6505
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6506
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6507
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6508
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6509
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6510
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6511
  NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6512
  NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6513
  NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6514
  NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6515
  NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6516
  NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6517
  NEONMAP0(vshl_n_v),
6518
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6519
  NEONMAP0(vshll_n_v),
6520
  NEONMAP0(vshlq_n_v),
6521
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6522
  NEONMAP0(vshr_n_v),
6523
  NEONMAP0(vshrn_n_v),
6524
  NEONMAP0(vshrq_n_v),
6525
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
6526
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6527
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6528
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6529
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6530
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6531
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6532
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6533
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6534
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
6535
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6536
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6537
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6538
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
6539
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6540
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6541
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6542
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
6543
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6544
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6545
  NEONMAP0(vsubhn_v),
6546
  NEONMAP0(vtrn_v),
6547
  NEONMAP0(vtrnq_v),
6548
  NEONMAP0(vtst_v),
6549
  NEONMAP0(vtstq_v),
6550
  NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6551
  NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6552
  NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6553
  NEONMAP0(vuzp_v),
6554
  NEONMAP0(vuzpq_v),
6555
  NEONMAP0(vzip_v),
6556
  NEONMAP0(vzipq_v)
6557
};
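
The two operator< overloads on ARMVectorIntrinsicInfo compare entries by BuiltinID, which suggests these tables are kept sorted and searched with a binary search. A hedged standalone sketch of such a lookup (plain C++, toy types and IDs; the actual lookup helper used by this file is not shown here):

// Toy binary-search lookup over a BuiltinID-sorted table.
#include <algorithm>
#include <cstdio>
#include <iterator>

struct ToyIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; }
};

static const ToyIntrinsicInfo ToyMap[] = {
    {"vabd_v", 10}, {"vabs_v", 20}, {"vadd_v", 30},   // sorted by BuiltinID
};

const ToyIntrinsicInfo *findToyIntrinsic(unsigned BuiltinID) {
  const ToyIntrinsicInfo *It =
      std::lower_bound(std::begin(ToyMap), std::end(ToyMap), BuiltinID);
  if (It != std::end(ToyMap) && It->BuiltinID == BuiltinID)
    return It;
  return nullptr;
}

int main() {
  if (const ToyIntrinsicInfo *I = findToyIntrinsic(20))
    std::printf("found %s\n", I->NameHint);   // found vabs_v
}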
6558
6559
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6560
  NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6561
  NEONMAP0(splat_lane_v),
6562
  NEONMAP0(splat_laneq_v),
6563
  NEONMAP0(splatq_lane_v),
6564
  NEONMAP0(splatq_laneq_v),
6565
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6566
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6567
  NEONMAP0(vadd_v),
6568
  NEONMAP0(vaddhn_v),
6569
  NEONMAP0(vaddq_p128),
6570
  NEONMAP0(vaddq_v),
6571
  NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6572
  NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6573
  NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6574
  NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6575
  NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6576
  NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6577
  NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6578
  NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6579
  NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6580
  NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6581
  NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6582
  NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6583
  NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6584
  NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6585
  NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6586
  NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6587
  NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6588
  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6589
  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6590
  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6591
  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6592
  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6593
  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6594
  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6595
  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6596
  NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6597
  NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6598
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6599
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6600
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6601
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6602
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6603
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6604
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6605
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6606
  NEONMAP0(vceqz_v),
6607
  NEONMAP0(vceqzq_v),
6608
  NEONMAP0(vcgez_v),
6609
  NEONMAP0(vcgezq_v),
6610
  NEONMAP0(vcgtz_v),
6611
  NEONMAP0(vcgtzq_v),
6612
  NEONMAP0(vclez_v),
6613
  NEONMAP0(vclezq_v),
6614
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6615
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6616
  NEONMAP0(vcltz_v),
6617
  NEONMAP0(vcltzq_v),
6618
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
6619
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6620
  NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6621
  NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6622
  NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6623
  NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6624
  NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6625
  NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6626
  NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6627
  NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6628
  NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6629
  NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6630
  NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6631
  NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6632
  NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6633
  NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6634
  NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6635
  NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6636
  NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6637
  NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6638
  NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6639
  NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6640
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6641
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6642
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6643
  NEONMAP0(vcvt_f16_s16),
6644
  NEONMAP0(vcvt_f16_u16),
6645
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6646
  NEONMAP0(vcvt_f32_v),
6647
  NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6648
  NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6649
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6650
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6651
  NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6652
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6653
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6654
  NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6655
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6656
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6657
  NEONMAP0(vcvtq_f16_s16),
6658
  NEONMAP0(vcvtq_f16_u16),
6659
  NEONMAP0(vcvtq_f32_v),
6660
  NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6661
  NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6662
  NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6663
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6664
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6665
  NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6666
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6667
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6668
  NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6669
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6670
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6671
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6672
  NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6673
  NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6674
  NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6675
  NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6676
  NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6677
  NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6678
  NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6679
  NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6680
  NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6681
  NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6682
  NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6683
  NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6684
  NEONMAP0(vext_v),
6685
  NEONMAP0(vextq_v),
6686
  NEONMAP0(vfma_v),
6687
  NEONMAP0(vfmaq_v),
6688
  NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6689
  NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6690
  NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6691
  NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6692
  NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6693
  NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6694
  NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6695
  NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6696
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6697
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6698
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6699
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6700
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6701
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6702
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6703
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6704
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6705
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6706
  NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6707
  NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6708
  NEONMAP0(vmovl_v),
6709
  NEONMAP0(vmovn_v),
6710
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6711
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6712
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6713
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6714
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6715
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6716
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6717
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6718
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6719
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6720
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6721
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6722
  NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6723
  NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6724
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6725
  NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6726
  NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6727
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6728
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6729
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6730
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6731
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6732
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6733
  NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6734
  NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6735
  NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6736
  NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6737
  NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6738
  NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6739
  NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6740
  NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6741
  NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6742
  NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6743
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6744
  NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6745
  NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6746
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6747
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6748
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6749
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6750
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6751
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6752
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6753
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6754
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6755
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6756
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6757
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6758
  NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6759
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6760
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6761
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6762
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6763
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6764
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6765
  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6766
  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6767
  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6768
  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6769
  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6770
  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6771
  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6772
  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6773
  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6774
  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6775
  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6776
  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6777
  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6778
  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6779
  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6780
  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6781
  NEONMAP0(vrndi_v),
6782
  NEONMAP0(vrndiq_v),
6783
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6784
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6785
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6786
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6787
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6788
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6789
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6790
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6791
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6792
  NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
6793
  NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
6794
  NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
6795
  NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
6796
  NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
6797
  NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
6798
  NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
6799
  NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
6800
  NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
6801
  NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
6802
  NEONMAP0(vshl_n_v),
6803
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6804
  NEONMAP0(vshll_n_v),
6805
  NEONMAP0(vshlq_n_v),
6806
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6807
  NEONMAP0(vshr_n_v),
6808
  NEONMAP0(vshrn_n_v),
6809
  NEONMAP0(vshrq_n_v),
6810
  NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
6811
  NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
6812
  NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
6813
  NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
6814
  NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
6815
  NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
6816
  NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
6817
  NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
6818
  NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
6819
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6820
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6821
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6822
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6823
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6824
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6825
  NEONMAP0(vsubhn_v),
6826
  NEONMAP0(vtst_v),
6827
  NEONMAP0(vtstq_v),
6828
  NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
6829
  NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
6830
  NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
6831
  NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
6832
};
6833
6834
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6835
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6836
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6837
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6838
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6839
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6840
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6841
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6842
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6843
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6844
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6845
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6846
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6847
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6848
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6849
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6850
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6851
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6852
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6853
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6854
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6855
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6856
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6857
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6858
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6859
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6860
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6861
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
6862
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
6863
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6864
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6865
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6866
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6867
  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6868
  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6869
  NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
6870
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6871
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6872
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
6873
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
6874
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6875
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6876
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
6877
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
6878
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6879
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6880
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
6881
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
6882
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
6883
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
6884
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
6885
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
6886
  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
6887
  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
6888
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
6889
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6890
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6891
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6892
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6893
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6894
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6895
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6896
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6897
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
6898
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
6899
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6900
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6901
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6902
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6903
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6904
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6905
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6906
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6907
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
6908
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
6909
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
6910
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
6911
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
6912
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6913
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6914
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6915
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
6916
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6917
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
6918
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6919
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
6920
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
6921
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
6922
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6923
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
6924
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
6925
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
6926
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6927
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6928
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
6929
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
6930
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
6931
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
6932
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
6933
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
6934
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
6935
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
6936
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
6937
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
6938
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
6939
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
6940
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6941
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6942
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
6943
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
6944
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
6945
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6946
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
6947
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6948
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
6949
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
6950
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
6951
  NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
6952
  NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6953
  NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
6954
  NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6955
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
6956
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
6957
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6958
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6959
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
6960
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
6961
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
6962
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
6963
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
6964
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
6965
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
6966
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
6967
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6968
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6969
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
6970
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
6971
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
6972
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6973
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
6974
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6975
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6976
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6977
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6978
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
6979
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
6980
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6981
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6982
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
6983
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
6984
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
6985
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
6986
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
6987
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
6988
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6989
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
6990
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
6991
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
6992
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
6993
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6994
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6995
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
6996
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
6997
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
6998
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
6999
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7000
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7001
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7002
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7003
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7004
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7005
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7006
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7007
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7008
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7009
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7010
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7011
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7012
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7013
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7014
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7015
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7016
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7017
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7018
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7019
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7020
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7021
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7022
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7023
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7024
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7025
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7026
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7027
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7028
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7029
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7030
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7031
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7032
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7033
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7034
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7035
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7036
  // FP16 scalar intrinsics go here.
7037
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7038
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7039
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7040
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7041
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7042
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7043
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7044
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7045
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7046
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7047
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7048
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7049
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7050
  NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7051
  NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7052
  NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7053
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7054
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7055
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7056
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7057
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7058
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7059
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7060
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7061
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7062
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7063
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7064
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7065
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7066
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7067
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7068
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7069
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7070
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7071
};
7072
7073
// Some intrinsics are equivalent for codegen.
7074
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7075
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7076
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7077
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7078
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7079
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7080
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7081
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7082
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7083
  { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
7084
  { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
7085
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7086
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7087
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7088
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7089
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7090
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7091
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7092
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7093
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7094
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7095
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7096
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7097
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7098
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7099
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7100
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7101
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7102
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7103
  { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
7104
  { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
7105
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7106
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7107
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7108
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7109
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7110
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7111
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7112
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7113
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7114
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7115
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7116
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7117
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7118
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7119
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7120
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7121
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7122
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7123
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7124
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7125
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7126
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7127
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7128
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7129
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7130
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7131
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7132
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7133
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7134
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7135
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7136
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7137
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7138
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7139
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7140
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7141
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7142
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7143
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7144
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7145
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7146
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7147
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7148
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7149
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7150
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7151
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7152
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7153
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7154
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7155
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7156
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7157
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7158
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7159
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7160
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7161
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7162
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7163
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7164
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7165
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7166
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7167
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7168
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7169
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7170
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7171
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7172
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7173
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7174
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7175
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7176
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7177
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7178
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7179
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7180
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7181
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7182
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7183
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7184
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7185
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7186
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7187
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7188
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7189
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7190
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7191
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7192
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7193
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7194
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7195
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7196
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7197
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7198
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7199
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7200
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7201
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7202
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7203
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7204
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7205
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7206
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7207
  { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
7208
  { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
7209
  { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
7210
  { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
7211
  { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
7212
  { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
7213
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7214
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7215
  // arbitrary one to be handled as the canonical variation.
7216
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7217
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7218
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7219
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7220
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7221
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7222
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7223
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7224
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7225
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7226
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7227
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7228
};
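The table above only redirects aliased builtin IDs to a canonical ID; the canonical entry is then looked up in the regular intrinsic maps. Below is a minimal sketch of how such an alias table can be consulted, assuming a small linear scan is acceptable for a table of this size (the helper name canonicalizeNeonBuiltinID is illustrative, not the actual caller in this file):

// Hedged sketch: remap an aliased NEON builtin ID to its canonical variant.
// NEONEquivalentIntrinsicMap holds (alias, canonical) pairs as defined above.
static unsigned canonicalizeNeonBuiltinID(unsigned BuiltinID) {
  for (const auto &P : NEONEquivalentIntrinsicMap)
    if (P.first == BuiltinID)
      return P.second; // e.g. vldap1_lane_u64 -> vldap1_lane_s64
  return BuiltinID;    // not an alias; keep the ID unchanged
}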
7229
7230
#undef NEONMAP0
7231
#undef NEONMAP1
7232
#undef NEONMAP2
7233
7234
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
7235
  {                                                                            \
7236
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
7237
        TypeModifier                                                           \
7238
  }
7239
7240
#define SVEMAP2(NameBase, TypeModifier)                                        \
7241
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7242
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7243
#define GET_SVE_LLVM_INTRINSIC_MAP
7244
#include "clang/Basic/arm_sve_builtin_cg.inc"
7245
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7246
#undef GET_SVE_LLVM_INTRINSIC_MAP
7247
};
7248
7249
#undef SVEMAP1
7250
#undef SVEMAP2
7251
7252
#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
7253
  {                                                                            \
7254
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
7255
        TypeModifier                                                           \
7256
  }
7257
7258
#define SMEMAP2(NameBase, TypeModifier)                                        \
7259
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7260
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7261
#define GET_SME_LLVM_INTRINSIC_MAP
7262
#include "clang/Basic/arm_sme_builtin_cg.inc"
7263
#undef GET_SME_LLVM_INTRINSIC_MAP
7264
};
7265
7266
#undef SMEMAP1
7267
#undef SMEMAP2
7268
7269
static bool NEONSIMDIntrinsicsProvenSorted = false;
7270
7271
static bool AArch64SIMDIntrinsicsProvenSorted = false;
7272
static bool AArch64SISDIntrinsicsProvenSorted = false;
7273
static bool AArch64SVEIntrinsicsProvenSorted = false;
7274
static bool AArch64SMEIntrinsicsProvenSorted = false;
7275
7276
static const ARMVectorIntrinsicInfo *
7277
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7278
0
                            unsigned BuiltinID, bool &MapProvenSorted) {
7279
7280
0
#ifndef NDEBUG
7281
0
  if (!MapProvenSorted) {
7282
0
    assert(llvm::is_sorted(IntrinsicMap));
7283
0
    MapProvenSorted = true;
7284
0
  }
7285
0
#endif
7286
7287
0
  const ARMVectorIntrinsicInfo *Builtin =
7288
0
      llvm::lower_bound(IntrinsicMap, BuiltinID);
7289
7290
0
  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7291
0
    return Builtin;
7292
7293
0
  return nullptr;
7294
0
}
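A hedged usage sketch of the lookup helper above, assuming a caller that dispatches an SVE builtin; the surrounding call site is illustrative, and only the map and its ProvenSorted flag come from the declarations above:

// Illustrative call site (assumed), using the SVE map and its sorted flag:
const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
    AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted);
if (!Builtin)
  return nullptr; // this builtin is not described by the SVE table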
7295
7296
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7297
                                                   unsigned Modifier,
7298
                                                   llvm::Type *ArgType,
7299
0
                                                   const CallExpr *E) {
7300
0
  int VectorSize = 0;
7301
0
  if (Modifier & Use64BitVectors)
7302
0
    VectorSize = 64;
7303
0
  else if (Modifier & Use128BitVectors)
7304
0
    VectorSize = 128;
7305
7306
  // Return type.
7307
0
  SmallVector<llvm::Type *, 3> Tys;
7308
0
  if (Modifier & AddRetType) {
7309
0
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7310
0
    if (Modifier & VectorizeRetType)
7311
0
      Ty = llvm::FixedVectorType::get(
7312
0
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7313
7314
0
    Tys.push_back(Ty);
7315
0
  }
7316
7317
  // Arguments.
7318
0
  if (Modifier & VectorizeArgTypes) {
7319
0
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7320
0
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7321
0
  }
7322
7323
0
  if (Modifier & (Add1ArgType | Add2ArgTypes))
7324
0
    Tys.push_back(ArgType);
7325
7326
0
  if (Modifier & Add2ArgTypes)
7327
0
    Tys.push_back(ArgType);
7328
7329
0
  if (Modifier & InventFloatType)
7330
0
    Tys.push_back(FloatTy);
7331
7332
0
  return CGM.getIntrinsic(IntrinsicID, Tys);
7333
0
}
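To make the modifier flags concrete, here is a hedged worked example based on one of the scalar table entries above; the traced values are an illustration of the logic in LookupNeonLLVMIntrinsic, not generated output:

// Worked example (illustrative): NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType)
//   - neither Use64BitVectors nor Use128BitVectors is set, so VectorSize == 0
//   - AddRetType is not set, so no return type is pushed into Tys
//   - Add1ArgType appends the scalar argument type (i64)
// The lookup therefore resolves to the i64 overload:
//   CGM.getIntrinsic(Intrinsic::aarch64_neon_sqabs, {Int64Ty});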
7334
7335
static Value *EmitCommonNeonSISDBuiltinExpr(
7336
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7337
0
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7338
0
  unsigned BuiltinID = SISDInfo.BuiltinID;
7339
0
  unsigned int Int = SISDInfo.LLVMIntrinsic;
7340
0
  unsigned Modifier = SISDInfo.TypeModifier;
7341
0
  const char *s = SISDInfo.NameHint;
7342
7343
0
  switch (BuiltinID) {
7344
0
  case NEON::BI__builtin_neon_vcled_s64:
7345
0
  case NEON::BI__builtin_neon_vcled_u64:
7346
0
  case NEON::BI__builtin_neon_vcles_f32:
7347
0
  case NEON::BI__builtin_neon_vcled_f64:
7348
0
  case NEON::BI__builtin_neon_vcltd_s64:
7349
0
  case NEON::BI__builtin_neon_vcltd_u64:
7350
0
  case NEON::BI__builtin_neon_vclts_f32:
7351
0
  case NEON::BI__builtin_neon_vcltd_f64:
7352
0
  case NEON::BI__builtin_neon_vcales_f32:
7353
0
  case NEON::BI__builtin_neon_vcaled_f64:
7354
0
  case NEON::BI__builtin_neon_vcalts_f32:
7355
0
  case NEON::BI__builtin_neon_vcaltd_f64:
7356
    // Only one direction of comparisons actually exist, cmle is actually a cmge
7357
    // with swapped operands. The table gives us the right intrinsic but we
7358
    // still need to do the swap.
7359
0
    std::swap(Ops[0], Ops[1]);
7360
0
    break;
7361
0
  }
7362
7363
0
  assert(Int && "Generic code assumes a valid intrinsic");
7364
7365
  // Determine the type(s) of this overloaded AArch64 intrinsic.
7366
0
  const Expr *Arg = E->getArg(0);
7367
0
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7368
0
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7369
7370
0
  int j = 0;
7371
0
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7372
0
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7373
0
       ai != ae; ++ai, ++j) {
7374
0
    llvm::Type *ArgTy = ai->getType();
7375
0
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7376
0
             ArgTy->getPrimitiveSizeInBits())
7377
0
      continue;
7378
7379
0
    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7380
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7381
    // it before inserting.
7382
0
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7383
0
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7384
0
    Ops[j] =
7385
0
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7386
0
  }
7387
7388
0
  Value *Result = CGF.EmitNeonCall(F, Ops, s);
7389
0
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
7390
0
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7391
0
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7392
0
    return CGF.Builder.CreateExtractElement(Result, C0);
7393
7394
0
  return CGF.Builder.CreateBitCast(Result, ResultType, s);
7395
0
}
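The SISD path above wraps scalar operands into vectors, calls the vector intrinsic, and extracts lane 0 when the scalar result is narrower than the vector result. A hedged trace for one entry from the scalar map above (assuming Vectorize1ArgType combines Add1ArgType with VectorizeArgTypes, as its use in LookupNeonLLVMIntrinsic suggests):

// Worked example (illustrative): NEONMAP1(vqaddb_s8, aarch64_neon_sqadd,
//                                         Vectorize1ArgType | Use64BitVectors)
//   - Use64BitVectors gives VectorSize == 64, so the i8 argument type is
//     widened to <8 x i8> and llvm.aarch64.neon.sqadd.v8i8 is selected
//   - each i8 operand is inserted into lane 0 of a poison <8 x i8> vector
//   - the <8 x i8> result is wider than the i8 return type, so lane 0 is
//     extracted to produce the scalar value of vqaddb_s8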
7396
7397
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7398
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7399
    const char *NameHint, unsigned Modifier, const CallExpr *E,
7400
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7401
0
    llvm::Triple::ArchType Arch) {
7402
  // Get the last argument, which specifies the vector type.
7403
0
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7404
0
  std::optional<llvm::APSInt> NeonTypeConst =
7405
0
      Arg->getIntegerConstantExpr(getContext());
7406
0
  if (!NeonTypeConst)
7407
0
    return nullptr;
7408
7409
  // Determine the type of this overloaded NEON intrinsic.
7410
0
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7411
0
  bool Usgn = Type.isUnsigned();
7412
0
  bool Quad = Type.isQuad();
7413
0
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7414
0
  const bool AllowBFloatArgsAndRet =
7415
0
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7416
7417
0
  llvm::FixedVectorType *VTy =
7418
0
      GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7419
0
  llvm::Type *Ty = VTy;
7420
0
  if (!Ty)
7421
0
    return nullptr;
7422
7423
0
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
7424
0
    return Builder.getInt32(addr.getAlignment().getQuantity());
7425
0
  };
7426
7427
0
  unsigned Int = LLVMIntrinsic;
7428
0
  if ((Modifier & UnsignedAlts) && !Usgn)
7429
0
    Int = AltLLVMIntrinsic;
7430
7431
0
  switch (BuiltinID) {
7432
0
  default: break;
7433
0
  case NEON::BI__builtin_neon_splat_lane_v:
7434
0
  case NEON::BI__builtin_neon_splat_laneq_v:
7435
0
  case NEON::BI__builtin_neon_splatq_lane_v:
7436
0
  case NEON::BI__builtin_neon_splatq_laneq_v: {
7437
0
    auto NumElements = VTy->getElementCount();
7438
0
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7439
0
      NumElements = NumElements * 2;
7440
0
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7441
0
      NumElements = NumElements.divideCoefficientBy(2);
7442
7443
0
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7444
0
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7445
0
  }
7446
0
  case NEON::BI__builtin_neon_vpadd_v:
7447
0
  case NEON::BI__builtin_neon_vpaddq_v:
7448
    // We don't allow fp/int overloading of intrinsics.
7449
0
    if (VTy->getElementType()->isFloatingPointTy() &&
7450
0
        Int == Intrinsic::aarch64_neon_addp)
7451
0
      Int = Intrinsic::aarch64_neon_faddp;
7452
0
    break;
7453
0
  case NEON::BI__builtin_neon_vabs_v:
7454
0
  case NEON::BI__builtin_neon_vabsq_v:
7455
0
    if (VTy->getElementType()->isFloatingPointTy())
7456
0
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7457
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7458
0
  case NEON::BI__builtin_neon_vadd_v:
7459
0
  case NEON::BI__builtin_neon_vaddq_v: {
7460
0
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7461
0
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7462
0
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7463
0
    Ops[0] =  Builder.CreateXor(Ops[0], Ops[1]);
7464
0
    return Builder.CreateBitCast(Ops[0], Ty);
7465
0
  }
7466
0
  case NEON::BI__builtin_neon_vaddhn_v: {
7467
0
    llvm::FixedVectorType *SrcTy =
7468
0
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7469
7470
    // %sum = add <4 x i32> %lhs, %rhs
7471
0
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7472
0
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7473
0
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7474
7475
    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7476
0
    Constant *ShiftAmt =
7477
0
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7478
0
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7479
7480
    // %res = trunc <4 x i32> %high to <4 x i16>
7481
0
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7482
0
  }
7483
0
  case NEON::BI__builtin_neon_vcale_v:
7484
0
  case NEON::BI__builtin_neon_vcaleq_v:
7485
0
  case NEON::BI__builtin_neon_vcalt_v:
7486
0
  case NEON::BI__builtin_neon_vcaltq_v:
7487
0
    std::swap(Ops[0], Ops[1]);
7488
0
    [[fallthrough]];
7489
0
  case NEON::BI__builtin_neon_vcage_v:
7490
0
  case NEON::BI__builtin_neon_vcageq_v:
7491
0
  case NEON::BI__builtin_neon_vcagt_v:
7492
0
  case NEON::BI__builtin_neon_vcagtq_v: {
7493
0
    llvm::Type *Ty;
7494
0
    switch (VTy->getScalarSizeInBits()) {
7495
0
    default: llvm_unreachable("unexpected type");
7496
0
    case 32:
7497
0
      Ty = FloatTy;
7498
0
      break;
7499
0
    case 64:
7500
0
      Ty = DoubleTy;
7501
0
      break;
7502
0
    case 16:
7503
0
      Ty = HalfTy;
7504
0
      break;
7505
0
    }
7506
0
    auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7507
0
    llvm::Type *Tys[] = { VTy, VecFlt };
7508
0
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7509
0
    return EmitNeonCall(F, Ops, NameHint);
7510
0
  }
7511
0
  case NEON::BI__builtin_neon_vceqz_v:
7512
0
  case NEON::BI__builtin_neon_vceqzq_v:
7513
0
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7514
0
                                         ICmpInst::ICMP_EQ, "vceqz");
7515
0
  case NEON::BI__builtin_neon_vcgez_v:
7516
0
  case NEON::BI__builtin_neon_vcgezq_v:
7517
0
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7518
0
                                         ICmpInst::ICMP_SGE, "vcgez");
7519
0
  case NEON::BI__builtin_neon_vclez_v:
7520
0
  case NEON::BI__builtin_neon_vclezq_v:
7521
0
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7522
0
                                         ICmpInst::ICMP_SLE, "vclez");
7523
0
  case NEON::BI__builtin_neon_vcgtz_v:
7524
0
  case NEON::BI__builtin_neon_vcgtzq_v:
7525
0
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7526
0
                                         ICmpInst::ICMP_SGT, "vcgtz");
7527
0
  case NEON::BI__builtin_neon_vcltz_v:
7528
0
  case NEON::BI__builtin_neon_vcltzq_v:
7529
0
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7530
0
                                         ICmpInst::ICMP_SLT, "vcltz");
7531
0
  case NEON::BI__builtin_neon_vclz_v:
7532
0
  case NEON::BI__builtin_neon_vclzq_v:
7533
    // We generate a target-independent intrinsic, which needs a second argument
7534
    // for whether or not clz of zero is undefined; on ARM it isn't.
7535
0
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7536
0
    break;
7537
0
  case NEON::BI__builtin_neon_vcvt_f32_v:
7538
0
  case NEON::BI__builtin_neon_vcvtq_f32_v:
7539
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7540
0
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7541
0
                     HasLegalHalfType);
7542
0
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7543
0
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7544
0
  case NEON::BI__builtin_neon_vcvt_f16_s16:
7545
0
  case NEON::BI__builtin_neon_vcvt_f16_u16:
7546
0
  case NEON::BI__builtin_neon_vcvtq_f16_s16:
7547
0
  case NEON::BI__builtin_neon_vcvtq_f16_u16:
7548
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7549
0
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7550
0
                     HasLegalHalfType);
7551
0
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7552
0
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7553
0
  case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7554
0
  case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7555
0
  case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7556
0
  case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7557
0
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7558
0
    Function *F = CGM.getIntrinsic(Int, Tys);
7559
0
    return EmitNeonCall(F, Ops, "vcvt_n");
7560
0
  }
7561
0
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
7562
0
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
7563
0
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7564
0
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7565
0
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7566
0
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7567
0
    Function *F = CGM.getIntrinsic(Int, Tys);
7568
0
    return EmitNeonCall(F, Ops, "vcvt_n");
7569
0
  }
7570
0
  case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7571
0
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
7572
0
  case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7573
0
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
7574
0
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
7575
0
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
7576
0
  case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7577
0
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7578
0
  case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7579
0
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7580
0
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7581
0
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7582
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7583
0
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7584
0
    return EmitNeonCall(F, Ops, "vcvt_n");
7585
0
  }
7586
0
  case NEON::BI__builtin_neon_vcvt_s32_v:
7587
0
  case NEON::BI__builtin_neon_vcvt_u32_v:
7588
0
  case NEON::BI__builtin_neon_vcvt_s64_v:
7589
0
  case NEON::BI__builtin_neon_vcvt_u64_v:
7590
0
  case NEON::BI__builtin_neon_vcvt_s16_f16:
7591
0
  case NEON::BI__builtin_neon_vcvt_u16_f16:
7592
0
  case NEON::BI__builtin_neon_vcvtq_s32_v:
7593
0
  case NEON::BI__builtin_neon_vcvtq_u32_v:
7594
0
  case NEON::BI__builtin_neon_vcvtq_s64_v:
7595
0
  case NEON::BI__builtin_neon_vcvtq_u64_v:
7596
0
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
7597
0
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7598
0
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7599
0
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7600
0
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7601
0
  }
7602
0
  case NEON::BI__builtin_neon_vcvta_s16_f16:
7603
0
  case NEON::BI__builtin_neon_vcvta_s32_v:
7604
0
  case NEON::BI__builtin_neon_vcvta_s64_v:
7605
0
  case NEON::BI__builtin_neon_vcvta_u16_f16:
7606
0
  case NEON::BI__builtin_neon_vcvta_u32_v:
7607
0
  case NEON::BI__builtin_neon_vcvta_u64_v:
7608
0
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7609
0
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
7610
0
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
7611
0
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7612
0
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
7613
0
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
7614
0
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
7615
0
  case NEON::BI__builtin_neon_vcvtn_s32_v:
7616
0
  case NEON::BI__builtin_neon_vcvtn_s64_v:
7617
0
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
7618
0
  case NEON::BI__builtin_neon_vcvtn_u32_v:
7619
0
  case NEON::BI__builtin_neon_vcvtn_u64_v:
7620
0
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7621
0
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
7622
0
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
7623
0
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7624
0
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
7625
0
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
7626
0
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
7627
0
  case NEON::BI__builtin_neon_vcvtp_s32_v:
7628
0
  case NEON::BI__builtin_neon_vcvtp_s64_v:
7629
0
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
7630
0
  case NEON::BI__builtin_neon_vcvtp_u32_v:
7631
0
  case NEON::BI__builtin_neon_vcvtp_u64_v:
7632
0
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7633
0
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
7634
0
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
7635
0
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7636
0
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
7637
0
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
7638
0
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
7639
0
  case NEON::BI__builtin_neon_vcvtm_s32_v:
7640
0
  case NEON::BI__builtin_neon_vcvtm_s64_v:
7641
0
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
7642
0
  case NEON::BI__builtin_neon_vcvtm_u32_v:
7643
0
  case NEON::BI__builtin_neon_vcvtm_u64_v:
7644
0
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7645
0
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
7646
0
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
7647
0
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7648
0
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
7649
0
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7650
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7651
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7652
0
  }
7653
0
  case NEON::BI__builtin_neon_vcvtx_f32_v: {
7654
0
    llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7655
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7656
7657
0
  }
7658
0
  case NEON::BI__builtin_neon_vext_v:
7659
0
  case NEON::BI__builtin_neon_vextq_v: {
7660
0
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7661
0
    SmallVector<int, 16> Indices;
7662
0
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7663
0
      Indices.push_back(i+CV);
7664
7665
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7666
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7667
0
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7668
0
  }
7669
0
  case NEON::BI__builtin_neon_vfma_v:
7670
0
  case NEON::BI__builtin_neon_vfmaq_v: {
7671
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7672
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7673
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7674
7675
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7676
0
    return emitCallMaybeConstrainedFPBuiltin(
7677
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7678
0
        {Ops[1], Ops[2], Ops[0]});
7679
0
  }
7680
0
  case NEON::BI__builtin_neon_vld1_v:
7681
0
  case NEON::BI__builtin_neon_vld1q_v: {
7682
0
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7683
0
    Ops.push_back(getAlignmentValue32(PtrOp0));
7684
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7685
0
  }
7686
0
  case NEON::BI__builtin_neon_vld1_x2_v:
7687
0
  case NEON::BI__builtin_neon_vld1q_x2_v:
7688
0
  case NEON::BI__builtin_neon_vld1_x3_v:
7689
0
  case NEON::BI__builtin_neon_vld1q_x3_v:
7690
0
  case NEON::BI__builtin_neon_vld1_x4_v:
7691
0
  case NEON::BI__builtin_neon_vld1q_x4_v: {
7692
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7693
0
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7694
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7695
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7696
0
  }
7697
0
  case NEON::BI__builtin_neon_vld2_v:
7698
0
  case NEON::BI__builtin_neon_vld2q_v:
7699
0
  case NEON::BI__builtin_neon_vld3_v:
7700
0
  case NEON::BI__builtin_neon_vld3q_v:
7701
0
  case NEON::BI__builtin_neon_vld4_v:
7702
0
  case NEON::BI__builtin_neon_vld4q_v:
7703
0
  case NEON::BI__builtin_neon_vld2_dup_v:
7704
0
  case NEON::BI__builtin_neon_vld2q_dup_v:
7705
0
  case NEON::BI__builtin_neon_vld3_dup_v:
7706
0
  case NEON::BI__builtin_neon_vld3q_dup_v:
7707
0
  case NEON::BI__builtin_neon_vld4_dup_v:
7708
0
  case NEON::BI__builtin_neon_vld4q_dup_v: {
7709
0
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7710
0
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7711
0
    Value *Align = getAlignmentValue32(PtrOp1);
7712
0
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7713
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7714
0
  }
7715
0
  case NEON::BI__builtin_neon_vld1_dup_v:
7716
0
  case NEON::BI__builtin_neon_vld1q_dup_v: {
7717
0
    Value *V = PoisonValue::get(Ty);
7718
0
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7719
0
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7720
0
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7721
0
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7722
0
    return EmitNeonSplat(Ops[0], CI);
7723
0
  }
7724
0
  case NEON::BI__builtin_neon_vld2_lane_v:
7725
0
  case NEON::BI__builtin_neon_vld2q_lane_v:
7726
0
  case NEON::BI__builtin_neon_vld3_lane_v:
7727
0
  case NEON::BI__builtin_neon_vld3q_lane_v:
7728
0
  case NEON::BI__builtin_neon_vld4_lane_v:
7729
0
  case NEON::BI__builtin_neon_vld4q_lane_v: {
7730
0
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
7731
0
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7732
0
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
7733
0
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7734
0
    Ops.push_back(getAlignmentValue32(PtrOp1));
7735
0
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7736
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7737
0
  }
7738
0
  case NEON::BI__builtin_neon_vmovl_v: {
7739
0
    llvm::FixedVectorType *DTy =
7740
0
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7741
0
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7742
0
    if (Usgn)
7743
0
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7744
0
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7745
0
  }
7746
0
  case NEON::BI__builtin_neon_vmovn_v: {
7747
0
    llvm::FixedVectorType *QTy =
7748
0
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7749
0
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7750
0
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7751
0
  }
7752
0
  case NEON::BI__builtin_neon_vmull_v:
7753
    // FIXME: the integer vmull operations could be emitted in terms of pure
7754
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7755
    // hoisting the exts outside loops. Until global ISel comes along that can
7756
    // see through such movement, this leads to bad CodeGen. So we need an
7757
    // intrinsic for now.
7758
0
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7759
0
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7760
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7761
0
  case NEON::BI__builtin_neon_vpadal_v:
7762
0
  case NEON::BI__builtin_neon_vpadalq_v: {
7763
    // The source operand type has twice as many elements of half the size.
7764
0
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7765
0
    llvm::Type *EltTy =
7766
0
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7767
0
    auto *NarrowTy =
7768
0
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7769
0
    llvm::Type *Tys[2] = { Ty, NarrowTy };
7770
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7771
0
  }
7772
0
  case NEON::BI__builtin_neon_vpaddl_v:
7773
0
  case NEON::BI__builtin_neon_vpaddlq_v: {
7774
    // The source operand type has twice as many elements of half the size.
7775
0
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7776
0
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7777
0
    auto *NarrowTy =
7778
0
        llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7779
0
    llvm::Type *Tys[2] = { Ty, NarrowTy };
7780
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7781
0
  }
7782
0
  case NEON::BI__builtin_neon_vqdmlal_v:
7783
0
  case NEON::BI__builtin_neon_vqdmlsl_v: {
7784
0
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7785
0
    Ops[1] =
7786
0
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7787
0
    Ops.resize(2);
7788
0
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7789
0
  }
7790
0
  case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7791
0
  case NEON::BI__builtin_neon_vqdmulh_lane_v:
7792
0
  case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7793
0
  case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7794
0
    auto *RTy = cast<llvm::FixedVectorType>(Ty);
7795
0
    if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7796
0
        BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7797
0
      RTy = llvm::FixedVectorType::get(RTy->getElementType(),
7798
0
                                       RTy->getNumElements() * 2);
7799
0
    llvm::Type *Tys[2] = {
7800
0
        RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7801
0
                                             /*isQuad*/ false))};
7802
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7803
0
  }
7804
0
  case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
7805
0
  case NEON::BI__builtin_neon_vqdmulh_laneq_v:
7806
0
  case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
7807
0
  case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
7808
0
    llvm::Type *Tys[2] = {
7809
0
        Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7810
0
                                            /*isQuad*/ true))};
7811
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7812
0
  }
7813
0
  case NEON::BI__builtin_neon_vqshl_n_v:
7814
0
  case NEON::BI__builtin_neon_vqshlq_n_v:
7815
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
7816
0
                        1, false);
7817
0
  case NEON::BI__builtin_neon_vqshlu_n_v:
7818
0
  case NEON::BI__builtin_neon_vqshluq_n_v:
7819
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
7820
0
                        1, false);
7821
0
  case NEON::BI__builtin_neon_vrecpe_v:
7822
0
  case NEON::BI__builtin_neon_vrecpeq_v:
7823
0
  case NEON::BI__builtin_neon_vrsqrte_v:
7824
0
  case NEON::BI__builtin_neon_vrsqrteq_v:
7825
0
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7826
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7827
0
  case NEON::BI__builtin_neon_vrndi_v:
7828
0
  case NEON::BI__builtin_neon_vrndiq_v:
7829
0
    Int = Builder.getIsFPConstrained()
7830
0
              ? Intrinsic::experimental_constrained_nearbyint
7831
0
              : Intrinsic::nearbyint;
7832
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7833
0
  case NEON::BI__builtin_neon_vrshr_n_v:
7834
0
  case NEON::BI__builtin_neon_vrshrq_n_v:
7835
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
7836
0
                        1, true);
7837
0
  case NEON::BI__builtin_neon_vsha512hq_u64:
7838
0
  case NEON::BI__builtin_neon_vsha512h2q_u64:
7839
0
  case NEON::BI__builtin_neon_vsha512su0q_u64:
7840
0
  case NEON::BI__builtin_neon_vsha512su1q_u64: {
7841
0
    Function *F = CGM.getIntrinsic(Int);
7842
0
    return EmitNeonCall(F, Ops, "");
7843
0
  }
7844
0
  case NEON::BI__builtin_neon_vshl_n_v:
7845
0
  case NEON::BI__builtin_neon_vshlq_n_v:
7846
0
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
7847
0
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
7848
0
                             "vshl_n");
7849
0
  case NEON::BI__builtin_neon_vshll_n_v: {
7850
0
    llvm::FixedVectorType *SrcTy =
7851
0
        llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7852
0
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7853
0
    if (Usgn)
7854
0
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7855
0
    else
7856
0
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7857
0
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7858
0
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7859
0
  }
7860
0
  case NEON::BI__builtin_neon_vshrn_n_v: {
7861
0
    llvm::FixedVectorType *SrcTy =
7862
0
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7863
0
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7864
0
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7865
0
    if (Usgn)
7866
0
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
7867
0
    else
7868
0
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
7869
0
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
7870
0
  }
7871
0
  case NEON::BI__builtin_neon_vshr_n_v:
7872
0
  case NEON::BI__builtin_neon_vshrq_n_v:
7873
0
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
7874
0
  case NEON::BI__builtin_neon_vst1_v:
7875
0
  case NEON::BI__builtin_neon_vst1q_v:
7876
0
  case NEON::BI__builtin_neon_vst2_v:
7877
0
  case NEON::BI__builtin_neon_vst2q_v:
7878
0
  case NEON::BI__builtin_neon_vst3_v:
7879
0
  case NEON::BI__builtin_neon_vst3q_v:
7880
0
  case NEON::BI__builtin_neon_vst4_v:
7881
0
  case NEON::BI__builtin_neon_vst4q_v:
7882
0
  case NEON::BI__builtin_neon_vst2_lane_v:
7883
0
  case NEON::BI__builtin_neon_vst2q_lane_v:
7884
0
  case NEON::BI__builtin_neon_vst3_lane_v:
7885
0
  case NEON::BI__builtin_neon_vst3q_lane_v:
7886
0
  case NEON::BI__builtin_neon_vst4_lane_v:
7887
0
  case NEON::BI__builtin_neon_vst4q_lane_v: {
7888
0
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
7889
0
    Ops.push_back(getAlignmentValue32(PtrOp0));
7890
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7891
0
  }
7892
0
  case NEON::BI__builtin_neon_vsm3partw1q_u32:
7893
0
  case NEON::BI__builtin_neon_vsm3partw2q_u32:
7894
0
  case NEON::BI__builtin_neon_vsm3ss1q_u32:
7895
0
  case NEON::BI__builtin_neon_vsm4ekeyq_u32:
7896
0
  case NEON::BI__builtin_neon_vsm4eq_u32: {
7897
0
    Function *F = CGM.getIntrinsic(Int);
7898
0
    return EmitNeonCall(F, Ops, "");
7899
0
  }
7900
0
  case NEON::BI__builtin_neon_vsm3tt1aq_u32:
7901
0
  case NEON::BI__builtin_neon_vsm3tt1bq_u32:
7902
0
  case NEON::BI__builtin_neon_vsm3tt2aq_u32:
7903
0
  case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
7904
0
    Function *F = CGM.getIntrinsic(Int);
7905
0
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7906
0
    return EmitNeonCall(F, Ops, "");
7907
0
  }
7908
0
  case NEON::BI__builtin_neon_vst1_x2_v:
7909
0
  case NEON::BI__builtin_neon_vst1q_x2_v:
7910
0
  case NEON::BI__builtin_neon_vst1_x3_v:
7911
0
  case NEON::BI__builtin_neon_vst1q_x3_v:
7912
0
  case NEON::BI__builtin_neon_vst1_x4_v:
7913
0
  case NEON::BI__builtin_neon_vst1q_x4_v: {
7914
    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
7915
    // in AArch64 it comes last. We may want to stick to one or the other.
7916
0
    if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
7917
0
        Arch == llvm::Triple::aarch64_32) {
7918
0
      llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7919
0
      std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7920
0
      return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7921
0
    }
7922
0
    llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
7923
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
7924
0
  }
7925
0
  case NEON::BI__builtin_neon_vsubhn_v: {
7926
0
    llvm::FixedVectorType *SrcTy =
7927
0
        llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7928
7929
    // %diff = sub <4 x i32> %lhs, %rhs
7930
0
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7931
0
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7932
0
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
7933
7934
    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
7935
0
    Constant *ShiftAmt =
7936
0
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7937
0
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
7938
7939
    // %res = trunc <4 x i32> %high to <4 x i16>
7940
0
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
7941
0
  }
7942
0
  case NEON::BI__builtin_neon_vtrn_v:
7943
0
  case NEON::BI__builtin_neon_vtrnq_v: {
7944
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7945
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7946
0
    Value *SV = nullptr;
7947
7948
0
    for (unsigned vi = 0; vi != 2; ++vi) {
7949
0
      SmallVector<int, 16> Indices;
7950
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7951
0
        Indices.push_back(i+vi);
7952
0
        Indices.push_back(i+e+vi);
7953
0
      }
7954
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7955
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7956
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7957
0
    }
7958
0
    return SV;
7959
0
  }
7960
0
  case NEON::BI__builtin_neon_vtst_v:
7961
0
  case NEON::BI__builtin_neon_vtstq_v: {
7962
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7963
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7964
0
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7965
0
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7966
0
                                ConstantAggregateZero::get(Ty));
7967
0
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
7968
0
  }
7969
0
  case NEON::BI__builtin_neon_vuzp_v:
7970
0
  case NEON::BI__builtin_neon_vuzpq_v: {
7971
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7972
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7973
0
    Value *SV = nullptr;
7974
7975
0
    for (unsigned vi = 0; vi != 2; ++vi) {
7976
0
      SmallVector<int, 16> Indices;
7977
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7978
0
        Indices.push_back(2*i+vi);
7979
7980
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7981
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7982
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7983
0
    }
7984
0
    return SV;
7985
0
  }
7986
0
  case NEON::BI__builtin_neon_vxarq_u64: {
7987
0
    Function *F = CGM.getIntrinsic(Int);
7988
0
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7989
0
    return EmitNeonCall(F, Ops, "");
7990
0
  }
7991
0
  case NEON::BI__builtin_neon_vzip_v:
7992
0
  case NEON::BI__builtin_neon_vzipq_v: {
7993
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7994
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7995
0
    Value *SV = nullptr;
7996
7997
0
    for (unsigned vi = 0; vi != 2; ++vi) {
7998
0
      SmallVector<int, 16> Indices;
7999
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8000
0
        Indices.push_back((i + vi*e) >> 1);
8001
0
        Indices.push_back(((i + vi*e) >> 1)+e);
8002
0
      }
8003
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8004
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8005
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8006
0
    }
8007
0
    return SV;
8008
0
  }
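The vtrn, vuzp, and vzip cases above differ only in the shuffle masks they build before storing each half of the result. A small standalone sketch that prints the masks those loops generate, assuming 8 lanes per input vector (the element count is illustrative):

#include <cstdio>
#include <vector>

int main() {
  const unsigned e = 8; // lanes per input vector; illustrative only
  for (unsigned vi = 0; vi != 2; ++vi) {
    std::vector<unsigned> trn, uzp, zip;
    for (unsigned i = 0; i != e; i += 2) {
      trn.push_back(i + vi);                  // interleave matching even/odd lanes
      trn.push_back(i + e + vi);
      zip.push_back((i + vi * e) >> 1);       // low or high half of the first vector
      zip.push_back(((i + vi * e) >> 1) + e); // paired with the same half of the second
    }
    for (unsigned i = 0; i != e; ++i)
      uzp.push_back(2 * i + vi);              // every other lane (evens, then odds)
    std::printf("half %u\n  vtrn:", vi);
    for (unsigned x : trn) std::printf(" %u", x);
    std::printf("\n  vuzp:");
    for (unsigned x : uzp) std::printf(" %u", x);
    std::printf("\n  vzip:");
    for (unsigned x : zip) std::printf(" %u", x);
    std::printf("\n");
  }
}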
8009
0
  case NEON::BI__builtin_neon_vdot_s32:
8010
0
  case NEON::BI__builtin_neon_vdot_u32:
8011
0
  case NEON::BI__builtin_neon_vdotq_s32:
8012
0
  case NEON::BI__builtin_neon_vdotq_u32: {
8013
0
    auto *InputTy =
8014
0
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8015
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8016
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8017
0
  }
8018
0
  case NEON::BI__builtin_neon_vfmlal_low_f16:
8019
0
  case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8020
0
    auto *InputTy =
8021
0
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8022
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8023
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8024
0
  }
8025
0
  case NEON::BI__builtin_neon_vfmlsl_low_f16:
8026
0
  case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8027
0
    auto *InputTy =
8028
0
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8029
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8030
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8031
0
  }
8032
0
  case NEON::BI__builtin_neon_vfmlal_high_f16:
8033
0
  case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8034
0
    auto *InputTy =
8035
0
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8036
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8037
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8038
0
  }
8039
0
  case NEON::BI__builtin_neon_vfmlsl_high_f16:
8040
0
  case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8041
0
    auto *InputTy =
8042
0
        llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8043
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8044
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8045
0
  }
8046
0
  case NEON::BI__builtin_neon_vmmlaq_s32:
8047
0
  case NEON::BI__builtin_neon_vmmlaq_u32: {
8048
0
    auto *InputTy =
8049
0
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8050
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8051
0
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8052
0
  }
8053
0
  case NEON::BI__builtin_neon_vusmmlaq_s32: {
8054
0
    auto *InputTy =
8055
0
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8056
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8057
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8058
0
  }
8059
0
  case NEON::BI__builtin_neon_vusdot_s32:
8060
0
  case NEON::BI__builtin_neon_vusdotq_s32: {
8061
0
    auto *InputTy =
8062
0
        llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8063
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8064
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8065
0
  }
8066
0
  case NEON::BI__builtin_neon_vbfdot_f32:
8067
0
  case NEON::BI__builtin_neon_vbfdotq_f32: {
8068
0
    llvm::Type *InputTy =
8069
0
        llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8070
0
    llvm::Type *Tys[2] = { Ty, InputTy };
8071
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8072
0
  }
8073
0
  case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8074
0
    llvm::Type *Tys[1] = { Ty };
8075
0
    Function *F = CGM.getIntrinsic(Int, Tys);
8076
0
    return EmitNeonCall(F, Ops, "vcvtfp2bf");
8077
0
  }
8078
8079
0
  }
8080
8081
0
  assert(Int && "Expected valid intrinsic number");
8082
8083
  // Determine the type(s) of this overloaded AArch64 intrinsic.
8084
0
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8085
8086
0
  Value *Result = EmitNeonCall(F, Ops, NameHint);
8087
0
  llvm::Type *ResultType = ConvertType(E->getType());
8088
  // Cast the AArch64 intrinsic's one-element vector result to the
8089
  // scalar type expected by the builtin.
8090
0
  return Builder.CreateBitCast(Result, ResultType, NameHint);
8091
0
}
8092
8093
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8094
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8095
0
    const CmpInst::Predicate Ip, const Twine &Name) {
8096
0
  llvm::Type *OTy = Op->getType();
8097
8098
  // FIXME: this is utterly horrific. We should not be looking at previous
8099
  // codegen context to find out what needs doing. Unfortunately TableGen
8100
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8101
  // (etc).
8102
0
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8103
0
    OTy = BI->getOperand(0)->getType();
8104
8105
0
  Op = Builder.CreateBitCast(Op, OTy);
8106
0
  if (OTy->getScalarType()->isFloatingPointTy()) {
8107
0
    if (Fp == CmpInst::FCMP_OEQ)
8108
0
      Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8109
0
    else
8110
0
      Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8111
0
  } else {
8112
0
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8113
0
  }
8114
0
  return Builder.CreateSExt(Op, Ty, Name);
8115
0
}
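Each compare builtin handled here tests the operand against zero and then sign-extends the i1 result, so a true lane becomes all ones and a false lane all zeros. A scalar sketch of that per-lane semantics, assuming 32-bit lanes and a signed greater-than predicate (both assumptions are illustrative; the real predicate is supplied by the caller):

#include <cstdint>

// Model of one lane of a compare-against-zero builtin: the i1 compare
// result is sign-extended to a full-width mask.
static uint32_t cmp_gt_zero_lane(int32_t x) {
  bool pred = x > 0;                        // CreateICmp / CreateFCmp against zero
  return pred ? 0xFFFFFFFFu : 0x00000000u;  // CreateSExt of the i1 result
}

int main() { return cmp_gt_zero_lane(5) == 0xFFFFFFFFu ? 0 : 1; }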
8116
8117
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8118
                                 Value *ExtOp, Value *IndexOp,
8119
                                 llvm::Type *ResTy, unsigned IntID,
8120
0
                                 const char *Name) {
8121
0
  SmallVector<Value *, 2> TblOps;
8122
0
  if (ExtOp)
8123
0
    TblOps.push_back(ExtOp);
8124
8125
  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
8126
0
  SmallVector<int, 16> Indices;
8127
0
  auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8128
0
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8129
0
    Indices.push_back(2*i);
8130
0
    Indices.push_back(2*i+1);
8131
0
  }
8132
8133
0
  int PairPos = 0, End = Ops.size() - 1;
8134
0
  while (PairPos < End) {
8135
0
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8136
0
                                                     Ops[PairPos+1], Indices,
8137
0
                                                     Name));
8138
0
    PairPos += 2;
8139
0
  }
8140
8141
  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8142
  // of the 128-bit lookup table with zero.
8143
0
  if (PairPos == End) {
8144
0
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8145
0
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8146
0
                                                     ZeroTbl, Indices, Name));
8147
0
  }
8148
8149
0
  Function *TblF;
8150
0
  TblOps.push_back(IndexOp);
8151
0
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8152
8153
0
  return CGF.EmitNeonCall(TblF, TblOps, Name);
8154
0
}
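packTBLDVectorList concatenates consecutive pairs of 64-bit table registers into 128-bit tables and zero-pads a trailing unpaired table before appending the index operand. A standalone sketch of that packing step on plain byte arrays, assuming 8-byte tables (the type aliases and helper names are illustrative):

#include <array>
#include <cstdint>
#include <cstdio>
#include <vector>

using DReg = std::array<uint8_t, 8>;   // one 64-bit table
using QReg = std::array<uint8_t, 16>;  // one 128-bit table

// The (0, 1, ..., 2e-1) shuffle mask built above is just a concatenation.
static QReg concat(const DReg &lo, const DReg &hi) {
  QReg out{};
  for (unsigned i = 0; i != 8; ++i) {
    out[i] = lo[i];
    out[i + 8] = hi[i];
  }
  return out;
}

// Pack full pairs, then pad an odd leftover table with zeros (assumes at
// least one input table).
static std::vector<QReg> packTables(const std::vector<DReg> &tbl) {
  std::vector<QReg> packed;
  size_t pos = 0, end = tbl.size() - 1;
  while (pos < end) {
    packed.push_back(concat(tbl[pos], tbl[pos + 1]));
    pos += 2;
  }
  if (pos == end)
    packed.push_back(concat(tbl[pos], DReg{}));
  return packed;
}

int main() {
  std::vector<DReg> three(3, DReg{1, 2, 3, 4, 5, 6, 7, 8});
  std::printf("%zu packed tables\n", packTables(three).size()); // prints 2
}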
8155
8156
0
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8157
0
  unsigned Value;
8158
0
  switch (BuiltinID) {
8159
0
  default:
8160
0
    return nullptr;
8161
0
  case clang::ARM::BI__builtin_arm_nop:
8162
0
    Value = 0;
8163
0
    break;
8164
0
  case clang::ARM::BI__builtin_arm_yield:
8165
0
  case clang::ARM::BI__yield:
8166
0
    Value = 1;
8167
0
    break;
8168
0
  case clang::ARM::BI__builtin_arm_wfe:
8169
0
  case clang::ARM::BI__wfe:
8170
0
    Value = 2;
8171
0
    break;
8172
0
  case clang::ARM::BI__builtin_arm_wfi:
8173
0
  case clang::ARM::BI__wfi:
8174
0
    Value = 3;
8175
0
    break;
8176
0
  case clang::ARM::BI__builtin_arm_sev:
8177
0
  case clang::ARM::BI__sev:
8178
0
    Value = 4;
8179
0
    break;
8180
0
  case clang::ARM::BI__builtin_arm_sevl:
8181
0
  case clang::ARM::BI__sevl:
8182
0
    Value = 5;
8183
0
    break;
8184
0
  }
8185
8186
0
  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8187
0
                            llvm::ConstantInt::get(Int32Ty, Value));
8188
0
}
8189
8190
enum SpecialRegisterAccessKind {
8191
  NormalRead,
8192
  VolatileRead,
8193
  Write,
8194
};
8195
8196
// Generates the IR for __builtin_read_exec_*.
8197
// Lowers the builtin to the amdgcn_ballot intrinsic.
8198
static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8199
                                      llvm::Type *RegisterType,
8200
0
                                      llvm::Type *ValueType, bool isExecHi) {
8201
0
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8202
0
  CodeGen::CodeGenModule &CGM = CGF.CGM;
8203
8204
0
  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8205
0
  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8206
8207
0
  if (isExecHi) {
8208
0
    Value *Rt2 = Builder.CreateLShr(Call, 32);
8209
0
    Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8210
0
    return Rt2;
8211
0
  }
8212
8213
0
  return Call;
8214
0
}
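For the *_hi variant the 64-bit ballot result is shifted right by 32 and truncated to 32 bits; the other variants return the ballot result directly. A trivial scalar sketch of how the high word is extracted (the mask value is illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t exec = 0x123456789ABCDEF0ull;                 // illustrative 64-bit exec mask
  uint32_t exec_hi = static_cast<uint32_t>(exec >> 32);  // CreateLShr 32, then CreateTrunc
  std::printf("hi=%08x\n", exec_hi);                     // prints 12345678
}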
8215
8216
// Generates the IR for the read/write special register builtins.
8217
// ValueType is the type of the value that is to be written or read;
8218
// RegisterType is the type of the register being written to or read from.
8219
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8220
                                         const CallExpr *E,
8221
                                         llvm::Type *RegisterType,
8222
                                         llvm::Type *ValueType,
8223
                                         SpecialRegisterAccessKind AccessKind,
8224
0
                                         StringRef SysReg = "") {
8225
  // The read and write register intrinsics only support 32-, 64- and 128-bit operations.
8226
0
  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8227
0
          RegisterType->isIntegerTy(128)) &&
8228
0
         "Unsupported size for register.");
8229
8230
0
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
8231
0
  CodeGen::CodeGenModule &CGM = CGF.CGM;
8232
0
  LLVMContext &Context = CGM.getLLVMContext();
8233
8234
0
  if (SysReg.empty()) {
8235
0
    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8236
0
    SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8237
0
  }
8238
8239
0
  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8240
0
  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8241
0
  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8242
8243
0
  llvm::Type *Types[] = { RegisterType };
8244
8245
0
  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8246
0
  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8247
0
            && "Can't fit 64-bit value in 32-bit register");
8248
8249
0
  if (AccessKind != Write) {
8250
0
    assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8251
0
    llvm::Function *F = CGM.getIntrinsic(
8252
0
        AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8253
0
                                   : llvm::Intrinsic::read_register,
8254
0
        Types);
8255
0
    llvm::Value *Call = Builder.CreateCall(F, Metadata);
8256
8257
0
    if (MixedTypes)
8258
      // Read into a 64-bit register and then truncate the result to 32 bits.
8259
0
      return Builder.CreateTrunc(Call, ValueType);
8260
8261
0
    if (ValueType->isPointerTy())
8262
      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8263
0
      return Builder.CreateIntToPtr(Call, ValueType);
8264
8265
0
    return Call;
8266
0
  }
8267
8268
0
  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8269
0
  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8270
0
  if (MixedTypes) {
8271
    // Extend the 32-bit write value to 64 bits before the write.
8272
0
    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8273
0
    return Builder.CreateCall(F, { Metadata, ArgValue });
8274
0
  }
8275
8276
0
  if (ValueType->isPointerTy()) {
8277
    // Have VoidPtrTy ArgValue but want to return an i32/i64.
8278
0
    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8279
0
    return Builder.CreateCall(F, { Metadata, ArgValue });
8280
0
  }
8281
8282
0
  return Builder.CreateCall(F, { Metadata, ArgValue });
8283
0
}
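When RegisterType is 64 bits but ValueType is 32 bits (the MixedTypes case), a read truncates the register contents and a write zero-extends the value first. A scalar sketch of that width adaptation, with an ordinary variable standing in for the system register (purely illustrative):

#include <cstdint>

static uint64_t sysreg = 0; // illustrative stand-in for a 64-bit system register

static uint32_t read_reg32() {
  return static_cast<uint32_t>(sysreg);  // read the 64-bit register, truncate to 32 bits
}

static void write_reg32(uint32_t v) {
  sysreg = static_cast<uint64_t>(v);     // zero-extend the 32-bit value, then write
}

int main() {
  write_reg32(0xDEADBEEFu);
  return read_reg32() == 0xDEADBEEFu ? 0 : 1;
}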
8284
8285
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8286
/// argument that specifies the vector type.
8287
0
static bool HasExtraNeonArgument(unsigned BuiltinID) {
8288
0
  switch (BuiltinID) {
8289
0
  default: break;
8290
0
  case NEON::BI__builtin_neon_vget_lane_i8:
8291
0
  case NEON::BI__builtin_neon_vget_lane_i16:
8292
0
  case NEON::BI__builtin_neon_vget_lane_bf16:
8293
0
  case NEON::BI__builtin_neon_vget_lane_i32:
8294
0
  case NEON::BI__builtin_neon_vget_lane_i64:
8295
0
  case NEON::BI__builtin_neon_vget_lane_f32:
8296
0
  case NEON::BI__builtin_neon_vgetq_lane_i8:
8297
0
  case NEON::BI__builtin_neon_vgetq_lane_i16:
8298
0
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
8299
0
  case NEON::BI__builtin_neon_vgetq_lane_i32:
8300
0
  case NEON::BI__builtin_neon_vgetq_lane_i64:
8301
0
  case NEON::BI__builtin_neon_vgetq_lane_f32:
8302
0
  case NEON::BI__builtin_neon_vduph_lane_bf16:
8303
0
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
8304
0
  case NEON::BI__builtin_neon_vset_lane_i8:
8305
0
  case NEON::BI__builtin_neon_vset_lane_i16:
8306
0
  case NEON::BI__builtin_neon_vset_lane_bf16:
8307
0
  case NEON::BI__builtin_neon_vset_lane_i32:
8308
0
  case NEON::BI__builtin_neon_vset_lane_i64:
8309
0
  case NEON::BI__builtin_neon_vset_lane_f32:
8310
0
  case NEON::BI__builtin_neon_vsetq_lane_i8:
8311
0
  case NEON::BI__builtin_neon_vsetq_lane_i16:
8312
0
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
8313
0
  case NEON::BI__builtin_neon_vsetq_lane_i32:
8314
0
  case NEON::BI__builtin_neon_vsetq_lane_i64:
8315
0
  case NEON::BI__builtin_neon_vsetq_lane_f32:
8316
0
  case NEON::BI__builtin_neon_vsha1h_u32:
8317
0
  case NEON::BI__builtin_neon_vsha1cq_u32:
8318
0
  case NEON::BI__builtin_neon_vsha1pq_u32:
8319
0
  case NEON::BI__builtin_neon_vsha1mq_u32:
8320
0
  case NEON::BI__builtin_neon_vcvth_bf16_f32:
8321
0
  case clang::ARM::BI_MoveToCoprocessor:
8322
0
  case clang::ARM::BI_MoveToCoprocessor2:
8323
0
    return false;
8324
0
  }
8325
0
  return true;
8326
0
}
8327
8328
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8329
                                           const CallExpr *E,
8330
                                           ReturnValueSlot ReturnValue,
8331
0
                                           llvm::Triple::ArchType Arch) {
8332
0
  if (auto Hint = GetValueForARMHint(BuiltinID))
8333
0
    return Hint;
8334
8335
0
  if (BuiltinID == clang::ARM::BI__emit) {
8336
0
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8337
0
    llvm::FunctionType *FTy =
8338
0
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8339
8340
0
    Expr::EvalResult Result;
8341
0
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8342
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
8343
8344
0
    llvm::APSInt Value = Result.Val.getInt();
8345
0
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8346
8347
0
    llvm::InlineAsm *Emit =
8348
0
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8349
0
                                 /*hasSideEffects=*/true)
8350
0
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8351
0
                                 /*hasSideEffects=*/true);
8352
8353
0
    return Builder.CreateCall(Emit);
8354
0
  }
8355
8356
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8357
0
    Value *Option = EmitScalarExpr(E->getArg(0));
8358
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8359
0
  }
8360
8361
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8362
0
    Value *Address = EmitScalarExpr(E->getArg(0));
8363
0
    Value *RW      = EmitScalarExpr(E->getArg(1));
8364
0
    Value *IsData  = EmitScalarExpr(E->getArg(2));
8365
8366
    // Locality is not supported on the ARM target.
8367
0
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8368
8369
0
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8370
0
    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8371
0
  }
8372
8373
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8374
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8375
0
    return Builder.CreateCall(
8376
0
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8377
0
  }
8378
8379
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8380
0
      BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8381
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8382
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8383
0
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8384
0
    if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8385
0
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8386
0
    return Res;
8387
0
  }
8388
8389
8390
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8391
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8392
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8393
0
  }
8394
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8395
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8396
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8397
0
                              "cls");
8398
0
  }
8399
8400
0
  if (BuiltinID == clang::ARM::BI__clear_cache) {
8401
0
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8402
0
    const FunctionDecl *FD = E->getDirectCallee();
8403
0
    Value *Ops[2];
8404
0
    for (unsigned i = 0; i < 2; i++)
8405
0
      Ops[i] = EmitScalarExpr(E->getArg(i));
8406
0
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8407
0
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8408
0
    StringRef Name = FD->getName();
8409
0
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8410
0
  }
8411
8412
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8413
0
      BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8414
0
    Function *F;
8415
8416
0
    switch (BuiltinID) {
8417
0
    default: llvm_unreachable("unexpected builtin");
8418
0
    case clang::ARM::BI__builtin_arm_mcrr:
8419
0
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8420
0
      break;
8421
0
    case clang::ARM::BI__builtin_arm_mcrr2:
8422
0
      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8423
0
      break;
8424
0
    }
8425
8426
    // The MCRR{2} instruction has 5 operands, but
8427
    // the intrinsic has only 4, because Rt and Rt2
8428
    // are represented as a single unsigned 64-bit
8429
    // integer in the intrinsic definition while
8430
    // internally they are represented as two
8431
    // 32-bit integers.
8432
8433
0
    Value *Coproc = EmitScalarExpr(E->getArg(0));
8434
0
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
8435
0
    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8436
0
    Value *CRm = EmitScalarExpr(E->getArg(3));
8437
8438
0
    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8439
0
    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8440
0
    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8441
0
    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8442
8443
0
    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8444
0
  }
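The split the comment describes is a truncate for Rt (the low word) and a shift followed by a truncate for Rt2 (the high word). A scalar sketch with an illustrative packed value:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t rt_and_rt2 = 0x1122334455667788ull;            // packed Rt2:Rt, illustrative
  uint32_t rt  = static_cast<uint32_t>(rt_and_rt2);        // CreateTruncOrBitCast: low word
  uint32_t rt2 = static_cast<uint32_t>(rt_and_rt2 >> 32);  // CreateLShr 32, then truncate
  std::printf("Rt=%08x Rt2=%08x\n", rt, rt2);              // Rt=55667788 Rt2=11223344
}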
8445
8446
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8447
0
      BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8448
0
    Function *F;
8449
8450
0
    switch (BuiltinID) {
8451
0
    default: llvm_unreachable("unexpected builtin");
8452
0
    case clang::ARM::BI__builtin_arm_mrrc:
8453
0
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8454
0
      break;
8455
0
    case clang::ARM::BI__builtin_arm_mrrc2:
8456
0
      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8457
0
      break;
8458
0
    }
8459
8460
0
    Value *Coproc = EmitScalarExpr(E->getArg(0));
8461
0
    Value *Opc1 = EmitScalarExpr(E->getArg(1));
8462
0
    Value *CRm  = EmitScalarExpr(E->getArg(2));
8463
0
    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8464
8465
    // Returns an unsigned 64 bit integer, represented
8466
    // as two 32 bit integers.
8467
8468
0
    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8469
0
    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8470
0
    Rt = Builder.CreateZExt(Rt, Int64Ty);
8471
0
    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8472
8473
0
    Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8474
0
    RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8475
0
    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8476
8477
0
    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8478
0
  }
8479
8480
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8481
0
      ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8482
0
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8483
0
       getContext().getTypeSize(E->getType()) == 64) ||
8484
0
      BuiltinID == clang::ARM::BI__ldrexd) {
8485
0
    Function *F;
8486
8487
0
    switch (BuiltinID) {
8488
0
    default: llvm_unreachable("unexpected builtin");
8489
0
    case clang::ARM::BI__builtin_arm_ldaex:
8490
0
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8491
0
      break;
8492
0
    case clang::ARM::BI__builtin_arm_ldrexd:
8493
0
    case clang::ARM::BI__builtin_arm_ldrex:
8494
0
    case clang::ARM::BI__ldrexd:
8495
0
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8496
0
      break;
8497
0
    }
8498
8499
0
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
8500
0
    Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8501
8502
0
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
8503
0
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
8504
0
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
8505
0
    Val1 = Builder.CreateZExt(Val1, Int64Ty);
8506
8507
0
    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8508
0
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8509
0
    Val = Builder.CreateOr(Val, Val1);
8510
0
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8511
0
  }
8512
8513
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8514
0
      BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8515
0
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8516
8517
0
    QualType Ty = E->getType();
8518
0
    llvm::Type *RealResTy = ConvertType(Ty);
8519
0
    llvm::Type *IntTy =
8520
0
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8521
8522
0
    Function *F = CGM.getIntrinsic(
8523
0
        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8524
0
                                                       : Intrinsic::arm_ldrex,
8525
0
        UnqualPtrTy);
8526
0
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8527
0
    Val->addParamAttr(
8528
0
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8529
8530
0
    if (RealResTy->isPointerTy())
8531
0
      return Builder.CreateIntToPtr(Val, RealResTy);
8532
0
    else {
8533
0
      llvm::Type *IntResTy = llvm::IntegerType::get(
8534
0
          getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8535
0
      return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8536
0
                                   RealResTy);
8537
0
    }
8538
0
  }
8539
8540
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8541
0
      ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8542
0
        BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8543
0
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8544
0
    Function *F = CGM.getIntrinsic(
8545
0
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8546
0
                                                       : Intrinsic::arm_strexd);
8547
0
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8548
8549
0
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8550
0
    Value *Val = EmitScalarExpr(E->getArg(0));
8551
0
    Builder.CreateStore(Val, Tmp);
8552
8553
0
    Address LdPtr = Tmp.withElementType(STy);
8554
0
    Val = Builder.CreateLoad(LdPtr);
8555
8556
0
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8557
0
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8558
0
    Value *StPtr = EmitScalarExpr(E->getArg(1));
8559
0
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8560
0
  }
8561
8562
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8563
0
      BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8564
0
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
8565
0
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8566
8567
0
    QualType Ty = E->getArg(0)->getType();
8568
0
    llvm::Type *StoreTy =
8569
0
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8570
8571
0
    if (StoreVal->getType()->isPointerTy())
8572
0
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8573
0
    else {
8574
0
      llvm::Type *IntTy = llvm::IntegerType::get(
8575
0
          getLLVMContext(),
8576
0
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8577
0
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8578
0
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8579
0
    }
8580
8581
0
    Function *F = CGM.getIntrinsic(
8582
0
        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8583
0
                                                       : Intrinsic::arm_strex,
8584
0
        StoreAddr->getType());
8585
8586
0
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8587
0
    CI->addParamAttr(
8588
0
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8589
0
    return CI;
8590
0
  }
8591
8592
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8593
0
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8594
0
    return Builder.CreateCall(F);
8595
0
  }
8596
8597
  // CRC32
8598
0
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8599
0
  switch (BuiltinID) {
8600
0
  case clang::ARM::BI__builtin_arm_crc32b:
8601
0
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8602
0
  case clang::ARM::BI__builtin_arm_crc32cb:
8603
0
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8604
0
  case clang::ARM::BI__builtin_arm_crc32h:
8605
0
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8606
0
  case clang::ARM::BI__builtin_arm_crc32ch:
8607
0
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8608
0
  case clang::ARM::BI__builtin_arm_crc32w:
8609
0
  case clang::ARM::BI__builtin_arm_crc32d:
8610
0
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8611
0
  case clang::ARM::BI__builtin_arm_crc32cw:
8612
0
  case clang::ARM::BI__builtin_arm_crc32cd:
8613
0
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8614
0
  }
8615
8616
0
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8617
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
8618
0
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
8619
8620
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8621
    // intrinsics, hence we need different codegen for these cases.
8622
0
    if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8623
0
        BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8624
0
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8625
0
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8626
0
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8627
0
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8628
8629
0
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8630
0
      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8631
0
      return Builder.CreateCall(F, {Res, Arg1b});
8632
0
    } else {
8633
0
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8634
8635
0
      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8636
0
      return Builder.CreateCall(F, {Arg0, Arg1});
8637
0
    }
8638
0
  }
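As the comment notes, the 64-bit CRC builtins are lowered to two chained 32-bit CRC steps: first the low word, then the high word, each reusing the previous result as the accumulator. A sketch of that chaining; crc32w below is a placeholder stand-in for the llvm.arm.crc32w / crc32cw intrinsic, and its body is not a real CRC:

#include <cstdint>

// Placeholder only: the real lowering calls the hardware CRC intrinsic here.
static uint32_t crc32w(uint32_t crc, uint32_t data) { return crc ^ data; }

// crc32{c,}d emitted as two chained 32-bit steps.
static uint32_t crc32d(uint32_t crc, uint64_t data) {
  uint32_t lo = static_cast<uint32_t>(data);
  uint32_t hi = static_cast<uint32_t>(data >> 32);
  return crc32w(crc32w(crc, lo), hi);
}

int main() { return crc32d(0, 42) == 42 ? 0 : 1; }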
8639
8640
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8641
0
      BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8642
0
      BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8643
0
      BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8644
0
      BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8645
0
      BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8646
8647
0
    SpecialRegisterAccessKind AccessKind = Write;
8648
0
    if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8649
0
        BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8650
0
        BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8651
0
      AccessKind = VolatileRead;
8652
8653
0
    bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8654
0
                            BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8655
8656
0
    bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8657
0
                   BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8658
8659
0
    llvm::Type *ValueType;
8660
0
    llvm::Type *RegisterType;
8661
0
    if (IsPointerBuiltin) {
8662
0
      ValueType = VoidPtrTy;
8663
0
      RegisterType = Int32Ty;
8664
0
    } else if (Is64Bit) {
8665
0
      ValueType = RegisterType = Int64Ty;
8666
0
    } else {
8667
0
      ValueType = RegisterType = Int32Ty;
8668
0
    }
8669
8670
0
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8671
0
                                      AccessKind);
8672
0
  }
8673
8674
0
  if (BuiltinID == ARM::BI__builtin_sponentry) {
8675
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8676
0
    return Builder.CreateCall(F);
8677
0
  }
8678
8679
  // Handle MSVC intrinsics before argument evaluation to prevent double
8680
  // evaluation.
8681
0
  if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8682
0
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8683
8684
  // Deal with MVE builtins
8685
0
  if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8686
0
    return Result;
8687
  // Handle CDE builtins
8688
0
  if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8689
0
    return Result;
8690
8691
  // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
8692
0
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8693
0
    return P.first == BuiltinID;
8694
0
  });
8695
0
  if (It != end(NEONEquivalentIntrinsicMap))
8696
0
    BuiltinID = It->second;
8697
8698
  // Find out if any arguments are required to be integer constant
8699
  // expressions.
8700
0
  unsigned ICEArguments = 0;
8701
0
  ASTContext::GetBuiltinTypeError Error;
8702
0
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8703
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
8704
8705
0
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
8706
0
    return Builder.getInt32(addr.getAlignment().getQuantity());
8707
0
  };
8708
8709
0
  Address PtrOp0 = Address::invalid();
8710
0
  Address PtrOp1 = Address::invalid();
8711
0
  SmallVector<Value*, 4> Ops;
8712
0
  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8713
0
  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8714
0
  for (unsigned i = 0, e = NumArgs; i != e; i++) {
8715
0
    if (i == 0) {
8716
0
      switch (BuiltinID) {
8717
0
      case NEON::BI__builtin_neon_vld1_v:
8718
0
      case NEON::BI__builtin_neon_vld1q_v:
8719
0
      case NEON::BI__builtin_neon_vld1q_lane_v:
8720
0
      case NEON::BI__builtin_neon_vld1_lane_v:
8721
0
      case NEON::BI__builtin_neon_vld1_dup_v:
8722
0
      case NEON::BI__builtin_neon_vld1q_dup_v:
8723
0
      case NEON::BI__builtin_neon_vst1_v:
8724
0
      case NEON::BI__builtin_neon_vst1q_v:
8725
0
      case NEON::BI__builtin_neon_vst1q_lane_v:
8726
0
      case NEON::BI__builtin_neon_vst1_lane_v:
8727
0
      case NEON::BI__builtin_neon_vst2_v:
8728
0
      case NEON::BI__builtin_neon_vst2q_v:
8729
0
      case NEON::BI__builtin_neon_vst2_lane_v:
8730
0
      case NEON::BI__builtin_neon_vst2q_lane_v:
8731
0
      case NEON::BI__builtin_neon_vst3_v:
8732
0
      case NEON::BI__builtin_neon_vst3q_v:
8733
0
      case NEON::BI__builtin_neon_vst3_lane_v:
8734
0
      case NEON::BI__builtin_neon_vst3q_lane_v:
8735
0
      case NEON::BI__builtin_neon_vst4_v:
8736
0
      case NEON::BI__builtin_neon_vst4q_v:
8737
0
      case NEON::BI__builtin_neon_vst4_lane_v:
8738
0
      case NEON::BI__builtin_neon_vst4q_lane_v:
8739
        // Get the alignment for the argument in addition to the value;
8740
        // we'll use it later.
8741
0
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8742
0
        Ops.push_back(PtrOp0.getPointer());
8743
0
        continue;
8744
0
      }
8745
0
    }
8746
0
    if (i == 1) {
8747
0
      switch (BuiltinID) {
8748
0
      case NEON::BI__builtin_neon_vld2_v:
8749
0
      case NEON::BI__builtin_neon_vld2q_v:
8750
0
      case NEON::BI__builtin_neon_vld3_v:
8751
0
      case NEON::BI__builtin_neon_vld3q_v:
8752
0
      case NEON::BI__builtin_neon_vld4_v:
8753
0
      case NEON::BI__builtin_neon_vld4q_v:
8754
0
      case NEON::BI__builtin_neon_vld2_lane_v:
8755
0
      case NEON::BI__builtin_neon_vld2q_lane_v:
8756
0
      case NEON::BI__builtin_neon_vld3_lane_v:
8757
0
      case NEON::BI__builtin_neon_vld3q_lane_v:
8758
0
      case NEON::BI__builtin_neon_vld4_lane_v:
8759
0
      case NEON::BI__builtin_neon_vld4q_lane_v:
8760
0
      case NEON::BI__builtin_neon_vld2_dup_v:
8761
0
      case NEON::BI__builtin_neon_vld2q_dup_v:
8762
0
      case NEON::BI__builtin_neon_vld3_dup_v:
8763
0
      case NEON::BI__builtin_neon_vld3q_dup_v:
8764
0
      case NEON::BI__builtin_neon_vld4_dup_v:
8765
0
      case NEON::BI__builtin_neon_vld4q_dup_v:
8766
        // Get the alignment for the argument in addition to the value;
8767
        // we'll use it later.
8768
0
        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8769
0
        Ops.push_back(PtrOp1.getPointer());
8770
0
        continue;
8771
0
      }
8772
0
    }
8773
8774
0
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
8775
0
  }
8776
8777
0
  switch (BuiltinID) {
8778
0
  default: break;
8779
8780
0
  case NEON::BI__builtin_neon_vget_lane_i8:
8781
0
  case NEON::BI__builtin_neon_vget_lane_i16:
8782
0
  case NEON::BI__builtin_neon_vget_lane_i32:
8783
0
  case NEON::BI__builtin_neon_vget_lane_i64:
8784
0
  case NEON::BI__builtin_neon_vget_lane_bf16:
8785
0
  case NEON::BI__builtin_neon_vget_lane_f32:
8786
0
  case NEON::BI__builtin_neon_vgetq_lane_i8:
8787
0
  case NEON::BI__builtin_neon_vgetq_lane_i16:
8788
0
  case NEON::BI__builtin_neon_vgetq_lane_i32:
8789
0
  case NEON::BI__builtin_neon_vgetq_lane_i64:
8790
0
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
8791
0
  case NEON::BI__builtin_neon_vgetq_lane_f32:
8792
0
  case NEON::BI__builtin_neon_vduph_lane_bf16:
8793
0
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
8794
0
    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
8795
8796
0
  case NEON::BI__builtin_neon_vrndns_f32: {
8797
0
    Value *Arg = EmitScalarExpr(E->getArg(0));
8798
0
    llvm::Type *Tys[] = {Arg->getType()};
8799
0
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
8800
0
    return Builder.CreateCall(F, {Arg}, "vrndn"); }
8801
8802
0
  case NEON::BI__builtin_neon_vset_lane_i8:
8803
0
  case NEON::BI__builtin_neon_vset_lane_i16:
8804
0
  case NEON::BI__builtin_neon_vset_lane_i32:
8805
0
  case NEON::BI__builtin_neon_vset_lane_i64:
8806
0
  case NEON::BI__builtin_neon_vset_lane_bf16:
8807
0
  case NEON::BI__builtin_neon_vset_lane_f32:
8808
0
  case NEON::BI__builtin_neon_vsetq_lane_i8:
8809
0
  case NEON::BI__builtin_neon_vsetq_lane_i16:
8810
0
  case NEON::BI__builtin_neon_vsetq_lane_i32:
8811
0
  case NEON::BI__builtin_neon_vsetq_lane_i64:
8812
0
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
8813
0
  case NEON::BI__builtin_neon_vsetq_lane_f32:
8814
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
8815
8816
0
  case NEON::BI__builtin_neon_vsha1h_u32:
8817
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
8818
0
                        "vsha1h");
8819
0
  case NEON::BI__builtin_neon_vsha1cq_u32:
8820
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
8821
0
                        "vsha1h");
8822
0
  case NEON::BI__builtin_neon_vsha1pq_u32:
8823
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
8824
0
                        "vsha1h");
8825
0
  case NEON::BI__builtin_neon_vsha1mq_u32:
8826
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
8827
0
                        "vsha1h");
8828
8829
0
  case NEON::BI__builtin_neon_vcvth_bf16_f32: {
8830
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
8831
0
                        "vcvtbfp2bf");
8832
0
  }
8833
8834
  // The ARM _MoveToCoprocessor builtins put the input register value as
8835
  // the first argument, but the LLVM intrinsic expects it as the third one.
8836
0
  case clang::ARM::BI_MoveToCoprocessor:
8837
0
  case clang::ARM::BI_MoveToCoprocessor2: {
8838
0
    Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
8839
0
                                       ? Intrinsic::arm_mcr
8840
0
                                       : Intrinsic::arm_mcr2);
8841
0
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
8842
0
                                  Ops[3], Ops[4], Ops[5]});
8843
0
  }
8844
0
  }
8845
8846
  // Get the last argument, which specifies the vector type.
8847
0
  assert(HasExtraArg);
8848
0
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
8849
0
  std::optional<llvm::APSInt> Result =
8850
0
      Arg->getIntegerConstantExpr(getContext());
8851
0
  if (!Result)
8852
0
    return nullptr;
8853
8854
0
  if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
8855
0
      BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8856
    // Determine the overloaded type of this builtin.
8857
0
    llvm::Type *Ty;
8858
0
    if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
8859
0
      Ty = FloatTy;
8860
0
    else
8861
0
      Ty = DoubleTy;
8862
8863
    // Determine whether this is an unsigned conversion or not.
8864
0
    bool usgn = Result->getZExtValue() == 1;
8865
0
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
8866
8867
    // Call the appropriate intrinsic.
8868
0
    Function *F = CGM.getIntrinsic(Int, Ty);
8869
0
    return Builder.CreateCall(F, Ops, "vcvtr");
8870
0
  }
8871
8872
  // Determine the type of this overloaded NEON intrinsic.
8873
0
  NeonTypeFlags Type = Result->getZExtValue();
8874
0
  bool usgn = Type.isUnsigned();
8875
0
  bool rightShift = false;
8876
8877
0
  llvm::FixedVectorType *VTy =
8878
0
      GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
8879
0
                  getTarget().hasBFloat16Type());
8880
0
  llvm::Type *Ty = VTy;
8881
0
  if (!Ty)
8882
0
    return nullptr;
8883
8884
  // Many NEON builtins have identical semantics and uses in ARM and
8885
  // AArch64. Emit these in a single function.
8886
0
  auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
8887
0
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
8888
0
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
8889
0
  if (Builtin)
8890
0
    return EmitCommonNeonBuiltinExpr(
8891
0
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8892
0
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
8893
8894
0
  unsigned Int;
8895
0
  switch (BuiltinID) {
8896
0
  default: return nullptr;
8897
0
  case NEON::BI__builtin_neon_vld1q_lane_v:
8898
    // Handle 64-bit integer elements as a special case.  Use shuffles of
8899
    // one-element vectors to avoid poor code for i64 in the backend.
8900
0
    if (VTy->getElementType()->isIntegerTy(64)) {
8901
      // Extract the other lane.
8902
0
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8903
0
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
8904
0
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
8905
0
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8906
      // Load the value as a one-element vector.
8907
0
      Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
8908
0
      llvm::Type *Tys[] = {Ty, Int8PtrTy};
8909
0
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
8910
0
      Value *Align = getAlignmentValue32(PtrOp0);
8911
0
      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
8912
      // Combine them.
8913
0
      int Indices[] = {1 - Lane, Lane};
8914
0
      return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
8915
0
    }
8916
0
    [[fallthrough]];
8917
0
  case NEON::BI__builtin_neon_vld1_lane_v: {
8918
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8919
0
    PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8920
0
    Value *Ld = Builder.CreateLoad(PtrOp0);
8921
0
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
8922
0
  }
8923
0
  case NEON::BI__builtin_neon_vqrshrn_n_v:
8924
0
    Int =
8925
0
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
8926
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
8927
0
                        1, true);
8928
0
  case NEON::BI__builtin_neon_vqrshrun_n_v:
8929
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
8930
0
                        Ops, "vqrshrun_n", 1, true);
8931
0
  case NEON::BI__builtin_neon_vqshrn_n_v:
8932
0
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
8933
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
8934
0
                        1, true);
8935
0
  case NEON::BI__builtin_neon_vqshrun_n_v:
8936
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
8937
0
                        Ops, "vqshrun_n", 1, true);
8938
0
  case NEON::BI__builtin_neon_vrecpe_v:
8939
0
  case NEON::BI__builtin_neon_vrecpeq_v:
8940
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
8941
0
                        Ops, "vrecpe");
8942
0
  case NEON::BI__builtin_neon_vrshrn_n_v:
8943
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
8944
0
                        Ops, "vrshrn_n", 1, true);
8945
0
  case NEON::BI__builtin_neon_vrsra_n_v:
8946
0
  case NEON::BI__builtin_neon_vrsraq_n_v:
8947
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8948
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8949
0
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
8950
0
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
8951
0
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
8952
0
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
8953
0
  case NEON::BI__builtin_neon_vsri_n_v:
8954
0
  case NEON::BI__builtin_neon_vsriq_n_v:
8955
0
    rightShift = true;
8956
0
    [[fallthrough]];
8957
0
  case NEON::BI__builtin_neon_vsli_n_v:
8958
0
  case NEON::BI__builtin_neon_vsliq_n_v:
8959
0
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
8960
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
8961
0
                        Ops, "vsli_n");
8962
0
  case NEON::BI__builtin_neon_vsra_n_v:
8963
0
  case NEON::BI__builtin_neon_vsraq_n_v:
8964
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8965
0
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8966
0
    return Builder.CreateAdd(Ops[0], Ops[1]);
8967
0
  case NEON::BI__builtin_neon_vst1q_lane_v:
8968
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
8969
    // a one-element vector and avoid poor code for i64 in the backend.
8970
0
    if (VTy->getElementType()->isIntegerTy(64)) {
8971
0
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8972
0
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
8973
0
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
8974
0
      Ops[2] = getAlignmentValue32(PtrOp0);
8975
0
      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
8976
0
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
8977
0
                                                 Tys), Ops);
8978
0
    }
8979
0
    [[fallthrough]];
8980
0
  case NEON::BI__builtin_neon_vst1_lane_v: {
8981
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8982
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8983
0
    return Builder.CreateStore(Ops[1],
8984
0
                               PtrOp0.withElementType(Ops[1]->getType()));
8985
0
  }
8986
0
  case NEON::BI__builtin_neon_vtbl1_v:
8987
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
8988
0
                        Ops, "vtbl1");
8989
0
  case NEON::BI__builtin_neon_vtbl2_v:
8990
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
8991
0
                        Ops, "vtbl2");
8992
0
  case NEON::BI__builtin_neon_vtbl3_v:
8993
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
8994
0
                        Ops, "vtbl3");
8995
0
  case NEON::BI__builtin_neon_vtbl4_v:
8996
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
8997
0
                        Ops, "vtbl4");
8998
0
  case NEON::BI__builtin_neon_vtbx1_v:
8999
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9000
0
                        Ops, "vtbx1");
9001
0
  case NEON::BI__builtin_neon_vtbx2_v:
9002
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9003
0
                        Ops, "vtbx2");
9004
0
  case NEON::BI__builtin_neon_vtbx3_v:
9005
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9006
0
                        Ops, "vtbx3");
9007
0
  case NEON::BI__builtin_neon_vtbx4_v:
9008
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9009
0
                        Ops, "vtbx4");
9010
0
  }
9011
0
}
9012
9013
template<typename Integer>
9014
0
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9015
0
  return E->getIntegerConstantExpr(Context)->getExtValue();
9016
0
}
9017
9018
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9019
0
                                     llvm::Type *T, bool Unsigned) {
9020
  // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9021
  // which finds it convenient to specify signed/unsigned as a boolean flag.
9022
0
  return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9023
0
}
9024
9025
static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9026
0
                                    uint32_t Shift, bool Unsigned) {
9027
  // MVE helper function for integer shift right. This must handle signed vs
9028
  // unsigned, and also deal specially with the case where the shift count is
9029
  // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9030
  // undefined behavior, but in MVE it's legal, so we must convert it to code
9031
  // that is not undefined in IR.
9032
0
  unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9033
0
                          ->getElementType()
9034
0
                          ->getPrimitiveSizeInBits();
9035
0
  if (Shift == LaneBits) {
9036
    // An unsigned shift of the full lane size always generates zero, so we can
9037
    // simply emit a zero vector. A signed shift of the full lane size does the
9038
    // same thing as shifting by one bit fewer.
9039
0
    if (Unsigned)
9040
0
      return llvm::Constant::getNullValue(V->getType());
9041
0
    else
9042
0
      --Shift;
9043
0
  }
9044
0
  return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9045
0
}
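A scalar model of the rule above, assuming 32-bit lanes (the lane width is illustrative): a shift count equal to the lane size is legal in MVE but would be undefined in IR, so it is rewritten before the shift is emitted.

#include <cstdint>

static uint32_t mve_shr_lane(uint32_t v, unsigned shift, bool isUnsigned) {
  const unsigned laneBits = 32;
  if (shift == laneBits) {
    if (isUnsigned)
      return 0;            // unsigned full-width shift always produces zero
    shift = laneBits - 1;  // signed full-width shift acts like shifting by one bit fewer
  }
  return isUnsigned ? v >> shift
                    : static_cast<uint32_t>(static_cast<int32_t>(v) >> shift);
}

int main() {
  // A signed shift by the full lane size smears the sign bit across the lane.
  return mve_shr_lane(0x80000000u, 32, /*isUnsigned=*/false) == 0xFFFFFFFFu ? 0 : 1;
}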
9046
9047
0
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9048
  // MVE-specific helper function for a vector splat, which infers the element
9049
  // count of the output vector by knowing that MVE vectors are all 128 bits
9050
  // wide.
9051
0
  unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9052
0
  return Builder.CreateVectorSplat(Elements, V);
9053
0
}
9054
9055
static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9056
                                            CodeGenFunction *CGF,
9057
                                            llvm::Value *V,
9058
0
                                            llvm::Type *DestType) {
9059
  // Convert one MVE vector type into another by reinterpreting its in-register
9060
  // format.
9061
  //
9062
  // On a little-endian target this is identical to a bitcast (which
9063
  // reinterprets the memory format). On a big-endian target they are not
9064
  // necessarily the same, because the register and memory formats map to
9065
  // each other differently depending on the lane size.
9066
  //
9067
  // We generate a bitcast whenever we can (if we're little-endian, or if the
9068
  // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9069
  // that performs the different kind of reinterpretation.
9070
0
  if (CGF->getTarget().isBigEndian() &&
9071
0
      V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9072
0
    return Builder.CreateCall(
9073
0
        CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9074
0
                              {DestType, V->getType()}),
9075
0
        V);
9076
0
  } else {
9077
0
    return Builder.CreateBitCast(V, DestType);
9078
0
  }
9079
0
}
9080
9081
0
static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9082
  // Make a shufflevector that extracts every other element of a vector (evens
9083
  // or odds, as desired).
9084
0
  SmallVector<int, 16> Indices;
9085
0
  unsigned InputElements =
9086
0
      cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9087
0
  for (unsigned i = 0; i < InputElements; i += 2)
9088
0
    Indices.push_back(i + Odd);
9089
0
  return Builder.CreateShuffleVector(V, Indices);
9090
0
}
9091
9092
static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9093
0
                              llvm::Value *V1) {
9094
  // Make a shufflevector that interleaves two vectors element by element.
9095
0
  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9096
0
  SmallVector<int, 16> Indices;
9097
0
  unsigned InputElements =
9098
0
      cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9099
0
  for (unsigned i = 0; i < InputElements; i++) {
9100
0
    Indices.push_back(i);
9101
0
    Indices.push_back(i + InputElements);
9102
0
  }
9103
0
  return Builder.CreateShuffleVector(V0, V1, Indices);
9104
0
}
9105
9106
template<unsigned HighBit, unsigned OtherBits>
9107
0
static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9108
  // MVE-specific helper function to make a vector splat of a constant such as
9109
  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9110
0
  llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9111
0
  unsigned LaneBits = T->getPrimitiveSizeInBits();
9112
0
  uint32_t Value = HighBit << (LaneBits - 1);
9113
0
  if (OtherBits)
9114
0
    Value |= (1UL << (LaneBits - 1)) - 1;
9115
0
  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9116
0
  return ARMMVEVectorSplat(Builder, Lane);
9117
0
}
Unexecuted instantiation: CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<1u, 0u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
Unexecuted instantiation: CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<0u, 1u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
Unexecuted instantiation: CGBuiltin.cpp:llvm::Value* ARMMVEConstantSplat<1u, 1u>(clang::CodeGen::CGBuilderTy&, llvm::Type*)
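The per-lane constant is the requested top bit, optionally combined with an all-ones fill of the lower bits, so the three instantiations listed above produce the INT_MIN, INT_MAX and all-ones patterns. A standalone sketch that evaluates the same expression for 16-bit lanes (the lane width is illustrative):

#include <cstdint>
#include <cstdio>

static uint32_t lane_constant(unsigned highBit, unsigned otherBits, unsigned laneBits) {
  uint32_t value = highBit << (laneBits - 1);  // the top bit of the lane
  if (otherBits)
    value |= (1u << (laneBits - 1)) - 1;       // fill every bit below it with ones
  return value;
}

int main() {
  std::printf("%04x %04x %04x\n",
              lane_constant(1, 0, 16),   // 8000: INT16_MIN pattern
              lane_constant(0, 1, 16),   // 7fff: INT16_MAX pattern
              lane_constant(1, 1, 16));  // ffff: all-ones / UINT16_MAX pattern
}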
9118
9119
static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9120
                                               llvm::Value *V,
9121
0
                                               unsigned ReverseWidth) {
9122
  // MVE-specific helper function which reverses the elements of a
9123
  // vector within every (ReverseWidth)-bit collection of lanes.
9124
0
  SmallVector<int, 16> Indices;
9125
0
  unsigned LaneSize = V->getType()->getScalarSizeInBits();
9126
0
  unsigned Elements = 128 / LaneSize;
9127
0
  unsigned Mask = ReverseWidth / LaneSize - 1;
9128
0
  for (unsigned i = 0; i < Elements; i++)
9129
0
    Indices.push_back(i ^ Mask);
9130
0
  return Builder.CreateShuffleVector(V, Indices);
9131
0
}
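The XOR trick above reverses lane positions within every group of ReverseWidth / LaneSize lanes. A sketch that prints the resulting shuffle indices, assuming 8-bit lanes reversed within 32-bit groups (both widths are illustrative):

#include <cstdio>

int main() {
  const unsigned laneSize = 8, reverseWidth = 32;  // illustrative widths
  const unsigned elements = 128 / laneSize;        // MVE vectors are 128 bits wide
  const unsigned mask = reverseWidth / laneSize - 1;
  for (unsigned i = 0; i < elements; i++)
    std::printf("%u ", i ^ mask);                  // 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12
  std::printf("\n");
}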
9132
9133
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9134
                                              const CallExpr *E,
9135
                                              ReturnValueSlot ReturnValue,
9136
0
                                              llvm::Triple::ArchType Arch) {
9137
0
  enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9138
0
  Intrinsic::ID IRIntr;
9139
0
  unsigned NumVectors;
9140
9141
  // Code autogenerated by Tablegen will handle all the simple builtins.
9142
0
  switch (BuiltinID) {
9143
0
    #include "clang/Basic/arm_mve_builtin_cg.inc"
9144
9145
    // If we didn't match an MVE builtin id at all, go back to the
9146
    // main EmitARMBuiltinExpr.
9147
0
  default:
9148
0
    return nullptr;
9149
0
  }
9150
9151
  // Anything that breaks from that switch is an MVE builtin that
9152
  // needs handwritten code to generate.
9153
9154
0
  switch (CustomCodeGenType) {
9155
9156
0
  case CustomCodeGen::VLD24: {
9157
0
    llvm::SmallVector<Value *, 4> Ops;
9158
0
    llvm::SmallVector<llvm::Type *, 4> Tys;
9159
9160
0
    auto MvecCType = E->getType();
9161
0
    auto MvecLType = ConvertType(MvecCType);
9162
0
    assert(MvecLType->isStructTy() &&
9163
0
           "Return type for vld[24]q should be a struct");
9164
0
    assert(MvecLType->getStructNumElements() == 1 &&
9165
0
           "Return-type struct for vld[24]q should have one element");
9166
0
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
9167
0
    assert(MvecLTypeInner->isArrayTy() &&
9168
0
           "Return-type struct for vld[24]q should contain an array");
9169
0
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9170
0
           "Array member of return-type struct vld[24]q has wrong length");
9171
0
    auto VecLType = MvecLTypeInner->getArrayElementType();
9172
9173
0
    Tys.push_back(VecLType);
9174
9175
0
    auto Addr = E->getArg(0);
9176
0
    Ops.push_back(EmitScalarExpr(Addr));
9177
0
    Tys.push_back(ConvertType(Addr->getType()));
9178
9179
0
    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9180
0
    Value *LoadResult = Builder.CreateCall(F, Ops);
9181
0
    Value *MvecOut = PoisonValue::get(MvecLType);
9182
0
    for (unsigned i = 0; i < NumVectors; ++i) {
9183
0
      Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9184
0
      MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9185
0
    }
9186
9187
0
    if (ReturnValue.isNull())
9188
0
      return MvecOut;
9189
0
    else
9190
0
      return Builder.CreateStore(MvecOut, ReturnValue.getValue());
9191
0
  }
9192
9193
0
  case CustomCodeGen::VST24: {
9194
0
    llvm::SmallVector<Value *, 4> Ops;
9195
0
    llvm::SmallVector<llvm::Type *, 4> Tys;
9196
9197
0
    auto Addr = E->getArg(0);
9198
0
    Ops.push_back(EmitScalarExpr(Addr));
9199
0
    Tys.push_back(ConvertType(Addr->getType()));
9200
9201
0
    auto MvecCType = E->getArg(1)->getType();
9202
0
    auto MvecLType = ConvertType(MvecCType);
9203
0
    assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9204
0
    assert(MvecLType->getStructNumElements() == 1 &&
9205
0
           "Data-type struct for vst2q should have one element");
9206
0
    auto MvecLTypeInner = MvecLType->getStructElementType(0);
9207
0
    assert(MvecLTypeInner->isArrayTy() &&
9208
0
           "Data-type struct for vst2q should contain an array");
9209
0
    assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9210
0
           "Array member of return-type struct vld[24]q has wrong length");
9211
0
    auto VecLType = MvecLTypeInner->getArrayElementType();
9212
9213
0
    Tys.push_back(VecLType);
9214
9215
0
    AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9216
0
    EmitAggExpr(E->getArg(1), MvecSlot);
9217
0
    auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9218
0
    for (unsigned i = 0; i < NumVectors; i++)
9219
0
      Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9220
9221
0
    Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9222
0
    Value *ToReturn = nullptr;
9223
0
    for (unsigned i = 0; i < NumVectors; i++) {
9224
0
      Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9225
0
      ToReturn = Builder.CreateCall(F, Ops);
9226
0
      Ops.pop_back();
9227
0
    }
9228
0
    return ToReturn;
9229
0
  }
9230
0
  }
9231
0
  llvm_unreachable("unknown custom codegen type.");
9232
0
}
9233
9234
Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9235
                                              const CallExpr *E,
9236
                                              ReturnValueSlot ReturnValue,
9237
0
                                              llvm::Triple::ArchType Arch) {
9238
0
  switch (BuiltinID) {
9239
0
  default:
9240
0
    return nullptr;
9241
0
#include "clang/Basic/arm_cde_builtin_cg.inc"
9242
0
  }
9243
0
}
9244
9245
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9246
                                      const CallExpr *E,
9247
                                      SmallVectorImpl<Value *> &Ops,
9248
0
                                      llvm::Triple::ArchType Arch) {
9249
0
  unsigned int Int = 0;
9250
0
  const char *s = nullptr;
9251
9252
0
  switch (BuiltinID) {
9253
0
  default:
9254
0
    return nullptr;
9255
0
  case NEON::BI__builtin_neon_vtbl1_v:
9256
0
  case NEON::BI__builtin_neon_vqtbl1_v:
9257
0
  case NEON::BI__builtin_neon_vqtbl1q_v:
9258
0
  case NEON::BI__builtin_neon_vtbl2_v:
9259
0
  case NEON::BI__builtin_neon_vqtbl2_v:
9260
0
  case NEON::BI__builtin_neon_vqtbl2q_v:
9261
0
  case NEON::BI__builtin_neon_vtbl3_v:
9262
0
  case NEON::BI__builtin_neon_vqtbl3_v:
9263
0
  case NEON::BI__builtin_neon_vqtbl3q_v:
9264
0
  case NEON::BI__builtin_neon_vtbl4_v:
9265
0
  case NEON::BI__builtin_neon_vqtbl4_v:
9266
0
  case NEON::BI__builtin_neon_vqtbl4q_v:
9267
0
    break;
9268
0
  case NEON::BI__builtin_neon_vtbx1_v:
9269
0
  case NEON::BI__builtin_neon_vqtbx1_v:
9270
0
  case NEON::BI__builtin_neon_vqtbx1q_v:
9271
0
  case NEON::BI__builtin_neon_vtbx2_v:
9272
0
  case NEON::BI__builtin_neon_vqtbx2_v:
9273
0
  case NEON::BI__builtin_neon_vqtbx2q_v:
9274
0
  case NEON::BI__builtin_neon_vtbx3_v:
9275
0
  case NEON::BI__builtin_neon_vqtbx3_v:
9276
0
  case NEON::BI__builtin_neon_vqtbx3q_v:
9277
0
  case NEON::BI__builtin_neon_vtbx4_v:
9278
0
  case NEON::BI__builtin_neon_vqtbx4_v:
9279
0
  case NEON::BI__builtin_neon_vqtbx4q_v:
9280
0
    break;
9281
0
  }
9282
9283
0
  assert(E->getNumArgs() >= 3);
9284
9285
  // Get the last argument, which specifies the vector type.
9286
0
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9287
0
  std::optional<llvm::APSInt> Result =
9288
0
      Arg->getIntegerConstantExpr(CGF.getContext());
9289
0
  if (!Result)
9290
0
    return nullptr;
9291
9292
  // Determine the type of this overloaded NEON intrinsic.
9293
0
  NeonTypeFlags Type = Result->getZExtValue();
9294
0
  llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9295
0
  if (!Ty)
9296
0
    return nullptr;
9297
9298
0
  CodeGen::CGBuilderTy &Builder = CGF.Builder;
9299
9300
  // AArch64 scalar builtins are not overloaded, they do not have an extra
9301
  // argument that specifies the vector type, so we need to handle each case.
9302
0
  switch (BuiltinID) {
9303
0
  case NEON::BI__builtin_neon_vtbl1_v: {
9304
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9305
0
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9306
0
  }
9307
0
  case NEON::BI__builtin_neon_vtbl2_v: {
9308
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9309
0
                              Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9310
0
  }
9311
0
  case NEON::BI__builtin_neon_vtbl3_v: {
9312
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9313
0
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9314
0
  }
9315
0
  case NEON::BI__builtin_neon_vtbl4_v: {
9316
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9317
0
                              Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9318
0
  }
9319
0
  case NEON::BI__builtin_neon_vtbx1_v: {
9320
0
    Value *TblRes =
9321
0
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9322
0
                           Intrinsic::aarch64_neon_tbl1, "vtbl1");
9323
9324
0
    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9325
0
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9326
0
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
9327
9328
0
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9329
0
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9330
0
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9331
0
  }
9332
0
  case NEON::BI__builtin_neon_vtbx2_v: {
9333
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9334
0
                              Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9335
0
  }
9336
0
  case NEON::BI__builtin_neon_vtbx3_v: {
9337
0
    Value *TblRes =
9338
0
        packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9339
0
                           Intrinsic::aarch64_neon_tbl2, "vtbl2");
9340
9341
0
    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9342
0
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9343
0
                                           TwentyFourV);
9344
0
    CmpRes = Builder.CreateSExt(CmpRes, Ty);
9345
9346
0
    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9347
0
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9348
0
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9349
0
  }
9350
0
  case NEON::BI__builtin_neon_vtbx4_v: {
9351
0
    return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9352
0
                              Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9353
0
  }
9354
0
  case NEON::BI__builtin_neon_vqtbl1_v:
9355
0
  case NEON::BI__builtin_neon_vqtbl1q_v:
9356
0
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9357
0
  case NEON::BI__builtin_neon_vqtbl2_v:
9358
0
  case NEON::BI__builtin_neon_vqtbl2q_v: {
9359
0
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9360
0
  case NEON::BI__builtin_neon_vqtbl3_v:
9361
0
  case NEON::BI__builtin_neon_vqtbl3q_v:
9362
0
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9363
0
  case NEON::BI__builtin_neon_vqtbl4_v:
9364
0
  case NEON::BI__builtin_neon_vqtbl4q_v:
9365
0
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9366
0
  case NEON::BI__builtin_neon_vqtbx1_v:
9367
0
  case NEON::BI__builtin_neon_vqtbx1q_v:
9368
0
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9369
0
  case NEON::BI__builtin_neon_vqtbx2_v:
9370
0
  case NEON::BI__builtin_neon_vqtbx2q_v:
9371
0
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9372
0
  case NEON::BI__builtin_neon_vqtbx3_v:
9373
0
  case NEON::BI__builtin_neon_vqtbx3q_v:
9374
0
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9375
0
  case NEON::BI__builtin_neon_vqtbx4_v:
9376
0
  case NEON::BI__builtin_neon_vqtbx4q_v:
9377
0
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9378
0
  }
9379
0
  }
9380
9381
0
  if (!Int)
9382
0
    return nullptr;
9383
9384
0
  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9385
0
  return CGF.EmitNeonCall(F, Ops, s);
9386
0
}
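// Editor's note: a per-lane model of the vtbx1 lowering above, added as an
// illustrative sketch (hypothetical helper, not part of the original source).
// A one-register TBX keeps the destination lane when the index is out of
// range (>= 8 for a single 64-bit table), whereas TBL yields 0 there, so the
// lowering blends the TBL result with the original destination using a
// sign-extended compare as the mask.
static unsigned char vtbx1LaneModel(unsigned char Dest, unsigned char TblRes,
                                    unsigned char Index) {
  unsigned char Keep = (Index >= 8) ? 0xFF : 0x00; // ICMP_UGE + sext above.
  return (Keep & Dest) | (unsigned char)(~Keep & TblRes);
}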
9387
9388
0
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9389
0
  auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9390
0
  Op = Builder.CreateBitCast(Op, Int16Ty);
9391
0
  Value *V = PoisonValue::get(VTy);
9392
0
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9393
0
  Op = Builder.CreateInsertElement(V, Op, CI);
9394
0
  return Op;
9395
0
}
9396
9397
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9398
/// access builtin.  Only required if it can't be inferred from the base pointer
9399
/// operand.
9400
0
llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9401
0
  switch (TypeFlags.getMemEltType()) {
9402
0
  case SVETypeFlags::MemEltTyDefault:
9403
0
    return getEltType(TypeFlags);
9404
0
  case SVETypeFlags::MemEltTyInt8:
9405
0
    return Builder.getInt8Ty();
9406
0
  case SVETypeFlags::MemEltTyInt16:
9407
0
    return Builder.getInt16Ty();
9408
0
  case SVETypeFlags::MemEltTyInt32:
9409
0
    return Builder.getInt32Ty();
9410
0
  case SVETypeFlags::MemEltTyInt64:
9411
0
    return Builder.getInt64Ty();
9412
0
  }
9413
0
  llvm_unreachable("Unknown MemEltType");
9414
0
}
9415
9416
0
llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9417
0
  switch (TypeFlags.getEltType()) {
9418
0
  default:
9419
0
    llvm_unreachable("Invalid SVETypeFlag!");
9420
9421
0
  case SVETypeFlags::EltTyInt8:
9422
0
    return Builder.getInt8Ty();
9423
0
  case SVETypeFlags::EltTyInt16:
9424
0
    return Builder.getInt16Ty();
9425
0
  case SVETypeFlags::EltTyInt32:
9426
0
    return Builder.getInt32Ty();
9427
0
  case SVETypeFlags::EltTyInt64:
9428
0
    return Builder.getInt64Ty();
9429
0
  case SVETypeFlags::EltTyInt128:
9430
0
    return Builder.getInt128Ty();
9431
9432
0
  case SVETypeFlags::EltTyFloat16:
9433
0
    return Builder.getHalfTy();
9434
0
  case SVETypeFlags::EltTyFloat32:
9435
0
    return Builder.getFloatTy();
9436
0
  case SVETypeFlags::EltTyFloat64:
9437
0
    return Builder.getDoubleTy();
9438
9439
0
  case SVETypeFlags::EltTyBFloat16:
9440
0
    return Builder.getBFloatTy();
9441
9442
0
  case SVETypeFlags::EltTyBool8:
9443
0
  case SVETypeFlags::EltTyBool16:
9444
0
  case SVETypeFlags::EltTyBool32:
9445
0
  case SVETypeFlags::EltTyBool64:
9446
0
    return Builder.getInt1Ty();
9447
0
  }
9448
0
}
9449
9450
// Return the llvm predicate vector type corresponding to the specified element
9451
// TypeFlags.
9452
llvm::ScalableVectorType *
9453
0
CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9454
0
  switch (TypeFlags.getEltType()) {
9455
0
  default: llvm_unreachable("Unhandled SVETypeFlag!");
9456
9457
0
  case SVETypeFlags::EltTyInt8:
9458
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9459
0
  case SVETypeFlags::EltTyInt16:
9460
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9461
0
  case SVETypeFlags::EltTyInt32:
9462
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9463
0
  case SVETypeFlags::EltTyInt64:
9464
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9465
9466
0
  case SVETypeFlags::EltTyBFloat16:
9467
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9468
0
  case SVETypeFlags::EltTyFloat16:
9469
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9470
0
  case SVETypeFlags::EltTyFloat32:
9471
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9472
0
  case SVETypeFlags::EltTyFloat64:
9473
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9474
9475
0
  case SVETypeFlags::EltTyBool8:
9476
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9477
0
  case SVETypeFlags::EltTyBool16:
9478
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9479
0
  case SVETypeFlags::EltTyBool32:
9480
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9481
0
  case SVETypeFlags::EltTyBool64:
9482
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9483
0
  }
9484
0
}
9485
9486
// Return the llvm vector type corresponding to the specified element TypeFlags.
9487
llvm::ScalableVectorType *
9488
0
CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9489
0
  switch (TypeFlags.getEltType()) {
9490
0
  default:
9491
0
    llvm_unreachable("Invalid SVETypeFlag!");
9492
9493
0
  case SVETypeFlags::EltTyInt8:
9494
0
    return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9495
0
  case SVETypeFlags::EltTyInt16:
9496
0
    return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9497
0
  case SVETypeFlags::EltTyInt32:
9498
0
    return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9499
0
  case SVETypeFlags::EltTyInt64:
9500
0
    return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9501
9502
0
  case SVETypeFlags::EltTyFloat16:
9503
0
    return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9504
0
  case SVETypeFlags::EltTyBFloat16:
9505
0
    return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9506
0
  case SVETypeFlags::EltTyFloat32:
9507
0
    return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9508
0
  case SVETypeFlags::EltTyFloat64:
9509
0
    return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9510
9511
0
  case SVETypeFlags::EltTyBool8:
9512
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9513
0
  case SVETypeFlags::EltTyBool16:
9514
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9515
0
  case SVETypeFlags::EltTyBool32:
9516
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9517
0
  case SVETypeFlags::EltTyBool64:
9518
0
    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9519
0
  }
9520
0
}
9521
9522
llvm::Value *
9523
0
CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9524
0
  Function *Ptrue =
9525
0
      CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9526
0
  return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9527
0
}
9528
9529
constexpr unsigned SVEBitsPerBlock = 128;
9530
9531
0
static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9532
0
  unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9533
0
  return llvm::ScalableVectorType::get(EltTy, NumElts);
9534
0
}
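// Editor's note (illustrative, not part of the original source): with the
// 128-bit minimum block above, i8 elements give <vscale x 16 x i8>, i16 give
// <vscale x 8 x i16>, i32 give <vscale x 4 x i32> and i64 give
// <vscale x 2 x i64>, i.e. NumElts is simply 128 / element-size-in-bits.
static_assert(SVEBitsPerBlock / 8 == 16 && SVEBitsPerBlock / 64 == 2,
              "an SVE block holds 16 byte elements or 2 doubleword elements");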
9535
9536
// Reinterpret the input predicate so that it can be used to correctly isolate
9537
// the elements of the specified datatype.
9538
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9539
0
                                             llvm::ScalableVectorType *VTy) {
9540
9541
0
  if (isa<TargetExtType>(Pred->getType()) &&
9542
0
      cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9543
0
    return Pred;
9544
9545
0
  auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9546
0
  if (Pred->getType() == RTy)
9547
0
    return Pred;
9548
9549
0
  unsigned IntID;
9550
0
  llvm::Type *IntrinsicTy;
9551
0
  switch (VTy->getMinNumElements()) {
9552
0
  default:
9553
0
    llvm_unreachable("unsupported element count!");
9554
0
  case 1:
9555
0
  case 2:
9556
0
  case 4:
9557
0
  case 8:
9558
0
    IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9559
0
    IntrinsicTy = RTy;
9560
0
    break;
9561
0
  case 16:
9562
0
    IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9563
0
    IntrinsicTy = Pred->getType();
9564
0
    break;
9565
0
  }
9566
9567
0
  Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9568
0
  Value *C = Builder.CreateCall(F, Pred);
9569
0
  assert(C->getType() == RTy && "Unexpected return type!");
9570
0
  return C;
9571
0
}
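// Editor's note: an illustrative sketch, not part of the original source.
// At the ACLE level every predicate is an svbool_t with 16 x i1 lanes per
// 128-bit block; at the IR level a predicate governing elements of Width
// bits only has 128 / Width lanes, and the convert_to/from_svbool calls
// above translate between the two layouts.
static unsigned predicateLanesForElementWidth(unsigned WidthInBits) {
  return SVEBitsPerBlock / WidthInBits; // e.g. 64-bit elements -> 2 lanes.
}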
9572
9573
Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9574
                                          SmallVectorImpl<Value *> &Ops,
9575
0
                                          unsigned IntID) {
9576
0
  auto *ResultTy = getSVEType(TypeFlags);
9577
0
  auto *OverloadedTy =
9578
0
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9579
9580
0
  Function *F = nullptr;
9581
0
  if (Ops[1]->getType()->isVectorTy())
9582
    // This is the "vector base, scalar offset" case. In order to uniquely
9583
    // map this built-in to an LLVM IR intrinsic, we need both the return type
9584
    // and the type of the vector base.
9585
0
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9586
0
  else
9587
    // This is the "scalar base, vector offset case". The type of the offset
9588
    // is encoded in the name of the intrinsic. We only need to specify the
9589
    // return type in order to uniquely map this built-in to an LLVM IR
9590
    // intrinsic.
9591
0
    F = CGM.getIntrinsic(IntID, OverloadedTy);
9592
9593
  // At the ACLE level there's only one predicate type, svbool_t, which is
9594
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9595
  // actual type being loaded. For example, when loading doubles (i64) the
9596
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9597
  // the predicate and the data being loaded must match. Cast to the type
9598
  // expected by the intrinsic. The intrinsic itself should be defined in
9599
  // a way that enforces relations between parameter types.
9600
0
  Ops[0] = EmitSVEPredicateCast(
9601
0
      Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9602
9603
  // Pass 0 when the offset is missing. This can only be applied when using
9604
  // the "vector base" addressing mode for which ACLE allows no offset. The
9605
  // corresponding LLVM IR always requires an offset.
9606
0
  if (Ops.size() == 2) {
9607
0
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9608
0
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
9609
0
  }
9610
9611
  // For "vector base, scalar index" scale the index so that it becomes a
9612
  // scalar offset.
9613
0
  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9614
0
    unsigned BytesPerElt =
9615
0
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9616
0
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9617
0
  }
9618
9619
0
  Value *Call = Builder.CreateCall(F, Ops);
9620
9621
  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9622
  // other cases it's folded into a nop.
9623
0
  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9624
0
                                  : Builder.CreateSExt(Call, ResultTy);
9625
0
}
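// Editor's note: an illustrative sketch of the index scaling above, not part
// of the original source. For the "vector base, scalar index" form the index
// counts elements, but the intrinsic wants a byte offset, so the index is
// shifted left by log2 of the element size; e.g. index 3 with 8-byte (64-bit)
// elements becomes byte offset 24.
static uint64_t gatherIndexToByteOffset(uint64_t Index, unsigned BytesPerElt) {
  return Index << Log2_32(BytesPerElt); // Mirrors the CreateShl above.
}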
9626
9627
Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9628
                                            SmallVectorImpl<Value *> &Ops,
9629
0
                                            unsigned IntID) {
9630
0
  auto *SrcDataTy = getSVEType(TypeFlags);
9631
0
  auto *OverloadedTy =
9632
0
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9633
9634
  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9635
  // it's the first argument. Move it accordingly.
9636
0
  Ops.insert(Ops.begin(), Ops.pop_back_val());
9637
9638
0
  Function *F = nullptr;
9639
0
  if (Ops[2]->getType()->isVectorTy())
9640
    // This is the "vector base, scalar offset" case. In order to uniquely
9641
    // map this built-in to an LLVM IR intrinsic, we need both the return type
9642
    // and the type of the vector base.
9643
0
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9644
0
  else
9645
    // This is the "scalar base, vector offset case". The type of the offset
9646
    // is encoded in the name of the intrinsic. We only need to specify the
9647
    // return type in order to uniquely map this built-in to an LLVM IR
9648
    // intrinsic.
9649
0
    F = CGM.getIntrinsic(IntID, OverloadedTy);
9650
9651
  // Pass 0 when the offset is missing. This can only be applied when using
9652
  // the "vector base" addressing mode for which ACLE allows no offset. The
9653
  // corresponding LLVM IR always requires an offset.
9654
0
  if (Ops.size() == 3) {
9655
0
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9656
0
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
9657
0
  }
9658
9659
  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9660
  // folded into a nop.
9661
0
  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9662
9663
  // At the ACLE level there's only one predicate type, svbool_t, which is
9664
  // mapped to <n x 16 x i1>. However, this might be incompatible with the
9665
  // actual type being stored. For example, when storing doubles (i64) the
9666
  // predicate should be <n x 2 x i1> instead. At the IR level the type of
9667
  // the predicate and the data being stored must match. Cast to the type
9668
  // expected by the intrinsic. The intrinsic itself should be defined in
9669
  // a way that enforces relations between parameter types.
9670
0
  Ops[1] = EmitSVEPredicateCast(
9671
0
      Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9672
9673
  // For "vector base, scalar index" scale the index so that it becomes a
9674
  // scalar offset.
9675
0
  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9676
0
    unsigned BytesPerElt =
9677
0
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9678
0
    Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9679
0
  }
9680
9681
0
  return Builder.CreateCall(F, Ops);
9682
0
}
9683
9684
Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9685
                                              SmallVectorImpl<Value *> &Ops,
9686
0
                                              unsigned IntID) {
9687
  // The gather prefetches are overloaded on the vector input - this can either
9688
  // be the vector of base addresses or vector of offsets.
9689
0
  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9690
0
  if (!OverloadedTy)
9691
0
    OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9692
9693
  // Cast the predicate from svbool_t to the right number of elements.
9694
0
  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9695
9696
  // vector + imm addressing modes
9697
0
  if (Ops[1]->getType()->isVectorTy()) {
9698
0
    if (Ops.size() == 3) {
9699
      // Pass 0 for 'vector+imm' when the index is omitted.
9700
0
      Ops.push_back(ConstantInt::get(Int64Ty, 0));
9701
9702
      // The sv_prfop is the last operand in the builtin and IR intrinsic.
9703
0
      std::swap(Ops[2], Ops[3]);
9704
0
    } else {
9705
      // Index needs to be passed as scaled offset.
9706
0
      llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9707
0
      unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9708
0
      if (BytesPerElt > 1)
9709
0
        Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9710
0
    }
9711
0
  }
9712
9713
0
  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9714
0
  return Builder.CreateCall(F, Ops);
9715
0
}
9716
9717
Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9718
                                          SmallVectorImpl<Value*> &Ops,
9719
0
                                          unsigned IntID) {
9720
0
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9721
9722
0
  unsigned N;
9723
0
  switch (IntID) {
9724
0
  case Intrinsic::aarch64_sve_ld2_sret:
9725
0
  case Intrinsic::aarch64_sve_ld1_pn_x2:
9726
0
  case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9727
0
  case Intrinsic::aarch64_sve_ld2q_sret:
9728
0
    N = 2;
9729
0
    break;
9730
0
  case Intrinsic::aarch64_sve_ld3_sret:
9731
0
  case Intrinsic::aarch64_sve_ld3q_sret:
9732
0
    N = 3;
9733
0
    break;
9734
0
  case Intrinsic::aarch64_sve_ld4_sret:
9735
0
  case Intrinsic::aarch64_sve_ld1_pn_x4:
9736
0
  case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9737
0
  case Intrinsic::aarch64_sve_ld4q_sret:
9738
0
    N = 4;
9739
0
    break;
9740
0
  default:
9741
0
    llvm_unreachable("unknown intrinsic!");
9742
0
  }
9743
0
  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9744
0
                                     VTy->getElementCount() * N);
9745
9746
0
  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9747
0
  Value *BasePtr = Ops[1];
9748
9749
  // Does the load have an offset?
9750
0
  if (Ops.size() > 2)
9751
0
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9752
9753
0
  Function *F = CGM.getIntrinsic(IntID, {VTy});
9754
0
  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9755
0
  unsigned MinElts = VTy->getMinNumElements();
9756
0
  Value *Ret = llvm::PoisonValue::get(RetTy);
9757
0
  for (unsigned I = 0; I < N; I++) {
9758
0
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9759
0
    Value *SRet = Builder.CreateExtractValue(Call, I);
9760
0
    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9761
0
  }
9762
0
  return Ret;
9763
0
}
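// Editor's note (illustrative, not part of the original source): for an ld3
// of <vscale x 4 x i32> parts, the three extracted values are inserted into
// a <vscale x 12 x i32> at element offsets 0, 4 and 8, i.e. part I starts at
// I * MinNumElements, which is the index fed to CreateInsertVector above.
static unsigned structLoadInsertOffset(unsigned PartIndex, unsigned MinElts) {
  return PartIndex * MinElts;
}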
9764
9765
Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9766
                                           SmallVectorImpl<Value*> &Ops,
9767
0
                                           unsigned IntID) {
9768
0
  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9769
9770
0
  unsigned N;
9771
0
  switch (IntID) {
9772
0
  case Intrinsic::aarch64_sve_st2:
9773
0
  case Intrinsic::aarch64_sve_st1_pn_x2:
9774
0
  case Intrinsic::aarch64_sve_stnt1_pn_x2:
9775
0
  case Intrinsic::aarch64_sve_st2q:
9776
0
    N = 2;
9777
0
    break;
9778
0
  case Intrinsic::aarch64_sve_st3:
9779
0
  case Intrinsic::aarch64_sve_st3q:
9780
0
    N = 3;
9781
0
    break;
9782
0
  case Intrinsic::aarch64_sve_st4:
9783
0
  case Intrinsic::aarch64_sve_st1_pn_x4:
9784
0
  case Intrinsic::aarch64_sve_stnt1_pn_x4:
9785
0
  case Intrinsic::aarch64_sve_st4q:
9786
0
    N = 4;
9787
0
    break;
9788
0
  default:
9789
0
    llvm_unreachable("unknown intrinsic!");
9790
0
  }
9791
9792
0
  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9793
0
  Value *BasePtr = Ops[1];
9794
9795
  // Does the store have an offset?
9796
0
  if (Ops.size() > (2 + N))
9797
0
    BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9798
9799
  // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
9800
  // need to break up the tuple vector.
9801
0
  SmallVector<llvm::Value*, 5> Operands;
9802
0
  for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
9803
0
    Operands.push_back(Ops[I]);
9804
0
  Operands.append({Predicate, BasePtr});
9805
0
  Function *F = CGM.getIntrinsic(IntID, { VTy });
9806
9807
0
  return Builder.CreateCall(F, Operands);
9808
0
}
9809
9810
// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
9811
// svpmullt_pair intrinsics, with the exception that their results are bitcast
9812
// to a wider type.
9813
Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
9814
                                     SmallVectorImpl<Value *> &Ops,
9815
0
                                     unsigned BuiltinID) {
9816
  // Splat scalar operand to vector (intrinsics with _n infix)
9817
0
  if (TypeFlags.hasSplatOperand()) {
9818
0
    unsigned OpNo = TypeFlags.getSplatOperand();
9819
0
    Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9820
0
  }
9821
9822
  // The pair-wise function has a narrower overloaded type.
9823
0
  Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
9824
0
  Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
9825
9826
  // Now bitcast to the wider result type.
9827
0
  llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
9828
0
  return EmitSVEReinterpret(Call, Ty);
9829
0
}
9830
9831
Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
9832
0
                                    ArrayRef<Value *> Ops, unsigned BuiltinID) {
9833
0
  llvm::Type *OverloadedTy = getSVEType(TypeFlags);
9834
0
  Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
9835
0
  return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
9836
0
}
9837
9838
Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
9839
                                            SmallVectorImpl<Value *> &Ops,
9840
0
                                            unsigned BuiltinID) {
9841
0
  auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9842
0
  auto *VectorTy = getSVEVectorForElementType(MemEltTy);
9843
0
  auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9844
9845
0
  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9846
0
  Value *BasePtr = Ops[1];
9847
9848
  // Apply the index operand if it was not omitted.
9849
0
  if (Ops.size() > 3)
9850
0
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9851
9852
0
  Value *PrfOp = Ops.back();
9853
9854
0
  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
9855
0
  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
9856
0
}
9857
9858
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
9859
                                          llvm::Type *ReturnTy,
9860
                                          SmallVectorImpl<Value *> &Ops,
9861
                                          unsigned IntrinsicID,
9862
0
                                          bool IsZExtReturn) {
9863
0
  QualType LangPTy = E->getArg(1)->getType();
9864
0
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9865
0
      LangPTy->castAs<PointerType>()->getPointeeType());
9866
9867
  // The vector type that is returned may be different from the
9868
  // eventual type loaded from memory.
9869
0
  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
9870
0
  llvm::ScalableVectorType *MemoryTy = nullptr;
9871
0
  llvm::ScalableVectorType *PredTy = nullptr;
9872
0
  bool IsQuadLoad = false;
9873
0
  switch (IntrinsicID) {
9874
0
  case Intrinsic::aarch64_sve_ld1uwq:
9875
0
  case Intrinsic::aarch64_sve_ld1udq:
9876
0
    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
9877
0
    PredTy = llvm::ScalableVectorType::get(
9878
0
        llvm::Type::getInt1Ty(getLLVMContext()), 1);
9879
0
    IsQuadLoad = true;
9880
0
    break;
9881
0
  default:
9882
0
    MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9883
0
    PredTy = MemoryTy;
9884
0
    break;
9885
0
  }
9886
9887
0
  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
9888
0
  Value *BasePtr = Ops[1];
9889
9890
  // Does the load have an offset?
9891
0
  if (Ops.size() > 2)
9892
0
    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9893
9894
0
  Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
9895
0
  auto *Load =
9896
0
      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
9897
0
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9898
0
  CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
9899
9900
0
  if (IsQuadLoad)
9901
0
    return Load;
9902
9903
0
  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
9904
0
                      : Builder.CreateSExt(Load, VectorTy);
9905
0
}
9906
9907
Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
9908
                                           SmallVectorImpl<Value *> &Ops,
9909
0
                                           unsigned IntrinsicID) {
9910
0
  QualType LangPTy = E->getArg(1)->getType();
9911
0
  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9912
0
      LangPTy->castAs<PointerType>()->getPointeeType());
9913
9914
  // The vector type that is stored may be different from the
9915
  // eventual type stored to memory.
9916
0
  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
9917
0
  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9918
9919
0
  auto PredTy = MemoryTy;
9920
0
  auto AddrMemoryTy = MemoryTy;
9921
0
  bool IsQuadStore = false;
9922
9923
0
  switch (IntrinsicID) {
9924
0
  case Intrinsic::aarch64_sve_st1wq:
9925
0
  case Intrinsic::aarch64_sve_st1dq:
9926
0
    AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
9927
0
    PredTy =
9928
0
        llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
9929
0
    IsQuadStore = true;
9930
0
    break;
9931
0
  default:
9932
0
    break;
9933
0
  }
9934
0
  Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
9935
0
  Value *BasePtr = Ops[1];
9936
9937
  // Does the store have an offset?
9938
0
  if (Ops.size() == 4)
9939
0
    BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
9940
9941
  // Last value is always the data
9942
0
  Value *Val =
9943
0
      IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
9944
9945
0
  Function *F =
9946
0
      CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
9947
0
  auto *Store =
9948
0
      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
9949
0
  auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
9950
0
  CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
9951
0
  return Store;
9952
0
}
9953
9954
Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
9955
                                      SmallVectorImpl<Value *> &Ops,
9956
0
                                      unsigned IntID) {
9957
0
  Ops[2] = EmitSVEPredicateCast(
9958
0
      Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
9959
9960
0
  SmallVector<Value *> NewOps;
9961
0
  NewOps.push_back(Ops[2]);
9962
9963
0
  llvm::Value *BasePtr = Ops[3];
9964
9965
  // If the intrinsic contains the vnum parameter, multiply it by the vector
9966
  // size in bytes.
9967
0
  if (Ops.size() == 5) {
9968
0
    Function *StreamingVectorLength =
9969
0
        CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
9970
0
    llvm::Value *StreamingVectorLengthCall =
9971
0
        Builder.CreateCall(StreamingVectorLength);
9972
0
    llvm::Value *Mulvl =
9973
0
        Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
9974
    // The type of the ptr parameter is void *, so use Int8Ty here.
9975
0
    BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
9976
0
  }
9977
0
  NewOps.push_back(BasePtr);
9978
0
  NewOps.push_back(Ops[0]);
9979
0
  NewOps.push_back(Ops[1]);
9980
0
  Function *F = CGM.getIntrinsic(IntID);
9981
0
  return Builder.CreateCall(F, NewOps);
9982
0
}
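// Editor's note: an illustrative sketch, not part of the original source.
// For the vnum form the base pointer is advanced by whole streaming vector
// registers, so the byte offset is vnum times the streaming vector length in
// bytes (the value produced by the cntsb call above); e.g. vnum == 2 with a
// 64-byte streaming vector length gives an offset of 128 bytes.
static uint64_t smeVnumByteOffset(uint64_t Vnum, uint64_t SVLBytes) {
  return Vnum * SVLBytes; // Matches the CreateMul/CreateGEP sequence above.
}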
9983
9984
Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
9985
                                         SmallVectorImpl<Value *> &Ops,
9986
0
                                         unsigned IntID) {
9987
0
  auto *VecTy = getSVEType(TypeFlags);
9988
0
  Function *F = CGM.getIntrinsic(IntID, VecTy);
9989
0
  if (TypeFlags.isReadZA())
9990
0
    Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
9991
0
  else if (TypeFlags.isWriteZA())
9992
0
    Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
9993
0
  return Builder.CreateCall(F, Ops);
9994
0
}
9995
9996
Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
9997
                                    SmallVectorImpl<Value *> &Ops,
9998
0
                                    unsigned IntID) {
9999
  // svzero_za() intrinsic zeros the entire za tile and has no parameters.
10000
0
  if (Ops.size() == 0)
10001
0
    Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10002
0
  Function *F = CGM.getIntrinsic(IntID, {});
10003
0
  return Builder.CreateCall(F, Ops);
10004
0
}
10005
10006
Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10007
                                      SmallVectorImpl<Value *> &Ops,
10008
0
                                      unsigned IntID) {
10009
0
  if (Ops.size() == 2)
10010
0
    Ops.push_back(Builder.getInt32(0));
10011
0
  else
10012
0
    Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10013
0
  Function *F = CGM.getIntrinsic(IntID, {});
10014
0
  return Builder.CreateCall(F, Ops);
10015
0
}
10016
10017
// Limit the usage of scalable llvm IR generated by the ACLE by using the
10018
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10019
0
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10020
0
  return Builder.CreateVectorSplat(
10021
0
      cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10022
0
}
10023
10024
0
Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
10025
0
  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10026
0
}
10027
10028
0
Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10029
  // FIXME: For big endian this needs an additional REV, or needs a separate
10030
  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10031
  // instruction is defined as 'bitwise' equivalent from memory point of
10032
  // view (when storing/reloading), whereas the svreinterpret builtin
10033
  // implements bitwise equivalent cast from register point of view.
10034
  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10035
0
  return Builder.CreateBitCast(Val, Ty);
10036
0
}
10037
10038
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10039
0
                                      SmallVectorImpl<Value *> &Ops) {
10040
0
  auto *SplatZero = Constant::getNullValue(Ty);
10041
0
  Ops.insert(Ops.begin(), SplatZero);
10042
0
}
10043
10044
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10045
0
                                       SmallVectorImpl<Value *> &Ops) {
10046
0
  auto *SplatUndef = UndefValue::get(Ty);
10047
0
  Ops.insert(Ops.begin(), SplatUndef);
10048
0
}
10049
10050
SmallVector<llvm::Type *, 2>
10051
CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10052
                                     llvm::Type *ResultType,
10053
0
                                     ArrayRef<Value *> Ops) {
10054
0
  if (TypeFlags.isOverloadNone())
10055
0
    return {};
10056
10057
0
  llvm::Type *DefaultType = getSVEType(TypeFlags);
10058
10059
0
  if (TypeFlags.isOverloadWhile())
10060
0
    return {DefaultType, Ops[1]->getType()};
10061
10062
0
  if (TypeFlags.isOverloadWhileRW())
10063
0
    return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10064
10065
0
  if (TypeFlags.isOverloadCvt())
10066
0
    return {Ops[0]->getType(), Ops.back()->getType()};
10067
10068
0
  if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10069
0
      ResultType->isVectorTy())
10070
0
    return {ResultType, Ops[1]->getType()};
10071
10072
0
  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10073
0
  return {DefaultType};
10074
0
}
10075
10076
Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10077
                                             llvm::Type *Ty,
10078
0
                                             ArrayRef<Value *> Ops) {
10079
0
  assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10080
0
         "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()");
10081
10082
0
  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10083
0
  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10084
0
                      TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10085
0
  Value *Idx = ConstantInt::get(CGM.Int64Ty,
10086
0
                                I * SingleVecTy->getMinNumElements());
10087
10088
0
  if (TypeFlags.isTupleSet())
10089
0
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10090
0
  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10091
0
}
10092
10093
Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10094
                                             llvm::Type *Ty,
10095
0
                                             ArrayRef<Value *> Ops) {
10096
0
  assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10097
10098
0
  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10099
0
  unsigned MinElts = SrcTy->getMinNumElements();
10100
0
  Value *Call = llvm::PoisonValue::get(Ty);
10101
0
  for (unsigned I = 0; I < Ops.size(); I++) {
10102
0
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10103
0
    Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10104
0
  }
10105
10106
0
  return Call;
10107
0
}
10108
10109
0
Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10110
  // Multi-vector results should be combined into a single (wide) result
10111
  // vector.
10112
0
  auto *StructTy = dyn_cast<StructType>(Call->getType());
10113
0
  if (!StructTy)
10114
0
    return Call;
10115
10116
0
  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10117
0
  if (!VTy)
10118
0
    return Call;
10119
0
  unsigned N = StructTy->getNumElements();
10120
10121
  // We may need to emit a cast to a svbool_t
10122
0
  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10123
0
  unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10124
10125
0
  ScalableVectorType *WideVTy =
10126
0
      ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10127
0
  Value *Ret = llvm::PoisonValue::get(WideVTy);
10128
0
  for (unsigned I = 0; I < N; ++I) {
10129
0
    Value *SRet = Builder.CreateExtractValue(Call, I);
10130
0
    assert(SRet->getType() == VTy && "Unexpected type for result value");
10131
0
    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10132
10133
0
    if (IsPredTy)
10134
0
      SRet = EmitSVEPredicateCast(
10135
0
          SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10136
10137
0
    Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10138
0
  }
10139
0
  Call = Ret;
10140
10141
0
  return Call;
10142
0
}
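// Editor's note (illustrative, not part of the original source): a struct
// return of N scalable vectors is flattened into one wide vector; e.g. two
// <vscale x 4 x float> parts become a single <vscale x 8 x float>. Predicate
// parts are first widened to the canonical 16 x i1 layout, so the wide
// element count is always MinElts (or 16 for predicates) times N.
static unsigned sveWideResultElementCount(unsigned MinElts, unsigned NumParts,
                                          bool IsPredicate) {
  return (IsPredicate ? 16u : MinElts) * NumParts;
}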
10143
10144
void CodeGenFunction::GetAArch64SVEProcessedOperands(
10145
    unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10146
0
    SVETypeFlags TypeFlags) {
10147
  // Find out if any arguments are required to be integer constant expressions.
10148
0
  unsigned ICEArguments = 0;
10149
0
  ASTContext::GetBuiltinTypeError Error;
10150
0
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10151
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
10152
10153
  // Tuple set/get only requires one insert/extract vector, which is
10154
  // created by EmitSVETupleSetOrGet.
10155
0
  bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10156
10157
0
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10158
0
    bool IsICE = ICEArguments & (1 << i);
10159
0
    Value *Arg = EmitScalarExpr(E->getArg(i));
10160
10161
0
    if (IsICE) {
10162
      // If this is required to be a constant, constant fold it so that we know
10163
      // that the generated intrinsic gets a ConstantInt.
10164
0
      std::optional<llvm::APSInt> Result =
10165
0
          E->getArg(i)->getIntegerConstantExpr(getContext());
10166
0
      assert(Result && "Expected argument to be a constant");
10167
10168
      // Immediates for SVE llvm intrinsics are always 32-bit.  We can safely
10169
      // truncate because the immediate has been range checked and no valid
10170
      // immediate requires more than a handful of bits.
10171
0
      *Result = Result->extOrTrunc(32);
10172
0
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10173
0
      continue;
10174
0
    }
10175
10176
0
    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10177
0
      Ops.push_back(Arg);
10178
0
      continue;
10179
0
    }
10180
10181
0
    auto *VTy = cast<ScalableVectorType>(Arg->getType());
10182
0
    unsigned MinElts = VTy->getMinNumElements();
10183
0
    bool IsPred = VTy->getElementType()->isIntegerTy(1);
10184
0
    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10185
10186
0
    if (N == 1) {
10187
0
      Ops.push_back(Arg);
10188
0
      continue;
10189
0
    }
10190
10191
0
    for (unsigned I = 0; I < N; ++I) {
10192
0
      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10193
0
      auto *NewVTy =
10194
0
          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10195
0
      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10196
0
    }
10197
0
  }
10198
0
}
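// Editor's note: an illustrative sketch, not part of the original source.
// A scalable-vector argument is treated as a tuple when its total size spans
// more than one 128-bit block (or more than 16 lanes for predicates); e.g. a
// <vscale x 8 x i32> argument has (8 * 32) / 128 == 2 parts and is split into
// two <vscale x 4 x i32> sub-vectors before the call, matching the loop above.
static unsigned sveTupleArgPartCount(unsigned MinElts, unsigned ScalarBits,
                                     bool IsPred) {
  return (MinElts * ScalarBits) / (IsPred ? 16 : 128);
}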
10199
10200
Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10201
0
                                                  const CallExpr *E) {
10202
0
  llvm::Type *Ty = ConvertType(E->getType());
10203
0
  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10204
0
      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10205
0
    Value *Val = EmitScalarExpr(E->getArg(0));
10206
0
    return EmitSVEReinterpret(Val, Ty);
10207
0
  }
10208
10209
0
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10210
0
                                              AArch64SVEIntrinsicsProvenSorted);
10211
10212
0
  llvm::SmallVector<Value *, 4> Ops;
10213
0
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10214
0
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10215
10216
0
  if (TypeFlags.isLoad())
10217
0
    return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10218
0
                             TypeFlags.isZExtReturn());
10219
0
  else if (TypeFlags.isStore())
10220
0
    return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10221
0
  else if (TypeFlags.isGatherLoad())
10222
0
    return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10223
0
  else if (TypeFlags.isScatterStore())
10224
0
    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10225
0
  else if (TypeFlags.isPrefetch())
10226
0
    return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10227
0
  else if (TypeFlags.isGatherPrefetch())
10228
0
    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10229
0
  else if (TypeFlags.isStructLoad())
10230
0
    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10231
0
  else if (TypeFlags.isStructStore())
10232
0
    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10233
0
  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10234
0
    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10235
0
  else if (TypeFlags.isTupleCreate())
10236
0
    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10237
0
  else if (TypeFlags.isUndef())
10238
0
    return UndefValue::get(Ty);
10239
0
  else if (Builtin->LLVMIntrinsic != 0) {
10240
0
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10241
0
      InsertExplicitZeroOperand(Builder, Ty, Ops);
10242
10243
0
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10244
0
      InsertExplicitUndefOperand(Builder, Ty, Ops);
10245
10246
    // Some ACLE builtins leave out the argument to specify the predicate
10247
    // pattern, which is expected to be expanded to an SV_ALL pattern.
10248
0
    if (TypeFlags.isAppendSVALL())
10249
0
      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10250
0
    if (TypeFlags.isInsertOp1SVALL())
10251
0
      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10252
10253
    // Predicates must match the main datatype.
10254
0
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10255
0
      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10256
0
        if (PredTy->getElementType()->isIntegerTy(1))
10257
0
          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10258
10259
    // Splat scalar operand to vector (intrinsics with _n infix)
10260
0
    if (TypeFlags.hasSplatOperand()) {
10261
0
      unsigned OpNo = TypeFlags.getSplatOperand();
10262
0
      Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10263
0
    }
10264
10265
0
    if (TypeFlags.isReverseCompare())
10266
0
      std::swap(Ops[1], Ops[2]);
10267
0
    else if (TypeFlags.isReverseUSDOT())
10268
0
      std::swap(Ops[1], Ops[2]);
10269
0
    else if (TypeFlags.isReverseMergeAnyBinOp() &&
10270
0
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10271
0
      std::swap(Ops[1], Ops[2]);
10272
0
    else if (TypeFlags.isReverseMergeAnyAccOp() &&
10273
0
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10274
0
      std::swap(Ops[1], Ops[3]);
10275
10276
    // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10277
0
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10278
0
      llvm::Type *OpndTy = Ops[1]->getType();
10279
0
      auto *SplatZero = Constant::getNullValue(OpndTy);
10280
0
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10281
0
    }
10282
10283
0
    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10284
0
                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
10285
0
    Value *Call = Builder.CreateCall(F, Ops);
10286
10287
    // Predicate results must be converted to svbool_t.
10288
0
    if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10289
0
      if (PredTy->getScalarType()->isIntegerTy(1))
10290
0
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10291
10292
0
    return FormSVEBuiltinResult(Call);
10293
0
  }
10294
10295
0
  switch (BuiltinID) {
10296
0
  default:
10297
0
    return nullptr;
10298
10299
0
  case SVE::BI__builtin_sve_svreinterpret_b: {
10300
0
    auto SVCountTy =
10301
0
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10302
0
    Function *CastFromSVCountF =
10303
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10304
0
    return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10305
0
  }
10306
0
  case SVE::BI__builtin_sve_svreinterpret_c: {
10307
0
    auto SVCountTy =
10308
0
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10309
0
    Function *CastToSVCountF =
10310
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10311
0
    return Builder.CreateCall(CastToSVCountF, Ops[0]);
10312
0
  }
10313
10314
0
  case SVE::BI__builtin_sve_svpsel_lane_b8:
10315
0
  case SVE::BI__builtin_sve_svpsel_lane_b16:
10316
0
  case SVE::BI__builtin_sve_svpsel_lane_b32:
10317
0
  case SVE::BI__builtin_sve_svpsel_lane_b64:
10318
0
  case SVE::BI__builtin_sve_svpsel_lane_c8:
10319
0
  case SVE::BI__builtin_sve_svpsel_lane_c16:
10320
0
  case SVE::BI__builtin_sve_svpsel_lane_c32:
10321
0
  case SVE::BI__builtin_sve_svpsel_lane_c64: {
10322
0
    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10323
0
    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10324
0
                               "aarch64.svcount")) &&
10325
0
           "Unexpected TargetExtType");
10326
0
    auto SVCountTy =
10327
0
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10328
0
    Function *CastFromSVCountF =
10329
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10330
0
    Function *CastToSVCountF =
10331
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10332
10333
0
    auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10334
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10335
0
    llvm::Value *Ops0 =
10336
0
        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10337
0
    llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10338
0
    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10339
0
    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10340
0
  }
10341
0
  case SVE::BI__builtin_sve_svmov_b_z: {
10342
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10343
0
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10344
0
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10345
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10346
0
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10347
0
  }
10348
10349
0
  case SVE::BI__builtin_sve_svnot_b_z: {
10350
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10351
0
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10352
0
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10353
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10354
0
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10355
0
  }
10356
10357
0
  case SVE::BI__builtin_sve_svmovlb_u16:
10358
0
  case SVE::BI__builtin_sve_svmovlb_u32:
10359
0
  case SVE::BI__builtin_sve_svmovlb_u64:
10360
0
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10361
10362
0
  case SVE::BI__builtin_sve_svmovlb_s16:
10363
0
  case SVE::BI__builtin_sve_svmovlb_s32:
10364
0
  case SVE::BI__builtin_sve_svmovlb_s64:
10365
0
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10366
10367
0
  case SVE::BI__builtin_sve_svmovlt_u16:
10368
0
  case SVE::BI__builtin_sve_svmovlt_u32:
10369
0
  case SVE::BI__builtin_sve_svmovlt_u64:
10370
0
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10371
10372
0
  case SVE::BI__builtin_sve_svmovlt_s16:
10373
0
  case SVE::BI__builtin_sve_svmovlt_s32:
10374
0
  case SVE::BI__builtin_sve_svmovlt_s64:
10375
0
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10376
10377
0
  case SVE::BI__builtin_sve_svpmullt_u16:
10378
0
  case SVE::BI__builtin_sve_svpmullt_u64:
10379
0
  case SVE::BI__builtin_sve_svpmullt_n_u16:
10380
0
  case SVE::BI__builtin_sve_svpmullt_n_u64:
10381
0
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10382
10383
0
  case SVE::BI__builtin_sve_svpmullb_u16:
10384
0
  case SVE::BI__builtin_sve_svpmullb_u64:
10385
0
  case SVE::BI__builtin_sve_svpmullb_n_u16:
10386
0
  case SVE::BI__builtin_sve_svpmullb_n_u64:
10387
0
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10388
10389
0
  case SVE::BI__builtin_sve_svdup_n_b8:
10390
0
  case SVE::BI__builtin_sve_svdup_n_b16:
10391
0
  case SVE::BI__builtin_sve_svdup_n_b32:
10392
0
  case SVE::BI__builtin_sve_svdup_n_b64: {
10393
0
    Value *CmpNE =
10394
0
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10395
0
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10396
0
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10397
0
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10398
0
  }
10399
10400
0
  case SVE::BI__builtin_sve_svdupq_n_b8:
10401
0
  case SVE::BI__builtin_sve_svdupq_n_b16:
10402
0
  case SVE::BI__builtin_sve_svdupq_n_b32:
10403
0
  case SVE::BI__builtin_sve_svdupq_n_b64:
10404
0
  case SVE::BI__builtin_sve_svdupq_n_u8:
10405
0
  case SVE::BI__builtin_sve_svdupq_n_s8:
10406
0
  case SVE::BI__builtin_sve_svdupq_n_u64:
10407
0
  case SVE::BI__builtin_sve_svdupq_n_f64:
10408
0
  case SVE::BI__builtin_sve_svdupq_n_s64:
10409
0
  case SVE::BI__builtin_sve_svdupq_n_u16:
10410
0
  case SVE::BI__builtin_sve_svdupq_n_f16:
10411
0
  case SVE::BI__builtin_sve_svdupq_n_bf16:
10412
0
  case SVE::BI__builtin_sve_svdupq_n_s16:
10413
0
  case SVE::BI__builtin_sve_svdupq_n_u32:
10414
0
  case SVE::BI__builtin_sve_svdupq_n_f32:
10415
0
  case SVE::BI__builtin_sve_svdupq_n_s32: {
10416
    // These builtins are implemented by building a fixed-length vector of the
10417
    // operands and using the dupq_lane intrinsic to replicate it across the register.
10418
0
    unsigned NumOpnds = Ops.size();
10419
10420
0
    bool IsBoolTy =
10421
0
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10422
10423
    // For svdupq_n_b* the element type is an integer of width 128/numelts,
10424
    // so that the compare can use the width that is natural for the expected
10425
    // number of predicate lanes.
10426
0
    llvm::Type *EltTy = Ops[0]->getType();
10427
0
    if (IsBoolTy)
10428
0
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10429
10430
0
    SmallVector<llvm::Value *, 16> VecOps;
10431
0
    for (unsigned I = 0; I < NumOpnds; ++I)
10432
0
        VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10433
0
    Value *Vec = BuildVector(VecOps);
10434
10435
0
    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10436
0
    Value *InsertSubVec = Builder.CreateInsertVector(
10437
0
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10438
10439
0
    Function *F =
10440
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10441
0
    Value *DupQLane =
10442
0
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10443
10444
0
    if (!IsBoolTy)
10445
0
      return DupQLane;
10446
10447
0
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
10448
0
    Value *Pred = EmitSVEAllTruePred(TypeFlags);
10449
10450
    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10451
0
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10452
0
                                       : Intrinsic::aarch64_sve_cmpne_wide,
10453
0
                         OverloadedTy);
10454
0
    Value *Call = Builder.CreateCall(
10455
0
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10456
0
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10457
0
  }
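
A small usage sketch of the predicate form handled above (assuming <arm_sve.h>); each of the sixteen arguments describes one byte lane of a 128-bit block, and the pattern repeats across the whole vector:

#include <arm_sve.h>

// Predicate that is true in every even byte lane of each 128-bit block.
svbool_t alternate_byte_lanes(void) {
  return svdupq_n_b8(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0);
}
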
10458
10459
0
  case SVE::BI__builtin_sve_svpfalse_b:
10460
0
    return ConstantInt::getFalse(Ty);
10461
10462
0
  case SVE::BI__builtin_sve_svpfalse_c: {
10463
0
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10464
0
    Function *CastToSVCountF =
10465
0
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10466
0
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10467
0
  }
10468
10469
0
  case SVE::BI__builtin_sve_svlen_bf16:
10470
0
  case SVE::BI__builtin_sve_svlen_f16:
10471
0
  case SVE::BI__builtin_sve_svlen_f32:
10472
0
  case SVE::BI__builtin_sve_svlen_f64:
10473
0
  case SVE::BI__builtin_sve_svlen_s8:
10474
0
  case SVE::BI__builtin_sve_svlen_s16:
10475
0
  case SVE::BI__builtin_sve_svlen_s32:
10476
0
  case SVE::BI__builtin_sve_svlen_s64:
10477
0
  case SVE::BI__builtin_sve_svlen_u8:
10478
0
  case SVE::BI__builtin_sve_svlen_u16:
10479
0
  case SVE::BI__builtin_sve_svlen_u32:
10480
0
  case SVE::BI__builtin_sve_svlen_u64: {
10481
0
    SVETypeFlags TF(Builtin->TypeModifier);
10482
0
    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10483
0
    auto *NumEls =
10484
0
        llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10485
10486
0
    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10487
0
    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10488
0
  }
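
The multiply above is the entire lowering of svlen: the known-minimum lane count of the type scaled by vscale. A short usage sketch (assuming <arm_sve.h>):

#include <arm_sve.h>

// Number of 32-bit lanes in a full SVE vector, e.g. 4 at 128 bits, 8 at 256 bits.
uint64_t f32_lane_count(svfloat32_t v) {
  return svlen_f32(v);
}
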
10489
10490
0
  case SVE::BI__builtin_sve_svtbl2_u8:
10491
0
  case SVE::BI__builtin_sve_svtbl2_s8:
10492
0
  case SVE::BI__builtin_sve_svtbl2_u16:
10493
0
  case SVE::BI__builtin_sve_svtbl2_s16:
10494
0
  case SVE::BI__builtin_sve_svtbl2_u32:
10495
0
  case SVE::BI__builtin_sve_svtbl2_s32:
10496
0
  case SVE::BI__builtin_sve_svtbl2_u64:
10497
0
  case SVE::BI__builtin_sve_svtbl2_s64:
10498
0
  case SVE::BI__builtin_sve_svtbl2_f16:
10499
0
  case SVE::BI__builtin_sve_svtbl2_bf16:
10500
0
  case SVE::BI__builtin_sve_svtbl2_f32:
10501
0
  case SVE::BI__builtin_sve_svtbl2_f64: {
10502
0
    SVETypeFlags TF(Builtin->TypeModifier);
10503
0
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10504
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10505
0
    return Builder.CreateCall(F, Ops);
10506
0
  }
10507
10508
0
  case SVE::BI__builtin_sve_svset_neonq_s8:
10509
0
  case SVE::BI__builtin_sve_svset_neonq_s16:
10510
0
  case SVE::BI__builtin_sve_svset_neonq_s32:
10511
0
  case SVE::BI__builtin_sve_svset_neonq_s64:
10512
0
  case SVE::BI__builtin_sve_svset_neonq_u8:
10513
0
  case SVE::BI__builtin_sve_svset_neonq_u16:
10514
0
  case SVE::BI__builtin_sve_svset_neonq_u32:
10515
0
  case SVE::BI__builtin_sve_svset_neonq_u64:
10516
0
  case SVE::BI__builtin_sve_svset_neonq_f16:
10517
0
  case SVE::BI__builtin_sve_svset_neonq_f32:
10518
0
  case SVE::BI__builtin_sve_svset_neonq_f64:
10519
0
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
10520
0
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10521
0
  }
10522
10523
0
  case SVE::BI__builtin_sve_svget_neonq_s8:
10524
0
  case SVE::BI__builtin_sve_svget_neonq_s16:
10525
0
  case SVE::BI__builtin_sve_svget_neonq_s32:
10526
0
  case SVE::BI__builtin_sve_svget_neonq_s64:
10527
0
  case SVE::BI__builtin_sve_svget_neonq_u8:
10528
0
  case SVE::BI__builtin_sve_svget_neonq_u16:
10529
0
  case SVE::BI__builtin_sve_svget_neonq_u32:
10530
0
  case SVE::BI__builtin_sve_svget_neonq_u64:
10531
0
  case SVE::BI__builtin_sve_svget_neonq_f16:
10532
0
  case SVE::BI__builtin_sve_svget_neonq_f32:
10533
0
  case SVE::BI__builtin_sve_svget_neonq_f64:
10534
0
  case SVE::BI__builtin_sve_svget_neonq_bf16: {
10535
0
    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10536
0
  }
10537
10538
0
  case SVE::BI__builtin_sve_svdup_neonq_s8:
10539
0
  case SVE::BI__builtin_sve_svdup_neonq_s16:
10540
0
  case SVE::BI__builtin_sve_svdup_neonq_s32:
10541
0
  case SVE::BI__builtin_sve_svdup_neonq_s64:
10542
0
  case SVE::BI__builtin_sve_svdup_neonq_u8:
10543
0
  case SVE::BI__builtin_sve_svdup_neonq_u16:
10544
0
  case SVE::BI__builtin_sve_svdup_neonq_u32:
10545
0
  case SVE::BI__builtin_sve_svdup_neonq_u64:
10546
0
  case SVE::BI__builtin_sve_svdup_neonq_f16:
10547
0
  case SVE::BI__builtin_sve_svdup_neonq_f32:
10548
0
  case SVE::BI__builtin_sve_svdup_neonq_f64:
10549
0
  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10550
0
    Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10551
0
                                               Builder.getInt64(0));
10552
0
    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10553
0
                                   {Insert, Builder.getInt64(0)});
10554
0
  }
10555
0
  }
10556
10557
  /// Should not happen
10558
0
  return nullptr;
10559
0
}
10560
10561
static void swapCommutativeSMEOperands(unsigned BuiltinID,
10562
0
                                       SmallVectorImpl<Value *> &Ops) {
10563
0
  unsigned MultiVec;
10564
0
  switch (BuiltinID) {
10565
0
  default:
10566
0
    return;
10567
0
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10568
0
    MultiVec = 1;
10569
0
    break;
10570
0
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10571
0
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10572
0
    MultiVec = 2;
10573
0
    break;
10574
0
  case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10575
0
  case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10576
0
    MultiVec = 4;
10577
0
    break;
10578
0
  }
10579
10580
0
  if (MultiVec > 0)
10581
0
    for (unsigned I = 0; I < MultiVec; ++I)
10582
0
      std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10583
0
}
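
A standalone sketch of what the loop above does (a hypothetical helper for illustration, not part of clang): the first operand is left in place and the following two groups of MultiVec operands are exchanged element-wise, which lets a commuted builtin reuse the lowering of its counterpart.

#include <utility>
#include <vector>

// Swap Ops[1..MultiVec] with Ops[MultiVec+1..2*MultiVec], leaving Ops[0] alone.
static void swapOperandGroups(std::vector<int> &Ops, unsigned MultiVec) {
  for (unsigned I = 0; I < MultiVec; ++I)
    std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
}
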
10584
10585
Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10586
0
                                                  const CallExpr *E) {
10587
0
  auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10588
0
                                              AArch64SMEIntrinsicsProvenSorted);
10589
10590
0
  llvm::SmallVector<Value *, 4> Ops;
10591
0
  SVETypeFlags TypeFlags(Builtin->TypeModifier);
10592
0
  GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10593
10594
0
  if (TypeFlags.isLoad() || TypeFlags.isStore())
10595
0
    return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10596
0
  else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10597
0
    return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10598
0
  else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10599
0
           BuiltinID == SME::BI__builtin_sme_svzero_za)
10600
0
    return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10601
0
  else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10602
0
           BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10603
0
           BuiltinID == SME::BI__builtin_sme_svldr_za ||
10604
0
           BuiltinID == SME::BI__builtin_sme_svstr_za)
10605
0
    return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10606
10607
  // Handle builtins which require their multi-vector operands to be swapped
10608
0
  swapCommutativeSMEOperands(BuiltinID, Ops);
10609
10610
  // Should not happen!
10611
0
  if (Builtin->LLVMIntrinsic == 0)
10612
0
    return nullptr;
10613
10614
  // Predicates must match the main datatype.
10615
0
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10616
0
    if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10617
0
      if (PredTy->getElementType()->isIntegerTy(1))
10618
0
        Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10619
10620
0
  Function *F =
10621
0
      TypeFlags.isOverloadNone()
10622
0
          ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10623
0
          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10624
0
  Value *Call = Builder.CreateCall(F, Ops);
10625
10626
0
  return FormSVEBuiltinResult(Call);
10627
0
}
10628
10629
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10630
                                               const CallExpr *E,
10631
0
                                               llvm::Triple::ArchType Arch) {
10632
0
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10633
0
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
10634
0
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10635
10636
0
  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10637
0
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
10638
0
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10639
10640
0
  unsigned HintID = static_cast<unsigned>(-1);
10641
0
  switch (BuiltinID) {
10642
0
  default: break;
10643
0
  case clang::AArch64::BI__builtin_arm_nop:
10644
0
    HintID = 0;
10645
0
    break;
10646
0
  case clang::AArch64::BI__builtin_arm_yield:
10647
0
  case clang::AArch64::BI__yield:
10648
0
    HintID = 1;
10649
0
    break;
10650
0
  case clang::AArch64::BI__builtin_arm_wfe:
10651
0
  case clang::AArch64::BI__wfe:
10652
0
    HintID = 2;
10653
0
    break;
10654
0
  case clang::AArch64::BI__builtin_arm_wfi:
10655
0
  case clang::AArch64::BI__wfi:
10656
0
    HintID = 3;
10657
0
    break;
10658
0
  case clang::AArch64::BI__builtin_arm_sev:
10659
0
  case clang::AArch64::BI__sev:
10660
0
    HintID = 4;
10661
0
    break;
10662
0
  case clang::AArch64::BI__builtin_arm_sevl:
10663
0
  case clang::AArch64::BI__sevl:
10664
0
    HintID = 5;
10665
0
    break;
10666
0
  }
10667
10668
0
  if (HintID != static_cast<unsigned>(-1)) {
10669
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10670
0
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10671
0
  }
10672
10673
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10674
    // Create a call to __arm_sme_state and store the results to the two pointers.
10675
0
    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10676
0
        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10677
0
                                false),
10678
0
        "__arm_sme_state"));
10679
0
    auto Attrs =
10680
0
        AttributeList()
10681
0
            .addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible")
10682
0
            .addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved");
10683
0
    CI->setAttributes(Attrs);
10684
0
    CI->setCallingConv(
10685
0
        llvm::CallingConv::
10686
0
            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10687
0
    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10688
0
                        EmitPointerWithAlignment(E->getArg(0)));
10689
0
    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10690
0
                               EmitPointerWithAlignment(E->getArg(1)));
10691
0
  }
10692
10693
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10694
0
    assert((getContext().getTypeSize(E->getType()) == 32) &&
10695
0
           "rbit of unusual size!");
10696
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10697
0
    return Builder.CreateCall(
10698
0
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10699
0
  }
10700
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10701
0
    assert((getContext().getTypeSize(E->getType()) == 64) &&
10702
0
           "rbit of unusual size!");
10703
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10704
0
    return Builder.CreateCall(
10705
0
        CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10706
0
  }
10707
10708
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10709
0
      BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10710
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10711
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10712
0
    Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10713
0
    if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10714
0
      Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10715
0
    return Res;
10716
0
  }
10717
10718
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10719
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10720
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10721
0
                              "cls");
10722
0
  }
10723
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10724
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10725
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10726
0
                              "cls");
10727
0
  }
10728
10729
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10730
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10731
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10732
0
    llvm::Type *Ty = Arg->getType();
10733
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10734
0
                              Arg, "frint32z");
10735
0
  }
10736
10737
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10738
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10739
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10740
0
    llvm::Type *Ty = Arg->getType();
10741
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10742
0
                              Arg, "frint64z");
10743
0
  }
10744
10745
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10746
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10747
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10748
0
    llvm::Type *Ty = Arg->getType();
10749
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10750
0
                              Arg, "frint32x");
10751
0
  }
10752
10753
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10754
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10755
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10756
0
    llvm::Type *Ty = Arg->getType();
10757
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10758
0
                              Arg, "frint64x");
10759
0
  }
10760
10761
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10762
0
    assert((getContext().getTypeSize(E->getType()) == 32) &&
10763
0
           "__jcvt of unusual size!");
10764
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10765
0
    return Builder.CreateCall(
10766
0
        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10767
0
  }
10768
10769
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10770
0
      BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10771
0
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10772
0
      BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10773
0
    llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10774
0
    llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10775
10776
0
    if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10777
      // Load from the address via an LLVM intrinsic, receiving a
10778
      // tuple of 8 i64 words, and store each one to ValPtr.
10779
0
      Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10780
0
      llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10781
0
      llvm::Value *ToRet;
10782
0
      for (size_t i = 0; i < 8; i++) {
10783
0
        llvm::Value *ValOffsetPtr =
10784
0
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10785
0
        Address Addr =
10786
0
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10787
0
        ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
10788
0
      }
10789
0
      return ToRet;
10790
0
    } else {
10791
      // Load 8 i64 words from ValPtr, and store them to the address
10792
      // via an LLVM intrinsic.
10793
0
      SmallVector<llvm::Value *, 9> Args;
10794
0
      Args.push_back(MemAddr);
10795
0
      for (size_t i = 0; i < 8; i++) {
10796
0
        llvm::Value *ValOffsetPtr =
10797
0
            Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10798
0
        Address Addr =
10799
0
            Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10800
0
        Args.push_back(Builder.CreateLoad(Addr));
10801
0
      }
10802
10803
0
      auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
10804
0
                       ? Intrinsic::aarch64_st64b
10805
0
                   : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
10806
0
                       ? Intrinsic::aarch64_st64bv
10807
0
                       : Intrinsic::aarch64_st64bv0);
10808
0
      Function *F = CGM.getIntrinsic(Intr);
10809
0
      return Builder.CreateCall(F, Args);
10810
0
    }
10811
0
  }
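
A usage sketch of the LD64B path above; the prototype shown in the comment is an assumption based on the ACLE 64-byte load/store intrinsics, and the single call expands into one llvm.aarch64.ld64b plus eight 8-byte stores into the output buffer:

#include <stdint.h>

// Assumed prototype: void __builtin_arm_ld64b(const void *addr, uint64_t data[8]).
// Requires an LS64-capable target.
void read_device_block(const void *mmio, uint64_t out[8]) {
  __builtin_arm_ld64b(mmio, out);
}
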
10812
10813
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
10814
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
10815
10816
0
    auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
10817
0
                     ? Intrinsic::aarch64_rndr
10818
0
                     : Intrinsic::aarch64_rndrrs);
10819
0
    Function *F = CGM.getIntrinsic(Intr);
10820
0
    llvm::Value *Val = Builder.CreateCall(F);
10821
0
    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
10822
0
    Value *Status = Builder.CreateExtractValue(Val, 1);
10823
10824
0
    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
10825
0
    Builder.CreateStore(RandomValue, MemAddress);
10826
0
    Status = Builder.CreateZExt(Status, Int32Ty);
10827
0
    return Status;
10828
0
  }
10829
10830
0
  if (BuiltinID == clang::AArch64::BI__clear_cache) {
10831
0
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
10832
0
    const FunctionDecl *FD = E->getDirectCallee();
10833
0
    Value *Ops[2];
10834
0
    for (unsigned i = 0; i < 2; i++)
10835
0
      Ops[i] = EmitScalarExpr(E->getArg(i));
10836
0
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
10837
0
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
10838
0
    StringRef Name = FD->getName();
10839
0
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
10840
0
  }
10841
10842
0
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10843
0
       BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
10844
0
      getContext().getTypeSize(E->getType()) == 128) {
10845
0
    Function *F =
10846
0
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10847
0
                             ? Intrinsic::aarch64_ldaxp
10848
0
                             : Intrinsic::aarch64_ldxp);
10849
10850
0
    Value *LdPtr = EmitScalarExpr(E->getArg(0));
10851
0
    Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
10852
10853
0
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
10854
0
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
10855
0
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10856
0
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
10857
0
    Val1 = Builder.CreateZExt(Val1, Int128Ty);
10858
10859
0
    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
10860
0
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
10861
0
    Val = Builder.CreateOr(Val, Val1);
10862
0
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
10863
0
  } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10864
0
             BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
10865
0
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
10866
10867
0
    QualType Ty = E->getType();
10868
0
    llvm::Type *RealResTy = ConvertType(Ty);
10869
0
    llvm::Type *IntTy =
10870
0
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10871
10872
0
    Function *F =
10873
0
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10874
0
                             ? Intrinsic::aarch64_ldaxr
10875
0
                             : Intrinsic::aarch64_ldxr,
10876
0
                         UnqualPtrTy);
10877
0
    CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
10878
0
    Val->addParamAttr(
10879
0
        0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
10880
10881
0
    if (RealResTy->isPointerTy())
10882
0
      return Builder.CreateIntToPtr(Val, RealResTy);
10883
10884
0
    llvm::Type *IntResTy = llvm::IntegerType::get(
10885
0
        getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
10886
0
    return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
10887
0
                                 RealResTy);
10888
0
  }
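
The 128-bit branch above glues the two halves returned by ldaxp/ldxp back together with a zero-extend, shift and OR; the arithmetic, in isolation (assuming the compiler's 128-bit integer extension):

#include <stdint.h>

// hi lands in bits [127:64], lo in bits [63:0].
static inline unsigned __int128 combine_halves(uint64_t hi, uint64_t lo) {
  return ((unsigned __int128)hi << 64) | lo;
}
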
10889
10890
0
  if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10891
0
       BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
10892
0
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
10893
0
    Function *F =
10894
0
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10895
0
                             ? Intrinsic::aarch64_stlxp
10896
0
                             : Intrinsic::aarch64_stxp);
10897
0
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
10898
10899
0
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
10900
0
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
10901
10902
0
    Tmp = Tmp.withElementType(STy);
10903
0
    llvm::Value *Val = Builder.CreateLoad(Tmp);
10904
10905
0
    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
10906
0
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
10907
0
    Value *StPtr = EmitScalarExpr(E->getArg(1));
10908
0
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
10909
0
  }
10910
10911
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
10912
0
      BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
10913
0
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
10914
0
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
10915
10916
0
    QualType Ty = E->getArg(0)->getType();
10917
0
    llvm::Type *StoreTy =
10918
0
        llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
10919
10920
0
    if (StoreVal->getType()->isPointerTy())
10921
0
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
10922
0
    else {
10923
0
      llvm::Type *IntTy = llvm::IntegerType::get(
10924
0
          getLLVMContext(),
10925
0
          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
10926
0
      StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
10927
0
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
10928
0
    }
10929
10930
0
    Function *F =
10931
0
        CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
10932
0
                             ? Intrinsic::aarch64_stlxr
10933
0
                             : Intrinsic::aarch64_stxr,
10934
0
                         StoreAddr->getType());
10935
0
    CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
10936
0
    CI->addParamAttr(
10937
0
        1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
10938
0
    return CI;
10939
0
  }
10940
10941
0
  if (BuiltinID == clang::AArch64::BI__getReg) {
10942
0
    Expr::EvalResult Result;
10943
0
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10944
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
10945
10946
0
    llvm::APSInt Value = Result.Val.getInt();
10947
0
    LLVMContext &Context = CGM.getLLVMContext();
10948
0
    std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
10949
10950
0
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
10951
0
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
10952
0
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
10953
10954
0
    llvm::Function *F =
10955
0
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
10956
0
    return Builder.CreateCall(F, Metadata);
10957
0
  }
10958
10959
0
  if (BuiltinID == clang::AArch64::BI__break) {
10960
0
    Expr::EvalResult Result;
10961
0
    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
10962
0
      llvm_unreachable("Sema will ensure that the parameter is constant");
10963
10964
0
    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
10965
0
    return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
10966
0
  }
10967
10968
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
10969
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
10970
0
    return Builder.CreateCall(F);
10971
0
  }
10972
10973
0
  if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
10974
0
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
10975
0
                               llvm::SyncScope::SingleThread);
10976
10977
  // CRC32
10978
0
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
10979
0
  switch (BuiltinID) {
10980
0
  case clang::AArch64::BI__builtin_arm_crc32b:
10981
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
10982
0
  case clang::AArch64::BI__builtin_arm_crc32cb:
10983
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
10984
0
  case clang::AArch64::BI__builtin_arm_crc32h:
10985
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
10986
0
  case clang::AArch64::BI__builtin_arm_crc32ch:
10987
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
10988
0
  case clang::AArch64::BI__builtin_arm_crc32w:
10989
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
10990
0
  case clang::AArch64::BI__builtin_arm_crc32cw:
10991
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
10992
0
  case clang::AArch64::BI__builtin_arm_crc32d:
10993
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
10994
0
  case clang::AArch64::BI__builtin_arm_crc32cd:
10995
0
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
10996
0
  }
10997
10998
0
  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
10999
0
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
11000
0
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
11001
0
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11002
11003
0
    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11004
0
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11005
11006
0
    return Builder.CreateCall(F, {Arg0, Arg1});
11007
0
  }
11008
11009
  // Memory Operations (MOPS)
11010
0
  if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11011
0
    Value *Dst = EmitScalarExpr(E->getArg(0));
11012
0
    Value *Val = EmitScalarExpr(E->getArg(1));
11013
0
    Value *Size = EmitScalarExpr(E->getArg(2));
11014
0
    Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11015
0
    Val = Builder.CreateTrunc(Val, Int8Ty);
11016
0
    Size = Builder.CreateIntCast(Size, Int64Ty, false);
11017
0
    return Builder.CreateCall(
11018
0
        CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11019
0
  }
11020
11021
  // Memory Tagging Extensions (MTE) Intrinsics
11022
0
  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11023
0
  switch (BuiltinID) {
11024
0
  case clang::AArch64::BI__builtin_arm_irg:
11025
0
    MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11026
0
  case clang::AArch64::BI__builtin_arm_addg:
11027
0
    MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11028
0
  case clang::AArch64::BI__builtin_arm_gmi:
11029
0
    MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11030
0
  case clang::AArch64::BI__builtin_arm_ldg:
11031
0
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11032
0
  case clang::AArch64::BI__builtin_arm_stg:
11033
0
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11034
0
  case clang::AArch64::BI__builtin_arm_subp:
11035
0
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11036
0
  }
11037
11038
0
  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11039
0
    llvm::Type *T = ConvertType(E->getType());
11040
11041
0
    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11042
0
      Value *Pointer = EmitScalarExpr(E->getArg(0));
11043
0
      Value *Mask = EmitScalarExpr(E->getArg(1));
11044
11045
0
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11046
0
      Mask = Builder.CreateZExt(Mask, Int64Ty);
11047
0
      Value *RV = Builder.CreateCall(
11048
0
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11049
0
       return Builder.CreatePointerCast(RV, T);
11050
0
    }
11051
0
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11052
0
      Value *Pointer = EmitScalarExpr(E->getArg(0));
11053
0
      Value *TagOffset = EmitScalarExpr(E->getArg(1));
11054
11055
0
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11056
0
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11057
0
      Value *RV = Builder.CreateCall(
11058
0
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11059
0
      return Builder.CreatePointerCast(RV, T);
11060
0
    }
11061
0
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11062
0
      Value *Pointer = EmitScalarExpr(E->getArg(0));
11063
0
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11064
11065
0
      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11066
0
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11067
0
      return Builder.CreateCall(
11068
0
                       CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11069
0
    }
11070
    // Although it is possible to supply a different return
11071
    // address (first arg) to this intrinsic, for now we set the
11072
    // return address to be the same as the input address.
11073
0
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11074
0
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
11075
0
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11076
0
      Value *RV = Builder.CreateCall(
11077
0
                    CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11078
0
      return Builder.CreatePointerCast(RV, T);
11079
0
    }
11080
    // Although it is possible to supply a different tag (to set)
11081
    // to this intrinsic (as first arg), for now we supply
11082
    // the tag from the input address argument (the common use case).
11083
0
    if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11084
0
        Value *TagAddress = EmitScalarExpr(E->getArg(0));
11085
0
        TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11086
0
        return Builder.CreateCall(
11087
0
                 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11088
0
    }
11089
0
    if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11090
0
      Value *PointerA = EmitScalarExpr(E->getArg(0));
11091
0
      Value *PointerB = EmitScalarExpr(E->getArg(1));
11092
0
      PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11093
0
      PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11094
0
      return Builder.CreateCall(
11095
0
                       CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11096
0
    }
11097
0
  }
11098
11099
0
  if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11100
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11101
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11102
0
      BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11103
0
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11104
0
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11105
0
      BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11106
0
      BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11107
11108
0
    SpecialRegisterAccessKind AccessKind = Write;
11109
0
    if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11110
0
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11111
0
        BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11112
0
        BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11113
0
      AccessKind = VolatileRead;
11114
11115
0
    bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11116
0
                            BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11117
11118
0
    bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11119
0
                   BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11120
11121
0
    bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11122
0
                    BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11123
11124
0
    llvm::Type *ValueType;
11125
0
    llvm::Type *RegisterType = Int64Ty;
11126
0
    if (Is32Bit) {
11127
0
      ValueType = Int32Ty;
11128
0
    } else if (Is128Bit) {
11129
0
      llvm::Type *Int128Ty =
11130
0
          llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11131
0
      ValueType = Int128Ty;
11132
0
      RegisterType = Int128Ty;
11133
0
    } else if (IsPointerBuiltin) {
11134
0
      ValueType = VoidPtrTy;
11135
0
    } else {
11136
0
      ValueType = Int64Ty;
11137
0
    };
11138
11139
0
    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11140
0
                                      AccessKind);
11141
0
  }
11142
11143
0
  if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11144
0
      BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11145
0
    LLVMContext &Context = CGM.getLLVMContext();
11146
11147
0
    unsigned SysReg =
11148
0
      E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11149
11150
0
    std::string SysRegStr;
11151
0
    llvm::raw_string_ostream(SysRegStr) <<
11152
0
                       ((1 << 1) | ((SysReg >> 14) & 1))  << ":" <<
11153
0
                       ((SysReg >> 11) & 7)               << ":" <<
11154
0
                       ((SysReg >> 7)  & 15)              << ":" <<
11155
0
                       ((SysReg >> 3)  & 15)              << ":" <<
11156
0
                       ( SysReg        & 7);
11157
11158
0
    llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11159
0
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11160
0
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11161
11162
0
    llvm::Type *RegisterType = Int64Ty;
11163
0
    llvm::Type *Types[] = { RegisterType };
11164
11165
0
    if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11166
0
      llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11167
11168
0
      return Builder.CreateCall(F, Metadata);
11169
0
    }
11170
11171
0
    llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11172
0
    llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11173
11174
0
    return Builder.CreateCall(F, { Metadata, ArgValue });
11175
0
  }
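
The stream insertion above decodes the packed MSVC system-register constant into the "op0:op1:CRn:CRm:op2" string that the read_register/write_register metadata expects. A standalone worked example (the 0x5A10 value, NZCV, is an illustration and not taken from this file):

#include <cstdio>

int main() {
  unsigned SysReg = 0x5A10; // ARM64_SYSREG(3, 3, 4, 2, 0), i.e. NZCV
  std::printf("%u:%u:%u:%u:%u\n",
              (1u << 1) | ((SysReg >> 14) & 1), (SysReg >> 11) & 7,
              (SysReg >> 7) & 15, (SysReg >> 3) & 15, SysReg & 7);
  // prints "3:3:4:2:0"
  return 0;
}
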
11176
11177
0
  if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11178
0
    llvm::Function *F =
11179
0
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11180
0
    return Builder.CreateCall(F);
11181
0
  }
11182
11183
0
  if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11184
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11185
0
    return Builder.CreateCall(F);
11186
0
  }
11187
11188
0
  if (BuiltinID == clang::AArch64::BI__mulh ||
11189
0
      BuiltinID == clang::AArch64::BI__umulh) {
11190
0
    llvm::Type *ResType = ConvertType(E->getType());
11191
0
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11192
11193
0
    bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11194
0
    Value *LHS =
11195
0
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11196
0
    Value *RHS =
11197
0
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11198
11199
0
    Value *MulResult, *HigherBits;
11200
0
    if (IsSigned) {
11201
0
      MulResult = Builder.CreateNSWMul(LHS, RHS);
11202
0
      HigherBits = Builder.CreateAShr(MulResult, 64);
11203
0
    } else {
11204
0
      MulResult = Builder.CreateNUWMul(LHS, RHS);
11205
0
      HigherBits = Builder.CreateLShr(MulResult, 64);
11206
0
    }
11207
0
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11208
11209
0
    return HigherBits;
11210
0
  }
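
A scalar sketch of what the widening multiply above computes for the unsigned case (assuming the compiler's 128-bit integer extension): the high 64 bits of the full product.

#include <stdint.h>

// Equivalent of __umulh(a, b): widen, multiply, keep bits [127:64].
static inline uint64_t umulh_sketch(uint64_t a, uint64_t b) {
  return (uint64_t)(((unsigned __int128)a * b) >> 64);
}
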
11211
11212
0
  if (BuiltinID == AArch64::BI__writex18byte ||
11213
0
      BuiltinID == AArch64::BI__writex18word ||
11214
0
      BuiltinID == AArch64::BI__writex18dword ||
11215
0
      BuiltinID == AArch64::BI__writex18qword) {
11216
    // Read x18 as i8*
11217
0
    LLVMContext &Context = CGM.getLLVMContext();
11218
0
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11219
0
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11220
0
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11221
0
    llvm::Function *F =
11222
0
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11223
0
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11224
0
    X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11225
11226
    // Store val at x18 + offset
11227
0
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11228
0
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11229
0
    Value *Val = EmitScalarExpr(E->getArg(1));
11230
0
    StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11231
0
    return Store;
11232
0
  }
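
An approximate standalone equivalent of the __writex18byte lowering above; the inline asm used to read x18 is an assumption for illustration (the builtin itself goes through llvm.read_register):

// AArch64 only: treat the platform register x18 as a byte base pointer
// and store val at the given offset with 1-byte alignment.
static void writex18byte_sketch(unsigned long offset, unsigned char val) {
  unsigned char *x18;
  asm("mov %0, x18" : "=r"(x18));
  x18[offset] = val;
}
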
11233
11234
0
  if (BuiltinID == AArch64::BI__readx18byte ||
11235
0
      BuiltinID == AArch64::BI__readx18word ||
11236
0
      BuiltinID == AArch64::BI__readx18dword ||
11237
0
      BuiltinID == AArch64::BI__readx18qword) {
11238
0
    llvm::Type *IntTy = ConvertType(E->getType());
11239
11240
    // Read x18 as i8*
11241
0
    LLVMContext &Context = CGM.getLLVMContext();
11242
0
    llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11243
0
    llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11244
0
    llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11245
0
    llvm::Function *F =
11246
0
        CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11247
0
    llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11248
0
    X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11249
11250
    // Load x18 + offset
11251
0
    Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11252
0
    Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11253
0
    LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11254
0
    return Load;
11255
0
  }
11256
11257
0
  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11258
0
      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11259
0
      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11260
0
      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11261
0
    Value *Arg = EmitScalarExpr(E->getArg(0));
11262
0
    llvm::Type *RetTy = ConvertType(E->getType());
11263
0
    return Builder.CreateBitCast(Arg, RetTy);
11264
0
  }
11265
11266
0
  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11267
0
      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11268
0
      BuiltinID == AArch64::BI_CountLeadingZeros ||
11269
0
      BuiltinID == AArch64::BI_CountLeadingZeros64) {
11270
0
    Value *Arg = EmitScalarExpr(E->getArg(0));
11271
0
    llvm::Type *ArgType = Arg->getType();
11272
11273
0
    if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11274
0
        BuiltinID == AArch64::BI_CountLeadingOnes64)
11275
0
      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11276
11277
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11278
0
    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11279
11280
0
    if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11281
0
        BuiltinID == AArch64::BI_CountLeadingZeros64)
11282
0
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11283
0
    return Result;
11284
0
  }
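
The XOR with all-ones above reduces counting leading ones to counting leading zeros of the inverted value. A scalar sketch for the 32-bit case (guarding the all-ones input, since __builtin_clz(0) is undefined):

// Equivalent of _CountLeadingOnes on a 32-bit value.
static inline unsigned count_leading_ones(unsigned x) {
  return x == ~0u ? 32u : (unsigned)__builtin_clz(~x);
}
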
11285
11286
0
  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11287
0
      BuiltinID == AArch64::BI_CountLeadingSigns64) {
11288
0
    Value *Arg = EmitScalarExpr(E->getArg(0));
11289
11290
0
    Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11291
0
                      ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11292
0
                      : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11293
11294
0
    Value *Result = Builder.CreateCall(F, Arg, "cls");
11295
0
    if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11296
0
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11297
0
    return Result;
11298
0
  }
11299
11300
0
  if (BuiltinID == AArch64::BI_CountOneBits ||
11301
0
      BuiltinID == AArch64::BI_CountOneBits64) {
11302
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
11303
0
    llvm::Type *ArgType = ArgValue->getType();
11304
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11305
11306
0
    Value *Result = Builder.CreateCall(F, ArgValue);
11307
0
    if (BuiltinID == AArch64::BI_CountOneBits64)
11308
0
      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11309
0
    return Result;
11310
0
  }
11311
11312
0
  if (BuiltinID == AArch64::BI__prefetch) {
11313
0
    Value *Address = EmitScalarExpr(E->getArg(0));
11314
0
    Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11315
0
    Value *Locality = ConstantInt::get(Int32Ty, 3);
11316
0
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11317
0
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11318
0
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
11319
0
  }
11320
11321
  // Handle MSVC intrinsics before argument evaluation to prevent double
11322
  // evaluation.
11323
0
  if (std::optional<MSVCIntrin> MsvcIntId =
11324
0
          translateAarch64ToMsvcIntrin(BuiltinID))
11325
0
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11326
11327
  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
11328
0
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11329
0
    return P.first == BuiltinID;
11330
0
  });
11331
0
  if (It != end(NEONEquivalentIntrinsicMap))
11332
0
    BuiltinID = It->second;
11333
11334
  // Find out if any arguments are required to be integer constant
11335
  // expressions.
11336
0
  unsigned ICEArguments = 0;
11337
0
  ASTContext::GetBuiltinTypeError Error;
11338
0
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11339
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
11340
11341
0
  llvm::SmallVector<Value*, 4> Ops;
11342
0
  Address PtrOp0 = Address::invalid();
11343
0
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11344
0
    if (i == 0) {
11345
0
      switch (BuiltinID) {
11346
0
      case NEON::BI__builtin_neon_vld1_v:
11347
0
      case NEON::BI__builtin_neon_vld1q_v:
11348
0
      case NEON::BI__builtin_neon_vld1_dup_v:
11349
0
      case NEON::BI__builtin_neon_vld1q_dup_v:
11350
0
      case NEON::BI__builtin_neon_vld1_lane_v:
11351
0
      case NEON::BI__builtin_neon_vld1q_lane_v:
11352
0
      case NEON::BI__builtin_neon_vst1_v:
11353
0
      case NEON::BI__builtin_neon_vst1q_v:
11354
0
      case NEON::BI__builtin_neon_vst1_lane_v:
11355
0
      case NEON::BI__builtin_neon_vst1q_lane_v:
11356
0
      case NEON::BI__builtin_neon_vldap1_lane_s64:
11357
0
      case NEON::BI__builtin_neon_vldap1q_lane_s64:
11358
0
      case NEON::BI__builtin_neon_vstl1_lane_s64:
11359
0
      case NEON::BI__builtin_neon_vstl1q_lane_s64:
11360
        // Get the alignment for the argument in addition to the value;
11361
        // we'll use it later.
11362
0
        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11363
0
        Ops.push_back(PtrOp0.getPointer());
11364
0
        continue;
11365
0
      }
11366
0
    }
11367
0
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11368
0
  }
11369
11370
0
  auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11371
0
  const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11372
0
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11373
11374
0
  if (Builtin) {
11375
0
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11376
0
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11377
0
    assert(Result && "SISD intrinsic should have been handled");
11378
0
    return Result;
11379
0
  }
11380
11381
0
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
11382
0
  NeonTypeFlags Type(0);
11383
0
  if (std::optional<llvm::APSInt> Result =
11384
0
          Arg->getIntegerConstantExpr(getContext()))
11385
    // Determine the type of this overloaded NEON intrinsic.
11386
0
    Type = NeonTypeFlags(Result->getZExtValue());
11387
11388
0
  bool usgn = Type.isUnsigned();
11389
0
  bool quad = Type.isQuad();
11390
11391
  // Handle non-overloaded intrinsics first.
11392
0
  switch (BuiltinID) {
11393
0
  default: break;
11394
0
  case NEON::BI__builtin_neon_vabsh_f16:
11395
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11396
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11397
0
  case NEON::BI__builtin_neon_vaddq_p128: {
11398
0
    llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11399
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11400
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11401
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11402
0
    Ops[0] =  Builder.CreateXor(Ops[0], Ops[1]);
11403
0
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11404
0
    return Builder.CreateBitCast(Ops[0], Int128Ty);
11405
0
  }
11406
0
  case NEON::BI__builtin_neon_vldrq_p128: {
11407
0
    llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11408
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
11409
0
    return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11410
0
                                     CharUnits::fromQuantity(16));
11411
0
  }
11412
0
  case NEON::BI__builtin_neon_vstrq_p128: {
11413
0
    Value *Ptr = Ops[0];
11414
0
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11415
0
  }
11416
0
  case NEON::BI__builtin_neon_vcvts_f32_u32:
11417
0
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
11418
0
    usgn = true;
11419
0
    [[fallthrough]];
11420
0
  case NEON::BI__builtin_neon_vcvts_f32_s32:
11421
0
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11422
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11423
0
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11424
0
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11425
0
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11426
0
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11427
0
    if (usgn)
11428
0
      return Builder.CreateUIToFP(Ops[0], FTy);
11429
0
    return Builder.CreateSIToFP(Ops[0], FTy);
11430
0
  }
11431
0
  case NEON::BI__builtin_neon_vcvth_f16_u16:
11432
0
  case NEON::BI__builtin_neon_vcvth_f16_u32:
11433
0
  case NEON::BI__builtin_neon_vcvth_f16_u64:
11434
0
    usgn = true;
11435
0
    [[fallthrough]];
11436
0
  case NEON::BI__builtin_neon_vcvth_f16_s16:
11437
0
  case NEON::BI__builtin_neon_vcvth_f16_s32:
11438
0
  case NEON::BI__builtin_neon_vcvth_f16_s64: {
11439
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11440
0
    llvm::Type *FTy = HalfTy;
11441
0
    llvm::Type *InTy;
11442
0
    if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11443
0
      InTy = Int64Ty;
11444
0
    else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11445
0
      InTy = Int32Ty;
11446
0
    else
11447
0
      InTy = Int16Ty;
11448
0
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11449
0
    if (usgn)
11450
0
      return Builder.CreateUIToFP(Ops[0], FTy);
11451
0
    return Builder.CreateSIToFP(Ops[0], FTy);
11452
0
  }
11453
0
  case NEON::BI__builtin_neon_vcvtah_u16_f16:
11454
0
  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11455
0
  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11456
0
  case NEON::BI__builtin_neon_vcvtph_u16_f16:
11457
0
  case NEON::BI__builtin_neon_vcvth_u16_f16:
11458
0
  case NEON::BI__builtin_neon_vcvtah_s16_f16:
11459
0
  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11460
0
  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11461
0
  case NEON::BI__builtin_neon_vcvtph_s16_f16:
11462
0
  case NEON::BI__builtin_neon_vcvth_s16_f16: {
11463
0
    unsigned Int;
11464
0
    llvm::Type* InTy = Int32Ty;
11465
0
    llvm::Type* FTy  = HalfTy;
11466
0
    llvm::Type *Tys[2] = {InTy, FTy};
11467
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11468
0
    switch (BuiltinID) {
11469
0
    default: llvm_unreachable("missing builtin ID in switch!");
11470
0
    case NEON::BI__builtin_neon_vcvtah_u16_f16:
11471
0
      Int = Intrinsic::aarch64_neon_fcvtau; break;
11472
0
    case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11473
0
      Int = Intrinsic::aarch64_neon_fcvtmu; break;
11474
0
    case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11475
0
      Int = Intrinsic::aarch64_neon_fcvtnu; break;
11476
0
    case NEON::BI__builtin_neon_vcvtph_u16_f16:
11477
0
      Int = Intrinsic::aarch64_neon_fcvtpu; break;
11478
0
    case NEON::BI__builtin_neon_vcvth_u16_f16:
11479
0
      Int = Intrinsic::aarch64_neon_fcvtzu; break;
11480
0
    case NEON::BI__builtin_neon_vcvtah_s16_f16:
11481
0
      Int = Intrinsic::aarch64_neon_fcvtas; break;
11482
0
    case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11483
0
      Int = Intrinsic::aarch64_neon_fcvtms; break;
11484
0
    case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11485
0
      Int = Intrinsic::aarch64_neon_fcvtns; break;
11486
0
    case NEON::BI__builtin_neon_vcvtph_s16_f16:
11487
0
      Int = Intrinsic::aarch64_neon_fcvtps; break;
11488
0
    case NEON::BI__builtin_neon_vcvth_s16_f16:
11489
0
      Int = Intrinsic::aarch64_neon_fcvtzs; break;
11490
0
    }
11491
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11492
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11493
0
  }
11494
0
  case NEON::BI__builtin_neon_vcaleh_f16:
11495
0
  case NEON::BI__builtin_neon_vcalth_f16:
11496
0
  case NEON::BI__builtin_neon_vcageh_f16:
11497
0
  case NEON::BI__builtin_neon_vcagth_f16: {
11498
0
    unsigned Int;
11499
0
    llvm::Type* InTy = Int32Ty;
11500
0
    llvm::Type* FTy  = HalfTy;
11501
0
    llvm::Type *Tys[2] = {InTy, FTy};
11502
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11503
0
    switch (BuiltinID) {
11504
0
    default: llvm_unreachable("missing builtin ID in switch!");
11505
0
    case NEON::BI__builtin_neon_vcageh_f16:
11506
0
      Int = Intrinsic::aarch64_neon_facge; break;
11507
0
    case NEON::BI__builtin_neon_vcagth_f16:
11508
0
      Int = Intrinsic::aarch64_neon_facgt; break;
11509
0
    case NEON::BI__builtin_neon_vcaleh_f16:
11510
0
      Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11511
0
    case NEON::BI__builtin_neon_vcalth_f16:
11512
0
      Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11513
0
    }
11514
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11515
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11516
0
  }
11517
0
  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11518
0
  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11519
0
    unsigned Int;
11520
0
    llvm::Type* InTy = Int32Ty;
11521
0
    llvm::Type* FTy  = HalfTy;
11522
0
    llvm::Type *Tys[2] = {InTy, FTy};
11523
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11524
0
    switch (BuiltinID) {
11525
0
    default: llvm_unreachable("missing builtin ID in switch!");
11526
0
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11527
0
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11528
0
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11529
0
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11530
0
    }
11531
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11532
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
11533
0
  }
11534
0
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11535
0
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11536
0
    unsigned Int;
11537
0
    llvm::Type* FTy  = HalfTy;
11538
0
    llvm::Type* InTy = Int32Ty;
11539
0
    llvm::Type *Tys[2] = {FTy, InTy};
11540
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11541
0
    switch (BuiltinID) {
11542
0
    default: llvm_unreachable("missing builtin ID in switch!");
11543
0
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11544
0
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11545
0
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11546
0
      break;
11547
0
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11548
0
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11549
0
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11550
0
      break;
11551
0
    }
11552
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11553
0
  }
11554
0
  case NEON::BI__builtin_neon_vpaddd_s64: {
11555
0
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11556
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
11557
    // The vector is v2i64, so make sure it's bitcast to that.
11558
0
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11559
0
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11560
0
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11561
0
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11562
0
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11563
    // Pairwise addition of a v2i64 into a scalar i64.
11564
0
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
11565
0
  }
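
What the extract/extract/add sequence above amounts to, written with the NEON intrinsics a user would reach for (assuming <arm_neon.h>):

#include <arm_neon.h>

// Pairwise add of the two 64-bit lanes, i.e. vpaddd_s64(v).
static inline int64_t pairwise_add_s64(int64x2_t v) {
  return vgetq_lane_s64(v, 0) + vgetq_lane_s64(v, 1);
}
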
11566
0
  case NEON::BI__builtin_neon_vpaddd_f64: {
11567
0
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11568
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
11569
    // The vector is v2f64, so make sure it's bitcast to that.
11570
0
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11571
0
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11572
0
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11573
0
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11574
0
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11575
    // Pairwise addition of a v2f64 into a scalar f64.
11576
0
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11577
0
  }
11578
0
  case NEON::BI__builtin_neon_vpadds_f32: {
11579
0
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11580
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
11581
    // The vector is v2f32, so make sure it's bitcast to that.
11582
0
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11583
0
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11584
0
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11585
0
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11586
0
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11587
    // Pairwise addition of a v2f32 into a scalar f32.
11588
0
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11589
0
  }
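As a usage sketch of the scalar pairwise-add builtins lowered above (assuming <arm_neon.h>; names illustrative): each one extracts the two lanes of its input vector and adds them, exactly the extractelement/add sequence emitted here.

    #include <arm_neon.h>

    int64_t sum_lanes_s64(int64x2_t v) {
      return vpaddd_s64(v);        // v[0] + v[1], integer add
    }

    float64_t sum_lanes_f64(float64x2_t v) {
      return vpaddd_f64(v);        // v[0] + v[1], floating-point add
    }

    float32_t sum_lanes_f32(float32x2_t v) {
      return vpadds_f32(v);        // v[0] + v[1], floating-point add
    }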
11590
0
  case NEON::BI__builtin_neon_vceqzd_s64:
11591
0
  case NEON::BI__builtin_neon_vceqzd_f64:
11592
0
  case NEON::BI__builtin_neon_vceqzs_f32:
11593
0
  case NEON::BI__builtin_neon_vceqzh_f16:
11594
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11595
0
    return EmitAArch64CompareBuiltinExpr(
11596
0
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11597
0
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11598
0
  case NEON::BI__builtin_neon_vcgezd_s64:
11599
0
  case NEON::BI__builtin_neon_vcgezd_f64:
11600
0
  case NEON::BI__builtin_neon_vcgezs_f32:
11601
0
  case NEON::BI__builtin_neon_vcgezh_f16:
11602
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11603
0
    return EmitAArch64CompareBuiltinExpr(
11604
0
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11605
0
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11606
0
  case NEON::BI__builtin_neon_vclezd_s64:
11607
0
  case NEON::BI__builtin_neon_vclezd_f64:
11608
0
  case NEON::BI__builtin_neon_vclezs_f32:
11609
0
  case NEON::BI__builtin_neon_vclezh_f16:
11610
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11611
0
    return EmitAArch64CompareBuiltinExpr(
11612
0
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11613
0
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11614
0
  case NEON::BI__builtin_neon_vcgtzd_s64:
11615
0
  case NEON::BI__builtin_neon_vcgtzd_f64:
11616
0
  case NEON::BI__builtin_neon_vcgtzs_f32:
11617
0
  case NEON::BI__builtin_neon_vcgtzh_f16:
11618
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11619
0
    return EmitAArch64CompareBuiltinExpr(
11620
0
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11621
0
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11622
0
  case NEON::BI__builtin_neon_vcltzd_s64:
11623
0
  case NEON::BI__builtin_neon_vcltzd_f64:
11624
0
  case NEON::BI__builtin_neon_vcltzs_f32:
11625
0
  case NEON::BI__builtin_neon_vcltzh_f16:
11626
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11627
0
    return EmitAArch64CompareBuiltinExpr(
11628
0
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
11629
0
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11630
11631
0
  case NEON::BI__builtin_neon_vceqzd_u64: {
11632
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
11633
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11634
0
    Ops[0] =
11635
0
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11636
0
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11637
0
  }
11638
0
  case NEON::BI__builtin_neon_vceqd_f64:
11639
0
  case NEON::BI__builtin_neon_vcled_f64:
11640
0
  case NEON::BI__builtin_neon_vcltd_f64:
11641
0
  case NEON::BI__builtin_neon_vcged_f64:
11642
0
  case NEON::BI__builtin_neon_vcgtd_f64: {
11643
0
    llvm::CmpInst::Predicate P;
11644
0
    switch (BuiltinID) {
11645
0
    default: llvm_unreachable("missing builtin ID in switch!");
11646
0
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11647
0
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11648
0
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11649
0
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11650
0
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11651
0
    }
11652
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11653
0
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11654
0
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11655
0
    if (P == llvm::FCmpInst::FCMP_OEQ)
11656
0
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11657
0
    else
11658
0
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11659
0
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11660
0
  }
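The scalar compare builtins above produce an all-ones or all-zeros mask rather than a bool, which is why the i1 compare result is sign-extended to the element width. A minimal sketch of the observable behaviour, assuming <arm_neon.h> (names illustrative):

    #include <arm_neon.h>
    #include <cstdint>

    uint64_t greater_mask(float64_t a, float64_t b) {
      // Returns UINT64_MAX when a > b, otherwise 0 (all-ones/all-zeros mask).
      return vcgtd_f64(a, b);
    }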
11661
0
  case NEON::BI__builtin_neon_vceqs_f32:
11662
0
  case NEON::BI__builtin_neon_vcles_f32:
11663
0
  case NEON::BI__builtin_neon_vclts_f32:
11664
0
  case NEON::BI__builtin_neon_vcges_f32:
11665
0
  case NEON::BI__builtin_neon_vcgts_f32: {
11666
0
    llvm::CmpInst::Predicate P;
11667
0
    switch (BuiltinID) {
11668
0
    default: llvm_unreachable("missing builtin ID in switch!");
11669
0
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11670
0
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11671
0
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11672
0
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11673
0
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11674
0
    }
11675
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11676
0
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11677
0
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11678
0
    if (P == llvm::FCmpInst::FCMP_OEQ)
11679
0
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11680
0
    else
11681
0
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11682
0
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11683
0
  }
11684
0
  case NEON::BI__builtin_neon_vceqh_f16:
11685
0
  case NEON::BI__builtin_neon_vcleh_f16:
11686
0
  case NEON::BI__builtin_neon_vclth_f16:
11687
0
  case NEON::BI__builtin_neon_vcgeh_f16:
11688
0
  case NEON::BI__builtin_neon_vcgth_f16: {
11689
0
    llvm::CmpInst::Predicate P;
11690
0
    switch (BuiltinID) {
11691
0
    default: llvm_unreachable("missing builtin ID in switch!");
11692
0
    case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11693
0
    case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11694
0
    case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11695
0
    case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11696
0
    case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11697
0
    }
11698
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11699
0
    Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11700
0
    Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11701
0
    if (P == llvm::FCmpInst::FCMP_OEQ)
11702
0
      Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11703
0
    else
11704
0
      Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11705
0
    return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11706
0
  }
11707
0
  case NEON::BI__builtin_neon_vceqd_s64:
11708
0
  case NEON::BI__builtin_neon_vceqd_u64:
11709
0
  case NEON::BI__builtin_neon_vcgtd_s64:
11710
0
  case NEON::BI__builtin_neon_vcgtd_u64:
11711
0
  case NEON::BI__builtin_neon_vcltd_s64:
11712
0
  case NEON::BI__builtin_neon_vcltd_u64:
11713
0
  case NEON::BI__builtin_neon_vcged_u64:
11714
0
  case NEON::BI__builtin_neon_vcged_s64:
11715
0
  case NEON::BI__builtin_neon_vcled_u64:
11716
0
  case NEON::BI__builtin_neon_vcled_s64: {
11717
0
    llvm::CmpInst::Predicate P;
11718
0
    switch (BuiltinID) {
11719
0
    default: llvm_unreachable("missing builtin ID in switch!");
11720
0
    case NEON::BI__builtin_neon_vceqd_s64:
11721
0
    case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11722
0
    case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11723
0
    case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11724
0
    case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11725
0
    case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11726
0
    case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11727
0
    case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11728
0
    case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11729
0
    case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11730
0
    }
11731
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11732
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11733
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11734
0
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11735
0
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11736
0
  }
11737
0
  case NEON::BI__builtin_neon_vtstd_s64:
11738
0
  case NEON::BI__builtin_neon_vtstd_u64: {
11739
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11740
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11741
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11742
0
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11743
0
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11744
0
                                llvm::Constant::getNullValue(Int64Ty));
11745
0
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11746
0
  }
11747
0
  case NEON::BI__builtin_neon_vset_lane_i8:
11748
0
  case NEON::BI__builtin_neon_vset_lane_i16:
11749
0
  case NEON::BI__builtin_neon_vset_lane_i32:
11750
0
  case NEON::BI__builtin_neon_vset_lane_i64:
11751
0
  case NEON::BI__builtin_neon_vset_lane_bf16:
11752
0
  case NEON::BI__builtin_neon_vset_lane_f32:
11753
0
  case NEON::BI__builtin_neon_vsetq_lane_i8:
11754
0
  case NEON::BI__builtin_neon_vsetq_lane_i16:
11755
0
  case NEON::BI__builtin_neon_vsetq_lane_i32:
11756
0
  case NEON::BI__builtin_neon_vsetq_lane_i64:
11757
0
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
11758
0
  case NEON::BI__builtin_neon_vsetq_lane_f32:
11759
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11760
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11761
0
  case NEON::BI__builtin_neon_vset_lane_f64:
11762
    // The vector type needs a cast for the v1f64 variant.
11763
0
    Ops[1] =
11764
0
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11765
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11766
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11767
0
  case NEON::BI__builtin_neon_vsetq_lane_f64:
11768
    // The vector type needs a cast for the v2f64 variant.
11769
0
    Ops[1] =
11770
0
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11771
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
11772
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11773
11774
0
  case NEON::BI__builtin_neon_vget_lane_i8:
11775
0
  case NEON::BI__builtin_neon_vdupb_lane_i8:
11776
0
    Ops[0] =
11777
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11778
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11779
0
                                        "vget_lane");
11780
0
  case NEON::BI__builtin_neon_vgetq_lane_i8:
11781
0
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
11782
0
    Ops[0] =
11783
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11784
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11785
0
                                        "vgetq_lane");
11786
0
  case NEON::BI__builtin_neon_vget_lane_i16:
11787
0
  case NEON::BI__builtin_neon_vduph_lane_i16:
11788
0
    Ops[0] =
11789
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
11790
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11791
0
                                        "vget_lane");
11792
0
  case NEON::BI__builtin_neon_vgetq_lane_i16:
11793
0
  case NEON::BI__builtin_neon_vduph_laneq_i16:
11794
0
    Ops[0] =
11795
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
11796
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11797
0
                                        "vgetq_lane");
11798
0
  case NEON::BI__builtin_neon_vget_lane_i32:
11799
0
  case NEON::BI__builtin_neon_vdups_lane_i32:
11800
0
    Ops[0] =
11801
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
11802
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11803
0
                                        "vget_lane");
11804
0
  case NEON::BI__builtin_neon_vdups_lane_f32:
11805
0
    Ops[0] =
11806
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11807
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11808
0
                                        "vdups_lane");
11809
0
  case NEON::BI__builtin_neon_vgetq_lane_i32:
11810
0
  case NEON::BI__builtin_neon_vdups_laneq_i32:
11811
0
    Ops[0] =
11812
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
11813
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11814
0
                                        "vgetq_lane");
11815
0
  case NEON::BI__builtin_neon_vget_lane_i64:
11816
0
  case NEON::BI__builtin_neon_vdupd_lane_i64:
11817
0
    Ops[0] =
11818
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
11819
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11820
0
                                        "vget_lane");
11821
0
  case NEON::BI__builtin_neon_vdupd_lane_f64:
11822
0
    Ops[0] =
11823
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11824
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11825
0
                                        "vdupd_lane");
11826
0
  case NEON::BI__builtin_neon_vgetq_lane_i64:
11827
0
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
11828
0
    Ops[0] =
11829
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
11830
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11831
0
                                        "vgetq_lane");
11832
0
  case NEON::BI__builtin_neon_vget_lane_f32:
11833
0
    Ops[0] =
11834
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11835
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11836
0
                                        "vget_lane");
11837
0
  case NEON::BI__builtin_neon_vget_lane_f64:
11838
0
    Ops[0] =
11839
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11840
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11841
0
                                        "vget_lane");
11842
0
  case NEON::BI__builtin_neon_vgetq_lane_f32:
11843
0
  case NEON::BI__builtin_neon_vdups_laneq_f32:
11844
0
    Ops[0] =
11845
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
11846
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11847
0
                                        "vgetq_lane");
11848
0
  case NEON::BI__builtin_neon_vgetq_lane_f64:
11849
0
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
11850
0
    Ops[0] =
11851
0
        Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
11852
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11853
0
                                        "vgetq_lane");
11854
0
  case NEON::BI__builtin_neon_vaddh_f16:
11855
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11856
0
    return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
11857
0
  case NEON::BI__builtin_neon_vsubh_f16:
11858
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11859
0
    return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
11860
0
  case NEON::BI__builtin_neon_vmulh_f16:
11861
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11862
0
    return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
11863
0
  case NEON::BI__builtin_neon_vdivh_f16:
11864
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11865
0
    return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
11866
0
  case NEON::BI__builtin_neon_vfmah_f16:
11867
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11868
0
    return emitCallMaybeConstrainedFPBuiltin(
11869
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11870
0
        {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
11871
0
  case NEON::BI__builtin_neon_vfmsh_f16: {
11872
0
    Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
11873
11874
    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
11875
0
    return emitCallMaybeConstrainedFPBuiltin(
11876
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
11877
0
        {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
11878
0
  }
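The operand swap noted in the comments above matters because the ACLE fused multiply builtins take the accumulator first while llvm.fma takes it last. A small sketch of the intended semantics, assuming <arm_neon.h> with scalar FP16 support (names illustrative):

    #include <arm_neon.h>

    float16_t fma_h(float16_t acc, float16_t x, float16_t y) {
      return vfmah_f16(acc, x, y);   // acc + x * y, single rounding
    }

    float16_t fms_h(float16_t acc, float16_t x, float16_t y) {
      return vfmsh_f16(acc, x, y);   // acc - x * y, lowered as fma(-x, y, acc)
    }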
11879
0
  case NEON::BI__builtin_neon_vaddd_s64:
11880
0
  case NEON::BI__builtin_neon_vaddd_u64:
11881
0
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
11882
0
  case NEON::BI__builtin_neon_vsubd_s64:
11883
0
  case NEON::BI__builtin_neon_vsubd_u64:
11884
0
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
11885
0
  case NEON::BI__builtin_neon_vqdmlalh_s16:
11886
0
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
11887
0
    SmallVector<Value *, 2> ProductOps;
11888
0
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11889
0
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
11890
0
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11891
0
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11892
0
                          ProductOps, "vqdmlXl");
11893
0
    Constant *CI = ConstantInt::get(SizeTy, 0);
11894
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11895
11896
0
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
11897
0
                                        ? Intrinsic::aarch64_neon_sqadd
11898
0
                                        : Intrinsic::aarch64_neon_sqsub;
11899
0
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
11900
0
  }
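For context on the vqdmlalh/vqdmlslh lowering above, which widens the scalars into a v4i16, calls the vector sqdmull intrinsic, and then extracts lane 0 before accumulating: a behavioural sketch, assuming <arm_neon.h> (names illustrative).

    #include <arm_neon.h>

    int32_t dmlal(int32_t acc, int16_t a, int16_t b) {
      // Saturating doubling multiply-accumulate:
      // roughly acc +sat sat(2 * a * b), all in 32 bits.
      return vqdmlalh_s16(acc, a, b);
    }

    int32_t dmlsl(int32_t acc, int16_t a, int16_t b) {
      // Same doubled product, subtracted with saturation.
      return vqdmlslh_s16(acc, a, b);
    }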
11901
0
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
11902
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11903
0
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11904
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
11905
0
                        Ops, "vqshlu_n");
11906
0
  }
11907
0
  case NEON::BI__builtin_neon_vqshld_n_u64:
11908
0
  case NEON::BI__builtin_neon_vqshld_n_s64: {
11909
0
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
11910
0
                                   ? Intrinsic::aarch64_neon_uqshl
11911
0
                                   : Intrinsic::aarch64_neon_sqshl;
11912
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11913
0
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
11914
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
11915
0
  }
11916
0
  case NEON::BI__builtin_neon_vrshrd_n_u64:
11917
0
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
11918
0
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
11919
0
                                   ? Intrinsic::aarch64_neon_urshl
11920
0
                                   : Intrinsic::aarch64_neon_srshl;
11921
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
11922
0
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
11923
0
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
11924
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
11925
0
  }
11926
0
  case NEON::BI__builtin_neon_vrsrad_n_u64:
11927
0
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
11928
0
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
11929
0
                                   ? Intrinsic::aarch64_neon_urshl
11930
0
                                   : Intrinsic::aarch64_neon_srshl;
11931
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11932
0
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
11933
0
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
11934
0
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
11935
0
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
11936
0
  }
11937
0
  case NEON::BI__builtin_neon_vshld_n_s64:
11938
0
  case NEON::BI__builtin_neon_vshld_n_u64: {
11939
0
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11940
0
    return Builder.CreateShl(
11941
0
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
11942
0
  }
11943
0
  case NEON::BI__builtin_neon_vshrd_n_s64: {
11944
0
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11945
0
    return Builder.CreateAShr(
11946
0
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11947
0
                                                   Amt->getZExtValue())),
11948
0
        "shrd_n");
11949
0
  }
11950
0
  case NEON::BI__builtin_neon_vshrd_n_u64: {
11951
0
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
11952
0
    uint64_t ShiftAmt = Amt->getZExtValue();
11953
    // Right-shifting an unsigned value by its size yields 0.
11954
0
    if (ShiftAmt == 64)
11955
0
      return ConstantInt::get(Int64Ty, 0);
11956
0
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
11957
0
                              "shrd_n");
11958
0
  }
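The two shift cases above map AArch64's immediate range (1..64) onto LLVM's shift semantics, where a shift by the full bit width is not defined. A plain C++ sketch of the same clamping logic (names illustrative):

    #include <algorithm>
    #include <cstdint>

    // Signed: an arithmetic shift by 64 behaves like a shift by 63,
    // so the amount is clamped.
    int64_t shrd_n_s64(int64_t v, unsigned n) {
      return v >> std::min<uint64_t>(63, n);
    }

    // Unsigned: a logical shift by 64 yields 0, so that case is folded away.
    uint64_t shrd_n_u64(uint64_t v, unsigned n) {
      return n == 64 ? 0 : v >> n;
    }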
11959
0
  case NEON::BI__builtin_neon_vsrad_n_s64: {
11960
0
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11961
0
    Ops[1] = Builder.CreateAShr(
11962
0
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
11963
0
                                                   Amt->getZExtValue())),
11964
0
        "shrd_n");
11965
0
    return Builder.CreateAdd(Ops[0], Ops[1]);
11966
0
  }
11967
0
  case NEON::BI__builtin_neon_vsrad_n_u64: {
11968
0
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
11969
0
    uint64_t ShiftAmt = Amt->getZExtValue();
11970
    // Right-shifting an unsigned value by its size yields 0.
11971
    // As Op + 0 = Op, return Ops[0] directly.
11972
0
    if (ShiftAmt == 64)
11973
0
      return Ops[0];
11974
0
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
11975
0
                                "shrd_n");
11976
0
    return Builder.CreateAdd(Ops[0], Ops[1]);
11977
0
  }
11978
0
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
11979
0
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
11980
0
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
11981
0
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
11982
0
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
11983
0
                                          "lane");
11984
0
    SmallVector<Value *, 2> ProductOps;
11985
0
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
11986
0
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
11987
0
    auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
11988
0
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
11989
0
                          ProductOps, "vqdmlXl");
11990
0
    Constant *CI = ConstantInt::get(SizeTy, 0);
11991
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
11992
0
    Ops.pop_back();
11993
11994
0
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
11995
0
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
11996
0
                          ? Intrinsic::aarch64_neon_sqadd
11997
0
                          : Intrinsic::aarch64_neon_sqsub;
11998
0
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
11999
0
  }
12000
0
  case NEON::BI__builtin_neon_vqdmlals_s32:
12001
0
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
12002
0
    SmallVector<Value *, 2> ProductOps;
12003
0
    ProductOps.push_back(Ops[1]);
12004
0
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12005
0
    Ops[1] =
12006
0
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12007
0
                     ProductOps, "vqdmlXl");
12008
12009
0
    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12010
0
                                        ? Intrinsic::aarch64_neon_sqadd
12011
0
                                        : Intrinsic::aarch64_neon_sqsub;
12012
0
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12013
0
  }
12014
0
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12015
0
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12016
0
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12017
0
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12018
0
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12019
0
                                          "lane");
12020
0
    SmallVector<Value *, 2> ProductOps;
12021
0
    ProductOps.push_back(Ops[1]);
12022
0
    ProductOps.push_back(Ops[2]);
12023
0
    Ops[1] =
12024
0
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12025
0
                     ProductOps, "vqdmlXl");
12026
0
    Ops.pop_back();
12027
12028
0
    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12029
0
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12030
0
                          ? Intrinsic::aarch64_neon_sqadd
12031
0
                          : Intrinsic::aarch64_neon_sqsub;
12032
0
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12033
0
  }
12034
0
  case NEON::BI__builtin_neon_vget_lane_bf16:
12035
0
  case NEON::BI__builtin_neon_vduph_lane_bf16:
12036
0
  case NEON::BI__builtin_neon_vduph_lane_f16: {
12037
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12038
0
                                        "vget_lane");
12039
0
  }
12040
0
  case NEON::BI__builtin_neon_vgetq_lane_bf16:
12041
0
  case NEON::BI__builtin_neon_vduph_laneq_bf16:
12042
0
  case NEON::BI__builtin_neon_vduph_laneq_f16: {
12043
0
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12044
0
                                        "vgetq_lane");
12045
0
  }
12046
12047
0
  case clang::AArch64::BI_InterlockedAdd: {
12048
0
    Address DestAddr = CheckAtomicAlignment(*this, E);
12049
0
    Value *Val = EmitScalarExpr(E->getArg(1));
12050
0
    AtomicRMWInst *RMWI =
12051
0
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12052
0
                                llvm::AtomicOrdering::SequentiallyConsistent);
12053
0
    return Builder.CreateAdd(RMWI, Val);
12054
0
  }
12055
0
  }
12056
12057
0
  llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12058
0
  llvm::Type *Ty = VTy;
12059
0
  if (!Ty)
12060
0
    return nullptr;
12061
12062
  // Not all intrinsics handled by the common case work for AArch64 yet, so only
12063
  // defer to common code if it's been added to our special map.
12064
0
  Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12065
0
                                        AArch64SIMDIntrinsicsProvenSorted);
12066
12067
0
  if (Builtin)
12068
0
    return EmitCommonNeonBuiltinExpr(
12069
0
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12070
0
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12071
0
        /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12072
12073
0
  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12074
0
    return V;
12075
12076
0
  unsigned Int;
12077
0
  switch (BuiltinID) {
12078
0
  default: return nullptr;
12079
0
  case NEON::BI__builtin_neon_vbsl_v:
12080
0
  case NEON::BI__builtin_neon_vbslq_v: {
12081
0
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12082
0
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12083
0
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12084
0
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12085
12086
0
    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12087
0
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12088
0
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12089
0
    return Builder.CreateBitCast(Ops[0], Ty);
12090
0
  }
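The vbsl expansion above is the classic bitwise select. A scalar sketch of the same identity, independent of NEON (names illustrative):

    #include <cstdint>

    // Bitwise select: for each bit, take b where the mask bit is 1,
    // otherwise take c. This is exactly (mask & b) | (~mask & c).
    uint64_t bit_select(uint64_t mask, uint64_t b, uint64_t c) {
      return (mask & b) | (~mask & c);
    }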
12091
0
  case NEON::BI__builtin_neon_vfma_lane_v:
12092
0
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12093
    // The ARM builtins (and instructions) have the addend as the first
12094
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
12095
0
    Value *Addend = Ops[0];
12096
0
    Value *Multiplicand = Ops[1];
12097
0
    Value *LaneSource = Ops[2];
12098
0
    Ops[0] = Multiplicand;
12099
0
    Ops[1] = LaneSource;
12100
0
    Ops[2] = Addend;
12101
12102
    // Now adjust things to handle the lane access.
12103
0
    auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12104
0
                         ? llvm::FixedVectorType::get(VTy->getElementType(),
12105
0
                                                      VTy->getNumElements() / 2)
12106
0
                         : VTy;
12107
0
    llvm::Constant *cst = cast<Constant>(Ops[3]);
12108
0
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12109
0
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12110
0
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12111
12112
0
    Ops.pop_back();
12113
0
    Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12114
0
                                       : Intrinsic::fma;
12115
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12116
0
  }
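To make the swap-and-splat above concrete: the ACLE by-lane fused multiply-accumulate takes the addend first and indexes one lane of the last vector operand, so the addend is rotated to the back and the selected lane is broadcast before calling llvm.fma. A usage sketch, assuming <arm_neon.h> (names illustrative):

    #include <arm_neon.h>

    float32x2_t fma_by_lane(float32x2_t acc, float32x2_t x, float32x2_t v) {
      // acc + x * v[1] in every lane; lowered as fma(x, splat(v[1]), acc).
      return vfma_lane_f32(acc, x, v, 1);
    }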
12117
0
  case NEON::BI__builtin_neon_vfma_laneq_v: {
12118
0
    auto *VTy = cast<llvm::FixedVectorType>(Ty);
12119
    // v1f64 fma should be mapped to Neon scalar f64 fma
12120
0
    if (VTy && VTy->getElementType() == DoubleTy) {
12121
0
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12122
0
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12123
0
      llvm::FixedVectorType *VTy =
12124
0
          GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12125
0
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12126
0
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12127
0
      Value *Result;
12128
0
      Result = emitCallMaybeConstrainedFPBuiltin(
12129
0
          *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12130
0
          DoubleTy, {Ops[1], Ops[2], Ops[0]});
12131
0
      return Builder.CreateBitCast(Result, Ty);
12132
0
    }
12133
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12134
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12135
12136
0
    auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12137
0
                                           VTy->getNumElements() * 2);
12138
0
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12139
0
    Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12140
0
                                               cast<ConstantInt>(Ops[3]));
12141
0
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12142
12143
0
    return emitCallMaybeConstrainedFPBuiltin(
12144
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12145
0
        {Ops[2], Ops[1], Ops[0]});
12146
0
  }
12147
0
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12148
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12149
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12150
12151
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12152
0
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12153
0
    return emitCallMaybeConstrainedFPBuiltin(
12154
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12155
0
        {Ops[2], Ops[1], Ops[0]});
12156
0
  }
12157
0
  case NEON::BI__builtin_neon_vfmah_lane_f16:
12158
0
  case NEON::BI__builtin_neon_vfmas_lane_f32:
12159
0
  case NEON::BI__builtin_neon_vfmah_laneq_f16:
12160
0
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
12161
0
  case NEON::BI__builtin_neon_vfmad_lane_f64:
12162
0
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12163
0
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
12164
0
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12165
0
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12166
0
    return emitCallMaybeConstrainedFPBuiltin(
12167
0
        *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12168
0
        {Ops[1], Ops[2], Ops[0]});
12169
0
  }
12170
0
  case NEON::BI__builtin_neon_vmull_v:
12171
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12172
0
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12173
0
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12174
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12175
0
  case NEON::BI__builtin_neon_vmax_v:
12176
0
  case NEON::BI__builtin_neon_vmaxq_v:
12177
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12178
0
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12179
0
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12180
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12181
0
  case NEON::BI__builtin_neon_vmaxh_f16: {
12182
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12183
0
    Int = Intrinsic::aarch64_neon_fmax;
12184
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12185
0
  }
12186
0
  case NEON::BI__builtin_neon_vmin_v:
12187
0
  case NEON::BI__builtin_neon_vminq_v:
12188
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12189
0
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12190
0
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12191
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12192
0
  case NEON::BI__builtin_neon_vminh_f16: {
12193
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12194
0
    Int = Intrinsic::aarch64_neon_fmin;
12195
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12196
0
  }
12197
0
  case NEON::BI__builtin_neon_vabd_v:
12198
0
  case NEON::BI__builtin_neon_vabdq_v:
12199
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12200
0
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12201
0
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12202
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12203
0
  case NEON::BI__builtin_neon_vpadal_v:
12204
0
  case NEON::BI__builtin_neon_vpadalq_v: {
12205
0
    unsigned ArgElts = VTy->getNumElements();
12206
0
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12207
0
    unsigned BitWidth = EltTy->getBitWidth();
12208
0
    auto *ArgTy = llvm::FixedVectorType::get(
12209
0
        llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12210
0
    llvm::Type* Tys[2] = { VTy, ArgTy };
12211
0
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12212
0
    SmallVector<llvm::Value*, 1> TmpOps;
12213
0
    TmpOps.push_back(Ops[1]);
12214
0
    Function *F = CGM.getIntrinsic(Int, Tys);
12215
0
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12216
0
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12217
0
    return Builder.CreateAdd(tmp, addend);
12218
0
  }
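The vpadal lowering above has no single LLVM intrinsic, so it is built from the pairwise widening add ([us]addlp) followed by an ordinary vector add. A behavioural sketch of one variant, assuming <arm_neon.h> (names illustrative):

    #include <arm_neon.h>

    int16x4_t pairwise_accumulate(int16x4_t acc, int8x8_t v) {
      // Each result lane i is acc[i] + (int16_t)v[2*i] + (int16_t)v[2*i + 1],
      // i.e. saddlp followed by an add, matching the sequence emitted above.
      return vpadal_s8(acc, v);
    }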
12219
0
  case NEON::BI__builtin_neon_vpmin_v:
12220
0
  case NEON::BI__builtin_neon_vpminq_v:
12221
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12222
0
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12223
0
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12224
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12225
0
  case NEON::BI__builtin_neon_vpmax_v:
12226
0
  case NEON::BI__builtin_neon_vpmaxq_v:
12227
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12228
0
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12229
0
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12230
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12231
0
  case NEON::BI__builtin_neon_vminnm_v:
12232
0
  case NEON::BI__builtin_neon_vminnmq_v:
12233
0
    Int = Intrinsic::aarch64_neon_fminnm;
12234
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12235
0
  case NEON::BI__builtin_neon_vminnmh_f16:
12236
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12237
0
    Int = Intrinsic::aarch64_neon_fminnm;
12238
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12239
0
  case NEON::BI__builtin_neon_vmaxnm_v:
12240
0
  case NEON::BI__builtin_neon_vmaxnmq_v:
12241
0
    Int = Intrinsic::aarch64_neon_fmaxnm;
12242
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12243
0
  case NEON::BI__builtin_neon_vmaxnmh_f16:
12244
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12245
0
    Int = Intrinsic::aarch64_neon_fmaxnm;
12246
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12247
0
  case NEON::BI__builtin_neon_vrecpss_f32: {
12248
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12249
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12250
0
                        Ops, "vrecps");
12251
0
  }
12252
0
  case NEON::BI__builtin_neon_vrecpsd_f64:
12253
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12254
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12255
0
                        Ops, "vrecps");
12256
0
  case NEON::BI__builtin_neon_vrecpsh_f16:
12257
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
12258
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12259
0
                        Ops, "vrecps");
12260
0
  case NEON::BI__builtin_neon_vqshrun_n_v:
12261
0
    Int = Intrinsic::aarch64_neon_sqshrun;
12262
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12263
0
  case NEON::BI__builtin_neon_vqrshrun_n_v:
12264
0
    Int = Intrinsic::aarch64_neon_sqrshrun;
12265
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12266
0
  case NEON::BI__builtin_neon_vqshrn_n_v:
12267
0
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12268
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12269
0
  case NEON::BI__builtin_neon_vrshrn_n_v:
12270
0
    Int = Intrinsic::aarch64_neon_rshrn;
12271
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12272
0
  case NEON::BI__builtin_neon_vqrshrn_n_v:
12273
0
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12274
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12275
0
  case NEON::BI__builtin_neon_vrndah_f16: {
12276
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12277
0
    Int = Builder.getIsFPConstrained()
12278
0
              ? Intrinsic::experimental_constrained_round
12279
0
              : Intrinsic::round;
12280
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12281
0
  }
12282
0
  case NEON::BI__builtin_neon_vrnda_v:
12283
0
  case NEON::BI__builtin_neon_vrndaq_v: {
12284
0
    Int = Builder.getIsFPConstrained()
12285
0
              ? Intrinsic::experimental_constrained_round
12286
0
              : Intrinsic::round;
12287
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12288
0
  }
12289
0
  case NEON::BI__builtin_neon_vrndih_f16: {
12290
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12291
0
    Int = Builder.getIsFPConstrained()
12292
0
              ? Intrinsic::experimental_constrained_nearbyint
12293
0
              : Intrinsic::nearbyint;
12294
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12295
0
  }
12296
0
  case NEON::BI__builtin_neon_vrndmh_f16: {
12297
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12298
0
    Int = Builder.getIsFPConstrained()
12299
0
              ? Intrinsic::experimental_constrained_floor
12300
0
              : Intrinsic::floor;
12301
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12302
0
  }
12303
0
  case NEON::BI__builtin_neon_vrndm_v:
12304
0
  case NEON::BI__builtin_neon_vrndmq_v: {
12305
0
    Int = Builder.getIsFPConstrained()
12306
0
              ? Intrinsic::experimental_constrained_floor
12307
0
              : Intrinsic::floor;
12308
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12309
0
  }
12310
0
  case NEON::BI__builtin_neon_vrndnh_f16: {
12311
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12312
0
    Int = Builder.getIsFPConstrained()
12313
0
              ? Intrinsic::experimental_constrained_roundeven
12314
0
              : Intrinsic::roundeven;
12315
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12316
0
  }
12317
0
  case NEON::BI__builtin_neon_vrndn_v:
12318
0
  case NEON::BI__builtin_neon_vrndnq_v: {
12319
0
    Int = Builder.getIsFPConstrained()
12320
0
              ? Intrinsic::experimental_constrained_roundeven
12321
0
              : Intrinsic::roundeven;
12322
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12323
0
  }
12324
0
  case NEON::BI__builtin_neon_vrndns_f32: {
12325
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12326
0
    Int = Builder.getIsFPConstrained()
12327
0
              ? Intrinsic::experimental_constrained_roundeven
12328
0
              : Intrinsic::roundeven;
12329
0
    return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12330
0
  }
12331
0
  case NEON::BI__builtin_neon_vrndph_f16: {
12332
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12333
0
    Int = Builder.getIsFPConstrained()
12334
0
              ? Intrinsic::experimental_constrained_ceil
12335
0
              : Intrinsic::ceil;
12336
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12337
0
  }
12338
0
  case NEON::BI__builtin_neon_vrndp_v:
12339
0
  case NEON::BI__builtin_neon_vrndpq_v: {
12340
0
    Int = Builder.getIsFPConstrained()
12341
0
              ? Intrinsic::experimental_constrained_ceil
12342
0
              : Intrinsic::ceil;
12343
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12344
0
  }
12345
0
  case NEON::BI__builtin_neon_vrndxh_f16: {
12346
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12347
0
    Int = Builder.getIsFPConstrained()
12348
0
              ? Intrinsic::experimental_constrained_rint
12349
0
              : Intrinsic::rint;
12350
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12351
0
  }
12352
0
  case NEON::BI__builtin_neon_vrndx_v:
12353
0
  case NEON::BI__builtin_neon_vrndxq_v: {
12354
0
    Int = Builder.getIsFPConstrained()
12355
0
              ? Intrinsic::experimental_constrained_rint
12356
0
              : Intrinsic::rint;
12357
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12358
0
  }
12359
0
  case NEON::BI__builtin_neon_vrndh_f16: {
12360
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12361
0
    Int = Builder.getIsFPConstrained()
12362
0
              ? Intrinsic::experimental_constrained_trunc
12363
0
              : Intrinsic::trunc;
12364
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12365
0
  }
12366
0
  case NEON::BI__builtin_neon_vrnd32x_f32:
12367
0
  case NEON::BI__builtin_neon_vrnd32xq_f32:
12368
0
  case NEON::BI__builtin_neon_vrnd32x_f64:
12369
0
  case NEON::BI__builtin_neon_vrnd32xq_f64: {
12370
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12371
0
    Int = Intrinsic::aarch64_neon_frint32x;
12372
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12373
0
  }
12374
0
  case NEON::BI__builtin_neon_vrnd32z_f32:
12375
0
  case NEON::BI__builtin_neon_vrnd32zq_f32:
12376
0
  case NEON::BI__builtin_neon_vrnd32z_f64:
12377
0
  case NEON::BI__builtin_neon_vrnd32zq_f64: {
12378
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12379
0
    Int = Intrinsic::aarch64_neon_frint32z;
12380
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12381
0
  }
12382
0
  case NEON::BI__builtin_neon_vrnd64x_f32:
12383
0
  case NEON::BI__builtin_neon_vrnd64xq_f32:
12384
0
  case NEON::BI__builtin_neon_vrnd64x_f64:
12385
0
  case NEON::BI__builtin_neon_vrnd64xq_f64: {
12386
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12387
0
    Int = Intrinsic::aarch64_neon_frint64x;
12388
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12389
0
  }
12390
0
  case NEON::BI__builtin_neon_vrnd64z_f32:
12391
0
  case NEON::BI__builtin_neon_vrnd64zq_f32:
12392
0
  case NEON::BI__builtin_neon_vrnd64z_f64:
12393
0
  case NEON::BI__builtin_neon_vrnd64zq_f64: {
12394
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12395
0
    Int = Intrinsic::aarch64_neon_frint64z;
12396
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12397
0
  }
12398
0
  case NEON::BI__builtin_neon_vrnd_v:
12399
0
  case NEON::BI__builtin_neon_vrndq_v: {
12400
0
    Int = Builder.getIsFPConstrained()
12401
0
              ? Intrinsic::experimental_constrained_trunc
12402
0
              : Intrinsic::trunc;
12403
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12404
0
  }
12405
0
  case NEON::BI__builtin_neon_vcvt_f64_v:
12406
0
  case NEON::BI__builtin_neon_vcvtq_f64_v:
12407
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12408
0
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12409
0
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12410
0
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12411
0
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
12412
0
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12413
0
           "unexpected vcvt_f64_f32 builtin");
12414
0
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12415
0
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12416
12417
0
    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12418
0
  }
12419
0
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
12420
0
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12421
0
           "unexpected vcvt_f32_f64 builtin");
12422
0
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12423
0
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12424
12425
0
    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12426
0
  }
12427
0
  case NEON::BI__builtin_neon_vcvt_s32_v:
12428
0
  case NEON::BI__builtin_neon_vcvt_u32_v:
12429
0
  case NEON::BI__builtin_neon_vcvt_s64_v:
12430
0
  case NEON::BI__builtin_neon_vcvt_u64_v:
12431
0
  case NEON::BI__builtin_neon_vcvt_s16_f16:
12432
0
  case NEON::BI__builtin_neon_vcvt_u16_f16:
12433
0
  case NEON::BI__builtin_neon_vcvtq_s32_v:
12434
0
  case NEON::BI__builtin_neon_vcvtq_u32_v:
12435
0
  case NEON::BI__builtin_neon_vcvtq_s64_v:
12436
0
  case NEON::BI__builtin_neon_vcvtq_u64_v:
12437
0
  case NEON::BI__builtin_neon_vcvtq_s16_f16:
12438
0
  case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12439
0
    Int =
12440
0
        usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12441
0
    llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12442
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12443
0
  }
12444
0
  case NEON::BI__builtin_neon_vcvta_s16_f16:
12445
0
  case NEON::BI__builtin_neon_vcvta_u16_f16:
12446
0
  case NEON::BI__builtin_neon_vcvta_s32_v:
12447
0
  case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12448
0
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
12449
0
  case NEON::BI__builtin_neon_vcvta_u32_v:
12450
0
  case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12451
0
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
12452
0
  case NEON::BI__builtin_neon_vcvta_s64_v:
12453
0
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
12454
0
  case NEON::BI__builtin_neon_vcvta_u64_v:
12455
0
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12456
0
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12457
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12458
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12459
0
  }
12460
0
  case NEON::BI__builtin_neon_vcvtm_s16_f16:
12461
0
  case NEON::BI__builtin_neon_vcvtm_s32_v:
12462
0
  case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12463
0
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
12464
0
  case NEON::BI__builtin_neon_vcvtm_u16_f16:
12465
0
  case NEON::BI__builtin_neon_vcvtm_u32_v:
12466
0
  case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12467
0
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
12468
0
  case NEON::BI__builtin_neon_vcvtm_s64_v:
12469
0
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
12470
0
  case NEON::BI__builtin_neon_vcvtm_u64_v:
12471
0
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12472
0
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12473
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12474
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12475
0
  }
12476
0
  case NEON::BI__builtin_neon_vcvtn_s16_f16:
12477
0
  case NEON::BI__builtin_neon_vcvtn_s32_v:
12478
0
  case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12479
0
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
12480
0
  case NEON::BI__builtin_neon_vcvtn_u16_f16:
12481
0
  case NEON::BI__builtin_neon_vcvtn_u32_v:
12482
0
  case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12483
0
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
12484
0
  case NEON::BI__builtin_neon_vcvtn_s64_v:
12485
0
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
12486
0
  case NEON::BI__builtin_neon_vcvtn_u64_v:
12487
0
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12488
0
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12489
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12490
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12491
0
  }
12492
0
  case NEON::BI__builtin_neon_vcvtp_s16_f16:
12493
0
  case NEON::BI__builtin_neon_vcvtp_s32_v:
12494
0
  case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12495
0
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
12496
0
  case NEON::BI__builtin_neon_vcvtp_u16_f16:
12497
0
  case NEON::BI__builtin_neon_vcvtp_u32_v:
12498
0
  case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12499
0
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
12500
0
  case NEON::BI__builtin_neon_vcvtp_s64_v:
12501
0
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
12502
0
  case NEON::BI__builtin_neon_vcvtp_u64_v:
12503
0
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12504
0
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12505
0
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12506
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12507
0
  }
12508
0
  case NEON::BI__builtin_neon_vmulx_v:
12509
0
  case NEON::BI__builtin_neon_vmulxq_v: {
12510
0
    Int = Intrinsic::aarch64_neon_fmulx;
12511
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12512
0
  }
12513
0
  case NEON::BI__builtin_neon_vmulxh_lane_f16:
12514
0
  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12515
    // vmulx_lane should be mapped to Neon scalar mulx after
12516
    // extracting the scalar element
12517
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
12518
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12519
0
    Ops.pop_back();
12520
0
    Int = Intrinsic::aarch64_neon_fmulx;
12521
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12522
0
  }
12523
0
  case NEON::BI__builtin_neon_vmul_lane_v:
12524
0
  case NEON::BI__builtin_neon_vmul_laneq_v: {
12525
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12526
0
    bool Quad = false;
12527
0
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12528
0
      Quad = true;
12529
0
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12530
0
    llvm::FixedVectorType *VTy =
12531
0
        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12532
0
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12533
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12534
0
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12535
0
    return Builder.CreateBitCast(Result, Ty);
12536
0
  }
12537
0
  case NEON::BI__builtin_neon_vnegd_s64:
12538
0
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12539
0
  case NEON::BI__builtin_neon_vnegh_f16:
12540
0
    return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12541
0
  case NEON::BI__builtin_neon_vpmaxnm_v:
12542
0
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
12543
0
    Int = Intrinsic::aarch64_neon_fmaxnmp;
12544
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12545
0
  }
12546
0
  case NEON::BI__builtin_neon_vpminnm_v:
12547
0
  case NEON::BI__builtin_neon_vpminnmq_v: {
12548
0
    Int = Intrinsic::aarch64_neon_fminnmp;
12549
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12550
0
  }
12551
0
  case NEON::BI__builtin_neon_vsqrth_f16: {
12552
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12553
0
    Int = Builder.getIsFPConstrained()
12554
0
              ? Intrinsic::experimental_constrained_sqrt
12555
0
              : Intrinsic::sqrt;
12556
0
    return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12557
0
  }
12558
0
  case NEON::BI__builtin_neon_vsqrt_v:
12559
0
  case NEON::BI__builtin_neon_vsqrtq_v: {
12560
0
    Int = Builder.getIsFPConstrained()
12561
0
              ? Intrinsic::experimental_constrained_sqrt
12562
0
              : Intrinsic::sqrt;
12563
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12564
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12565
0
  }
12566
0
  case NEON::BI__builtin_neon_vrbit_v:
12567
0
  case NEON::BI__builtin_neon_vrbitq_v: {
12568
0
    Int = Intrinsic::bitreverse;
12569
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12570
0
  }
12571
0
  case NEON::BI__builtin_neon_vaddv_u8:
12572
    // FIXME: These are handled by the AArch64 scalar code.
12573
0
    usgn = true;
12574
0
    [[fallthrough]];
12575
0
  case NEON::BI__builtin_neon_vaddv_s8: {
12576
0
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12577
0
    Ty = Int32Ty;
12578
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12579
0
    llvm::Type *Tys[2] = { Ty, VTy };
12580
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12581
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12582
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12583
0
  }
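The across-vector add reductions above always go through an i32 intrinsic result and are then truncated back to the element width. A usage sketch, assuming <arm_neon.h> (names illustrative):

    #include <arm_neon.h>

    uint8_t sum_all_lanes(uint8x8_t v) {
      // Adds all eight lanes; the 32-bit reduction result is truncated to 8 bits.
      return vaddv_u8(v);
    }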
12584
0
  case NEON::BI__builtin_neon_vaddv_u16:
12585
0
    usgn = true;
12586
0
    [[fallthrough]];
12587
0
  case NEON::BI__builtin_neon_vaddv_s16: {
12588
0
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12589
0
    Ty = Int32Ty;
12590
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12591
0
    llvm::Type *Tys[2] = { Ty, VTy };
12592
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12593
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12594
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12595
0
  }
12596
0
  case NEON::BI__builtin_neon_vaddvq_u8:
12597
0
    usgn = true;
12598
0
    [[fallthrough]];
12599
0
  case NEON::BI__builtin_neon_vaddvq_s8: {
12600
0
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12601
0
    Ty = Int32Ty;
12602
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12603
0
    llvm::Type *Tys[2] = { Ty, VTy };
12604
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12605
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12606
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12607
0
  }
12608
0
  case NEON::BI__builtin_neon_vaddvq_u16:
12609
0
    usgn = true;
12610
0
    [[fallthrough]];
12611
0
  case NEON::BI__builtin_neon_vaddvq_s16: {
12612
0
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12613
0
    Ty = Int32Ty;
12614
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12615
0
    llvm::Type *Tys[2] = { Ty, VTy };
12616
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12617
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12618
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12619
0
  }
12620
0
  case NEON::BI__builtin_neon_vmaxv_u8: {
12621
0
    Int = Intrinsic::aarch64_neon_umaxv;
12622
0
    Ty = Int32Ty;
12623
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12624
0
    llvm::Type *Tys[2] = { Ty, VTy };
12625
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12626
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12627
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12628
0
  }
12629
0
  case NEON::BI__builtin_neon_vmaxv_u16: {
12630
0
    Int = Intrinsic::aarch64_neon_umaxv;
12631
0
    Ty = Int32Ty;
12632
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12633
0
    llvm::Type *Tys[2] = { Ty, VTy };
12634
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12635
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12636
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12637
0
  }
12638
0
  case NEON::BI__builtin_neon_vmaxvq_u8: {
12639
0
    Int = Intrinsic::aarch64_neon_umaxv;
12640
0
    Ty = Int32Ty;
12641
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12642
0
    llvm::Type *Tys[2] = { Ty, VTy };
12643
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12644
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12645
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12646
0
  }
12647
0
  case NEON::BI__builtin_neon_vmaxvq_u16: {
12648
0
    Int = Intrinsic::aarch64_neon_umaxv;
12649
0
    Ty = Int32Ty;
12650
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12651
0
    llvm::Type *Tys[2] = { Ty, VTy };
12652
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12653
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12654
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12655
0
  }
12656
0
  case NEON::BI__builtin_neon_vmaxv_s8: {
12657
0
    Int = Intrinsic::aarch64_neon_smaxv;
12658
0
    Ty = Int32Ty;
12659
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12660
0
    llvm::Type *Tys[2] = { Ty, VTy };
12661
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12662
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12663
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12664
0
  }
12665
0
  case NEON::BI__builtin_neon_vmaxv_s16: {
12666
0
    Int = Intrinsic::aarch64_neon_smaxv;
12667
0
    Ty = Int32Ty;
12668
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12669
0
    llvm::Type *Tys[2] = { Ty, VTy };
12670
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12671
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12672
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12673
0
  }
12674
0
  case NEON::BI__builtin_neon_vmaxvq_s8: {
12675
0
    Int = Intrinsic::aarch64_neon_smaxv;
12676
0
    Ty = Int32Ty;
12677
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12678
0
    llvm::Type *Tys[2] = { Ty, VTy };
12679
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12680
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12681
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12682
0
  }
12683
0
  case NEON::BI__builtin_neon_vmaxvq_s16: {
12684
0
    Int = Intrinsic::aarch64_neon_smaxv;
12685
0
    Ty = Int32Ty;
12686
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12687
0
    llvm::Type *Tys[2] = { Ty, VTy };
12688
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12689
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12690
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12691
0
  }
12692
0
  case NEON::BI__builtin_neon_vmaxv_f16: {
12693
0
    Int = Intrinsic::aarch64_neon_fmaxv;
12694
0
    Ty = HalfTy;
12695
0
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12696
0
    llvm::Type *Tys[2] = { Ty, VTy };
12697
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12698
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12699
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12700
0
  }
12701
0
  case NEON::BI__builtin_neon_vmaxvq_f16: {
12702
0
    Int = Intrinsic::aarch64_neon_fmaxv;
12703
0
    Ty = HalfTy;
12704
0
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12705
0
    llvm::Type *Tys[2] = { Ty, VTy };
12706
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12707
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12708
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12709
0
  }
12710
0
  case NEON::BI__builtin_neon_vminv_u8: {
12711
0
    Int = Intrinsic::aarch64_neon_uminv;
12712
0
    Ty = Int32Ty;
12713
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12714
0
    llvm::Type *Tys[2] = { Ty, VTy };
12715
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12716
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12717
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12718
0
  }
12719
0
  case NEON::BI__builtin_neon_vminv_u16: {
12720
0
    Int = Intrinsic::aarch64_neon_uminv;
12721
0
    Ty = Int32Ty;
12722
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12723
0
    llvm::Type *Tys[2] = { Ty, VTy };
12724
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12725
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12726
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12727
0
  }
12728
0
  case NEON::BI__builtin_neon_vminvq_u8: {
12729
0
    Int = Intrinsic::aarch64_neon_uminv;
12730
0
    Ty = Int32Ty;
12731
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12732
0
    llvm::Type *Tys[2] = { Ty, VTy };
12733
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12734
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12735
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12736
0
  }
12737
0
  case NEON::BI__builtin_neon_vminvq_u16: {
12738
0
    Int = Intrinsic::aarch64_neon_uminv;
12739
0
    Ty = Int32Ty;
12740
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12741
0
    llvm::Type *Tys[2] = { Ty, VTy };
12742
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12743
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12744
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12745
0
  }
12746
0
  case NEON::BI__builtin_neon_vminv_s8: {
12747
0
    Int = Intrinsic::aarch64_neon_sminv;
12748
0
    Ty = Int32Ty;
12749
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12750
0
    llvm::Type *Tys[2] = { Ty, VTy };
12751
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12752
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12753
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12754
0
  }
12755
0
  case NEON::BI__builtin_neon_vminv_s16: {
12756
0
    Int = Intrinsic::aarch64_neon_sminv;
12757
0
    Ty = Int32Ty;
12758
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12759
0
    llvm::Type *Tys[2] = { Ty, VTy };
12760
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12761
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12762
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12763
0
  }
12764
0
  case NEON::BI__builtin_neon_vminvq_s8: {
12765
0
    Int = Intrinsic::aarch64_neon_sminv;
12766
0
    Ty = Int32Ty;
12767
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12768
0
    llvm::Type *Tys[2] = { Ty, VTy };
12769
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12770
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12771
0
    return Builder.CreateTrunc(Ops[0], Int8Ty);
12772
0
  }
12773
0
  case NEON::BI__builtin_neon_vminvq_s16: {
12774
0
    Int = Intrinsic::aarch64_neon_sminv;
12775
0
    Ty = Int32Ty;
12776
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12777
0
    llvm::Type *Tys[2] = { Ty, VTy };
12778
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12779
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12780
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12781
0
  }
12782
0
  case NEON::BI__builtin_neon_vminv_f16: {
12783
0
    Int = Intrinsic::aarch64_neon_fminv;
12784
0
    Ty = HalfTy;
12785
0
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12786
0
    llvm::Type *Tys[2] = { Ty, VTy };
12787
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12788
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12789
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12790
0
  }
12791
0
  case NEON::BI__builtin_neon_vminvq_f16: {
12792
0
    Int = Intrinsic::aarch64_neon_fminv;
12793
0
    Ty = HalfTy;
12794
0
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12795
0
    llvm::Type *Tys[2] = { Ty, VTy };
12796
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12797
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12798
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12799
0
  }
12800
0
  case NEON::BI__builtin_neon_vmaxnmv_f16: {
12801
0
    Int = Intrinsic::aarch64_neon_fmaxnmv;
12802
0
    Ty = HalfTy;
12803
0
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12804
0
    llvm::Type *Tys[2] = { Ty, VTy };
12805
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12806
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12807
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12808
0
  }
12809
0
  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
12810
0
    Int = Intrinsic::aarch64_neon_fmaxnmv;
12811
0
    Ty = HalfTy;
12812
0
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12813
0
    llvm::Type *Tys[2] = { Ty, VTy };
12814
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12815
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12816
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12817
0
  }
12818
0
  case NEON::BI__builtin_neon_vminnmv_f16: {
12819
0
    Int = Intrinsic::aarch64_neon_fminnmv;
12820
0
    Ty = HalfTy;
12821
0
    VTy = llvm::FixedVectorType::get(HalfTy, 4);
12822
0
    llvm::Type *Tys[2] = { Ty, VTy };
12823
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12824
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12825
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12826
0
  }
12827
0
  case NEON::BI__builtin_neon_vminnmvq_f16: {
12828
0
    Int = Intrinsic::aarch64_neon_fminnmv;
12829
0
    Ty = HalfTy;
12830
0
    VTy = llvm::FixedVectorType::get(HalfTy, 8);
12831
0
    llvm::Type *Tys[2] = { Ty, VTy };
12832
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12833
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12834
0
    return Builder.CreateTrunc(Ops[0], HalfTy);
12835
0
  }
12836
0
  case NEON::BI__builtin_neon_vmul_n_f64: {
12837
0
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12838
0
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
12839
0
    return Builder.CreateFMul(Ops[0], RHS);
12840
0
  }
12841
0
  case NEON::BI__builtin_neon_vaddlv_u8: {
12842
0
    Int = Intrinsic::aarch64_neon_uaddlv;
12843
0
    Ty = Int32Ty;
12844
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12845
0
    llvm::Type *Tys[2] = { Ty, VTy };
12846
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12847
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12848
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12849
0
  }
12850
0
  case NEON::BI__builtin_neon_vaddlv_u16: {
12851
0
    Int = Intrinsic::aarch64_neon_uaddlv;
12852
0
    Ty = Int32Ty;
12853
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12854
0
    llvm::Type *Tys[2] = { Ty, VTy };
12855
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12856
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12857
0
  }
12858
0
  case NEON::BI__builtin_neon_vaddlvq_u8: {
12859
0
    Int = Intrinsic::aarch64_neon_uaddlv;
12860
0
    Ty = Int32Ty;
12861
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12862
0
    llvm::Type *Tys[2] = { Ty, VTy };
12863
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12864
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12865
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12866
0
  }
12867
0
  case NEON::BI__builtin_neon_vaddlvq_u16: {
12868
0
    Int = Intrinsic::aarch64_neon_uaddlv;
12869
0
    Ty = Int32Ty;
12870
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12871
0
    llvm::Type *Tys[2] = { Ty, VTy };
12872
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12873
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12874
0
  }
12875
0
  case NEON::BI__builtin_neon_vaddlv_s8: {
12876
0
    Int = Intrinsic::aarch64_neon_saddlv;
12877
0
    Ty = Int32Ty;
12878
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12879
0
    llvm::Type *Tys[2] = { Ty, VTy };
12880
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12881
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12882
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12883
0
  }
12884
0
  case NEON::BI__builtin_neon_vaddlv_s16: {
12885
0
    Int = Intrinsic::aarch64_neon_saddlv;
12886
0
    Ty = Int32Ty;
12887
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12888
0
    llvm::Type *Tys[2] = { Ty, VTy };
12889
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12890
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12891
0
  }
12892
0
  case NEON::BI__builtin_neon_vaddlvq_s8: {
12893
0
    Int = Intrinsic::aarch64_neon_saddlv;
12894
0
    Ty = Int32Ty;
12895
0
    VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12896
0
    llvm::Type *Tys[2] = { Ty, VTy };
12897
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12898
0
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12899
0
    return Builder.CreateTrunc(Ops[0], Int16Ty);
12900
0
  }
12901
0
  case NEON::BI__builtin_neon_vaddlvq_s16: {
12902
0
    Int = Intrinsic::aarch64_neon_saddlv;
12903
0
    Ty = Int32Ty;
12904
0
    VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12905
0
    llvm::Type *Tys[2] = { Ty, VTy };
12906
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
12907
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12908
0
  }
12909
0
  case NEON::BI__builtin_neon_vsri_n_v:
12910
0
  case NEON::BI__builtin_neon_vsriq_n_v: {
12911
0
    Int = Intrinsic::aarch64_neon_vsri;
12912
0
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12913
0
    return EmitNeonCall(Intrin, Ops, "vsri_n");
12914
0
  }
12915
0
  case NEON::BI__builtin_neon_vsli_n_v:
12916
0
  case NEON::BI__builtin_neon_vsliq_n_v: {
12917
0
    Int = Intrinsic::aarch64_neon_vsli;
12918
0
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
12919
0
    return EmitNeonCall(Intrin, Ops, "vsli_n");
12920
0
  }
12921
0
  case NEON::BI__builtin_neon_vsra_n_v:
12922
0
  case NEON::BI__builtin_neon_vsraq_n_v:
12923
0
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12924
0
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
12925
0
    return Builder.CreateAdd(Ops[0], Ops[1]);
12926
0
  case NEON::BI__builtin_neon_vrsra_n_v:
12927
0
  case NEON::BI__builtin_neon_vrsraq_n_v: {
12928
0
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
12929
0
    SmallVector<llvm::Value*,2> TmpOps;
12930
0
    TmpOps.push_back(Ops[1]);
12931
0
    TmpOps.push_back(Ops[2]);
12932
0
    Function* F = CGM.getIntrinsic(Int, Ty);
12933
0
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
12934
0
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
12935
0
    return Builder.CreateAdd(Ops[0], tmp);
12936
0
  }
12937
0
  case NEON::BI__builtin_neon_vld1_v:
12938
0
  case NEON::BI__builtin_neon_vld1q_v: {
12939
0
    return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
12940
0
  }
12941
0
  case NEON::BI__builtin_neon_vst1_v:
12942
0
  case NEON::BI__builtin_neon_vst1q_v:
12943
0
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12944
0
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12945
0
  case NEON::BI__builtin_neon_vld1_lane_v:
12946
0
  case NEON::BI__builtin_neon_vld1q_lane_v: {
12947
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12948
0
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12949
0
                                       PtrOp0.getAlignment());
12950
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
12951
0
  }
12952
0
  case NEON::BI__builtin_neon_vldap1_lane_s64:
12953
0
  case NEON::BI__builtin_neon_vldap1q_lane_s64: {
12954
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12955
0
    llvm::LoadInst *LI = Builder.CreateAlignedLoad(
12956
0
        VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
12957
0
    LI->setAtomic(llvm::AtomicOrdering::Acquire);
12958
0
    Ops[0] = LI;
12959
0
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
12960
0
  }
12961
0
  case NEON::BI__builtin_neon_vld1_dup_v:
12962
0
  case NEON::BI__builtin_neon_vld1q_dup_v: {
12963
0
    Value *V = PoisonValue::get(Ty);
12964
0
    Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
12965
0
                                       PtrOp0.getAlignment());
12966
0
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
12967
0
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
12968
0
    return EmitNeonSplat(Ops[0], CI);
12969
0
  }
12970
0
  case NEON::BI__builtin_neon_vst1_lane_v:
12971
0
  case NEON::BI__builtin_neon_vst1q_lane_v:
12972
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12973
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12974
0
    return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12975
0
  case NEON::BI__builtin_neon_vstl1_lane_s64:
12976
0
  case NEON::BI__builtin_neon_vstl1q_lane_s64: {
12977
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12978
0
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
12979
0
    llvm::StoreInst *SI =
12980
0
        Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
12981
0
    SI->setAtomic(llvm::AtomicOrdering::Release);
12982
0
    return SI;
12983
0
  }
12984
0
  case NEON::BI__builtin_neon_vld2_v:
12985
0
  case NEON::BI__builtin_neon_vld2q_v: {
12986
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12987
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
12988
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
12989
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12990
0
  }
12991
0
  case NEON::BI__builtin_neon_vld3_v:
12992
0
  case NEON::BI__builtin_neon_vld3q_v: {
12993
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
12994
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
12995
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
12996
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
12997
0
  }
12998
0
  case NEON::BI__builtin_neon_vld4_v:
12999
0
  case NEON::BI__builtin_neon_vld4q_v: {
13000
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13001
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13002
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13003
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13004
0
  }
13005
0
  case NEON::BI__builtin_neon_vld2_dup_v:
13006
0
  case NEON::BI__builtin_neon_vld2q_dup_v: {
13007
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13008
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13009
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13010
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13011
0
  }
13012
0
  case NEON::BI__builtin_neon_vld3_dup_v:
13013
0
  case NEON::BI__builtin_neon_vld3q_dup_v: {
13014
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13015
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13016
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13017
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13018
0
  }
13019
0
  case NEON::BI__builtin_neon_vld4_dup_v:
13020
0
  case NEON::BI__builtin_neon_vld4q_dup_v: {
13021
0
    llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13022
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13023
0
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13024
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13025
0
  }
13026
0
  case NEON::BI__builtin_neon_vld2_lane_v:
13027
0
  case NEON::BI__builtin_neon_vld2q_lane_v: {
13028
0
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13029
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13030
0
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13031
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13032
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13033
0
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13034
0
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13035
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13036
0
  }
13037
0
  case NEON::BI__builtin_neon_vld3_lane_v:
13038
0
  case NEON::BI__builtin_neon_vld3q_lane_v: {
13039
0
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13040
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13041
0
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13042
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13043
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13044
0
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13045
0
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13046
0
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13047
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13048
0
  }
13049
0
  case NEON::BI__builtin_neon_vld4_lane_v:
13050
0
  case NEON::BI__builtin_neon_vld4q_lane_v: {
13051
0
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13052
0
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13053
0
    std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13054
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13055
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13056
0
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13057
0
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13058
0
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13059
0
    Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13060
0
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13061
0
  }
13062
0
  case NEON::BI__builtin_neon_vst2_v:
13063
0
  case NEON::BI__builtin_neon_vst2q_v: {
13064
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13065
0
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13066
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13067
0
                        Ops, "");
13068
0
  }
13069
0
  case NEON::BI__builtin_neon_vst2_lane_v:
13070
0
  case NEON::BI__builtin_neon_vst2q_lane_v: {
13071
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13072
0
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13073
0
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13074
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13075
0
                        Ops, "");
13076
0
  }
13077
0
  case NEON::BI__builtin_neon_vst3_v:
13078
0
  case NEON::BI__builtin_neon_vst3q_v: {
13079
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13080
0
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13081
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13082
0
                        Ops, "");
13083
0
  }
13084
0
  case NEON::BI__builtin_neon_vst3_lane_v:
13085
0
  case NEON::BI__builtin_neon_vst3q_lane_v: {
13086
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13087
0
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13088
0
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13089
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13090
0
                        Ops, "");
13091
0
  }
13092
0
  case NEON::BI__builtin_neon_vst4_v:
13093
0
  case NEON::BI__builtin_neon_vst4q_v: {
13094
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13095
0
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13096
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13097
0
                        Ops, "");
13098
0
  }
13099
0
  case NEON::BI__builtin_neon_vst4_lane_v:
13100
0
  case NEON::BI__builtin_neon_vst4q_lane_v: {
13101
0
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13102
0
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13103
0
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13104
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13105
0
                        Ops, "");
13106
0
  }
13107
0
  case NEON::BI__builtin_neon_vtrn_v:
13108
0
  case NEON::BI__builtin_neon_vtrnq_v: {
13109
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13110
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13111
0
    Value *SV = nullptr;
13112
13113
0
    for (unsigned vi = 0; vi != 2; ++vi) {
13114
0
      SmallVector<int, 16> Indices;
13115
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13116
0
        Indices.push_back(i+vi);
13117
0
        Indices.push_back(i+e+vi);
13118
0
      }
13119
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13120
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13121
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13122
0
    }
13123
0
    return SV;
13124
0
  }
13125
0
  case NEON::BI__builtin_neon_vuzp_v:
13126
0
  case NEON::BI__builtin_neon_vuzpq_v: {
13127
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13128
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13129
0
    Value *SV = nullptr;
13130
13131
0
    for (unsigned vi = 0; vi != 2; ++vi) {
13132
0
      SmallVector<int, 16> Indices;
13133
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13134
0
        Indices.push_back(2*i+vi);
13135
13136
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13137
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13138
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13139
0
    }
13140
0
    return SV;
13141
0
  }
13142
0
  case NEON::BI__builtin_neon_vzip_v:
13143
0
  case NEON::BI__builtin_neon_vzipq_v: {
13144
0
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13145
0
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13146
0
    Value *SV = nullptr;
13147
13148
0
    for (unsigned vi = 0; vi != 2; ++vi) {
13149
0
      SmallVector<int, 16> Indices;
13150
0
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13151
0
        Indices.push_back((i + vi*e) >> 1);
13152
0
        Indices.push_back(((i + vi*e) >> 1)+e);
13153
0
      }
13154
0
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13155
0
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13156
0
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13157
0
    }
13158
0
    return SV;
13159
0
  }
13160
0
  case NEON::BI__builtin_neon_vqtbl1q_v: {
13161
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13162
0
                        Ops, "vtbl1");
13163
0
  }
13164
0
  case NEON::BI__builtin_neon_vqtbl2q_v: {
13165
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13166
0
                        Ops, "vtbl2");
13167
0
  }
13168
0
  case NEON::BI__builtin_neon_vqtbl3q_v: {
13169
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13170
0
                        Ops, "vtbl3");
13171
0
  }
13172
0
  case NEON::BI__builtin_neon_vqtbl4q_v: {
13173
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13174
0
                        Ops, "vtbl4");
13175
0
  }
13176
0
  case NEON::BI__builtin_neon_vqtbx1q_v: {
13177
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13178
0
                        Ops, "vtbx1");
13179
0
  }
13180
0
  case NEON::BI__builtin_neon_vqtbx2q_v: {
13181
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13182
0
                        Ops, "vtbx2");
13183
0
  }
13184
0
  case NEON::BI__builtin_neon_vqtbx3q_v: {
13185
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13186
0
                        Ops, "vtbx3");
13187
0
  }
13188
0
  case NEON::BI__builtin_neon_vqtbx4q_v: {
13189
0
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13190
0
                        Ops, "vtbx4");
13191
0
  }
13192
0
  case NEON::BI__builtin_neon_vsqadd_v:
13193
0
  case NEON::BI__builtin_neon_vsqaddq_v: {
13194
0
    Int = Intrinsic::aarch64_neon_usqadd;
13195
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13196
0
  }
13197
0
  case NEON::BI__builtin_neon_vuqadd_v:
13198
0
  case NEON::BI__builtin_neon_vuqaddq_v: {
13199
0
    Int = Intrinsic::aarch64_neon_suqadd;
13200
0
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13201
0
  }
13202
0
  }
13203
0
}
13204
13205
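The across-vector reductions above all follow the same shape: the builtin argument is handed to an aarch64.neon.*addv/*maxv/*minv intrinsic that returns a wider scalar (typically i32), and the result is truncated back to the element type. As a rough illustration only (a scalar model of the computed value, not the emitted IR; the model_* helper name is invented), vaddv_u8 behaves like:

#include <array>
#include <cstdint>

// Scalar model of vaddv_u8: the intrinsic produces a 32-bit horizontal sum,
// which the code above truncates back to the 8-bit element type.
static uint8_t model_vaddv_u8(const std::array<uint8_t, 8> &V) {
  uint32_t Acc = 0;                  // stands in for the i32 intrinsic result
  for (uint8_t Lane : V)
    Acc += Lane;                     // horizontal add across all lanes
  return static_cast<uint8_t>(Acc);  // Builder.CreateTrunc(Ops[0], Int8Ty)
}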
Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13206
0
                                           const CallExpr *E) {
13207
0
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13208
0
          BuiltinID == BPF::BI__builtin_btf_type_id ||
13209
0
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
13210
0
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13211
0
         "unexpected BPF builtin");
13212
13213
  // A sequence number, injected into IR builtin functions, to
13214
  // prevent CSE, since the only difference between otherwise identical
13215
  // calls may be the debuginfo metadata.
13216
0
  static uint32_t BuiltinSeqNum;
13217
13218
0
  switch (BuiltinID) {
13219
0
  default:
13220
0
    llvm_unreachable("Unexpected BPF builtin");
13221
0
  case BPF::BI__builtin_preserve_field_info: {
13222
0
    const Expr *Arg = E->getArg(0);
13223
0
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13224
13225
0
    if (!getDebugInfo()) {
13226
0
      CGM.Error(E->getExprLoc(),
13227
0
                "using __builtin_preserve_field_info() without -g");
13228
0
      return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13229
0
                        : EmitLValue(Arg).getPointer(*this);
13230
0
    }
13231
13232
    // Enable underlying preserve_*_access_index() generation.
13233
0
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13234
0
    IsInPreservedAIRegion = true;
13235
0
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
13236
0
                                  : EmitLValue(Arg).getPointer(*this);
13237
0
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13238
13239
0
    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13240
0
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13241
13242
    // Build the IR for the preserve_field_info intrinsic.
13243
0
    llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13244
0
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13245
0
        {FieldAddr->getType()});
13246
0
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13247
0
  }
13248
0
  case BPF::BI__builtin_btf_type_id:
13249
0
  case BPF::BI__builtin_preserve_type_info: {
13250
0
    if (!getDebugInfo()) {
13251
0
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
13252
0
      return nullptr;
13253
0
    }
13254
13255
0
    const Expr *Arg0 = E->getArg(0);
13256
0
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13257
0
        Arg0->getType(), Arg0->getExprLoc());
13258
13259
0
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13260
0
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13261
0
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13262
13263
0
    llvm::Function *FnDecl;
13264
0
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
13265
0
      FnDecl = llvm::Intrinsic::getDeclaration(
13266
0
          &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13267
0
    else
13268
0
      FnDecl = llvm::Intrinsic::getDeclaration(
13269
0
          &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13270
0
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13271
0
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13272
0
    return Fn;
13273
0
  }
13274
0
  case BPF::BI__builtin_preserve_enum_value: {
13275
0
    if (!getDebugInfo()) {
13276
0
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
13277
0
      return nullptr;
13278
0
    }
13279
13280
0
    const Expr *Arg0 = E->getArg(0);
13281
0
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13282
0
        Arg0->getType(), Arg0->getExprLoc());
13283
13284
    // Find enumerator
13285
0
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13286
0
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13287
0
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13288
0
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13289
13290
0
    auto InitVal = Enumerator->getInitVal();
13291
0
    std::string InitValStr;
13292
0
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13293
0
      InitValStr = std::to_string(InitVal.getSExtValue());
13294
0
    else
13295
0
      InitValStr = std::to_string(InitVal.getZExtValue());
13296
0
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13297
0
    Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13298
13299
0
    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13300
0
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13301
0
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13302
13303
0
    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13304
0
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13305
0
    CallInst *Fn =
13306
0
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13307
0
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13308
0
    return Fn;
13309
0
  }
13310
0
  }
13311
0
}
13312
13313
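A hedged usage sketch of the first BPF builtin handled above: __builtin_preserve_field_info takes a field access plus an info-kind constant and, as the code enforces, requires debug info (-g) and the BPF target. The struct and function names here are made up, and the kind value 0 is assumed to be the field byte-offset kind (FIELD_BYTE_OFFSET in the kernel's CO-RE headers).

struct pkt {
  unsigned len;
  unsigned flags;
};

// Ask the BPF loader/relocator for the byte offset of 'flags' in 'struct pkt'.
// Compile for the bpf target with -g so the intrinsic emitted above gets the
// preserve_access_index debug metadata it needs.
unsigned flags_byte_offset(struct pkt *p) {
  return __builtin_preserve_field_info(p->flags, 0);
}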
llvm::Value *CodeGenFunction::
13314
0
BuildVector(ArrayRef<llvm::Value*> Ops) {
13315
0
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13316
0
         "Not a power-of-two sized vector!");
13317
0
  bool AllConstants = true;
13318
0
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13319
0
    AllConstants &= isa<Constant>(Ops[i]);
13320
13321
  // If this is a constant vector, create a ConstantVector.
13322
0
  if (AllConstants) {
13323
0
    SmallVector<llvm::Constant*, 16> CstOps;
13324
0
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13325
0
      CstOps.push_back(cast<Constant>(Ops[i]));
13326
0
    return llvm::ConstantVector::get(CstOps);
13327
0
  }
13328
13329
  // Otherwise, insertelement the values to build the vector.
13330
0
  Value *Result = llvm::PoisonValue::get(
13331
0
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13332
13333
0
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13334
0
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13335
13336
0
  return Result;
13337
0
}
13338
13339
// Convert the mask from an integer type to a vector of i1.
13340
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13341
0
                              unsigned NumElts) {
13342
13343
0
  auto *MaskTy = llvm::FixedVectorType::get(
13344
0
      CGF.Builder.getInt1Ty(),
13345
0
      cast<IntegerType>(Mask->getType())->getBitWidth());
13346
0
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13347
13348
  // If we have fewer than 8 elements, then the starting mask was an i8 and
13349
  // we need to extract down to the right number of elements.
13350
0
  if (NumElts < 8) {
13351
0
    int Indices[4];
13352
0
    for (unsigned i = 0; i != NumElts; ++i)
13353
0
      Indices[i] = i;
13354
0
    MaskVec = CGF.Builder.CreateShuffleVector(
13355
0
        MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13356
0
  }
13357
0
  return MaskVec;
13358
0
}
13359
13360
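getMaskVecValue is the workhorse for the masked X86 helpers that follow: the integer mask is bitcast so that bit i becomes lane i of an <N x i1> vector, and for fewer than 8 lanes a shufflevector keeps only the low NumElts lanes. A minimal scalar model (illustrative only; the helper name is invented):

#include <cstdint>
#include <vector>

// Bit i of the incoming mask selects lane i; lanes past NumElts are dropped.
static std::vector<bool> modelMaskVec(uint8_t Mask, unsigned NumElts) {
  std::vector<bool> Lanes(NumElts);
  for (unsigned I = 0; I != NumElts; ++I)
    Lanes[I] = (Mask >> I) & 1;
  return Lanes;
}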
static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13361
0
                                 Align Alignment) {
13362
0
  Value *Ptr = Ops[0];
13363
13364
0
  Value *MaskVec = getMaskVecValue(
13365
0
      CGF, Ops[2],
13366
0
      cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13367
13368
0
  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13369
0
}
13370
13371
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13372
0
                                Align Alignment) {
13373
0
  llvm::Type *Ty = Ops[1]->getType();
13374
0
  Value *Ptr = Ops[0];
13375
13376
0
  Value *MaskVec = getMaskVecValue(
13377
0
      CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13378
13379
0
  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13380
0
}
13381
13382
static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13383
0
                                ArrayRef<Value *> Ops) {
13384
0
  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13385
0
  Value *Ptr = Ops[0];
13386
13387
0
  Value *MaskVec = getMaskVecValue(
13388
0
      CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13389
13390
0
  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13391
0
                                           ResultTy);
13392
0
  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13393
0
}
13394
13395
static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13396
                                    ArrayRef<Value *> Ops,
13397
0
                                    bool IsCompress) {
13398
0
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13399
13400
0
  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13401
13402
0
  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13403
0
                                 : Intrinsic::x86_avx512_mask_expand;
13404
0
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13405
0
  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13406
0
}
13407
13408
static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13409
0
                                   ArrayRef<Value *> Ops) {
13410
0
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13411
0
  Value *Ptr = Ops[0];
13412
13413
0
  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13414
13415
0
  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13416
0
                                           ResultTy);
13417
0
  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13418
0
}
13419
13420
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13421
                              ArrayRef<Value *> Ops,
13422
0
                              bool InvertLHS = false) {
13423
0
  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13424
0
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13425
0
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13426
13427
0
  if (InvertLHS)
13428
0
    LHS = CGF.Builder.CreateNot(LHS);
13429
13430
0
  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13431
0
                                   Ops[0]->getType());
13432
0
}
13433
13434
static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13435
0
                                 Value *Amt, bool IsRight) {
13436
0
  llvm::Type *Ty = Op0->getType();
13437
13438
  // The amount may be a scalar immediate, in which case create a splat vector.
13439
  // Funnel shift amounts are treated as modulo and the types are all power-of-2, so
13440
  // we only care about the lowest log2 bits anyway.
13441
0
  if (Amt->getType() != Ty) {
13442
0
    unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13443
0
    Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13444
0
    Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13445
0
  }
13446
13447
0
  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13448
0
  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13449
0
  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13450
0
}
13451
13452
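For reference, the llvm.fshl/llvm.fshr intrinsics emitted above concatenate Op0 (high half) and Op1 (low half) and shift the pair left or right, with the shift amount reduced modulo the bit width, exactly as the comment notes. A 32-bit scalar sketch (illustrative only, assuming 32-bit lanes; helper names invented):

#include <cstdint>

// fshl: shift the Hi:Lo pair left by Amt (mod 32) and keep the high word.
static uint32_t model_fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  Amt &= 31;
  if (Amt == 0)
    return Hi;  // avoid the out-of-range shift of Lo by 32
  return (Hi << Amt) | (Lo >> (32 - Amt));
}

// fshr: shift the Hi:Lo pair right by Amt (mod 32) and keep the low word.
static uint32_t model_fshr32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  Amt &= 31;
  if (Amt == 0)
    return Lo;
  return (Hi << (32 - Amt)) | (Lo >> Amt);
}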
static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13453
0
                           bool IsSigned) {
13454
0
  Value *Op0 = Ops[0];
13455
0
  Value *Op1 = Ops[1];
13456
0
  llvm::Type *Ty = Op0->getType();
13457
0
  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13458
13459
0
  CmpInst::Predicate Pred;
13460
0
  switch (Imm) {
13461
0
  case 0x0:
13462
0
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13463
0
    break;
13464
0
  case 0x1:
13465
0
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13466
0
    break;
13467
0
  case 0x2:
13468
0
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13469
0
    break;
13470
0
  case 0x3:
13471
0
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13472
0
    break;
13473
0
  case 0x4:
13474
0
    Pred = ICmpInst::ICMP_EQ;
13475
0
    break;
13476
0
  case 0x5:
13477
0
    Pred = ICmpInst::ICMP_NE;
13478
0
    break;
13479
0
  case 0x6:
13480
0
    return llvm::Constant::getNullValue(Ty); // FALSE
13481
0
  case 0x7:
13482
0
    return llvm::Constant::getAllOnesValue(Ty); // TRUE
13483
0
  default:
13484
0
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13485
0
  }
13486
13487
0
  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13488
0
  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13489
0
  return Res;
13490
0
}
13491
13492
static Value *EmitX86Select(CodeGenFunction &CGF,
13493
0
                            Value *Mask, Value *Op0, Value *Op1) {
13494
13495
  // If the mask is all ones, just return the first argument.
13496
0
  if (const auto *C = dyn_cast<Constant>(Mask))
13497
0
    if (C->isAllOnesValue())
13498
0
      return Op0;
13499
13500
0
  Mask = getMaskVecValue(
13501
0
      CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13502
13503
0
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13504
0
}
13505
13506
static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13507
0
                                  Value *Mask, Value *Op0, Value *Op1) {
13508
  // If the mask is all ones, just return the first argument.
13509
0
  if (const auto *C = dyn_cast<Constant>(Mask))
13510
0
    if (C->isAllOnesValue())
13511
0
      return Op0;
13512
13513
0
  auto *MaskTy = llvm::FixedVectorType::get(
13514
0
      CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13515
0
  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13516
0
  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13517
0
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13518
0
}
13519
13520
static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13521
0
                                         unsigned NumElts, Value *MaskIn) {
13522
0
  if (MaskIn) {
13523
0
    const auto *C = dyn_cast<Constant>(MaskIn);
13524
0
    if (!C || !C->isAllOnesValue())
13525
0
      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13526
0
  }
13527
13528
0
  if (NumElts < 8) {
13529
0
    int Indices[8];
13530
0
    for (unsigned i = 0; i != NumElts; ++i)
13531
0
      Indices[i] = i;
13532
0
    for (unsigned i = NumElts; i != 8; ++i)
13533
0
      Indices[i] = i % NumElts + NumElts;
13534
0
    Cmp = CGF.Builder.CreateShuffleVector(
13535
0
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13536
0
  }
13537
13538
0
  return CGF.Builder.CreateBitCast(Cmp,
13539
0
                                   IntegerType::get(CGF.getLLVMContext(),
13540
0
                                                    std::max(NumElts, 8U)));
13541
0
}
13542
13543
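EmitX86MaskedCompareResult above ANDs the per-lane compare bits with any incoming mask, pads vectors shorter than 8 lanes with zero lanes, and bitcasts the result back to an integer mask of at least 8 bits. A scalar sketch for the NumElts <= 8 case (names invented for illustration):

#include <cstdint>

static uint8_t modelCompareResult(uint8_t CmpBits, uint8_t MaskIn,
                                  unsigned NumElts) {
  // Lanes beyond NumElts are padded with zeros by the shufflevector above.
  uint8_t Valid = NumElts >= 8 ? 0xFF : (uint8_t)((1u << NumElts) - 1);
  return (uint8_t)(CmpBits & MaskIn & Valid);
}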
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13544
0
                                   bool Signed, ArrayRef<Value *> Ops) {
13545
0
  assert((Ops.size() == 2 || Ops.size() == 4) &&
13546
0
         "Unexpected number of arguments");
13547
0
  unsigned NumElts =
13548
0
      cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13549
0
  Value *Cmp;
13550
13551
0
  if (CC == 3) {
13552
0
    Cmp = Constant::getNullValue(
13553
0
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13554
0
  } else if (CC == 7) {
13555
0
    Cmp = Constant::getAllOnesValue(
13556
0
        llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13557
0
  } else {
13558
0
    ICmpInst::Predicate Pred;
13559
0
    switch (CC) {
13560
0
    default: llvm_unreachable("Unknown condition code");
13561
0
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
13562
0
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13563
0
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13564
0
    case 4: Pred = ICmpInst::ICMP_NE;  break;
13565
0
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13566
0
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13567
0
    }
13568
0
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13569
0
  }
13570
13571
0
  Value *MaskIn = nullptr;
13572
0
  if (Ops.size() == 4)
13573
0
    MaskIn = Ops[3];
13574
13575
0
  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13576
0
}
13577
13578
0
static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13579
0
  Value *Zero = Constant::getNullValue(In->getType());
13580
0
  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13581
0
}
13582
13583
static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13584
0
                                    ArrayRef<Value *> Ops, bool IsSigned) {
13585
0
  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13586
0
  llvm::Type *Ty = Ops[1]->getType();
13587
13588
0
  Value *Res;
13589
0
  if (Rnd != 4) {
13590
0
    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13591
0
                                 : Intrinsic::x86_avx512_uitofp_round;
13592
0
    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13593
0
    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13594
0
  } else {
13595
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13596
0
    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13597
0
                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13598
0
  }
13599
13600
0
  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13601
0
}
13602
13603
// Lowers X86 FMA intrinsics to IR.
13604
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13605
                             ArrayRef<Value *> Ops, unsigned BuiltinID,
13606
0
                             bool IsAddSub) {
13607
13608
0
  bool Subtract = false;
13609
0
  Intrinsic::ID IID = Intrinsic::not_intrinsic;
13610
0
  switch (BuiltinID) {
13611
0
  default: break;
13612
0
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13613
0
    Subtract = true;
13614
0
    [[fallthrough]];
13615
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13616
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13617
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13618
0
    IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13619
0
    break;
13620
0
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13621
0
    Subtract = true;
13622
0
    [[fallthrough]];
13623
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13624
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13625
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13626
0
    IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13627
0
    break;
13628
0
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13629
0
    Subtract = true;
13630
0
    [[fallthrough]];
13631
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13632
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13633
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13634
0
    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13635
0
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13636
0
    Subtract = true;
13637
0
    [[fallthrough]];
13638
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13639
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13640
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13641
0
    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13642
0
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13643
0
    Subtract = true;
13644
0
    [[fallthrough]];
13645
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13646
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13647
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13648
0
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13649
0
    break;
13650
0
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13651
0
    Subtract = true;
13652
0
    [[fallthrough]];
13653
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13654
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13655
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13656
0
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13657
0
    break;
13658
0
  }
13659
13660
0
  Value *A = Ops[0];
13661
0
  Value *B = Ops[1];
13662
0
  Value *C = Ops[2];
13663
13664
0
  if (Subtract)
13665
0
    C = CGF.Builder.CreateFNeg(C);
13666
13667
0
  Value *Res;
13668
13669
  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
13670
0
  if (IID != Intrinsic::not_intrinsic &&
13671
0
      (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13672
0
       IsAddSub)) {
13673
0
    Function *Intr = CGF.CGM.getIntrinsic(IID);
13674
0
    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13675
0
  } else {
13676
0
    llvm::Type *Ty = A->getType();
13677
0
    Function *FMA;
13678
0
    if (CGF.Builder.getIsFPConstrained()) {
13679
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13680
0
      FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13681
0
      Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13682
0
    } else {
13683
0
      FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13684
0
      Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13685
0
    }
13686
0
  }
13687
13688
  // Handle any required masking.
13689
0
  Value *MaskFalseVal = nullptr;
13690
0
  switch (BuiltinID) {
13691
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13692
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13693
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13694
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13695
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13696
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13697
0
    MaskFalseVal = Ops[0];
13698
0
    break;
13699
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13700
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13701
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13702
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13703
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13704
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13705
0
    MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13706
0
    break;
13707
0
  case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13708
0
  case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13709
0
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13710
0
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13711
0
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13712
0
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13713
0
  case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13714
0
  case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13715
0
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13716
0
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13717
0
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13718
0
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13719
0
    MaskFalseVal = Ops[2];
13720
0
    break;
13721
0
  }
13722
13723
0
  if (MaskFalseVal)
13724
0
    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13725
13726
0
  return Res;
13727
0
}
13728
13729
static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13730
                                MutableArrayRef<Value *> Ops, Value *Upper,
13731
                                bool ZeroMask = false, unsigned PTIdx = 0,
13732
0
                                bool NegAcc = false) {
13733
0
  unsigned Rnd = 4;
13734
0
  if (Ops.size() > 4)
13735
0
    Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13736
13737
0
  if (NegAcc)
13738
0
    Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13739
13740
0
  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13741
0
  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13742
0
  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13743
0
  Value *Res;
13744
0
  if (Rnd != 4) {
13745
0
    Intrinsic::ID IID;
13746
13747
0
    switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13748
0
    case 16:
13749
0
      IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13750
0
      break;
13751
0
    case 32:
13752
0
      IID = Intrinsic::x86_avx512_vfmadd_f32;
13753
0
      break;
13754
0
    case 64:
13755
0
      IID = Intrinsic::x86_avx512_vfmadd_f64;
13756
0
      break;
13757
0
    default:
13758
0
      llvm_unreachable("Unexpected size");
13759
0
    }
13760
0
    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13761
0
                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
13762
0
  } else if (CGF.Builder.getIsFPConstrained()) {
13763
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13764
0
    Function *FMA = CGF.CGM.getIntrinsic(
13765
0
        Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13766
0
    Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13767
0
  } else {
13768
0
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13769
0
    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13770
0
  }
13771
  // If we have more than 3 arguments, we need to do masking.
13772
0
  if (Ops.size() > 3) {
13773
0
    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13774
0
                               : Ops[PTIdx];
13775
13776
    // If we negated the accumulator and it is the PassThru value, we need to
13777
    // bypass the negate. Conveniently Upper should be the same thing in this
13778
    // case.
13779
0
    if (NegAcc && PTIdx == 2)
13780
0
      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13781
13782
0
    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13783
0
  }
13784
0
  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
13785
0
}
13786
13787
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13788
0
                           ArrayRef<Value *> Ops) {
13789
0
  llvm::Type *Ty = Ops[0]->getType();
13790
  // Arguments have a vXi32 type so cast to vXi64.
13791
0
  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
13792
0
                                  Ty->getPrimitiveSizeInBits() / 64);
13793
0
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
13794
0
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
13795
13796
0
  if (IsSigned) {
13797
    // Shift left then arithmetic shift right.
13798
0
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
13799
0
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
13800
0
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
13801
0
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
13802
0
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
13803
0
  } else {
13804
    // Clear the upper bits.
13805
0
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
13806
0
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
13807
0
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
13808
0
  }
13809
13810
0
  return CGF.Builder.CreateMul(LHS, RHS);
13811
0
}
13812
13813
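EmitX86Muldq above models pmuldq/pmuludq: each 64-bit lane multiplies the sign- or zero-extended low 32 bits of its operands to a full 64-bit product. A single-lane scalar sketch (helper name invented for illustration):

#include <cstdint>

static uint64_t model_muldq_lane(uint64_t LHS, uint64_t RHS, bool IsSigned) {
  if (IsSigned) {
    // Equivalent to the shl-then-ashr pair emitted above: keep only the low
    // 32 bits of each lane, sign-extended to 64 bits.
    int64_t A = (int32_t)(uint32_t)LHS;
    int64_t B = (int32_t)(uint32_t)RHS;
    return (uint64_t)(A * B);
  }
  // Unsigned path: clear the upper bits instead.
  return (LHS & 0xffffffffu) * (RHS & 0xffffffffu);
}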
// Emit a masked pternlog intrinsic. This only exists because the header has to
13814
// use a macro and we aren't able to pass the input argument to a pternlog
13815
// builtin and a select builtin without evaluating it twice.
13816
static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
13817
0
                             ArrayRef<Value *> Ops) {
13818
0
  llvm::Type *Ty = Ops[0]->getType();
13819
13820
0
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
13821
0
  unsigned EltWidth = Ty->getScalarSizeInBits();
13822
0
  Intrinsic::ID IID;
13823
0
  if (VecWidth == 128 && EltWidth == 32)
13824
0
    IID = Intrinsic::x86_avx512_pternlog_d_128;
13825
0
  else if (VecWidth == 256 && EltWidth == 32)
13826
0
    IID = Intrinsic::x86_avx512_pternlog_d_256;
13827
0
  else if (VecWidth == 512 && EltWidth == 32)
13828
0
    IID = Intrinsic::x86_avx512_pternlog_d_512;
13829
0
  else if (VecWidth == 128 && EltWidth == 64)
13830
0
    IID = Intrinsic::x86_avx512_pternlog_q_128;
13831
0
  else if (VecWidth == 256 && EltWidth == 64)
13832
0
    IID = Intrinsic::x86_avx512_pternlog_q_256;
13833
0
  else if (VecWidth == 512 && EltWidth == 64)
13834
0
    IID = Intrinsic::x86_avx512_pternlog_q_512;
13835
0
  else
13836
0
    llvm_unreachable("Unexpected intrinsic");
13837
13838
0
  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13839
0
                                          Ops.drop_back());
13840
0
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13841
0
  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
13842
0
}
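// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// vpternlog evaluates an arbitrary three-input boolean function: at every bit
// position the bits of A, B and C form a 3-bit index into the 8-bit immediate,
// which acts as a truth table. Scalar model for one 32-bit word (hypothetical
// helper, for illustration):
#include <cstdint>
static uint32_t TernlogWord(uint32_t A, uint32_t B, uint32_t C, uint8_t Imm) {
  uint32_t Res = 0;
  for (unsigned Bit = 0; Bit != 32; ++Bit) {
    unsigned Idx = (((A >> Bit) & 1) << 2) | (((B >> Bit) & 1) << 1) |
                   ((C >> Bit) & 1);
    Res |= (uint32_t)((Imm >> Idx) & 1) << Bit;
  }
  return Res;
}
// --------------------------------------------------------------------------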
13843
13844
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
13845
0
                              llvm::Type *DstTy) {
13846
0
  unsigned NumberOfElements =
13847
0
      cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13848
0
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
13849
0
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
13850
0
}
13851
13852
0
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
13853
0
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
13854
0
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
13855
0
  return EmitX86CpuIs(CPUStr);
13856
0
}
13857
13858
// Convert F16 halves to floats.
13859
static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
13860
                                       ArrayRef<Value *> Ops,
13861
0
                                       llvm::Type *DstTy) {
13862
0
  assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
13863
0
         "Unknown cvtph2ps intrinsic");
13864
13865
  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
13866
0
  if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
13867
0
    Function *F =
13868
0
        CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
13869
0
    return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
13870
0
  }
13871
13872
0
  unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13873
0
  Value *Src = Ops[0];
13874
13875
  // Extract the subvector.
13876
0
  if (NumDstElts !=
13877
0
      cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
13878
0
    assert(NumDstElts == 4 && "Unexpected vector size");
13879
0
    Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
13880
0
  }
13881
13882
  // Bitcast from vXi16 to vXf16.
13883
0
  auto *HalfTy = llvm::FixedVectorType::get(
13884
0
      llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
13885
0
  Src = CGF.Builder.CreateBitCast(Src, HalfTy);
13886
13887
  // Perform the fp-extension.
13888
0
  Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
13889
13890
0
  if (Ops.size() >= 3)
13891
0
    Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13892
0
  return Res;
13893
0
}
13894
13895
0
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
13896
13897
0
  llvm::Type *Int32Ty = Builder.getInt32Ty();
13898
13899
  // Matching the struct layout from the compiler-rt/libgcc structure that is
13900
  // filled in:
13901
  // unsigned int __cpu_vendor;
13902
  // unsigned int __cpu_type;
13903
  // unsigned int __cpu_subtype;
13904
  // unsigned int __cpu_features[1];
13905
0
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13906
0
                                          llvm::ArrayType::get(Int32Ty, 1));
13907
13908
  // Grab the global __cpu_model.
13909
0
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13910
0
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13911
13912
  // Calculate the index needed to access the correct field based on the
13913
  // range. Also adjust the expected value.
13914
0
  unsigned Index;
13915
0
  unsigned Value;
13916
0
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
13917
0
#define X86_VENDOR(ENUM, STRING)                                               \
13918
0
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
13919
0
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
13920
0
  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13921
0
#define X86_CPU_TYPE(ENUM, STR)                                                \
13922
0
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
13923
0
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
13924
0
  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13925
0
#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
13926
0
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
13927
0
#include "llvm/TargetParser/X86TargetParser.def"
13928
0
                               .Default({0, 0});
13929
0
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
13930
13931
  // Grab the appropriate field from __cpu_model.
13932
0
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
13933
0
                         ConstantInt::get(Int32Ty, Index)};
13934
0
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
13935
0
  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
13936
0
                                       CharUnits::fromQuantity(4));
13937
13938
  // Check the value of the field against the requested value.
13939
0
  return Builder.CreateICmpEQ(CpuValue,
13940
0
                                  llvm::ConstantInt::get(Int32Ty, Value));
13941
0
}
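// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// The IR emitted above is roughly equivalent to the following C++, assuming
// the compiler-rt/libgcc __cpu_model layout described in the comments: the
// StringSwitch picks a field index (0 = vendor, 1 = type, 2 = subtype) and an
// expected value, and the builtin lowers to a single load plus compare.
struct CpuModel {
  unsigned Vendor, Type, Subtype;
  unsigned Features[1]; // feature word 0, tested by __builtin_cpu_supports
};
static bool CpuIsModel(const CpuModel &M, unsigned Index, unsigned Value) {
  unsigned Field = Index == 0   ? M.Vendor
                   : Index == 1 ? M.Type
                                : M.Subtype;
  return Field == Value;
}
// --------------------------------------------------------------------------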
13942
13943
0
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
13944
0
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
13945
0
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
13946
0
  return EmitX86CpuSupports(FeatureStr);
13947
0
}
13948
13949
0
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
13950
0
  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
13951
0
}
13952
13953
llvm::Value *
13954
0
CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
13955
0
  Value *Result = Builder.getTrue();
13956
0
  if (FeatureMask[0] != 0) {
13957
    // Matching the struct layout from the compiler-rt/libgcc structure that is
13958
    // filled in:
13959
    // unsigned int __cpu_vendor;
13960
    // unsigned int __cpu_type;
13961
    // unsigned int __cpu_subtype;
13962
    // unsigned int __cpu_features[1];
13963
0
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
13964
0
                                            llvm::ArrayType::get(Int32Ty, 1));
13965
13966
    // Grab the global __cpu_model.
13967
0
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
13968
0
    cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
13969
13970
    // Grab the first (0th) element from the field __cpu_features off of the
13971
    // global in the struct STy.
13972
0
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
13973
0
                     Builder.getInt32(0)};
13974
0
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
13975
0
    Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
13976
0
                                                CharUnits::fromQuantity(4));
13977
13978
    // Check the value of the bit corresponding to the feature requested.
13979
0
    Value *Mask = Builder.getInt32(FeatureMask[0]);
13980
0
    Value *Bitset = Builder.CreateAnd(Features, Mask);
13981
0
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
13982
0
    Result = Builder.CreateAnd(Result, Cmp);
13983
0
  }
13984
13985
0
  llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
13986
0
  llvm::Constant *CpuFeatures2 =
13987
0
      CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
13988
0
  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
13989
0
  for (int i = 1; i != 4; ++i) {
13990
0
    const uint32_t M = FeatureMask[i];
13991
0
    if (!M)
13992
0
      continue;
13993
0
    Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
13994
0
    Value *Features = Builder.CreateAlignedLoad(
13995
0
        Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
13996
0
        CharUnits::fromQuantity(4));
13997
    // Check the value of the bit corresponding to the feature requested.
13998
0
    Value *Mask = Builder.getInt32(M);
13999
0
    Value *Bitset = Builder.CreateAnd(Features, Mask);
14000
0
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14001
0
    Result = Builder.CreateAnd(Result, Cmp);
14002
0
  }
14003
14004
0
  return Result;
14005
0
}
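// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// __builtin_cpu_supports reduces to an "all requested bits set" test on each
// 32-bit feature word: word 0 is loaded from __cpu_model's feature array and
// the remaining three words from __cpu_features2, as in the loop above.
// Per-word check (hypothetical helper):
static bool CpuSupportsWord(unsigned FeaturesWord, unsigned MaskWord) {
  // Every feature bit requested in MaskWord must be present in FeaturesWord.
  return (FeaturesWord & MaskWord) == MaskWord;
}
// --------------------------------------------------------------------------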
14006
14007
0
Value *CodeGenFunction::EmitAArch64CpuInit() {
14008
0
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14009
0
  llvm::FunctionCallee Func =
14010
0
      CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14011
0
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14012
0
  cast<llvm::GlobalValue>(Func.getCallee())
14013
0
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14014
0
  return Builder.CreateCall(Func);
14015
0
}
14016
14017
0
Value *CodeGenFunction::EmitX86CpuInit() {
14018
0
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14019
0
                                                    /*Variadic*/ false);
14020
0
  llvm::FunctionCallee Func =
14021
0
      CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14022
0
  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14023
0
  cast<llvm::GlobalValue>(Func.getCallee())
14024
0
      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14025
0
  return Builder.CreateCall(Func);
14026
0
}
14027
14028
llvm::Value *
14029
0
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14030
0
  uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14031
0
  Value *Result = Builder.getTrue();
14032
0
  if (FeaturesMask != 0) {
14033
    // Get features from structure in runtime library
14034
    // struct {
14035
    //   unsigned long long features;
14036
    // } __aarch64_cpu_features;
14037
0
    llvm::Type *STy = llvm::StructType::get(Int64Ty);
14038
0
    llvm::Constant *AArch64CPUFeatures =
14039
0
        CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14040
0
    cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14041
0
    llvm::Value *CpuFeatures = Builder.CreateGEP(
14042
0
        STy, AArch64CPUFeatures,
14043
0
        {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14044
0
    Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14045
0
                                                CharUnits::fromQuantity(8));
14046
0
    Value *Mask = Builder.getInt64(FeaturesMask);
14047
0
    Value *Bitset = Builder.CreateAnd(Features, Mask);
14048
0
    Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14049
0
    Result = Builder.CreateAnd(Result, Cmp);
14050
0
  }
14051
0
  return Result;
14052
0
}
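// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// On AArch64 the runtime exposes a single 64-bit feature word (the
// __aarch64_cpu_features structure loaded above), so the whole check is one
// load plus one "all requested bits set" test (hypothetical helper):
static bool AArch64CpuSupports(unsigned long long RuntimeFeatures,
                               unsigned long long FeaturesMask) {
  return (RuntimeFeatures & FeaturesMask) == FeaturesMask;
}
// --------------------------------------------------------------------------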
14053
14054
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14055
0
                                           const CallExpr *E) {
14056
0
  if (BuiltinID == X86::BI__builtin_cpu_is)
14057
0
    return EmitX86CpuIs(E);
14058
0
  if (BuiltinID == X86::BI__builtin_cpu_supports)
14059
0
    return EmitX86CpuSupports(E);
14060
0
  if (BuiltinID == X86::BI__builtin_cpu_init)
14061
0
    return EmitX86CpuInit();
14062
14063
  // Handle MSVC intrinsics before argument evaluation to prevent double
14064
  // evaluation.
14065
0
  if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14066
0
    return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14067
14068
0
  SmallVector<Value*, 4> Ops;
14069
0
  bool IsMaskFCmp = false;
14070
0
  bool IsConjFMA = false;
14071
14072
  // Find out if any arguments are required to be integer constant expressions.
14073
0
  unsigned ICEArguments = 0;
14074
0
  ASTContext::GetBuiltinTypeError Error;
14075
0
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14076
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
14077
14078
0
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14079
0
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14080
0
  }
14081
14082
  // These exist so that the builtin that takes an immediate can be bounds
14083
  // checked by clang to avoid passing bad immediates to the backend. Since
14084
  // AVX has a larger immediate than SSE we would need separate builtins to
14085
  // do the different bounds checking. Rather than create a clang-specific
14086
  // SSE-only builtin, this implements eight separate builtins to match the gcc
14087
  // implementation.
14088
0
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14089
0
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14090
0
    llvm::Function *F = CGM.getIntrinsic(ID);
14091
0
    return Builder.CreateCall(F, Ops);
14092
0
  };
14093
14094
  // For the vector forms of FP comparisons, translate the builtins directly to
14095
  // IR.
14096
  // TODO: The builtins could be removed if the SSE header files used vector
14097
  // extension comparisons directly (vector ordered/unordered may need
14098
  // additional support via __builtin_isnan()).
14099
0
  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14100
0
                                         bool IsSignaling) {
14101
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14102
0
    Value *Cmp;
14103
0
    if (IsSignaling)
14104
0
      Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14105
0
    else
14106
0
      Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14107
0
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14108
0
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14109
0
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14110
0
    return Builder.CreateBitCast(Sext, FPVecTy);
14111
0
  };
14112
14113
0
  switch (BuiltinID) {
14114
0
  default: return nullptr;
14115
0
  case X86::BI_mm_prefetch: {
14116
0
    Value *Address = Ops[0];
14117
0
    ConstantInt *C = cast<ConstantInt>(Ops[1]);
14118
0
    Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14119
0
    Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14120
0
    Value *Data = ConstantInt::get(Int32Ty, 1);
14121
0
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14122
0
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
14123
0
  }
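// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// The _mm_prefetch hint byte is decomposed above into two operands of
// llvm.prefetch: bit 2 selects read (0) or write (1) and bits 1:0 give the
// temporal locality. Decoding sketch (hypothetical helper):
static void DecodePrefetchHint(unsigned Hint, unsigned &RW,
                               unsigned &Locality) {
  RW = (Hint >> 2) & 0x1; // the rw operand of llvm.prefetch
  Locality = Hint & 0x3;  // the locality operand, 0 (none) .. 3 (high)
}
// --------------------------------------------------------------------------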
14124
0
  case X86::BI_mm_clflush: {
14125
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14126
0
                              Ops[0]);
14127
0
  }
14128
0
  case X86::BI_mm_lfence: {
14129
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14130
0
  }
14131
0
  case X86::BI_mm_mfence: {
14132
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14133
0
  }
14134
0
  case X86::BI_mm_sfence: {
14135
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14136
0
  }
14137
0
  case X86::BI_mm_pause: {
14138
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14139
0
  }
14140
0
  case X86::BI__rdtsc: {
14141
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14142
0
  }
14143
0
  case X86::BI__builtin_ia32_rdtscp: {
14144
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14145
0
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14146
0
                                      Ops[0]);
14147
0
    return Builder.CreateExtractValue(Call, 0);
14148
0
  }
14149
0
  case X86::BI__builtin_ia32_lzcnt_u16:
14150
0
  case X86::BI__builtin_ia32_lzcnt_u32:
14151
0
  case X86::BI__builtin_ia32_lzcnt_u64: {
14152
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14153
0
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14154
0
  }
14155
0
  case X86::BI__builtin_ia32_tzcnt_u16:
14156
0
  case X86::BI__builtin_ia32_tzcnt_u32:
14157
0
  case X86::BI__builtin_ia32_tzcnt_u64: {
14158
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14159
0
    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14160
0
  }
14161
0
  case X86::BI__builtin_ia32_undef128:
14162
0
  case X86::BI__builtin_ia32_undef256:
14163
0
  case X86::BI__builtin_ia32_undef512:
14164
    // The x86 definition of "undef" is not the same as the LLVM definition
14165
    // (PR32176). We leave optimizing away an unnecessary zero constant to the
14166
    // IR optimizer and backend.
14167
    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14168
    // value, we should use that here instead of a zero.
14169
0
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
14170
0
  case X86::BI__builtin_ia32_vec_init_v8qi:
14171
0
  case X86::BI__builtin_ia32_vec_init_v4hi:
14172
0
  case X86::BI__builtin_ia32_vec_init_v2si:
14173
0
    return Builder.CreateBitCast(BuildVector(Ops),
14174
0
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
14175
0
  case X86::BI__builtin_ia32_vec_ext_v2si:
14176
0
  case X86::BI__builtin_ia32_vec_ext_v16qi:
14177
0
  case X86::BI__builtin_ia32_vec_ext_v8hi:
14178
0
  case X86::BI__builtin_ia32_vec_ext_v4si:
14179
0
  case X86::BI__builtin_ia32_vec_ext_v4sf:
14180
0
  case X86::BI__builtin_ia32_vec_ext_v2di:
14181
0
  case X86::BI__builtin_ia32_vec_ext_v32qi:
14182
0
  case X86::BI__builtin_ia32_vec_ext_v16hi:
14183
0
  case X86::BI__builtin_ia32_vec_ext_v8si:
14184
0
  case X86::BI__builtin_ia32_vec_ext_v4di: {
14185
0
    unsigned NumElts =
14186
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14187
0
    uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14188
0
    Index &= NumElts - 1;
14189
    // These builtins exist so we can ensure the index is an ICE and in range.
14190
    // Otherwise we could just do this in the header file.
14191
0
    return Builder.CreateExtractElement(Ops[0], Index);
14192
0
  }
14193
0
  case X86::BI__builtin_ia32_vec_set_v16qi:
14194
0
  case X86::BI__builtin_ia32_vec_set_v8hi:
14195
0
  case X86::BI__builtin_ia32_vec_set_v4si:
14196
0
  case X86::BI__builtin_ia32_vec_set_v2di:
14197
0
  case X86::BI__builtin_ia32_vec_set_v32qi:
14198
0
  case X86::BI__builtin_ia32_vec_set_v16hi:
14199
0
  case X86::BI__builtin_ia32_vec_set_v8si:
14200
0
  case X86::BI__builtin_ia32_vec_set_v4di: {
14201
0
    unsigned NumElts =
14202
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14203
0
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14204
0
    Index &= NumElts - 1;
14205
    // These builtins exist so we can ensure the index is an ICE and in range.
14206
    // Otherwise we could just do this in the header file.
14207
0
    return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14208
0
  }
14209
0
  case X86::BI_mm_setcsr:
14210
0
  case X86::BI__builtin_ia32_ldmxcsr: {
14211
0
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
14212
0
    Builder.CreateStore(Ops[0], Tmp);
14213
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14214
0
                              Tmp.getPointer());
14215
0
  }
14216
0
  case X86::BI_mm_getcsr:
14217
0
  case X86::BI__builtin_ia32_stmxcsr: {
14218
0
    Address Tmp = CreateMemTemp(E->getType());
14219
0
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14220
0
                       Tmp.getPointer());
14221
0
    return Builder.CreateLoad(Tmp, "stmxcsr");
14222
0
  }
14223
0
  case X86::BI__builtin_ia32_xsave:
14224
0
  case X86::BI__builtin_ia32_xsave64:
14225
0
  case X86::BI__builtin_ia32_xrstor:
14226
0
  case X86::BI__builtin_ia32_xrstor64:
14227
0
  case X86::BI__builtin_ia32_xsaveopt:
14228
0
  case X86::BI__builtin_ia32_xsaveopt64:
14229
0
  case X86::BI__builtin_ia32_xrstors:
14230
0
  case X86::BI__builtin_ia32_xrstors64:
14231
0
  case X86::BI__builtin_ia32_xsavec:
14232
0
  case X86::BI__builtin_ia32_xsavec64:
14233
0
  case X86::BI__builtin_ia32_xsaves:
14234
0
  case X86::BI__builtin_ia32_xsaves64:
14235
0
  case X86::BI__builtin_ia32_xsetbv:
14236
0
  case X86::BI_xsetbv: {
14237
0
    Intrinsic::ID ID;
14238
0
#define INTRINSIC_X86_XSAVE_ID(NAME) \
14239
0
    case X86::BI__builtin_ia32_##NAME: \
14240
0
      ID = Intrinsic::x86_##NAME; \
14241
0
      break
14242
0
    switch (BuiltinID) {
14243
0
    default: llvm_unreachable("Unsupported intrinsic!");
14244
0
    INTRINSIC_X86_XSAVE_ID(xsave);
14245
0
    INTRINSIC_X86_XSAVE_ID(xsave64);
14246
0
    INTRINSIC_X86_XSAVE_ID(xrstor);
14247
0
    INTRINSIC_X86_XSAVE_ID(xrstor64);
14248
0
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
14249
0
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14250
0
    INTRINSIC_X86_XSAVE_ID(xrstors);
14251
0
    INTRINSIC_X86_XSAVE_ID(xrstors64);
14252
0
    INTRINSIC_X86_XSAVE_ID(xsavec);
14253
0
    INTRINSIC_X86_XSAVE_ID(xsavec64);
14254
0
    INTRINSIC_X86_XSAVE_ID(xsaves);
14255
0
    INTRINSIC_X86_XSAVE_ID(xsaves64);
14256
0
    INTRINSIC_X86_XSAVE_ID(xsetbv);
14257
0
    case X86::BI_xsetbv:
14258
0
      ID = Intrinsic::x86_xsetbv;
14259
0
      break;
14260
0
    }
14261
0
#undef INTRINSIC_X86_XSAVE_ID
14262
0
    Value *Mhi = Builder.CreateTrunc(
14263
0
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14264
0
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14265
0
    Ops[1] = Mhi;
14266
0
    Ops.push_back(Mlo);
14267
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14268
0
  }
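// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// The XSAVE-family builtins take one 64-bit state-component mask, but the
// underlying instructions consume it as an EDX:EAX pair, so the code above
// splits it into 32-bit halves before calling the intrinsic:
#include <cstdint>
static void SplitXsaveMask(uint64_t Mask, uint32_t &Hi, uint32_t &Lo) {
  Hi = (uint32_t)(Mask >> 32); // high half, passed where EDX is expected
  Lo = (uint32_t)Mask;         // low half, passed where EAX is expected
}
// --------------------------------------------------------------------------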
14269
0
  case X86::BI__builtin_ia32_xgetbv:
14270
0
  case X86::BI_xgetbv:
14271
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14272
0
  case X86::BI__builtin_ia32_storedqudi128_mask:
14273
0
  case X86::BI__builtin_ia32_storedqusi128_mask:
14274
0
  case X86::BI__builtin_ia32_storedquhi128_mask:
14275
0
  case X86::BI__builtin_ia32_storedquqi128_mask:
14276
0
  case X86::BI__builtin_ia32_storeupd128_mask:
14277
0
  case X86::BI__builtin_ia32_storeups128_mask:
14278
0
  case X86::BI__builtin_ia32_storedqudi256_mask:
14279
0
  case X86::BI__builtin_ia32_storedqusi256_mask:
14280
0
  case X86::BI__builtin_ia32_storedquhi256_mask:
14281
0
  case X86::BI__builtin_ia32_storedquqi256_mask:
14282
0
  case X86::BI__builtin_ia32_storeupd256_mask:
14283
0
  case X86::BI__builtin_ia32_storeups256_mask:
14284
0
  case X86::BI__builtin_ia32_storedqudi512_mask:
14285
0
  case X86::BI__builtin_ia32_storedqusi512_mask:
14286
0
  case X86::BI__builtin_ia32_storedquhi512_mask:
14287
0
  case X86::BI__builtin_ia32_storedquqi512_mask:
14288
0
  case X86::BI__builtin_ia32_storeupd512_mask:
14289
0
  case X86::BI__builtin_ia32_storeups512_mask:
14290
0
    return EmitX86MaskedStore(*this, Ops, Align(1));
14291
14292
0
  case X86::BI__builtin_ia32_storesh128_mask:
14293
0
  case X86::BI__builtin_ia32_storess128_mask:
14294
0
  case X86::BI__builtin_ia32_storesd128_mask:
14295
0
    return EmitX86MaskedStore(*this, Ops, Align(1));
14296
14297
0
  case X86::BI__builtin_ia32_vpopcntb_128:
14298
0
  case X86::BI__builtin_ia32_vpopcntd_128:
14299
0
  case X86::BI__builtin_ia32_vpopcntq_128:
14300
0
  case X86::BI__builtin_ia32_vpopcntw_128:
14301
0
  case X86::BI__builtin_ia32_vpopcntb_256:
14302
0
  case X86::BI__builtin_ia32_vpopcntd_256:
14303
0
  case X86::BI__builtin_ia32_vpopcntq_256:
14304
0
  case X86::BI__builtin_ia32_vpopcntw_256:
14305
0
  case X86::BI__builtin_ia32_vpopcntb_512:
14306
0
  case X86::BI__builtin_ia32_vpopcntd_512:
14307
0
  case X86::BI__builtin_ia32_vpopcntq_512:
14308
0
  case X86::BI__builtin_ia32_vpopcntw_512: {
14309
0
    llvm::Type *ResultType = ConvertType(E->getType());
14310
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14311
0
    return Builder.CreateCall(F, Ops);
14312
0
  }
14313
0
  case X86::BI__builtin_ia32_cvtmask2b128:
14314
0
  case X86::BI__builtin_ia32_cvtmask2b256:
14315
0
  case X86::BI__builtin_ia32_cvtmask2b512:
14316
0
  case X86::BI__builtin_ia32_cvtmask2w128:
14317
0
  case X86::BI__builtin_ia32_cvtmask2w256:
14318
0
  case X86::BI__builtin_ia32_cvtmask2w512:
14319
0
  case X86::BI__builtin_ia32_cvtmask2d128:
14320
0
  case X86::BI__builtin_ia32_cvtmask2d256:
14321
0
  case X86::BI__builtin_ia32_cvtmask2d512:
14322
0
  case X86::BI__builtin_ia32_cvtmask2q128:
14323
0
  case X86::BI__builtin_ia32_cvtmask2q256:
14324
0
  case X86::BI__builtin_ia32_cvtmask2q512:
14325
0
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14326
14327
0
  case X86::BI__builtin_ia32_cvtb2mask128:
14328
0
  case X86::BI__builtin_ia32_cvtb2mask256:
14329
0
  case X86::BI__builtin_ia32_cvtb2mask512:
14330
0
  case X86::BI__builtin_ia32_cvtw2mask128:
14331
0
  case X86::BI__builtin_ia32_cvtw2mask256:
14332
0
  case X86::BI__builtin_ia32_cvtw2mask512:
14333
0
  case X86::BI__builtin_ia32_cvtd2mask128:
14334
0
  case X86::BI__builtin_ia32_cvtd2mask256:
14335
0
  case X86::BI__builtin_ia32_cvtd2mask512:
14336
0
  case X86::BI__builtin_ia32_cvtq2mask128:
14337
0
  case X86::BI__builtin_ia32_cvtq2mask256:
14338
0
  case X86::BI__builtin_ia32_cvtq2mask512:
14339
0
    return EmitX86ConvertToMask(*this, Ops[0]);
14340
14341
0
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14342
0
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14343
0
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14344
0
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14345
0
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14346
0
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14347
0
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14348
0
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14349
0
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14350
0
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14351
0
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14352
0
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14353
0
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14354
0
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14355
14356
0
  case X86::BI__builtin_ia32_vfmaddss3:
14357
0
  case X86::BI__builtin_ia32_vfmaddsd3:
14358
0
  case X86::BI__builtin_ia32_vfmaddsh3_mask:
14359
0
  case X86::BI__builtin_ia32_vfmaddss3_mask:
14360
0
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
14361
0
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14362
0
  case X86::BI__builtin_ia32_vfmaddss:
14363
0
  case X86::BI__builtin_ia32_vfmaddsd:
14364
0
    return EmitScalarFMAExpr(*this, E, Ops,
14365
0
                             Constant::getNullValue(Ops[0]->getType()));
14366
0
  case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14367
0
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
14368
0
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14369
0
    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14370
0
  case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14371
0
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
14372
0
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14373
0
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14374
0
  case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14375
0
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
14376
0
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14377
0
    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14378
0
                             /*NegAcc*/ true);
14379
0
  case X86::BI__builtin_ia32_vfmaddph:
14380
0
  case X86::BI__builtin_ia32_vfmaddps:
14381
0
  case X86::BI__builtin_ia32_vfmaddpd:
14382
0
  case X86::BI__builtin_ia32_vfmaddph256:
14383
0
  case X86::BI__builtin_ia32_vfmaddps256:
14384
0
  case X86::BI__builtin_ia32_vfmaddpd256:
14385
0
  case X86::BI__builtin_ia32_vfmaddph512_mask:
14386
0
  case X86::BI__builtin_ia32_vfmaddph512_maskz:
14387
0
  case X86::BI__builtin_ia32_vfmaddph512_mask3:
14388
0
  case X86::BI__builtin_ia32_vfmaddps512_mask:
14389
0
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
14390
0
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
14391
0
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
14392
0
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
14393
0
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14394
0
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14395
0
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14396
0
  case X86::BI__builtin_ia32_vfmsubph512_mask3:
14397
0
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14398
0
  case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14399
0
  case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14400
0
  case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14401
0
  case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14402
0
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14403
0
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14404
0
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14405
0
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14406
0
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14407
0
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14408
0
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14409
0
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14410
0
    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14411
14412
0
  case X86::BI__builtin_ia32_movdqa32store128_mask:
14413
0
  case X86::BI__builtin_ia32_movdqa64store128_mask:
14414
0
  case X86::BI__builtin_ia32_storeaps128_mask:
14415
0
  case X86::BI__builtin_ia32_storeapd128_mask:
14416
0
  case X86::BI__builtin_ia32_movdqa32store256_mask:
14417
0
  case X86::BI__builtin_ia32_movdqa64store256_mask:
14418
0
  case X86::BI__builtin_ia32_storeaps256_mask:
14419
0
  case X86::BI__builtin_ia32_storeapd256_mask:
14420
0
  case X86::BI__builtin_ia32_movdqa32store512_mask:
14421
0
  case X86::BI__builtin_ia32_movdqa64store512_mask:
14422
0
  case X86::BI__builtin_ia32_storeaps512_mask:
14423
0
  case X86::BI__builtin_ia32_storeapd512_mask:
14424
0
    return EmitX86MaskedStore(
14425
0
        *this, Ops,
14426
0
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14427
14428
0
  case X86::BI__builtin_ia32_loadups128_mask:
14429
0
  case X86::BI__builtin_ia32_loadups256_mask:
14430
0
  case X86::BI__builtin_ia32_loadups512_mask:
14431
0
  case X86::BI__builtin_ia32_loadupd128_mask:
14432
0
  case X86::BI__builtin_ia32_loadupd256_mask:
14433
0
  case X86::BI__builtin_ia32_loadupd512_mask:
14434
0
  case X86::BI__builtin_ia32_loaddquqi128_mask:
14435
0
  case X86::BI__builtin_ia32_loaddquqi256_mask:
14436
0
  case X86::BI__builtin_ia32_loaddquqi512_mask:
14437
0
  case X86::BI__builtin_ia32_loaddquhi128_mask:
14438
0
  case X86::BI__builtin_ia32_loaddquhi256_mask:
14439
0
  case X86::BI__builtin_ia32_loaddquhi512_mask:
14440
0
  case X86::BI__builtin_ia32_loaddqusi128_mask:
14441
0
  case X86::BI__builtin_ia32_loaddqusi256_mask:
14442
0
  case X86::BI__builtin_ia32_loaddqusi512_mask:
14443
0
  case X86::BI__builtin_ia32_loaddqudi128_mask:
14444
0
  case X86::BI__builtin_ia32_loaddqudi256_mask:
14445
0
  case X86::BI__builtin_ia32_loaddqudi512_mask:
14446
0
    return EmitX86MaskedLoad(*this, Ops, Align(1));
14447
14448
0
  case X86::BI__builtin_ia32_loadsh128_mask:
14449
0
  case X86::BI__builtin_ia32_loadss128_mask:
14450
0
  case X86::BI__builtin_ia32_loadsd128_mask:
14451
0
    return EmitX86MaskedLoad(*this, Ops, Align(1));
14452
14453
0
  case X86::BI__builtin_ia32_loadaps128_mask:
14454
0
  case X86::BI__builtin_ia32_loadaps256_mask:
14455
0
  case X86::BI__builtin_ia32_loadaps512_mask:
14456
0
  case X86::BI__builtin_ia32_loadapd128_mask:
14457
0
  case X86::BI__builtin_ia32_loadapd256_mask:
14458
0
  case X86::BI__builtin_ia32_loadapd512_mask:
14459
0
  case X86::BI__builtin_ia32_movdqa32load128_mask:
14460
0
  case X86::BI__builtin_ia32_movdqa32load256_mask:
14461
0
  case X86::BI__builtin_ia32_movdqa32load512_mask:
14462
0
  case X86::BI__builtin_ia32_movdqa64load128_mask:
14463
0
  case X86::BI__builtin_ia32_movdqa64load256_mask:
14464
0
  case X86::BI__builtin_ia32_movdqa64load512_mask:
14465
0
    return EmitX86MaskedLoad(
14466
0
        *this, Ops,
14467
0
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14468
14469
0
  case X86::BI__builtin_ia32_expandloaddf128_mask:
14470
0
  case X86::BI__builtin_ia32_expandloaddf256_mask:
14471
0
  case X86::BI__builtin_ia32_expandloaddf512_mask:
14472
0
  case X86::BI__builtin_ia32_expandloadsf128_mask:
14473
0
  case X86::BI__builtin_ia32_expandloadsf256_mask:
14474
0
  case X86::BI__builtin_ia32_expandloadsf512_mask:
14475
0
  case X86::BI__builtin_ia32_expandloaddi128_mask:
14476
0
  case X86::BI__builtin_ia32_expandloaddi256_mask:
14477
0
  case X86::BI__builtin_ia32_expandloaddi512_mask:
14478
0
  case X86::BI__builtin_ia32_expandloadsi128_mask:
14479
0
  case X86::BI__builtin_ia32_expandloadsi256_mask:
14480
0
  case X86::BI__builtin_ia32_expandloadsi512_mask:
14481
0
  case X86::BI__builtin_ia32_expandloadhi128_mask:
14482
0
  case X86::BI__builtin_ia32_expandloadhi256_mask:
14483
0
  case X86::BI__builtin_ia32_expandloadhi512_mask:
14484
0
  case X86::BI__builtin_ia32_expandloadqi128_mask:
14485
0
  case X86::BI__builtin_ia32_expandloadqi256_mask:
14486
0
  case X86::BI__builtin_ia32_expandloadqi512_mask:
14487
0
    return EmitX86ExpandLoad(*this, Ops);
14488
14489
0
  case X86::BI__builtin_ia32_compressstoredf128_mask:
14490
0
  case X86::BI__builtin_ia32_compressstoredf256_mask:
14491
0
  case X86::BI__builtin_ia32_compressstoredf512_mask:
14492
0
  case X86::BI__builtin_ia32_compressstoresf128_mask:
14493
0
  case X86::BI__builtin_ia32_compressstoresf256_mask:
14494
0
  case X86::BI__builtin_ia32_compressstoresf512_mask:
14495
0
  case X86::BI__builtin_ia32_compressstoredi128_mask:
14496
0
  case X86::BI__builtin_ia32_compressstoredi256_mask:
14497
0
  case X86::BI__builtin_ia32_compressstoredi512_mask:
14498
0
  case X86::BI__builtin_ia32_compressstoresi128_mask:
14499
0
  case X86::BI__builtin_ia32_compressstoresi256_mask:
14500
0
  case X86::BI__builtin_ia32_compressstoresi512_mask:
14501
0
  case X86::BI__builtin_ia32_compressstorehi128_mask:
14502
0
  case X86::BI__builtin_ia32_compressstorehi256_mask:
14503
0
  case X86::BI__builtin_ia32_compressstorehi512_mask:
14504
0
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
14505
0
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
14506
0
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
14507
0
    return EmitX86CompressStore(*this, Ops);
14508
14509
0
  case X86::BI__builtin_ia32_expanddf128_mask:
14510
0
  case X86::BI__builtin_ia32_expanddf256_mask:
14511
0
  case X86::BI__builtin_ia32_expanddf512_mask:
14512
0
  case X86::BI__builtin_ia32_expandsf128_mask:
14513
0
  case X86::BI__builtin_ia32_expandsf256_mask:
14514
0
  case X86::BI__builtin_ia32_expandsf512_mask:
14515
0
  case X86::BI__builtin_ia32_expanddi128_mask:
14516
0
  case X86::BI__builtin_ia32_expanddi256_mask:
14517
0
  case X86::BI__builtin_ia32_expanddi512_mask:
14518
0
  case X86::BI__builtin_ia32_expandsi128_mask:
14519
0
  case X86::BI__builtin_ia32_expandsi256_mask:
14520
0
  case X86::BI__builtin_ia32_expandsi512_mask:
14521
0
  case X86::BI__builtin_ia32_expandhi128_mask:
14522
0
  case X86::BI__builtin_ia32_expandhi256_mask:
14523
0
  case X86::BI__builtin_ia32_expandhi512_mask:
14524
0
  case X86::BI__builtin_ia32_expandqi128_mask:
14525
0
  case X86::BI__builtin_ia32_expandqi256_mask:
14526
0
  case X86::BI__builtin_ia32_expandqi512_mask:
14527
0
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14528
14529
0
  case X86::BI__builtin_ia32_compressdf128_mask:
14530
0
  case X86::BI__builtin_ia32_compressdf256_mask:
14531
0
  case X86::BI__builtin_ia32_compressdf512_mask:
14532
0
  case X86::BI__builtin_ia32_compresssf128_mask:
14533
0
  case X86::BI__builtin_ia32_compresssf256_mask:
14534
0
  case X86::BI__builtin_ia32_compresssf512_mask:
14535
0
  case X86::BI__builtin_ia32_compressdi128_mask:
14536
0
  case X86::BI__builtin_ia32_compressdi256_mask:
14537
0
  case X86::BI__builtin_ia32_compressdi512_mask:
14538
0
  case X86::BI__builtin_ia32_compresssi128_mask:
14539
0
  case X86::BI__builtin_ia32_compresssi256_mask:
14540
0
  case X86::BI__builtin_ia32_compresssi512_mask:
14541
0
  case X86::BI__builtin_ia32_compresshi128_mask:
14542
0
  case X86::BI__builtin_ia32_compresshi256_mask:
14543
0
  case X86::BI__builtin_ia32_compresshi512_mask:
14544
0
  case X86::BI__builtin_ia32_compressqi128_mask:
14545
0
  case X86::BI__builtin_ia32_compressqi256_mask:
14546
0
  case X86::BI__builtin_ia32_compressqi512_mask:
14547
0
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14548
14549
0
  case X86::BI__builtin_ia32_gather3div2df:
14550
0
  case X86::BI__builtin_ia32_gather3div2di:
14551
0
  case X86::BI__builtin_ia32_gather3div4df:
14552
0
  case X86::BI__builtin_ia32_gather3div4di:
14553
0
  case X86::BI__builtin_ia32_gather3div4sf:
14554
0
  case X86::BI__builtin_ia32_gather3div4si:
14555
0
  case X86::BI__builtin_ia32_gather3div8sf:
14556
0
  case X86::BI__builtin_ia32_gather3div8si:
14557
0
  case X86::BI__builtin_ia32_gather3siv2df:
14558
0
  case X86::BI__builtin_ia32_gather3siv2di:
14559
0
  case X86::BI__builtin_ia32_gather3siv4df:
14560
0
  case X86::BI__builtin_ia32_gather3siv4di:
14561
0
  case X86::BI__builtin_ia32_gather3siv4sf:
14562
0
  case X86::BI__builtin_ia32_gather3siv4si:
14563
0
  case X86::BI__builtin_ia32_gather3siv8sf:
14564
0
  case X86::BI__builtin_ia32_gather3siv8si:
14565
0
  case X86::BI__builtin_ia32_gathersiv8df:
14566
0
  case X86::BI__builtin_ia32_gathersiv16sf:
14567
0
  case X86::BI__builtin_ia32_gatherdiv8df:
14568
0
  case X86::BI__builtin_ia32_gatherdiv16sf:
14569
0
  case X86::BI__builtin_ia32_gathersiv8di:
14570
0
  case X86::BI__builtin_ia32_gathersiv16si:
14571
0
  case X86::BI__builtin_ia32_gatherdiv8di:
14572
0
  case X86::BI__builtin_ia32_gatherdiv16si: {
14573
0
    Intrinsic::ID IID;
14574
0
    switch (BuiltinID) {
14575
0
    default: llvm_unreachable("Unexpected builtin");
14576
0
    case X86::BI__builtin_ia32_gather3div2df:
14577
0
      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14578
0
      break;
14579
0
    case X86::BI__builtin_ia32_gather3div2di:
14580
0
      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14581
0
      break;
14582
0
    case X86::BI__builtin_ia32_gather3div4df:
14583
0
      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14584
0
      break;
14585
0
    case X86::BI__builtin_ia32_gather3div4di:
14586
0
      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14587
0
      break;
14588
0
    case X86::BI__builtin_ia32_gather3div4sf:
14589
0
      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14590
0
      break;
14591
0
    case X86::BI__builtin_ia32_gather3div4si:
14592
0
      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14593
0
      break;
14594
0
    case X86::BI__builtin_ia32_gather3div8sf:
14595
0
      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14596
0
      break;
14597
0
    case X86::BI__builtin_ia32_gather3div8si:
14598
0
      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14599
0
      break;
14600
0
    case X86::BI__builtin_ia32_gather3siv2df:
14601
0
      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14602
0
      break;
14603
0
    case X86::BI__builtin_ia32_gather3siv2di:
14604
0
      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14605
0
      break;
14606
0
    case X86::BI__builtin_ia32_gather3siv4df:
14607
0
      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14608
0
      break;
14609
0
    case X86::BI__builtin_ia32_gather3siv4di:
14610
0
      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14611
0
      break;
14612
0
    case X86::BI__builtin_ia32_gather3siv4sf:
14613
0
      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14614
0
      break;
14615
0
    case X86::BI__builtin_ia32_gather3siv4si:
14616
0
      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14617
0
      break;
14618
0
    case X86::BI__builtin_ia32_gather3siv8sf:
14619
0
      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14620
0
      break;
14621
0
    case X86::BI__builtin_ia32_gather3siv8si:
14622
0
      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14623
0
      break;
14624
0
    case X86::BI__builtin_ia32_gathersiv8df:
14625
0
      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14626
0
      break;
14627
0
    case X86::BI__builtin_ia32_gathersiv16sf:
14628
0
      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14629
0
      break;
14630
0
    case X86::BI__builtin_ia32_gatherdiv8df:
14631
0
      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14632
0
      break;
14633
0
    case X86::BI__builtin_ia32_gatherdiv16sf:
14634
0
      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14635
0
      break;
14636
0
    case X86::BI__builtin_ia32_gathersiv8di:
14637
0
      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14638
0
      break;
14639
0
    case X86::BI__builtin_ia32_gathersiv16si:
14640
0
      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14641
0
      break;
14642
0
    case X86::BI__builtin_ia32_gatherdiv8di:
14643
0
      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14644
0
      break;
14645
0
    case X86::BI__builtin_ia32_gatherdiv16si:
14646
0
      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14647
0
      break;
14648
0
    }
14649
14650
0
    unsigned MinElts = std::min(
14651
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14652
0
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14653
0
    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14654
0
    Function *Intr = CGM.getIntrinsic(IID);
14655
0
    return Builder.CreateCall(Intr, Ops);
14656
0
  }
14657
14658
0
  case X86::BI__builtin_ia32_scattersiv8df:
14659
0
  case X86::BI__builtin_ia32_scattersiv16sf:
14660
0
  case X86::BI__builtin_ia32_scatterdiv8df:
14661
0
  case X86::BI__builtin_ia32_scatterdiv16sf:
14662
0
  case X86::BI__builtin_ia32_scattersiv8di:
14663
0
  case X86::BI__builtin_ia32_scattersiv16si:
14664
0
  case X86::BI__builtin_ia32_scatterdiv8di:
14665
0
  case X86::BI__builtin_ia32_scatterdiv16si:
14666
0
  case X86::BI__builtin_ia32_scatterdiv2df:
14667
0
  case X86::BI__builtin_ia32_scatterdiv2di:
14668
0
  case X86::BI__builtin_ia32_scatterdiv4df:
14669
0
  case X86::BI__builtin_ia32_scatterdiv4di:
14670
0
  case X86::BI__builtin_ia32_scatterdiv4sf:
14671
0
  case X86::BI__builtin_ia32_scatterdiv4si:
14672
0
  case X86::BI__builtin_ia32_scatterdiv8sf:
14673
0
  case X86::BI__builtin_ia32_scatterdiv8si:
14674
0
  case X86::BI__builtin_ia32_scattersiv2df:
14675
0
  case X86::BI__builtin_ia32_scattersiv2di:
14676
0
  case X86::BI__builtin_ia32_scattersiv4df:
14677
0
  case X86::BI__builtin_ia32_scattersiv4di:
14678
0
  case X86::BI__builtin_ia32_scattersiv4sf:
14679
0
  case X86::BI__builtin_ia32_scattersiv4si:
14680
0
  case X86::BI__builtin_ia32_scattersiv8sf:
14681
0
  case X86::BI__builtin_ia32_scattersiv8si: {
14682
0
    Intrinsic::ID IID;
14683
0
    switch (BuiltinID) {
14684
0
    default: llvm_unreachable("Unexpected builtin");
14685
0
    case X86::BI__builtin_ia32_scattersiv8df:
14686
0
      IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14687
0
      break;
14688
0
    case X86::BI__builtin_ia32_scattersiv16sf:
14689
0
      IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14690
0
      break;
14691
0
    case X86::BI__builtin_ia32_scatterdiv8df:
14692
0
      IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14693
0
      break;
14694
0
    case X86::BI__builtin_ia32_scatterdiv16sf:
14695
0
      IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14696
0
      break;
14697
0
    case X86::BI__builtin_ia32_scattersiv8di:
14698
0
      IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14699
0
      break;
14700
0
    case X86::BI__builtin_ia32_scattersiv16si:
14701
0
      IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14702
0
      break;
14703
0
    case X86::BI__builtin_ia32_scatterdiv8di:
14704
0
      IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14705
0
      break;
14706
0
    case X86::BI__builtin_ia32_scatterdiv16si:
14707
0
      IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14708
0
      break;
14709
0
    case X86::BI__builtin_ia32_scatterdiv2df:
14710
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14711
0
      break;
14712
0
    case X86::BI__builtin_ia32_scatterdiv2di:
14713
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14714
0
      break;
14715
0
    case X86::BI__builtin_ia32_scatterdiv4df:
14716
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14717
0
      break;
14718
0
    case X86::BI__builtin_ia32_scatterdiv4di:
14719
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14720
0
      break;
14721
0
    case X86::BI__builtin_ia32_scatterdiv4sf:
14722
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14723
0
      break;
14724
0
    case X86::BI__builtin_ia32_scatterdiv4si:
14725
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14726
0
      break;
14727
0
    case X86::BI__builtin_ia32_scatterdiv8sf:
14728
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14729
0
      break;
14730
0
    case X86::BI__builtin_ia32_scatterdiv8si:
14731
0
      IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14732
0
      break;
14733
0
    case X86::BI__builtin_ia32_scattersiv2df:
14734
0
      IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14735
0
      break;
14736
0
    case X86::BI__builtin_ia32_scattersiv2di:
14737
0
      IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14738
0
      break;
14739
0
    case X86::BI__builtin_ia32_scattersiv4df:
14740
0
      IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14741
0
      break;
14742
0
    case X86::BI__builtin_ia32_scattersiv4di:
14743
0
      IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14744
0
      break;
14745
0
    case X86::BI__builtin_ia32_scattersiv4sf:
14746
0
      IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14747
0
      break;
14748
0
    case X86::BI__builtin_ia32_scattersiv4si:
14749
0
      IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14750
0
      break;
14751
0
    case X86::BI__builtin_ia32_scattersiv8sf:
14752
0
      IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14753
0
      break;
14754
0
    case X86::BI__builtin_ia32_scattersiv8si:
14755
0
      IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14756
0
      break;
14757
0
    }
14758
14759
0
    unsigned MinElts = std::min(
14760
0
        cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14761
0
        cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14762
0
    Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14763
0
    Function *Intr = CGM.getIntrinsic(IID);
14764
0
    return Builder.CreateCall(Intr, Ops);
14765
0
  }
14766
14767
0
  case X86::BI__builtin_ia32_vextractf128_pd256:
14768
0
  case X86::BI__builtin_ia32_vextractf128_ps256:
14769
0
  case X86::BI__builtin_ia32_vextractf128_si256:
14770
0
  case X86::BI__builtin_ia32_extract128i256:
14771
0
  case X86::BI__builtin_ia32_extractf64x4_mask:
14772
0
  case X86::BI__builtin_ia32_extractf32x4_mask:
14773
0
  case X86::BI__builtin_ia32_extracti64x4_mask:
14774
0
  case X86::BI__builtin_ia32_extracti32x4_mask:
14775
0
  case X86::BI__builtin_ia32_extractf32x8_mask:
14776
0
  case X86::BI__builtin_ia32_extracti32x8_mask:
14777
0
  case X86::BI__builtin_ia32_extractf32x4_256_mask:
14778
0
  case X86::BI__builtin_ia32_extracti32x4_256_mask:
14779
0
  case X86::BI__builtin_ia32_extractf64x2_256_mask:
14780
0
  case X86::BI__builtin_ia32_extracti64x2_256_mask:
14781
0
  case X86::BI__builtin_ia32_extractf64x2_512_mask:
14782
0
  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
14783
0
    auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
14784
0
    unsigned NumElts = DstTy->getNumElements();
14785
0
    unsigned SrcNumElts =
14786
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14787
0
    unsigned SubVectors = SrcNumElts / NumElts;
14788
0
    unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14789
0
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14790
0
    Index &= SubVectors - 1; // Remove any extra bits.
14791
0
    Index *= NumElts;
14792
14793
0
    int Indices[16];
14794
0
    for (unsigned i = 0; i != NumElts; ++i)
14795
0
      Indices[i] = i + Index;
14796
14797
0
    Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14798
0
                                             "extract");
14799
14800
0
    if (Ops.size() == 4)
14801
0
      Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
14802
14803
0
    return Res;
14804
0
  }
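// --- Illustrative sketch, not part of CGBuiltin.cpp ----------------------
// Subvector extraction is a plain shuffle selecting one contiguous run of
// elements: the immediate is wrapped to the number of subvectors and scaled
// by the result width. For example, extracting 256-bit half 1 of a v8f64
// uses indices {4, 5, 6, 7}. Index construction (hypothetical helper):
static void BuildExtractIndices(unsigned Imm, unsigned NumElts,
                                unsigned SubVectors, int *Indices) {
  unsigned Index = (Imm & (SubVectors - 1)) * NumElts; // wrap, then scale
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = (int)(i + Index);
}
// --------------------------------------------------------------------------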
14805
0
  case X86::BI__builtin_ia32_vinsertf128_pd256:
14806
0
  case X86::BI__builtin_ia32_vinsertf128_ps256:
14807
0
  case X86::BI__builtin_ia32_vinsertf128_si256:
14808
0
  case X86::BI__builtin_ia32_insert128i256:
14809
0
  case X86::BI__builtin_ia32_insertf64x4:
14810
0
  case X86::BI__builtin_ia32_insertf32x4:
14811
0
  case X86::BI__builtin_ia32_inserti64x4:
14812
0
  case X86::BI__builtin_ia32_inserti32x4:
14813
0
  case X86::BI__builtin_ia32_insertf32x8:
14814
0
  case X86::BI__builtin_ia32_inserti32x8:
14815
0
  case X86::BI__builtin_ia32_insertf32x4_256:
14816
0
  case X86::BI__builtin_ia32_inserti32x4_256:
14817
0
  case X86::BI__builtin_ia32_insertf64x2_256:
14818
0
  case X86::BI__builtin_ia32_inserti64x2_256:
14819
0
  case X86::BI__builtin_ia32_insertf64x2_512:
14820
0
  case X86::BI__builtin_ia32_inserti64x2_512: {
14821
0
    unsigned DstNumElts =
14822
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14823
0
    unsigned SrcNumElts =
14824
0
        cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
14825
0
    unsigned SubVectors = DstNumElts / SrcNumElts;
14826
0
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14827
0
    assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14828
0
    Index &= SubVectors - 1; // Remove any extra bits.
14829
0
    Index *= SrcNumElts;
14830
14831
0
    int Indices[16];
14832
0
    for (unsigned i = 0; i != DstNumElts; ++i)
14833
0
      Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14834
14835
0
    Value *Op1 = Builder.CreateShuffleVector(
14836
0
        Ops[1], ArrayRef(Indices, DstNumElts), "widen");
14837
14838
0
    for (unsigned i = 0; i != DstNumElts; ++i) {
14839
0
      if (i >= Index && i < (Index + SrcNumElts))
14840
0
        Indices[i] = (i - Index) + DstNumElts;
14841
0
      else
14842
0
        Indices[i] = i;
14843
0
    }
14844
14845
0
    return Builder.CreateShuffleVector(Ops[0], Op1,
14846
0
                                       ArrayRef(Indices, DstNumElts), "insert");
14847
0
  }
14848
0
  case X86::BI__builtin_ia32_pmovqd512_mask:
14849
0
  case X86::BI__builtin_ia32_pmovwb512_mask: {
14850
0
    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14851
0
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
14852
0
  }
14853
0
  case X86::BI__builtin_ia32_pmovdb512_mask:
14854
0
  case X86::BI__builtin_ia32_pmovdw512_mask:
14855
0
  case X86::BI__builtin_ia32_pmovqw512_mask: {
14856
0
    if (const auto *C = dyn_cast<Constant>(Ops[2]))
14857
0
      if (C->isAllOnesValue())
14858
0
        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
14859
14860
0
    Intrinsic::ID IID;
14861
0
    switch (BuiltinID) {
14862
0
    default: llvm_unreachable("Unsupported intrinsic!");
14863
0
    case X86::BI__builtin_ia32_pmovdb512_mask:
14864
0
      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
14865
0
      break;
14866
0
    case X86::BI__builtin_ia32_pmovdw512_mask:
14867
0
      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
14868
0
      break;
14869
0
    case X86::BI__builtin_ia32_pmovqw512_mask:
14870
0
      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
14871
0
      break;
14872
0
    }
14873
14874
0
    Function *Intr = CGM.getIntrinsic(IID);
14875
0
    return Builder.CreateCall(Intr, Ops);
14876
0
  }
14877
0
  case X86::BI__builtin_ia32_pblendw128:
14878
0
  case X86::BI__builtin_ia32_blendpd:
14879
0
  case X86::BI__builtin_ia32_blendps:
14880
0
  case X86::BI__builtin_ia32_blendpd256:
14881
0
  case X86::BI__builtin_ia32_blendps256:
14882
0
  case X86::BI__builtin_ia32_pblendw256:
14883
0
  case X86::BI__builtin_ia32_pblendd128:
14884
0
  case X86::BI__builtin_ia32_pblendd256: {
14885
0
    unsigned NumElts =
14886
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14887
0
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14888
14889
0
    int Indices[16];
14890
    // If there are more than 8 elements, the immediate is used twice so make
14891
    // sure we handle that.
14892
0
    for (unsigned i = 0; i != NumElts; ++i)
14893
0
      Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
14894
14895
0
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
14896
0
                                       ArrayRef(Indices, NumElts), "blend");
14897
0
  }
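    // Worked example (illustration only): for a 4-element blendps with
    // Imm = 0b0101 the indices above become {4, 1, 6, 3}; elements 0 and 2
    // are taken from the second source and elements 1 and 3 from the first.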
14898
0
  case X86::BI__builtin_ia32_pshuflw:
14899
0
  case X86::BI__builtin_ia32_pshuflw256:
14900
0
  case X86::BI__builtin_ia32_pshuflw512: {
14901
0
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14902
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14903
0
    unsigned NumElts = Ty->getNumElements();
14904
14905
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14906
0
    Imm = (Imm & 0xff) * 0x01010101;
14907
14908
0
    int Indices[32];
14909
0
    for (unsigned l = 0; l != NumElts; l += 8) {
14910
0
      for (unsigned i = 0; i != 4; ++i) {
14911
0
        Indices[l + i] = l + (Imm & 3);
14912
0
        Imm >>= 2;
14913
0
      }
14914
0
      for (unsigned i = 4; i != 8; ++i)
14915
0
        Indices[l + i] = l + i;
14916
0
    }
14917
14918
0
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14919
0
                                       "pshuflw");
14920
0
  }
14921
0
  case X86::BI__builtin_ia32_pshufhw:
14922
0
  case X86::BI__builtin_ia32_pshufhw256:
14923
0
  case X86::BI__builtin_ia32_pshufhw512: {
14924
0
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14925
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14926
0
    unsigned NumElts = Ty->getNumElements();
14927
14928
    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
14929
0
    Imm = (Imm & 0xff) * 0x01010101;
14930
14931
0
    int Indices[32];
14932
0
    for (unsigned l = 0; l != NumElts; l += 8) {
14933
0
      for (unsigned i = 0; i != 4; ++i)
14934
0
        Indices[l + i] = l + i;
14935
0
      for (unsigned i = 4; i != 8; ++i) {
14936
0
        Indices[l + i] = l + 4 + (Imm & 3);
14937
0
        Imm >>= 2;
14938
0
      }
14939
0
    }
14940
14941
0
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14942
0
                                       "pshufhw");
14943
0
  }
14944
0
  case X86::BI__builtin_ia32_pshufd:
14945
0
  case X86::BI__builtin_ia32_pshufd256:
14946
0
  case X86::BI__builtin_ia32_pshufd512:
14947
0
  case X86::BI__builtin_ia32_vpermilpd:
14948
0
  case X86::BI__builtin_ia32_vpermilps:
14949
0
  case X86::BI__builtin_ia32_vpermilpd256:
14950
0
  case X86::BI__builtin_ia32_vpermilps256:
14951
0
  case X86::BI__builtin_ia32_vpermilpd512:
14952
0
  case X86::BI__builtin_ia32_vpermilps512: {
14953
0
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
14954
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14955
0
    unsigned NumElts = Ty->getNumElements();
14956
0
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14957
0
    unsigned NumLaneElts = NumElts / NumLanes;
14958
14959
    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14960
0
    Imm = (Imm & 0xff) * 0x01010101;
14961
14962
0
    int Indices[16];
14963
0
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14964
0
      for (unsigned i = 0; i != NumLaneElts; ++i) {
14965
0
        Indices[i + l] = (Imm % NumLaneElts) + l;
14966
0
        Imm /= NumLaneElts;
14967
0
      }
14968
0
    }
14969
14970
0
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14971
0
                                       "permil");
14972
0
  }
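pshufd and vpermilps/pd reuse the same splat trick, but the field width is derived from the number of elements per 128-bit lane. A small illustrative sketch (assumed helper names, not the actual CodeGen code):

#include <cstdio>

// Hypothetical sketch: each element picks a position within its own
// 128-bit lane, using log2(NumLaneElts)-bit fields of the splatted Imm.
static void permilIndices(unsigned Imm, unsigned NumElts, unsigned NumLaneElts,
                          int *Indices) {
  Imm = (Imm & 0xff) * 0x01010101;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      Indices[l + i] = (Imm % NumLaneElts) + l;
      Imm /= NumLaneElts;
    }
  }
}

int main() {
  int Indices[8];
  // 256-bit vpermilps: 8 floats in two 128-bit lanes of 4.
  permilIndices(/*Imm=*/0x4E, /*NumElts=*/8, /*NumLaneElts=*/4, Indices);
  for (int Idx : Indices)
    std::printf("%d ", Idx);   // 2 3 0 1 6 7 4 5
  std::printf("\n");
}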
14973
0
  case X86::BI__builtin_ia32_shufpd:
14974
0
  case X86::BI__builtin_ia32_shufpd256:
14975
0
  case X86::BI__builtin_ia32_shufpd512:
14976
0
  case X86::BI__builtin_ia32_shufps:
14977
0
  case X86::BI__builtin_ia32_shufps256:
14978
0
  case X86::BI__builtin_ia32_shufps512: {
14979
0
    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
14980
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
14981
0
    unsigned NumElts = Ty->getNumElements();
14982
0
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
14983
0
    unsigned NumLaneElts = NumElts / NumLanes;
14984
14985
    // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
14986
0
    Imm = (Imm & 0xff) * 0x01010101;
14987
14988
0
    int Indices[16];
14989
0
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
14990
0
      for (unsigned i = 0; i != NumLaneElts; ++i) {
14991
0
        unsigned Index = Imm % NumLaneElts;
14992
0
        Imm /= NumLaneElts;
14993
0
        if (i >= (NumLaneElts / 2))
14994
0
          Index += NumElts;
14995
0
        Indices[l + i] = l + Index;
14996
0
      }
14997
0
    }
14998
14999
0
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
15000
0
                                       ArrayRef(Indices, NumElts), "shufp");
15001
0
  }
15002
0
  case X86::BI__builtin_ia32_permdi256:
15003
0
  case X86::BI__builtin_ia32_permdf256:
15004
0
  case X86::BI__builtin_ia32_permdi512:
15005
0
  case X86::BI__builtin_ia32_permdf512: {
15006
0
    unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15007
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15008
0
    unsigned NumElts = Ty->getNumElements();
15009
15010
    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15011
0
    int Indices[8];
15012
0
    for (unsigned l = 0; l != NumElts; l += 4)
15013
0
      for (unsigned i = 0; i != 4; ++i)
15014
0
        Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15015
15016
0
    return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15017
0
                                       "perm");
15018
0
  }
15019
0
  case X86::BI__builtin_ia32_palignr128:
15020
0
  case X86::BI__builtin_ia32_palignr256:
15021
0
  case X86::BI__builtin_ia32_palignr512: {
15022
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15023
15024
0
    unsigned NumElts =
15025
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15026
0
    assert(NumElts % 16 == 0);
15027
15028
    // If palignr is shifting the pair of vectors more than the size of two
15029
    // lanes, emit zero.
15030
0
    if (ShiftVal >= 32)
15031
0
      return llvm::Constant::getNullValue(ConvertType(E->getType()));
15032
15033
    // If palignr is shifting the pair of input vectors more than one lane,
15034
    // but less than two lanes, convert to shifting in zeroes.
15035
0
    if (ShiftVal > 16) {
15036
0
      ShiftVal -= 16;
15037
0
      Ops[1] = Ops[0];
15038
0
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15039
0
    }
15040
15041
0
    int Indices[64];
15042
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
15043
0
    for (unsigned l = 0; l != NumElts; l += 16) {
15044
0
      for (unsigned i = 0; i != 16; ++i) {
15045
0
        unsigned Idx = ShiftVal + i;
15046
0
        if (Idx >= 16)
15047
0
          Idx += NumElts - 16; // End of lane, switch operand.
15048
0
        Indices[l + i] = Idx + l;
15049
0
      }
15050
0
    }
15051
15052
0
    return Builder.CreateShuffleVector(Ops[1], Ops[0],
15053
0
                                       ArrayRef(Indices, NumElts), "palignr");
15054
0
  }
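The palignr lowering selects from the 32-byte concatenation of the two inputs, one 128-bit lane at a time. A scalar model of a single lane, using hypothetical names:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical scalar model of one 128-bit palignr lane: concatenate the
// "hi" and "lo" 16-byte inputs and extract 16 bytes starting at 'shift'.
// Shifts of 32 or more yield all zeroes, matching the early-out above.
static void palignrLane(const uint8_t hi[16], const uint8_t lo[16],
                        unsigned shift, uint8_t out[16]) {
  uint8_t concat[32] = {};
  std::memcpy(concat, lo, 16);        // low half of the 32-byte window
  std::memcpy(concat + 16, hi, 16);   // high half
  for (unsigned i = 0; i != 16; ++i)
    out[i] = (shift + i < 32) ? concat[shift + i] : 0;
}

int main() {
  uint8_t hi[16], lo[16], out[16];
  for (int i = 0; i != 16; ++i) { hi[i] = 16 + i; lo[i] = i; }
  palignrLane(hi, lo, /*shift=*/4, out);
  for (uint8_t b : out)
    std::printf("%u ", (unsigned)b);   // 4 5 6 ... 19
  std::printf("\n");
}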
15055
0
  case X86::BI__builtin_ia32_alignd128:
15056
0
  case X86::BI__builtin_ia32_alignd256:
15057
0
  case X86::BI__builtin_ia32_alignd512:
15058
0
  case X86::BI__builtin_ia32_alignq128:
15059
0
  case X86::BI__builtin_ia32_alignq256:
15060
0
  case X86::BI__builtin_ia32_alignq512: {
15061
0
    unsigned NumElts =
15062
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15063
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15064
15065
    // Mask the shift amount to width of a vector.
15066
0
    ShiftVal &= NumElts - 1;
15067
15068
0
    int Indices[16];
15069
0
    for (unsigned i = 0; i != NumElts; ++i)
15070
0
      Indices[i] = i + ShiftVal;
15071
15072
0
    return Builder.CreateShuffleVector(Ops[1], Ops[0],
15073
0
                                       ArrayRef(Indices, NumElts), "valign");
15074
0
  }
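valignd/valignq behave like an element-granular rotate across the concatenation of the two sources; the shuffle above encodes exactly that. A scalar sketch under that reading (illustrative names only):

#include <cstdio>

// Hypothetical scalar model of valignd on N-element vectors: view {hi:lo}
// as a 2N-element window and take N consecutive elements starting at
// 'shift' (already masked to N-1 above).
static void valign(const int *hi, const int *lo, unsigned n, unsigned shift,
                   int *out) {
  for (unsigned i = 0; i != n; ++i)
    out[i] = (i + shift < n) ? lo[i + shift] : hi[i + shift - n];
}

int main() {
  int hi[4] = {14, 15, 16, 17}, lo[4] = {10, 11, 12, 13}, out[4];
  valign(hi, lo, 4, /*shift=*/3 & 3, out);
  for (int v : out)
    std::printf("%d ", v);   // 13 14 15 16
  std::printf("\n");
}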
15075
0
  case X86::BI__builtin_ia32_shuf_f32x4_256:
15076
0
  case X86::BI__builtin_ia32_shuf_f64x2_256:
15077
0
  case X86::BI__builtin_ia32_shuf_i32x4_256:
15078
0
  case X86::BI__builtin_ia32_shuf_i64x2_256:
15079
0
  case X86::BI__builtin_ia32_shuf_f32x4:
15080
0
  case X86::BI__builtin_ia32_shuf_f64x2:
15081
0
  case X86::BI__builtin_ia32_shuf_i32x4:
15082
0
  case X86::BI__builtin_ia32_shuf_i64x2: {
15083
0
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15084
0
    auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15085
0
    unsigned NumElts = Ty->getNumElements();
15086
0
    unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15087
0
    unsigned NumLaneElts = NumElts / NumLanes;
15088
15089
0
    int Indices[16];
15090
0
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15091
0
      unsigned Index = (Imm % NumLanes) * NumLaneElts;
15092
0
      Imm /= NumLanes; // Discard the bits we just used.
15093
0
      if (l >= (NumElts / 2))
15094
0
        Index += NumElts; // Switch to other source.
15095
0
      for (unsigned i = 0; i != NumLaneElts; ++i) {
15096
0
        Indices[l + i] = Index + i;
15097
0
      }
15098
0
    }
15099
15100
0
    return Builder.CreateShuffleVector(Ops[0], Ops[1],
15101
0
                                       ArrayRef(Indices, NumElts), "shuf");
15102
0
  }
15103
15104
0
  case X86::BI__builtin_ia32_vperm2f128_pd256:
15105
0
  case X86::BI__builtin_ia32_vperm2f128_ps256:
15106
0
  case X86::BI__builtin_ia32_vperm2f128_si256:
15107
0
  case X86::BI__builtin_ia32_permti256: {
15108
0
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15109
0
    unsigned NumElts =
15110
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15111
15112
    // This takes a very simple approach since there are two lanes and a
15113
    // shuffle can have 2 inputs. So we reserve the first input for the first
15114
    // lane and the second input for the second lane. This may result in
15115
    // duplicate sources, but this can be dealt with in the backend.
15116
15117
0
    Value *OutOps[2];
15118
0
    int Indices[8];
15119
0
    for (unsigned l = 0; l != 2; ++l) {
15120
      // Determine the source for this lane.
15121
0
      if (Imm & (1 << ((l * 4) + 3)))
15122
0
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15123
0
      else if (Imm & (1 << ((l * 4) + 1)))
15124
0
        OutOps[l] = Ops[1];
15125
0
      else
15126
0
        OutOps[l] = Ops[0];
15127
15128
0
      for (unsigned i = 0; i != NumElts/2; ++i) {
15129
        // Start with ith element of the source for this lane.
15130
0
        unsigned Idx = (l * NumElts) + i;
15131
        // If bit 0 of the immediate half is set, switch to the high half of
15132
        // the source.
15133
0
        if (Imm & (1 << (l * 4)))
15134
0
          Idx += NumElts/2;
15135
0
        Indices[(l * (NumElts/2)) + i] = Idx;
15136
0
      }
15137
0
    }
15138
15139
0
    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15140
0
                                       ArrayRef(Indices, NumElts), "vperm");
15141
0
  }
15142
15143
0
  case X86::BI__builtin_ia32_pslldqi128_byteshift:
15144
0
  case X86::BI__builtin_ia32_pslldqi256_byteshift:
15145
0
  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15146
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15147
0
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15148
    // Builtin type is vXi64 so multiply by 8 to get bytes.
15149
0
    unsigned NumElts = ResultType->getNumElements() * 8;
15150
15151
    // If pslldq is shifting the vector more than 15 bytes, emit zero.
15152
0
    if (ShiftVal >= 16)
15153
0
      return llvm::Constant::getNullValue(ResultType);
15154
15155
0
    int Indices[64];
15156
    // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15157
0
    for (unsigned l = 0; l != NumElts; l += 16) {
15158
0
      for (unsigned i = 0; i != 16; ++i) {
15159
0
        unsigned Idx = NumElts + i - ShiftVal;
15160
0
        if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15161
0
        Indices[l + i] = Idx + l;
15162
0
      }
15163
0
    }
15164
15165
0
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15166
0
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15167
0
    Value *Zero = llvm::Constant::getNullValue(VecTy);
15168
0
    Value *SV = Builder.CreateShuffleVector(
15169
0
        Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15170
0
    return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15171
0
  }
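pslldq shifts each 128-bit lane left by whole bytes, filling with zeroes, which is what the shuffle against a zero vector above encodes. A scalar model of one lane with hypothetical names:

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of one 128-bit pslldq lane: shift the 16 bytes
// left by 'shift' positions, filling vacated low bytes with zero. Shifts of
// 16 or more produce an all-zero lane, matching the early-out above.
static void pslldqLane(const uint8_t in[16], unsigned shift, uint8_t out[16]) {
  for (unsigned i = 0; i != 16; ++i)
    out[i] = (i >= shift && shift < 16) ? in[i - shift] : 0;
}

int main() {
  uint8_t in[16], out[16];
  for (int i = 0; i != 16; ++i) in[i] = i + 1;
  pslldqLane(in, /*shift=*/3, out);
  for (uint8_t b : out)
    std::printf("%u ", (unsigned)b);   // 0 0 0 1 2 ... 13
  std::printf("\n");
}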
15172
0
  case X86::BI__builtin_ia32_psrldqi128_byteshift:
15173
0
  case X86::BI__builtin_ia32_psrldqi256_byteshift:
15174
0
  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15175
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15176
0
    auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15177
    // Builtin type is vXi64 so multiply by 8 to get bytes.
15178
0
    unsigned NumElts = ResultType->getNumElements() * 8;
15179
15180
    // If psrldq is shifting the vector more than 15 bytes, emit zero.
15181
0
    if (ShiftVal >= 16)
15182
0
      return llvm::Constant::getNullValue(ResultType);
15183
15184
0
    int Indices[64];
15185
    // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15186
0
    for (unsigned l = 0; l != NumElts; l += 16) {
15187
0
      for (unsigned i = 0; i != 16; ++i) {
15188
0
        unsigned Idx = i + ShiftVal;
15189
0
        if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15190
0
        Indices[l + i] = Idx + l;
15191
0
      }
15192
0
    }
15193
15194
0
    auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15195
0
    Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15196
0
    Value *Zero = llvm::Constant::getNullValue(VecTy);
15197
0
    Value *SV = Builder.CreateShuffleVector(
15198
0
        Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15199
0
    return Builder.CreateBitCast(SV, ResultType, "cast");
15200
0
  }
15201
0
  case X86::BI__builtin_ia32_kshiftliqi:
15202
0
  case X86::BI__builtin_ia32_kshiftlihi:
15203
0
  case X86::BI__builtin_ia32_kshiftlisi:
15204
0
  case X86::BI__builtin_ia32_kshiftlidi: {
15205
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15206
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15207
15208
0
    if (ShiftVal >= NumElts)
15209
0
      return llvm::Constant::getNullValue(Ops[0]->getType());
15210
15211
0
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15212
15213
0
    int Indices[64];
15214
0
    for (unsigned i = 0; i != NumElts; ++i)
15215
0
      Indices[i] = NumElts + i - ShiftVal;
15216
15217
0
    Value *Zero = llvm::Constant::getNullValue(In->getType());
15218
0
    Value *SV = Builder.CreateShuffleVector(
15219
0
        Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15220
0
    return Builder.CreateBitCast(SV, Ops[0]->getType());
15221
0
  }
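On the vXi1 form, shuffling the mask against zero as above is just an integer left shift of the mask bits with zero fill. A scalar sketch of that reading (illustrative names):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of kshiftli on a W-bit mask: an ordinary left
// shift with zero fill, where shifts >= W (checked above) give 0.
static uint64_t kshiftl(uint64_t mask, unsigned width, unsigned shift) {
  if (shift >= width)
    return 0;
  uint64_t widthMask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
  return (mask << shift) & widthMask;
}

int main() {
  // 16-bit mask (kshiftlihi): 0x00FF << 4 == 0x0FF0.
  std::printf("0x%04llX\n", (unsigned long long)kshiftl(0x00FF, 16, 4));
}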
15222
0
  case X86::BI__builtin_ia32_kshiftriqi:
15223
0
  case X86::BI__builtin_ia32_kshiftrihi:
15224
0
  case X86::BI__builtin_ia32_kshiftrisi:
15225
0
  case X86::BI__builtin_ia32_kshiftridi: {
15226
0
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15227
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15228
15229
0
    if (ShiftVal >= NumElts)
15230
0
      return llvm::Constant::getNullValue(Ops[0]->getType());
15231
15232
0
    Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15233
15234
0
    int Indices[64];
15235
0
    for (unsigned i = 0; i != NumElts; ++i)
15236
0
      Indices[i] = i + ShiftVal;
15237
15238
0
    Value *Zero = llvm::Constant::getNullValue(In->getType());
15239
0
    Value *SV = Builder.CreateShuffleVector(
15240
0
        In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15241
0
    return Builder.CreateBitCast(SV, Ops[0]->getType());
15242
0
  }
15243
0
  case X86::BI__builtin_ia32_movnti:
15244
0
  case X86::BI__builtin_ia32_movnti64:
15245
0
  case X86::BI__builtin_ia32_movntsd:
15246
0
  case X86::BI__builtin_ia32_movntss: {
15247
0
    llvm::MDNode *Node = llvm::MDNode::get(
15248
0
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15249
15250
0
    Value *Ptr = Ops[0];
15251
0
    Value *Src = Ops[1];
15252
15253
    // Extract the 0'th element of the source vector.
15254
0
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15255
0
        BuiltinID == X86::BI__builtin_ia32_movntss)
15256
0
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15257
15258
    // Unaligned nontemporal store of the scalar value.
15259
0
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15260
0
    SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15261
0
    SI->setAlignment(llvm::Align(1));
15262
0
    return SI;
15263
0
  }
15264
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
15265
0
  case X86::BI__builtin_ia32_vprotb:
15266
0
  case X86::BI__builtin_ia32_vprotw:
15267
0
  case X86::BI__builtin_ia32_vprotd:
15268
0
  case X86::BI__builtin_ia32_vprotq:
15269
0
  case X86::BI__builtin_ia32_vprotbi:
15270
0
  case X86::BI__builtin_ia32_vprotwi:
15271
0
  case X86::BI__builtin_ia32_vprotdi:
15272
0
  case X86::BI__builtin_ia32_vprotqi:
15273
0
  case X86::BI__builtin_ia32_prold128:
15274
0
  case X86::BI__builtin_ia32_prold256:
15275
0
  case X86::BI__builtin_ia32_prold512:
15276
0
  case X86::BI__builtin_ia32_prolq128:
15277
0
  case X86::BI__builtin_ia32_prolq256:
15278
0
  case X86::BI__builtin_ia32_prolq512:
15279
0
  case X86::BI__builtin_ia32_prolvd128:
15280
0
  case X86::BI__builtin_ia32_prolvd256:
15281
0
  case X86::BI__builtin_ia32_prolvd512:
15282
0
  case X86::BI__builtin_ia32_prolvq128:
15283
0
  case X86::BI__builtin_ia32_prolvq256:
15284
0
  case X86::BI__builtin_ia32_prolvq512:
15285
0
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15286
0
  case X86::BI__builtin_ia32_prord128:
15287
0
  case X86::BI__builtin_ia32_prord256:
15288
0
  case X86::BI__builtin_ia32_prord512:
15289
0
  case X86::BI__builtin_ia32_prorq128:
15290
0
  case X86::BI__builtin_ia32_prorq256:
15291
0
  case X86::BI__builtin_ia32_prorq512:
15292
0
  case X86::BI__builtin_ia32_prorvd128:
15293
0
  case X86::BI__builtin_ia32_prorvd256:
15294
0
  case X86::BI__builtin_ia32_prorvd512:
15295
0
  case X86::BI__builtin_ia32_prorvq128:
15296
0
  case X86::BI__builtin_ia32_prorvq256:
15297
0
  case X86::BI__builtin_ia32_prorvq512:
15298
0
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
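The rotate builtins above are emitted as funnel shifts whose two value operands are the same vector; with equal operands a funnel shift left degenerates into a rotate left. A small sketch of the identity, assuming nothing beyond standard C++:

#include <cstdint>
#include <cstdio>

// Hypothetical sketch of the identity used above:
// fshl(a, b, n) = (a << n) | (b >> (W - n)) with n taken modulo the width,
// so fshl(x, x, n) == rotl(x, n).
static uint32_t fshl32(uint32_t a, uint32_t b, unsigned n) {
  n &= 31;
  return n ? ((a << n) | (b >> (32 - n))) : a;
}

static uint32_t rotl32(uint32_t x, unsigned n) { return fshl32(x, x, n); }

int main() {
  std::printf("0x%08X\n", rotl32(0x80000001u, 1));   // 0x00000003
}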
15299
0
  case X86::BI__builtin_ia32_selectb_128:
15300
0
  case X86::BI__builtin_ia32_selectb_256:
15301
0
  case X86::BI__builtin_ia32_selectb_512:
15302
0
  case X86::BI__builtin_ia32_selectw_128:
15303
0
  case X86::BI__builtin_ia32_selectw_256:
15304
0
  case X86::BI__builtin_ia32_selectw_512:
15305
0
  case X86::BI__builtin_ia32_selectd_128:
15306
0
  case X86::BI__builtin_ia32_selectd_256:
15307
0
  case X86::BI__builtin_ia32_selectd_512:
15308
0
  case X86::BI__builtin_ia32_selectq_128:
15309
0
  case X86::BI__builtin_ia32_selectq_256:
15310
0
  case X86::BI__builtin_ia32_selectq_512:
15311
0
  case X86::BI__builtin_ia32_selectph_128:
15312
0
  case X86::BI__builtin_ia32_selectph_256:
15313
0
  case X86::BI__builtin_ia32_selectph_512:
15314
0
  case X86::BI__builtin_ia32_selectpbf_128:
15315
0
  case X86::BI__builtin_ia32_selectpbf_256:
15316
0
  case X86::BI__builtin_ia32_selectpbf_512:
15317
0
  case X86::BI__builtin_ia32_selectps_128:
15318
0
  case X86::BI__builtin_ia32_selectps_256:
15319
0
  case X86::BI__builtin_ia32_selectps_512:
15320
0
  case X86::BI__builtin_ia32_selectpd_128:
15321
0
  case X86::BI__builtin_ia32_selectpd_256:
15322
0
  case X86::BI__builtin_ia32_selectpd_512:
15323
0
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15324
0
  case X86::BI__builtin_ia32_selectsh_128:
15325
0
  case X86::BI__builtin_ia32_selectsbf_128:
15326
0
  case X86::BI__builtin_ia32_selectss_128:
15327
0
  case X86::BI__builtin_ia32_selectsd_128: {
15328
0
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15329
0
    Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15330
0
    A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15331
0
    return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15332
0
  }
15333
0
  case X86::BI__builtin_ia32_cmpb128_mask:
15334
0
  case X86::BI__builtin_ia32_cmpb256_mask:
15335
0
  case X86::BI__builtin_ia32_cmpb512_mask:
15336
0
  case X86::BI__builtin_ia32_cmpw128_mask:
15337
0
  case X86::BI__builtin_ia32_cmpw256_mask:
15338
0
  case X86::BI__builtin_ia32_cmpw512_mask:
15339
0
  case X86::BI__builtin_ia32_cmpd128_mask:
15340
0
  case X86::BI__builtin_ia32_cmpd256_mask:
15341
0
  case X86::BI__builtin_ia32_cmpd512_mask:
15342
0
  case X86::BI__builtin_ia32_cmpq128_mask:
15343
0
  case X86::BI__builtin_ia32_cmpq256_mask:
15344
0
  case X86::BI__builtin_ia32_cmpq512_mask: {
15345
0
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15346
0
    return EmitX86MaskedCompare(*this, CC, true, Ops);
15347
0
  }
15348
0
  case X86::BI__builtin_ia32_ucmpb128_mask:
15349
0
  case X86::BI__builtin_ia32_ucmpb256_mask:
15350
0
  case X86::BI__builtin_ia32_ucmpb512_mask:
15351
0
  case X86::BI__builtin_ia32_ucmpw128_mask:
15352
0
  case X86::BI__builtin_ia32_ucmpw256_mask:
15353
0
  case X86::BI__builtin_ia32_ucmpw512_mask:
15354
0
  case X86::BI__builtin_ia32_ucmpd128_mask:
15355
0
  case X86::BI__builtin_ia32_ucmpd256_mask:
15356
0
  case X86::BI__builtin_ia32_ucmpd512_mask:
15357
0
  case X86::BI__builtin_ia32_ucmpq128_mask:
15358
0
  case X86::BI__builtin_ia32_ucmpq256_mask:
15359
0
  case X86::BI__builtin_ia32_ucmpq512_mask: {
15360
0
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15361
0
    return EmitX86MaskedCompare(*this, CC, false, Ops);
15362
0
  }
15363
0
  case X86::BI__builtin_ia32_vpcomb:
15364
0
  case X86::BI__builtin_ia32_vpcomw:
15365
0
  case X86::BI__builtin_ia32_vpcomd:
15366
0
  case X86::BI__builtin_ia32_vpcomq:
15367
0
    return EmitX86vpcom(*this, Ops, true);
15368
0
  case X86::BI__builtin_ia32_vpcomub:
15369
0
  case X86::BI__builtin_ia32_vpcomuw:
15370
0
  case X86::BI__builtin_ia32_vpcomud:
15371
0
  case X86::BI__builtin_ia32_vpcomuq:
15372
0
    return EmitX86vpcom(*this, Ops, false);
15373
15374
0
  case X86::BI__builtin_ia32_kortestcqi:
15375
0
  case X86::BI__builtin_ia32_kortestchi:
15376
0
  case X86::BI__builtin_ia32_kortestcsi:
15377
0
  case X86::BI__builtin_ia32_kortestcdi: {
15378
0
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15379
0
    Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15380
0
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
15381
0
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15382
0
  }
15383
0
  case X86::BI__builtin_ia32_kortestzqi:
15384
0
  case X86::BI__builtin_ia32_kortestzhi:
15385
0
  case X86::BI__builtin_ia32_kortestzsi:
15386
0
  case X86::BI__builtin_ia32_kortestzdi: {
15387
0
    Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15388
0
    Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15389
0
    Value *Cmp = Builder.CreateICmpEQ(Or, C);
15390
0
    return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15391
0
  }
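Both kortest lowerings OR the two masks and compare the result against a constant: all-ones for the 'C' variants, zero for the 'Z' variants. A scalar sketch for 16-bit masks (hypothetical helper names):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of the kortest lowerings above for a 16-bit
// mask: OR the masks and test against all-ones (kortestc) or zero
// (kortestz), returning 0 or 1.
static int kortestc16(uint16_t a, uint16_t b) { return (uint16_t)(a | b) == 0xFFFF; }
static int kortestz16(uint16_t a, uint16_t b) { return (uint16_t)(a | b) == 0; }

int main() {
  std::printf("%d %d\n", kortestc16(0xFF00, 0x00FF), kortestz16(0, 0));   // 1 1
}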
15392
15393
0
  case X86::BI__builtin_ia32_ktestcqi:
15394
0
  case X86::BI__builtin_ia32_ktestzqi:
15395
0
  case X86::BI__builtin_ia32_ktestchi:
15396
0
  case X86::BI__builtin_ia32_ktestzhi:
15397
0
  case X86::BI__builtin_ia32_ktestcsi:
15398
0
  case X86::BI__builtin_ia32_ktestzsi:
15399
0
  case X86::BI__builtin_ia32_ktestcdi:
15400
0
  case X86::BI__builtin_ia32_ktestzdi: {
15401
0
    Intrinsic::ID IID;
15402
0
    switch (BuiltinID) {
15403
0
    default: llvm_unreachable("Unsupported intrinsic!");
15404
0
    case X86::BI__builtin_ia32_ktestcqi:
15405
0
      IID = Intrinsic::x86_avx512_ktestc_b;
15406
0
      break;
15407
0
    case X86::BI__builtin_ia32_ktestzqi:
15408
0
      IID = Intrinsic::x86_avx512_ktestz_b;
15409
0
      break;
15410
0
    case X86::BI__builtin_ia32_ktestchi:
15411
0
      IID = Intrinsic::x86_avx512_ktestc_w;
15412
0
      break;
15413
0
    case X86::BI__builtin_ia32_ktestzhi:
15414
0
      IID = Intrinsic::x86_avx512_ktestz_w;
15415
0
      break;
15416
0
    case X86::BI__builtin_ia32_ktestcsi:
15417
0
      IID = Intrinsic::x86_avx512_ktestc_d;
15418
0
      break;
15419
0
    case X86::BI__builtin_ia32_ktestzsi:
15420
0
      IID = Intrinsic::x86_avx512_ktestz_d;
15421
0
      break;
15422
0
    case X86::BI__builtin_ia32_ktestcdi:
15423
0
      IID = Intrinsic::x86_avx512_ktestc_q;
15424
0
      break;
15425
0
    case X86::BI__builtin_ia32_ktestzdi:
15426
0
      IID = Intrinsic::x86_avx512_ktestz_q;
15427
0
      break;
15428
0
    }
15429
15430
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15431
0
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15432
0
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15433
0
    Function *Intr = CGM.getIntrinsic(IID);
15434
0
    return Builder.CreateCall(Intr, {LHS, RHS});
15435
0
  }
15436
15437
0
  case X86::BI__builtin_ia32_kaddqi:
15438
0
  case X86::BI__builtin_ia32_kaddhi:
15439
0
  case X86::BI__builtin_ia32_kaddsi:
15440
0
  case X86::BI__builtin_ia32_kadddi: {
15441
0
    Intrinsic::ID IID;
15442
0
    switch (BuiltinID) {
15443
0
    default: llvm_unreachable("Unsupported intrinsic!");
15444
0
    case X86::BI__builtin_ia32_kaddqi:
15445
0
      IID = Intrinsic::x86_avx512_kadd_b;
15446
0
      break;
15447
0
    case X86::BI__builtin_ia32_kaddhi:
15448
0
      IID = Intrinsic::x86_avx512_kadd_w;
15449
0
      break;
15450
0
    case X86::BI__builtin_ia32_kaddsi:
15451
0
      IID = Intrinsic::x86_avx512_kadd_d;
15452
0
      break;
15453
0
    case X86::BI__builtin_ia32_kadddi:
15454
0
      IID = Intrinsic::x86_avx512_kadd_q;
15455
0
      break;
15456
0
    }
15457
15458
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15459
0
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15460
0
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15461
0
    Function *Intr = CGM.getIntrinsic(IID);
15462
0
    Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15463
0
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15464
0
  }
15465
0
  case X86::BI__builtin_ia32_kandqi:
15466
0
  case X86::BI__builtin_ia32_kandhi:
15467
0
  case X86::BI__builtin_ia32_kandsi:
15468
0
  case X86::BI__builtin_ia32_kanddi:
15469
0
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
15470
0
  case X86::BI__builtin_ia32_kandnqi:
15471
0
  case X86::BI__builtin_ia32_kandnhi:
15472
0
  case X86::BI__builtin_ia32_kandnsi:
15473
0
  case X86::BI__builtin_ia32_kandndi:
15474
0
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15475
0
  case X86::BI__builtin_ia32_korqi:
15476
0
  case X86::BI__builtin_ia32_korhi:
15477
0
  case X86::BI__builtin_ia32_korsi:
15478
0
  case X86::BI__builtin_ia32_kordi:
15479
0
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15480
0
  case X86::BI__builtin_ia32_kxnorqi:
15481
0
  case X86::BI__builtin_ia32_kxnorhi:
15482
0
  case X86::BI__builtin_ia32_kxnorsi:
15483
0
  case X86::BI__builtin_ia32_kxnordi:
15484
0
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15485
0
  case X86::BI__builtin_ia32_kxorqi:
15486
0
  case X86::BI__builtin_ia32_kxorhi:
15487
0
  case X86::BI__builtin_ia32_kxorsi:
15488
0
  case X86::BI__builtin_ia32_kxordi:
15489
0
    return EmitX86MaskLogic(*this, Instruction::Xor,  Ops);
15490
0
  case X86::BI__builtin_ia32_knotqi:
15491
0
  case X86::BI__builtin_ia32_knothi:
15492
0
  case X86::BI__builtin_ia32_knotsi:
15493
0
  case X86::BI__builtin_ia32_knotdi: {
15494
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15495
0
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15496
0
    return Builder.CreateBitCast(Builder.CreateNot(Res),
15497
0
                                 Ops[0]->getType());
15498
0
  }
15499
0
  case X86::BI__builtin_ia32_kmovb:
15500
0
  case X86::BI__builtin_ia32_kmovw:
15501
0
  case X86::BI__builtin_ia32_kmovd:
15502
0
  case X86::BI__builtin_ia32_kmovq: {
15503
    // Bitcast to vXi1 type and then back to integer. This gets the mask
15504
    // register type into the IR, but might be optimized out depending on
15505
    // what's around it.
15506
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15507
0
    Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15508
0
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15509
0
  }
15510
15511
0
  case X86::BI__builtin_ia32_kunpckdi:
15512
0
  case X86::BI__builtin_ia32_kunpcksi:
15513
0
  case X86::BI__builtin_ia32_kunpckhi: {
15514
0
    unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15515
0
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15516
0
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15517
0
    int Indices[64];
15518
0
    for (unsigned i = 0; i != NumElts; ++i)
15519
0
      Indices[i] = i;
15520
15521
    // First extract half of each vector. This gives better codegen than
15522
    // doing it in a single shuffle.
15523
0
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15524
0
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15525
    // Concat the vectors.
15526
    // NOTE: Operands are swapped to match the intrinsic definition.
15527
0
    Value *Res =
15528
0
        Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15529
0
    return Builder.CreateBitCast(Res, Ops[0]->getType());
15530
0
  }
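Per the operand swap noted above, the concatenation places the low half of the second operand in the low bits of the result and the low half of the first operand above it. A scalar sketch of the 16-bit (kunpckbw) case, with illustrative names:

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of kunpckbw: the low byte of the result comes
// from the second operand and the high byte from the first operand.
static uint16_t kunpckbw(uint16_t src1, uint16_t src2) {
  return (uint16_t)(((src1 & 0xFF) << 8) | (src2 & 0xFF));
}

int main() {
  std::printf("0x%04X\n", kunpckbw(0x00AB, 0x00CD));   // 0xABCD
}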
15531
15532
0
  case X86::BI__builtin_ia32_vplzcntd_128:
15533
0
  case X86::BI__builtin_ia32_vplzcntd_256:
15534
0
  case X86::BI__builtin_ia32_vplzcntd_512:
15535
0
  case X86::BI__builtin_ia32_vplzcntq_128:
15536
0
  case X86::BI__builtin_ia32_vplzcntq_256:
15537
0
  case X86::BI__builtin_ia32_vplzcntq_512: {
15538
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15539
0
    return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15540
0
  }
15541
0
  case X86::BI__builtin_ia32_sqrtss:
15542
0
  case X86::BI__builtin_ia32_sqrtsd: {
15543
0
    Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15544
0
    Function *F;
15545
0
    if (Builder.getIsFPConstrained()) {
15546
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15547
0
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15548
0
                           A->getType());
15549
0
      A = Builder.CreateConstrainedFPCall(F, {A});
15550
0
    } else {
15551
0
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15552
0
      A = Builder.CreateCall(F, {A});
15553
0
    }
15554
0
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15555
0
  }
15556
0
  case X86::BI__builtin_ia32_sqrtsh_round_mask:
15557
0
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
15558
0
  case X86::BI__builtin_ia32_sqrtss_round_mask: {
15559
0
    unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15560
    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15561
    // otherwise keep the intrinsic.
15562
0
    if (CC != 4) {
15563
0
      Intrinsic::ID IID;
15564
15565
0
      switch (BuiltinID) {
15566
0
      default:
15567
0
        llvm_unreachable("Unsupported intrinsic!");
15568
0
      case X86::BI__builtin_ia32_sqrtsh_round_mask:
15569
0
        IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15570
0
        break;
15571
0
      case X86::BI__builtin_ia32_sqrtsd_round_mask:
15572
0
        IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15573
0
        break;
15574
0
      case X86::BI__builtin_ia32_sqrtss_round_mask:
15575
0
        IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15576
0
        break;
15577
0
      }
15578
0
      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15579
0
    }
15580
0
    Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15581
0
    Function *F;
15582
0
    if (Builder.getIsFPConstrained()) {
15583
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15584
0
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15585
0
                           A->getType());
15586
0
      A = Builder.CreateConstrainedFPCall(F, A);
15587
0
    } else {
15588
0
      F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15589
0
      A = Builder.CreateCall(F, A);
15590
0
    }
15591
0
    Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15592
0
    A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15593
0
    return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15594
0
  }
15595
0
  case X86::BI__builtin_ia32_sqrtpd256:
15596
0
  case X86::BI__builtin_ia32_sqrtpd:
15597
0
  case X86::BI__builtin_ia32_sqrtps256:
15598
0
  case X86::BI__builtin_ia32_sqrtps:
15599
0
  case X86::BI__builtin_ia32_sqrtph256:
15600
0
  case X86::BI__builtin_ia32_sqrtph:
15601
0
  case X86::BI__builtin_ia32_sqrtph512:
15602
0
  case X86::BI__builtin_ia32_sqrtps512:
15603
0
  case X86::BI__builtin_ia32_sqrtpd512: {
15604
0
    if (Ops.size() == 2) {
15605
0
      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15606
      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15607
      // otherwise keep the intrinsic.
15608
0
      if (CC != 4) {
15609
0
        Intrinsic::ID IID;
15610
15611
0
        switch (BuiltinID) {
15612
0
        default:
15613
0
          llvm_unreachable("Unsupported intrinsic!");
15614
0
        case X86::BI__builtin_ia32_sqrtph512:
15615
0
          IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15616
0
          break;
15617
0
        case X86::BI__builtin_ia32_sqrtps512:
15618
0
          IID = Intrinsic::x86_avx512_sqrt_ps_512;
15619
0
          break;
15620
0
        case X86::BI__builtin_ia32_sqrtpd512:
15621
0
          IID = Intrinsic::x86_avx512_sqrt_pd_512;
15622
0
          break;
15623
0
        }
15624
0
        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15625
0
      }
15626
0
    }
15627
0
    if (Builder.getIsFPConstrained()) {
15628
0
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15629
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15630
0
                                     Ops[0]->getType());
15631
0
      return Builder.CreateConstrainedFPCall(F, Ops[0]);
15632
0
    } else {
15633
0
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15634
0
      return Builder.CreateCall(F, Ops[0]);
15635
0
    }
15636
0
  }
15637
15638
0
  case X86::BI__builtin_ia32_pmuludq128:
15639
0
  case X86::BI__builtin_ia32_pmuludq256:
15640
0
  case X86::BI__builtin_ia32_pmuludq512:
15641
0
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15642
15643
0
  case X86::BI__builtin_ia32_pmuldq128:
15644
0
  case X86::BI__builtin_ia32_pmuldq256:
15645
0
  case X86::BI__builtin_ia32_pmuldq512:
15646
0
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
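pmuludq and pmuldq multiply only the low 32 bits of each 64-bit element, zero- or sign-extended, and keep the full 64-bit product. A scalar model of one element (hypothetical names, not the EmitX86Muldq helper itself):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of one 64-bit element of pmuludq / pmuldq.
static uint64_t pmuludq1(uint64_t a, uint64_t b) {
  return (uint64_t)(uint32_t)a * (uint64_t)(uint32_t)b;
}
static int64_t pmuldq1(uint64_t a, uint64_t b) {
  return (int64_t)(int32_t)(uint32_t)a * (int64_t)(int32_t)(uint32_t)b;
}

int main() {
  std::printf("%llu %lld\n",
              (unsigned long long)pmuludq1(0xFFFFFFFFull, 2),   // 8589934590
              (long long)pmuldq1(0xFFFFFFFFull, 2));            // -2
}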
15647
15648
0
  case X86::BI__builtin_ia32_pternlogd512_mask:
15649
0
  case X86::BI__builtin_ia32_pternlogq512_mask:
15650
0
  case X86::BI__builtin_ia32_pternlogd128_mask:
15651
0
  case X86::BI__builtin_ia32_pternlogd256_mask:
15652
0
  case X86::BI__builtin_ia32_pternlogq128_mask:
15653
0
  case X86::BI__builtin_ia32_pternlogq256_mask:
15654
0
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15655
15656
0
  case X86::BI__builtin_ia32_pternlogd512_maskz:
15657
0
  case X86::BI__builtin_ia32_pternlogq512_maskz:
15658
0
  case X86::BI__builtin_ia32_pternlogd128_maskz:
15659
0
  case X86::BI__builtin_ia32_pternlogd256_maskz:
15660
0
  case X86::BI__builtin_ia32_pternlogq128_maskz:
15661
0
  case X86::BI__builtin_ia32_pternlogq256_maskz:
15662
0
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
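vpternlog computes every result bit by indexing the 8-bit immediate with the corresponding bits of the three sources. A scalar sketch of one 32-bit element (illustrative names):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of one element of vpternlog: every result bit
// is imm8[(a_bit << 2) | (b_bit << 1) | c_bit].
static uint32_t ternlog32(uint32_t a, uint32_t b, uint32_t c, uint8_t imm) {
  uint32_t r = 0;
  for (unsigned i = 0; i != 32; ++i) {
    unsigned idx = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1) | ((c >> i) & 1);
    r |= (uint32_t)((imm >> idx) & 1) << i;
  }
  return r;
}

int main() {
  // imm = 0x96 encodes a ^ b ^ c.
  std::printf("0x%08X\n",
              (unsigned)ternlog32(0xF0F0F0F0, 0xFF00FF00, 0x0F0F0F0F, 0x96));   // 0x00FF00FF
}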
15663
15664
0
  case X86::BI__builtin_ia32_vpshldd128:
15665
0
  case X86::BI__builtin_ia32_vpshldd256:
15666
0
  case X86::BI__builtin_ia32_vpshldd512:
15667
0
  case X86::BI__builtin_ia32_vpshldq128:
15668
0
  case X86::BI__builtin_ia32_vpshldq256:
15669
0
  case X86::BI__builtin_ia32_vpshldq512:
15670
0
  case X86::BI__builtin_ia32_vpshldw128:
15671
0
  case X86::BI__builtin_ia32_vpshldw256:
15672
0
  case X86::BI__builtin_ia32_vpshldw512:
15673
0
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15674
15675
0
  case X86::BI__builtin_ia32_vpshrdd128:
15676
0
  case X86::BI__builtin_ia32_vpshrdd256:
15677
0
  case X86::BI__builtin_ia32_vpshrdd512:
15678
0
  case X86::BI__builtin_ia32_vpshrdq128:
15679
0
  case X86::BI__builtin_ia32_vpshrdq256:
15680
0
  case X86::BI__builtin_ia32_vpshrdq512:
15681
0
  case X86::BI__builtin_ia32_vpshrdw128:
15682
0
  case X86::BI__builtin_ia32_vpshrdw256:
15683
0
  case X86::BI__builtin_ia32_vpshrdw512:
15684
    // Ops 0 and 1 are swapped.
15685
0
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15686
15687
0
  case X86::BI__builtin_ia32_vpshldvd128:
15688
0
  case X86::BI__builtin_ia32_vpshldvd256:
15689
0
  case X86::BI__builtin_ia32_vpshldvd512:
15690
0
  case X86::BI__builtin_ia32_vpshldvq128:
15691
0
  case X86::BI__builtin_ia32_vpshldvq256:
15692
0
  case X86::BI__builtin_ia32_vpshldvq512:
15693
0
  case X86::BI__builtin_ia32_vpshldvw128:
15694
0
  case X86::BI__builtin_ia32_vpshldvw256:
15695
0
  case X86::BI__builtin_ia32_vpshldvw512:
15696
0
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15697
15698
0
  case X86::BI__builtin_ia32_vpshrdvd128:
15699
0
  case X86::BI__builtin_ia32_vpshrdvd256:
15700
0
  case X86::BI__builtin_ia32_vpshrdvd512:
15701
0
  case X86::BI__builtin_ia32_vpshrdvq128:
15702
0
  case X86::BI__builtin_ia32_vpshrdvq256:
15703
0
  case X86::BI__builtin_ia32_vpshrdvq512:
15704
0
  case X86::BI__builtin_ia32_vpshrdvw128:
15705
0
  case X86::BI__builtin_ia32_vpshrdvw256:
15706
0
  case X86::BI__builtin_ia32_vpshrdvw512:
15707
    // Ops 0 and 1 are swapped.
15708
0
    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15709
15710
  // Reductions
15711
0
  case X86::BI__builtin_ia32_reduce_fadd_pd512:
15712
0
  case X86::BI__builtin_ia32_reduce_fadd_ps512:
15713
0
  case X86::BI__builtin_ia32_reduce_fadd_ph512:
15714
0
  case X86::BI__builtin_ia32_reduce_fadd_ph256:
15715
0
  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15716
0
    Function *F =
15717
0
        CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15718
0
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15719
0
    Builder.getFastMathFlags().setAllowReassoc();
15720
0
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
15721
0
  }
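With reassociation enabled as above, the fadd reduction is a start value plus an unordered sum of the vector elements. A rough scalar model (hypothetical names; the real lowering leaves the summation order unspecified):

#include <cstdio>

// Hypothetical scalar model of the emitted reduction: start value plus the
// sum of all vector elements.
static float reduceFAdd(float start, const float *v, unsigned n) {
  float sum = start;
  for (unsigned i = 0; i != n; ++i)
    sum += v[i];
  return sum;
}

int main() {
  float v[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  std::printf("%g\n", reduceFAdd(0.0f, v, 4));   // 10
}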
15722
0
  case X86::BI__builtin_ia32_reduce_fmul_pd512:
15723
0
  case X86::BI__builtin_ia32_reduce_fmul_ps512:
15724
0
  case X86::BI__builtin_ia32_reduce_fmul_ph512:
15725
0
  case X86::BI__builtin_ia32_reduce_fmul_ph256:
15726
0
  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15727
0
    Function *F =
15728
0
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15729
0
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15730
0
    Builder.getFastMathFlags().setAllowReassoc();
15731
0
    return Builder.CreateCall(F, {Ops[0], Ops[1]});
15732
0
  }
15733
0
  case X86::BI__builtin_ia32_reduce_fmax_pd512:
15734
0
  case X86::BI__builtin_ia32_reduce_fmax_ps512:
15735
0
  case X86::BI__builtin_ia32_reduce_fmax_ph512:
15736
0
  case X86::BI__builtin_ia32_reduce_fmax_ph256:
15737
0
  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15738
0
    Function *F =
15739
0
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15740
0
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15741
0
    Builder.getFastMathFlags().setNoNaNs();
15742
0
    return Builder.CreateCall(F, {Ops[0]});
15743
0
  }
15744
0
  case X86::BI__builtin_ia32_reduce_fmin_pd512:
15745
0
  case X86::BI__builtin_ia32_reduce_fmin_ps512:
15746
0
  case X86::BI__builtin_ia32_reduce_fmin_ph512:
15747
0
  case X86::BI__builtin_ia32_reduce_fmin_ph256:
15748
0
  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15749
0
    Function *F =
15750
0
        CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15751
0
    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15752
0
    Builder.getFastMathFlags().setNoNaNs();
15753
0
    return Builder.CreateCall(F, {Ops[0]});
15754
0
  }
15755
15756
  // 3DNow!
15757
0
  case X86::BI__builtin_ia32_pswapdsf:
15758
0
  case X86::BI__builtin_ia32_pswapdsi: {
15759
0
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15760
0
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15761
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15762
0
    return Builder.CreateCall(F, Ops, "pswapd");
15763
0
  }
15764
0
  case X86::BI__builtin_ia32_rdrand16_step:
15765
0
  case X86::BI__builtin_ia32_rdrand32_step:
15766
0
  case X86::BI__builtin_ia32_rdrand64_step:
15767
0
  case X86::BI__builtin_ia32_rdseed16_step:
15768
0
  case X86::BI__builtin_ia32_rdseed32_step:
15769
0
  case X86::BI__builtin_ia32_rdseed64_step: {
15770
0
    Intrinsic::ID ID;
15771
0
    switch (BuiltinID) {
15772
0
    default: llvm_unreachable("Unsupported intrinsic!");
15773
0
    case X86::BI__builtin_ia32_rdrand16_step:
15774
0
      ID = Intrinsic::x86_rdrand_16;
15775
0
      break;
15776
0
    case X86::BI__builtin_ia32_rdrand32_step:
15777
0
      ID = Intrinsic::x86_rdrand_32;
15778
0
      break;
15779
0
    case X86::BI__builtin_ia32_rdrand64_step:
15780
0
      ID = Intrinsic::x86_rdrand_64;
15781
0
      break;
15782
0
    case X86::BI__builtin_ia32_rdseed16_step:
15783
0
      ID = Intrinsic::x86_rdseed_16;
15784
0
      break;
15785
0
    case X86::BI__builtin_ia32_rdseed32_step:
15786
0
      ID = Intrinsic::x86_rdseed_32;
15787
0
      break;
15788
0
    case X86::BI__builtin_ia32_rdseed64_step:
15789
0
      ID = Intrinsic::x86_rdseed_64;
15790
0
      break;
15791
0
    }
15792
15793
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
15794
0
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
15795
0
                                      Ops[0]);
15796
0
    return Builder.CreateExtractValue(Call, 1);
15797
0
  }
15798
0
  case X86::BI__builtin_ia32_addcarryx_u32:
15799
0
  case X86::BI__builtin_ia32_addcarryx_u64:
15800
0
  case X86::BI__builtin_ia32_subborrow_u32:
15801
0
  case X86::BI__builtin_ia32_subborrow_u64: {
15802
0
    Intrinsic::ID IID;
15803
0
    switch (BuiltinID) {
15804
0
    default: llvm_unreachable("Unsupported intrinsic!");
15805
0
    case X86::BI__builtin_ia32_addcarryx_u32:
15806
0
      IID = Intrinsic::x86_addcarry_32;
15807
0
      break;
15808
0
    case X86::BI__builtin_ia32_addcarryx_u64:
15809
0
      IID = Intrinsic::x86_addcarry_64;
15810
0
      break;
15811
0
    case X86::BI__builtin_ia32_subborrow_u32:
15812
0
      IID = Intrinsic::x86_subborrow_32;
15813
0
      break;
15814
0
    case X86::BI__builtin_ia32_subborrow_u64:
15815
0
      IID = Intrinsic::x86_subborrow_64;
15816
0
      break;
15817
0
    }
15818
15819
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
15820
0
                                     { Ops[0], Ops[1], Ops[2] });
15821
0
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15822
0
                                      Ops[3]);
15823
0
    return Builder.CreateExtractValue(Call, 0);
15824
0
  }
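The addcarry/subborrow lowering calls an intrinsic that yields a {carry, result} pair, stores the result through the pointer operand, and returns the carry. A scalar sketch of the 32-bit add form (illustrative names):

#include <cstdint>
#include <cstdio>

// Hypothetical scalar model of the addcarry lowering above: compute
// a + b + carry_in in 64 bits, store the low 32 bits through 'out', and
// return the carry-out, mirroring the {carry, sum} pair of the intrinsic.
static unsigned char addcarry_u32(unsigned char c_in, uint32_t a, uint32_t b,
                                  uint32_t *out) {
  uint64_t sum = (uint64_t)a + b + c_in;
  *out = (uint32_t)sum;
  return (unsigned char)(sum >> 32);
}

int main() {
  uint32_t out;
  unsigned char c = addcarry_u32(1, 0xFFFFFFFFu, 0, &out);
  std::printf("carry=%u out=%u\n", (unsigned)c, (unsigned)out);   // carry=1 out=0
}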
15825
15826
0
  case X86::BI__builtin_ia32_fpclassps128_mask:
15827
0
  case X86::BI__builtin_ia32_fpclassps256_mask:
15828
0
  case X86::BI__builtin_ia32_fpclassps512_mask:
15829
0
  case X86::BI__builtin_ia32_fpclassph128_mask:
15830
0
  case X86::BI__builtin_ia32_fpclassph256_mask:
15831
0
  case X86::BI__builtin_ia32_fpclassph512_mask:
15832
0
  case X86::BI__builtin_ia32_fpclasspd128_mask:
15833
0
  case X86::BI__builtin_ia32_fpclasspd256_mask:
15834
0
  case X86::BI__builtin_ia32_fpclasspd512_mask: {
15835
0
    unsigned NumElts =
15836
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15837
0
    Value *MaskIn = Ops[2];
15838
0
    Ops.erase(&Ops[2]);
15839
15840
0
    Intrinsic::ID ID;
15841
0
    switch (BuiltinID) {
15842
0
    default: llvm_unreachable("Unsupported intrinsic!");
15843
0
    case X86::BI__builtin_ia32_fpclassph128_mask:
15844
0
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
15845
0
      break;
15846
0
    case X86::BI__builtin_ia32_fpclassph256_mask:
15847
0
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
15848
0
      break;
15849
0
    case X86::BI__builtin_ia32_fpclassph512_mask:
15850
0
      ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
15851
0
      break;
15852
0
    case X86::BI__builtin_ia32_fpclassps128_mask:
15853
0
      ID = Intrinsic::x86_avx512_fpclass_ps_128;
15854
0
      break;
15855
0
    case X86::BI__builtin_ia32_fpclassps256_mask:
15856
0
      ID = Intrinsic::x86_avx512_fpclass_ps_256;
15857
0
      break;
15858
0
    case X86::BI__builtin_ia32_fpclassps512_mask:
15859
0
      ID = Intrinsic::x86_avx512_fpclass_ps_512;
15860
0
      break;
15861
0
    case X86::BI__builtin_ia32_fpclasspd128_mask:
15862
0
      ID = Intrinsic::x86_avx512_fpclass_pd_128;
15863
0
      break;
15864
0
    case X86::BI__builtin_ia32_fpclasspd256_mask:
15865
0
      ID = Intrinsic::x86_avx512_fpclass_pd_256;
15866
0
      break;
15867
0
    case X86::BI__builtin_ia32_fpclasspd512_mask:
15868
0
      ID = Intrinsic::x86_avx512_fpclass_pd_512;
15869
0
      break;
15870
0
    }
15871
15872
0
    Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15873
0
    return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
15874
0
  }
15875
15876
0
  case X86::BI__builtin_ia32_vp2intersect_q_512:
15877
0
  case X86::BI__builtin_ia32_vp2intersect_q_256:
15878
0
  case X86::BI__builtin_ia32_vp2intersect_q_128:
15879
0
  case X86::BI__builtin_ia32_vp2intersect_d_512:
15880
0
  case X86::BI__builtin_ia32_vp2intersect_d_256:
15881
0
  case X86::BI__builtin_ia32_vp2intersect_d_128: {
15882
0
    unsigned NumElts =
15883
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15884
0
    Intrinsic::ID ID;
15885
15886
0
    switch (BuiltinID) {
15887
0
    default: llvm_unreachable("Unsupported intrinsic!");
15888
0
    case X86::BI__builtin_ia32_vp2intersect_q_512:
15889
0
      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
15890
0
      break;
15891
0
    case X86::BI__builtin_ia32_vp2intersect_q_256:
15892
0
      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
15893
0
      break;
15894
0
    case X86::BI__builtin_ia32_vp2intersect_q_128:
15895
0
      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
15896
0
      break;
15897
0
    case X86::BI__builtin_ia32_vp2intersect_d_512:
15898
0
      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
15899
0
      break;
15900
0
    case X86::BI__builtin_ia32_vp2intersect_d_256:
15901
0
      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
15902
0
      break;
15903
0
    case X86::BI__builtin_ia32_vp2intersect_d_128:
15904
0
      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
15905
0
      break;
15906
0
    }
15907
15908
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
15909
0
    Value *Result = Builder.CreateExtractValue(Call, 0);
15910
0
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15911
0
    Builder.CreateDefaultAlignedStore(Result, Ops[2]);
15912
15913
0
    Result = Builder.CreateExtractValue(Call, 1);
15914
0
    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
15915
0
    return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
15916
0
  }
15917
15918
0
  case X86::BI__builtin_ia32_vpmultishiftqb128:
15919
0
  case X86::BI__builtin_ia32_vpmultishiftqb256:
15920
0
  case X86::BI__builtin_ia32_vpmultishiftqb512: {
15921
0
    Intrinsic::ID ID;
15922
0
    switch (BuiltinID) {
15923
0
    default: llvm_unreachable("Unsupported intrinsic!");
15924
0
    case X86::BI__builtin_ia32_vpmultishiftqb128:
15925
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_128;
15926
0
      break;
15927
0
    case X86::BI__builtin_ia32_vpmultishiftqb256:
15928
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
15929
0
      break;
15930
0
    case X86::BI__builtin_ia32_vpmultishiftqb512:
15931
0
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
15932
0
      break;
15933
0
    }
15934
15935
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15936
0
  }
15937
15938
0
  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15939
0
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15940
0
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
15941
0
    unsigned NumElts =
15942
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15943
0
    Value *MaskIn = Ops[2];
15944
0
    Ops.erase(&Ops[2]);
15945
15946
0
    Intrinsic::ID ID;
15947
0
    switch (BuiltinID) {
15948
0
    default: llvm_unreachable("Unsupported intrinsic!");
15949
0
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
15950
0
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
15951
0
      break;
15952
0
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
15953
0
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
15954
0
      break;
15955
0
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
15956
0
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
15957
0
      break;
15958
0
    }
15959
15960
0
    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15961
0
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
15962
0
  }
15963
15964
  // packed comparison intrinsics
15965
0
  case X86::BI__builtin_ia32_cmpeqps:
15966
0
  case X86::BI__builtin_ia32_cmpeqpd:
15967
0
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
15968
0
  case X86::BI__builtin_ia32_cmpltps:
15969
0
  case X86::BI__builtin_ia32_cmpltpd:
15970
0
    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
15971
0
  case X86::BI__builtin_ia32_cmpleps:
15972
0
  case X86::BI__builtin_ia32_cmplepd:
15973
0
    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
15974
0
  case X86::BI__builtin_ia32_cmpunordps:
15975
0
  case X86::BI__builtin_ia32_cmpunordpd:
15976
0
    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
15977
0
  case X86::BI__builtin_ia32_cmpneqps:
15978
0
  case X86::BI__builtin_ia32_cmpneqpd:
15979
0
    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
15980
0
  case X86::BI__builtin_ia32_cmpnltps:
15981
0
  case X86::BI__builtin_ia32_cmpnltpd:
15982
0
    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
15983
0
  case X86::BI__builtin_ia32_cmpnleps:
15984
0
  case X86::BI__builtin_ia32_cmpnlepd:
15985
0
    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
15986
0
  case X86::BI__builtin_ia32_cmpordps:
15987
0
  case X86::BI__builtin_ia32_cmpordpd:
15988
0
    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
15989
0
  case X86::BI__builtin_ia32_cmpph128_mask:
15990
0
  case X86::BI__builtin_ia32_cmpph256_mask:
15991
0
  case X86::BI__builtin_ia32_cmpph512_mask:
15992
0
  case X86::BI__builtin_ia32_cmpps128_mask:
15993
0
  case X86::BI__builtin_ia32_cmpps256_mask:
15994
0
  case X86::BI__builtin_ia32_cmpps512_mask:
15995
0
  case X86::BI__builtin_ia32_cmppd128_mask:
15996
0
  case X86::BI__builtin_ia32_cmppd256_mask:
15997
0
  case X86::BI__builtin_ia32_cmppd512_mask:
15998
0
    IsMaskFCmp = true;
15999
0
    [[fallthrough]];
16000
0
  case X86::BI__builtin_ia32_cmpps:
16001
0
  case X86::BI__builtin_ia32_cmpps256:
16002
0
  case X86::BI__builtin_ia32_cmppd:
16003
0
  case X86::BI__builtin_ia32_cmppd256: {
16004
    // Lowering vector comparisons to fcmp instructions, while
16005
    // ignoring the requested signalling behaviour
16006
    // and the requested rounding mode.
16007
    // This is only possible if fp-model is not strict and FENV_ACCESS is off.
16008
16009
    // The third argument is the comparison condition, an integer in the
16010
    // range [0, 31].
16011
0
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16012
16013
    // Lowering to IR fcmp instruction.
16014
    // Ignoring requested signaling behaviour,
16015
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16016
0
    FCmpInst::Predicate Pred;
16017
0
    bool IsSignaling;
16018
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16019
    // behavior is inverted. We'll handle that after the switch.
16020
0
    switch (CC & 0xf) {
16021
0
    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
16022
0
    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
16023
0
    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
16024
0
    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
16025
0
    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
16026
0
    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
16027
0
    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
16028
0
    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
16029
0
    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
16030
0
    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
16031
0
    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
16032
0
    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16033
0
    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
16034
0
    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
16035
0
    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
16036
0
    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
16037
0
    default: llvm_unreachable("Unhandled CC");
16038
0
    }
16039
16040
    // Invert the signalling behavior for 16-31.
16041
0
    if (CC & 0x10)
16042
0
      IsSignaling = !IsSignaling;
16043
16044
    // If the predicate is true or false and we're using constrained intrinsics,
16045
    // we don't have a compare intrinsic we can use. Just use the legacy X86
16046
    // specific intrinsic.
16047
    // If the intrinsic is mask enabled and we're using constrained intrinsics,
16048
    // use the legacy X86 specific intrinsic.
16049
0
    if (Builder.getIsFPConstrained() &&
16050
0
        (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16051
0
         IsMaskFCmp)) {
16052
16053
0
      Intrinsic::ID IID;
16054
0
      switch (BuiltinID) {
16055
0
      default: llvm_unreachable("Unexpected builtin");
16056
0
      case X86::BI__builtin_ia32_cmpps:
16057
0
        IID = Intrinsic::x86_sse_cmp_ps;
16058
0
        break;
16059
0
      case X86::BI__builtin_ia32_cmpps256:
16060
0
        IID = Intrinsic::x86_avx_cmp_ps_256;
16061
0
        break;
16062
0
      case X86::BI__builtin_ia32_cmppd:
16063
0
        IID = Intrinsic::x86_sse2_cmp_pd;
16064
0
        break;
16065
0
      case X86::BI__builtin_ia32_cmppd256:
16066
0
        IID = Intrinsic::x86_avx_cmp_pd_256;
16067
0
        break;
16068
0
      case X86::BI__builtin_ia32_cmpph128_mask:
16069
0
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16070
0
        break;
16071
0
      case X86::BI__builtin_ia32_cmpph256_mask:
16072
0
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16073
0
        break;
16074
0
      case X86::BI__builtin_ia32_cmpph512_mask:
16075
0
        IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16076
0
        break;
16077
0
      case X86::BI__builtin_ia32_cmpps512_mask:
16078
0
        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16079
0
        break;
16080
0
      case X86::BI__builtin_ia32_cmppd512_mask:
16081
0
        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16082
0
        break;
16083
0
      case X86::BI__builtin_ia32_cmpps128_mask:
16084
0
        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16085
0
        break;
16086
0
      case X86::BI__builtin_ia32_cmpps256_mask:
16087
0
        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16088
0
        break;
16089
0
      case X86::BI__builtin_ia32_cmppd128_mask:
16090
0
        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16091
0
        break;
16092
0
      case X86::BI__builtin_ia32_cmppd256_mask:
16093
0
        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16094
0
        break;
16095
0
      }
16096
16097
0
      Function *Intr = CGM.getIntrinsic(IID);
16098
0
      if (IsMaskFCmp) {
16099
0
        unsigned NumElts =
16100
0
            cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16101
0
        Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16102
0
        Value *Cmp = Builder.CreateCall(Intr, Ops);
16103
0
        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16104
0
      }
16105
16106
0
      return Builder.CreateCall(Intr, Ops);
16107
0
    }
16108
16109
    // Builtins without the _mask suffix return a vector of integers
16110
    // of the same width as the input vectors
16111
0
    if (IsMaskFCmp) {
16112
      // We ignore SAE if strict FP is disabled. We only keep precise
16113
      // exception behavior under strict FP.
16114
      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16115
      // object will be required.
16116
0
      unsigned NumElts =
16117
0
          cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16118
0
      Value *Cmp;
16119
0
      if (IsSignaling)
16120
0
        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16121
0
      else
16122
0
        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16123
0
      return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16124
0
    }
16125
16126
0
    return getVectorFCmpIR(Pred, IsSignaling);
16127
0
  }
16128
16129
  // SSE scalar comparison intrinsics
16130
0
  case X86::BI__builtin_ia32_cmpeqss:
16131
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16132
0
  case X86::BI__builtin_ia32_cmpltss:
16133
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16134
0
  case X86::BI__builtin_ia32_cmpless:
16135
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16136
0
  case X86::BI__builtin_ia32_cmpunordss:
16137
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16138
0
  case X86::BI__builtin_ia32_cmpneqss:
16139
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16140
0
  case X86::BI__builtin_ia32_cmpnltss:
16141
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16142
0
  case X86::BI__builtin_ia32_cmpnless:
16143
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16144
0
  case X86::BI__builtin_ia32_cmpordss:
16145
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16146
0
  case X86::BI__builtin_ia32_cmpeqsd:
16147
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16148
0
  case X86::BI__builtin_ia32_cmpltsd:
16149
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16150
0
  case X86::BI__builtin_ia32_cmplesd:
16151
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16152
0
  case X86::BI__builtin_ia32_cmpunordsd:
16153
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16154
0
  case X86::BI__builtin_ia32_cmpneqsd:
16155
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16156
0
  case X86::BI__builtin_ia32_cmpnltsd:
16157
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16158
0
  case X86::BI__builtin_ia32_cmpnlesd:
16159
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16160
0
  case X86::BI__builtin_ia32_cmpordsd:
16161
0
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16162
16163
  // f16c half2float intrinsics
16164
0
  case X86::BI__builtin_ia32_vcvtph2ps:
16165
0
  case X86::BI__builtin_ia32_vcvtph2ps256:
16166
0
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
16167
0
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16168
0
  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16169
0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16170
0
    return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16171
0
  }
16172
16173
  // AVX512 bf16 intrinsics
16174
0
  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16175
0
    Ops[2] = getMaskVecValue(
16176
0
        *this, Ops[2],
16177
0
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16178
0
    Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16179
0
    return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16180
0
  }
16181
0
  case X86::BI__builtin_ia32_cvtsbf162ss_32:
16182
0
    return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16183
16184
0
  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16185
0
  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16186
0
    Intrinsic::ID IID;
16187
0
    switch (BuiltinID) {
16188
0
    default: llvm_unreachable("Unsupported intrinsic!");
16189
0
    case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16190
0
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16191
0
      break;
16192
0
    case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16193
0
      IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16194
0
      break;
16195
0
    }
16196
0
    Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16197
0
    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16198
0
  }
16199
16200
0
  case X86::BI__cpuid:
16201
0
  case X86::BI__cpuidex: {
16202
0
    Value *FuncId = EmitScalarExpr(E->getArg(1));
16203
0
    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16204
0
                           ? EmitScalarExpr(E->getArg(2))
16205
0
                           : llvm::ConstantInt::get(Int32Ty, 0);
16206
16207
0
    llvm::StructType *CpuidRetTy =
16208
0
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16209
0
    llvm::FunctionType *FTy =
16210
0
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16211
16212
0
    StringRef Asm, Constraints;
16213
0
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16214
0
      Asm = "cpuid";
16215
0
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16216
0
    } else {
16217
      // x86-64 uses %rbx as the base register, so preserve it.
16218
0
      Asm = "xchgq %rbx, ${1:q}\n"
16219
0
            "cpuid\n"
16220
0
            "xchgq %rbx, ${1:q}";
16221
0
      Constraints = "={ax},=r,={cx},={dx},0,2";
16222
0
    }
16223
16224
0
    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16225
0
                                               /*hasSideEffects=*/false);
16226
0
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16227
0
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
16228
0
    Value *Store = nullptr;
16229
0
    for (unsigned i = 0; i < 4; i++) {
16230
0
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
16231
0
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16232
0
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16233
0
    }
16234
16235
    // Return the last store instruction to signal that we have emitted the
16236
    // intrinsic.
16237
0
    return Store;
16238
0
  }
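For reference, a minimal sketch of how the intrinsic expanded above is typically invoked; the helper query_max_leaf and the reliance on <intrin.h> are illustrative assumptions, not taken from this file. The four values extracted and stored above land in cpuInfo[0..3] as EAX, EBX, ECX, EDX respectively.

#include <intrin.h>

// Hypothetical caller of __cpuid (MSVC/clang-cl style).
static int query_max_leaf(void) {
  int cpuInfo[4];
  __cpuid(cpuInfo, 0); // leaf 0
  return cpuInfo[0];   // EAX: highest supported standard leaf
}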
16239
16240
0
  case X86::BI__emul:
16241
0
  case X86::BI__emulu: {
16242
0
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16243
0
    bool isSigned = (BuiltinID == X86::BI__emul);
16244
0
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16245
0
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16246
0
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16247
0
  }
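A minimal reference model of the widening multiply emitted above; emul_ref and emulu_ref are illustrative names, not part of the source.

#include <cstdint>

// __emul: sign-extend both 32-bit operands to 64 bits so the full product is kept.
static int64_t emul_ref(int32_t a, int32_t b) {
  return static_cast<int64_t>(a) * static_cast<int64_t>(b);
}

// __emulu: same shape, but zero-extended.
static uint64_t emulu_ref(uint32_t a, uint32_t b) {
  return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
}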
16248
0
  case X86::BI__mulh:
16249
0
  case X86::BI__umulh:
16250
0
  case X86::BI_mul128:
16251
0
  case X86::BI_umul128: {
16252
0
    llvm::Type *ResType = ConvertType(E->getType());
16253
0
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16254
16255
0
    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16256
0
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16257
0
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16258
16259
0
    Value *MulResult, *HigherBits;
16260
0
    if (IsSigned) {
16261
0
      MulResult = Builder.CreateNSWMul(LHS, RHS);
16262
0
      HigherBits = Builder.CreateAShr(MulResult, 64);
16263
0
    } else {
16264
0
      MulResult = Builder.CreateNUWMul(LHS, RHS);
16265
0
      HigherBits = Builder.CreateLShr(MulResult, 64);
16266
0
    }
16267
0
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16268
16269
0
    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16270
0
      return HigherBits;
16271
16272
0
    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16273
0
    Builder.CreateStore(HigherBits, HighBitsAddress);
16274
0
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16275
0
  }
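A reference sketch of the 128-bit widening performed above for __umulh, assuming the Clang/GCC __int128 extension is available; umulh_ref is an illustrative name.

#include <cstdint>

static uint64_t umulh_ref(uint64_t a, uint64_t b) {
  // Widen to 128 bits, multiply, and keep the upper 64 bits, mirroring the
  // NUW multiply followed by a logical shift right by 64 emitted above.
  unsigned __int128 prod = static_cast<unsigned __int128>(a) * b;
  return static_cast<uint64_t>(prod >> 64);
}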
16276
16277
0
  case X86::BI__faststorefence: {
16278
0
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16279
0
                               llvm::SyncScope::System);
16280
0
  }
16281
0
  case X86::BI__shiftleft128:
16282
0
  case X86::BI__shiftright128: {
16283
0
    llvm::Function *F = CGM.getIntrinsic(
16284
0
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16285
0
        Int64Ty);
16286
    // Flip low/high ops and zero-extend amount to matching type.
16287
    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16288
    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16289
0
    std::swap(Ops[0], Ops[1]);
16290
0
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16291
0
    return Builder.CreateCall(F, Ops);
16292
0
  }
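A reference model of the fshl mapping used above, again assuming the __int128 extension; shiftleft128_ref is an illustrative name, and the shift amount is taken modulo 64 to match the funnel-shift semantics.

#include <cstdint>

static uint64_t shiftleft128_ref(uint64_t Low, uint64_t High, unsigned char Amt) {
  // fshl(High, Low, Amt): shift the High:Low concatenation left and return
  // the upper 64 bits of the 128-bit window.
  unsigned __int128 Wide = (static_cast<unsigned __int128>(High) << 64) | Low;
  return static_cast<uint64_t>((Wide << (Amt & 63)) >> 64);
}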
16293
0
  case X86::BI_ReadWriteBarrier:
16294
0
  case X86::BI_ReadBarrier:
16295
0
  case X86::BI_WriteBarrier: {
16296
0
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16297
0
                               llvm::SyncScope::SingleThread);
16298
0
  }
16299
16300
0
  case X86::BI_AddressOfReturnAddress: {
16301
0
    Function *F =
16302
0
        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16303
0
    return Builder.CreateCall(F);
16304
0
  }
16305
0
  case X86::BI__stosb: {
16306
    // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16307
    // instruction, but it will create a memset that won't be optimized away.
16308
0
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16309
0
  }
16310
0
  case X86::BI__ud2:
16311
    // llvm.trap makes a ud2a instruction on x86.
16312
0
    return EmitTrapCall(Intrinsic::trap);
16313
0
  case X86::BI__int2c: {
16314
    // This syscall signals a driver assertion failure in x86 NT kernels.
16315
0
    llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16316
0
    llvm::InlineAsm *IA =
16317
0
        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16318
0
    llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16319
0
        getLLVMContext(), llvm::AttributeList::FunctionIndex,
16320
0
        llvm::Attribute::NoReturn);
16321
0
    llvm::CallInst *CI = Builder.CreateCall(IA);
16322
0
    CI->setAttributes(NoReturnAttr);
16323
0
    return CI;
16324
0
  }
16325
0
  case X86::BI__readfsbyte:
16326
0
  case X86::BI__readfsword:
16327
0
  case X86::BI__readfsdword:
16328
0
  case X86::BI__readfsqword: {
16329
0
    llvm::Type *IntTy = ConvertType(E->getType());
16330
0
    Value *Ptr = Builder.CreateIntToPtr(
16331
0
        Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16332
0
    LoadInst *Load = Builder.CreateAlignedLoad(
16333
0
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16334
0
    Load->setVolatile(true);
16335
0
    return Load;
16336
0
  }
16337
0
  case X86::BI__readgsbyte:
16338
0
  case X86::BI__readgsword:
16339
0
  case X86::BI__readgsdword:
16340
0
  case X86::BI__readgsqword: {
16341
0
    llvm::Type *IntTy = ConvertType(E->getType());
16342
0
    Value *Ptr = Builder.CreateIntToPtr(
16343
0
        Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16344
0
    LoadInst *Load = Builder.CreateAlignedLoad(
16345
0
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16346
0
    Load->setVolatile(true);
16347
0
    return Load;
16348
0
  }
16349
0
  case X86::BI__builtin_ia32_encodekey128_u32: {
16350
0
    Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16351
16352
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16353
16354
0
    for (int i = 0; i < 3; ++i) {
16355
0
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16356
0
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16357
0
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16358
0
    }
16359
16360
0
    return Builder.CreateExtractValue(Call, 0);
16361
0
  }
16362
0
  case X86::BI__builtin_ia32_encodekey256_u32: {
16363
0
    Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16364
16365
0
    Value *Call =
16366
0
        Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16367
16368
0
    for (int i = 0; i < 4; ++i) {
16369
0
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16370
0
      Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16371
0
      Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16372
0
    }
16373
16374
0
    return Builder.CreateExtractValue(Call, 0);
16375
0
  }
16376
0
  case X86::BI__builtin_ia32_aesenc128kl_u8:
16377
0
  case X86::BI__builtin_ia32_aesdec128kl_u8:
16378
0
  case X86::BI__builtin_ia32_aesenc256kl_u8:
16379
0
  case X86::BI__builtin_ia32_aesdec256kl_u8: {
16380
0
    Intrinsic::ID IID;
16381
0
    StringRef BlockName;
16382
0
    switch (BuiltinID) {
16383
0
    default:
16384
0
      llvm_unreachable("Unexpected builtin");
16385
0
    case X86::BI__builtin_ia32_aesenc128kl_u8:
16386
0
      IID = Intrinsic::x86_aesenc128kl;
16387
0
      BlockName = "aesenc128kl";
16388
0
      break;
16389
0
    case X86::BI__builtin_ia32_aesdec128kl_u8:
16390
0
      IID = Intrinsic::x86_aesdec128kl;
16391
0
      BlockName = "aesdec128kl";
16392
0
      break;
16393
0
    case X86::BI__builtin_ia32_aesenc256kl_u8:
16394
0
      IID = Intrinsic::x86_aesenc256kl;
16395
0
      BlockName = "aesenc256kl";
16396
0
      break;
16397
0
    case X86::BI__builtin_ia32_aesdec256kl_u8:
16398
0
      IID = Intrinsic::x86_aesdec256kl;
16399
0
      BlockName = "aesdec256kl";
16400
0
      break;
16401
0
    }
16402
16403
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16404
16405
0
    BasicBlock *NoError =
16406
0
        createBasicBlock(BlockName + "_no_error", this->CurFn);
16407
0
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16408
0
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16409
16410
0
    Value *Ret = Builder.CreateExtractValue(Call, 0);
16411
0
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16412
0
    Value *Out = Builder.CreateExtractValue(Call, 1);
16413
0
    Builder.CreateCondBr(Succ, NoError, Error);
16414
16415
0
    Builder.SetInsertPoint(NoError);
16416
0
    Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16417
0
    Builder.CreateBr(End);
16418
16419
0
    Builder.SetInsertPoint(Error);
16420
0
    Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16421
0
    Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16422
0
    Builder.CreateBr(End);
16423
16424
0
    Builder.SetInsertPoint(End);
16425
0
    return Builder.CreateExtractValue(Call, 0);
16426
0
  }
16427
0
  case X86::BI__builtin_ia32_aesencwide128kl_u8:
16428
0
  case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16429
0
  case X86::BI__builtin_ia32_aesencwide256kl_u8:
16430
0
  case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16431
0
    Intrinsic::ID IID;
16432
0
    StringRef BlockName;
16433
0
    switch (BuiltinID) {
16434
0
    case X86::BI__builtin_ia32_aesencwide128kl_u8:
16435
0
      IID = Intrinsic::x86_aesencwide128kl;
16436
0
      BlockName = "aesencwide128kl";
16437
0
      break;
16438
0
    case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16439
0
      IID = Intrinsic::x86_aesdecwide128kl;
16440
0
      BlockName = "aesdecwide128kl";
16441
0
      break;
16442
0
    case X86::BI__builtin_ia32_aesencwide256kl_u8:
16443
0
      IID = Intrinsic::x86_aesencwide256kl;
16444
0
      BlockName = "aesencwide256kl";
16445
0
      break;
16446
0
    case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16447
0
      IID = Intrinsic::x86_aesdecwide256kl;
16448
0
      BlockName = "aesdecwide256kl";
16449
0
      break;
16450
0
    }
16451
16452
0
    llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16453
0
    Value *InOps[9];
16454
0
    InOps[0] = Ops[2];
16455
0
    for (int i = 0; i != 8; ++i) {
16456
0
      Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16457
0
      InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16458
0
    }
16459
16460
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16461
16462
0
    BasicBlock *NoError =
16463
0
        createBasicBlock(BlockName + "_no_error", this->CurFn);
16464
0
    BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16465
0
    BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16466
16467
0
    Value *Ret = Builder.CreateExtractValue(Call, 0);
16468
0
    Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16469
0
    Builder.CreateCondBr(Succ, NoError, Error);
16470
16471
0
    Builder.SetInsertPoint(NoError);
16472
0
    for (int i = 0; i != 8; ++i) {
16473
0
      Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16474
0
      Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16475
0
      Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16476
0
    }
16477
0
    Builder.CreateBr(End);
16478
16479
0
    Builder.SetInsertPoint(Error);
16480
0
    for (int i = 0; i != 8; ++i) {
16481
0
      Value *Out = Builder.CreateExtractValue(Call, i + 1);
16482
0
      Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16483
0
      Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16484
0
      Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16485
0
    }
16486
0
    Builder.CreateBr(End);
16487
16488
0
    Builder.SetInsertPoint(End);
16489
0
    return Builder.CreateExtractValue(Call, 0);
16490
0
  }
16491
0
  case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16492
0
    IsConjFMA = true;
16493
0
    [[fallthrough]];
16494
0
  case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16495
0
    Intrinsic::ID IID = IsConjFMA
16496
0
                            ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16497
0
                            : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16498
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16499
0
    return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16500
0
  }
16501
0
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16502
0
    IsConjFMA = true;
16503
0
    [[fallthrough]];
16504
0
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16505
0
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16506
0
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16507
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16508
0
    Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16509
0
    return EmitX86Select(*this, And, Call, Ops[0]);
16510
0
  }
16511
0
  case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16512
0
    IsConjFMA = true;
16513
0
    [[fallthrough]];
16514
0
  case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16515
0
    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16516
0
                                  : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16517
0
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16518
0
    static constexpr int Mask[] = {0, 5, 6, 7};
16519
0
    return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16520
0
  }
16521
0
  case X86::BI__builtin_ia32_prefetchi:
16522
0
    return Builder.CreateCall(
16523
0
        CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16524
0
        {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16525
0
         llvm::ConstantInt::get(Int32Ty, 0)});
16526
0
  }
16527
0
}
16528
16529
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16530
0
                                           const CallExpr *E) {
16531
  // Do not emit the builtin arguments in the arguments of a function call,
16532
  // because the evaluation order of function arguments is not specified in C++.
16533
  // This is important when testing to ensure the arguments are emitted in the
16534
  // same order every time. E.g.:
16535
  // Instead of:
16536
  //   return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16537
  //                             EmitScalarExpr(E->getArg(1)), "swdiv");
16538
  // Use:
16539
  //   Value *Op0 = EmitScalarExpr(E->getArg(0));
16540
  //   Value *Op1 = EmitScalarExpr(E->getArg(1));
16541
  //   return Builder.CreateFDiv(Op0, Op1, "swdiv")
16542
16543
0
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
16544
16545
0
  switch (BuiltinID) {
16546
0
  default: return nullptr;
16547
16548
  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16549
  // call __builtin_readcyclecounter.
16550
0
  case PPC::BI__builtin_ppc_get_timebase:
16551
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16552
16553
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16554
0
  case PPC::BI__builtin_altivec_lvx:
16555
0
  case PPC::BI__builtin_altivec_lvxl:
16556
0
  case PPC::BI__builtin_altivec_lvebx:
16557
0
  case PPC::BI__builtin_altivec_lvehx:
16558
0
  case PPC::BI__builtin_altivec_lvewx:
16559
0
  case PPC::BI__builtin_altivec_lvsl:
16560
0
  case PPC::BI__builtin_altivec_lvsr:
16561
0
  case PPC::BI__builtin_vsx_lxvd2x:
16562
0
  case PPC::BI__builtin_vsx_lxvw4x:
16563
0
  case PPC::BI__builtin_vsx_lxvd2x_be:
16564
0
  case PPC::BI__builtin_vsx_lxvw4x_be:
16565
0
  case PPC::BI__builtin_vsx_lxvl:
16566
0
  case PPC::BI__builtin_vsx_lxvll:
16567
0
  {
16568
0
    SmallVector<Value *, 2> Ops;
16569
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
16570
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
16571
0
    if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16572
0
          BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16573
0
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16574
0
      Ops.pop_back();
16575
0
    }
16576
16577
0
    switch (BuiltinID) {
16578
0
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16579
0
    case PPC::BI__builtin_altivec_lvx:
16580
0
      ID = Intrinsic::ppc_altivec_lvx;
16581
0
      break;
16582
0
    case PPC::BI__builtin_altivec_lvxl:
16583
0
      ID = Intrinsic::ppc_altivec_lvxl;
16584
0
      break;
16585
0
    case PPC::BI__builtin_altivec_lvebx:
16586
0
      ID = Intrinsic::ppc_altivec_lvebx;
16587
0
      break;
16588
0
    case PPC::BI__builtin_altivec_lvehx:
16589
0
      ID = Intrinsic::ppc_altivec_lvehx;
16590
0
      break;
16591
0
    case PPC::BI__builtin_altivec_lvewx:
16592
0
      ID = Intrinsic::ppc_altivec_lvewx;
16593
0
      break;
16594
0
    case PPC::BI__builtin_altivec_lvsl:
16595
0
      ID = Intrinsic::ppc_altivec_lvsl;
16596
0
      break;
16597
0
    case PPC::BI__builtin_altivec_lvsr:
16598
0
      ID = Intrinsic::ppc_altivec_lvsr;
16599
0
      break;
16600
0
    case PPC::BI__builtin_vsx_lxvd2x:
16601
0
      ID = Intrinsic::ppc_vsx_lxvd2x;
16602
0
      break;
16603
0
    case PPC::BI__builtin_vsx_lxvw4x:
16604
0
      ID = Intrinsic::ppc_vsx_lxvw4x;
16605
0
      break;
16606
0
    case PPC::BI__builtin_vsx_lxvd2x_be:
16607
0
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
16608
0
      break;
16609
0
    case PPC::BI__builtin_vsx_lxvw4x_be:
16610
0
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
16611
0
      break;
16612
0
    case PPC::BI__builtin_vsx_lxvl:
16613
0
      ID = Intrinsic::ppc_vsx_lxvl;
16614
0
      break;
16615
0
    case PPC::BI__builtin_vsx_lxvll:
16616
0
      ID = Intrinsic::ppc_vsx_lxvll;
16617
0
      break;
16618
0
    }
16619
0
    llvm::Function *F = CGM.getIntrinsic(ID);
16620
0
    return Builder.CreateCall(F, Ops, "");
16621
0
  }
16622
16623
  // vec_st, vec_xst_be
16624
0
  case PPC::BI__builtin_altivec_stvx:
16625
0
  case PPC::BI__builtin_altivec_stvxl:
16626
0
  case PPC::BI__builtin_altivec_stvebx:
16627
0
  case PPC::BI__builtin_altivec_stvehx:
16628
0
  case PPC::BI__builtin_altivec_stvewx:
16629
0
  case PPC::BI__builtin_vsx_stxvd2x:
16630
0
  case PPC::BI__builtin_vsx_stxvw4x:
16631
0
  case PPC::BI__builtin_vsx_stxvd2x_be:
16632
0
  case PPC::BI__builtin_vsx_stxvw4x_be:
16633
0
  case PPC::BI__builtin_vsx_stxvl:
16634
0
  case PPC::BI__builtin_vsx_stxvll:
16635
0
  {
16636
0
    SmallVector<Value *, 3> Ops;
16637
0
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
16638
0
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
16639
0
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
16640
0
    if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16641
0
          BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16642
0
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16643
0
      Ops.pop_back();
16644
0
    }
16645
16646
0
    switch (BuiltinID) {
16647
0
    default: llvm_unreachable("Unsupported st intrinsic!");
16648
0
    case PPC::BI__builtin_altivec_stvx:
16649
0
      ID = Intrinsic::ppc_altivec_stvx;
16650
0
      break;
16651
0
    case PPC::BI__builtin_altivec_stvxl:
16652
0
      ID = Intrinsic::ppc_altivec_stvxl;
16653
0
      break;
16654
0
    case PPC::BI__builtin_altivec_stvebx:
16655
0
      ID = Intrinsic::ppc_altivec_stvebx;
16656
0
      break;
16657
0
    case PPC::BI__builtin_altivec_stvehx:
16658
0
      ID = Intrinsic::ppc_altivec_stvehx;
16659
0
      break;
16660
0
    case PPC::BI__builtin_altivec_stvewx:
16661
0
      ID = Intrinsic::ppc_altivec_stvewx;
16662
0
      break;
16663
0
    case PPC::BI__builtin_vsx_stxvd2x:
16664
0
      ID = Intrinsic::ppc_vsx_stxvd2x;
16665
0
      break;
16666
0
    case PPC::BI__builtin_vsx_stxvw4x:
16667
0
      ID = Intrinsic::ppc_vsx_stxvw4x;
16668
0
      break;
16669
0
    case PPC::BI__builtin_vsx_stxvd2x_be:
16670
0
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
16671
0
      break;
16672
0
    case PPC::BI__builtin_vsx_stxvw4x_be:
16673
0
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
16674
0
      break;
16675
0
    case PPC::BI__builtin_vsx_stxvl:
16676
0
      ID = Intrinsic::ppc_vsx_stxvl;
16677
0
      break;
16678
0
    case PPC::BI__builtin_vsx_stxvll:
16679
0
      ID = Intrinsic::ppc_vsx_stxvll;
16680
0
      break;
16681
0
    }
16682
0
    llvm::Function *F = CGM.getIntrinsic(ID);
16683
0
    return Builder.CreateCall(F, Ops, "");
16684
0
  }
16685
0
  case PPC::BI__builtin_vsx_ldrmb: {
16686
    // Essentially boils down to performing an unaligned VMX load sequence so
16687
    // as to avoid crossing a page boundary and then shuffling the elements
16688
    // into the right side of the vector register.
16689
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16690
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16691
0
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16692
0
    llvm::Type *ResTy = ConvertType(E->getType());
16693
0
    bool IsLE = getTarget().isLittleEndian();
16694
16695
    // If the user wants the entire vector, just load the entire vector.
16696
0
    if (NumBytes == 16) {
16697
0
      Value *LD =
16698
0
          Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
16699
0
      if (!IsLE)
16700
0
        return LD;
16701
16702
      // Reverse the bytes on LE.
16703
0
      SmallVector<int, 16> RevMask;
16704
0
      for (int Idx = 0; Idx < 16; Idx++)
16705
0
        RevMask.push_back(15 - Idx);
16706
0
      return Builder.CreateShuffleVector(LD, LD, RevMask);
16707
0
    }
16708
16709
0
    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
16710
0
    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
16711
0
                                                : Intrinsic::ppc_altivec_lvsl);
16712
0
    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
16713
0
    Value *HiMem = Builder.CreateGEP(
16714
0
        Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
16715
0
    Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
16716
0
    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
16717
0
    Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
16718
16719
0
    Op0 = IsLE ? HiLd : LoLd;
16720
0
    Op1 = IsLE ? LoLd : HiLd;
16721
0
    Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
16722
0
    Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
16723
16724
0
    if (IsLE) {
16725
0
      SmallVector<int, 16> Consts;
16726
0
      for (int Idx = 0; Idx < 16; Idx++) {
16727
0
        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
16728
0
                                            : 16 - (NumBytes - Idx);
16729
0
        Consts.push_back(Val);
16730
0
      }
16731
0
      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
16732
0
                                         Zero, Consts);
16733
0
    }
16734
0
    SmallVector<Constant *, 16> Consts;
16735
0
    for (int Idx = 0; Idx < 16; Idx++)
16736
0
      Consts.push_back(Builder.getInt8(NumBytes + Idx));
16737
0
    Value *Mask2 = ConstantVector::get(Consts);
16738
0
    return Builder.CreateBitCast(
16739
0
        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
16740
0
  }
16741
0
  case PPC::BI__builtin_vsx_strmb: {
16742
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16743
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16744
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16745
0
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16746
0
    bool IsLE = getTarget().isLittleEndian();
16747
0
    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
16748
      // When storing the whole vector, store it as-is on BE; on LE, reverse the
16749
      // bytes first and then store.
16750
0
      if (Width == 16) {
16751
0
        Value *StVec = Op2;
16752
0
        if (IsLE) {
16753
0
          SmallVector<int, 16> RevMask;
16754
0
          for (int Idx = 0; Idx < 16; Idx++)
16755
0
            RevMask.push_back(15 - Idx);
16756
0
          StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
16757
0
        }
16758
0
        return Builder.CreateStore(
16759
0
            StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
16760
0
      }
16761
0
      auto *ConvTy = Int64Ty;
16762
0
      unsigned NumElts = 0;
16763
0
      switch (Width) {
16764
0
      default:
16765
0
        llvm_unreachable("width for stores must be a power of 2");
16766
0
      case 8:
16767
0
        ConvTy = Int64Ty;
16768
0
        NumElts = 2;
16769
0
        break;
16770
0
      case 4:
16771
0
        ConvTy = Int32Ty;
16772
0
        NumElts = 4;
16773
0
        break;
16774
0
      case 2:
16775
0
        ConvTy = Int16Ty;
16776
0
        NumElts = 8;
16777
0
        break;
16778
0
      case 1:
16779
0
        ConvTy = Int8Ty;
16780
0
        NumElts = 16;
16781
0
        break;
16782
0
      }
16783
0
      Value *Vec = Builder.CreateBitCast(
16784
0
          Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
16785
0
      Value *Ptr =
16786
0
          Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
16787
0
      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
16788
0
      if (IsLE && Width > 1) {
16789
0
        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
16790
0
        Elt = Builder.CreateCall(F, Elt);
16791
0
      }
16792
0
      return Builder.CreateStore(
16793
0
          Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
16794
0
    };
16795
0
    unsigned Stored = 0;
16796
0
    unsigned RemainingBytes = NumBytes;
16797
0
    Value *Result;
16798
0
    if (NumBytes == 16)
16799
0
      return StoreSubVec(16, 0, 0);
16800
0
    if (NumBytes >= 8) {
16801
0
      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
16802
0
      RemainingBytes -= 8;
16803
0
      Stored += 8;
16804
0
    }
16805
0
    if (RemainingBytes >= 4) {
16806
0
      Result = StoreSubVec(4, NumBytes - Stored - 4,
16807
0
                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
16808
0
      RemainingBytes -= 4;
16809
0
      Stored += 4;
16810
0
    }
16811
0
    if (RemainingBytes >= 2) {
16812
0
      Result = StoreSubVec(2, NumBytes - Stored - 2,
16813
0
                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
16814
0
      RemainingBytes -= 2;
16815
0
      Stored += 2;
16816
0
    }
16817
0
    if (RemainingBytes)
16818
0
      Result =
16819
0
          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
16820
0
    return Result;
16821
0
  }
16822
  // Square root
16823
0
  case PPC::BI__builtin_vsx_xvsqrtsp:
16824
0
  case PPC::BI__builtin_vsx_xvsqrtdp: {
16825
0
    llvm::Type *ResultType = ConvertType(E->getType());
16826
0
    Value *X = EmitScalarExpr(E->getArg(0));
16827
0
    if (Builder.getIsFPConstrained()) {
16828
0
      llvm::Function *F = CGM.getIntrinsic(
16829
0
          Intrinsic::experimental_constrained_sqrt, ResultType);
16830
0
      return Builder.CreateConstrainedFPCall(F, X);
16831
0
    } else {
16832
0
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
16833
0
      return Builder.CreateCall(F, X);
16834
0
    }
16835
0
  }
16836
  // Count leading zeros
16837
0
  case PPC::BI__builtin_altivec_vclzb:
16838
0
  case PPC::BI__builtin_altivec_vclzh:
16839
0
  case PPC::BI__builtin_altivec_vclzw:
16840
0
  case PPC::BI__builtin_altivec_vclzd: {
16841
0
    llvm::Type *ResultType = ConvertType(E->getType());
16842
0
    Value *X = EmitScalarExpr(E->getArg(0));
16843
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16844
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
16845
0
    return Builder.CreateCall(F, {X, Undef});
16846
0
  }
16847
0
  case PPC::BI__builtin_altivec_vctzb:
16848
0
  case PPC::BI__builtin_altivec_vctzh:
16849
0
  case PPC::BI__builtin_altivec_vctzw:
16850
0
  case PPC::BI__builtin_altivec_vctzd: {
16851
0
    llvm::Type *ResultType = ConvertType(E->getType());
16852
0
    Value *X = EmitScalarExpr(E->getArg(0));
16853
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
16854
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
16855
0
    return Builder.CreateCall(F, {X, Undef});
16856
0
  }
16857
0
  case PPC::BI__builtin_altivec_vinsd:
16858
0
  case PPC::BI__builtin_altivec_vinsw:
16859
0
  case PPC::BI__builtin_altivec_vinsd_elt:
16860
0
  case PPC::BI__builtin_altivec_vinsw_elt: {
16861
0
    llvm::Type *ResultType = ConvertType(E->getType());
16862
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16863
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16864
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16865
16866
0
    bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16867
0
                        BuiltinID == PPC::BI__builtin_altivec_vinsd);
16868
16869
0
    bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
16870
0
                    BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
16871
16872
    // The third argument must be a compile time constant.
16873
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
16874
0
    assert(ArgCI &&
16875
0
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
16876
16877
    // The valid value for the third argument depends on the input type and the
16878
    // builtin called.
16879
0
    int ValidMaxValue = 0;
16880
0
    if (IsUnaligned)
16881
0
      ValidMaxValue = (Is32bit) ? 12 : 8;
16882
0
    else
16883
0
      ValidMaxValue = (Is32bit) ? 3 : 1;
16884
16885
    // Get value of third argument.
16886
0
    int64_t ConstArg = ArgCI->getSExtValue();
16887
16888
    // Compose range checking error message.
16889
0
    std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
16890
0
    RangeErrMsg += " number " + llvm::to_string(ConstArg);
16891
0
    RangeErrMsg += " is outside of the valid range [0, ";
16892
0
    RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
16893
16894
    // Issue error if third argument is not within the valid range.
16895
0
    if (ConstArg < 0 || ConstArg > ValidMaxValue)
16896
0
      CGM.Error(E->getExprLoc(), RangeErrMsg);
16897
16898
    // Input to vec_replace_elt is an element index, convert to byte index.
16899
0
    if (!IsUnaligned) {
16900
0
      ConstArg *= Is32bit ? 4 : 8;
16901
      // Fix the constant according to endianness.
16902
0
      if (getTarget().isLittleEndian())
16903
0
        ConstArg = (Is32bit ? 12 : 8) - ConstArg;
16904
0
    }
16905
16906
0
    ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
16907
0
    Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
16908
    // Casting input to vector int as per intrinsic definition.
16909
0
    Op0 =
16910
0
        Is32bit
16911
0
            ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
16912
0
            : Builder.CreateBitCast(Op0,
16913
0
                                    llvm::FixedVectorType::get(Int64Ty, 2));
16914
0
    return Builder.CreateBitCast(
16915
0
        Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
16916
0
  }
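As a worked example of the conversion above: vec_replace_elt on 32-bit elements with element index 2 first becomes byte index 2 * 4 = 8, which on a little-endian target is then adjusted to 12 - 8 = 4 before being passed to ppc_altivec_vinsw.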
16917
0
  case PPC::BI__builtin_altivec_vpopcntb:
16918
0
  case PPC::BI__builtin_altivec_vpopcnth:
16919
0
  case PPC::BI__builtin_altivec_vpopcntw:
16920
0
  case PPC::BI__builtin_altivec_vpopcntd: {
16921
0
    llvm::Type *ResultType = ConvertType(E->getType());
16922
0
    Value *X = EmitScalarExpr(E->getArg(0));
16923
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
16924
0
    return Builder.CreateCall(F, X);
16925
0
  }
16926
0
  case PPC::BI__builtin_altivec_vadduqm:
16927
0
  case PPC::BI__builtin_altivec_vsubuqm: {
16928
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16929
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16930
0
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16931
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
16932
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
16933
0
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
16934
0
      return Builder.CreateAdd(Op0, Op1, "vadduqm");
16935
0
    else
16936
0
      return Builder.CreateSub(Op0, Op1, "vsubuqm");
16937
0
  }
16938
0
  case PPC::BI__builtin_altivec_vaddcuq_c:
16939
0
  case PPC::BI__builtin_altivec_vsubcuq_c: {
16940
0
    SmallVector<Value *, 2> Ops;
16941
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16942
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16943
0
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16944
0
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
16945
0
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16946
0
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16947
0
    ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
16948
0
             ? Intrinsic::ppc_altivec_vaddcuq
16949
0
             : Intrinsic::ppc_altivec_vsubcuq;
16950
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16951
0
  }
16952
0
  case PPC::BI__builtin_altivec_vaddeuqm_c:
16953
0
  case PPC::BI__builtin_altivec_vaddecuq_c:
16954
0
  case PPC::BI__builtin_altivec_vsubeuqm_c:
16955
0
  case PPC::BI__builtin_altivec_vsubecuq_c: {
16956
0
    SmallVector<Value *, 3> Ops;
16957
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16958
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16959
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16960
0
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
16961
0
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
16962
0
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
16963
0
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
16964
0
    Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
16965
0
    switch (BuiltinID) {
16966
0
    default:
16967
0
      llvm_unreachable("Unsupported intrinsic!");
16968
0
    case PPC::BI__builtin_altivec_vaddeuqm_c:
16969
0
      ID = Intrinsic::ppc_altivec_vaddeuqm;
16970
0
      break;
16971
0
    case PPC::BI__builtin_altivec_vaddecuq_c:
16972
0
      ID = Intrinsic::ppc_altivec_vaddecuq;
16973
0
      break;
16974
0
    case PPC::BI__builtin_altivec_vsubeuqm_c:
16975
0
      ID = Intrinsic::ppc_altivec_vsubeuqm;
16976
0
      break;
16977
0
    case PPC::BI__builtin_altivec_vsubecuq_c:
16978
0
      ID = Intrinsic::ppc_altivec_vsubecuq;
16979
0
      break;
16980
0
    }
16981
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
16982
0
  }
16983
  // Rotate and insert under mask operation.
16984
  // __rldimi(rs, is, shift, mask)
16985
  // (rotl64(rs, shift) & mask) | (is & ~mask)
16986
  // __rlwimi(rs, is, shift, mask)
16987
  // (rotl(rs, shift) & mask) | (is & ~mask)
16988
0
  case PPC::BI__builtin_ppc_rldimi:
16989
0
  case PPC::BI__builtin_ppc_rlwimi: {
16990
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
16991
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
16992
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
16993
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
16994
0
    llvm::Type *Ty = Op0->getType();
16995
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
16996
0
    if (BuiltinID == PPC::BI__builtin_ppc_rldimi)
16997
0
      Op2 = Builder.CreateZExt(Op2, Int64Ty);
16998
0
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
16999
0
    Value *X = Builder.CreateAnd(Shift, Op3);
17000
0
    Value *Y = Builder.CreateAnd(Op1, Builder.CreateNot(Op3));
17001
0
    return Builder.CreateOr(X, Y);
17002
0
  }
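A reference model of the rotate-and-insert expansion above; rotl64_ref and rldimi_ref are illustrative names.

#include <cstdint>

static uint64_t rotl64_ref(uint64_t rs, unsigned shift) {
  shift &= 63;
  return shift ? (rs << shift) | (rs >> (64 - shift)) : rs;
}

static uint64_t rldimi_ref(uint64_t rs, uint64_t is, unsigned shift, uint64_t mask) {
  // (rotl64(rs, shift) & mask) | (is & ~mask), as in the comment above.
  return (rotl64_ref(rs, shift) & mask) | (is & ~mask);
}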
17003
  // Rotate and insert under mask operation.
17004
  // __rlwnm(rs, shift, mask)
17005
  // rotl(rs, shift) & mask
17006
0
  case PPC::BI__builtin_ppc_rlwnm: {
17007
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17008
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17009
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17010
0
    llvm::Type *Ty = Op0->getType();
17011
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17012
0
    Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op1});
17013
0
    return Builder.CreateAnd(Shift, Op2);
17014
0
  }
17015
0
  case PPC::BI__builtin_ppc_poppar4:
17016
0
  case PPC::BI__builtin_ppc_poppar8: {
17017
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17018
0
    llvm::Type *ArgType = Op0->getType();
17019
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17020
0
    Value *Tmp = Builder.CreateCall(F, Op0);
17021
17022
0
    llvm::Type *ResultType = ConvertType(E->getType());
17023
0
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17024
0
    if (Result->getType() != ResultType)
17025
0
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17026
0
                                     "cast");
17027
0
    return Result;
17028
0
  }
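A reference sketch of the parity computation above, using the Clang/GCC popcount builtin; poppar_ref is an illustrative name.

#include <cstdint>

static int poppar_ref(uint64_t x) {
  // Parity is the low bit of the population count.
  return __builtin_popcountll(x) & 1;
}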
17029
0
  case PPC::BI__builtin_ppc_cmpb: {
17030
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17031
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17032
0
    if (getTarget().getTriple().isPPC64()) {
17033
0
      Function *F =
17034
0
          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17035
0
      return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17036
0
    }
17037
    // For 32 bit, emit the code as below:
17038
    // %conv = trunc i64 %a to i32
17039
    // %conv1 = trunc i64 %b to i32
17040
    // %shr = lshr i64 %a, 32
17041
    // %conv2 = trunc i64 %shr to i32
17042
    // %shr3 = lshr i64 %b, 32
17043
    // %conv4 = trunc i64 %shr3 to i32
17044
    // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17045
    // %conv5 = zext i32 %0 to i64
17046
    // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17047
    // %conv614 = zext i32 %1 to i64
17048
    // %shl = shl nuw i64 %conv614, 32
17049
    // %or = or i64 %shl, %conv5
17050
    // ret i64 %or
17051
0
    Function *F =
17052
0
        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17053
0
    Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17054
0
    Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17055
0
    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17056
0
    Value *ArgOneHi =
17057
0
        Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17058
0
    Value *ArgTwoHi =
17059
0
        Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17060
0
    Value *ResLo = Builder.CreateZExt(
17061
0
        Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17062
0
    Value *ResHiShift = Builder.CreateZExt(
17063
0
        Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17064
0
    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17065
0
    return Builder.CreateOr(ResLo, ResHi);
17066
0
  }
17067
  // Copy sign
17068
0
  case PPC::BI__builtin_vsx_xvcpsgnsp:
17069
0
  case PPC::BI__builtin_vsx_xvcpsgndp: {
17070
0
    llvm::Type *ResultType = ConvertType(E->getType());
17071
0
    Value *X = EmitScalarExpr(E->getArg(0));
17072
0
    Value *Y = EmitScalarExpr(E->getArg(1));
17073
0
    ID = Intrinsic::copysign;
17074
0
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17075
0
    return Builder.CreateCall(F, {X, Y});
17076
0
  }
17077
  // Rounding/truncation
17078
0
  case PPC::BI__builtin_vsx_xvrspip:
17079
0
  case PPC::BI__builtin_vsx_xvrdpip:
17080
0
  case PPC::BI__builtin_vsx_xvrdpim:
17081
0
  case PPC::BI__builtin_vsx_xvrspim:
17082
0
  case PPC::BI__builtin_vsx_xvrdpi:
17083
0
  case PPC::BI__builtin_vsx_xvrspi:
17084
0
  case PPC::BI__builtin_vsx_xvrdpic:
17085
0
  case PPC::BI__builtin_vsx_xvrspic:
17086
0
  case PPC::BI__builtin_vsx_xvrdpiz:
17087
0
  case PPC::BI__builtin_vsx_xvrspiz: {
17088
0
    llvm::Type *ResultType = ConvertType(E->getType());
17089
0
    Value *X = EmitScalarExpr(E->getArg(0));
17090
0
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17091
0
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17092
0
      ID = Builder.getIsFPConstrained()
17093
0
               ? Intrinsic::experimental_constrained_floor
17094
0
               : Intrinsic::floor;
17095
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17096
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17097
0
      ID = Builder.getIsFPConstrained()
17098
0
               ? Intrinsic::experimental_constrained_round
17099
0
               : Intrinsic::round;
17100
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17101
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17102
0
      ID = Builder.getIsFPConstrained()
17103
0
               ? Intrinsic::experimental_constrained_rint
17104
0
               : Intrinsic::rint;
17105
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17106
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17107
0
      ID = Builder.getIsFPConstrained()
17108
0
               ? Intrinsic::experimental_constrained_ceil
17109
0
               : Intrinsic::ceil;
17110
0
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17111
0
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17112
0
      ID = Builder.getIsFPConstrained()
17113
0
               ? Intrinsic::experimental_constrained_trunc
17114
0
               : Intrinsic::trunc;
17115
0
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17116
0
    return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17117
0
                                        : Builder.CreateCall(F, X);
17118
0
  }
17119
17120
  // Absolute value
17121
0
  case PPC::BI__builtin_vsx_xvabsdp:
17122
0
  case PPC::BI__builtin_vsx_xvabssp: {
17123
0
    llvm::Type *ResultType = ConvertType(E->getType());
17124
0
    Value *X = EmitScalarExpr(E->getArg(0));
17125
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17126
0
    return Builder.CreateCall(F, X);
17127
0
  }
17128
17129
  // Fastmath by default
17130
0
  case PPC::BI__builtin_ppc_recipdivf:
17131
0
  case PPC::BI__builtin_ppc_recipdivd:
17132
0
  case PPC::BI__builtin_ppc_rsqrtf:
17133
0
  case PPC::BI__builtin_ppc_rsqrtd: {
17134
0
    FastMathFlags FMF = Builder.getFastMathFlags();
17135
0
    Builder.getFastMathFlags().setFast();
17136
0
    llvm::Type *ResultType = ConvertType(E->getType());
17137
0
    Value *X = EmitScalarExpr(E->getArg(0));
17138
17139
0
    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17140
0
        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17141
0
      Value *Y = EmitScalarExpr(E->getArg(1));
17142
0
      Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17143
0
      Builder.getFastMathFlags() &= (FMF);
17144
0
      return FDiv;
17145
0
    }
17146
0
    auto *One = ConstantFP::get(ResultType, 1.0);
17147
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17148
0
    Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17149
0
    Builder.getFastMathFlags() &= (FMF);
17150
0
    return FDiv;
17151
0
  }
17152
0
  case PPC::BI__builtin_ppc_alignx: {
17153
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17154
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17155
0
    ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17156
0
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17157
0
      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17158
0
                                     llvm::Value::MaximumAlignment);
17159
17160
0
    emitAlignmentAssumption(Op1, E->getArg(1),
17161
0
                            /*The expr loc is sufficient.*/ SourceLocation(),
17162
0
                            AlignmentCI, nullptr);
17163
0
    return Op1;
17164
0
  }
17165
0
  case PPC::BI__builtin_ppc_rdlam: {
17166
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17167
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17168
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17169
0
    llvm::Type *Ty = Op0->getType();
17170
0
    Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17171
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17172
0
    Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17173
0
    return Builder.CreateAnd(Rotate, Op2);
17174
0
  }
17175
0
  case PPC::BI__builtin_ppc_load2r: {
17176
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17177
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17178
0
    Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17179
0
    return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17180
0
  }
17181
  // FMA variations
17182
0
  case PPC::BI__builtin_ppc_fnmsub:
17183
0
  case PPC::BI__builtin_ppc_fnmsubs:
17184
0
  case PPC::BI__builtin_vsx_xvmaddadp:
17185
0
  case PPC::BI__builtin_vsx_xvmaddasp:
17186
0
  case PPC::BI__builtin_vsx_xvnmaddadp:
17187
0
  case PPC::BI__builtin_vsx_xvnmaddasp:
17188
0
  case PPC::BI__builtin_vsx_xvmsubadp:
17189
0
  case PPC::BI__builtin_vsx_xvmsubasp:
17190
0
  case PPC::BI__builtin_vsx_xvnmsubadp:
17191
0
  case PPC::BI__builtin_vsx_xvnmsubasp: {
17192
0
    llvm::Type *ResultType = ConvertType(E->getType());
17193
0
    Value *X = EmitScalarExpr(E->getArg(0));
17194
0
    Value *Y = EmitScalarExpr(E->getArg(1));
17195
0
    Value *Z = EmitScalarExpr(E->getArg(2));
17196
0
    llvm::Function *F;
17197
0
    if (Builder.getIsFPConstrained())
17198
0
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17199
0
    else
17200
0
      F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17201
0
    switch (BuiltinID) {
17202
0
      case PPC::BI__builtin_vsx_xvmaddadp:
17203
0
      case PPC::BI__builtin_vsx_xvmaddasp:
17204
0
        if (Builder.getIsFPConstrained())
17205
0
          return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17206
0
        else
17207
0
          return Builder.CreateCall(F, {X, Y, Z});
17208
0
      case PPC::BI__builtin_vsx_xvnmaddadp:
17209
0
      case PPC::BI__builtin_vsx_xvnmaddasp:
17210
0
        if (Builder.getIsFPConstrained())
17211
0
          return Builder.CreateFNeg(
17212
0
              Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17213
0
        else
17214
0
          return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17215
0
      case PPC::BI__builtin_vsx_xvmsubadp:
17216
0
      case PPC::BI__builtin_vsx_xvmsubasp:
17217
0
        if (Builder.getIsFPConstrained())
17218
0
          return Builder.CreateConstrainedFPCall(
17219
0
              F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17220
0
        else
17221
0
          return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17222
0
      case PPC::BI__builtin_ppc_fnmsub:
17223
0
      case PPC::BI__builtin_ppc_fnmsubs:
17224
0
      case PPC::BI__builtin_vsx_xvnmsubadp:
17225
0
      case PPC::BI__builtin_vsx_xvnmsubasp:
17226
0
        if (Builder.getIsFPConstrained())
17227
0
          return Builder.CreateFNeg(
17228
0
              Builder.CreateConstrainedFPCall(
17229
0
                  F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17230
0
              "neg");
17231
0
        else
17232
0
          return Builder.CreateCall(
17233
0
              CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17234
0
      }
17235
0
    llvm_unreachable("Unknown FMA operation");
17236
0
    return nullptr; // Suppress no-return warning
17237
0
  }
17238
17239
0
  case PPC::BI__builtin_vsx_insertword: {
17240
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17241
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17242
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17243
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17244
17245
    // Third argument is a compile time constant int. It must be clamped
17246
    // to the range [0, 12].
17247
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17248
0
    assert(ArgCI &&
17249
0
           "Third arg to xxinsertw intrinsic must be constant integer");
17250
0
    const int64_t MaxIndex = 12;
17251
0
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17252
17253
    // The builtin semantics don't exactly match the xxinsertw instruction's
17254
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17255
    // word from the first argument, and inserts it in the second argument. The
17256
    // instruction extracts the word from its second input register and inserts
17257
    // it into its first input register, so swap the first and second arguments.
17258
0
    std::swap(Op0, Op1);
17259
17260
    // Need to cast the second argument from a vector of unsigned int to a
17261
    // vector of long long.
17262
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17263
17264
0
    if (getTarget().isLittleEndian()) {
17265
      // Reverse the double words in the vector we will extract from.
17266
0
      Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17267
0
      Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17268
17269
      // Reverse the index.
17270
0
      Index = MaxIndex - Index;
17271
0
    }
17272
17273
    // Intrinsic expects the first arg to be a vector of int.
17274
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17275
0
    Op2 = ConstantInt::getSigned(Int32Ty, Index);
17276
0
    return Builder.CreateCall(F, {Op0, Op1, Op2});
17277
0
  }
17278
17279
0
  case PPC::BI__builtin_vsx_extractuword: {
17280
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17281
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17282
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17283
17284
    // Intrinsic expects the first argument to be a vector of doublewords.
17285
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17286
17287
    // The second argument is a compile time constant int that needs to
17288
    // be clamped to the range [0, 12].
17289
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17290
0
    assert(ArgCI &&
17291
0
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
17292
0
    const int64_t MaxIndex = 12;
17293
0
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17294
17295
0
    if (getTarget().isLittleEndian()) {
17296
      // Reverse the index.
17297
0
      Index = MaxIndex - Index;
17298
0
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
17299
17300
      // Emit the call, then reverse the double words of the results vector.
17301
0
      Value *Call = Builder.CreateCall(F, {Op0, Op1});
17302
17303
0
      Value *ShuffleCall =
17304
0
          Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17305
0
      return ShuffleCall;
17306
0
    } else {
17307
0
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
17308
0
      return Builder.CreateCall(F, {Op0, Op1});
17309
0
    }
17310
0
  }
17311
17312
0
  case PPC::BI__builtin_vsx_xxpermdi: {
17313
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17314
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17315
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17316
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17317
0
    assert(ArgCI && "Third arg must be constant integer!");
17318
17319
0
    unsigned Index = ArgCI->getZExtValue();
17320
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17321
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17322
17323
    // Account for endianness by treating this as just a shuffle. So we use the
17324
    // same indices for both LE and BE in order to produce expected results in
17325
    // both cases.
17326
0
    int ElemIdx0 = (Index & 2) >> 1;
17327
0
    int ElemIdx1 = 2 + (Index & 1);
17328
17329
0
    int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17330
0
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17331
0
    QualType BIRetType = E->getType();
17332
0
    auto RetTy = ConvertType(BIRetType);
17333
0
    return Builder.CreateBitCast(ShuffleCall, RetTy);
17334
0
  }
17335
17336
0
  case PPC::BI__builtin_vsx_xxsldwi: {
17337
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17338
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17339
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17340
0
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17341
0
    assert(ArgCI && "Third argument must be a compile time constant");
17342
0
    unsigned Index = ArgCI->getZExtValue() & 0x3;
17343
0
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17344
0
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17345
17346
    // Create a shuffle mask
17347
0
    int ElemIdx0;
17348
0
    int ElemIdx1;
17349
0
    int ElemIdx2;
17350
0
    int ElemIdx3;
17351
0
    if (getTarget().isLittleEndian()) {
17352
      // Little endian element N comes from element 8+N-Index of the
17353
      // concatenated wide vector (of course, using modulo arithmetic on
17354
      // the total number of elements).
17355
0
      ElemIdx0 = (8 - Index) % 8;
17356
0
      ElemIdx1 = (9 - Index) % 8;
17357
0
      ElemIdx2 = (10 - Index) % 8;
17358
0
      ElemIdx3 = (11 - Index) % 8;
17359
0
    } else {
17360
      // Big endian ElemIdx<N> = Index + N
17361
0
      ElemIdx0 = Index;
17362
0
      ElemIdx1 = Index + 1;
17363
0
      ElemIdx2 = Index + 2;
17364
0
      ElemIdx3 = Index + 3;
17365
0
    }
17366
17367
0
    int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17368
0
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17369
0
    QualType BIRetType = E->getType();
17370
0
    auto RetTy = ConvertType(BIRetType);
17371
0
    return Builder.CreateBitCast(ShuffleCall, RetTy);
17372
0
  }
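
The little-endian and big-endian mask formulas above can likewise be verified with a small standalone helper (hypothetical name, illustration only): the shuffle reads from the 8-word concatenation of Op0 and Op1, and the LE mask is mirrored so both endiannesses observe the same result.

#include <array>
#include <cassert>

static std::array<int, 4> xxsldwiMask(unsigned Index, bool IsLittleEndian) {
  Index &= 0x3;
  std::array<int, 4> Mask;
  for (int N = 0; N < 4; ++N)
    Mask[N] = IsLittleEndian ? (8 + N - (int)Index) % 8 : (int)Index + N;
  return Mask;
}

int main() {
  assert((xxsldwiMask(1, /*IsLittleEndian=*/false) == std::array<int, 4>{1, 2, 3, 4}));
  assert((xxsldwiMask(1, /*IsLittleEndian=*/true) == std::array<int, 4>{7, 0, 1, 2}));
}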
17373
17374
0
  case PPC::BI__builtin_pack_vector_int128: {
17375
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17376
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17377
0
    bool isLittleEndian = getTarget().isLittleEndian();
17378
0
    Value *PoisonValue =
17379
0
        llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17380
0
    Value *Res = Builder.CreateInsertElement(
17381
0
        PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17382
0
    Res = Builder.CreateInsertElement(Res, Op1,
17383
0
                                      (uint64_t)(isLittleEndian ? 0 : 1));
17384
0
    return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17385
0
  }
17386
17387
0
  case PPC::BI__builtin_unpack_vector_int128: {
17388
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17389
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17390
0
    ConstantInt *Index = cast<ConstantInt>(Op1);
17391
0
    Value *Unpacked = Builder.CreateBitCast(
17392
0
        Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17393
17394
0
    if (getTarget().isLittleEndian())
17395
0
      Index =
17396
0
          ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17397
17398
0
    return Builder.CreateExtractElement(Unpacked, Index);
17399
0
  }
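
The pack and unpack builtins above differ across endiannesses only in which 64-bit lane each scalar occupies. A minimal sketch of that lane arithmetic (hypothetical helper names, illustration only):

#include <cassert>

// pack: operand 0 goes to lane 1 on LE and lane 0 on BE; operand 1 the opposite.
static unsigned packLaneForOperand(unsigned OperandNo, bool IsLittleEndian) {
  return IsLittleEndian ? 1 - OperandNo : OperandNo;
}

// unpack: the requested lane is flipped on LE, matching the pack placement.
static unsigned unpackLane(unsigned RequestedLane, bool IsLittleEndian) {
  return IsLittleEndian ? 1 - RequestedLane : RequestedLane;
}

int main() {
  assert(packLaneForOperand(0, /*IsLittleEndian=*/true) == 1);
  assert(packLaneForOperand(1, /*IsLittleEndian=*/true) == 0);
  assert(unpackLane(0, /*IsLittleEndian=*/true) == 1);
  assert(unpackLane(1, /*IsLittleEndian=*/false) == 1);
}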
17400
17401
0
  case PPC::BI__builtin_ppc_sthcx: {
17402
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17403
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17404
0
    Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17405
0
    return Builder.CreateCall(F, {Op0, Op1});
17406
0
  }
17407
17408
  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17409
  // Some of the MMA instructions accumulate their result into an existing
17410
  // accumulator whereas the others generate a new accumulator. So we need to
17411
  // use custom code generation to expand such a builtin call into a load of the
17412
  // accumulator (if the corresponding instruction accumulates its result),
17413
  // followed by the call to the intrinsic and a store of the result.
17414
0
#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17415
0
  case PPC::BI__builtin_##Name:
17416
0
#include "clang/Basic/BuiltinsPPC.def"
17417
0
  {
17418
0
    SmallVector<Value *, 4> Ops;
17419
0
    for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17420
0
      if (E->getArg(i)->getType()->isArrayType())
17421
0
        Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
17422
0
      else
17423
0
        Ops.push_back(EmitScalarExpr(E->getArg(i)));
17424
    // The first argument of these builtins is a pointer used to store their
17425
    // result. However, the LLVM intrinsics return their result in multiple
17426
    // return values. So, here we emit code extracting these values from the
17427
    // intrinsic results and storing them using that pointer.
17428
0
    if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17429
0
        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17430
0
        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17431
0
      unsigned NumVecs = 2;
17432
0
      auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17433
0
      if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17434
0
        NumVecs = 4;
17435
0
        Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17436
0
      }
17437
0
      llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17438
0
      Address Addr = EmitPointerWithAlignment(E->getArg(1));
17439
0
      Value *Vec = Builder.CreateLoad(Addr);
17440
0
      Value *Call = Builder.CreateCall(F, {Vec});
17441
0
      llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17442
0
      Value *Ptr = Ops[0];
17443
0
      for (unsigned i=0; i<NumVecs; i++) {
17444
0
        Value *Vec = Builder.CreateExtractValue(Call, i);
17445
0
        llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17446
0
        Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17447
0
        Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17448
0
      }
17449
0
      return Call;
17450
0
    }
17451
0
    if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17452
0
        BuiltinID == PPC::BI__builtin_mma_build_acc) {
17453
      // Reverse the order of the operands for LE, so the
17454
      // same builtin call can be used on both LE and BE
17455
      // without the need for the programmer to swap operands.
17456
      // The operands are reversed starting from the second argument, since
17457
      // the first operand is the pointer to the pair/accumulator
17458
      // that is being built.
17459
0
      if (getTarget().isLittleEndian())
17460
0
        std::reverse(Ops.begin() + 1, Ops.end());
17461
0
    }
17462
0
    bool Accumulate;
17463
0
    switch (BuiltinID) {
17464
0
  #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17465
0
    case PPC::BI__builtin_##Name: \
17466
0
      ID = Intrinsic::ppc_##Intr; \
17467
0
      Accumulate = Acc; \
17468
0
      break;
17469
0
  #include "clang/Basic/BuiltinsPPC.def"
17470
0
    }
17471
0
    if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17472
0
        BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17473
0
        BuiltinID == PPC::BI__builtin_mma_lxvp ||
17474
0
        BuiltinID == PPC::BI__builtin_mma_stxvp) {
17475
0
      if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17476
0
          BuiltinID == PPC::BI__builtin_mma_lxvp) {
17477
0
        Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17478
0
      } else {
17479
0
        Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17480
0
      }
17481
0
      Ops.pop_back();
17482
0
      llvm::Function *F = CGM.getIntrinsic(ID);
17483
0
      return Builder.CreateCall(F, Ops, "");
17484
0
    }
17485
0
    SmallVector<Value*, 4> CallOps;
17486
0
    if (Accumulate) {
17487
0
      Address Addr = EmitPointerWithAlignment(E->getArg(0));
17488
0
      Value *Acc = Builder.CreateLoad(Addr);
17489
0
      CallOps.push_back(Acc);
17490
0
    }
17491
0
    for (unsigned i=1; i<Ops.size(); i++)
17492
0
      CallOps.push_back(Ops[i]);
17493
0
    llvm::Function *F = CGM.getIntrinsic(ID);
17494
0
    Value *Call = Builder.CreateCall(F, CallOps);
17495
0
    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17496
0
  }
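
As a rough source-level picture of what the expansion above produces for an accumulating builtin, the following plain C++ sketch (hypothetical types and names, not the real __vector_quad or intrinsic) shows the load / call / store shape described in the comment before the CUSTOM_BUILTIN cases:

// Stand-in for a 512-bit accumulator and its accumulating intrinsic.
struct Accumulator { unsigned char Bytes[64]; };

static Accumulator intrinsicAccumulate(Accumulator Acc, int Operand) {
  Acc.Bytes[0] += (unsigned char)Operand; // placeholder computation
  return Acc;
}

static void expandedAccumulatingBuiltin(Accumulator *Ptr, int Operand) {
  Accumulator Acc = *Ptr;                  // load the existing accumulator
  Acc = intrinsicAccumulate(Acc, Operand); // call the intrinsic
  *Ptr = Acc;                              // store the result back
}

int main() {
  Accumulator A = {};
  expandedAccumulatingBuiltin(&A, 3);
  return A.Bytes[0] == 3 ? 0 : 1;
}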
17497
17498
0
  case PPC::BI__builtin_ppc_compare_and_swap:
17499
0
  case PPC::BI__builtin_ppc_compare_and_swaplp: {
17500
0
    Address Addr = EmitPointerWithAlignment(E->getArg(0));
17501
0
    Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17502
0
    Value *OldVal = Builder.CreateLoad(OldValAddr);
17503
0
    QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17504
0
    LValue LV = MakeAddrLValue(Addr, AtomicTy);
17505
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17506
0
    auto Pair = EmitAtomicCompareExchange(
17507
0
        LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17508
0
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17509
    // Unlike C11's atomic_compare_exchange, according to
17510
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17511
    // > In either case, the contents of the memory location specified by addr
17512
    // > are copied into the memory location specified by old_val_addr.
17513
    // But it does not specify whether the store to OldValAddr is atomic or
17514
    // which ordering to use. Following XL's codegen, treat it as a normal
17515
    // store.
17516
0
    Value *LoadedVal = Pair.first.getScalarVal();
17517
0
    Builder.CreateStore(LoadedVal, OldValAddr);
17518
0
    return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17519
0
  }
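
A standard C++ sketch (hypothetical name, illustration only) of the semantics emitted above: a relaxed compare-exchange whose loaded value is always copied back through the old-value pointer, with the success flag widened to int:

#include <atomic>
#include <cassert>

static int compareAndSwapLike(std::atomic<int> &Addr, int *OldValAddr, int NewVal) {
  int Expected = *OldValAddr;
  bool Ok = Addr.compare_exchange_strong(Expected, NewVal,
                                         std::memory_order_relaxed,
                                         std::memory_order_relaxed);
  *OldValAddr = Expected; // the loaded value is copied out in either case
  return Ok ? 1 : 0;
}

int main() {
  std::atomic<int> X{5};
  int Old = 5;
  assert(compareAndSwapLike(X, &Old, 7) == 1 && X.load() == 7 && Old == 5);
  Old = 5;
  assert(compareAndSwapLike(X, &Old, 9) == 0 && Old == 7); // mismatch: Old receives the loaded value
}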
17520
0
  case PPC::BI__builtin_ppc_fetch_and_add:
17521
0
  case PPC::BI__builtin_ppc_fetch_and_addlp: {
17522
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17523
0
                                 llvm::AtomicOrdering::Monotonic);
17524
0
  }
17525
0
  case PPC::BI__builtin_ppc_fetch_and_and:
17526
0
  case PPC::BI__builtin_ppc_fetch_and_andlp: {
17527
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17528
0
                                 llvm::AtomicOrdering::Monotonic);
17529
0
  }
17530
17531
0
  case PPC::BI__builtin_ppc_fetch_and_or:
17532
0
  case PPC::BI__builtin_ppc_fetch_and_orlp: {
17533
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17534
0
                                 llvm::AtomicOrdering::Monotonic);
17535
0
  }
17536
0
  case PPC::BI__builtin_ppc_fetch_and_swap:
17537
0
  case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17538
0
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17539
0
                                 llvm::AtomicOrdering::Monotonic);
17540
0
  }
17541
0
  case PPC::BI__builtin_ppc_ldarx:
17542
0
  case PPC::BI__builtin_ppc_lwarx:
17543
0
  case PPC::BI__builtin_ppc_lharx:
17544
0
  case PPC::BI__builtin_ppc_lbarx:
17545
0
    return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17546
0
  case PPC::BI__builtin_ppc_mfspr: {
17547
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17548
0
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17549
0
                              ? Int32Ty
17550
0
                              : Int64Ty;
17551
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17552
0
    return Builder.CreateCall(F, {Op0});
17553
0
  }
17554
0
  case PPC::BI__builtin_ppc_mtspr: {
17555
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17556
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17557
0
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17558
0
                              ? Int32Ty
17559
0
                              : Int64Ty;
17560
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17561
0
    return Builder.CreateCall(F, {Op0, Op1});
17562
0
  }
17563
0
  case PPC::BI__builtin_ppc_popcntb: {
17564
0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
17565
0
    llvm::Type *ArgType = ArgValue->getType();
17566
0
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17567
0
    return Builder.CreateCall(F, {ArgValue}, "popcntb");
17568
0
  }
17569
0
  case PPC::BI__builtin_ppc_mtfsf: {
17570
    // The builtin takes a uint32 that needs to be cast to an
17571
    // f64 to be passed to the intrinsic.
17572
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17573
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17574
0
    Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17575
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17576
0
    return Builder.CreateCall(F, {Op0, Cast}, "");
17577
0
  }
17578
17579
0
  case PPC::BI__builtin_ppc_swdiv_nochk:
17580
0
  case PPC::BI__builtin_ppc_swdivs_nochk: {
17581
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17582
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17583
0
    FastMathFlags FMF = Builder.getFastMathFlags();
17584
0
    Builder.getFastMathFlags().setFast();
17585
0
    Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17586
0
    Builder.getFastMathFlags() &= (FMF);
17587
0
    return FDiv;
17588
0
  }
17589
0
  case PPC::BI__builtin_ppc_fric:
17590
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17591
0
                           *this, E, Intrinsic::rint,
17592
0
                           Intrinsic::experimental_constrained_rint))
17593
0
        .getScalarVal();
17594
0
  case PPC::BI__builtin_ppc_frim:
17595
0
  case PPC::BI__builtin_ppc_frims:
17596
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17597
0
                           *this, E, Intrinsic::floor,
17598
0
                           Intrinsic::experimental_constrained_floor))
17599
0
        .getScalarVal();
17600
0
  case PPC::BI__builtin_ppc_frin:
17601
0
  case PPC::BI__builtin_ppc_frins:
17602
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17603
0
                           *this, E, Intrinsic::round,
17604
0
                           Intrinsic::experimental_constrained_round))
17605
0
        .getScalarVal();
17606
0
  case PPC::BI__builtin_ppc_frip:
17607
0
  case PPC::BI__builtin_ppc_frips:
17608
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17609
0
                           *this, E, Intrinsic::ceil,
17610
0
                           Intrinsic::experimental_constrained_ceil))
17611
0
        .getScalarVal();
17612
0
  case PPC::BI__builtin_ppc_friz:
17613
0
  case PPC::BI__builtin_ppc_frizs:
17614
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17615
0
                           *this, E, Intrinsic::trunc,
17616
0
                           Intrinsic::experimental_constrained_trunc))
17617
0
        .getScalarVal();
17618
0
  case PPC::BI__builtin_ppc_fsqrt:
17619
0
  case PPC::BI__builtin_ppc_fsqrts:
17620
0
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17621
0
                           *this, E, Intrinsic::sqrt,
17622
0
                           Intrinsic::experimental_constrained_sqrt))
17623
0
        .getScalarVal();
17624
0
  case PPC::BI__builtin_ppc_test_data_class: {
17625
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17626
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17627
0
    return Builder.CreateCall(
17628
0
        CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17629
0
        {Op0, Op1}, "test_data_class");
17630
0
  }
17631
0
  case PPC::BI__builtin_ppc_maxfe: {
17632
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17633
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17634
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17635
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17636
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17637
0
                              {Op0, Op1, Op2, Op3});
17638
0
  }
17639
0
  case PPC::BI__builtin_ppc_maxfl: {
17640
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17641
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17642
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17643
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17644
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17645
0
                              {Op0, Op1, Op2, Op3});
17646
0
  }
17647
0
  case PPC::BI__builtin_ppc_maxfs: {
17648
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17649
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17650
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17651
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17652
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17653
0
                              {Op0, Op1, Op2, Op3});
17654
0
  }
17655
0
  case PPC::BI__builtin_ppc_minfe: {
17656
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17657
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17658
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17659
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17660
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17661
0
                              {Op0, Op1, Op2, Op3});
17662
0
  }
17663
0
  case PPC::BI__builtin_ppc_minfl: {
17664
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17665
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17666
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17667
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17668
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17669
0
                              {Op0, Op1, Op2, Op3});
17670
0
  }
17671
0
  case PPC::BI__builtin_ppc_minfs: {
17672
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17673
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17674
0
    Value *Op2 = EmitScalarExpr(E->getArg(2));
17675
0
    Value *Op3 = EmitScalarExpr(E->getArg(3));
17676
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17677
0
                              {Op0, Op1, Op2, Op3});
17678
0
  }
17679
0
  case PPC::BI__builtin_ppc_swdiv:
17680
0
  case PPC::BI__builtin_ppc_swdivs: {
17681
0
    Value *Op0 = EmitScalarExpr(E->getArg(0));
17682
0
    Value *Op1 = EmitScalarExpr(E->getArg(1));
17683
0
    return Builder.CreateFDiv(Op0, Op1, "swdiv");
17684
0
  }
17685
0
  case PPC::BI__builtin_ppc_set_fpscr_rn:
17686
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
17687
0
                              {EmitScalarExpr(E->getArg(0))});
17688
0
  case PPC::BI__builtin_ppc_mffs:
17689
0
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
17690
0
  }
17691
0
}
17692
17693
namespace {
17694
// If \p E is not a null pointer, insert an address space cast to match the return
17695
// type of \p E if necessary.
17696
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
17697
0
                             const CallExpr *E = nullptr) {
17698
0
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
17699
0
  auto *Call = CGF.Builder.CreateCall(F);
17700
0
  Call->addRetAttr(
17701
0
      Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
17702
0
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
17703
0
  if (!E)
17704
0
    return Call;
17705
0
  QualType BuiltinRetType = E->getType();
17706
0
  auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
17707
0
  if (RetTy == Call->getType())
17708
0
    return Call;
17709
0
  return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
17710
0
}
17711
17712
0
Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
17713
0
  auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
17714
0
  auto *Call = CGF.Builder.CreateCall(F);
17715
0
  Call->addRetAttr(
17716
0
      Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
17717
0
  Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
17718
0
  return Call;
17719
0
}
17720
17721
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
17722
/// Emit code based on Code Object ABI version.
17723
/// COV_4    : Emit code to use dispatch ptr
17724
/// COV_5    : Emit code to use implicitarg ptr
17725
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
17726
///            and use its value for COV_4 or COV_5 approach. It is used for
17727
///            compiling device libraries in an ABI-agnostic way.
17728
///
17729
/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
17730
///       clang during compilation of user code.
17731
0
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
17732
0
  llvm::LoadInst *LD;
17733
17734
0
  auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
17735
17736
0
  if (Cov == CodeObjectVersionKind::COV_None) {
17737
0
    StringRef Name = "__oclc_ABI_version";
17738
0
    auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
17739
0
    if (!ABIVersionC)
17740
0
      ABIVersionC = new llvm::GlobalVariable(
17741
0
          CGF.CGM.getModule(), CGF.Int32Ty, false,
17742
0
          llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
17743
0
          llvm::GlobalVariable::NotThreadLocal,
17744
0
          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
17745
17746
    // This load will be eliminated by the IPSCCP because it is constant
17747
    // weak_odr without externally_initialized. Either changing it to weak or
17748
    // adding externally_initialized will keep the load.
17749
0
    Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
17750
0
                                                      CGF.CGM.getIntAlign());
17751
17752
0
    Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
17753
0
        ABIVersion,
17754
0
        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
17755
17756
    // Indexing the implicit kernarg segment.
17757
0
    Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
17758
0
        CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17759
17760
    // Indexing the HSA kernel_dispatch_packet struct.
17761
0
    Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
17762
0
        CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17763
17764
0
    auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
17765
0
    LD = CGF.Builder.CreateLoad(
17766
0
        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17767
0
  } else {
17768
0
    Value *GEP = nullptr;
17769
0
    if (Cov == CodeObjectVersionKind::COV_5) {
17770
      // Indexing the implicit kernarg segment.
17771
0
      GEP = CGF.Builder.CreateConstGEP1_32(
17772
0
          CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
17773
0
    } else {
17774
      // Indexing the HSA kernel_dispatch_packet struct.
17775
0
      GEP = CGF.Builder.CreateConstGEP1_32(
17776
0
          CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
17777
0
    }
17778
0
    LD = CGF.Builder.CreateLoad(
17779
0
        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
17780
0
  }
17781
17782
0
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
17783
0
  llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
17784
0
      APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
17785
0
  LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
17786
0
  LD->setMetadata(llvm::LLVMContext::MD_noundef,
17787
0
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17788
0
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17789
0
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17790
0
  return LD;
17791
0
}
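
The two paths above read a 16-bit work-group size from different byte offsets depending on the code object version. A small sketch of that offset arithmetic (hypothetical helper, illustration only), with Index being 0/1/2 for x/y/z:

#include <cassert>

static unsigned workGroupSizeByteOffset(unsigned Index, bool UseImplicitArgPtr) {
  // COV_5: implicit kernarg segment; COV_4: HSA kernel_dispatch_packet.
  return UseImplicitArgPtr ? 12 + Index * 2 : 4 + Index * 2;
}

int main() {
  assert(workGroupSizeByteOffset(0, /*UseImplicitArgPtr=*/true) == 12); // x, COV_5
  assert(workGroupSizeByteOffset(2, /*UseImplicitArgPtr=*/false) == 8); // z, COV_4
}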
17792
17793
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
17794
0
Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
17795
0
  const unsigned XOffset = 12;
17796
0
  auto *DP = EmitAMDGPUDispatchPtr(CGF);
17797
  // Indexing the HSA kernel_dispatch_packet struct.
17798
0
  auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
17799
0
  auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
17800
0
  auto *LD = CGF.Builder.CreateLoad(
17801
0
      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
17802
0
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
17803
0
                  llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
17804
0
  return LD;
17805
0
}
17806
} // namespace
17807
17808
// For processing memory ordering and memory scope arguments of various
17809
// amdgcn builtins.
17810
// \p Order takes a C++11 compatible memory-ordering specifier and converts
17811
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
17812
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
17813
// specific SyncScopeID and writes it to \p SSID.
17814
void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
17815
                                              llvm::AtomicOrdering &AO,
17816
0
                                              llvm::SyncScope::ID &SSID) {
17817
0
  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
17818
17819
  // Map C11/C++11 memory ordering to LLVM memory ordering
17820
0
  assert(llvm::isValidAtomicOrderingCABI(ord));
17821
0
  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
17822
0
  case llvm::AtomicOrderingCABI::acquire:
17823
0
  case llvm::AtomicOrderingCABI::consume:
17824
0
    AO = llvm::AtomicOrdering::Acquire;
17825
0
    break;
17826
0
  case llvm::AtomicOrderingCABI::release:
17827
0
    AO = llvm::AtomicOrdering::Release;
17828
0
    break;
17829
0
  case llvm::AtomicOrderingCABI::acq_rel:
17830
0
    AO = llvm::AtomicOrdering::AcquireRelease;
17831
0
    break;
17832
0
  case llvm::AtomicOrderingCABI::seq_cst:
17833
0
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
17834
0
    break;
17835
0
  case llvm::AtomicOrderingCABI::relaxed:
17836
0
    AO = llvm::AtomicOrdering::Monotonic;
17837
0
    break;
17838
0
  }
17839
17840
0
  StringRef scp;
17841
0
  llvm::getConstantStringInfo(Scope, scp);
17842
0
  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
17843
0
}
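
A small sketch of the ordering mapping performed above, using the usual C ABI ordering values (0 = relaxed, 1 = consume, 2 = acquire, 3 = release, 4 = acq_rel, 5 = seq_cst) and a hypothetical enum in place of llvm::AtomicOrdering; note that consume is strengthened to acquire and relaxed becomes monotonic:

#include <cassert>

enum class Ordering { Monotonic, Acquire, Release, AcquireRelease, SequentiallyConsistent };

static Ordering mapCABIOrdering(int Ord) {
  switch (Ord) {
  case 1: // consume
  case 2: // acquire
    return Ordering::Acquire;
  case 3: // release
    return Ordering::Release;
  case 4: // acq_rel
    return Ordering::AcquireRelease;
  case 5: // seq_cst
    return Ordering::SequentiallyConsistent;
  default: // relaxed (0)
    return Ordering::Monotonic;
  }
}

int main() {
  assert(mapCABIOrdering(1) == Ordering::Acquire);
  assert(mapCABIOrdering(0) == Ordering::Monotonic);
}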
17844
17845
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
17846
                                                          unsigned Idx,
17847
0
                                                          const CallExpr *E) {
17848
0
  llvm::Value *Arg = nullptr;
17849
0
  if ((ICEArguments & (1 << Idx)) == 0) {
17850
0
    Arg = EmitScalarExpr(E->getArg(Idx));
17851
0
  } else {
17852
    // If this is required to be a constant, constant fold it so that we
17853
    // know that the generated intrinsic gets a ConstantInt.
17854
0
    std::optional<llvm::APSInt> Result =
17855
0
        E->getArg(Idx)->getIntegerConstantExpr(getContext());
17856
0
    assert(Result && "Expected argument to be a constant");
17857
0
    Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
17858
0
  }
17859
0
  return Arg;
17860
0
}
17861
17862
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
17863
0
                                              const CallExpr *E) {
17864
0
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
17865
0
  llvm::SyncScope::ID SSID;
17866
0
  switch (BuiltinID) {
17867
0
  case AMDGPU::BI__builtin_amdgcn_div_scale:
17868
0
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
17869
    // Translate from the intrinsic's struct return to the builtin's out
17870
    // argument.
17871
17872
0
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
17873
17874
0
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
17875
0
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
17876
0
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));
17877
17878
0
    llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
17879
0
                                           X->getType());
17880
17881
0
    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
17882
17883
0
    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
17884
0
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
17885
17886
0
    llvm::Type *RealFlagType = FlagOutPtr.getElementType();
17887
17888
0
    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
17889
0
    Builder.CreateStore(FlagExt, FlagOutPtr);
17890
0
    return Result;
17891
0
  }
17892
0
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
17893
0
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
17894
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17895
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17896
0
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
17897
0
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
17898
17899
0
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
17900
0
                                      Src0->getType());
17901
0
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
17902
0
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
17903
0
  }
17904
17905
0
  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
17906
0
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
17907
0
  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
17908
0
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
17909
0
  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
17910
0
  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
17911
0
    llvm::SmallVector<llvm::Value *, 6> Args;
17912
    // Find out if any arguments are required to be integer constant
17913
    // expressions.
17914
0
    unsigned ICEArguments = 0;
17915
0
    ASTContext::GetBuiltinTypeError Error;
17916
0
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
17917
0
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
17918
0
    for (unsigned I = 0; I != E->getNumArgs(); ++I) {
17919
0
      Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
17920
0
    }
17921
0
    assert(Args.size() == 5 || Args.size() == 6);
17922
0
    if (Args.size() == 5)
17923
0
      Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
17924
0
    Function *F =
17925
0
        CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
17926
0
    return Builder.CreateCall(F, Args);
17927
0
  }
17928
0
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
17929
0
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
17930
0
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
17931
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
17932
0
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
17933
0
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
17934
0
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
17935
0
  case AMDGPU::BI__builtin_amdgcn_rcp:
17936
0
  case AMDGPU::BI__builtin_amdgcn_rcpf:
17937
0
  case AMDGPU::BI__builtin_amdgcn_rcph:
17938
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
17939
0
  case AMDGPU::BI__builtin_amdgcn_sqrt:
17940
0
  case AMDGPU::BI__builtin_amdgcn_sqrtf:
17941
0
  case AMDGPU::BI__builtin_amdgcn_sqrth:
17942
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
17943
0
  case AMDGPU::BI__builtin_amdgcn_rsq:
17944
0
  case AMDGPU::BI__builtin_amdgcn_rsqf:
17945
0
  case AMDGPU::BI__builtin_amdgcn_rsqh:
17946
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
17947
0
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
17948
0
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
17949
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
17950
0
  case AMDGPU::BI__builtin_amdgcn_sinf:
17951
0
  case AMDGPU::BI__builtin_amdgcn_sinh:
17952
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
17953
0
  case AMDGPU::BI__builtin_amdgcn_cosf:
17954
0
  case AMDGPU::BI__builtin_amdgcn_cosh:
17955
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
17956
0
  case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
17957
0
    return EmitAMDGPUDispatchPtr(*this, E);
17958
0
  case AMDGPU::BI__builtin_amdgcn_logf:
17959
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
17960
0
  case AMDGPU::BI__builtin_amdgcn_exp2f:
17961
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
17962
0
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
17963
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
17964
0
  case AMDGPU::BI__builtin_amdgcn_ldexp:
17965
0
  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
17966
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17967
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17968
0
    llvm::Function *F =
17969
0
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
17970
0
    return Builder.CreateCall(F, {Src0, Src1});
17971
0
  }
17972
0
  case AMDGPU::BI__builtin_amdgcn_ldexph: {
17973
    // The raw instruction has a different behavior for out of bounds exponent
17974
    // values (implicit truncation instead of saturate to short_min/short_max).
17975
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17976
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17977
0
    llvm::Function *F =
17978
0
        CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
17979
0
    return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
17980
0
  }
17981
0
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
17982
0
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
17983
0
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
17984
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
17985
0
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
17986
0
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
17987
0
    Value *Src0 = EmitScalarExpr(E->getArg(0));
17988
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17989
0
                                { Builder.getInt32Ty(), Src0->getType() });
17990
0
    return Builder.CreateCall(F, Src0);
17991
0
  }
17992
0
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
17993
0
    Value *Src0 = EmitScalarExpr(E->getArg(0));
17994
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
17995
0
                                { Builder.getInt16Ty(), Src0->getType() });
17996
0
    return Builder.CreateCall(F, Src0);
17997
0
  }
17998
0
  case AMDGPU::BI__builtin_amdgcn_fract:
17999
0
  case AMDGPU::BI__builtin_amdgcn_fractf:
18000
0
  case AMDGPU::BI__builtin_amdgcn_fracth:
18001
0
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18002
0
  case AMDGPU::BI__builtin_amdgcn_lerp:
18003
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18004
0
  case AMDGPU::BI__builtin_amdgcn_ubfe:
18005
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18006
0
  case AMDGPU::BI__builtin_amdgcn_sbfe:
18007
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18008
0
  case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18009
0
  case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18010
0
    llvm::Type *ResultType = ConvertType(E->getType());
18011
0
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18012
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18013
0
    return Builder.CreateCall(F, { Src });
18014
0
  }
18015
0
  case AMDGPU::BI__builtin_amdgcn_uicmp:
18016
0
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
18017
0
  case AMDGPU::BI__builtin_amdgcn_sicmp:
18018
0
  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18019
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18020
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18021
0
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18022
18023
    // FIXME-GFX10: How should 32 bit mask be handled?
18024
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18025
0
      { Builder.getInt64Ty(), Src0->getType() });
18026
0
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
18027
0
  }
18028
0
  case AMDGPU::BI__builtin_amdgcn_fcmp:
18029
0
  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18030
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18031
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18032
0
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18033
18034
    // FIXME-GFX10: How should 32 bit mask be handled?
18035
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18036
0
      { Builder.getInt64Ty(), Src0->getType() });
18037
0
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
18038
0
  }
18039
0
  case AMDGPU::BI__builtin_amdgcn_class:
18040
0
  case AMDGPU::BI__builtin_amdgcn_classf:
18041
0
  case AMDGPU::BI__builtin_amdgcn_classh:
18042
0
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18043
0
  case AMDGPU::BI__builtin_amdgcn_fmed3f:
18044
0
  case AMDGPU::BI__builtin_amdgcn_fmed3h:
18045
0
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18046
0
  case AMDGPU::BI__builtin_amdgcn_ds_append:
18047
0
  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18048
0
    Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18049
0
      Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18050
0
    Value *Src0 = EmitScalarExpr(E->getArg(0));
18051
0
    Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18052
0
    return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18053
0
  }
18054
0
  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18055
0
  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18056
0
  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18057
0
    Intrinsic::ID Intrin;
18058
0
    switch (BuiltinID) {
18059
0
    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18060
0
      Intrin = Intrinsic::amdgcn_ds_fadd;
18061
0
      break;
18062
0
    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18063
0
      Intrin = Intrinsic::amdgcn_ds_fmin;
18064
0
      break;
18065
0
    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18066
0
      Intrin = Intrinsic::amdgcn_ds_fmax;
18067
0
      break;
18068
0
    }
18069
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18070
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18071
0
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18072
0
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18073
0
    llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
18074
0
    llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
18075
0
    llvm::FunctionType *FTy = F->getFunctionType();
18076
0
    llvm::Type *PTy = FTy->getParamType(0);
18077
0
    Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
18078
0
    return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
18079
0
  }
18080
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18081
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18082
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18083
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18084
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18085
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18086
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18087
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18088
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18089
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18090
0
    Intrinsic::ID IID;
18091
0
    llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18092
0
    switch (BuiltinID) {
18093
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18094
0
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18095
0
      IID = Intrinsic::amdgcn_global_atomic_fadd;
18096
0
      break;
18097
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18098
0
      ArgTy = llvm::FixedVectorType::get(
18099
0
          llvm::Type::getHalfTy(getLLVMContext()), 2);
18100
0
      IID = Intrinsic::amdgcn_global_atomic_fadd;
18101
0
      break;
18102
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18103
0
      IID = Intrinsic::amdgcn_global_atomic_fadd;
18104
0
      break;
18105
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18106
0
      IID = Intrinsic::amdgcn_global_atomic_fmin;
18107
0
      break;
18108
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18109
0
      IID = Intrinsic::amdgcn_global_atomic_fmax;
18110
0
      break;
18111
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18112
0
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
18113
0
      break;
18114
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18115
0
      IID = Intrinsic::amdgcn_flat_atomic_fmin;
18116
0
      break;
18117
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18118
0
      IID = Intrinsic::amdgcn_flat_atomic_fmax;
18119
0
      break;
18120
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18121
0
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18122
0
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
18123
0
      break;
18124
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18125
0
      ArgTy = llvm::FixedVectorType::get(
18126
0
          llvm::Type::getHalfTy(getLLVMContext()), 2);
18127
0
      IID = Intrinsic::amdgcn_flat_atomic_fadd;
18128
0
      break;
18129
0
    }
18130
0
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18131
0
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18132
0
    llvm::Function *F =
18133
0
        CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18134
0
    return Builder.CreateCall(F, {Addr, Val});
18135
0
  }
18136
0
  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18137
0
  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18138
0
    Intrinsic::ID IID;
18139
0
    switch (BuiltinID) {
18140
0
    case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18141
0
      IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18142
0
      break;
18143
0
    case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18144
0
      IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18145
0
      break;
18146
0
    }
18147
0
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18148
0
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18149
0
    llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18150
0
    return Builder.CreateCall(F, {Addr, Val});
18151
0
  }
18152
0
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18153
0
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18154
0
  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18155
0
    Intrinsic::ID IID;
18156
0
    llvm::Type *ArgTy;
18157
0
    switch (BuiltinID) {
18158
0
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18159
0
      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18160
0
      IID = Intrinsic::amdgcn_ds_fadd;
18161
0
      break;
18162
0
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18163
0
      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18164
0
      IID = Intrinsic::amdgcn_ds_fadd;
18165
0
      break;
18166
0
    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18167
0
      ArgTy = llvm::FixedVectorType::get(
18168
0
          llvm::Type::getHalfTy(getLLVMContext()), 2);
18169
0
      IID = Intrinsic::amdgcn_ds_fadd;
18170
0
      break;
18171
0
    }
18172
0
    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18173
0
    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18174
0
    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18175
0
        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
18176
0
    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18177
0
        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
18178
0
    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
18179
0
    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18180
0
  }
18181
0
  case AMDGPU::BI__builtin_amdgcn_read_exec:
18182
0
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18183
0
  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18184
0
    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18185
0
  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18186
0
    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18187
0
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18188
0
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18189
0
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18190
0
  case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18191
0
    llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18192
0
    llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18193
0
    llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18194
0
    llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18195
0
    llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18196
0
    llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18197
18198
    // The builtins take these arguments as vec4 where the last element is
18199
    // ignored. The intrinsic takes them as vec3.
18200
0
    RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18201
0
                                            ArrayRef<int>{0, 1, 2});
18202
0
    RayDir =
18203
0
        Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18204
0
    RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18205
0
                                                ArrayRef<int>{0, 1, 2});
18206
18207
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18208
0
                                   {NodePtr->getType(), RayDir->getType()});
18209
0
    return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18210
0
                                  RayInverseDir, TextureDescr});
18211
0
  }
18212
18213
0
  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18214
0
    SmallVector<Value *, 4> Args;
18215
0
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18216
0
      Args.push_back(EmitScalarExpr(E->getArg(i)));
18217
18218
0
    Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18219
0
    Value *Call = Builder.CreateCall(F, Args);
18220
0
    Value *Rtn = Builder.CreateExtractValue(Call, 0);
18221
0
    Value *A = Builder.CreateExtractValue(Call, 1);
18222
0
    llvm::Type *RetTy = ConvertType(E->getType());
18223
0
    Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18224
0
                                            (uint64_t)0);
18225
0
    return Builder.CreateInsertElement(I0, A, 1);
18226
0
  }
18227
18228
0
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18229
0
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18230
0
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18231
0
  case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18232
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18233
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18234
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18235
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18236
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18237
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18238
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18239
0
  case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18240
0
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18241
0
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18242
0
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18243
0
  case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: {
18244
18245
    // These operations perform a matrix multiplication and accumulation of
18246
    // the form:
18247
    //             D = A * B + C
18248
    // The return type always matches the type of matrix C.
18249
0
    unsigned ArgForMatchingRetType;
18250
0
    unsigned BuiltinWMMAOp;
18251
18252
0
    switch (BuiltinID) {
18253
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18254
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18255
0
      ArgForMatchingRetType = 2;
18256
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18257
0
      break;
18258
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18259
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18260
0
      ArgForMatchingRetType = 2;
18261
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18262
0
      break;
18263
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18264
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18265
0
      ArgForMatchingRetType = 2;
18266
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18267
0
      break;
18268
0
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18269
0
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18270
0
      ArgForMatchingRetType = 2;
18271
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18272
0
      break;
18273
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18274
0
    case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18275
0
      ArgForMatchingRetType = 2;
18276
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18277
0
      break;
18278
0
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18279
0
    case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18280
0
      ArgForMatchingRetType = 2;
18281
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18282
0
      break;
18283
0
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18284
0
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18285
0
      ArgForMatchingRetType = 4;
18286
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18287
0
      break;
18288
0
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18289
0
    case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18290
0
      ArgForMatchingRetType = 4;
18291
0
      BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18292
0
      break;
18293
0
    }
18294
18295
0
    SmallVector<Value *, 6> Args;
18296
0
    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18297
0
      Args.push_back(EmitScalarExpr(E->getArg(i)));
18298
18299
0
    Function *F = CGM.getIntrinsic(BuiltinWMMAOp,
18300
0
                                   {Args[ArgForMatchingRetType]->getType()});
18301
18302
0
    return Builder.CreateCall(F, Args);
18303
0
  }
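
As a worked scalar reference for the operation these builtins describe, the following computes D = A * B + C over 16x16 tiles (illustration only; it says nothing about how the intrinsic or the hardware performs the computation):

#include <array>
#include <cassert>

constexpr int N = 16;
using Tile = std::array<std::array<float, N>, N>;

static Tile wmmaReference(const Tile &A, const Tile &B, const Tile &C) {
  Tile D = C; // accumulate into a copy of C; the result type matches C
  for (int I = 0; I < N; ++I)
    for (int J = 0; J < N; ++J)
      for (int K = 0; K < N; ++K)
        D[I][J] += A[I][K] * B[K][J];
  return D;
}

int main() {
  Tile A{}, B{}, C{};
  A[0][1] = 2.0f;
  B[1][0] = 3.0f;
  C[0][0] = 1.0f;
  assert(wmmaReference(A, B, C)[0][0] == 7.0f); // 2 * 3 + 1
}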
18304
18305
  // amdgcn workitem
18306
0
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18307
0
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18308
0
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18309
0
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18310
0
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18311
0
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18312
18313
  // amdgcn workgroup size
18314
0
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18315
0
    return EmitAMDGPUWorkGroupSize(*this, 0);
18316
0
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
18317
0
    return EmitAMDGPUWorkGroupSize(*this, 1);
18318
0
  case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
18319
0
    return EmitAMDGPUWorkGroupSize(*this, 2);
18320
18321
  // amdgcn grid size
18322
0
  case AMDGPU::BI__builtin_amdgcn_grid_size_x:
18323
0
    return EmitAMDGPUGridSize(*this, 0);
18324
0
  case AMDGPU::BI__builtin_amdgcn_grid_size_y:
18325
0
    return EmitAMDGPUGridSize(*this, 1);
18326
0
  case AMDGPU::BI__builtin_amdgcn_grid_size_z:
18327
0
    return EmitAMDGPUGridSize(*this, 2);
18328
18329
  // r600 intrinsics
18330
0
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
18331
0
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
18332
0
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
18333
0
  case AMDGPU::BI__builtin_r600_read_tidig_x:
18334
0
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
18335
0
  case AMDGPU::BI__builtin_r600_read_tidig_y:
18336
0
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
18337
0
  case AMDGPU::BI__builtin_r600_read_tidig_z:
18338
0
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
18339
0
  case AMDGPU::BI__builtin_amdgcn_alignbit: {
18340
0
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18341
0
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18342
0
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18343
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
18344
0
    return Builder.CreateCall(F, { Src0, Src1, Src2 });
18345
0
  }
18346
0
  case AMDGPU::BI__builtin_amdgcn_fence: {
18347
0
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
18348
0
                            EmitScalarExpr(E->getArg(1)), AO, SSID);
18349
0
    return Builder.CreateFence(AO, SSID);
18350
0
  }
18351
0
  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18352
0
  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18353
0
  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18354
0
  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
18355
0
    llvm::AtomicRMWInst::BinOp BinOp;
18356
0
    switch (BuiltinID) {
18357
0
    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
18358
0
    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
18359
0
      BinOp = llvm::AtomicRMWInst::UIncWrap;
18360
0
      break;
18361
0
    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
18362
0
    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
18363
0
      BinOp = llvm::AtomicRMWInst::UDecWrap;
18364
0
      break;
18365
0
    }
18366
18367
0
    Address Ptr = CheckAtomicAlignment(*this, E);
18368
0
    Value *Val = EmitScalarExpr(E->getArg(1));
18369
18370
0
    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
18371
0
                            EmitScalarExpr(E->getArg(3)), AO, SSID);
18372
18373
0
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
18374
0
    bool Volatile =
18375
0
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
18376
18377
0
    llvm::AtomicRMWInst *RMW =
18378
0
        Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
18379
0
    if (Volatile)
18380
0
      RMW->setVolatile(true);
18381
0
    return RMW;
18382
0
  }
18383
0
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
18384
0
  case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
18385
0
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
18386
0
    llvm::Type *ResultType = ConvertType(E->getType());
18387
    // s_sendmsg_rtn is mangled using return type only.
18388
0
    Function *F =
18389
0
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
18390
0
    return Builder.CreateCall(F, {Arg});
18391
0
  }
18392
0
  default:
18393
0
    return nullptr;
18394
0
  }
18395
0
}
18396
18397
/// Handle a SystemZ function in which the final argument is a pointer
18398
/// to an int that receives the post-instruction CC value.  At the LLVM level
18399
/// this is represented as a function that returns a {result, cc} pair.
18400
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
18401
                                         unsigned IntrinsicID,
18402
0
                                         const CallExpr *E) {
18403
0
  unsigned NumArgs = E->getNumArgs() - 1;
18404
0
  SmallVector<Value *, 8> Args(NumArgs);
18405
0
  for (unsigned I = 0; I < NumArgs; ++I)
18406
0
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
18407
0
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
18408
0
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
18409
0
  Value *Call = CGF.Builder.CreateCall(F, Args);
18410
0
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
18411
0
  CGF.Builder.CreateStore(CC, CCPtr);
18412
0
  return CGF.Builder.CreateExtractValue(Call, 0);
18413
0
}
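
A minimal sketch (hypothetical names, illustration only) of the convention the helper above implements: the intrinsic returns a {result, cc} pair, and the builtin returns only the result while storing the condition code through its final pointer argument:

#include <cassert>
#include <utility>

static std::pair<int, int> intrinsicWithCC(int X) {
  return {X * 2, X > 0 ? 0 : 3}; // placeholder result and condition code
}

static int builtinWithCC(int X, int *CCPtr) {
  auto [Result, CC] = intrinsicWithCC(X);
  *CCPtr = CC;   // store the post-instruction condition code
  return Result; // return only the real result
}

int main() {
  int CC = -1;
  assert(builtinWithCC(4, &CC) == 8 && CC == 0);
}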
18414
18415
Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
18416
0
                                               const CallExpr *E) {
18417
0
  switch (BuiltinID) {
18418
0
  case SystemZ::BI__builtin_tbegin: {
18419
0
    Value *TDB = EmitScalarExpr(E->getArg(0));
18420
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18421
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
18422
0
    return Builder.CreateCall(F, {TDB, Control});
18423
0
  }
18424
0
  case SystemZ::BI__builtin_tbegin_nofloat: {
18425
0
    Value *TDB = EmitScalarExpr(E->getArg(0));
18426
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
18427
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
18428
0
    return Builder.CreateCall(F, {TDB, Control});
18429
0
  }
18430
0
  case SystemZ::BI__builtin_tbeginc: {
18431
0
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
18432
0
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
18433
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
18434
0
    return Builder.CreateCall(F, {TDB, Control});
18435
0
  }
18436
0
  case SystemZ::BI__builtin_tabort: {
18437
0
    Value *Data = EmitScalarExpr(E->getArg(0));
18438
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
18439
0
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
18440
0
  }
18441
0
  case SystemZ::BI__builtin_non_tx_store: {
18442
0
    Value *Address = EmitScalarExpr(E->getArg(0));
18443
0
    Value *Data = EmitScalarExpr(E->getArg(1));
18444
0
    Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
18445
0
    return Builder.CreateCall(F, {Data, Address});
18446
0
  }
18447
18448
  // Vector builtins.  Note that most vector builtins are mapped automatically
18449
  // to target-specific LLVM intrinsics.  The ones handled specially here can
18450
  // be represented via standard LLVM IR, which is preferable to enable common
18451
  // LLVM optimizations.
18452
18453
0
  case SystemZ::BI__builtin_s390_vpopctb:
18454
0
  case SystemZ::BI__builtin_s390_vpopcth:
18455
0
  case SystemZ::BI__builtin_s390_vpopctf:
18456
0
  case SystemZ::BI__builtin_s390_vpopctg: {
18457
0
    llvm::Type *ResultType = ConvertType(E->getType());
18458
0
    Value *X = EmitScalarExpr(E->getArg(0));
18459
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
18460
0
    return Builder.CreateCall(F, X);
18461
0
  }
18462
18463
0
  case SystemZ::BI__builtin_s390_vclzb:
18464
0
  case SystemZ::BI__builtin_s390_vclzh:
18465
0
  case SystemZ::BI__builtin_s390_vclzf:
18466
0
  case SystemZ::BI__builtin_s390_vclzg: {
18467
0
    llvm::Type *ResultType = ConvertType(E->getType());
18468
0
    Value *X = EmitScalarExpr(E->getArg(0));
18469
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18470
0
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18471
0
    return Builder.CreateCall(F, {X, Undef});
18472
0
  }
18473
18474
0
  case SystemZ::BI__builtin_s390_vctzb:
18475
0
  case SystemZ::BI__builtin_s390_vctzh:
18476
0
  case SystemZ::BI__builtin_s390_vctzf:
18477
0
  case SystemZ::BI__builtin_s390_vctzg: {
18478
0
    llvm::Type *ResultType = ConvertType(E->getType());
18479
0
    Value *X = EmitScalarExpr(E->getArg(0));
18480
0
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18481
0
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18482
0
    return Builder.CreateCall(F, {X, Undef});
18483
0
  }
18484
18485
0
  case SystemZ::BI__builtin_s390_verllb:
18486
0
  case SystemZ::BI__builtin_s390_verllh:
18487
0
  case SystemZ::BI__builtin_s390_verllf:
18488
0
  case SystemZ::BI__builtin_s390_verllg: {
18489
0
    llvm::Type *ResultType = ConvertType(E->getType());
18490
0
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18491
0
    llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
18492
    // Splat scalar rotate amount to vector type.
18493
0
    unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
18494
0
    Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
18495
0
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
18496
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
18497
0
    return Builder.CreateCall(F, { Src, Src, Amt });
18498
0
  }
18499
18500
0
  case SystemZ::BI__builtin_s390_verllvb:
18501
0
  case SystemZ::BI__builtin_s390_verllvh:
18502
0
  case SystemZ::BI__builtin_s390_verllvf:
18503
0
  case SystemZ::BI__builtin_s390_verllvg: {
18504
0
    llvm::Type *ResultType = ConvertType(E->getType());
18505
0
    llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18506
0
    llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
18507
0
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
18508
0
    return Builder.CreateCall(F, { Src, Src, Amt });
18509
0
  }
18510
18511
0
  case SystemZ::BI__builtin_s390_vfsqsb:
18512
0
  case SystemZ::BI__builtin_s390_vfsqdb: {
18513
0
    llvm::Type *ResultType = ConvertType(E->getType());
18514
0
    Value *X = EmitScalarExpr(E->getArg(0));
18515
0
    if (Builder.getIsFPConstrained()) {
18516
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
18517
0
      return Builder.CreateConstrainedFPCall(F, { X });
18518
0
    } else {
18519
0
      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18520
0
      return Builder.CreateCall(F, X);
18521
0
    }
18522
0
  }
18523
0
  case SystemZ::BI__builtin_s390_vfmasb:
18524
0
  case SystemZ::BI__builtin_s390_vfmadb: {
18525
0
    llvm::Type *ResultType = ConvertType(E->getType());
18526
0
    Value *X = EmitScalarExpr(E->getArg(0));
18527
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18528
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18529
0
    if (Builder.getIsFPConstrained()) {
18530
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18531
0
      return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18532
0
    } else {
18533
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18534
0
      return Builder.CreateCall(F, {X, Y, Z});
18535
0
    }
18536
0
  }
18537
0
  case SystemZ::BI__builtin_s390_vfmssb:
18538
0
  case SystemZ::BI__builtin_s390_vfmsdb: {
18539
0
    llvm::Type *ResultType = ConvertType(E->getType());
18540
0
    Value *X = EmitScalarExpr(E->getArg(0));
18541
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18542
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18543
0
    if (Builder.getIsFPConstrained()) {
18544
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18545
0
      return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18546
0
    } else {
18547
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18548
0
      return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18549
0
    }
18550
0
  }
18551
0
  case SystemZ::BI__builtin_s390_vfnmasb:
18552
0
  case SystemZ::BI__builtin_s390_vfnmadb: {
18553
0
    llvm::Type *ResultType = ConvertType(E->getType());
18554
0
    Value *X = EmitScalarExpr(E->getArg(0));
18555
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18556
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18557
0
    if (Builder.getIsFPConstrained()) {
18558
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18559
0
      return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18560
0
    } else {
18561
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18562
0
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18563
0
    }
18564
0
  }
18565
0
  case SystemZ::BI__builtin_s390_vfnmssb:
18566
0
  case SystemZ::BI__builtin_s390_vfnmsdb: {
18567
0
    llvm::Type *ResultType = ConvertType(E->getType());
18568
0
    Value *X = EmitScalarExpr(E->getArg(0));
18569
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18570
0
    Value *Z = EmitScalarExpr(E->getArg(2));
18571
0
    if (Builder.getIsFPConstrained()) {
18572
0
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18573
0
      Value *NegZ = Builder.CreateFNeg(Z, "sub");
18574
0
      return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
18575
0
    } else {
18576
0
      Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18577
0
      Value *NegZ = Builder.CreateFNeg(Z, "neg");
18578
0
      return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
18579
0
    }
18580
0
  }
18581
0
  case SystemZ::BI__builtin_s390_vflpsb:
18582
0
  case SystemZ::BI__builtin_s390_vflpdb: {
18583
0
    llvm::Type *ResultType = ConvertType(E->getType());
18584
0
    Value *X = EmitScalarExpr(E->getArg(0));
18585
0
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18586
0
    return Builder.CreateCall(F, X);
18587
0
  }
18588
0
  case SystemZ::BI__builtin_s390_vflnsb:
18589
0
  case SystemZ::BI__builtin_s390_vflndb: {
18590
0
    llvm::Type *ResultType = ConvertType(E->getType());
18591
0
    Value *X = EmitScalarExpr(E->getArg(0));
18592
0
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18593
0
    return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
18594
0
  }
18595
0
  case SystemZ::BI__builtin_s390_vfisb:
18596
0
  case SystemZ::BI__builtin_s390_vfidb: {
18597
0
    llvm::Type *ResultType = ConvertType(E->getType());
18598
0
    Value *X = EmitScalarExpr(E->getArg(0));
18599
    // Constant-fold the M4 and M5 mask arguments.
18600
0
    llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
18601
0
    llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18602
    // Check whether this instance can be represented via an LLVM standard
18603
    // intrinsic.  We only support some combinations of M4 and M5; a standalone sketch of this mapping follows the function below.
18604
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18605
0
    Intrinsic::ID CI;
18606
0
    switch (M4.getZExtValue()) {
18607
0
    default: break;
18608
0
    case 0:  // IEEE-inexact exception allowed
18609
0
      switch (M5.getZExtValue()) {
18610
0
      default: break;
18611
0
      case 0: ID = Intrinsic::rint;
18612
0
              CI = Intrinsic::experimental_constrained_rint; break;
18613
0
      }
18614
0
      break;
18615
0
    case 4:  // IEEE-inexact exception suppressed
18616
0
      switch (M5.getZExtValue()) {
18617
0
      default: break;
18618
0
      case 0: ID = Intrinsic::nearbyint;
18619
0
              CI = Intrinsic::experimental_constrained_nearbyint; break;
18620
0
      case 1: ID = Intrinsic::round;
18621
0
              CI = Intrinsic::experimental_constrained_round; break;
18622
0
      case 5: ID = Intrinsic::trunc;
18623
0
              CI = Intrinsic::experimental_constrained_trunc; break;
18624
0
      case 6: ID = Intrinsic::ceil;
18625
0
              CI = Intrinsic::experimental_constrained_ceil; break;
18626
0
      case 7: ID = Intrinsic::floor;
18627
0
              CI = Intrinsic::experimental_constrained_floor; break;
18628
0
      }
18629
0
      break;
18630
0
    }
18631
0
    if (ID != Intrinsic::not_intrinsic) {
18632
0
      if (Builder.getIsFPConstrained()) {
18633
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18634
0
        return Builder.CreateConstrainedFPCall(F, X);
18635
0
      } else {
18636
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18637
0
        return Builder.CreateCall(F, X);
18638
0
      }
18639
0
    }
18640
0
    switch (BuiltinID) { // FIXME: constrained version?
18641
0
      case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
18642
0
      case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
18643
0
      default: llvm_unreachable("Unknown BuiltinID");
18644
0
    }
18645
0
    Function *F = CGM.getIntrinsic(ID);
18646
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18647
0
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
18648
0
    return Builder.CreateCall(F, {X, M4Value, M5Value});
18649
0
  }
18650
0
  case SystemZ::BI__builtin_s390_vfmaxsb:
18651
0
  case SystemZ::BI__builtin_s390_vfmaxdb: {
18652
0
    llvm::Type *ResultType = ConvertType(E->getType());
18653
0
    Value *X = EmitScalarExpr(E->getArg(0));
18654
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18655
    // Constant-fold the M4 mask argument.
18656
0
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18657
    // Check whether this instance can be represented via an LLVM standard
18658
    // intrinsic.  We only support some values of M4.
18659
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18660
0
    Intrinsic::ID CI;
18661
0
    switch (M4.getZExtValue()) {
18662
0
    default: break;
18663
0
    case 4: ID = Intrinsic::maxnum;
18664
0
            CI = Intrinsic::experimental_constrained_maxnum; break;
18665
0
    }
18666
0
    if (ID != Intrinsic::not_intrinsic) {
18667
0
      if (Builder.getIsFPConstrained()) {
18668
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18669
0
        return Builder.CreateConstrainedFPCall(F, {X, Y});
18670
0
      } else {
18671
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18672
0
        return Builder.CreateCall(F, {X, Y});
18673
0
      }
18674
0
    }
18675
0
    switch (BuiltinID) {
18676
0
      case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
18677
0
      case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
18678
0
      default: llvm_unreachable("Unknown BuiltinID");
18679
0
    }
18680
0
    Function *F = CGM.getIntrinsic(ID);
18681
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18682
0
    return Builder.CreateCall(F, {X, Y, M4Value});
18683
0
  }
18684
0
  case SystemZ::BI__builtin_s390_vfminsb:
18685
0
  case SystemZ::BI__builtin_s390_vfmindb: {
18686
0
    llvm::Type *ResultType = ConvertType(E->getType());
18687
0
    Value *X = EmitScalarExpr(E->getArg(0));
18688
0
    Value *Y = EmitScalarExpr(E->getArg(1));
18689
    // Constant-fold the M4 mask argument.
18690
0
    llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
18691
    // Check whether this instance can be represented via an LLVM standard
18692
    // intrinsic.  We only support some values of M4.
18693
0
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
18694
0
    Intrinsic::ID CI;
18695
0
    switch (M4.getZExtValue()) {
18696
0
    default: break;
18697
0
    case 4: ID = Intrinsic::minnum;
18698
0
            CI = Intrinsic::experimental_constrained_minnum; break;
18699
0
    }
18700
0
    if (ID != Intrinsic::not_intrinsic) {
18701
0
      if (Builder.getIsFPConstrained()) {
18702
0
        Function *F = CGM.getIntrinsic(CI, ResultType);
18703
0
        return Builder.CreateConstrainedFPCall(F, {X, Y});
18704
0
      } else {
18705
0
        Function *F = CGM.getIntrinsic(ID, ResultType);
18706
0
        return Builder.CreateCall(F, {X, Y});
18707
0
      }
18708
0
    }
18709
0
    switch (BuiltinID) {
18710
0
      case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
18711
0
      case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
18712
0
      default: llvm_unreachable("Unknown BuiltinID");
18713
0
    }
18714
0
    Function *F = CGM.getIntrinsic(ID);
18715
0
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
18716
0
    return Builder.CreateCall(F, {X, Y, M4Value});
18717
0
  }
18718
18719
0
  case SystemZ::BI__builtin_s390_vlbrh:
18720
0
  case SystemZ::BI__builtin_s390_vlbrf:
18721
0
  case SystemZ::BI__builtin_s390_vlbrg: {
18722
0
    llvm::Type *ResultType = ConvertType(E->getType());
18723
0
    Value *X = EmitScalarExpr(E->getArg(0));
18724
0
    Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
18725
0
    return Builder.CreateCall(F, X);
18726
0
  }
18727
18728
  // Vector intrinsics that output the post-instruction CC value.
18729
18730
0
#define INTRINSIC_WITH_CC(NAME) \
18731
0
    case SystemZ::BI__builtin_##NAME: \
18732
0
      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
18733
18734
0
  INTRINSIC_WITH_CC(s390_vpkshs);
18735
0
  INTRINSIC_WITH_CC(s390_vpksfs);
18736
0
  INTRINSIC_WITH_CC(s390_vpksgs);
18737
18738
0
  INTRINSIC_WITH_CC(s390_vpklshs);
18739
0
  INTRINSIC_WITH_CC(s390_vpklsfs);
18740
0
  INTRINSIC_WITH_CC(s390_vpklsgs);
18741
18742
0
  INTRINSIC_WITH_CC(s390_vceqbs);
18743
0
  INTRINSIC_WITH_CC(s390_vceqhs);
18744
0
  INTRINSIC_WITH_CC(s390_vceqfs);
18745
0
  INTRINSIC_WITH_CC(s390_vceqgs);
18746
18747
0
  INTRINSIC_WITH_CC(s390_vchbs);
18748
0
  INTRINSIC_WITH_CC(s390_vchhs);
18749
0
  INTRINSIC_WITH_CC(s390_vchfs);
18750
0
  INTRINSIC_WITH_CC(s390_vchgs);
18751
18752
0
  INTRINSIC_WITH_CC(s390_vchlbs);
18753
0
  INTRINSIC_WITH_CC(s390_vchlhs);
18754
0
  INTRINSIC_WITH_CC(s390_vchlfs);
18755
0
  INTRINSIC_WITH_CC(s390_vchlgs);
18756
18757
0
  INTRINSIC_WITH_CC(s390_vfaebs);
18758
0
  INTRINSIC_WITH_CC(s390_vfaehs);
18759
0
  INTRINSIC_WITH_CC(s390_vfaefs);
18760
18761
0
  INTRINSIC_WITH_CC(s390_vfaezbs);
18762
0
  INTRINSIC_WITH_CC(s390_vfaezhs);
18763
0
  INTRINSIC_WITH_CC(s390_vfaezfs);
18764
18765
0
  INTRINSIC_WITH_CC(s390_vfeebs);
18766
0
  INTRINSIC_WITH_CC(s390_vfeehs);
18767
0
  INTRINSIC_WITH_CC(s390_vfeefs);
18768
18769
0
  INTRINSIC_WITH_CC(s390_vfeezbs);
18770
0
  INTRINSIC_WITH_CC(s390_vfeezhs);
18771
0
  INTRINSIC_WITH_CC(s390_vfeezfs);
18772
18773
0
  INTRINSIC_WITH_CC(s390_vfenebs);
18774
0
  INTRINSIC_WITH_CC(s390_vfenehs);
18775
0
  INTRINSIC_WITH_CC(s390_vfenefs);
18776
18777
0
  INTRINSIC_WITH_CC(s390_vfenezbs);
18778
0
  INTRINSIC_WITH_CC(s390_vfenezhs);
18779
0
  INTRINSIC_WITH_CC(s390_vfenezfs);
18780
18781
0
  INTRINSIC_WITH_CC(s390_vistrbs);
18782
0
  INTRINSIC_WITH_CC(s390_vistrhs);
18783
0
  INTRINSIC_WITH_CC(s390_vistrfs);
18784
18785
0
  INTRINSIC_WITH_CC(s390_vstrcbs);
18786
0
  INTRINSIC_WITH_CC(s390_vstrchs);
18787
0
  INTRINSIC_WITH_CC(s390_vstrcfs);
18788
18789
0
  INTRINSIC_WITH_CC(s390_vstrczbs);
18790
0
  INTRINSIC_WITH_CC(s390_vstrczhs);
18791
0
  INTRINSIC_WITH_CC(s390_vstrczfs);
18792
18793
0
  INTRINSIC_WITH_CC(s390_vfcesbs);
18794
0
  INTRINSIC_WITH_CC(s390_vfcedbs);
18795
0
  INTRINSIC_WITH_CC(s390_vfchsbs);
18796
0
  INTRINSIC_WITH_CC(s390_vfchdbs);
18797
0
  INTRINSIC_WITH_CC(s390_vfchesbs);
18798
0
  INTRINSIC_WITH_CC(s390_vfchedbs);
18799
18800
0
  INTRINSIC_WITH_CC(s390_vftcisb);
18801
0
  INTRINSIC_WITH_CC(s390_vftcidb);
18802
18803
0
  INTRINSIC_WITH_CC(s390_vstrsb);
18804
0
  INTRINSIC_WITH_CC(s390_vstrsh);
18805
0
  INTRINSIC_WITH_CC(s390_vstrsf);
18806
18807
0
  INTRINSIC_WITH_CC(s390_vstrszb);
18808
0
  INTRINSIC_WITH_CC(s390_vstrszh);
18809
0
  INTRINSIC_WITH_CC(s390_vstrszf);
18810
18811
0
#undef INTRINSIC_WITH_CC
18812
18813
0
  default:
18814
0
    return nullptr;
18815
0
  }
18816
0
}
18817
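Editor's note: a standalone sketch of the M4/M5 rounding-mode mapping used by the __builtin_s390_vfisb/vfidb case above (referenced there). The table mirrors the nested switch in that case; the helper name is an assumption, not LLVM code.

#include <cstdio>

// Returns the standard LLVM rounding intrinsic that a given (M4, M5) mask
// pair lowers to, or nullptr when the combination has no generic equivalent
// and codegen falls back to the target-specific s390.vfisb/s390.vfidb call.
static const char *vfiIntrinsicFor(unsigned M4, unsigned M5) {
  if (M4 == 0 && M5 == 0)          // IEEE-inexact exception allowed
    return "llvm.rint";
  if (M4 == 4) {                   // IEEE-inexact exception suppressed
    switch (M5) {
    case 0: return "llvm.nearbyint";
    case 1: return "llvm.round";
    case 5: return "llvm.trunc";
    case 6: return "llvm.ceil";
    case 7: return "llvm.floor";
    }
  }
  return nullptr;
}

int main() {
  std::printf("%s\n", vfiIntrinsicFor(4, 7));            // llvm.floor
  std::printf("%d\n", vfiIntrinsicFor(4, 2) == nullptr); // 1: no generic match
  return 0;
}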
18818
namespace {
18819
// Helper classes for mapping MMA builtins to the particular LLVM intrinsic variant.
18820
struct NVPTXMmaLdstInfo {
18821
  unsigned NumResults;  // Number of elements to load/store
18822
  // Intrinsic IDs for row/col variants. 0 if a particular layout is unsupported.
18823
  unsigned IID_col;
18824
  unsigned IID_row;
18825
};
18826
18827
#define MMA_INTR(geom_op_type, layout) \
18828
0
  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
18829
#define MMA_LDST(n, geom_op_type)                                              \
18830
0
  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
18831
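Editor's note: a standalone sketch of what the two macros above produce. Dummy names and values replace the generated Intrinsic::nvvm_wmma_* IDs; only the token pasting and the resulting aggregate shape are the point.

#include <cstdio>

struct LdstInfo { unsigned NumResults, IID_col, IID_row; };

// Dummy stand-ins for the generated intrinsic IDs.
enum : unsigned {
  wmma_m16n16k16_load_a_f16_col_stride = 101,
  wmma_m16n16k16_load_a_f16_row_stride = 102,
};

#define DUMMY_INTR(geom_op_type, layout) wmma_##geom_op_type##_##layout##_stride
#define DUMMY_LDST(n, geom_op_type) \
  { n, DUMMY_INTR(geom_op_type, col), DUMMY_INTR(geom_op_type, row) }

int main() {
  // Mirrors "return MMA_LDST(8, m16n16k16_load_a_f16);" in the table below.
  LdstInfo I = DUMMY_LDST(8, m16n16k16_load_a_f16);
  std::printf("%u %u %u\n", I.NumResults, I.IID_col, I.IID_row); // 8 101 102
  return 0;
}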
18832
0
static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
18833
0
  switch (BuiltinID) {
18834
  // FP MMA loads
18835
0
  case NVPTX::BI__hmma_m16n16k16_ld_a:
18836
0
    return MMA_LDST(8, m16n16k16_load_a_f16);
18837
0
  case NVPTX::BI__hmma_m16n16k16_ld_b:
18838
0
    return MMA_LDST(8, m16n16k16_load_b_f16);
18839
0
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
18840
0
    return MMA_LDST(4, m16n16k16_load_c_f16);
18841
0
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
18842
0
    return MMA_LDST(8, m16n16k16_load_c_f32);
18843
0
  case NVPTX::BI__hmma_m32n8k16_ld_a:
18844
0
    return MMA_LDST(8, m32n8k16_load_a_f16);
18845
0
  case NVPTX::BI__hmma_m32n8k16_ld_b:
18846
0
    return MMA_LDST(8, m32n8k16_load_b_f16);
18847
0
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
18848
0
    return MMA_LDST(4, m32n8k16_load_c_f16);
18849
0
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
18850
0
    return MMA_LDST(8, m32n8k16_load_c_f32);
18851
0
  case NVPTX::BI__hmma_m8n32k16_ld_a:
18852
0
    return MMA_LDST(8, m8n32k16_load_a_f16);
18853
0
  case NVPTX::BI__hmma_m8n32k16_ld_b:
18854
0
    return MMA_LDST(8, m8n32k16_load_b_f16);
18855
0
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
18856
0
    return MMA_LDST(4, m8n32k16_load_c_f16);
18857
0
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
18858
0
    return MMA_LDST(8, m8n32k16_load_c_f32);
18859
18860
  // Integer MMA loads
18861
0
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
18862
0
    return MMA_LDST(2, m16n16k16_load_a_s8);
18863
0
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
18864
0
    return MMA_LDST(2, m16n16k16_load_a_u8);
18865
0
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
18866
0
    return MMA_LDST(2, m16n16k16_load_b_s8);
18867
0
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
18868
0
    return MMA_LDST(2, m16n16k16_load_b_u8);
18869
0
  case NVPTX::BI__imma_m16n16k16_ld_c:
18870
0
    return MMA_LDST(8, m16n16k16_load_c_s32);
18871
0
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
18872
0
    return MMA_LDST(4, m32n8k16_load_a_s8);
18873
0
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
18874
0
    return MMA_LDST(4, m32n8k16_load_a_u8);
18875
0
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
18876
0
    return MMA_LDST(1, m32n8k16_load_b_s8);
18877
0
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
18878
0
    return MMA_LDST(1, m32n8k16_load_b_u8);
18879
0
  case NVPTX::BI__imma_m32n8k16_ld_c:
18880
0
    return MMA_LDST(8, m32n8k16_load_c_s32);
18881
0
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
18882
0
    return MMA_LDST(1, m8n32k16_load_a_s8);
18883
0
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
18884
0
    return MMA_LDST(1, m8n32k16_load_a_u8);
18885
0
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
18886
0
    return MMA_LDST(4, m8n32k16_load_b_s8);
18887
0
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
18888
0
    return MMA_LDST(4, m8n32k16_load_b_u8);
18889
0
  case NVPTX::BI__imma_m8n32k16_ld_c:
18890
0
    return MMA_LDST(8, m8n32k16_load_c_s32);
18891
18892
  // Sub-integer MMA loads.
18893
  // A fragments support only the row layout and B fragments only the col layout.
18894
0
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
18895
0
    return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
18896
0
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
18897
0
    return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
18898
0
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
18899
0
    return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
18900
0
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
18901
0
    return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
18902
0
  case NVPTX::BI__imma_m8n8k32_ld_c:
18903
0
    return MMA_LDST(2, m8n8k32_load_c_s32);
18904
0
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
18905
0
    return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
18906
0
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
18907
0
    return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
18908
0
  case NVPTX::BI__bmma_m8n8k128_ld_c:
18909
0
    return MMA_LDST(2, m8n8k128_load_c_s32);
18910
18911
  // Double MMA loads
18912
0
  case NVPTX::BI__dmma_m8n8k4_ld_a:
18913
0
    return MMA_LDST(1, m8n8k4_load_a_f64);
18914
0
  case NVPTX::BI__dmma_m8n8k4_ld_b:
18915
0
    return MMA_LDST(1, m8n8k4_load_b_f64);
18916
0
  case NVPTX::BI__dmma_m8n8k4_ld_c:
18917
0
    return MMA_LDST(2, m8n8k4_load_c_f64);
18918
18919
  // Alternate float MMA loads
18920
0
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
18921
0
    return MMA_LDST(4, m16n16k16_load_a_bf16);
18922
0
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
18923
0
    return MMA_LDST(4, m16n16k16_load_b_bf16);
18924
0
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
18925
0
    return MMA_LDST(2, m8n32k16_load_a_bf16);
18926
0
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
18927
0
    return MMA_LDST(8, m8n32k16_load_b_bf16);
18928
0
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
18929
0
    return MMA_LDST(8, m32n8k16_load_a_bf16);
18930
0
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
18931
0
    return MMA_LDST(2, m32n8k16_load_b_bf16);
18932
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
18933
0
    return MMA_LDST(4, m16n16k8_load_a_tf32);
18934
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
18935
0
    return MMA_LDST(4, m16n16k8_load_b_tf32);
18936
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
18937
0
    return MMA_LDST(8, m16n16k8_load_c_f32);
18938
18939
  // NOTE: We need to follow the inconsistent naming scheme used by NVCC.  Unlike
18940
  // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
18941
  // use fragment C for both loads and stores.
18942
  // FP MMA stores.
18943
0
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
18944
0
    return MMA_LDST(4, m16n16k16_store_d_f16);
18945
0
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
18946
0
    return MMA_LDST(8, m16n16k16_store_d_f32);
18947
0
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
18948
0
    return MMA_LDST(4, m32n8k16_store_d_f16);
18949
0
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
18950
0
    return MMA_LDST(8, m32n8k16_store_d_f32);
18951
0
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
18952
0
    return MMA_LDST(4, m8n32k16_store_d_f16);
18953
0
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
18954
0
    return MMA_LDST(8, m8n32k16_store_d_f32);
18955
18956
  // Integer and sub-integer MMA stores.
18957
  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
18958
  // name, integer loads/stores use LLVM's i32.
18959
0
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
18960
0
    return MMA_LDST(8, m16n16k16_store_d_s32);
18961
0
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
18962
0
    return MMA_LDST(8, m32n8k16_store_d_s32);
18963
0
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
18964
0
    return MMA_LDST(8, m8n32k16_store_d_s32);
18965
0
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
18966
0
    return MMA_LDST(2, m8n8k32_store_d_s32);
18967
0
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
18968
0
    return MMA_LDST(2, m8n8k128_store_d_s32);
18969
18970
  // Double MMA store
18971
0
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
18972
0
    return MMA_LDST(2, m8n8k4_store_d_f64);
18973
18974
  // Alternate float MMA store
18975
0
  case NVPTX::BI__mma_m16n16k8_st_c_f32:
18976
0
    return MMA_LDST(8, m16n16k8_store_d_f32);
18977
18978
0
  default:
18979
0
    llvm_unreachable("Unknown MMA builtin");
18980
0
  }
18981
0
}
18982
#undef MMA_LDST
18983
#undef MMA_INTR
18984
18985
18986
struct NVPTXMmaInfo {
18987
  unsigned NumEltsA;
18988
  unsigned NumEltsB;
18989
  unsigned NumEltsC;
18990
  unsigned NumEltsD;
18991
18992
  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
18993
  // over 'col' for layout. The index of non-satf variants is expected to match
18994
  // the undocumented layout constants used by CUDA's mma.hpp.
18995
  std::array<unsigned, 8> Variants;
18996
18997
0
  unsigned getMMAIntrinsic(int Layout, bool Satf) {
18998
0
    unsigned Index = Layout + 4 * Satf;
18999
0
    if (Index >= Variants.size())
19000
0
      return 0;
19001
0
    return Variants[Index];
19002
0
  }
19003
};
19004
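Editor's note: a standalone sketch of how getMMAIntrinsic above indexes the Variants array. The layout constants follow the row-before-col, A-before-B ordering of MMA_VARIANTS below; the concrete values are dummies, not real intrinsic IDs.

#include <array>
#include <cstdio>

// Layout constants as used by getMMAIntrinsic's caller: layout of fragment A
// first, then of fragment B, with 'row' taking priority over 'col'.
enum Layout { RowRow = 0, RowCol = 1, ColRow = 2, ColCol = 3 };

static unsigned variantIndex(int L, bool Satf) {
  return L + 4 * Satf;     // slots 0..3 plain, 4..7 .satfinite
}

int main() {
  // Dummy intrinsic IDs standing in for one MMA_SATF_VARIANTS expansion.
  std::array<unsigned, 8> Variants = {10, 11, 12, 13, 110, 111, 112, 113};
  std::printf("%u\n", Variants[variantIndex(RowCol, /*Satf=*/false)]); // 11
  std::printf("%u\n", Variants[variantIndex(ColRow, /*Satf=*/true)]);  // 112
  return 0;
}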
19005
// getMMAIntrinsic returns the intrinsic that matches Layout and Satf for valid
19006
// combinations of Layout and Satf, 0 otherwise.
19007
0
static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19008
  // clang-format off
19009
0
#define MMA_VARIANTS(geom, type)                                    \
19010
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
19011
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
19012
0
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
19013
0
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19014
0
#define MMA_SATF_VARIANTS(geom, type)                               \
19015
0
      MMA_VARIANTS(geom, type),                                     \
19016
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19017
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19018
0
      Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19019
0
      Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19020
// Sub-integer MMA only supports the row.col layout (A row-major, B column-major).
19021
0
#define MMA_VARIANTS_I4(geom, type) \
19022
0
      0, \
19023
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
19024
0
      0, \
19025
0
      0, \
19026
0
      0, \
19027
0
      Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19028
0
      0, \
19029
0
      0
19030
// b1 MMA does not support .satfinite.
19031
0
#define MMA_VARIANTS_B1_XOR(geom, type) \
19032
0
      0, \
19033
0
      Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type,             \
19034
0
      0, \
19035
0
      0, \
19036
0
      0, \
19037
0
      0, \
19038
0
      0, \
19039
0
      0
19040
0
#define MMA_VARIANTS_B1_AND(geom, type) \
19041
0
      0, \
19042
0
      Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type,             \
19043
0
      0, \
19044
0
      0, \
19045
0
      0, \
19046
0
      0, \
19047
0
      0, \
19048
0
      0
19049
  // clang-format on
19050
0
  switch (BuiltinID) {
19051
  // FP MMA
19052
  // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19053
  // the NumEltsN fields of the return value are ordered as A,B,C,D.
19054
0
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19055
0
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19056
0
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19057
0
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19058
0
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19059
0
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19060
0
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19061
0
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19062
0
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19063
0
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19064
0
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19065
0
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19066
0
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19067
0
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19068
0
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19069
0
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19070
0
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19071
0
    return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19072
0
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19073
0
    return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19074
0
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19075
0
    return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19076
0
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19077
0
    return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19078
19079
  // Integer MMA
19080
0
  case NVPTX::BI__imma_m16n16k16_mma_s8:
19081
0
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19082
0
  case NVPTX::BI__imma_m16n16k16_mma_u8:
19083
0
    return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19084
0
  case NVPTX::BI__imma_m32n8k16_mma_s8:
19085
0
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19086
0
  case NVPTX::BI__imma_m32n8k16_mma_u8:
19087
0
    return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19088
0
  case NVPTX::BI__imma_m8n32k16_mma_s8:
19089
0
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19090
0
  case NVPTX::BI__imma_m8n32k16_mma_u8:
19091
0
    return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19092
19093
  // Sub-integer MMA
19094
0
  case NVPTX::BI__imma_m8n8k32_mma_s4:
19095
0
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19096
0
  case NVPTX::BI__imma_m8n8k32_mma_u4:
19097
0
    return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19098
0
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19099
0
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19100
0
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19101
0
    return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19102
19103
  // Double MMA
19104
0
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
19105
0
    return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19106
19107
  // Alternate FP MMA
19108
0
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19109
0
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19110
0
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19111
0
    return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19112
0
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19113
0
    return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19114
0
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19115
0
    return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19116
0
  default:
19117
0
    llvm_unreachable("Unexpected builtin ID.");
19118
0
  }
19119
0
#undef MMA_VARIANTS
19120
0
#undef MMA_SATF_VARIANTS
19121
0
#undef MMA_VARIANTS_I4
19122
0
#undef MMA_VARIANTS_B1_AND
19123
0
#undef MMA_VARIANTS_B1_XOR
19124
0
}
19125
19126
static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19127
0
                         const CallExpr *E) {
19128
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19129
0
  QualType ArgType = E->getArg(0)->getType();
19130
0
  clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
19131
0
  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
19132
0
  return CGF.Builder.CreateCall(
19133
0
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19134
0
      {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
19135
0
}
19136
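Editor's note: a standalone sketch of the explicit alignment operand that MakeLdgLdu above appends to the ldg/ldu call. Plain C++ stands in for the intrinsic; emit_ldg and the printed name are illustrative assumptions, not the LLVM API.

#include <cstdint>
#include <cstdio>

struct alignas(16) Float4 { float V[4]; };   // models a 4 x float pointee

// Stand-in for the intrinsic call: the pointee's natural alignment travels
// as an explicit i32 operand rather than being encoded in the pointer type.
static void emit_ldg(const void *Ptr, uint32_t Align) {
  std::printf("llvm.nvvm.ldg.global.* (%p, align %u)\n", Ptr, Align);
}

int main() {
  Float4 F{};
  double D = 0.0;
  emit_ldg(&F, alignof(Float4));   // 16, i.e. 4 * alignof(float), matching the
                                   // PTX rule quoted in the callers further down
  emit_ldg(&D, alignof(double));   // 8 for a scalar pointee
  return 0;
}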
19137
static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19138
0
                               const CallExpr *E) {
19139
0
  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19140
0
  llvm::Type *ElemTy =
19141
0
      CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19142
0
  return CGF.Builder.CreateCall(
19143
0
      CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19144
0
      {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
19145
0
}
19146
19147
static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19148
                          CodeGenFunction &CGF, const CallExpr *E,
19149
0
                          int SrcSize) {
19150
0
  return E->getNumArgs() == 3
19151
0
             ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
19152
0
                                      {CGF.EmitScalarExpr(E->getArg(0)),
19153
0
                                       CGF.EmitScalarExpr(E->getArg(1)),
19154
0
                                       CGF.EmitScalarExpr(E->getArg(2))})
19155
0
             : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
19156
0
                                      {CGF.EmitScalarExpr(E->getArg(0)),
19157
0
                                       CGF.EmitScalarExpr(E->getArg(1))});
19158
0
}
19159
19160
static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19161
0
                           const CallExpr *E, CodeGenFunction &CGF) {
19162
0
  auto &C = CGF.CGM.getContext();
19163
0
  if (!(C.getLangOpts().NativeHalfType ||
19164
0
        !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19165
0
    CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
19166
0
                                       " requires native half type support.");
19167
0
    return nullptr;
19168
0
  }
19169
19170
0
  if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19171
0
      IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19172
0
    return MakeLdgLdu(IntrinsicID, CGF, E);
19173
19174
0
  SmallVector<Value *, 16> Args;
19175
0
  auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
19176
0
  auto *FTy = F->getFunctionType();
19177
0
  unsigned ICEArguments = 0;
19178
0
  ASTContext::GetBuiltinTypeError Error;
19179
0
  C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
19180
0
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
19181
0
  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19182
0
    assert((ICEArguments & (1 << i)) == 0);
19183
0
    auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
19184
0
    auto *PTy = FTy->getParamType(i);
19185
0
    if (PTy != ArgValue->getType())
19186
0
      ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
19187
0
    Args.push_back(ArgValue);
19188
0
  }
19189
19190
0
  return CGF.Builder.CreateCall(F, Args);
19191
0
}
19192
} // namespace
19193
19194
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19195
0
                                             const CallExpr *E) {
19196
0
  switch (BuiltinID) {
19197
0
  case NVPTX::BI__nvvm_atom_add_gen_i:
19198
0
  case NVPTX::BI__nvvm_atom_add_gen_l:
19199
0
  case NVPTX::BI__nvvm_atom_add_gen_ll:
19200
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
19201
19202
0
  case NVPTX::BI__nvvm_atom_sub_gen_i:
19203
0
  case NVPTX::BI__nvvm_atom_sub_gen_l:
19204
0
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
19205
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
19206
19207
0
  case NVPTX::BI__nvvm_atom_and_gen_i:
19208
0
  case NVPTX::BI__nvvm_atom_and_gen_l:
19209
0
  case NVPTX::BI__nvvm_atom_and_gen_ll:
19210
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
19211
19212
0
  case NVPTX::BI__nvvm_atom_or_gen_i:
19213
0
  case NVPTX::BI__nvvm_atom_or_gen_l:
19214
0
  case NVPTX::BI__nvvm_atom_or_gen_ll:
19215
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
19216
19217
0
  case NVPTX::BI__nvvm_atom_xor_gen_i:
19218
0
  case NVPTX::BI__nvvm_atom_xor_gen_l:
19219
0
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
19220
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
19221
19222
0
  case NVPTX::BI__nvvm_atom_xchg_gen_i:
19223
0
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
19224
0
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
19225
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
19226
19227
0
  case NVPTX::BI__nvvm_atom_max_gen_i:
19228
0
  case NVPTX::BI__nvvm_atom_max_gen_l:
19229
0
  case NVPTX::BI__nvvm_atom_max_gen_ll:
19230
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
19231
19232
0
  case NVPTX::BI__nvvm_atom_max_gen_ui:
19233
0
  case NVPTX::BI__nvvm_atom_max_gen_ul:
19234
0
  case NVPTX::BI__nvvm_atom_max_gen_ull:
19235
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
19236
19237
0
  case NVPTX::BI__nvvm_atom_min_gen_i:
19238
0
  case NVPTX::BI__nvvm_atom_min_gen_l:
19239
0
  case NVPTX::BI__nvvm_atom_min_gen_ll:
19240
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
19241
19242
0
  case NVPTX::BI__nvvm_atom_min_gen_ui:
19243
0
  case NVPTX::BI__nvvm_atom_min_gen_ul:
19244
0
  case NVPTX::BI__nvvm_atom_min_gen_ull:
19245
0
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
19246
19247
0
  case NVPTX::BI__nvvm_atom_cas_gen_i:
19248
0
  case NVPTX::BI__nvvm_atom_cas_gen_l:
19249
0
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
19250
    // __nvvm_atom_cas_gen_* should return the old value rather than the
19251
    // success flag.
19252
0
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
19253
19254
0
  case NVPTX::BI__nvvm_atom_add_gen_f:
19255
0
  case NVPTX::BI__nvvm_atom_add_gen_d: {
19256
0
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
19257
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19258
19259
0
    return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
19260
0
                                   AtomicOrdering::SequentiallyConsistent);
19261
0
  }
19262
19263
0
  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19264
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19265
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19266
0
    Function *FnALI32 =
19267
0
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19268
0
    return Builder.CreateCall(FnALI32, {Ptr, Val});
19269
0
  }
19270
19271
0
  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19272
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19273
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19274
0
    Function *FnALD32 =
19275
0
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19276
0
    return Builder.CreateCall(FnALD32, {Ptr, Val});
19277
0
  }
19278
19279
0
  case NVPTX::BI__nvvm_ldg_c:
19280
0
  case NVPTX::BI__nvvm_ldg_sc:
19281
0
  case NVPTX::BI__nvvm_ldg_c2:
19282
0
  case NVPTX::BI__nvvm_ldg_sc2:
19283
0
  case NVPTX::BI__nvvm_ldg_c4:
19284
0
  case NVPTX::BI__nvvm_ldg_sc4:
19285
0
  case NVPTX::BI__nvvm_ldg_s:
19286
0
  case NVPTX::BI__nvvm_ldg_s2:
19287
0
  case NVPTX::BI__nvvm_ldg_s4:
19288
0
  case NVPTX::BI__nvvm_ldg_i:
19289
0
  case NVPTX::BI__nvvm_ldg_i2:
19290
0
  case NVPTX::BI__nvvm_ldg_i4:
19291
0
  case NVPTX::BI__nvvm_ldg_l:
19292
0
  case NVPTX::BI__nvvm_ldg_l2:
19293
0
  case NVPTX::BI__nvvm_ldg_ll:
19294
0
  case NVPTX::BI__nvvm_ldg_ll2:
19295
0
  case NVPTX::BI__nvvm_ldg_uc:
19296
0
  case NVPTX::BI__nvvm_ldg_uc2:
19297
0
  case NVPTX::BI__nvvm_ldg_uc4:
19298
0
  case NVPTX::BI__nvvm_ldg_us:
19299
0
  case NVPTX::BI__nvvm_ldg_us2:
19300
0
  case NVPTX::BI__nvvm_ldg_us4:
19301
0
  case NVPTX::BI__nvvm_ldg_ui:
19302
0
  case NVPTX::BI__nvvm_ldg_ui2:
19303
0
  case NVPTX::BI__nvvm_ldg_ui4:
19304
0
  case NVPTX::BI__nvvm_ldg_ul:
19305
0
  case NVPTX::BI__nvvm_ldg_ul2:
19306
0
  case NVPTX::BI__nvvm_ldg_ull:
19307
0
  case NVPTX::BI__nvvm_ldg_ull2:
19308
    // PTX Interoperability section 2.2: "For a vector with an even number of
19309
    // elements, its alignment is set to number of elements times the alignment
19310
    // of its member: n*alignof(t)."
19311
0
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19312
0
  case NVPTX::BI__nvvm_ldg_f:
19313
0
  case NVPTX::BI__nvvm_ldg_f2:
19314
0
  case NVPTX::BI__nvvm_ldg_f4:
19315
0
  case NVPTX::BI__nvvm_ldg_d:
19316
0
  case NVPTX::BI__nvvm_ldg_d2:
19317
0
    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
19318
19319
0
  case NVPTX::BI__nvvm_ldu_c:
19320
0
  case NVPTX::BI__nvvm_ldu_sc:
19321
0
  case NVPTX::BI__nvvm_ldu_c2:
19322
0
  case NVPTX::BI__nvvm_ldu_sc2:
19323
0
  case NVPTX::BI__nvvm_ldu_c4:
19324
0
  case NVPTX::BI__nvvm_ldu_sc4:
19325
0
  case NVPTX::BI__nvvm_ldu_s:
19326
0
  case NVPTX::BI__nvvm_ldu_s2:
19327
0
  case NVPTX::BI__nvvm_ldu_s4:
19328
0
  case NVPTX::BI__nvvm_ldu_i:
19329
0
  case NVPTX::BI__nvvm_ldu_i2:
19330
0
  case NVPTX::BI__nvvm_ldu_i4:
19331
0
  case NVPTX::BI__nvvm_ldu_l:
19332
0
  case NVPTX::BI__nvvm_ldu_l2:
19333
0
  case NVPTX::BI__nvvm_ldu_ll:
19334
0
  case NVPTX::BI__nvvm_ldu_ll2:
19335
0
  case NVPTX::BI__nvvm_ldu_uc:
19336
0
  case NVPTX::BI__nvvm_ldu_uc2:
19337
0
  case NVPTX::BI__nvvm_ldu_uc4:
19338
0
  case NVPTX::BI__nvvm_ldu_us:
19339
0
  case NVPTX::BI__nvvm_ldu_us2:
19340
0
  case NVPTX::BI__nvvm_ldu_us4:
19341
0
  case NVPTX::BI__nvvm_ldu_ui:
19342
0
  case NVPTX::BI__nvvm_ldu_ui2:
19343
0
  case NVPTX::BI__nvvm_ldu_ui4:
19344
0
  case NVPTX::BI__nvvm_ldu_ul:
19345
0
  case NVPTX::BI__nvvm_ldu_ul2:
19346
0
  case NVPTX::BI__nvvm_ldu_ull:
19347
0
  case NVPTX::BI__nvvm_ldu_ull2:
19348
0
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
19349
0
  case NVPTX::BI__nvvm_ldu_f:
19350
0
  case NVPTX::BI__nvvm_ldu_f2:
19351
0
  case NVPTX::BI__nvvm_ldu_f4:
19352
0
  case NVPTX::BI__nvvm_ldu_d:
19353
0
  case NVPTX::BI__nvvm_ldu_d2:
19354
0
    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
19355
19356
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
19357
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
19358
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
19359
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
19360
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
19361
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
19362
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
19363
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
19364
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
19365
0
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
19366
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
19367
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
19368
0
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
19369
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
19370
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
19371
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
19372
0
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
19373
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
19374
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
19375
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
19376
0
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
19377
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
19378
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
19379
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
19380
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
19381
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
19382
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
19383
0
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
19384
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
19385
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
19386
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
19387
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
19388
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
19389
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
19390
0
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
19391
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
19392
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
19393
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
19394
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
19395
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
19396
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
19397
0
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
19398
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
19399
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
19400
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
19401
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
19402
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
19403
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
19404
0
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
19405
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
19406
0
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
19407
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
19408
0
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
19409
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
19410
0
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
19411
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
19412
0
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
19413
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
19414
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
19415
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
19416
0
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
19417
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
19418
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
19419
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
19420
0
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
19421
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
19422
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
19423
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
19424
0
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
19425
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
19426
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
19427
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
19428
0
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
19429
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
19430
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
19431
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
19432
0
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
19433
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
19434
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
19435
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
19436
0
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
19437
0
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
19438
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
19439
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
19440
0
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
19441
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19442
0
    llvm::Type *ElemTy =
19443
0
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19444
0
    return Builder.CreateCall(
19445
0
        CGM.getIntrinsic(
19446
0
            Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
19447
0
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19448
0
  }
19449
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
19450
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
19451
0
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
19452
0
    Value *Ptr = EmitScalarExpr(E->getArg(0));
19453
0
    llvm::Type *ElemTy =
19454
0
        ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19455
0
    return Builder.CreateCall(
19456
0
        CGM.getIntrinsic(
19457
0
            Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
19458
0
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
19459
0
  }
19460
0
  case NVPTX::BI__nvvm_match_all_sync_i32p:
19461
0
  case NVPTX::BI__nvvm_match_all_sync_i64p: {
19462
0
    Value *Mask = EmitScalarExpr(E->getArg(0));
19463
0
    Value *Val = EmitScalarExpr(E->getArg(1));
19464
0
    Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
19465
0
    Value *ResultPair = Builder.CreateCall(
19466
0
        CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
19467
0
                             ? Intrinsic::nvvm_match_all_sync_i32p
19468
0
                             : Intrinsic::nvvm_match_all_sync_i64p),
19469
0
        {Mask, Val});
19470
0
    Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
19471
0
                                     PredOutPtr.getElementType());
19472
0
    Builder.CreateStore(Pred, PredOutPtr);
19473
0
    return Builder.CreateExtractValue(ResultPair, 0);
19474
0
  }
19475
19476
  // FP MMA loads
19477
0
  case NVPTX::BI__hmma_m16n16k16_ld_a:
19478
0
  case NVPTX::BI__hmma_m16n16k16_ld_b:
19479
0
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19480
0
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19481
0
  case NVPTX::BI__hmma_m32n8k16_ld_a:
19482
0
  case NVPTX::BI__hmma_m32n8k16_ld_b:
19483
0
  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19484
0
  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19485
0
  case NVPTX::BI__hmma_m8n32k16_ld_a:
19486
0
  case NVPTX::BI__hmma_m8n32k16_ld_b:
19487
0
  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19488
0
  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19489
  // Integer MMA loads.
19490
0
  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19491
0
  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19492
0
  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19493
0
  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19494
0
  case NVPTX::BI__imma_m16n16k16_ld_c:
19495
0
  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19496
0
  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19497
0
  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19498
0
  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19499
0
  case NVPTX::BI__imma_m32n8k16_ld_c:
19500
0
  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19501
0
  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19502
0
  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19503
0
  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19504
0
  case NVPTX::BI__imma_m8n32k16_ld_c:
19505
  // Sub-integer MMA loads.
19506
0
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19507
0
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19508
0
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19509
0
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19510
0
  case NVPTX::BI__imma_m8n8k32_ld_c:
19511
0
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19512
0
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19513
0
  case NVPTX::BI__bmma_m8n8k128_ld_c:
19514
  // Double MMA loads.
19515
0
  case NVPTX::BI__dmma_m8n8k4_ld_a:
19516
0
  case NVPTX::BI__dmma_m8n8k4_ld_b:
19517
0
  case NVPTX::BI__dmma_m8n8k4_ld_c:
19518
  // Alternate float MMA loads.
19519
0
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19520
0
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19521
0
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19522
0
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19523
0
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19524
0
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19525
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19526
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19527
0
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
19528
0
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
19529
0
    Value *Src = EmitScalarExpr(E->getArg(1));
19530
0
    Value *Ldm = EmitScalarExpr(E->getArg(2));
19531
0
    std::optional<llvm::APSInt> isColMajorArg =
19532
0
        E->getArg(3)->getIntegerConstantExpr(getContext());
19533
0
    if (!isColMajorArg)
19534
0
      return nullptr;
19535
0
    bool isColMajor = isColMajorArg->getSExtValue();
19536
0
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
19537
0
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19538
0
    if (IID == 0)
19539
0
      return nullptr;
19540
19541
0
    Value *Result =
19542
0
        Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
19543
19544
    // Save returned values.
19545
0
    assert(II.NumResults);
19546
0
    if (II.NumResults == 1) {
19547
0
      Builder.CreateAlignedStore(Result, Dst.getPointer(),
19548
0
                                 CharUnits::fromQuantity(4));
19549
0
    } else {
19550
0
      for (unsigned i = 0; i < II.NumResults; ++i) {
19551
0
        Builder.CreateAlignedStore(
19552
0
            Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
19553
0
                                  Dst.getElementType()),
19554
0
            Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
19555
0
                              llvm::ConstantInt::get(IntTy, i)),
19556
0
            CharUnits::fromQuantity(4));
19557
0
      }
19558
0
    }
19559
0
    return Result;
19560
0
  }
19561
19562
0
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19563
0
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19564
0
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19565
0
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19566
0
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19567
0
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19568
0
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
19569
0
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
19570
0
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
19571
0
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
19572
0
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19573
0
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19574
0
  case NVPTX::BI__mma_m16n16k8_st_c_f32: {
19575
0
    Value *Dst = EmitScalarExpr(E->getArg(0));
19576
0
    Address Src = EmitPointerWithAlignment(E->getArg(1));
19577
0
    Value *Ldm = EmitScalarExpr(E->getArg(2));
19578
0
    std::optional<llvm::APSInt> isColMajorArg =
19579
0
        E->getArg(3)->getIntegerConstantExpr(getContext());
19580
0
    if (!isColMajorArg)
19581
0
      return nullptr;
19582
0
    bool isColMajor = isColMajorArg->getSExtValue();
19583
0
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
19584
0
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
19585
0
    if (IID == 0)
19586
0
      return nullptr;
19587
0
    Function *Intrinsic =
19588
0
        CGM.getIntrinsic(IID, Dst->getType());
19589
0
    llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
19590
0
    SmallVector<Value *, 10> Values = {Dst};
19591
0
    for (unsigned i = 0; i < II.NumResults; ++i) {
19592
0
      Value *V = Builder.CreateAlignedLoad(
19593
0
          Src.getElementType(),
19594
0
          Builder.CreateGEP(Src.getElementType(), Src.getPointer(),
19595
0
                            llvm::ConstantInt::get(IntTy, i)),
19596
0
          CharUnits::fromQuantity(4));
19597
0
      Values.push_back(Builder.CreateBitCast(V, ParamType));
19598
0
    }
19599
0
    Values.push_back(Ldm);
19600
0
    Value *Result = Builder.CreateCall(Intrinsic, Values);
19601
0
    return Result;
19602
0
  }
19603
19604
  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
19605
  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
19606
0
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19607
0
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19608
0
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19609
0
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19610
0
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19611
0
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19612
0
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19613
0
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19614
0
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19615
0
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19616
0
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19617
0
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19618
0
  case NVPTX::BI__imma_m16n16k16_mma_s8:
19619
0
  case NVPTX::BI__imma_m16n16k16_mma_u8:
19620
0
  case NVPTX::BI__imma_m32n8k16_mma_s8:
19621
0
  case NVPTX::BI__imma_m32n8k16_mma_u8:
19622
0
  case NVPTX::BI__imma_m8n32k16_mma_s8:
19623
0
  case NVPTX::BI__imma_m8n32k16_mma_u8:
19624
0
  case NVPTX::BI__imma_m8n8k32_mma_s4:
19625
0
  case NVPTX::BI__imma_m8n8k32_mma_u4:
19626
0
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19627
0
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19628
0
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
19629
0
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19630
0
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19631
0
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19632
0
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
19633
0
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
19634
0
    Address SrcA = EmitPointerWithAlignment(E->getArg(1));
19635
0
    Address SrcB = EmitPointerWithAlignment(E->getArg(2));
19636
0
    Address SrcC = EmitPointerWithAlignment(E->getArg(3));
19637
0
    std::optional<llvm::APSInt> LayoutArg =
19638
0
        E->getArg(4)->getIntegerConstantExpr(getContext());
19639
0
    if (!LayoutArg)
19640
0
      return nullptr;
19641
0
    int Layout = LayoutArg->getSExtValue();
19642
0
    if (Layout < 0 || Layout > 3)
19643
0
      return nullptr;
19644
0
    llvm::APSInt SatfArg;
19645
0
    if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
19646
0
        BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
19647
0
      SatfArg = 0;  // .b1 does not have satf argument.
19648
0
    else if (std::optional<llvm::APSInt> OptSatfArg =
19649
0
                 E->getArg(5)->getIntegerConstantExpr(getContext()))
19650
0
      SatfArg = *OptSatfArg;
19651
0
    else
19652
0
      return nullptr;
19653
0
    bool Satf = SatfArg.getSExtValue();
19654
0
    NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
19655
0
    unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
19656
0
    if (IID == 0)  // Unsupported combination of Layout/Satf.
19657
0
      return nullptr;
19658
19659
0
    SmallVector<Value *, 24> Values;
19660
0
    Function *Intrinsic = CGM.getIntrinsic(IID);
19661
0
    llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
19662
    // Load A
19663
0
    for (unsigned i = 0; i < MI.NumEltsA; ++i) {
19664
0
      Value *V = Builder.CreateAlignedLoad(
19665
0
          SrcA.getElementType(),
19666
0
          Builder.CreateGEP(SrcA.getElementType(), SrcA.getPointer(),
19667
0
                            llvm::ConstantInt::get(IntTy, i)),
19668
0
          CharUnits::fromQuantity(4));
19669
0
      Values.push_back(Builder.CreateBitCast(V, AType));
19670
0
    }
19671
    // Load B
19672
0
    llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
19673
0
    for (unsigned i = 0; i < MI.NumEltsB; ++i) {
19674
0
      Value *V = Builder.CreateAlignedLoad(
19675
0
          SrcB.getElementType(),
19676
0
          Builder.CreateGEP(SrcB.getElementType(), SrcB.getPointer(),
19677
0
                            llvm::ConstantInt::get(IntTy, i)),
19678
0
          CharUnits::fromQuantity(4));
19679
0
      Values.push_back(Builder.CreateBitCast(V, BType));
19680
0
    }
19681
    // Load C
19682
0
    llvm::Type *CType =
19683
0
        Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
19684
0
    for (unsigned i = 0; i < MI.NumEltsC; ++i) {
19685
0
      Value *V = Builder.CreateAlignedLoad(
19686
0
          SrcC.getElementType(),
19687
0
          Builder.CreateGEP(SrcC.getElementType(), SrcC.getPointer(),
19688
0
                            llvm::ConstantInt::get(IntTy, i)),
19689
0
          CharUnits::fromQuantity(4));
19690
0
      Values.push_back(Builder.CreateBitCast(V, CType));
19691
0
    }
19692
0
    Value *Result = Builder.CreateCall(Intrinsic, Values);
19693
0
    llvm::Type *DType = Dst.getElementType();
19694
0
    for (unsigned i = 0; i < MI.NumEltsD; ++i)
19695
0
      Builder.CreateAlignedStore(
19696
0
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
19697
0
          Builder.CreateGEP(Dst.getElementType(), Dst.getPointer(),
19698
0
                            llvm::ConstantInt::get(IntTy, i)),
19699
0
          CharUnits::fromQuantity(4));
19700
0
    return Result;
19701
0
  }
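  // Editor's illustration (not part of the original CGBuiltin.cpp source): a
  // minimal sketch of what the case above emits, assuming a call such as
  //   __hmma_m16n16k16_mma_f32f32(d, a, b, c, /*layout=*/0, /*satf=*/0);
  // The loops load MI.NumEltsA fragments of `a`, MI.NumEltsB of `b` and
  // MI.NumEltsC of `c`, bitcast each to the intrinsic's parameter type, issue
  // a single call to the nvvm_wmma_m16n16k16_mma_sync variant selected by
  // getMMAIntrinsic(Layout, Satf), and store the MI.NumEltsD elements of the
  // returned aggregate back through `d`.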
19702
  // The following builtins require half type support
19703
0
  case NVPTX::BI__nvvm_ex2_approx_f16:
19704
0
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
19705
0
  case NVPTX::BI__nvvm_ex2_approx_f16x2:
19706
0
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
19707
0
  case NVPTX::BI__nvvm_ff2f16x2_rn:
19708
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
19709
0
  case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
19710
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
19711
0
  case NVPTX::BI__nvvm_ff2f16x2_rz:
19712
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
19713
0
  case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
19714
0
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
19715
0
  case NVPTX::BI__nvvm_fma_rn_f16:
19716
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
19717
0
  case NVPTX::BI__nvvm_fma_rn_f16x2:
19718
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
19719
0
  case NVPTX::BI__nvvm_fma_rn_ftz_f16:
19720
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
19721
0
  case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
19722
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
19723
0
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
19724
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
19725
0
                        *this);
19726
0
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
19727
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
19728
0
                        *this);
19729
0
  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
19730
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
19731
0
                        *this);
19732
0
  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
19733
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
19734
0
                        *this);
19735
0
  case NVPTX::BI__nvvm_fma_rn_relu_f16:
19736
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
19737
0
  case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
19738
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
19739
0
  case NVPTX::BI__nvvm_fma_rn_sat_f16:
19740
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
19741
0
  case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
19742
0
    return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
19743
0
  case NVPTX::BI__nvvm_fmax_f16:
19744
0
    return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
19745
0
  case NVPTX::BI__nvvm_fmax_f16x2:
19746
0
    return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
19747
0
  case NVPTX::BI__nvvm_fmax_ftz_f16:
19748
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
19749
0
  case NVPTX::BI__nvvm_fmax_ftz_f16x2:
19750
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
19751
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
19752
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
19753
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
19754
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
19755
0
                        *this);
19756
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
19757
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
19758
0
                        E, *this);
19759
0
  case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
19760
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
19761
0
                        BuiltinID, E, *this);
19762
0
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
19763
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
19764
0
                        *this);
19765
0
  case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
19766
0
    return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
19767
0
                        E, *this);
19768
0
  case NVPTX::BI__nvvm_fmax_nan_f16:
19769
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
19770
0
  case NVPTX::BI__nvvm_fmax_nan_f16x2:
19771
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
19772
0
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
19773
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
19774
0
                        *this);
19775
0
  case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
19776
0
    return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
19777
0
                        E, *this);
19778
0
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
19779
0
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
19780
0
                        *this);
19781
0
  case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
19782
0
    return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
19783
0
                        *this);
19784
0
  case NVPTX::BI__nvvm_fmin_f16:
19785
0
    return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
19786
0
  case NVPTX::BI__nvvm_fmin_f16x2:
19787
0
    return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
19788
0
  case NVPTX::BI__nvvm_fmin_ftz_f16:
19789
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
19790
0
  case NVPTX::BI__nvvm_fmin_ftz_f16x2:
19791
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
19792
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
19793
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
19794
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
19795
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
19796
0
                        *this);
19797
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
19798
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
19799
0
                        E, *this);
19800
0
  case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
19801
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
19802
0
                        BuiltinID, E, *this);
19803
0
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
19804
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
19805
0
                        *this);
19806
0
  case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
19807
0
    return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
19808
0
                        E, *this);
19809
0
  case NVPTX::BI__nvvm_fmin_nan_f16:
19810
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
19811
0
  case NVPTX::BI__nvvm_fmin_nan_f16x2:
19812
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
19813
0
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
19814
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
19815
0
                        *this);
19816
0
  case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
19817
0
    return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
19818
0
                        E, *this);
19819
0
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
19820
0
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
19821
0
                        *this);
19822
0
  case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
19823
0
    return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
19824
0
                        *this);
19825
0
  case NVPTX::BI__nvvm_ldg_h:
19826
0
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19827
0
  case NVPTX::BI__nvvm_ldg_h2:
19828
0
    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
19829
0
  case NVPTX::BI__nvvm_ldu_h:
19830
0
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19831
0
  case NVPTX::BI__nvvm_ldu_h2: {
19832
0
    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
19833
0
  }
19834
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
19835
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
19836
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
19837
0
                       4);
19838
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
19839
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
19840
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
19841
0
                       8);
19842
0
  case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
19843
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
19844
0
                       Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
19845
0
                       16);
19846
0
  case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
19847
0
    return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
19848
0
                       Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
19849
0
                       16);
19850
0
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
19851
0
    return Builder.CreateCall(
19852
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
19853
0
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
19854
0
    return Builder.CreateCall(
19855
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
19856
0
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
19857
0
    return Builder.CreateCall(
19858
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
19859
0
  case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
19860
0
    return Builder.CreateCall(
19861
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
19862
0
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
19863
0
    return Builder.CreateCall(
19864
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
19865
0
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
19866
0
    return Builder.CreateCall(
19867
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
19868
0
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
19869
0
    return Builder.CreateCall(
19870
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
19871
0
  case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
19872
0
    return Builder.CreateCall(
19873
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
19874
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
19875
0
    return Builder.CreateCall(
19876
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
19877
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
19878
0
    return Builder.CreateCall(
19879
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
19880
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
19881
0
    return Builder.CreateCall(
19882
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
19883
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
19884
0
    return Builder.CreateCall(
19885
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
19886
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
19887
0
    return Builder.CreateCall(
19888
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
19889
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
19890
0
    return Builder.CreateCall(
19891
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
19892
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
19893
0
    return Builder.CreateCall(
19894
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
19895
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
19896
0
    return Builder.CreateCall(
19897
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
19898
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
19899
0
    return Builder.CreateCall(
19900
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
19901
0
  case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
19902
0
    return Builder.CreateCall(
19903
0
        CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
19904
0
  case NVPTX::BI__nvvm_is_explicit_cluster:
19905
0
    return Builder.CreateCall(
19906
0
        CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
19907
0
  case NVPTX::BI__nvvm_isspacep_shared_cluster:
19908
0
    return Builder.CreateCall(
19909
0
        CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
19910
0
        EmitScalarExpr(E->getArg(0)));
19911
0
  case NVPTX::BI__nvvm_mapa:
19912
0
    return Builder.CreateCall(
19913
0
        CGM.getIntrinsic(Intrinsic::nvvm_mapa),
19914
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19915
0
  case NVPTX::BI__nvvm_mapa_shared_cluster:
19916
0
    return Builder.CreateCall(
19917
0
        CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
19918
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
19919
0
  case NVPTX::BI__nvvm_getctarank:
19920
0
    return Builder.CreateCall(
19921
0
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
19922
0
        EmitScalarExpr(E->getArg(0)));
19923
0
  case NVPTX::BI__nvvm_getctarank_shared_cluster:
19924
0
    return Builder.CreateCall(
19925
0
        CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
19926
0
        EmitScalarExpr(E->getArg(0)));
19927
0
  case NVPTX::BI__nvvm_barrier_cluster_arrive:
19928
0
    return Builder.CreateCall(
19929
0
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
19930
0
  case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
19931
0
    return Builder.CreateCall(
19932
0
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
19933
0
  case NVPTX::BI__nvvm_barrier_cluster_wait:
19934
0
    return Builder.CreateCall(
19935
0
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
19936
0
  case NVPTX::BI__nvvm_fence_sc_cluster:
19937
0
    return Builder.CreateCall(
19938
0
        CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
19939
0
  default:
19940
0
    return nullptr;
19941
0
  }
19942
0
}
19943
19944
namespace {
19945
struct BuiltinAlignArgs {
19946
  llvm::Value *Src = nullptr;
19947
  llvm::Type *SrcType = nullptr;
19948
  llvm::Value *Alignment = nullptr;
19949
  llvm::Value *Mask = nullptr;
19950
  llvm::IntegerType *IntType = nullptr;
19951
19952
0
  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
19953
0
    QualType AstType = E->getArg(0)->getType();
19954
0
    if (AstType->isArrayType())
19955
0
      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).getPointer();
19956
0
    else
19957
0
      Src = CGF.EmitScalarExpr(E->getArg(0));
19958
0
    SrcType = Src->getType();
19959
0
    if (SrcType->isPointerTy()) {
19960
0
      IntType = IntegerType::get(
19961
0
          CGF.getLLVMContext(),
19962
0
          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
19963
0
    } else {
19964
0
      assert(SrcType->isIntegerTy());
19965
0
      IntType = cast<llvm::IntegerType>(SrcType);
19966
0
    }
19967
0
    Alignment = CGF.EmitScalarExpr(E->getArg(1));
19968
0
    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
19969
0
    auto *One = llvm::ConstantInt::get(IntType, 1);
19970
0
    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
19971
0
  }
19972
};
19973
} // namespace
19974
19975
/// Generate (x & (y-1)) == 0.
19976
0
RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
19977
0
  BuiltinAlignArgs Args(E, *this);
19978
0
  llvm::Value *SrcAddress = Args.Src;
19979
0
  if (Args.SrcType->isPointerTy())
19980
0
    SrcAddress =
19981
0
        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
19982
0
  return RValue::get(Builder.CreateICmpEQ(
19983
0
      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
19984
0
      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
19985
0
}
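// Editor's illustrative sketch (not part of the original source): how the
// lowering above looks from the source side, assuming Clang's
// __builtin_is_aligned extension; the helper name below is hypothetical.
static inline bool IsAligned16(const void *P) {
  // Emits: icmp eq (and (ptrtoint P), 16 - 1), 0 -- i.e. (x & (y-1)) == 0.
  return __builtin_is_aligned(P, 16);
}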
19986
19987
/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
19988
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
19989
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
19990
0
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
19991
0
  BuiltinAlignArgs Args(E, *this);
19992
0
  llvm::Value *SrcForMask = Args.Src;
19993
0
  if (AlignUp) {
19994
    // When aligning up we have to first add the mask to ensure we go over the
19995
    // next alignment value and then align down to the next valid multiple.
19996
    // By adding the mask, we ensure that align_up on an already aligned
19997
    // value will not change the value.
19998
0
    if (Args.Src->getType()->isPointerTy()) {
19999
0
      if (getLangOpts().isSignedOverflowDefined())
20000
0
        SrcForMask =
20001
0
            Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20002
0
      else
20003
0
        SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20004
0
                                            /*SignedIndices=*/true,
20005
0
                                            /*isSubtraction=*/false,
20006
0
                                            E->getExprLoc(), "over_boundary");
20007
0
    } else {
20008
0
      SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20009
0
    }
20010
0
  }
20011
  // Invert the mask to only clear the lower bits.
20012
0
  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20013
0
  llvm::Value *Result = nullptr;
20014
0
  if (Args.Src->getType()->isPointerTy()) {
20015
0
    Result = Builder.CreateIntrinsic(
20016
0
        Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20017
0
        {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20018
0
  } else {
20019
0
    Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20020
0
  }
20021
0
  assert(Result->getType() == Args.SrcType);
20022
0
  return RValue::get(Result);
20023
0
}
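// Editor's illustrative sketch (not part of the original source): the two
// shapes produced by EmitBuiltinAlignTo, assuming Clang's __builtin_align_up
// and __builtin_align_down extensions; helper names are hypothetical.
static inline void *AlignUp32(void *P) {
  // Pointer path: GEP by 31 ("over_boundary"), then llvm.ptrmask with ~31.
  return __builtin_align_up(P, 32);
}
static inline unsigned long AlignDown32(unsigned long X) {
  // Integer path: plain X & ~31 ("aligned_result").
  return __builtin_align_down(X, 32);
}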
20024
20025
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20026
0
                                                   const CallExpr *E) {
20027
0
  switch (BuiltinID) {
20028
0
  case WebAssembly::BI__builtin_wasm_memory_size: {
20029
0
    llvm::Type *ResultType = ConvertType(E->getType());
20030
0
    Value *I = EmitScalarExpr(E->getArg(0));
20031
0
    Function *Callee =
20032
0
        CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20033
0
    return Builder.CreateCall(Callee, I);
20034
0
  }
20035
0
  case WebAssembly::BI__builtin_wasm_memory_grow: {
20036
0
    llvm::Type *ResultType = ConvertType(E->getType());
20037
0
    Value *Args[] = {EmitScalarExpr(E->getArg(0)),
20038
0
                     EmitScalarExpr(E->getArg(1))};
20039
0
    Function *Callee =
20040
0
        CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20041
0
    return Builder.CreateCall(Callee, Args);
20042
0
  }
20043
0
  case WebAssembly::BI__builtin_wasm_tls_size: {
20044
0
    llvm::Type *ResultType = ConvertType(E->getType());
20045
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20046
0
    return Builder.CreateCall(Callee);
20047
0
  }
20048
0
  case WebAssembly::BI__builtin_wasm_tls_align: {
20049
0
    llvm::Type *ResultType = ConvertType(E->getType());
20050
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20051
0
    return Builder.CreateCall(Callee);
20052
0
  }
20053
0
  case WebAssembly::BI__builtin_wasm_tls_base: {
20054
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20055
0
    return Builder.CreateCall(Callee);
20056
0
  }
20057
0
  case WebAssembly::BI__builtin_wasm_throw: {
20058
0
    Value *Tag = EmitScalarExpr(E->getArg(0));
20059
0
    Value *Obj = EmitScalarExpr(E->getArg(1));
20060
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20061
0
    return Builder.CreateCall(Callee, {Tag, Obj});
20062
0
  }
20063
0
  case WebAssembly::BI__builtin_wasm_rethrow: {
20064
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20065
0
    return Builder.CreateCall(Callee);
20066
0
  }
20067
0
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20068
0
    Value *Addr = EmitScalarExpr(E->getArg(0));
20069
0
    Value *Expected = EmitScalarExpr(E->getArg(1));
20070
0
    Value *Timeout = EmitScalarExpr(E->getArg(2));
20071
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20072
0
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20073
0
  }
20074
0
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20075
0
    Value *Addr = EmitScalarExpr(E->getArg(0));
20076
0
    Value *Expected = EmitScalarExpr(E->getArg(1));
20077
0
    Value *Timeout = EmitScalarExpr(E->getArg(2));
20078
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20079
0
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20080
0
  }
20081
0
  case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20082
0
    Value *Addr = EmitScalarExpr(E->getArg(0));
20083
0
    Value *Count = EmitScalarExpr(E->getArg(1));
20084
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
20085
0
    return Builder.CreateCall(Callee, {Addr, Count});
20086
0
  }
20087
0
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
20088
0
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
20089
0
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
20090
0
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
20091
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20092
0
    llvm::Type *ResT = ConvertType(E->getType());
20093
0
    Function *Callee =
20094
0
        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
20095
0
    return Builder.CreateCall(Callee, {Src});
20096
0
  }
20097
0
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
20098
0
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
20099
0
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
20100
0
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
20101
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20102
0
    llvm::Type *ResT = ConvertType(E->getType());
20103
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
20104
0
                                        {ResT, Src->getType()});
20105
0
    return Builder.CreateCall(Callee, {Src});
20106
0
  }
20107
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
20108
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
20109
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
20110
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
20111
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
20112
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20113
0
    llvm::Type *ResT = ConvertType(E->getType());
20114
0
    Function *Callee =
20115
0
        CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
20116
0
    return Builder.CreateCall(Callee, {Src});
20117
0
  }
20118
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
20119
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
20120
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
20121
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
20122
0
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
20123
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20124
0
    llvm::Type *ResT = ConvertType(E->getType());
20125
0
    Function *Callee =
20126
0
        CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
20127
0
    return Builder.CreateCall(Callee, {Src});
20128
0
  }
20129
0
  case WebAssembly::BI__builtin_wasm_min_f32:
20130
0
  case WebAssembly::BI__builtin_wasm_min_f64:
20131
0
  case WebAssembly::BI__builtin_wasm_min_f32x4:
20132
0
  case WebAssembly::BI__builtin_wasm_min_f64x2: {
20133
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20134
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20135
0
    Function *Callee =
20136
0
        CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
20137
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20138
0
  }
20139
0
  case WebAssembly::BI__builtin_wasm_max_f32:
20140
0
  case WebAssembly::BI__builtin_wasm_max_f64:
20141
0
  case WebAssembly::BI__builtin_wasm_max_f32x4:
20142
0
  case WebAssembly::BI__builtin_wasm_max_f64x2: {
20143
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20144
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20145
0
    Function *Callee =
20146
0
        CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
20147
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20148
0
  }
20149
0
  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
20150
0
  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
20151
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20152
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20153
0
    Function *Callee =
20154
0
        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
20155
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20156
0
  }
20157
0
  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
20158
0
  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
20159
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20160
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20161
0
    Function *Callee =
20162
0
        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
20163
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20164
0
  }
20165
0
  case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20166
0
  case WebAssembly::BI__builtin_wasm_floor_f32x4:
20167
0
  case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20168
0
  case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20169
0
  case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20170
0
  case WebAssembly::BI__builtin_wasm_floor_f64x2:
20171
0
  case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20172
0
  case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
20173
0
    unsigned IntNo;
20174
0
    switch (BuiltinID) {
20175
0
    case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20176
0
    case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20177
0
      IntNo = Intrinsic::ceil;
20178
0
      break;
20179
0
    case WebAssembly::BI__builtin_wasm_floor_f32x4:
20180
0
    case WebAssembly::BI__builtin_wasm_floor_f64x2:
20181
0
      IntNo = Intrinsic::floor;
20182
0
      break;
20183
0
    case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20184
0
    case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20185
0
      IntNo = Intrinsic::trunc;
20186
0
      break;
20187
0
    case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20188
0
    case WebAssembly::BI__builtin_wasm_nearest_f64x2:
20189
0
      IntNo = Intrinsic::nearbyint;
20190
0
      break;
20191
0
    default:
20192
0
      llvm_unreachable("unexpected builtin ID");
20193
0
    }
20194
0
    Value *Value = EmitScalarExpr(E->getArg(0));
20195
0
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20196
0
    return Builder.CreateCall(Callee, Value);
20197
0
  }
20198
0
  case WebAssembly::BI__builtin_wasm_ref_null_extern: {
20199
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
20200
0
    return Builder.CreateCall(Callee);
20201
0
  }
20202
0
  case WebAssembly::BI__builtin_wasm_ref_null_func: {
20203
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
20204
0
    return Builder.CreateCall(Callee);
20205
0
  }
20206
0
  case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
20207
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20208
0
    Value *Indices = EmitScalarExpr(E->getArg(1));
20209
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
20210
0
    return Builder.CreateCall(Callee, {Src, Indices});
20211
0
  }
20212
0
  case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20213
0
  case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20214
0
  case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20215
0
  case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20216
0
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20217
0
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20218
0
  case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20219
0
  case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
20220
0
    unsigned IntNo;
20221
0
    switch (BuiltinID) {
20222
0
    case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20223
0
    case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20224
0
      IntNo = Intrinsic::sadd_sat;
20225
0
      break;
20226
0
    case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20227
0
    case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20228
0
      IntNo = Intrinsic::uadd_sat;
20229
0
      break;
20230
0
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20231
0
    case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20232
0
      IntNo = Intrinsic::wasm_sub_sat_signed;
20233
0
      break;
20234
0
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20235
0
    case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
20236
0
      IntNo = Intrinsic::wasm_sub_sat_unsigned;
20237
0
      break;
20238
0
    default:
20239
0
      llvm_unreachable("unexpected builtin ID");
20240
0
    }
20241
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20242
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20243
0
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20244
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20245
0
  }
20246
0
  case WebAssembly::BI__builtin_wasm_abs_i8x16:
20247
0
  case WebAssembly::BI__builtin_wasm_abs_i16x8:
20248
0
  case WebAssembly::BI__builtin_wasm_abs_i32x4:
20249
0
  case WebAssembly::BI__builtin_wasm_abs_i64x2: {
20250
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20251
0
    Value *Neg = Builder.CreateNeg(Vec, "neg");
20252
0
    Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
20253
0
    Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
20254
0
    return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
20255
0
  }
20256
0
  case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20257
0
  case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20258
0
  case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20259
0
  case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20260
0
  case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20261
0
  case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20262
0
  case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20263
0
  case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20264
0
  case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20265
0
  case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20266
0
  case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20267
0
  case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
20268
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20269
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20270
0
    Value *ICmp;
20271
0
    switch (BuiltinID) {
20272
0
    case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20273
0
    case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20274
0
    case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20275
0
      ICmp = Builder.CreateICmpSLT(LHS, RHS);
20276
0
      break;
20277
0
    case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20278
0
    case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20279
0
    case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20280
0
      ICmp = Builder.CreateICmpULT(LHS, RHS);
20281
0
      break;
20282
0
    case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20283
0
    case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20284
0
    case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20285
0
      ICmp = Builder.CreateICmpSGT(LHS, RHS);
20286
0
      break;
20287
0
    case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20288
0
    case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20289
0
    case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20290
0
      ICmp = Builder.CreateICmpUGT(LHS, RHS);
20291
0
      break;
20292
0
    default:
20293
0
      llvm_unreachable("unexpected builtin ID");
20294
0
    }
20295
0
    return Builder.CreateSelect(ICmp, LHS, RHS);
20296
0
  }
20297
0
  case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20298
0
  case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20299
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20300
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20301
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20302
0
                                        ConvertType(E->getType()));
20303
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20304
0
  }
20305
0
  case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20306
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20307
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20308
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20309
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20310
0
  }
20311
0
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20312
0
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20313
0
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20314
0
  case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20315
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20316
0
    unsigned IntNo;
20317
0
    switch (BuiltinID) {
20318
0
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20319
0
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20320
0
      IntNo = Intrinsic::wasm_extadd_pairwise_signed;
20321
0
      break;
20322
0
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20323
0
    case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
20324
0
      IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
20325
0
      break;
20326
0
    default:
20327
0
      llvm_unreachable("unexpected builtin ID");
20328
0
    }
20329
20330
0
    Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20331
0
    return Builder.CreateCall(Callee, Vec);
20332
0
  }
20333
0
  case WebAssembly::BI__builtin_wasm_bitselect: {
20334
0
    Value *V1 = EmitScalarExpr(E->getArg(0));
20335
0
    Value *V2 = EmitScalarExpr(E->getArg(1));
20336
0
    Value *C = EmitScalarExpr(E->getArg(2));
20337
0
    Function *Callee =
20338
0
        CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
20339
0
    return Builder.CreateCall(Callee, {V1, V2, C});
20340
0
  }
20341
0
  case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
20342
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20343
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20344
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
20345
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20346
0
  }
20347
0
  case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
20348
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20349
0
    Function *Callee =
20350
0
        CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
20351
0
    return Builder.CreateCall(Callee, {Vec});
20352
0
  }
20353
0
  case WebAssembly::BI__builtin_wasm_any_true_v128:
20354
0
  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20355
0
  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20356
0
  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20357
0
  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
20358
0
    unsigned IntNo;
20359
0
    switch (BuiltinID) {
20360
0
    case WebAssembly::BI__builtin_wasm_any_true_v128:
20361
0
      IntNo = Intrinsic::wasm_anytrue;
20362
0
      break;
20363
0
    case WebAssembly::BI__builtin_wasm_all_true_i8x16:
20364
0
    case WebAssembly::BI__builtin_wasm_all_true_i16x8:
20365
0
    case WebAssembly::BI__builtin_wasm_all_true_i32x4:
20366
0
    case WebAssembly::BI__builtin_wasm_all_true_i64x2:
20367
0
      IntNo = Intrinsic::wasm_alltrue;
20368
0
      break;
20369
0
    default:
20370
0
      llvm_unreachable("unexpected builtin ID");
20371
0
    }
20372
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20373
0
    Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
20374
0
    return Builder.CreateCall(Callee, {Vec});
20375
0
  }
20376
0
  case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
20377
0
  case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
20378
0
  case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
20379
0
  case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
20380
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20381
0
    Function *Callee =
20382
0
        CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
20383
0
    return Builder.CreateCall(Callee, {Vec});
20384
0
  }
20385
0
  case WebAssembly::BI__builtin_wasm_abs_f32x4:
20386
0
  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
20387
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20388
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
20389
0
    return Builder.CreateCall(Callee, {Vec});
20390
0
  }
20391
0
  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
20392
0
  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
20393
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20394
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
20395
0
    return Builder.CreateCall(Callee, {Vec});
20396
0
  }
20397
0
  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20398
0
  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20399
0
  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20400
0
  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
20401
0
    Value *Low = EmitScalarExpr(E->getArg(0));
20402
0
    Value *High = EmitScalarExpr(E->getArg(1));
20403
0
    unsigned IntNo;
20404
0
    switch (BuiltinID) {
20405
0
    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
20406
0
    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
20407
0
      IntNo = Intrinsic::wasm_narrow_signed;
20408
0
      break;
20409
0
    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
20410
0
    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
20411
0
      IntNo = Intrinsic::wasm_narrow_unsigned;
20412
0
      break;
20413
0
    default:
20414
0
      llvm_unreachable("unexpected builtin ID");
20415
0
    }
20416
0
    Function *Callee =
20417
0
        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
20418
0
    return Builder.CreateCall(Callee, {Low, High});
20419
0
  }
20420
0
  case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20421
0
  case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
20422
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20423
0
    unsigned IntNo;
20424
0
    switch (BuiltinID) {
20425
0
    case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
20426
0
      IntNo = Intrinsic::fptosi_sat;
20427
0
      break;
20428
0
    case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
20429
0
      IntNo = Intrinsic::fptoui_sat;
20430
0
      break;
20431
0
    default:
20432
0
      llvm_unreachable("unexpected builtin ID");
20433
0
    }
20434
0
    llvm::Type *SrcT = Vec->getType();
20435
0
    llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
20436
0
    Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
20437
0
    Value *Trunc = Builder.CreateCall(Callee, Vec);
20438
0
    Value *Splat = Constant::getNullValue(TruncT);
20439
0
    return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
20440
0
  }
20441
0
  case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
20442
0
    Value *Ops[18];
20443
0
    size_t OpIdx = 0;
20444
0
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
20445
0
    Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
20446
0
    while (OpIdx < 18) {
20447
0
      std::optional<llvm::APSInt> LaneConst =
20448
0
          E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
20449
0
      assert(LaneConst && "Constant arg isn't actually constant?");
20450
0
      Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
20451
0
    }
20452
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
20453
0
    return Builder.CreateCall(Callee, Ops);
20454
0
  }
20455
0
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20456
0
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20457
0
  case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20458
0
  case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
20459
0
    Value *A = EmitScalarExpr(E->getArg(0));
20460
0
    Value *B = EmitScalarExpr(E->getArg(1));
20461
0
    Value *C = EmitScalarExpr(E->getArg(2));
20462
0
    unsigned IntNo;
20463
0
    switch (BuiltinID) {
20464
0
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
20465
0
    case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
20466
0
      IntNo = Intrinsic::wasm_relaxed_madd;
20467
0
      break;
20468
0
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
20469
0
    case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
20470
0
      IntNo = Intrinsic::wasm_relaxed_nmadd;
20471
0
      break;
20472
0
    default:
20473
0
      llvm_unreachable("unexpected builtin ID");
20474
0
    }
20475
0
    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
20476
0
    return Builder.CreateCall(Callee, {A, B, C});
20477
0
  }
20478
0
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
20479
0
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
20480
0
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
20481
0
  case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
20482
0
    Value *A = EmitScalarExpr(E->getArg(0));
20483
0
    Value *B = EmitScalarExpr(E->getArg(1));
20484
0
    Value *C = EmitScalarExpr(E->getArg(2));
20485
0
    Function *Callee =
20486
0
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
20487
0
    return Builder.CreateCall(Callee, {A, B, C});
20488
0
  }
20489
0
  case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
20490
0
    Value *Src = EmitScalarExpr(E->getArg(0));
20491
0
    Value *Indices = EmitScalarExpr(E->getArg(1));
20492
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
20493
0
    return Builder.CreateCall(Callee, {Src, Indices});
20494
0
  }
20495
0
  case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
20496
0
  case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
20497
0
  case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
20498
0
  case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
20499
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20500
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20501
0
    unsigned IntNo;
20502
0
    switch (BuiltinID) {
20503
0
    case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
20504
0
    case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
20505
0
      IntNo = Intrinsic::wasm_relaxed_min;
20506
0
      break;
20507
0
    case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
20508
0
    case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
20509
0
      IntNo = Intrinsic::wasm_relaxed_max;
20510
0
      break;
20511
0
    default:
20512
0
      llvm_unreachable("unexpected builtin ID");
20513
0
    }
20514
0
    Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
20515
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20516
0
  }
20517
0
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
20518
0
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
20519
0
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
20520
0
  case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
20521
0
    Value *Vec = EmitScalarExpr(E->getArg(0));
20522
0
    unsigned IntNo;
20523
0
    switch (BuiltinID) {
20524
0
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
20525
0
      IntNo = Intrinsic::wasm_relaxed_trunc_signed;
20526
0
      break;
20527
0
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
20528
0
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
20529
0
      break;
20530
0
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
20531
0
      IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
20532
0
      break;
20533
0
    case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
20534
0
      IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
20535
0
      break;
20536
0
    default:
20537
0
      llvm_unreachable("unexpected builtin ID");
20538
0
    }
20539
0
    Function *Callee = CGM.getIntrinsic(IntNo);
20540
0
    return Builder.CreateCall(Callee, {Vec});
20541
0
  }
20542
0
  case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
20543
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20544
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20545
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
20546
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20547
0
  }
20548
0
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
20549
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20550
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20551
0
    Function *Callee =
20552
0
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
20553
0
    return Builder.CreateCall(Callee, {LHS, RHS});
20554
0
  }
20555
0
  case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
20556
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20557
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20558
0
    Value *Acc = EmitScalarExpr(E->getArg(2));
20559
0
    Function *Callee =
20560
0
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
20561
0
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
20562
0
  }
20563
0
  case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
20564
0
    Value *LHS = EmitScalarExpr(E->getArg(0));
20565
0
    Value *RHS = EmitScalarExpr(E->getArg(1));
20566
0
    Value *Acc = EmitScalarExpr(E->getArg(2));
20567
0
    Function *Callee =
20568
0
        CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
20569
0
    return Builder.CreateCall(Callee, {LHS, RHS, Acc});
20570
0
  }
20571
0
  case WebAssembly::BI__builtin_wasm_table_get: {
20572
0
    assert(E->getArg(0)->getType()->isArrayType());
20573
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20574
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20575
0
    Function *Callee;
20576
0
    if (E->getType().isWebAssemblyExternrefType())
20577
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
20578
0
    else if (E->getType().isWebAssemblyFuncrefType())
20579
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
20580
0
    else
20581
0
      llvm_unreachable(
20582
0
          "Unexpected reference type for __builtin_wasm_table_get");
20583
0
    return Builder.CreateCall(Callee, {Table, Index});
20584
0
  }
20585
0
  case WebAssembly::BI__builtin_wasm_table_set: {
20586
0
    assert(E->getArg(0)->getType()->isArrayType());
20587
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20588
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20589
0
    Value *Val = EmitScalarExpr(E->getArg(2));
20590
0
    Function *Callee;
20591
0
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
20592
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
20593
0
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
20594
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
20595
0
    else
20596
0
      llvm_unreachable(
20597
0
          "Unexpected reference type for __builtin_wasm_table_set");
20598
0
    return Builder.CreateCall(Callee, {Table, Index, Val});
20599
0
  }
20600
0
  case WebAssembly::BI__builtin_wasm_table_size: {
20601
0
    assert(E->getArg(0)->getType()->isArrayType());
20602
0
    Value *Value = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20603
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
20604
0
    return Builder.CreateCall(Callee, Value);
20605
0
  }
20606
0
  case WebAssembly::BI__builtin_wasm_table_grow: {
20607
0
    assert(E->getArg(0)->getType()->isArrayType());
20608
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20609
0
    Value *Val = EmitScalarExpr(E->getArg(1));
20610
0
    Value *NElems = EmitScalarExpr(E->getArg(2));
20611
20612
0
    Function *Callee;
20613
0
    if (E->getArg(1)->getType().isWebAssemblyExternrefType())
20614
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
20615
0
    else if (E->getArg(1)->getType().isWebAssemblyFuncrefType())
20616
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
20617
0
    else
20618
0
      llvm_unreachable(
20619
0
          "Unexpected reference type for __builtin_wasm_table_grow");
20620
20621
0
    return Builder.CreateCall(Callee, {Table, Val, NElems});
20622
0
  }
20623
0
  case WebAssembly::BI__builtin_wasm_table_fill: {
20624
0
    assert(E->getArg(0)->getType()->isArrayType());
20625
0
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20626
0
    Value *Index = EmitScalarExpr(E->getArg(1));
20627
0
    Value *Val = EmitScalarExpr(E->getArg(2));
20628
0
    Value *NElems = EmitScalarExpr(E->getArg(3));
20629
20630
0
    Function *Callee;
20631
0
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
20632
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
20633
0
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
20634
0
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
20635
0
    else
20636
0
      llvm_unreachable(
20637
0
          "Unexpected reference type for __builtin_wasm_table_fill");
20638
20639
0
    return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
20640
0
  }
20641
0
  case WebAssembly::BI__builtin_wasm_table_copy: {
20642
0
    assert(E->getArg(0)->getType()->isArrayType());
20643
0
    Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).getPointer();
20644
0
    Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).getPointer();
20645
0
    Value *DstIdx = EmitScalarExpr(E->getArg(2));
20646
0
    Value *SrcIdx = EmitScalarExpr(E->getArg(3));
20647
0
    Value *NElems = EmitScalarExpr(E->getArg(4));
20648
20649
0
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
20650
20651
0
    return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
20652
0
  }
20653
0
  default:
20654
0
    return nullptr;
20655
0
  }
20656
0
}
20657
20658
static std::pair<Intrinsic::ID, unsigned>
20659
0
getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
20660
0
  struct Info {
20661
0
    unsigned BuiltinID;
20662
0
    Intrinsic::ID IntrinsicID;
20663
0
    unsigned VecLen;
20664
0
  };
20665
0
  static Info Infos[] = {
20666
0
#define CUSTOM_BUILTIN_MAPPING(x,s) \
20667
0
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
20668
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
20669
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
20670
0
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
20671
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
20672
0
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
20673
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
20674
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
20675
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
20676
0
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
20677
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
20678
0
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
20679
0
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
20680
0
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
20681
0
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
20682
0
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
20683
0
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
20684
0
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
20685
0
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
20686
0
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
20687
0
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
20688
0
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
20689
0
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
20690
    // Legacy builtins that take a vector in place of a vector predicate.
20691
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
20692
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
20693
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
20694
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
20695
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
20696
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
20697
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
20698
0
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
20699
0
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
20700
0
#undef CUSTOM_BUILTIN_MAPPING
20701
0
  };
20702
20703
0
  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
20704
0
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
20705
0
  (void)SortOnce;
20706
20707
0
  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
20708
0
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
20709
0
    return {Intrinsic::not_intrinsic, 0};
20710
20711
0
  return {F->IntrinsicID, F->VecLen};
20712
0
}
20713
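The helper above keeps its builtin-to-intrinsic table sorted lazily: a function-local static whose initializer sorts the array exactly once, after which lookups go through llvm::lower_bound. A minimal standalone sketch of the same pattern, in plain C++ with invented names (not part of CGBuiltin.cpp):

#include <algorithm>
#include <iterator>

struct Mapping {
  unsigned Key;
  unsigned Payload;
};

static Mapping Table[] = {{30, 3}, {10, 1}, {20, 2}};

static unsigned lookup(unsigned Key) {
  auto Cmp = [](const Mapping &A, const Mapping &B) { return A.Key < B.Key; };
  // The static initializer runs exactly once, so the table is sorted on first use.
  static const bool SortedOnce =
      (std::sort(std::begin(Table), std::end(Table), Cmp), true);
  (void)SortedOnce;
  const Mapping *It =
      std::lower_bound(std::begin(Table), std::end(Table), Mapping{Key, 0}, Cmp);
  if (It == std::end(Table) || It->Key != Key)
    return 0; // analogous to returning {Intrinsic::not_intrinsic, 0} above
  return It->Payload;
}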
20714
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
20715
0
                                               const CallExpr *E) {
20716
0
  Intrinsic::ID ID;
20717
0
  unsigned VecLen;
20718
0
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
20719
20720
0
  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
20721
    // The base pointer is passed by address, so it needs to be loaded.
20722
0
    Address A = EmitPointerWithAlignment(E->getArg(0));
20723
0
    Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment());
20724
0
    llvm::Value *Base = Builder.CreateLoad(BP);
20725
    // The treatment of both loads and stores is the same: the arguments for
20726
    // the builtin are the same as the arguments for the intrinsic.
20727
    // Load:
20728
    //   builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
20729
    //   builtin(Base, Mod, Start)      -> intr(Base, Mod, Start)
20730
    // Store:
20731
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
20732
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
20733
0
    SmallVector<llvm::Value*,5> Ops = { Base };
20734
0
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
20735
0
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
20736
20737
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
20738
    // The load intrinsics generate two results (Value, NewBase), stores
20739
    // generate one (NewBase). The new base address needs to be stored.
20740
0
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
20741
0
                                  : Result;
20742
0
    llvm::Value *LV = EmitScalarExpr(E->getArg(0));
20743
0
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
20744
0
    llvm::Value *RetVal =
20745
0
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
20746
0
    if (IsLoad)
20747
0
      RetVal = Builder.CreateExtractValue(Result, 0);
20748
0
    return RetVal;
20749
0
  };
20750
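A minimal C++ model of the convention MakeCircOp implements (illustration only, not part of this file): the builtin receives the address of the base pointer, the load intrinsic conceptually yields {value, new base}, and the new base is written back before the value is returned.

#include <cstdint>
#include <utility>

// Hypothetical stand-in for a circular-addressing load intrinsic: it yields
// the loaded value together with the post-incremented base address.
static std::pair<int32_t, int32_t *> circ_load_i32(int32_t *Base, int32_t Inc) {
  return {*Base, Base + Inc};
}

// Mirrors MakeCircOp with IsLoad = true: load the base pointer from its
// address, call the "intrinsic", store the updated base back, return the value.
static int32_t circ_load_builtin(int32_t **BaseAddr, int32_t Inc) {
  auto [Val, NewBase] = circ_load_i32(*BaseAddr, Inc);
  *BaseAddr = NewBase;
  return Val;
}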
20751
  // Handle the conversion of bit-reverse load intrinsics to bitcode.
20752
  // The intrinsic call emitted below only reads from memory; the
20753
  // write to memory is handled by the store instruction.
20754
0
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
20755
    // The intrinsic generates one result, which is the new value for the base
20756
    // pointer. It needs to be returned. The result of the load instruction is
20757
    // passed back to the caller by address, so the value needs to be stored.
20758
0
    llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
20759
20760
    // Expressions like &(*pt++) increment the pointer on each evaluation.
20761
    // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
20762
    // per call.
20763
0
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
20764
0
    DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment());
20765
0
    llvm::Value *DestAddress = DestAddr.getPointer();
20766
20767
    // Operands are Base, Dest, Modifier.
20768
    // The intrinsic format in LLVM IR is defined as
20769
    // { ValueType, i8* } (i8*, i32).
20770
0
    llvm::Value *Result = Builder.CreateCall(
20771
0
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
20772
20773
    // The value needs to be stored as the variable is passed by reference.
20774
0
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
20775
20776
    // The stored value needs to be truncated to fit the destination type.
20777
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
20778
    // to be handled with stores of the respective destination type.
20779
0
    DestVal = Builder.CreateTrunc(DestVal, DestTy);
20780
20781
0
    Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
20782
    // The updated value of the base pointer is returned.
20783
0
    return Builder.CreateExtractValue(Result, 1);
20784
0
  };
20785
20786
0
  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
20787
0
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
20788
0
                                     : Intrinsic::hexagon_V6_vandvrt;
20789
0
    return Builder.CreateCall(CGM.getIntrinsic(ID),
20790
0
                              {Vec, Builder.getInt32(-1)});
20791
0
  };
20792
0
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
20793
0
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
20794
0
                                     : Intrinsic::hexagon_V6_vandqrt;
20795
0
    return Builder.CreateCall(CGM.getIntrinsic(ID),
20796
0
                              {Pred, Builder.getInt32(-1)});
20797
0
  };
20798
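V2Q and Q2V convert between an HVX vector and a vector predicate by calling vandvrt / vandqrt with an all-ones scalar. A scalar model of that round trip, under the assumption that vandvrt sets one predicate bit per nonzero byte and vandqrt expands each set bit back to an all-ones byte (illustration only, not part of this file):

#include <array>
#include <cstddef>
#include <cstdint>

using Vec  = std::array<uint8_t, 8>; // stand-in for an HVX vector
using Pred = std::array<bool, 8>;    // stand-in for an HVX vector predicate

static Pred v2q(const Vec &V) {      // models V2Q: vandvrt(V, -1)
  Pred Q{};
  for (std::size_t I = 0; I != V.size(); ++I)
    Q[I] = V[I] != 0;                // nonzero byte -> predicate bit set
  return Q;
}

static Vec q2v(const Pred &Q) {      // models Q2V: vandqrt(Q, -1)
  Vec V{};
  for (std::size_t I = 0; I != Q.size(); ++I)
    V[I] = Q[I] ? 0xFF : 0x00;       // set bit -> all-ones byte
  return V;
}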
20799
0
  switch (BuiltinID) {
20800
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
20801
  // and the corresponding C/C++ builtins use loads/stores to update
20802
  // the predicate.
20803
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
20804
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
20805
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
20806
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
20807
    // Get the type from the 0-th argument.
20808
0
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20809
0
    Address PredAddr =
20810
0
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20811
0
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
20812
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20813
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
20814
20815
0
    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20816
0
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20817
0
        PredAddr.getAlignment());
20818
0
    return Builder.CreateExtractValue(Result, 0);
20819
0
  }
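At the C level the carry predicate here is both an input and an output: the case above loads it, converts it with V2Q, calls the intrinsic, and stores the converted carry-out back. A hypothetical usage sketch based only on the argument handling in this case; the header-level types and prototype are assumptions, not quoted from the Hexagon headers:

  // HVX_Vector     A, B, Sum;                            // assumed typedefs
  // HVX_VectorPred Carry;                                 // carry in / carry out
  // Sum = __builtin_HEXAGON_V6_vaddcarry(A, B, &Carry);   // arg 2 is the predicate's address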
20820
  // These are identical to the builtins above, except they don't consume an
20821
  // input carry; they only generate a carry-out. Since they still produce two
20822
  // outputs, generate the store of the predicate, but no load.
20823
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
20824
0
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
20825
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
20826
0
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
20827
    // Get the type from the 0-th argument.
20828
0
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
20829
0
    Address PredAddr =
20830
0
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
20831
0
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
20832
0
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20833
20834
0
    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
20835
0
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(),
20836
0
        PredAddr.getAlignment());
20837
0
    return Builder.CreateExtractValue(Result, 0);
20838
0
  }
20839
20840
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
20841
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
20842
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
20843
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
20844
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
20845
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
20846
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
20847
0
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
20848
0
    SmallVector<llvm::Value*,4> Ops;
20849
0
    const Expr *PredOp = E->getArg(0);
20850
    // There will be an implicit cast to a boolean vector. Strip it.
20851
0
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
20852
0
      if (Cast->getCastKind() == CK_BitCast)
20853
0
        PredOp = Cast->getSubExpr();
20854
0
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
20855
0
    }
20856
0
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
20857
0
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
20858
0
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
20859
0
  }
20860
20861
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
20862
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
20863
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
20864
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
20865
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
20866
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
20867
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
20868
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
20869
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
20870
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
20871
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
20872
0
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
20873
0
    return MakeCircOp(ID, /*IsLoad=*/true);
20874
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
20875
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
20876
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
20877
0
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
20878
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
20879
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
20880
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
20881
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
20882
0
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
20883
0
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
20884
0
    return MakeCircOp(ID, /*IsLoad=*/false);
20885
0
  case Hexagon::BI__builtin_brev_ldub:
20886
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
20887
0
  case Hexagon::BI__builtin_brev_ldb:
20888
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
20889
0
  case Hexagon::BI__builtin_brev_lduh:
20890
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
20891
0
  case Hexagon::BI__builtin_brev_ldh:
20892
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
20893
0
  case Hexagon::BI__builtin_brev_ldw:
20894
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
20895
0
  case Hexagon::BI__builtin_brev_ldd:
20896
0
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
20897
0
  } // switch
20898
20899
0
  return nullptr;
20900
0
}
20901
20902
Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
20903
                                             const CallExpr *E,
20904
0
                                             ReturnValueSlot ReturnValue) {
20905
0
  SmallVector<Value *, 4> Ops;
20906
0
  llvm::Type *ResultType = ConvertType(E->getType());
20907
20908
  // Find out if any arguments are required to be integer constant expressions.
20909
0
  unsigned ICEArguments = 0;
20910
0
  ASTContext::GetBuiltinTypeError Error;
20911
0
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
20912
0
  if (Error == ASTContext::GE_Missing_type) {
20913
    // Vector intrinsics don't have a type string.
20914
0
    assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
20915
0
           BuiltinID <= clang::RISCV::LastRVVBuiltin);
20916
0
    ICEArguments = 0;
20917
0
    if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
20918
0
        BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
20919
0
      ICEArguments = 1 << 1;
20920
0
  } else {
20921
0
    assert(Error == ASTContext::GE_None && "Unexpected error");
20922
0
  }
20923
20924
0
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
20925
0
    ICEArguments |= (1 << 1);
20926
0
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
20927
0
    ICEArguments |= (1 << 2);
20928
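A minimal illustration of the ICEArguments convention set up above and consumed by the loop below (not part of this file): bit i being set means argument i must be an integer constant expression, which is why EmitScalarOrConstFoldImmArg is handed the mask together with the argument index.

static bool argMustBeICE(unsigned ICEArguments, unsigned ArgIdx) {
  // Bit ArgIdx of the mask marks argument ArgIdx as a required constant.
  return (ICEArguments >> ArgIdx) & 1;
}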
20929
0
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
20930
    // Handle aggregate arguments (RVV tuple types in segment loads/stores).
20931
0
    if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
20932
0
      LValue L = EmitAggExprToLValue(E->getArg(i));
20933
0
      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
20934
0
      Ops.push_back(AggValue);
20935
0
      continue;
20936
0
    }
20937
0
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
20938
0
  }
20939
20940
0
  Intrinsic::ID ID = Intrinsic::not_intrinsic;
20941
0
  unsigned NF = 1;
20942
  // The 0th bit simulates the `vta` of RVV
20943
  // The 1st bit simulates the `vma` of RVV
20944
0
  constexpr unsigned RVV_VTA = 0x1;
20945
0
  constexpr unsigned RVV_VMA = 0x2;
20946
0
  int PolicyAttrs = 0;
20947
0
  bool IsMasked = false;
20948
20949
  // Required for overloaded intrinsics.
20950
0
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
20951
0
  switch (BuiltinID) {
20952
0
  default: llvm_unreachable("unexpected builtin ID");
20953
0
  case RISCV::BI__builtin_riscv_orc_b_32:
20954
0
  case RISCV::BI__builtin_riscv_orc_b_64:
20955
0
  case RISCV::BI__builtin_riscv_clz_32:
20956
0
  case RISCV::BI__builtin_riscv_clz_64:
20957
0
  case RISCV::BI__builtin_riscv_ctz_32:
20958
0
  case RISCV::BI__builtin_riscv_ctz_64:
20959
0
  case RISCV::BI__builtin_riscv_clmul_32:
20960
0
  case RISCV::BI__builtin_riscv_clmul_64:
20961
0
  case RISCV::BI__builtin_riscv_clmulh_32:
20962
0
  case RISCV::BI__builtin_riscv_clmulh_64:
20963
0
  case RISCV::BI__builtin_riscv_clmulr_32:
20964
0
  case RISCV::BI__builtin_riscv_clmulr_64:
20965
0
  case RISCV::BI__builtin_riscv_xperm4_32:
20966
0
  case RISCV::BI__builtin_riscv_xperm4_64:
20967
0
  case RISCV::BI__builtin_riscv_xperm8_32:
20968
0
  case RISCV::BI__builtin_riscv_xperm8_64:
20969
0
  case RISCV::BI__builtin_riscv_brev8_32:
20970
0
  case RISCV::BI__builtin_riscv_brev8_64:
20971
0
  case RISCV::BI__builtin_riscv_zip_32:
20972
0
  case RISCV::BI__builtin_riscv_unzip_32: {
20973
0
    switch (BuiltinID) {
20974
0
    default: llvm_unreachable("unexpected builtin ID");
20975
    // Zbb
20976
0
    case RISCV::BI__builtin_riscv_orc_b_32:
20977
0
    case RISCV::BI__builtin_riscv_orc_b_64:
20978
0
      ID = Intrinsic::riscv_orc_b;
20979
0
      break;
20980
0
    case RISCV::BI__builtin_riscv_clz_32:
20981
0
    case RISCV::BI__builtin_riscv_clz_64: {
20982
0
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
20983
0
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20984
0
      if (Result->getType() != ResultType)
20985
0
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20986
0
                                       "cast");
20987
0
      return Result;
20988
0
    }
20989
0
    case RISCV::BI__builtin_riscv_ctz_32:
20990
0
    case RISCV::BI__builtin_riscv_ctz_64: {
20991
0
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
20992
0
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
20993
0
      if (Result->getType() != ResultType)
20994
0
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
20995
0
                                       "cast");
20996
0
      return Result;
20997
0
    }
20998
20999
    // Zbc
21000
0
    case RISCV::BI__builtin_riscv_clmul_32:
21001
0
    case RISCV::BI__builtin_riscv_clmul_64:
21002
0
      ID = Intrinsic::riscv_clmul;
21003
0
      break;
21004
0
    case RISCV::BI__builtin_riscv_clmulh_32:
21005
0
    case RISCV::BI__builtin_riscv_clmulh_64:
21006
0
      ID = Intrinsic::riscv_clmulh;
21007
0
      break;
21008
0
    case RISCV::BI__builtin_riscv_clmulr_32:
21009
0
    case RISCV::BI__builtin_riscv_clmulr_64:
21010
0
      ID = Intrinsic::riscv_clmulr;
21011
0
      break;
21012
21013
    // Zbkx
21014
0
    case RISCV::BI__builtin_riscv_xperm8_32:
21015
0
    case RISCV::BI__builtin_riscv_xperm8_64:
21016
0
      ID = Intrinsic::riscv_xperm8;
21017
0
      break;
21018
0
    case RISCV::BI__builtin_riscv_xperm4_32:
21019
0
    case RISCV::BI__builtin_riscv_xperm4_64:
21020
0
      ID = Intrinsic::riscv_xperm4;
21021
0
      break;
21022
21023
    // Zbkb
21024
0
    case RISCV::BI__builtin_riscv_brev8_32:
21025
0
    case RISCV::BI__builtin_riscv_brev8_64:
21026
0
      ID = Intrinsic::riscv_brev8;
21027
0
      break;
21028
0
    case RISCV::BI__builtin_riscv_zip_32:
21029
0
      ID = Intrinsic::riscv_zip;
21030
0
      break;
21031
0
    case RISCV::BI__builtin_riscv_unzip_32:
21032
0
      ID = Intrinsic::riscv_unzip;
21033
0
      break;
21034
0
    }
21035
21036
0
    IntrinsicTypes = {ResultType};
21037
0
    break;
21038
0
  }
21039
21040
  // Zk builtins
21041
21042
  // Zknh
21043
0
  case RISCV::BI__builtin_riscv_sha256sig0:
21044
0
    ID = Intrinsic::riscv_sha256sig0;
21045
0
    break;
21046
0
  case RISCV::BI__builtin_riscv_sha256sig1:
21047
0
    ID = Intrinsic::riscv_sha256sig1;
21048
0
    break;
21049
0
  case RISCV::BI__builtin_riscv_sha256sum0:
21050
0
    ID = Intrinsic::riscv_sha256sum0;
21051
0
    break;
21052
0
  case RISCV::BI__builtin_riscv_sha256sum1:
21053
0
    ID = Intrinsic::riscv_sha256sum1;
21054
0
    break;
21055
21056
  // Zksed
21057
0
  case RISCV::BI__builtin_riscv_sm4ks:
21058
0
    ID = Intrinsic::riscv_sm4ks;
21059
0
    break;
21060
0
  case RISCV::BI__builtin_riscv_sm4ed:
21061
0
    ID = Intrinsic::riscv_sm4ed;
21062
0
    break;
21063
21064
  // Zksh
21065
0
  case RISCV::BI__builtin_riscv_sm3p0:
21066
0
    ID = Intrinsic::riscv_sm3p0;
21067
0
    break;
21068
0
  case RISCV::BI__builtin_riscv_sm3p1:
21069
0
    ID = Intrinsic::riscv_sm3p1;
21070
0
    break;
21071
21072
  // Zihintntl
21073
0
  case RISCV::BI__builtin_riscv_ntl_load: {
21074
0
    llvm::Type *ResTy = ConvertType(E->getType());
21075
0
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21076
0
    if (Ops.size() == 2)
21077
0
      DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
21078
21079
0
    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21080
0
        getLLVMContext(),
21081
0
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21082
0
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21083
0
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21084
21085
0
    int Width;
21086
0
    if (ResTy->isScalableTy()) {
21087
0
      const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
21088
0
      llvm::Type *ScalarTy = ResTy->getScalarType();
21089
0
      Width = ScalarTy->getPrimitiveSizeInBits() *
21090
0
              SVTy->getElementCount().getKnownMinValue();
21091
0
    } else
21092
0
      Width = ResTy->getPrimitiveSizeInBits();
21093
0
    LoadInst *Load = Builder.CreateLoad(
21094
0
        Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
21095
21096
0
    Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21097
0
    Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21098
0
                      RISCVDomainNode);
21099
21100
0
    return Load;
21101
0
  }
21102
0
  case RISCV::BI__builtin_riscv_ntl_store: {
21103
0
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21104
0
    if (Ops.size() == 3)
21105
0
      DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
21106
21107
0
    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21108
0
        getLLVMContext(),
21109
0
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21110
0
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21111
0
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21112
21113
0
    StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
21114
0
    Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21115
0
    Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21116
0
                       RISCVDomainNode);
21117
21118
0
    return Store;
21119
0
  }
21120
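Based only on how the two Zihintntl cases above consume their operands (the pointer is Ops[0], the stored value Ops[1], and the trailing domain argument is optional, defaulting to 5, i.e. __RISCV_NTLH_ALL), a usage sketch looks roughly like the following; the exact prototypes are assumptions. Both builtins lower to an ordinary load or store that additionally carries !nontemporal metadata and a "riscv-nontemporal-domain" node holding the domain value.

  // int V = __builtin_riscv_ntl_load(&Src);               // default domain (__RISCV_NTLH_ALL)
  // __builtin_riscv_ntl_store(&Dst, V, __RISCV_NTLH_ALL); // explicit domain argument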
21121
  // Vector builtins are handled from here.
21122
0
#include "clang/Basic/riscv_vector_builtin_cg.inc"
21123
  // SiFive Vector builtins are handled from here.
21124
0
#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
21125
0
  }
21126
21127
0
  assert(ID != Intrinsic::not_intrinsic);
21128
21129
0
  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
21130
0
  return Builder.CreateCall(F, Ops, "");
21131
0
}