Coverage Report

Created: 2025-08-25 06:58

/src/WasmEdge/lib/llvm/compiler.cpp
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: 2019-2024 Second State INC
3
4
#include "llvm/compiler.h"
5
6
#include "aot/version.h"
7
#include "common/defines.h"
8
#include "common/filesystem.h"
9
#include "common/spdlog.h"
10
#include "data.h"
11
#include "llvm.h"
12
#include "system/allocator.h"
13
14
#include <algorithm>
15
#include <array>
16
#include <cinttypes>
17
#include <cstdint>
18
#include <cstdlib>
19
#include <limits>
20
#include <memory>
21
#include <numeric>
22
#include <string>
23
#include <string_view>
24
#include <system_error>
25
26
namespace LLVM = WasmEdge::LLVM;
27
using namespace std::literals;
28
29
namespace {
30
31
static bool
32
isVoidReturn(WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
33
static LLVM::Type toLLVMType(LLVM::Context LLContext,
34
                             const WasmEdge::ValType &ValType) noexcept;
35
static std::vector<LLVM::Type>
36
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
37
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
38
static LLVM::Type
39
toLLVMRetsType(LLVM::Context LLContext,
40
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
41
static LLVM::Type
42
toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
43
           const WasmEdge::AST::FunctionType &FuncType) noexcept;
44
static LLVM::Value
45
toLLVMConstantZero(LLVM::Context LLContext,
46
                   const WasmEdge::ValType &ValType) noexcept;
47
static std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
48
                                             LLVM::Value Struct) noexcept;
49
class FunctionCompiler;
50
51
// XXX: Misalignment handler not implemented yet, forcing unalignment
52
// force unalignment load/store
53
static inline constexpr const bool kForceUnalignment = true;
54
55
// force checking div/rem on zero
56
static inline constexpr const bool kForceDivCheck = true;
57
58
// Size of a ValVariant
59
static inline constexpr const uint32_t kValSize = sizeof(WasmEdge::ValVariant);
60
61
// Translate Compiler::OptimizationLevel to llvm::PassBuilder version
62
#if LLVM_VERSION_MAJOR >= 13
63
static inline const char *
64
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
65
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
66
  switch (Level) {
67
  case OL::O0:
68
    return "default<O0>,function(tailcallelim)";
69
  case OL::O1:
70
    return "default<O1>,function(tailcallelim)";
71
  case OL::O2:
72
    return "default<O2>";
73
  case OL::O3:
74
    return "default<O3>";
75
  case OL::Os:
76
    return "default<Os>";
77
  case OL::Oz:
78
    return "default<Oz>";
79
  default:
80
    assumingUnreachable();
81
  }
82
}
83
#else
84
static inline std::pair<unsigned int, unsigned int>
85
1.96k
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
86
1.96k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
87
1.96k
  switch (Level) {
88
0
  case OL::O0:
89
0
    return {0, 0};
90
0
  case OL::O1:
91
0
    return {1, 0};
92
0
  case OL::O2:
93
0
    return {2, 0};
94
1.96k
  case OL::O3:
95
1.96k
    return {3, 0};
96
0
  case OL::Os:
97
0
    return {2, 1};
98
0
  case OL::Oz:
99
0
    return {2, 2};
100
0
  default:
101
0
    assumingUnreachable();
102
1.96k
  }
103
1.96k
}
104
#endif
105
106
static inline LLVMCodeGenOptLevel toLLVMCodeGenLevel(
107
1.96k
    WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
108
1.96k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
109
1.96k
  switch (Level) {
110
0
  case OL::O0:
111
0
    return LLVMCodeGenLevelNone;
112
0
  case OL::O1:
113
0
    return LLVMCodeGenLevelLess;
114
0
  case OL::O2:
115
0
    return LLVMCodeGenLevelDefault;
116
1.96k
  case OL::O3:
117
1.96k
    return LLVMCodeGenLevelAggressive;
118
0
  case OL::Os:
119
0
    return LLVMCodeGenLevelDefault;
120
0
  case OL::Oz:
121
0
    return LLVMCodeGenLevelDefault;
122
0
  default:
123
0
    assumingUnreachable();
124
1.96k
  }
125
1.96k
}
126
} // namespace
127
128
/// State shared while compiling one module: the LLVM context and module,
/// cached attributes and types, the SIMD features that may be used, the
/// module's type/function/global tables, and helpers that emit IR reading
/// the execution context or the intrinsics table.
struct LLVM::Compiler::CompileContext {
  LLVM::Context LLContext;
  LLVM::Module &LLModule;
  // Attributes created once in the constructor and reused when emitting.
  LLVM::Attribute Cold;
  LLVM::Attribute NoAlias;
  LLVM::Attribute NoInline;
  LLVM::Attribute NoReturn;
  LLVM::Attribute ReadOnly;
  LLVM::Attribute StrictFP;
  LLVM::Attribute UWTable;
  LLVM::Attribute NoStackArgProbe;
  // Cached scalar, vector and pointer types.
  LLVM::Type VoidTy;
  LLVM::Type Int8Ty;
  LLVM::Type Int16Ty;
  LLVM::Type Int32Ty;
  LLVM::Type Int64Ty;
  LLVM::Type Int128Ty;
  LLVM::Type FloatTy;
  LLVM::Type DoubleTy;
  LLVM::Type Int8x16Ty;
  LLVM::Type Int16x8Ty;
  LLVM::Type Int32x4Ty;
  LLVM::Type Floatx4Ty;
  LLVM::Type Int64x2Ty;
  LLVM::Type Doublex2Ty;
  LLVM::Type Int128x1Ty;
  LLVM::Type Int8PtrTy;
  LLVM::Type Int32PtrTy;
  LLVM::Type Int64PtrTy;
  LLVM::Type Int128PtrTy;
  LLVM::Type Int8PtrPtrTy;
  // Layout of the execution context passed as the first parameter of every
  // compiled function, and of the intrinsics table global.
  LLVM::Type ExecCtxTy;
  LLVM::Type ExecCtxPtrTy;
  LLVM::Type IntrinsicsTableTy;
  LLVM::Type IntrinsicsTablePtrTy;
  // Host CPU feature string; only filled in when not building a generic
  // binary (see the constructor).
  LLVM::Message SubtargetFeatures;

  // SIMD capability flags. Each starts from the compile-time macro of the
  // build and may be switched on in the constructor from the host features.
#if defined(__x86_64__)
#if defined(__XOP__)
  bool SupportXOP = true;
#else
  bool SupportXOP = false;
#endif

#if defined(__SSE4_1__)
  bool SupportSSE4_1 = true;
#else
  bool SupportSSE4_1 = false;
#endif

#if defined(__SSSE3__)
  bool SupportSSSE3 = true;
#else
  bool SupportSSSE3 = false;
#endif

#if defined(__SSE2__)
  bool SupportSSE2 = true;
#else
  bool SupportSSE2 = false;
#endif
#endif

#if defined(__aarch64__)
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__ARM_NEON_FP)
  bool SupportNEON = true;
#else
  bool SupportNEON = false;
#endif
#endif

  std::vector<const AST::CompositeType *> CompositeTypes;
  std::vector<LLVM::Value> FunctionWrappers;
  std::vector<std::tuple<uint32_t, LLVM::FunctionCallee,
                         const WasmEdge::AST::CodeSegment *>>
      Functions;
  std::vector<LLVM::Type> Globals;
  LLVM::Value IntrinsicsTable;
  LLVM::FunctionCallee Trap;

  /// Builds the cached attributes and types, declares the "intrinsics" and
  /// "version" globals and the private "trap" function in `M`, detects host
  /// SIMD features unless `IsGenericBinary` is set, and emits the body of
  /// "trap".
  CompileContext(LLVM::Context C, LLVM::Module &M,
                 bool IsGenericBinary) noexcept
      : LLContext(C), LLModule(M),
        Cold(LLVM::Attribute::createEnum(C, LLVM::Core::Cold, 0)),
        NoAlias(LLVM::Attribute::createEnum(C, LLVM::Core::NoAlias, 0)),
        NoInline(LLVM::Attribute::createEnum(C, LLVM::Core::NoInline, 0)),
        NoReturn(LLVM::Attribute::createEnum(C, LLVM::Core::NoReturn, 0)),
        ReadOnly(LLVM::Attribute::createEnum(C, LLVM::Core::ReadOnly, 0)),
        StrictFP(LLVM::Attribute::createEnum(C, LLVM::Core::StrictFP, 0)),
        UWTable(LLVM::Attribute::createEnum(C, LLVM::Core::UWTable,
                                            LLVM::Core::UWTableDefault)),
        NoStackArgProbe(
            LLVM::Attribute::createString(C, "no-stack-arg-probe"sv, {})),
        VoidTy(LLContext.getVoidTy()), Int8Ty(LLContext.getInt8Ty()),
        Int16Ty(LLContext.getInt16Ty()), Int32Ty(LLContext.getInt32Ty()),
        Int64Ty(LLContext.getInt64Ty()), Int128Ty(LLContext.getInt128Ty()),
        FloatTy(LLContext.getFloatTy()), DoubleTy(LLContext.getDoubleTy()),
        Int8x16Ty(LLVM::Type::getVectorType(Int8Ty, 16)),
        Int16x8Ty(LLVM::Type::getVectorType(Int16Ty, 8)),
        Int32x4Ty(LLVM::Type::getVectorType(Int32Ty, 4)),
        Floatx4Ty(LLVM::Type::getVectorType(FloatTy, 4)),
        Int64x2Ty(LLVM::Type::getVectorType(Int64Ty, 2)),
        Doublex2Ty(LLVM::Type::getVectorType(DoubleTy, 2)),
        Int128x1Ty(LLVM::Type::getVectorType(Int128Ty, 1)),
        Int8PtrTy(Int8Ty.getPointerTo()), Int32PtrTy(Int32Ty.getPointerTo()),
        Int64PtrTy(Int64Ty.getPointerTo()),
        Int128PtrTy(Int128Ty.getPointerTo()),
        Int8PtrPtrTy(Int8PtrTy.getPointerTo()),
        ExecCtxTy(LLVM::Type::getStructType(
            "ExecCtx",
            std::initializer_list<LLVM::Type>{
                // Memory
                Int8PtrTy.getPointerTo(),
                // Globals
                Int128PtrTy.getPointerTo(),
                // InstrCount
                Int64PtrTy,
                // CostTable
                LLVM::Type::getArrayType(Int64Ty, UINT16_MAX + 1)
                    .getPointerTo(),
                // Gas
                Int64PtrTy,
                // GasLimit
                Int64Ty,
                // StopToken
                Int32PtrTy,
            })),
        ExecCtxPtrTy(ExecCtxTy.getPointerTo()),
        IntrinsicsTableTy(LLVM::Type::getArrayType(
            Int8PtrTy,
            static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax))),
        IntrinsicsTablePtrTy(IntrinsicsTableTy.getPointerTo()),
        IntrinsicsTable(LLModule.addGlobal(IntrinsicsTablePtrTy, true,
                                           LLVMExternalLinkage, LLVM::Value(),
                                           "intrinsics")) {
    // Declare the module-private trap function: void(i32).
    Trap.Ty = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
    Trap.Fn = LLModule.addFunction(Trap.Ty, LLVMPrivateLinkage, "trap");
    Trap.Fn.setDSOLocal(true);
    Trap.Fn.addFnAttr(NoStackArgProbe);
    Trap.Fn.addFnAttr(StrictFP);
    Trap.Fn.addFnAttr(UWTable);
    Trap.Fn.addFnAttr(NoReturn);
    Trap.Fn.addFnAttr(Cold);
    Trap.Fn.addFnAttr(NoInline);

    // Export the AOT binary version as the "version" global.
    LLModule.addGlobal(Int32Ty, true, LLVMExternalLinkage,
                       LLVM::Value::getConstInt(Int32Ty, AOT::kBinaryVersion),
                       "version");

    if (!IsGenericBinary) {
      // Walk the comma-separated host feature list and enable every SIMD
      // flag whose feature is listed with a leading '+'.
      SubtargetFeatures = LLVM::getHostCPUFeatures();
      auto Features = SubtargetFeatures.string_view();
      while (!Features.empty()) {
        std::string_view Feature;
        if (auto Pos = Features.find(','); Pos != std::string_view::npos) {
          Feature = Features.substr(0, Pos);
          Features = Features.substr(Pos + 1);
        } else {
          Feature = std::exchange(Features, std::string_view());
        }
        // An empty token (e.g. from ",,") must be skipped before indexing:
        // operator[] on an empty string_view is undefined behaviour.
        if (Feature.empty() || Feature[0] != '+') {
          continue;
        }
        Feature = Feature.substr(1);

#if defined(__x86_64__)
        if (!SupportXOP && Feature == "xop"sv) {
          SupportXOP = true;
        }
        if (!SupportSSE4_1 && Feature == "sse4.1"sv) {
          SupportSSE4_1 = true;
        }
        if (!SupportSSSE3 && Feature == "ssse3"sv) {
          SupportSSSE3 = true;
        }
        if (!SupportSSE2 && Feature == "sse2"sv) {
          SupportSSE2 = true;
        }
#elif defined(__aarch64__)
        if (!SupportNEON && Feature == "neon"sv) {
          SupportNEON = true;
        }
#endif
      }
    }

    {
      // create trap: the body forwards its i32 argument to the kTrap
      // intrinsic and never returns.
      LLVM::Builder Builder(LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(LLContext, Trap.Fn, "entry"));
      auto FnTy = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
      auto CallTrap = Builder.createCall(
          getIntrinsic(Builder, Executable::Intrinsics::kTrap, FnTy),
          {Trap.Fn.getFirstParam()});
      CallTrap.addCallSiteAttribute(NoReturn);
      Builder.createUnreachable();
    }
  }

  /// Emits IR that reads entry `Index` of the memory table (field 0 of
  /// `ExecCtx`) and returns the resulting pointer as Int8PtrTy. When the
  /// allocator is not stable, the table entry is itself a pointer to the
  /// memory pointer, so one extra load is emitted.
  LLVM::Value getMemory(LLVM::Builder &Builder, LLVM::Value ExecCtx,
                        uint32_t Index) noexcept {
    auto Array = Builder.createExtractValue(ExecCtx, 0);
#if WASMEDGE_ALLOCATOR_IS_STABLE
    auto VPtr = Builder.createLoad(
        Int8PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
                                              LLContext.getInt64(Index)));
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                     LLVM::Metadata(LLContext, {}));
#else
    auto VPtrPtr = Builder.createLoad(
        Int8PtrPtrTy, Builder.createInBoundsGEP1(Int8PtrPtrTy, Array,
                                                 LLContext.getInt64(Index)));
    VPtrPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                        LLVM::Metadata(LLContext, {}));
    auto VPtr = Builder.createLoad(
        Int8PtrTy,
        Builder.createInBoundsGEP1(Int8PtrTy, VPtrPtr, LLContext.getInt64(0)));
#endif
    return Builder.createBitCast(VPtr, Int8PtrTy);
  }

  /// Emits IR that reads entry `Index` of the globals table (field 1 of
  /// `ExecCtx`) and returns the recorded type of that global together with
  /// the loaded pointer cast to a pointer to that type.
  std::pair<LLVM::Type, LLVM::Value> getGlobal(LLVM::Builder &Builder,
                                               LLVM::Value ExecCtx,
                                               uint32_t Index) noexcept {
    auto Ty = Globals[Index];
    auto Array = Builder.createExtractValue(ExecCtx, 1);
    // NOTE(review): the GEP is typed with Int8PtrTy while the loaded slot is
    // Int128PtrTy; both are pointer types, so the stride should match.
    auto VPtr = Builder.createLoad(
        Int128PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
                                                LLContext.getInt64(Index)));
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                     LLVM::Metadata(LLContext, {}));
    auto Ptr = Builder.createBitCast(VPtr, Ty.getPointerTo());
    return {Ty, Ptr};
  }

  /// Extracts field 2 (InstrCount) of `ExecCtx`.
  LLVM::Value getInstrCount(LLVM::Builder &Builder,
                            LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 2);
  }

  /// Extracts field 3 (CostTable) of `ExecCtx`.
  LLVM::Value getCostTable(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 3);
  }

  /// Extracts field 4 (Gas) of `ExecCtx`.
  LLVM::Value getGas(LLVM::Builder &Builder, LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 4);
  }

  /// Extracts field 5 (GasLimit) of `ExecCtx`.
  LLVM::Value getGasLimit(LLVM::Builder &Builder,
                          LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 5);
  }

  /// Extracts field 6 (StopToken) of `ExecCtx`.
  LLVM::Value getStopToken(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 6);
  }

  /// Emits IR that loads slot `Index` of the intrinsics table and returns
  /// it as a callee of function type `Ty`.
  LLVM::FunctionCallee getIntrinsic(LLVM::Builder &Builder,
                                    Executable::Intrinsics Index,
                                    LLVM::Type Ty) noexcept {
    const auto Value = static_cast<uint32_t>(Index);
    auto PtrTy = Ty.getPointerTo();
    auto PtrPtrTy = PtrTy.getPointerTo();
    // The "intrinsics" global holds a pointer to the table; load it first.
    auto IT = Builder.createLoad(IntrinsicsTablePtrTy, IntrinsicsTable);
    IT.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                   LLVM::Metadata(LLContext, {}));
    auto VPtr =
        Builder.createInBoundsGEP2(IntrinsicsTableTy, IT, LLContext.getInt64(0),
                                   LLContext.getInt64(Value));
    auto Ptr = Builder.createBitCast(VPtr, PtrPtrTy);
    return {Ty, Builder.createLoad(PtrTy, Ptr)};
  }

  /// Expands a block type into its (parameter types, result types) pair:
  /// empty -> ({}, {}); single value type -> ({}, {type}); type index ->
  /// the parameter and return types of that function type.
  std::pair<std::vector<ValType>, std::vector<ValType>>
  resolveBlockType(const BlockType &BType) const noexcept {
    using VecT = std::vector<ValType>;
    using RetT = std::pair<VecT, VecT>;
    if (BType.isEmpty()) {
      return RetT{};
    }
    if (BType.isValType()) {
      return RetT{{}, {BType.getValType()}};
    } else {
      // Type index case. t2* = type[index].returns
      const uint32_t TypeIdx = BType.getTypeIndex();
      const auto &FType = CompositeTypes[TypeIdx]->getFuncType();
      return RetT{
          VecT(FType.getParamTypes().begin(), FType.getParamTypes().end()),
          VecT(FType.getReturnTypes().begin(), FType.getReturnTypes().end())};
    }
  }
};
413
414
namespace {
415
416
using namespace WasmEdge;
417
418
31.1k
// True when the result-type list is empty, i.e. the function returns void.
static bool isVoidReturn(Span<const ValType> ValTypes) noexcept {
  const bool HasNoResults = ValTypes.size() == 0;
  return HasNoResults;
}
421
422
// Map one WebAssembly value type to the LLVM type used to hold it.
// Reference types share the <2 x i64> representation used for v128.
static LLVM::Type toLLVMType(LLVM::Context LLContext,
                             const ValType &ValType) noexcept {
  const auto Code = ValType.getCode();
  switch (Code) {
  case TypeCode::I32: {
    return LLContext.getInt32Ty();
  }
  case TypeCode::I64: {
    return LLContext.getInt64Ty();
  }
  case TypeCode::F32: {
    return LLContext.getFloatTy();
  }
  case TypeCode::F64: {
    return LLContext.getDoubleTy();
  }
  case TypeCode::Ref:
  case TypeCode::RefNull:
  case TypeCode::V128: {
    auto LaneTy = LLContext.getInt64Ty();
    return LLVM::Type::getVectorType(LaneTy, 2);
  }
  default:
    assumingUnreachable();
  }
}
441
442
static std::vector<LLVM::Type>
443
toLLVMTypeVector(LLVM::Context LLContext,
444
17.7k
                 Span<const ValType> ValTypes) noexcept {
445
17.7k
  std::vector<LLVM::Type> Result;
446
17.7k
  Result.reserve(ValTypes.size());
447
17.7k
  for (const auto &Type : ValTypes) {
448
17.2k
    Result.push_back(toLLVMType(LLContext, Type));
449
17.2k
  }
450
17.7k
  return Result;
451
17.7k
}
452
453
static std::vector<LLVM::Type>
454
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
455
14.3k
               Span<const ValType> ValTypes) noexcept {
456
14.3k
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
457
14.3k
  Result.insert(Result.begin(), ExecCtxPtrTy);
458
14.3k
  return Result;
459
14.3k
}
460
461
// Compute the LLVM return type for a list of WebAssembly result types:
//   - no results       -> void
//   - exactly one      -> the converted type itself
//   - two or more      -> a struct whose fields are the converted types
static LLVM::Type toLLVMRetsType(LLVM::Context LLContext,
                                 Span<const ValType> ValTypes) noexcept {
  if (isVoidReturn(ValTypes)) {
    return LLContext.getVoidTy();
  }
  if (ValTypes.size() == 1) {
    return toLLVMType(LLContext, ValTypes.front());
  }
  // Reuse the shared list conversion instead of repeating its loop here.
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
  return LLVM::Type::getStructType(Result);
}
476
477
// Build the LLVM function type for a WebAssembly function type. The
// parameter list is prefixed with the execution context pointer.
static LLVM::Type toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
                             const AST::FunctionType &FuncType) noexcept {
  const auto ParamTypes = FuncType.getParamTypes();
  const auto ResultTypes = FuncType.getReturnTypes();
  auto ArgsTy = toLLVMArgsType(LLContext, ExecCtxPtrTy, ParamTypes);
  auto RetTy = toLLVMRetsType(LLContext, ResultTypes);
  return LLVM::Type::getFunctionType(RetTy, ArgsTy);
}
484
485
// Return the all-zero constant for a WebAssembly value type.
//
// The LLVM type comes from toLLVMType, so the value-type -> LLVM-type
// mapping is written in one place only. An unknown type code reaches that
// function's unreachable default, as it did in the former local switch.
static LLVM::Value toLLVMConstantZero(LLVM::Context LLContext,
                                      const ValType &ValType) noexcept {
  return LLVM::Value::getConstNull(toLLVMType(LLContext, ValType));
}
505
506
class FunctionCompiler {
507
  struct Control;
508
509
public:
510
  /// Sets up compilation of one function. When `F.Fn` is set, this creates
  /// the "entry" block, loads the execution context from the first
  /// parameter, allocates the optional local counters, and gives every
  /// remaining parameter and every declared local a stack slot recorded in
  /// `Local` (parameters first). Declared locals are zero-initialised.
  FunctionCompiler(LLVM::Compiler::CompileContext &Context,
                   LLVM::FunctionCallee F, Span<const ValType> Locals,
                   bool Interruptible, bool InstructionCounting,
                   bool GasMeasuring) noexcept
      : Context(Context), LLContext(Context.LLContext),
        Interruptible(Interruptible), F(F), Builder(LLContext) {
    // Nothing to prepare when there is no function to emit into.
    if (!F.Fn) {
      return;
    }

    auto EntryBB = LLVM::BasicBlock::create(LLContext, F.Fn, "entry");
    Builder.positionAtEnd(EntryBB);
    ExecCtx = Builder.createLoad(Context.ExecCtxTy, F.Fn.getFirstParam());

    if (InstructionCounting) {
      LocalInstrCount = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
    }

    if (GasMeasuring) {
      LocalGas = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalGas);
    }

    // Spill every parameter after the execution context to its own slot.
    LLVM::Value Param = F.Fn.getFirstParam().getNextParam();
    while (Param) {
      LLVM::Type ParamTy = Param.getType();
      LLVM::Value Slot = Builder.createAlloca(ParamTy);
      Builder.createStore(Param, Slot);
      Local.emplace_back(ParamTy, Slot);
      Param = Param.getNextParam();
    }

    // Declared locals follow the parameters and start out as zero.
    for (const auto &LocalType : Locals) {
      LLVM::Type SlotTy = toLLVMType(LLContext, LocalType);
      LLVM::Value Slot = Builder.createAlloca(SlotTy);
      Builder.createStore(toLLVMConstantZero(LLContext, LocalType), Slot);
      Local.emplace_back(SlotTy, Slot);
    }
  }
546
547
30.3k
  /// Returns the basic block that handles `Error`, creating and caching a
  /// new "trap" block in `TrapBB` the first time that error is requested.
  LLVM::BasicBlock getTrapBB(ErrCode::Value Error) noexcept {
    const auto Found = TrapBB.find(Error);
    if (Found != TrapBB.end()) {
      return Found->second;
    }
    auto NewBB = LLVM::BasicBlock::create(LLContext, F.Fn, "trap");
    TrapBB.emplace(Error, NewBB);
    return NewBB;
  }
555
556
  void
557
  compile(const AST::CodeSegment &Code,
558
9.83k
          std::pair<std::vector<ValType>, std::vector<ValType>> Type) noexcept {
559
9.83k
    auto RetBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ret");
560
9.83k
    Type.first.clear();
561
9.83k
    enterBlock(RetBB, {}, {}, {}, std::move(Type));
562
9.83k
    compile(Code.getExpr().getInstrs());
563
9.83k
    assuming(ControlStack.empty());
564
9.83k
    compileReturn();
565
566
9.83k
    for (auto &[Error, BB] : TrapBB) {
567
2.80k
      Builder.positionAtEnd(BB);
568
2.80k
      updateInstrCount();
569
2.80k
      updateGasAtTrap();
570
2.80k
      auto CallTrap = Builder.createCall(
571
2.80k
          Context.Trap, {LLContext.getInt32(static_cast<uint32_t>(Error))});
572
2.80k
      CallTrap.addCallSiteAttribute(Context.NoReturn);
573
2.80k
      Builder.createUnreachable();
574
2.80k
    }
575
9.83k
  }
576
577
9.83k
  void compile(AST::InstrView Instrs) noexcept {
578
1.44M
    auto Dispatch = [this](const AST::Instruction &Instr) -> void {
579
1.44M
      switch (Instr.getOpCode()) {
580
      // Control instructions (for blocks)
581
3.17k
      case OpCode::Block: {
582
3.17k
        auto Block = LLVM::BasicBlock::create(LLContext, F.Fn, "block");
583
3.17k
        auto EndBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "block.end");
584
3.17k
        Builder.createBr(Block);
585
586
3.17k
        Builder.positionAtEnd(Block);
587
3.17k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
588
3.17k
        const auto Arity = Type.first.size();
589
3.17k
        std::vector<LLVM::Value> Args(Arity);
590
3.17k
        if (isUnreachable()) {
591
749
          for (size_t I = 0; I < Arity; ++I) {
592
251
            auto Ty = toLLVMType(LLContext, Type.first[I]);
593
251
            Args[I] = LLVM::Value::getUndef(Ty);
594
251
          }
595
2.67k
        } else {
596
3.12k
          for (size_t I = 0; I < Arity; ++I) {
597
451
            const size_t J = Arity - 1 - I;
598
451
            Args[J] = stackPop();
599
451
          }
600
2.67k
        }
601
3.17k
        enterBlock(EndBlock, {}, {}, std::move(Args), std::move(Type));
602
3.17k
        checkStop();
603
3.17k
        updateGas();
604
3.17k
        return;
605
0
      }
606
1.53k
      case OpCode::Loop: {
607
1.53k
        auto Curr = Builder.getInsertBlock();
608
1.53k
        auto Loop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop");
609
1.53k
        auto EndLoop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop.end");
610
1.53k
        Builder.createBr(Loop);
611
612
1.53k
        Builder.positionAtEnd(Loop);
613
1.53k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
614
1.53k
        const auto Arity = Type.first.size();
615
1.53k
        std::vector<LLVM::Value> Args(Arity);
616
1.53k
        if (isUnreachable()) {
617
752
          for (size_t I = 0; I < Arity; ++I) {
618
299
            auto Ty = toLLVMType(LLContext, Type.first[I]);
619
299
            auto Value = LLVM::Value::getUndef(Ty);
620
299
            auto PHINode = Builder.createPHI(Ty);
621
299
            PHINode.addIncoming(Value, Curr);
622
299
            Args[I] = PHINode;
623
299
          }
624
1.07k
        } else {
625
1.55k
          for (size_t I = 0; I < Arity; ++I) {
626
475
            const size_t J = Arity - 1 - I;
627
475
            auto Value = stackPop();
628
475
            auto PHINode = Builder.createPHI(Value.getType());
629
475
            PHINode.addIncoming(Value, Curr);
630
475
            Args[J] = PHINode;
631
475
          }
632
1.07k
        }
633
1.53k
        enterBlock(Loop, EndLoop, {}, std::move(Args), std::move(Type));
634
1.53k
        checkStop();
635
1.53k
        updateGas();
636
1.53k
        return;
637
0
      }
638
2.20k
      case OpCode::If: {
639
2.20k
        auto Then = LLVM::BasicBlock::create(LLContext, F.Fn, "then");
640
2.20k
        auto Else = LLVM::BasicBlock::create(LLContext, F.Fn, "else");
641
2.20k
        auto EndIf = LLVM::BasicBlock::create(LLContext, F.Fn, "if.end");
642
2.20k
        LLVM::Value Cond;
643
2.20k
        if (isUnreachable()) {
644
439
          Cond = LLVM::Value::getUndef(LLContext.getInt1Ty());
645
1.76k
        } else {
646
1.76k
          Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
647
1.76k
        }
648
2.20k
        Builder.createCondBr(Cond, Then, Else);
649
650
2.20k
        Builder.positionAtEnd(Then);
651
2.20k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
652
2.20k
        const auto Arity = Type.first.size();
653
2.20k
        std::vector<LLVM::Value> Args(Arity);
654
2.20k
        if (isUnreachable()) {
655
894
          for (size_t I = 0; I < Arity; ++I) {
656
455
            auto Ty = toLLVMType(LLContext, Type.first[I]);
657
455
            Args[I] = LLVM::Value::getUndef(Ty);
658
455
          }
659
1.76k
        } else {
660
2.51k
          for (size_t I = 0; I < Arity; ++I) {
661
756
            const size_t J = Arity - 1 - I;
662
756
            Args[J] = stackPop();
663
756
          }
664
1.76k
        }
665
2.20k
        enterBlock(EndIf, {}, Else, std::move(Args), std::move(Type));
666
2.20k
        return;
667
0
      }
668
16.7k
      case OpCode::End: {
669
16.7k
        auto Entry = leaveBlock();
670
16.7k
        if (Entry.ElseBlock) {
671
894
          auto Block = Builder.getInsertBlock();
672
894
          Builder.positionAtEnd(Entry.ElseBlock);
673
894
          enterBlock(Block, {}, {}, std::move(Entry.Args),
674
894
                     std::move(Entry.Type), std::move(Entry.ReturnPHI));
675
894
          Entry = leaveBlock();
676
894
        }
677
16.7k
        buildPHI(Entry.Type.second, Entry.ReturnPHI);
678
16.7k
        return;
679
0
      }
680
1.30k
      case OpCode::Else: {
681
1.30k
        auto Entry = leaveBlock();
682
1.30k
        Builder.positionAtEnd(Entry.ElseBlock);
683
1.30k
        enterBlock(Entry.JumpBlock, {}, {}, std::move(Entry.Args),
684
1.30k
                   std::move(Entry.Type), std::move(Entry.ReturnPHI));
685
1.30k
        return;
686
0
      }
687
1.41M
      default:
688
1.41M
        break;
689
1.44M
      }
690
691
1.41M
      if (isUnreachable()) {
692
438k
        return;
693
438k
      }
694
695
979k
      switch (Instr.getOpCode()) {
696
      // Control instructions
697
2.73k
      case OpCode::Unreachable:
698
2.73k
        Builder.createBr(getTrapBB(ErrCode::Value::Unreachable));
699
2.73k
        setUnreachable();
700
2.73k
        Builder.positionAtEnd(
701
2.73k
            LLVM::BasicBlock::create(LLContext, F.Fn, "unreachable.end"));
702
2.73k
        break;
703
37.3k
      case OpCode::Nop:
704
37.3k
        break;
705
      // LEGACY-EH: remove the `Try` cases after deprecating legacy EH.
706
      // case OpCode::Try:
707
      // case OpCode::Throw:
708
      // case OpCode::Throw_ref:
709
700
      case OpCode::Br: {
710
700
        const auto Label = Instr.getJump().TargetIndex;
711
700
        setLableJumpPHI(Label);
712
700
        Builder.createBr(getLabel(Label));
713
700
        setUnreachable();
714
700
        Builder.positionAtEnd(
715
700
            LLVM::BasicBlock::create(LLContext, F.Fn, "br.end"));
716
700
        break;
717
0
      }
718
340
      case OpCode::Br_if: {
719
340
        const auto Label = Instr.getJump().TargetIndex;
720
340
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
721
340
        setLableJumpPHI(Label);
722
340
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_if.end");
723
340
        Builder.createCondBr(Cond, getLabel(Label), Next);
724
340
        Builder.positionAtEnd(Next);
725
340
        break;
726
0
      }
727
966
      case OpCode::Br_table: {
728
966
        auto LabelTable = Instr.getLabelList();
729
966
        assuming(LabelTable.size() <= std::numeric_limits<uint32_t>::max());
730
966
        const auto LabelTableSize =
731
966
            static_cast<uint32_t>(LabelTable.size() - 1);
732
966
        auto Value = stackPop();
733
966
        setLableJumpPHI(LabelTable[LabelTableSize].TargetIndex);
734
966
        auto Switch = Builder.createSwitch(
735
966
            Value, getLabel(LabelTable[LabelTableSize].TargetIndex),
736
966
            LabelTableSize);
737
36.6k
        for (uint32_t I = 0; I < LabelTableSize; ++I) {
738
35.7k
          setLableJumpPHI(LabelTable[I].TargetIndex);
739
35.7k
          Switch.addCase(LLContext.getInt32(I),
740
35.7k
                         getLabel(LabelTable[I].TargetIndex));
741
35.7k
        }
742
966
        setUnreachable();
743
966
        Builder.positionAtEnd(
744
966
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_table.end"));
745
966
        break;
746
966
      }
747
0
      case OpCode::Br_on_null: {
748
0
        const auto Label = Instr.getJump().TargetIndex;
749
0
        auto Value = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
750
0
        auto Cond = Builder.createICmpEQ(
751
0
            Builder.createExtractElement(Value, LLContext.getInt64(1)),
752
0
            LLContext.getInt64(0));
753
0
        setLableJumpPHI(Label);
754
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_null.end");
755
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
756
0
        Builder.positionAtEnd(Next);
757
0
        stackPush(Value);
758
0
        break;
759
966
      }
760
0
      case OpCode::Br_on_non_null: {
761
0
        const auto Label = Instr.getJump().TargetIndex;
762
0
        auto Cond = Builder.createICmpNE(
763
0
            Builder.createExtractElement(
764
0
                Builder.createBitCast(Stack.back(), Context.Int64x2Ty),
765
0
                LLContext.getInt64(1)),
766
0
            LLContext.getInt64(0));
767
0
        setLableJumpPHI(Label);
768
0
        auto Next =
769
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_non_null.end");
770
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
771
0
        Builder.positionAtEnd(Next);
772
0
        stackPop();
773
0
        break;
774
966
      }
775
0
      case OpCode::Br_on_cast:
776
0
      case OpCode::Br_on_cast_fail: {
777
0
        auto Ref = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
778
0
        const auto Label = Instr.getBrCast().Jump.TargetIndex;
779
0
        std::array<uint8_t, 16> Buf = {0};
780
0
        std::copy_n(Instr.getBrCast().RType2.getRawData().cbegin(), 8,
781
0
                    Buf.begin());
782
0
        auto VType = Builder.createExtractElement(
783
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
784
0
                                  Context.Int64x2Ty),
785
0
            LLContext.getInt64(0));
786
0
        auto IsRefTest = Builder.createCall(
787
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
788
0
                                 LLVM::Type::getFunctionType(
789
0
                                     Context.Int32Ty,
790
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
791
0
                                     false)),
792
0
            {Ref, VType});
793
0
        auto Cond =
794
0
            (Instr.getOpCode() == OpCode::Br_on_cast)
795
0
                ? Builder.createICmpNE(IsRefTest, LLContext.getInt32(0))
796
0
                : Builder.createICmpEQ(IsRefTest, LLContext.getInt32(0));
797
0
        setLableJumpPHI(Label);
798
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_cast.end");
799
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
800
0
        Builder.positionAtEnd(Next);
801
0
        break;
802
0
      }
803
663
      case OpCode::Return:
804
663
        compileReturn();
805
663
        setUnreachable();
806
663
        Builder.positionAtEnd(
807
663
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret.end"));
808
663
        break;
809
3.61k
      case OpCode::Call:
810
3.61k
        updateInstrCount();
811
3.61k
        updateGas();
812
3.61k
        compileCallOp(Instr.getTargetIndex());
813
3.61k
        break;
814
593
      case OpCode::Call_indirect:
815
593
        updateInstrCount();
816
593
        updateGas();
817
593
        compileIndirectCallOp(Instr.getSourceIndex(), Instr.getTargetIndex());
818
593
        break;
819
0
      case OpCode::Return_call:
820
0
        updateInstrCount();
821
0
        updateGas();
822
0
        compileReturnCallOp(Instr.getTargetIndex());
823
0
        setUnreachable();
824
0
        Builder.positionAtEnd(
825
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call.end"));
826
0
        break;
827
0
      case OpCode::Return_call_indirect:
828
0
        updateInstrCount();
829
0
        updateGas();
830
0
        compileReturnIndirectCallOp(Instr.getSourceIndex(),
831
0
                                    Instr.getTargetIndex());
832
0
        setUnreachable();
833
0
        Builder.positionAtEnd(
834
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_indir.end"));
835
0
        break;
836
0
      case OpCode::Call_ref:
837
0
        updateInstrCount();
838
0
        updateGas();
839
0
        compileCallRefOp(Instr.getTargetIndex());
840
0
        break;
841
0
      case OpCode::Return_call_ref:
842
0
        updateInstrCount();
843
0
        updateGas();
844
0
        compileReturnCallRefOp(Instr.getTargetIndex());
845
0
        setUnreachable();
846
0
        Builder.positionAtEnd(
847
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_ref.end"));
848
0
        break;
849
        // LEGACY-EH: remove the `Catch` cases after deprecating legacy EH.
850
        // case OpCode::Catch:
851
        // case OpCode::Catch_all:
852
        // case OpCode::Try_table:
853
854
      // Reference Instructions
855
808
      case OpCode::Ref__null: {
856
808
        std::array<uint8_t, 16> Buf = {0};
857
        // For null references, narrow the dynamic type to its bottom type.
858
808
        ValType VType;
859
808
        if (Instr.getValType().isAbsHeapType()) {
860
808
          switch (Instr.getValType().getHeapTypeCode()) {
861
0
          case TypeCode::NullFuncRef:
862
355
          case TypeCode::FuncRef:
863
355
            VType = TypeCode::NullFuncRef;
864
355
            break;
865
0
          case TypeCode::NullExternRef:
866
453
          case TypeCode::ExternRef:
867
453
            VType = TypeCode::NullExternRef;
868
453
            break;
869
0
          case TypeCode::NullRef:
870
0
          case TypeCode::AnyRef:
871
0
          case TypeCode::EqRef:
872
0
          case TypeCode::I31Ref:
873
0
          case TypeCode::StructRef:
874
0
          case TypeCode::ArrayRef:
875
0
            VType = TypeCode::NullRef;
876
0
            break;
877
0
          default:
878
0
            assumingUnreachable();
879
808
          }
880
808
        } else {
881
0
          assuming(Instr.getValType().getTypeIndex() <
882
0
                   Context.CompositeTypes.size());
883
0
          const auto *CompType =
884
0
              Context.CompositeTypes[Instr.getValType().getTypeIndex()];
885
0
          assuming(CompType != nullptr);
886
0
          if (CompType->isFunc()) {
887
0
            VType = TypeCode::NullFuncRef;
888
0
          } else {
889
0
            VType = TypeCode::NullRef;
890
0
          }
891
0
        }
892
808
        std::copy_n(VType.getRawData().cbegin(), 8, Buf.begin());
893
808
        stackPush(Builder.createBitCast(
894
808
            LLVM::Value::getConstVector8(LLContext, Buf), Context.Int64x2Ty));
895
808
        break;
896
808
      }
897
417
      case OpCode::Ref__is_null:
898
417
        stackPush(Builder.createZExt(
899
417
            Builder.createICmpEQ(
900
417
                Builder.createExtractElement(
901
417
                    Builder.createBitCast(stackPop(), Context.Int64x2Ty),
902
417
                    LLContext.getInt64(1)),
903
417
                LLContext.getInt64(0)),
904
417
            Context.Int32Ty));
905
417
        break;
906
28
      case OpCode::Ref__func:
907
28
        stackPush(Builder.createCall(
908
28
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefFunc,
909
28
                                 LLVM::Type::getFunctionType(Context.Int64x2Ty,
910
28
                                                             {Context.Int32Ty},
911
28
                                                             false)),
912
28
            {LLContext.getInt32(Instr.getTargetIndex())}));
913
28
        break;
914
0
      case OpCode::Ref__eq: {
915
0
        LLVM::Value RHS = stackPop();
916
0
        LLVM::Value LHS = stackPop();
917
0
        stackPush(Builder.createZExt(
918
0
            Builder.createICmpEQ(
919
0
                Builder.createExtractElement(LHS, LLContext.getInt64(1)),
920
0
                Builder.createExtractElement(RHS, LLContext.getInt64(1))),
921
0
            Context.Int32Ty));
922
0
        break;
923
808
      }
924
0
      case OpCode::Ref__as_non_null: {
925
0
        auto Next =
926
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "ref_as_non_null.ok");
927
0
        Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
928
0
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
929
0
            Builder.createExtractElement(Stack.back(), LLContext.getInt64(1)),
930
0
            LLContext.getInt64(0)));
931
0
        Builder.createCondBr(IsNotNull, Next,
932
0
                             getTrapBB(ErrCode::Value::CastNullToNonNull));
933
0
        Builder.positionAtEnd(Next);
934
0
        break;
935
808
      }
936
937
      // Reference Instructions (GC proposal)
938
0
      case OpCode::Struct__new:
939
0
      case OpCode::Struct__new_default: {
940
0
        LLVM::Value Args = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
941
0
        assuming(Instr.getTargetIndex() < Context.CompositeTypes.size());
942
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
943
0
        assuming(CompType != nullptr && !CompType->isFunc());
944
0
        auto ArgSize = CompType->getFieldTypes().size();
945
0
        if (Instr.getOpCode() == OpCode::Struct__new) {
946
0
          std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
947
0
          for (size_t I = 0; I < ArgSize; ++I) {
948
0
            ArgsVec[ArgSize - I - 1] = stackPop();
949
0
          }
950
0
          Args = Builder.createArray(ArgSize, kValSize);
951
0
          Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
952
0
        } else {
953
0
          ArgSize = 0;
954
0
        }
955
0
        stackPush(Builder.createCall(
956
0
            Context.getIntrinsic(
957
0
                Builder, Executable::Intrinsics::kStructNew,
958
0
                LLVM::Type::getFunctionType(
959
0
                    Context.Int64x2Ty,
960
0
                    {Context.Int32Ty, Context.Int8PtrTy, Context.Int32Ty},
961
0
                    false)),
962
0
            {LLContext.getInt32(Instr.getTargetIndex()), Args,
963
0
             LLContext.getInt32(static_cast<uint32_t>(ArgSize))}));
964
0
        break;
965
0
      }
966
0
      case OpCode::Struct__get:
967
0
      case OpCode::Struct__get_u:
968
0
      case OpCode::Struct__get_s: {
969
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
970
0
                 Context.CompositeTypes.size());
971
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
972
0
        assuming(CompType != nullptr && !CompType->isFunc());
973
0
        assuming(static_cast<size_t>(Instr.getSourceIndex()) <
974
0
                 CompType->getFieldTypes().size());
975
0
        const auto &StorageType =
976
0
            CompType->getFieldTypes()[Instr.getSourceIndex()].getStorageType();
977
0
        auto Ref = stackPop();
978
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Struct__get_s)
979
0
                            ? LLContext.getInt8(1)
980
0
                            : LLContext.getInt8(0);
981
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
982
0
        Builder.createCall(
983
0
            Context.getIntrinsic(
984
0
                Builder, Executable::Intrinsics::kStructGet,
985
0
                LLVM::Type::getFunctionType(Context.VoidTy,
986
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
987
0
                                             Context.Int32Ty, Context.Int8Ty,
988
0
                                             Context.Int8PtrTy},
989
0
                                            false)),
990
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
991
0
             LLContext.getInt32(Instr.getSourceIndex()), IsSigned, Ret});
992
993
0
        switch (StorageType.getCode()) {
994
0
        case TypeCode::I8:
995
0
        case TypeCode::I16:
996
0
        case TypeCode::I32: {
997
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
998
0
                                               Context.Int64x2Ty));
999
0
          break;
1000
0
        }
1001
0
        case TypeCode::I64: {
1002
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1003
0
                                               Context.Int64x2Ty));
1004
0
          break;
1005
0
        }
1006
0
        case TypeCode::F32: {
1007
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1008
0
                                               Context.Int64x2Ty));
1009
0
          break;
1010
0
        }
1011
0
        case TypeCode::F64: {
1012
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1013
0
                                               Context.Int64x2Ty));
1014
0
          break;
1015
0
        }
1016
0
        case TypeCode::V128:
1017
0
        case TypeCode::Ref:
1018
0
        case TypeCode::RefNull: {
1019
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1020
0
                                               Context.Int64x2Ty));
1021
0
          break;
1022
0
        }
1023
0
        default:
1024
0
          assumingUnreachable();
1025
0
        }
1026
0
        break;
1027
0
      }
1028
0
      case OpCode::Struct__set: {
1029
0
        auto Val = stackPop();
1030
0
        auto Ref = stackPop();
1031
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1032
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1033
0
        Builder.createCall(
1034
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kStructSet,
1035
0
                                 LLVM::Type::getFunctionType(
1036
0
                                     Context.VoidTy,
1037
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1038
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1039
0
                                     false)),
1040
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1041
0
             LLContext.getInt32(Instr.getSourceIndex()), Arg});
1042
0
        break;
1043
0
      }
1044
0
      case OpCode::Array__new: {
1045
0
        auto Length = stackPop();
1046
0
        auto Val = stackPop();
1047
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1048
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1049
0
        stackPush(Builder.createCall(
1050
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1051
0
                                 LLVM::Type::getFunctionType(
1052
0
                                     Context.Int64x2Ty,
1053
0
                                     {Context.Int32Ty, Context.Int32Ty,
1054
0
                                      Context.Int8PtrTy, Context.Int32Ty},
1055
0
                                     false)),
1056
0
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1057
0
             LLContext.getInt32(1)}));
1058
0
        break;
1059
0
      }
1060
0
      case OpCode::Array__new_default: {
1061
0
        auto Length = stackPop();
1062
0
        LLVM::Value Arg = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1063
0
        stackPush(Builder.createCall(
1064
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1065
0
                                 LLVM::Type::getFunctionType(
1066
0
                                     Context.Int64x2Ty,
1067
0
                                     {Context.Int32Ty, Context.Int32Ty,
1068
0
                                      Context.Int8PtrTy, Context.Int32Ty},
1069
0
                                     false)),
1070
0
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1071
0
             LLContext.getInt32(0)}));
1072
0
        break;
1073
0
      }
1074
0
      case OpCode::Array__new_fixed: {
1075
0
        const auto ArgSize = Instr.getSourceIndex();
1076
0
        std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1077
0
        for (size_t I = 0; I < ArgSize; ++I) {
1078
0
          ArgsVec[ArgSize - I - 1] = stackPop();
1079
0
        }
1080
0
        LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
1081
0
        Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1082
0
        stackPush(Builder.createCall(
1083
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1084
0
                                 LLVM::Type::getFunctionType(
1085
0
                                     Context.Int64x2Ty,
1086
0
                                     {Context.Int32Ty, Context.Int32Ty,
1087
0
                                      Context.Int8PtrTy, Context.Int32Ty},
1088
0
                                     false)),
1089
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1090
0
             LLContext.getInt32(ArgSize), Args, LLContext.getInt32(ArgSize)}));
1091
0
        break;
1092
0
      }
1093
0
      case OpCode::Array__new_data:
1094
0
      case OpCode::Array__new_elem: {
1095
0
        auto Length = stackPop();
1096
0
        auto Start = stackPop();
1097
0
        stackPush(Builder.createCall(
1098
0
            Context.getIntrinsic(
1099
0
                Builder,
1100
0
                ((Instr.getOpCode() == OpCode::Array__new_data)
1101
0
                     ? Executable::Intrinsics::kArrayNewData
1102
0
                     : Executable::Intrinsics::kArrayNewElem),
1103
0
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1104
0
                                            {Context.Int32Ty, Context.Int32Ty,
1105
0
                                             Context.Int32Ty, Context.Int32Ty},
1106
0
                                            false)),
1107
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1108
0
             LLContext.getInt32(Instr.getSourceIndex()), Start, Length}));
1109
0
        break;
1110
0
      }
1111
0
      case OpCode::Array__get:
1112
0
      case OpCode::Array__get_u:
1113
0
      case OpCode::Array__get_s: {
1114
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1115
0
                 Context.CompositeTypes.size());
1116
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1117
0
        assuming(CompType != nullptr && !CompType->isFunc());
1118
0
        assuming(static_cast<size_t>(1) == CompType->getFieldTypes().size());
1119
0
        const auto &StorageType = CompType->getFieldTypes()[0].getStorageType();
1120
0
        auto Idx = stackPop();
1121
0
        auto Ref = stackPop();
1122
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Array__get_s)
1123
0
                            ? LLContext.getInt8(1)
1124
0
                            : LLContext.getInt8(0);
1125
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1126
0
        Builder.createCall(
1127
0
            Context.getIntrinsic(
1128
0
                Builder, Executable::Intrinsics::kArrayGet,
1129
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1130
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1131
0
                                             Context.Int32Ty, Context.Int8Ty,
1132
0
                                             Context.Int8PtrTy},
1133
0
                                            false)),
1134
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, IsSigned,
1135
0
             Ret});
1136
1137
0
        switch (StorageType.getCode()) {
1138
0
        case TypeCode::I8:
1139
0
        case TypeCode::I16:
1140
0
        case TypeCode::I32: {
1141
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1142
0
                                               Context.Int64x2Ty));
1143
0
          break;
1144
0
        }
1145
0
        case TypeCode::I64: {
1146
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1147
0
                                               Context.Int64x2Ty));
1148
0
          break;
1149
0
        }
1150
0
        case TypeCode::F32: {
1151
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1152
0
                                               Context.Int64x2Ty));
1153
0
          break;
1154
0
        }
1155
0
        case TypeCode::F64: {
1156
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1157
0
                                               Context.Int64x2Ty));
1158
0
          break;
1159
0
        }
1160
0
        case TypeCode::V128:
1161
0
        case TypeCode::Ref:
1162
0
        case TypeCode::RefNull: {
1163
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1164
0
                                               Context.Int64x2Ty));
1165
0
          break;
1166
0
        }
1167
0
        default:
1168
0
          assumingUnreachable();
1169
0
        }
1170
0
        break;
1171
0
      }
1172
0
      case OpCode::Array__set: {
1173
0
        auto Val = stackPop();
1174
0
        auto Idx = stackPop();
1175
0
        auto Ref = stackPop();
1176
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1177
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1178
0
        Builder.createCall(
1179
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArraySet,
1180
0
                                 LLVM::Type::getFunctionType(
1181
0
                                     Context.VoidTy,
1182
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1183
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1184
0
                                     false)),
1185
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, Arg});
1186
0
        break;
1187
0
      }
1188
0
      case OpCode::Array__len: {
1189
0
        auto Ref = stackPop();
1190
0
        stackPush(Builder.createCall(
1191
0
            Context.getIntrinsic(
1192
0
                Builder, Executable::Intrinsics::kArrayLen,
1193
0
                LLVM::Type::getFunctionType(Context.Int32Ty,
1194
0
                                            {Context.Int64x2Ty}, false)),
1195
0
            {Ref}));
1196
0
        break;
1197
0
      }
1198
0
      case OpCode::Array__fill: {
1199
0
        auto Cnt = stackPop();
1200
0
        auto Val = stackPop();
1201
0
        auto Off = stackPop();
1202
0
        auto Ref = stackPop();
1203
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1204
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1205
0
        Builder.createCall(
1206
0
            Context.getIntrinsic(
1207
0
                Builder, Executable::Intrinsics::kArrayFill,
1208
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1209
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1210
0
                                             Context.Int32Ty, Context.Int32Ty,
1211
0
                                             Context.Int8PtrTy},
1212
0
                                            false)),
1213
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Off, Cnt, Arg});
1214
0
        break;
1215
0
      }
1216
0
      case OpCode::Array__copy: {
1217
0
        auto Cnt = stackPop();
1218
0
        auto SrcOff = stackPop();
1219
0
        auto SrcRef = stackPop();
1220
0
        auto DstOff = stackPop();
1221
0
        auto DstRef = stackPop();
1222
0
        Builder.createCall(
1223
0
            Context.getIntrinsic(
1224
0
                Builder, Executable::Intrinsics::kArrayCopy,
1225
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1226
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1227
0
                                             Context.Int32Ty, Context.Int64x2Ty,
1228
0
                                             Context.Int32Ty, Context.Int32Ty,
1229
0
                                             Context.Int32Ty},
1230
0
                                            false)),
1231
0
            {DstRef, LLContext.getInt32(Instr.getTargetIndex()), DstOff, SrcRef,
1232
0
             LLContext.getInt32(Instr.getSourceIndex()), SrcOff, Cnt});
1233
0
        break;
1234
0
      }
1235
0
      case OpCode::Array__init_data:
1236
0
      case OpCode::Array__init_elem: {
1237
0
        auto Cnt = stackPop();
1238
0
        auto SrcOff = stackPop();
1239
0
        auto DstOff = stackPop();
1240
0
        auto Ref = stackPop();
1241
0
        Builder.createCall(
1242
0
            Context.getIntrinsic(
1243
0
                Builder,
1244
0
                ((Instr.getOpCode() == OpCode::Array__init_data)
1245
0
                     ? Executable::Intrinsics::kArrayInitData
1246
0
                     : Executable::Intrinsics::kArrayInitElem),
1247
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1248
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1249
0
                                             Context.Int32Ty, Context.Int32Ty,
1250
0
                                             Context.Int32Ty, Context.Int32Ty},
1251
0
                                            false)),
1252
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1253
0
             LLContext.getInt32(Instr.getSourceIndex()), DstOff, SrcOff, Cnt});
1254
0
        break;
1255
0
      }
1256
0
      case OpCode::Ref__test:
1257
0
      case OpCode::Ref__test_null: {
1258
0
        auto Ref = stackPop();
1259
0
        std::array<uint8_t, 16> Buf = {0};
1260
0
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1261
0
        auto VType = Builder.createExtractElement(
1262
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1263
0
                                  Context.Int64x2Ty),
1264
0
            LLContext.getInt64(0));
1265
0
        stackPush(Builder.createCall(
1266
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
1267
0
                                 LLVM::Type::getFunctionType(
1268
0
                                     Context.Int32Ty,
1269
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
1270
0
                                     false)),
1271
0
            {Ref, VType}));
1272
0
        break;
1273
0
      }
1274
0
      case OpCode::Ref__cast:
1275
0
      case OpCode::Ref__cast_null: {
1276
0
        auto Ref = stackPop();
1277
0
        std::array<uint8_t, 16> Buf = {0};
1278
0
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1279
0
        auto VType = Builder.createExtractElement(
1280
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1281
0
                                  Context.Int64x2Ty),
1282
0
            LLContext.getInt64(0));
1283
0
        stackPush(Builder.createCall(
1284
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefCast,
1285
0
                                 LLVM::Type::getFunctionType(
1286
0
                                     Context.Int64x2Ty,
1287
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
1288
0
                                     false)),
1289
0
            {Ref, VType}));
1290
0
        break;
1291
0
      }
1292
0
      case OpCode::Any__convert_extern: {
1293
0
        std::array<uint8_t, 16> RawRef = {0};
1294
0
        auto Ref = stackPop();
1295
0
        auto PtrVal = Builder.createExtractElement(Ref, LLContext.getInt64(1));
1296
0
        auto IsNullBB =
1297
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.null");
1298
0
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1299
0
                                                  "any_conv_extern.not_null");
1300
0
        auto IsExtrefBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1301
0
                                                   "any_conv_extern.is_extref");
1302
0
        auto EndBB =
1303
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.end");
1304
0
        auto CondIsNull = Builder.createICmpEQ(PtrVal, LLContext.getInt64(0));
1305
0
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1306
1307
0
        Builder.positionAtEnd(IsNullBB);
1308
0
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullRef);
1309
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1310
0
        auto Ret1 = Builder.createBitCast(
1311
0
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1312
0
        Builder.createBr(EndBB);
1313
1314
0
        Builder.positionAtEnd(NotNullBB);
1315
0
        auto Ret2 = Builder.createBitCast(
1316
0
            Builder.createInsertElement(
1317
0
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1318
0
                LLContext.getInt8(0), LLContext.getInt64(1)),
1319
0
            Context.Int64x2Ty);
1320
0
        auto HType = Builder.createExtractElement(
1321
0
            Builder.createBitCast(Ret2, Context.Int8x16Ty),
1322
0
            LLContext.getInt64(3));
1323
0
        auto CondIsExtref = Builder.createOr(
1324
0
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1325
0
                                            TypeCode::ExternRef))),
1326
0
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1327
0
                                            TypeCode::NullExternRef))));
1328
0
        Builder.createCondBr(CondIsExtref, IsExtrefBB, EndBB);
1329
1330
0
        Builder.positionAtEnd(IsExtrefBB);
1331
0
        VT = ValType(TypeCode::Ref, TypeCode::AnyRef);
1332
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1333
0
        auto Ret3 = Builder.createInsertElement(
1334
0
            Builder.createBitCast(
1335
0
                LLVM::Value::getConstVector8(LLContext, RawRef),
1336
0
                Context.Int64x2Ty),
1337
0
            PtrVal, LLContext.getInt64(1));
1338
0
        Builder.createBr(EndBB);
1339
1340
0
        Builder.positionAtEnd(EndBB);
1341
0
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1342
0
        Ret.addIncoming(Ret1, IsNullBB);
1343
0
        Ret.addIncoming(Ret2, NotNullBB);
1344
0
        Ret.addIncoming(Ret3, IsExtrefBB);
1345
0
        stackPush(Ret);
1346
0
        break;
1347
0
      }
1348
0
      case OpCode::Extern__convert_any: {
1349
0
        std::array<uint8_t, 16> RawRef = {0};
1350
0
        auto Ref = stackPop();
1351
0
        auto IsNullBB =
1352
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.null");
1353
0
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1354
0
                                                  "extern_conv_any.not_null");
1355
0
        auto EndBB =
1356
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.end");
1357
0
        auto CondIsNull = Builder.createICmpEQ(
1358
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1359
0
            LLContext.getInt64(0));
1360
0
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1361
1362
0
        Builder.positionAtEnd(IsNullBB);
1363
0
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullExternRef);
1364
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1365
0
        auto Ret1 = Builder.createBitCast(
1366
0
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1367
0
        Builder.createBr(EndBB);
1368
1369
0
        Builder.positionAtEnd(NotNullBB);
1370
0
        auto Ret2 = Builder.createBitCast(
1371
0
            Builder.createInsertElement(
1372
0
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1373
0
                LLContext.getInt8(1), LLContext.getInt64(1)),
1374
0
            Context.Int64x2Ty);
1375
0
        Builder.createBr(EndBB);
1376
1377
0
        Builder.positionAtEnd(EndBB);
1378
0
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1379
0
        Ret.addIncoming(Ret1, IsNullBB);
1380
0
        Ret.addIncoming(Ret2, NotNullBB);
1381
0
        stackPush(Ret);
1382
0
        break;
1383
0
      }
1384
0
      case OpCode::Ref__i31: {
1385
0
        std::array<uint8_t, 16> RawRef = {0};
1386
0
        auto VT = ValType(TypeCode::Ref, TypeCode::I31Ref);
1387
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1388
0
        auto Ref = Builder.createBitCast(
1389
0
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1390
0
        auto Val = Builder.createZExt(
1391
0
            Builder.createOr(
1392
0
                Builder.createAnd(stackPop(), LLContext.getInt32(0x7FFFFFFFU)),
1393
0
                LLContext.getInt32(0x80000000U)),
1394
0
            Context.Int64Ty);
1395
0
        stackPush(Builder.createInsertElement(Ref, Val, LLContext.getInt64(1)));
1396
0
        break;
1397
0
      }
1398
0
      case OpCode::I31__get_s: {
1399
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1400
0
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1401
0
        auto Val = Builder.createTrunc(
1402
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1403
0
            Context.Int32Ty);
1404
0
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1405
0
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1406
0
            LLContext.getInt32(0)));
1407
0
        Builder.createCondBr(IsNotNull, Next,
1408
0
                             getTrapBB(ErrCode::Value::AccessNullI31));
1409
0
        Builder.positionAtEnd(Next);
1410
0
        Val = Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU));
1411
0
        stackPush(Builder.createOr(
1412
0
            Val, Builder.createShl(
1413
0
                     Builder.createAnd(Val, LLContext.getInt32(0x40000000U)),
1414
0
                     LLContext.getInt32(1))));
1415
0
        break;
1416
0
      }
1417
0
      case OpCode::I31__get_u: {
1418
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1419
0
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1420
0
        auto Val = Builder.createTrunc(
1421
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1422
0
            Context.Int32Ty);
1423
0
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1424
0
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1425
0
            LLContext.getInt32(0)));
1426
0
        Builder.createCondBr(IsNotNull, Next,
1427
0
                             getTrapBB(ErrCode::Value::AccessNullI31));
1428
0
        Builder.positionAtEnd(Next);
1429
0
        stackPush(Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU)));
1430
0
        break;
1431
0
      }
1432
1433
      // Parametric Instructions
1434
3.38k
      case OpCode::Drop:
1435
3.38k
        stackPop();
1436
3.38k
        break;
1437
690
      case OpCode::Select:
1438
1.12k
      case OpCode::Select_t: {
1439
1.12k
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
1440
1.12k
        auto False = stackPop();
1441
1.12k
        auto True = stackPop();
1442
1.12k
        stackPush(Builder.createSelect(Cond, True, False));
1443
1.12k
        break;
1444
690
      }
1445
1446
      // Variable Instructions
1447
10.4k
      case OpCode::Local__get: {
1448
10.4k
        const auto &L = Local[Instr.getTargetIndex()];
1449
10.4k
        stackPush(Builder.createLoad(L.first, L.second));
1450
10.4k
        break;
1451
690
      }
1452
4.01k
      case OpCode::Local__set:
1453
4.01k
        Builder.createStore(stackPop(), Local[Instr.getTargetIndex()].second);
1454
4.01k
        break;
1455
767
      case OpCode::Local__tee:
1456
767
        Builder.createStore(Stack.back(), Local[Instr.getTargetIndex()].second);
1457
767
        break;
1458
255
      case OpCode::Global__get: {
1459
255
        const auto G =
1460
255
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex());
1461
255
        stackPush(Builder.createLoad(G.first, G.second));
1462
255
        break;
1463
690
      }
1464
49
      case OpCode::Global__set:
1465
49
        Builder.createStore(
1466
49
            stackPop(),
1467
49
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex()).second);
1468
49
        break;
1469
1470
      // Table Instructions
1471
33
      case OpCode::Table__get: {
1472
33
        auto Idx = stackPop();
1473
33
        stackPush(Builder.createCall(
1474
33
            Context.getIntrinsic(
1475
33
                Builder, Executable::Intrinsics::kTableGet,
1476
33
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1477
33
                                            {Context.Int32Ty, Context.Int32Ty},
1478
33
                                            false)),
1479
33
            {LLContext.getInt32(Instr.getTargetIndex()), Idx}));
1480
33
        break;
1481
690
      }
1482
26
      case OpCode::Table__set: {
1483
26
        auto Ref = stackPop();
1484
26
        auto Idx = stackPop();
1485
26
        Builder.createCall(
1486
26
            Context.getIntrinsic(
1487
26
                Builder, Executable::Intrinsics::kTableSet,
1488
26
                LLVM::Type::getFunctionType(
1489
26
                    Context.Int64Ty,
1490
26
                    {Context.Int32Ty, Context.Int32Ty, Context.Int64x2Ty},
1491
26
                    false)),
1492
26
            {LLContext.getInt32(Instr.getTargetIndex()), Idx, Ref});
1493
26
        break;
1494
690
      }
1495
27
      case OpCode::Table__init: {
1496
27
        auto Len = stackPop();
1497
27
        auto Src = stackPop();
1498
27
        auto Dst = stackPop();
1499
27
        Builder.createCall(
1500
27
            Context.getIntrinsic(
1501
27
                Builder, Executable::Intrinsics::kTableInit,
1502
27
                LLVM::Type::getFunctionType(Context.VoidTy,
1503
27
                                            {Context.Int32Ty, Context.Int32Ty,
1504
27
                                             Context.Int32Ty, Context.Int32Ty,
1505
27
                                             Context.Int32Ty},
1506
27
                                            false)),
1507
27
            {LLContext.getInt32(Instr.getTargetIndex()),
1508
27
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1509
27
        break;
1510
690
      }
1511
33
      case OpCode::Elem__drop: {
1512
33
        Builder.createCall(
1513
33
            Context.getIntrinsic(Builder, Executable::Intrinsics::kElemDrop,
1514
33
                                 LLVM::Type::getFunctionType(
1515
33
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1516
33
            {LLContext.getInt32(Instr.getTargetIndex())});
1517
33
        break;
1518
690
      }
1519
16
      case OpCode::Table__copy: {
1520
16
        auto Len = stackPop();
1521
16
        auto Src = stackPop();
1522
16
        auto Dst = stackPop();
1523
16
        Builder.createCall(
1524
16
            Context.getIntrinsic(
1525
16
                Builder, Executable::Intrinsics::kTableCopy,
1526
16
                LLVM::Type::getFunctionType(Context.VoidTy,
1527
16
                                            {Context.Int32Ty, Context.Int32Ty,
1528
16
                                             Context.Int32Ty, Context.Int32Ty,
1529
16
                                             Context.Int32Ty},
1530
16
                                            false)),
1531
16
            {LLContext.getInt32(Instr.getTargetIndex()),
1532
16
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1533
16
        break;
1534
690
      }
1535
14
      case OpCode::Table__grow: {
1536
14
        auto NewSize = stackPop();
1537
14
        auto Val = stackPop();
1538
14
        stackPush(Builder.createCall(
1539
14
            Context.getIntrinsic(
1540
14
                Builder, Executable::Intrinsics::kTableGrow,
1541
14
                LLVM::Type::getFunctionType(
1542
14
                    Context.Int32Ty,
1543
14
                    {Context.Int32Ty, Context.Int64x2Ty, Context.Int32Ty},
1544
14
                    false)),
1545
14
            {LLContext.getInt32(Instr.getTargetIndex()), Val, NewSize}));
1546
14
        break;
1547
690
      }
1548
17
      case OpCode::Table__size: {
1549
17
        stackPush(Builder.createCall(
1550
17
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableSize,
1551
17
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1552
17
                                                             {Context.Int32Ty},
1553
17
                                                             false)),
1554
17
            {LLContext.getInt32(Instr.getTargetIndex())}));
1555
17
        break;
1556
690
      }
1557
3
      case OpCode::Table__fill: {
1558
3
        auto Len = stackPop();
1559
3
        auto Val = stackPop();
1560
3
        auto Off = stackPop();
1561
3
        Builder.createCall(
1562
3
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableFill,
1563
3
                                 LLVM::Type::getFunctionType(
1564
3
                                     Context.Int32Ty,
1565
3
                                     {Context.Int32Ty, Context.Int32Ty,
1566
3
                                      Context.Int64x2Ty, Context.Int32Ty},
1567
3
                                     false)),
1568
3
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1569
3
        break;
1570
690
      }
1571
1572
      // Memory Instructions
1573
1.05k
      case OpCode::I32__load:
1574
1.05k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1575
1.05k
                      Instr.getMemoryAlign(), Context.Int32Ty);
1576
1.05k
        break;
1577
3.36k
      case OpCode::I64__load:
1578
3.36k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1579
3.36k
                      Instr.getMemoryAlign(), Context.Int64Ty);
1580
3.36k
        break;
1581
102
      case OpCode::F32__load:
1582
102
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1583
102
                      Instr.getMemoryAlign(), Context.FloatTy);
1584
102
        break;
1585
228
      case OpCode::F64__load:
1586
228
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1587
228
                      Instr.getMemoryAlign(), Context.DoubleTy);
1588
228
        break;
1589
457
      case OpCode::I32__load8_s:
1590
457
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1591
457
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1592
457
                      true);
1593
457
        break;
1594
178
      case OpCode::I32__load8_u:
1595
178
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1596
178
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1597
178
                      false);
1598
178
        break;
1599
343
      case OpCode::I32__load16_s:
1600
343
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1601
343
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1602
343
                      true);
1603
343
        break;
1604
1.65k
      case OpCode::I32__load16_u:
1605
1.65k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1606
1.65k
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1607
1.65k
                      false);
1608
1.65k
        break;
1609
689
      case OpCode::I64__load8_s:
1610
689
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1611
689
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1612
689
                      true);
1613
689
        break;
1614
422
      case OpCode::I64__load8_u:
1615
422
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1616
422
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1617
422
                      false);
1618
422
        break;
1619
394
      case OpCode::I64__load16_s:
1620
394
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1621
394
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1622
394
                      true);
1623
394
        break;
1624
562
      case OpCode::I64__load16_u:
1625
562
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1626
562
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1627
562
                      false);
1628
562
        break;
1629
356
      case OpCode::I64__load32_s:
1630
356
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1631
356
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1632
356
                      true);
1633
356
        break;
1634
439
      case OpCode::I64__load32_u:
1635
439
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1636
439
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1637
439
                      false);
1638
439
        break;
1639
449
      case OpCode::I32__store:
1640
449
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1641
449
                       Instr.getMemoryAlign(), Context.Int32Ty);
1642
449
        break;
1643
1.48k
      case OpCode::I64__store:
1644
1.48k
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1645
1.48k
                       Instr.getMemoryAlign(), Context.Int64Ty);
1646
1.48k
        break;
1647
61
      case OpCode::F32__store:
1648
61
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1649
61
                       Instr.getMemoryAlign(), Context.FloatTy);
1650
61
        break;
1651
47
      case OpCode::F64__store:
1652
47
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1653
47
                       Instr.getMemoryAlign(), Context.DoubleTy);
1654
47
        break;
1655
330
      case OpCode::I32__store8:
1656
352
      case OpCode::I64__store8:
1657
352
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1658
352
                       Instr.getMemoryAlign(), Context.Int8Ty, true);
1659
352
        break;
1660
197
      case OpCode::I32__store16:
1661
250
      case OpCode::I64__store16:
1662
250
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1663
250
                       Instr.getMemoryAlign(), Context.Int16Ty, true);
1664
250
        break;
1665
38
      case OpCode::I64__store32:
1666
38
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1667
38
                       Instr.getMemoryAlign(), Context.Int32Ty, true);
1668
38
        break;
1669
836
      case OpCode::Memory__size:
1670
836
        stackPush(Builder.createCall(
1671
836
            Context.getIntrinsic(Builder, Executable::Intrinsics::kMemSize,
1672
836
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1673
836
                                                             {Context.Int32Ty},
1674
836
                                                             false)),
1675
836
            {LLContext.getInt32(Instr.getTargetIndex())}));
1676
836
        break;
1677
488
      case OpCode::Memory__grow: {
1678
488
        auto Diff = stackPop();
1679
488
        stackPush(Builder.createCall(
1680
488
            Context.getIntrinsic(
1681
488
                Builder, Executable::Intrinsics::kMemGrow,
1682
488
                LLVM::Type::getFunctionType(Context.Int32Ty,
1683
488
                                            {Context.Int32Ty, Context.Int32Ty},
1684
488
                                            false)),
1685
488
            {LLContext.getInt32(Instr.getTargetIndex()), Diff}));
1686
488
        break;
1687
197
      }
1688
23
      case OpCode::Memory__init: {
1689
23
        auto Len = stackPop();
1690
23
        auto Src = stackPop();
1691
23
        auto Dst = stackPop();
1692
23
        Builder.createCall(
1693
23
            Context.getIntrinsic(
1694
23
                Builder, Executable::Intrinsics::kMemInit,
1695
23
                LLVM::Type::getFunctionType(Context.VoidTy,
1696
23
                                            {Context.Int32Ty, Context.Int32Ty,
1697
23
                                             Context.Int32Ty, Context.Int32Ty,
1698
23
                                             Context.Int32Ty},
1699
23
                                            false)),
1700
23
            {LLContext.getInt32(Instr.getTargetIndex()),
1701
23
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1702
23
        break;
1703
197
      }
1704
22
      case OpCode::Data__drop: {
1705
22
        Builder.createCall(
1706
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kDataDrop,
1707
22
                                 LLVM::Type::getFunctionType(
1708
22
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1709
22
            {LLContext.getInt32(Instr.getTargetIndex())});
1710
22
        break;
1711
197
      }
1712
254
      case OpCode::Memory__copy: {
1713
254
        auto Len = stackPop();
1714
254
        auto Src = stackPop();
1715
254
        auto Dst = stackPop();
1716
254
        Builder.createCall(
1717
254
            Context.getIntrinsic(
1718
254
                Builder, Executable::Intrinsics::kMemCopy,
1719
254
                LLVM::Type::getFunctionType(Context.VoidTy,
1720
254
                                            {Context.Int32Ty, Context.Int32Ty,
1721
254
                                             Context.Int32Ty, Context.Int32Ty,
1722
254
                                             Context.Int32Ty},
1723
254
                                            false)),
1724
254
            {LLContext.getInt32(Instr.getTargetIndex()),
1725
254
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1726
254
        break;
1727
197
      }
1728
559
      case OpCode::Memory__fill: {
1729
559
        auto Len = stackPop();
1730
559
        auto Val = Builder.createTrunc(stackPop(), Context.Int8Ty);
1731
559
        auto Off = stackPop();
1732
559
        Builder.createCall(
1733
559
            Context.getIntrinsic(
1734
559
                Builder, Executable::Intrinsics::kMemFill,
1735
559
                LLVM::Type::getFunctionType(Context.VoidTy,
1736
559
                                            {Context.Int32Ty, Context.Int32Ty,
1737
559
                                             Context.Int8Ty, Context.Int32Ty},
1738
559
                                            false)),
1739
559
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1740
559
        break;
1741
197
      }
1742
1743
      // Const Numeric Instructions
1744
529k
      case OpCode::I32__const:
1745
529k
        stackPush(LLContext.getInt32(Instr.getNum().get<uint32_t>()));
1746
529k
        break;
1747
88.1k
      case OpCode::I64__const:
1748
88.1k
        stackPush(LLContext.getInt64(Instr.getNum().get<uint64_t>()));
1749
88.1k
        break;
1750
14.2k
      case OpCode::F32__const:
1751
14.2k
        stackPush(LLContext.getFloat(Instr.getNum().get<float>()));
1752
14.2k
        break;
1753
6.49k
      case OpCode::F64__const:
1754
6.49k
        stackPush(LLContext.getDouble(Instr.getNum().get<double>()));
1755
6.49k
        break;
1756
1757
      // Unary Numeric Instructions
1758
6.76k
      case OpCode::I32__eqz:
1759
6.76k
        stackPush(Builder.createZExt(
1760
6.76k
            Builder.createICmpEQ(stackPop(), LLContext.getInt32(0)),
1761
6.76k
            Context.Int32Ty));
1762
6.76k
        break;
1763
1.22k
      case OpCode::I64__eqz:
1764
1.22k
        stackPush(Builder.createZExt(
1765
1.22k
            Builder.createICmpEQ(stackPop(), LLContext.getInt64(0)),
1766
1.22k
            Context.Int32Ty));
1767
1.22k
        break;
1768
1.98k
      case OpCode::I32__clz:
1769
1.98k
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1770
1.98k
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int32Ty},
1771
1.98k
                                          {stackPop(), LLContext.getFalse()}));
1772
1.98k
        break;
1773
315
      case OpCode::I64__clz:
1774
315
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1775
315
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int64Ty},
1776
315
                                          {stackPop(), LLContext.getFalse()}));
1777
315
        break;
1778
1.88k
      case OpCode::I32__ctz:
1779
1.88k
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1780
1.88k
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int32Ty},
1781
1.88k
                                          {stackPop(), LLContext.getFalse()}));
1782
1.88k
        break;
1783
439
      case OpCode::I64__ctz:
1784
439
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1785
439
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int64Ty},
1786
439
                                          {stackPop(), LLContext.getFalse()}));
1787
439
        break;
1788
12.9k
      case OpCode::I32__popcnt:
1789
14.9k
      case OpCode::I64__popcnt:
1790
14.9k
        assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
1791
14.9k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, stackPop()));
1792
14.9k
        break;
1793
825
      case OpCode::F32__abs:
1794
1.37k
      case OpCode::F64__abs:
1795
1.37k
        assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
1796
1.37k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Fabs, stackPop()));
1797
1.37k
        break;
1798
1.04k
      case OpCode::F32__neg:
1799
1.81k
      case OpCode::F64__neg:
1800
1.81k
        stackPush(Builder.createFNeg(stackPop()));
1801
1.81k
        break;
1802
1.85k
      case OpCode::F32__ceil:
1803
4.47k
      case OpCode::F64__ceil:
1804
4.47k
        assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
1805
4.47k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ceil, stackPop()));
1806
4.47k
        break;
1807
883
      case OpCode::F32__floor:
1808
1.26k
      case OpCode::F64__floor:
1809
1.26k
        assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
1810
1.26k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Floor, stackPop()));
1811
1.26k
        break;
1812
504
      case OpCode::F32__trunc:
1813
791
      case OpCode::F64__trunc:
1814
791
        assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
1815
791
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Trunc, stackPop()));
1816
791
        break;
1817
834
      case OpCode::F32__nearest:
1818
1.19k
      case OpCode::F64__nearest: {
1819
1.19k
        const bool IsFloat = Instr.getOpCode() == OpCode::F32__nearest;
1820
1.19k
        LLVM::Value Value = stackPop();
1821
1822
1.19k
#if LLVM_VERSION_MAJOR >= 12
1823
1.19k
        assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
1824
1.19k
        if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
1825
1.19k
          stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Value));
1826
1.19k
          break;
1827
1.19k
        }
1828
0
#endif
1829
1830
        // The VectorSize is only used when SSE4_1 or NEON is supported.
1831
0
        [[maybe_unused]] const uint32_t VectorSize = IsFloat ? 4 : 2;
1832
0
#if defined(__x86_64__)
1833
0
        if (Context.SupportSSE4_1) {
1834
0
          auto Zero = LLContext.getInt64(0);
1835
0
          auto VectorTy =
1836
0
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1837
0
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1838
0
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1839
0
          auto ID = IsFloat ? LLVM::Core::X86SSE41RoundSs
1840
0
                            : LLVM::Core::X86SSE41RoundSd;
1841
0
          assuming(ID != LLVM::Core::NotIntrinsic);
1842
0
          Ret = Builder.createIntrinsic(ID, {},
1843
0
                                        {Ret, Ret, LLContext.getInt32(8)});
1844
0
          Ret = Builder.createExtractElement(Ret, Zero);
1845
0
          stackPush(Ret);
1846
0
          break;
1847
0
        }
1848
0
#endif
1849
1850
#if defined(__aarch64__)
1851
        if (Context.SupportNEON &&
1852
            LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
1853
          auto Zero = LLContext.getInt64(0);
1854
          auto VectorTy =
1855
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1856
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1857
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1858
          Ret =
1859
              Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, Ret);
1860
          Ret = Builder.createExtractElement(Ret, Zero);
1861
          stackPush(Ret);
1862
          break;
1863
        }
1864
#endif
1865
1866
        // Fallback case.
1867
        // If the SSE4.1 is not supported on the x86_64 platform or
1868
        // the NEON is not supported on the aarch64 platform,
1869
        // then fall back to this.
1870
0
        assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
1871
0
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Value));
1872
0
        break;
1873
0
      }
1874
427
      case OpCode::F32__sqrt:
1875
3.26k
      case OpCode::F64__sqrt:
1876
3.26k
        assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
1877
3.26k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, stackPop()));
1878
3.26k
        break;
1879
293
      case OpCode::I32__wrap_i64:
1880
293
        stackPush(Builder.createTrunc(stackPop(), Context.Int32Ty));
1881
293
        break;
1882
1.30k
      case OpCode::I32__trunc_f32_s:
1883
1.30k
        compileSignedTrunc(Context.Int32Ty);
1884
1.30k
        break;
1885
254
      case OpCode::I32__trunc_f64_s:
1886
254
        compileSignedTrunc(Context.Int32Ty);
1887
254
        break;
1888
169
      case OpCode::I32__trunc_f32_u:
1889
169
        compileUnsignedTrunc(Context.Int32Ty);
1890
169
        break;
1891
1.34k
      case OpCode::I32__trunc_f64_u:
1892
1.34k
        compileUnsignedTrunc(Context.Int32Ty);
1893
1.34k
        break;
1894
2.02k
      case OpCode::I64__extend_i32_s:
1895
2.02k
        stackPush(Builder.createSExt(stackPop(), Context.Int64Ty));
1896
2.02k
        break;
1897
365
      case OpCode::I64__extend_i32_u:
1898
365
        stackPush(Builder.createZExt(stackPop(), Context.Int64Ty));
1899
365
        break;
1900
52
      case OpCode::I64__trunc_f32_s:
1901
52
        compileSignedTrunc(Context.Int64Ty);
1902
52
        break;
1903
402
      case OpCode::I64__trunc_f64_s:
1904
402
        compileSignedTrunc(Context.Int64Ty);
1905
402
        break;
1906
922
      case OpCode::I64__trunc_f32_u:
1907
922
        compileUnsignedTrunc(Context.Int64Ty);
1908
922
        break;
1909
1.25k
      case OpCode::I64__trunc_f64_u:
1910
1.25k
        compileUnsignedTrunc(Context.Int64Ty);
1911
1.25k
        break;
1912
1.82k
      case OpCode::F32__convert_i32_s:
1913
2.25k
      case OpCode::F32__convert_i64_s:
1914
2.25k
        stackPush(Builder.createSIToFP(stackPop(), Context.FloatTy));
1915
2.25k
        break;
1916
636
      case OpCode::F32__convert_i32_u:
1917
1.74k
      case OpCode::F32__convert_i64_u:
1918
1.74k
        stackPush(Builder.createUIToFP(stackPop(), Context.FloatTy));
1919
1.74k
        break;
1920
1.64k
      case OpCode::F64__convert_i32_s:
1921
6.06k
      case OpCode::F64__convert_i64_s:
1922
6.06k
        stackPush(Builder.createSIToFP(stackPop(), Context.DoubleTy));
1923
6.06k
        break;
1924
1.39k
      case OpCode::F64__convert_i32_u:
1925
1.58k
      case OpCode::F64__convert_i64_u:
1926
1.58k
        stackPush(Builder.createUIToFP(stackPop(), Context.DoubleTy));
1927
1.58k
        break;
1928
174
      case OpCode::F32__demote_f64:
1929
174
        stackPush(Builder.createFPTrunc(stackPop(), Context.FloatTy));
1930
174
        break;
1931
91
      case OpCode::F64__promote_f32:
1932
91
        stackPush(Builder.createFPExt(stackPop(), Context.DoubleTy));
1933
91
        break;
1934
557
      case OpCode::I32__reinterpret_f32:
1935
557
        stackPush(Builder.createBitCast(stackPop(), Context.Int32Ty));
1936
557
        break;
1937
687
      case OpCode::I64__reinterpret_f64:
1938
687
        stackPush(Builder.createBitCast(stackPop(), Context.Int64Ty));
1939
687
        break;
1940
4.31k
      case OpCode::F32__reinterpret_i32:
1941
4.31k
        stackPush(Builder.createBitCast(stackPop(), Context.FloatTy));
1942
4.31k
        break;
1943
1.31k
      case OpCode::F64__reinterpret_i64:
1944
1.31k
        stackPush(Builder.createBitCast(stackPop(), Context.DoubleTy));
1945
1.31k
        break;
1946
2.31k
      case OpCode::I32__extend8_s:
1947
2.31k
        stackPush(Builder.createSExt(
1948
2.31k
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int32Ty));
1949
2.31k
        break;
1950
3.08k
      case OpCode::I32__extend16_s:
1951
3.08k
        stackPush(Builder.createSExt(
1952
3.08k
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int32Ty));
1953
3.08k
        break;
1954
370
      case OpCode::I64__extend8_s:
1955
370
        stackPush(Builder.createSExt(
1956
370
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int64Ty));
1957
370
        break;
1958
619
      case OpCode::I64__extend16_s:
1959
619
        stackPush(Builder.createSExt(
1960
619
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int64Ty));
1961
619
        break;
1962
753
      case OpCode::I64__extend32_s:
1963
753
        stackPush(Builder.createSExt(
1964
753
            Builder.createTrunc(stackPop(), Context.Int32Ty), Context.Int64Ty));
1965
753
        break;
1966
1967
      // Binary Numeric Instructions
1968
1.20k
      case OpCode::I32__eq:
1969
1.46k
      case OpCode::I64__eq: {
1970
1.46k
        LLVM::Value RHS = stackPop();
1971
1.46k
        LLVM::Value LHS = stackPop();
1972
1.46k
        stackPush(Builder.createZExt(Builder.createICmpEQ(LHS, RHS),
1973
1.46k
                                     Context.Int32Ty));
1974
1.46k
        break;
1975
1.20k
      }
1976
668
      case OpCode::I32__ne:
1977
695
      case OpCode::I64__ne: {
1978
695
        LLVM::Value RHS = stackPop();
1979
695
        LLVM::Value LHS = stackPop();
1980
695
        stackPush(Builder.createZExt(Builder.createICmpNE(LHS, RHS),
1981
695
                                     Context.Int32Ty));
1982
695
        break;
1983
668
      }
1984
4.32k
      case OpCode::I32__lt_s:
1985
4.93k
      case OpCode::I64__lt_s: {
1986
4.93k
        LLVM::Value RHS = stackPop();
1987
4.93k
        LLVM::Value LHS = stackPop();
1988
4.93k
        stackPush(Builder.createZExt(Builder.createICmpSLT(LHS, RHS),
1989
4.93k
                                     Context.Int32Ty));
1990
4.93k
        break;
1991
4.32k
      }
1992
6.12k
      case OpCode::I32__lt_u:
1993
6.50k
      case OpCode::I64__lt_u: {
1994
6.50k
        LLVM::Value RHS = stackPop();
1995
6.50k
        LLVM::Value LHS = stackPop();
1996
6.50k
        stackPush(Builder.createZExt(Builder.createICmpULT(LHS, RHS),
1997
6.50k
                                     Context.Int32Ty));
1998
6.50k
        break;
1999
6.12k
      }
2000
1.03k
      case OpCode::I32__gt_s:
2001
1.46k
      case OpCode::I64__gt_s: {
2002
1.46k
        LLVM::Value RHS = stackPop();
2003
1.46k
        LLVM::Value LHS = stackPop();
2004
1.46k
        stackPush(Builder.createZExt(Builder.createICmpSGT(LHS, RHS),
2005
1.46k
                                     Context.Int32Ty));
2006
1.46k
        break;
2007
1.03k
      }
2008
6.52k
      case OpCode::I32__gt_u:
2009
6.75k
      case OpCode::I64__gt_u: {
2010
6.75k
        LLVM::Value RHS = stackPop();
2011
6.75k
        LLVM::Value LHS = stackPop();
2012
6.75k
        stackPush(Builder.createZExt(Builder.createICmpUGT(LHS, RHS),
2013
6.75k
                                     Context.Int32Ty));
2014
6.75k
        break;
2015
6.52k
      }
2016
1.83k
      case OpCode::I32__le_s:
2017
2.73k
      case OpCode::I64__le_s: {
2018
2.73k
        LLVM::Value RHS = stackPop();
2019
2.73k
        LLVM::Value LHS = stackPop();
2020
2.73k
        stackPush(Builder.createZExt(Builder.createICmpSLE(LHS, RHS),
2021
2.73k
                                     Context.Int32Ty));
2022
2.73k
        break;
2023
1.83k
      }
2024
485
      case OpCode::I32__le_u:
2025
2.17k
      case OpCode::I64__le_u: {
2026
2.17k
        LLVM::Value RHS = stackPop();
2027
2.17k
        LLVM::Value LHS = stackPop();
2028
2.17k
        stackPush(Builder.createZExt(Builder.createICmpULE(LHS, RHS),
2029
2.17k
                                     Context.Int32Ty));
2030
2.17k
        break;
2031
485
      }
2032
1.29k
      case OpCode::I32__ge_s:
2033
1.32k
      case OpCode::I64__ge_s: {
2034
1.32k
        LLVM::Value RHS = stackPop();
2035
1.32k
        LLVM::Value LHS = stackPop();
2036
1.32k
        stackPush(Builder.createZExt(Builder.createICmpSGE(LHS, RHS),
2037
1.32k
                                     Context.Int32Ty));
2038
1.32k
        break;
2039
1.29k
      }
2040
2.65k
      case OpCode::I32__ge_u:
2041
3.28k
      case OpCode::I64__ge_u: {
2042
3.28k
        LLVM::Value RHS = stackPop();
2043
3.28k
        LLVM::Value LHS = stackPop();
2044
3.28k
        stackPush(Builder.createZExt(Builder.createICmpUGE(LHS, RHS),
2045
3.28k
                                     Context.Int32Ty));
2046
3.28k
        break;
2047
2.65k
      }
2048
158
      case OpCode::F32__eq:
2049
210
      case OpCode::F64__eq: {
2050
210
        LLVM::Value RHS = stackPop();
2051
210
        LLVM::Value LHS = stackPop();
2052
210
        stackPush(Builder.createZExt(Builder.createFCmpOEQ(LHS, RHS),
2053
210
                                     Context.Int32Ty));
2054
210
        break;
2055
158
      }
2056
120
      case OpCode::F32__ne:
2057
152
      case OpCode::F64__ne: {
2058
152
        LLVM::Value RHS = stackPop();
2059
152
        LLVM::Value LHS = stackPop();
2060
152
        stackPush(Builder.createZExt(Builder.createFCmpUNE(LHS, RHS),
2061
152
                                     Context.Int32Ty));
2062
152
        break;
2063
120
      }
2064
195
      case OpCode::F32__lt:
2065
323
      case OpCode::F64__lt: {
2066
323
        LLVM::Value RHS = stackPop();
2067
323
        LLVM::Value LHS = stackPop();
2068
323
        stackPush(Builder.createZExt(Builder.createFCmpOLT(LHS, RHS),
2069
323
                                     Context.Int32Ty));
2070
323
        break;
2071
195
      }
2072
147
      case OpCode::F32__gt:
2073
232
      case OpCode::F64__gt: {
2074
232
        LLVM::Value RHS = stackPop();
2075
232
        LLVM::Value LHS = stackPop();
2076
232
        stackPush(Builder.createZExt(Builder.createFCmpOGT(LHS, RHS),
2077
232
                                     Context.Int32Ty));
2078
232
        break;
2079
147
      }
2080
79
      case OpCode::F32__le:
2081
182
      case OpCode::F64__le: {
2082
182
        LLVM::Value RHS = stackPop();
2083
182
        LLVM::Value LHS = stackPop();
2084
182
        stackPush(Builder.createZExt(Builder.createFCmpOLE(LHS, RHS),
2085
182
                                     Context.Int32Ty));
2086
182
        break;
2087
79
      }
2088
330
      case OpCode::F32__ge:
2089
358
      case OpCode::F64__ge: {
2090
358
        LLVM::Value RHS = stackPop();
2091
358
        LLVM::Value LHS = stackPop();
2092
358
        stackPush(Builder.createZExt(Builder.createFCmpOGE(LHS, RHS),
2093
358
                                     Context.Int32Ty));
2094
358
        break;
2095
330
      }
2096
706
      case OpCode::I32__add:
2097
1.16k
      case OpCode::I64__add: {
2098
1.16k
        LLVM::Value RHS = stackPop();
2099
1.16k
        LLVM::Value LHS = stackPop();
2100
1.16k
        stackPush(Builder.createAdd(LHS, RHS));
2101
1.16k
        break;
2102
706
      }
2103
1.56k
      case OpCode::I32__sub:
2104
1.94k
      case OpCode::I64__sub: {
2105
1.94k
        LLVM::Value RHS = stackPop();
2106
1.94k
        LLVM::Value LHS = stackPop();
2107
2108
1.94k
        stackPush(Builder.createSub(LHS, RHS));
2109
1.94k
        break;
2110
1.56k
      }
2111
609
      case OpCode::I32__mul:
2112
1.19k
      case OpCode::I64__mul: {
2113
1.19k
        LLVM::Value RHS = stackPop();
2114
1.19k
        LLVM::Value LHS = stackPop();
2115
1.19k
        stackPush(Builder.createMul(LHS, RHS));
2116
1.19k
        break;
2117
609
      }
2118
1.34k
      case OpCode::I32__div_s:
2119
1.92k
      case OpCode::I64__div_s: {
2120
1.92k
        LLVM::Value RHS = stackPop();
2121
1.92k
        LLVM::Value LHS = stackPop();
2122
1.92k
        if constexpr (kForceDivCheck) {
2123
1.92k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_s;
2124
1.92k
          LLVM::Value IntZero =
2125
1.92k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2126
1.92k
          LLVM::Value IntMinusOne =
2127
1.92k
              Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2128
1.92k
                   : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2129
1.92k
          LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2130
1.34k
                                          std::numeric_limits<int32_t>::min()))
2131
1.92k
                                    : LLContext.getInt64(static_cast<uint64_t>(
2132
571
                                          std::numeric_limits<int64_t>::min()));
2133
2134
1.92k
          auto NoZeroBB =
2135
1.92k
              LLVM::BasicBlock::create(LLContext, F.Fn, "div.nozero");
2136
1.92k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2137
2138
1.92k
          auto IsNotZero =
2139
1.92k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2140
1.92k
          Builder.createCondBr(IsNotZero, NoZeroBB,
2141
1.92k
                               getTrapBB(ErrCode::Value::DivideByZero));
2142
2143
1.92k
          Builder.positionAtEnd(NoZeroBB);
2144
1.92k
          auto NotOverflow = Builder.createLikely(
2145
1.92k
              Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2146
1.92k
                               Builder.createICmpNE(RHS, IntMinusOne)));
2147
1.92k
          Builder.createCondBr(NotOverflow, OkBB,
2148
1.92k
                               getTrapBB(ErrCode::Value::IntegerOverflow));
2149
2150
1.92k
          Builder.positionAtEnd(OkBB);
2151
1.92k
        }
2152
1.92k
        stackPush(Builder.createSDiv(LHS, RHS));
2153
1.92k
        break;
2154
1.34k
      }
2155
3.37k
      case OpCode::I32__div_u:
2156
3.68k
      case OpCode::I64__div_u: {
2157
3.68k
        LLVM::Value RHS = stackPop();
2158
3.68k
        LLVM::Value LHS = stackPop();
2159
3.68k
        if constexpr (kForceDivCheck) {
2160
3.68k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_u;
2161
3.68k
          LLVM::Value IntZero =
2162
3.68k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2163
3.68k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2164
2165
3.68k
          auto IsNotZero =
2166
3.68k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2167
3.68k
          Builder.createCondBr(IsNotZero, OkBB,
2168
3.68k
                               getTrapBB(ErrCode::Value::DivideByZero));
2169
3.68k
          Builder.positionAtEnd(OkBB);
2170
3.68k
        }
2171
3.68k
        stackPush(Builder.createUDiv(LHS, RHS));
2172
3.68k
        break;
2173
3.37k
      }
2174
933
      case OpCode::I32__rem_s:
2175
1.38k
      case OpCode::I64__rem_s: {
2176
1.38k
        LLVM::Value RHS = stackPop();
2177
1.38k
        LLVM::Value LHS = stackPop();
2178
        // handle INT_MIN % -1 (i32 and i64): bypass srem and yield 0 instead
2179
1.38k
        const bool Is32 = Instr.getOpCode() == OpCode::I32__rem_s;
2180
1.38k
        LLVM::Value IntMinusOne =
2181
1.38k
            Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2182
1.38k
                 : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2183
1.38k
        LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2184
933
                                        std::numeric_limits<int32_t>::min()))
2185
1.38k
                                  : LLContext.getInt64(static_cast<uint64_t>(
2186
451
                                        std::numeric_limits<int64_t>::min()));
2187
1.38k
        LLVM::Value IntZero =
2188
1.38k
            Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2189
2190
1.38k
        auto NoOverflowBB =
2191
1.38k
            LLVM::BasicBlock::create(LLContext, F.Fn, "no.overflow");
2192
1.38k
        auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "end.overflow");
2193
2194
1.38k
        if constexpr (kForceDivCheck) {
2195
1.38k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2196
2197
1.38k
          auto IsNotZero =
2198
1.38k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2199
1.38k
          Builder.createCondBr(IsNotZero, OkBB,
2200
1.38k
                               getTrapBB(ErrCode::Value::DivideByZero));
2201
1.38k
          Builder.positionAtEnd(OkBB);
2202
1.38k
        }
2203
2204
1.38k
        auto CurrBB = Builder.getInsertBlock();
2205
2206
1.38k
        auto NotOverflow = Builder.createLikely(
2207
1.38k
            Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2208
1.38k
                             Builder.createICmpNE(RHS, IntMinusOne)));
2209
1.38k
        Builder.createCondBr(NotOverflow, NoOverflowBB, EndBB);
2210
2211
1.38k
        Builder.positionAtEnd(NoOverflowBB);
2212
1.38k
        auto Ret1 = Builder.createSRem(LHS, RHS);
2213
1.38k
        Builder.createBr(EndBB);
2214
2215
1.38k
        Builder.positionAtEnd(EndBB);
2216
1.38k
        auto Ret = Builder.createPHI(Ret1.getType());
2217
1.38k
        Ret.addIncoming(Ret1, NoOverflowBB);
2218
1.38k
        Ret.addIncoming(IntZero, CurrBB);
2219
2220
1.38k
        stackPush(Ret);
2221
1.38k
        break;
2222
933
      }
2223
986
      case OpCode::I32__rem_u:
2224
1.56k
      case OpCode::I64__rem_u: {
2225
1.56k
        LLVM::Value RHS = stackPop();
2226
1.56k
        LLVM::Value LHS = stackPop();
2227
1.56k
        if constexpr (kForceDivCheck) {
2228
1.56k
          LLVM::Value IntZero = Instr.getOpCode() == OpCode::I32__rem_u
2229
1.56k
                                    ? LLContext.getInt32(0)
2230
1.56k
                                    : LLContext.getInt64(0);
2231
1.56k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2232
2233
1.56k
          auto IsNotZero =
2234
1.56k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2235
1.56k
          Builder.createCondBr(IsNotZero, OkBB,
2236
1.56k
                               getTrapBB(ErrCode::Value::DivideByZero));
2237
1.56k
          Builder.positionAtEnd(OkBB);
2238
1.56k
        }
2239
1.56k
        stackPush(Builder.createURem(LHS, RHS));
2240
1.56k
        break;
2241
986
      }
2242
675
      case OpCode::I32__and:
2243
2.02k
      case OpCode::I64__and: {
2244
2.02k
        LLVM::Value RHS = stackPop();
2245
2.02k
        LLVM::Value LHS = stackPop();
2246
2.02k
        stackPush(Builder.createAnd(LHS, RHS));
2247
2.02k
        break;
2248
675
      }
2249
962
      case OpCode::I32__or:
2250
1.29k
      case OpCode::I64__or: {
2251
1.29k
        LLVM::Value RHS = stackPop();
2252
1.29k
        LLVM::Value LHS = stackPop();
2253
1.29k
        stackPush(Builder.createOr(LHS, RHS));
2254
1.29k
        break;
2255
962
      }
2256
1.12k
      case OpCode::I32__xor:
2257
1.74k
      case OpCode::I64__xor: {
2258
1.74k
        LLVM::Value RHS = stackPop();
2259
1.74k
        LLVM::Value LHS = stackPop();
2260
1.74k
        stackPush(Builder.createXor(LHS, RHS));
2261
1.74k
        break;
2262
1.12k
      }
2263
1.48k
      case OpCode::I32__shl:
2264
1.92k
      case OpCode::I64__shl: {
2265
1.92k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shl
2266
1.92k
                               ? LLContext.getInt32(31)
2267
1.92k
                               : LLContext.getInt64(63);
2268
1.92k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2269
1.92k
        LLVM::Value LHS = stackPop();
2270
1.92k
        stackPush(Builder.createShl(LHS, RHS));
2271
1.92k
        break;
2272
1.48k
      }
2273
1.75k
      case OpCode::I32__shr_s:
2274
2.12k
      case OpCode::I64__shr_s: {
2275
2.12k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_s
2276
2.12k
                               ? LLContext.getInt32(31)
2277
2.12k
                               : LLContext.getInt64(63);
2278
2.12k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2279
2.12k
        LLVM::Value LHS = stackPop();
2280
2.12k
        stackPush(Builder.createAShr(LHS, RHS));
2281
2.12k
        break;
2282
1.75k
      }
2283
3.97k
      case OpCode::I32__shr_u:
2284
4.25k
      case OpCode::I64__shr_u: {
2285
4.25k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_u
2286
4.25k
                               ? LLContext.getInt32(31)
2287
4.25k
                               : LLContext.getInt64(63);
2288
4.25k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2289
4.25k
        LLVM::Value LHS = stackPop();
2290
4.25k
        stackPush(Builder.createLShr(LHS, RHS));
2291
4.25k
        break;
2292
3.97k
      }
2293
2.54k
      case OpCode::I32__rotl: {
2294
2.54k
        LLVM::Value RHS = stackPop();
2295
2.54k
        LLVM::Value LHS = stackPop();
2296
2.54k
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2297
2.54k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int32Ty},
2298
2.54k
                                          {LHS, LHS, RHS}));
2299
2.54k
        break;
2300
2.54k
      }
2301
876
      case OpCode::I32__rotr: {
2302
876
        LLVM::Value RHS = stackPop();
2303
876
        LLVM::Value LHS = stackPop();
2304
876
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2305
876
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int32Ty},
2306
876
                                          {LHS, LHS, RHS}));
2307
876
        break;
2308
876
      }
2309
869
      case OpCode::I64__rotl: {
2310
869
        LLVM::Value RHS = stackPop();
2311
869
        LLVM::Value LHS = stackPop();
2312
869
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2313
869
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int64Ty},
2314
869
                                          {LHS, LHS, RHS}));
2315
869
        break;
2316
869
      }
2317
1.37k
      case OpCode::I64__rotr: {
2318
1.37k
        LLVM::Value RHS = stackPop();
2319
1.37k
        LLVM::Value LHS = stackPop();
2320
1.37k
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2321
1.37k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int64Ty},
2322
1.37k
                                          {LHS, LHS, RHS}));
2323
1.37k
        break;
2324
1.37k
      }
2325
274
      case OpCode::F32__add:
2326
572
      case OpCode::F64__add: {
2327
572
        LLVM::Value RHS = stackPop();
2328
572
        LLVM::Value LHS = stackPop();
2329
572
        stackPush(Builder.createFAdd(LHS, RHS));
2330
572
        break;
2331
274
      }
2332
149
      case OpCode::F32__sub:
2333
444
      case OpCode::F64__sub: {
2334
444
        LLVM::Value RHS = stackPop();
2335
444
        LLVM::Value LHS = stackPop();
2336
444
        stackPush(Builder.createFSub(LHS, RHS));
2337
444
        break;
2338
149
      }
2339
537
      case OpCode::F32__mul:
2340
698
      case OpCode::F64__mul: {
2341
698
        LLVM::Value RHS = stackPop();
2342
698
        LLVM::Value LHS = stackPop();
2343
698
        stackPush(Builder.createFMul(LHS, RHS));
2344
698
        break;
2345
537
      }
2346
228
      case OpCode::F32__div:
2347
567
      case OpCode::F64__div: {
2348
567
        LLVM::Value RHS = stackPop();
2349
567
        LLVM::Value LHS = stackPop();
2350
567
        stackPush(Builder.createFDiv(LHS, RHS));
2351
567
        break;
2352
228
      }
2353
310
      case OpCode::F32__min:
2354
691
      case OpCode::F64__min: {
2355
691
        LLVM::Value RHS = stackPop();
2356
691
        LLVM::Value LHS = stackPop();
2357
691
        auto FpTy = Instr.getOpCode() == OpCode::F32__min ? Context.FloatTy
2358
691
                                                          : Context.DoubleTy;
2359
691
        auto IntTy = Instr.getOpCode() == OpCode::F32__min ? Context.Int32Ty
2360
691
                                                           : Context.Int64Ty;
2361
2362
691
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2363
691
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2364
2365
691
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2366
691
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2367
691
        auto OrInt = Builder.createOr(LHSInt, RHSInt);
2368
691
        auto OrFp = Builder.createBitCast(OrInt, FpTy);
2369
2370
691
        auto AddFp = Builder.createFAdd(LHS, RHS);
2371
2372
691
        assuming(LLVM::Core::MinNum != LLVM::Core::NotIntrinsic);
2373
691
        auto MinFp = Builder.createIntrinsic(LLVM::Core::MinNum,
2374
691
                                             {LHS.getType()}, {LHS, RHS});
2375
2376
691
        auto Ret = Builder.createSelect(
2377
691
            UEQ, Builder.createSelect(UNO, AddFp, OrFp), MinFp);
2378
691
        stackPush(Ret);
2379
691
        break;
2380
691
      }
2381
330
      case OpCode::F32__max:
2382
939
      case OpCode::F64__max: {
2383
939
        LLVM::Value RHS = stackPop();
2384
939
        LLVM::Value LHS = stackPop();
2385
939
        auto FpTy = Instr.getOpCode() == OpCode::F32__max ? Context.FloatTy
2386
939
                                                          : Context.DoubleTy;
2387
939
        auto IntTy = Instr.getOpCode() == OpCode::F32__max ? Context.Int32Ty
2388
939
                                                           : Context.Int64Ty;
2389
2390
939
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2391
939
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2392
2393
939
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2394
939
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2395
939
        auto AndInt = Builder.createAnd(LHSInt, RHSInt);
2396
939
        auto AndFp = Builder.createBitCast(AndInt, FpTy);
2397
2398
939
        auto AddFp = Builder.createFAdd(LHS, RHS);
2399
2400
939
        assuming(LLVM::Core::MaxNum != LLVM::Core::NotIntrinsic);
2401
939
        auto MaxFp = Builder.createIntrinsic(LLVM::Core::MaxNum,
2402
939
                                             {LHS.getType()}, {LHS, RHS});
2403
2404
939
        auto Ret = Builder.createSelect(
2405
939
            UEQ, Builder.createSelect(UNO, AddFp, AndFp), MaxFp);
2406
939
        stackPush(Ret);
2407
939
        break;
2408
939
      }
2409
436
      case OpCode::F32__copysign:
2410
841
      case OpCode::F64__copysign: {
2411
841
        LLVM::Value RHS = stackPop();
2412
841
        LLVM::Value LHS = stackPop();
2413
841
        assuming(LLVM::Core::CopySign != LLVM::Core::NotIntrinsic);
2414
841
        stackPush(Builder.createIntrinsic(LLVM::Core::CopySign, {LHS.getType()},
2415
841
                                          {LHS, RHS}));
2416
841
        break;
2417
841
      }
2418
2419
      // Saturating Truncation Numeric Instructions
2420
171
      case OpCode::I32__trunc_sat_f32_s:
2421
171
        compileSignedTruncSat(Context.Int32Ty);
2422
171
        break;
2423
96
      case OpCode::I32__trunc_sat_f32_u:
2424
96
        compileUnsignedTruncSat(Context.Int32Ty);
2425
96
        break;
2426
315
      case OpCode::I32__trunc_sat_f64_s:
2427
315
        compileSignedTruncSat(Context.Int32Ty);
2428
315
        break;
2429
197
      case OpCode::I32__trunc_sat_f64_u:
2430
197
        compileUnsignedTruncSat(Context.Int32Ty);
2431
197
        break;
2432
424
      case OpCode::I64__trunc_sat_f32_s:
2433
424
        compileSignedTruncSat(Context.Int64Ty);
2434
424
        break;
2435
452
      case OpCode::I64__trunc_sat_f32_u:
2436
452
        compileUnsignedTruncSat(Context.Int64Ty);
2437
452
        break;
2438
292
      case OpCode::I64__trunc_sat_f64_s:
2439
292
        compileSignedTruncSat(Context.Int64Ty);
2440
292
        break;
2441
333
      case OpCode::I64__trunc_sat_f64_u:
2442
333
        compileUnsignedTruncSat(Context.Int64Ty);
2443
333
        break;
2444
2445
      // SIMD Memory Instructions
2446
4.85k
      case OpCode::V128__load:
2447
4.85k
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2448
4.85k
                            Instr.getMemoryAlign(), Context.Int128x1Ty);
2449
4.85k
        break;
2450
147
      case OpCode::V128__load8x8_s:
2451
147
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2452
147
                            Instr.getMemoryAlign(),
2453
147
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2454
147
                            Context.Int16x8Ty, true);
2455
147
        break;
2456
42
      case OpCode::V128__load8x8_u:
2457
42
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2458
42
                            Instr.getMemoryAlign(),
2459
42
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2460
42
                            Context.Int16x8Ty, false);
2461
42
        break;
2462
419
      case OpCode::V128__load16x4_s:
2463
419
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2464
419
                            Instr.getMemoryAlign(),
2465
419
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2466
419
                            Context.Int32x4Ty, true);
2467
419
        break;
2468
441
      case OpCode::V128__load16x4_u:
2469
441
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2470
441
                            Instr.getMemoryAlign(),
2471
441
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2472
441
                            Context.Int32x4Ty, false);
2473
441
        break;
2474
137
      case OpCode::V128__load32x2_s:
2475
137
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2476
137
                            Instr.getMemoryAlign(),
2477
137
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2478
137
                            Context.Int64x2Ty, true);
2479
137
        break;
2480
133
      case OpCode::V128__load32x2_u:
2481
133
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2482
133
                            Instr.getMemoryAlign(),
2483
133
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2484
133
                            Context.Int64x2Ty, false);
2485
133
        break;
2486
73
      case OpCode::V128__load8_splat:
2487
73
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2488
73
                           Instr.getMemoryAlign(), Context.Int8Ty,
2489
73
                           Context.Int8x16Ty);
2490
73
        break;
2491
125
      case OpCode::V128__load16_splat:
2492
125
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2493
125
                           Instr.getMemoryAlign(), Context.Int16Ty,
2494
125
                           Context.Int16x8Ty);
2495
125
        break;
2496
202
      case OpCode::V128__load32_splat:
2497
202
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2498
202
                           Instr.getMemoryAlign(), Context.Int32Ty,
2499
202
                           Context.Int32x4Ty);
2500
202
        break;
2501
121
      case OpCode::V128__load64_splat:
2502
121
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2503
121
                           Instr.getMemoryAlign(), Context.Int64Ty,
2504
121
                           Context.Int64x2Ty);
2505
121
        break;
2506
81
      case OpCode::V128__load32_zero:
2507
81
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2508
81
                            Instr.getMemoryAlign(), Context.Int32Ty,
2509
81
                            Context.Int128Ty, false);
2510
81
        break;
2511
152
      case OpCode::V128__load64_zero:
2512
152
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2513
152
                            Instr.getMemoryAlign(), Context.Int64Ty,
2514
152
                            Context.Int128Ty, false);
2515
152
        break;
2516
219
      case OpCode::V128__store:
2517
219
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2518
219
                       Instr.getMemoryAlign(), Context.Int128x1Ty, false, true);
2519
219
        break;
2520
183
      case OpCode::V128__load8_lane:
2521
183
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2522
183
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2523
183
                          Context.Int8Ty, Context.Int8x16Ty);
2524
183
        break;
2525
154
      case OpCode::V128__load16_lane:
2526
154
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2527
154
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2528
154
                          Context.Int16Ty, Context.Int16x8Ty);
2529
154
        break;
2530
122
      case OpCode::V128__load32_lane:
2531
122
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2532
122
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2533
122
                          Context.Int32Ty, Context.Int32x4Ty);
2534
122
        break;
2535
21
      case OpCode::V128__load64_lane:
2536
21
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2537
21
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2538
21
                          Context.Int64Ty, Context.Int64x2Ty);
2539
21
        break;
2540
117
      case OpCode::V128__store8_lane:
2541
117
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2542
117
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2543
117
                           Context.Int8Ty, Context.Int8x16Ty);
2544
117
        break;
2545
63
      case OpCode::V128__store16_lane:
2546
63
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2547
63
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2548
63
                           Context.Int16Ty, Context.Int16x8Ty);
2549
63
        break;
2550
115
      case OpCode::V128__store32_lane:
2551
115
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2552
115
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2553
115
                           Context.Int32Ty, Context.Int32x4Ty);
2554
115
        break;
2555
23
      case OpCode::V128__store64_lane:
2556
23
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2557
23
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2558
23
                           Context.Int64Ty, Context.Int64x2Ty);
2559
23
        break;
2560
2561
      // SIMD Const Instructions
2562
343
      case OpCode::V128__const: {
2563
343
        const auto Value = Instr.getNum().get<uint64x2_t>();
2564
343
        auto Vector =
2565
343
            LLVM::Value::getConstVector64(LLContext, {Value[0], Value[1]});
2566
343
        stackPush(Builder.createBitCast(Vector, Context.Int64x2Ty));
2567
343
        break;
2568
841
      }
2569
2570
      // SIMD Shuffle Instructions
2571
15
      case OpCode::I8x16__shuffle: {
2572
15
        auto V2 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2573
15
        auto V1 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2574
15
        const auto V3 = Instr.getNum().get<uint128_t>();
2575
15
        std::array<uint8_t, 16> Mask;
2576
255
        for (size_t I = 0; I < 16; ++I) {
2577
240
          Mask[I] = static_cast<uint8_t>(V3 >> (I * 8));
2578
240
        }
2579
15
        stackPush(Builder.createBitCast(
2580
15
            Builder.createShuffleVector(
2581
15
                V1, V2, LLVM::Value::getConstVector8(LLContext, Mask)),
2582
15
            Context.Int64x2Ty));
2583
15
        break;
2584
841
      }
2585
2586
      // SIMD Lane Instructions
2587
61
      case OpCode::I8x16__extract_lane_s:
2588
61
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2589
61
                             Context.Int32Ty, true);
2590
61
        break;
2591
29
      case OpCode::I8x16__extract_lane_u:
2592
29
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2593
29
                             Context.Int32Ty, false);
2594
29
        break;
2595
157
      case OpCode::I8x16__replace_lane:
2596
157
        compileReplaceLaneOp(Context.Int8x16Ty, Instr.getMemoryLane());
2597
157
        break;
2598
433
      case OpCode::I16x8__extract_lane_s:
2599
433
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2600
433
                             Context.Int32Ty, true);
2601
433
        break;
2602
526
      case OpCode::I16x8__extract_lane_u:
2603
526
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2604
526
                             Context.Int32Ty, false);
2605
526
        break;
2606
251
      case OpCode::I16x8__replace_lane:
2607
251
        compileReplaceLaneOp(Context.Int16x8Ty, Instr.getMemoryLane());
2608
251
        break;
2609
66
      case OpCode::I32x4__extract_lane:
2610
66
        compileExtractLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2611
66
        break;
2612
217
      case OpCode::I32x4__replace_lane:
2613
217
        compileReplaceLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2614
217
        break;
2615
135
      case OpCode::I64x2__extract_lane:
2616
135
        compileExtractLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2617
135
        break;
2618
14
      case OpCode::I64x2__replace_lane:
2619
14
        compileReplaceLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2620
14
        break;
2621
63
      case OpCode::F32x4__extract_lane:
2622
63
        compileExtractLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2623
63
        break;
2624
23
      case OpCode::F32x4__replace_lane:
2625
23
        compileReplaceLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2626
23
        break;
2627
76
      case OpCode::F64x2__extract_lane:
2628
76
        compileExtractLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2629
76
        break;
2630
7
      case OpCode::F64x2__replace_lane:
2631
7
        compileReplaceLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2632
7
        break;
2633
2634
      // SIMD Numeric Instructions
2635
64
      case OpCode::I8x16__swizzle:
2636
64
        compileVectorSwizzle();
2637
64
        break;
2638
33.4k
      case OpCode::I8x16__splat:
2639
33.4k
        compileSplatOp(Context.Int8x16Ty);
2640
33.4k
        break;
2641
9.45k
      case OpCode::I16x8__splat:
2642
9.45k
        compileSplatOp(Context.Int16x8Ty);
2643
9.45k
        break;
2644
1.24k
      case OpCode::I32x4__splat:
2645
1.24k
        compileSplatOp(Context.Int32x4Ty);
2646
1.24k
        break;
2647
401
      case OpCode::I64x2__splat:
2648
401
        compileSplatOp(Context.Int64x2Ty);
2649
401
        break;
2650
355
      case OpCode::F32x4__splat:
2651
355
        compileSplatOp(Context.Floatx4Ty);
2652
355
        break;
2653
56
      case OpCode::F64x2__splat:
2654
56
        compileSplatOp(Context.Doublex2Ty);
2655
56
        break;
2656
104
      case OpCode::I8x16__eq:
2657
104
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntEQ);
2658
104
        break;
2659
308
      case OpCode::I8x16__ne:
2660
308
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntNE);
2661
308
        break;
2662
56
      case OpCode::I8x16__lt_s:
2663
56
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLT);
2664
56
        break;
2665
80
      case OpCode::I8x16__lt_u:
2666
80
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULT);
2667
80
        break;
2668
137
      case OpCode::I8x16__gt_s:
2669
137
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGT);
2670
137
        break;
2671
217
      case OpCode::I8x16__gt_u:
2672
217
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGT);
2673
217
        break;
2674
90
      case OpCode::I8x16__le_s:
2675
90
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLE);
2676
90
        break;
2677
96
      case OpCode::I8x16__le_u:
2678
96
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULE);
2679
96
        break;
2680
620
      case OpCode::I8x16__ge_s:
2681
620
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGE);
2682
620
        break;
2683
124
      case OpCode::I8x16__ge_u:
2684
124
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGE);
2685
124
        break;
2686
77
      case OpCode::I16x8__eq:
2687
77
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntEQ);
2688
77
        break;
2689
231
      case OpCode::I16x8__ne:
2690
231
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntNE);
2691
231
        break;
2692
53
      case OpCode::I16x8__lt_s:
2693
53
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLT);
2694
53
        break;
2695
226
      case OpCode::I16x8__lt_u:
2696
226
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULT);
2697
226
        break;
2698
225
      case OpCode::I16x8__gt_s:
2699
225
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGT);
2700
225
        break;
2701
139
      case OpCode::I16x8__gt_u:
2702
139
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGT);
2703
139
        break;
2704
76
      case OpCode::I16x8__le_s:
2705
76
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLE);
2706
76
        break;
2707
96
      case OpCode::I16x8__le_u:
2708
96
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULE);
2709
96
        break;
2710
155
      case OpCode::I16x8__ge_s:
2711
155
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGE);
2712
155
        break;
2713
68
      case OpCode::I16x8__ge_u:
2714
68
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGE);
2715
68
        break;
2716
56
      case OpCode::I32x4__eq:
2717
56
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntEQ);
2718
56
        break;
2719
94
      case OpCode::I32x4__ne:
2720
94
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntNE);
2721
94
        break;
2722
41
      case OpCode::I32x4__lt_s:
2723
41
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLT);
2724
41
        break;
2725
161
      case OpCode::I32x4__lt_u:
2726
161
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULT);
2727
161
        break;
2728
111
      case OpCode::I32x4__gt_s:
2729
111
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGT);
2730
111
        break;
2731
226
      case OpCode::I32x4__gt_u:
2732
226
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGT);
2733
226
        break;
2734
279
      case OpCode::I32x4__le_s:
2735
279
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLE);
2736
279
        break;
2737
252
      case OpCode::I32x4__le_u:
2738
252
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULE);
2739
252
        break;
2740
54
      case OpCode::I32x4__ge_s:
2741
54
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGE);
2742
54
        break;
2743
97
      case OpCode::I32x4__ge_u:
2744
97
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGE);
2745
97
        break;
2746
101
      case OpCode::I64x2__eq:
2747
101
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntEQ);
2748
101
        break;
2749
52
      case OpCode::I64x2__ne:
2750
52
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntNE);
2751
52
        break;
2752
50
      case OpCode::I64x2__lt_s:
2753
50
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLT);
2754
50
        break;
2755
127
      case OpCode::I64x2__gt_s:
2756
127
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGT);
2757
127
        break;
2758
33
      case OpCode::I64x2__le_s:
2759
33
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLE);
2760
33
        break;
2761
41
      case OpCode::I64x2__ge_s:
2762
41
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGE);
2763
41
        break;
2764
1.38k
      case OpCode::F32x4__eq:
2765
1.38k
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOEQ,
2766
1.38k
                               Context.Int32x4Ty);
2767
1.38k
        break;
2768
38
      case OpCode::F32x4__ne:
2769
38
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealUNE,
2770
38
                               Context.Int32x4Ty);
2771
38
        break;
2772
901
      case OpCode::F32x4__lt:
2773
901
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLT,
2774
901
                               Context.Int32x4Ty);
2775
901
        break;
2776
93
      case OpCode::F32x4__gt:
2777
93
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGT,
2778
93
                               Context.Int32x4Ty);
2779
93
        break;
2780
335
      case OpCode::F32x4__le:
2781
335
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLE,
2782
335
                               Context.Int32x4Ty);
2783
335
        break;
2784
76
      case OpCode::F32x4__ge:
2785
76
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGE,
2786
76
                               Context.Int32x4Ty);
2787
76
        break;
2788
58
      case OpCode::F64x2__eq:
2789
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOEQ,
2790
58
                               Context.Int64x2Ty);
2791
58
        break;
2792
116
      case OpCode::F64x2__ne:
2793
116
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealUNE,
2794
116
                               Context.Int64x2Ty);
2795
116
        break;
2796
142
      case OpCode::F64x2__lt:
2797
142
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLT,
2798
142
                               Context.Int64x2Ty);
2799
142
        break;
2800
57
      case OpCode::F64x2__gt:
2801
57
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGT,
2802
57
                               Context.Int64x2Ty);
2803
57
        break;
2804
166
      case OpCode::F64x2__le:
2805
166
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLE,
2806
166
                               Context.Int64x2Ty);
2807
166
        break;
2808
87
      case OpCode::F64x2__ge:
2809
87
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGE,
2810
87
                               Context.Int64x2Ty);
2811
87
        break;
2812
139
      case OpCode::V128__not:
2813
139
        Stack.back() = Builder.createNot(Stack.back());
2814
139
        break;
2815
75
      case OpCode::V128__and: {
2816
75
        auto RHS = stackPop();
2817
75
        auto LHS = stackPop();
2818
75
        stackPush(Builder.createAnd(LHS, RHS));
2819
75
        break;
2820
841
      }
2821
98
      case OpCode::V128__andnot: {
2822
98
        auto RHS = stackPop();
2823
98
        auto LHS = stackPop();
2824
98
        stackPush(Builder.createAnd(LHS, Builder.createNot(RHS)));
2825
98
        break;
2826
841
      }
2827
123
      case OpCode::V128__or: {
2828
123
        auto RHS = stackPop();
2829
123
        auto LHS = stackPop();
2830
123
        stackPush(Builder.createOr(LHS, RHS));
2831
123
        break;
2832
841
      }
2833
61
      case OpCode::V128__xor: {
2834
61
        auto RHS = stackPop();
2835
61
        auto LHS = stackPop();
2836
61
        stackPush(Builder.createXor(LHS, RHS));
2837
61
        break;
2838
841
      }
2839
127
      case OpCode::V128__bitselect: {
2840
127
        auto C = stackPop();
2841
127
        auto V2 = stackPop();
2842
127
        auto V1 = stackPop();
2843
127
        stackPush(Builder.createXor(
2844
127
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
2845
127
        break;
2846
841
      }
2847
107
      case OpCode::V128__any_true:
2848
107
        compileVectorAnyTrue();
2849
107
        break;
2850
833
      case OpCode::I8x16__abs:
2851
833
        compileVectorAbs(Context.Int8x16Ty);
2852
833
        break;
2853
1.44k
      case OpCode::I8x16__neg:
2854
1.44k
        compileVectorNeg(Context.Int8x16Ty);
2855
1.44k
        break;
2856
104
      case OpCode::I8x16__popcnt:
2857
104
        compileVectorPopcnt();
2858
104
        break;
2859
314
      case OpCode::I8x16__all_true:
2860
314
        compileVectorAllTrue(Context.Int8x16Ty);
2861
314
        break;
2862
487
      case OpCode::I8x16__bitmask:
2863
487
        compileVectorBitMask(Context.Int8x16Ty);
2864
487
        break;
2865
78
      case OpCode::I8x16__narrow_i16x8_s:
2866
78
        compileVectorNarrow(Context.Int16x8Ty, true);
2867
78
        break;
2868
172
      case OpCode::I8x16__narrow_i16x8_u:
2869
172
        compileVectorNarrow(Context.Int16x8Ty, false);
2870
172
        break;
2871
107
      case OpCode::I8x16__shl:
2872
107
        compileVectorShl(Context.Int8x16Ty);
2873
107
        break;
2874
1.04k
      case OpCode::I8x16__shr_s:
2875
1.04k
        compileVectorAShr(Context.Int8x16Ty);
2876
1.04k
        break;
2877
59
      case OpCode::I8x16__shr_u:
2878
59
        compileVectorLShr(Context.Int8x16Ty);
2879
59
        break;
2880
61
      case OpCode::I8x16__add:
2881
61
        compileVectorVectorAdd(Context.Int8x16Ty);
2882
61
        break;
2883
460
      case OpCode::I8x16__add_sat_s:
2884
460
        compileVectorVectorAddSat(Context.Int8x16Ty, true);
2885
460
        break;
2886
70
      case OpCode::I8x16__add_sat_u:
2887
70
        compileVectorVectorAddSat(Context.Int8x16Ty, false);
2888
70
        break;
2889
71
      case OpCode::I8x16__sub:
2890
71
        compileVectorVectorSub(Context.Int8x16Ty);
2891
71
        break;
2892
207
      case OpCode::I8x16__sub_sat_s:
2893
207
        compileVectorVectorSubSat(Context.Int8x16Ty, true);
2894
207
        break;
2895
85
      case OpCode::I8x16__sub_sat_u:
2896
85
        compileVectorVectorSubSat(Context.Int8x16Ty, false);
2897
85
        break;
2898
54
      case OpCode::I8x16__min_s:
2899
54
        compileVectorVectorSMin(Context.Int8x16Ty);
2900
54
        break;
2901
61
      case OpCode::I8x16__min_u:
2902
61
        compileVectorVectorUMin(Context.Int8x16Ty);
2903
61
        break;
2904
256
      case OpCode::I8x16__max_s:
2905
256
        compileVectorVectorSMax(Context.Int8x16Ty);
2906
256
        break;
2907
86
      case OpCode::I8x16__max_u:
2908
86
        compileVectorVectorUMax(Context.Int8x16Ty);
2909
86
        break;
2910
117
      case OpCode::I8x16__avgr_u:
2911
117
        compileVectorVectorUAvgr(Context.Int8x16Ty);
2912
117
        break;
2913
207
      case OpCode::I16x8__abs:
2914
207
        compileVectorAbs(Context.Int16x8Ty);
2915
207
        break;
2916
246
      case OpCode::I16x8__neg:
2917
246
        compileVectorNeg(Context.Int16x8Ty);
2918
246
        break;
2919
108
      case OpCode::I16x8__all_true:
2920
108
        compileVectorAllTrue(Context.Int16x8Ty);
2921
108
        break;
2922
112
      case OpCode::I16x8__bitmask:
2923
112
        compileVectorBitMask(Context.Int16x8Ty);
2924
112
        break;
2925
45
      case OpCode::I16x8__narrow_i32x4_s:
2926
45
        compileVectorNarrow(Context.Int32x4Ty, true);
2927
45
        break;
2928
349
      case OpCode::I16x8__narrow_i32x4_u:
2929
349
        compileVectorNarrow(Context.Int32x4Ty, false);
2930
349
        break;
2931
822
      case OpCode::I16x8__extend_low_i8x16_s:
2932
822
        compileVectorExtend(Context.Int8x16Ty, true, true);
2933
822
        break;
2934
58
      case OpCode::I16x8__extend_high_i8x16_s:
2935
58
        compileVectorExtend(Context.Int8x16Ty, true, false);
2936
58
        break;
2937
482
      case OpCode::I16x8__extend_low_i8x16_u:
2938
482
        compileVectorExtend(Context.Int8x16Ty, false, true);
2939
482
        break;
2940
14
      case OpCode::I16x8__extend_high_i8x16_u:
2941
14
        compileVectorExtend(Context.Int8x16Ty, false, false);
2942
14
        break;
2943
77
      case OpCode::I16x8__shl:
2944
77
        compileVectorShl(Context.Int16x8Ty);
2945
77
        break;
2946
253
      case OpCode::I16x8__shr_s:
2947
253
        compileVectorAShr(Context.Int16x8Ty);
2948
253
        break;
2949
50
      case OpCode::I16x8__shr_u:
2950
50
        compileVectorLShr(Context.Int16x8Ty);
2951
50
        break;
2952
106
      case OpCode::I16x8__add:
2953
106
        compileVectorVectorAdd(Context.Int16x8Ty);
2954
106
        break;
2955
22
      case OpCode::I16x8__add_sat_s:
2956
22
        compileVectorVectorAddSat(Context.Int16x8Ty, true);
2957
22
        break;
2958
670
      case OpCode::I16x8__add_sat_u:
2959
670
        compileVectorVectorAddSat(Context.Int16x8Ty, false);
2960
670
        break;
2961
334
      case OpCode::I16x8__sub:
2962
334
        compileVectorVectorSub(Context.Int16x8Ty);
2963
334
        break;
2964
24
      case OpCode::I16x8__sub_sat_s:
2965
24
        compileVectorVectorSubSat(Context.Int16x8Ty, true);
2966
24
        break;
2967
71
      case OpCode::I16x8__sub_sat_u:
2968
71
        compileVectorVectorSubSat(Context.Int16x8Ty, false);
2969
71
        break;
2970
115
      case OpCode::I16x8__mul:
2971
115
        compileVectorVectorMul(Context.Int16x8Ty);
2972
115
        break;
2973
112
      case OpCode::I16x8__min_s:
2974
112
        compileVectorVectorSMin(Context.Int16x8Ty);
2975
112
        break;
2976
124
      case OpCode::I16x8__min_u:
2977
124
        compileVectorVectorUMin(Context.Int16x8Ty);
2978
124
        break;
2979
79
      case OpCode::I16x8__max_s:
2980
79
        compileVectorVectorSMax(Context.Int16x8Ty);
2981
79
        break;
2982
573
      case OpCode::I16x8__max_u:
2983
573
        compileVectorVectorUMax(Context.Int16x8Ty);
2984
573
        break;
2985
105
      case OpCode::I16x8__avgr_u:
2986
105
        compileVectorVectorUAvgr(Context.Int16x8Ty);
2987
105
        break;
2988
71
      case OpCode::I16x8__extmul_low_i8x16_s:
2989
71
        compileVectorExtMul(Context.Int8x16Ty, true, true);
2990
71
        break;
2991
286
      case OpCode::I16x8__extmul_high_i8x16_s:
2992
286
        compileVectorExtMul(Context.Int8x16Ty, true, false);
2993
286
        break;
2994
113
      case OpCode::I16x8__extmul_low_i8x16_u:
2995
113
        compileVectorExtMul(Context.Int8x16Ty, false, true);
2996
113
        break;
2997
435
      case OpCode::I16x8__extmul_high_i8x16_u:
2998
435
        compileVectorExtMul(Context.Int8x16Ty, false, false);
2999
435
        break;
3000
134
      case OpCode::I16x8__q15mulr_sat_s:
3001
134
        compileVectorVectorQ15MulSat();
3002
134
        break;
3003
295
      case OpCode::I16x8__extadd_pairwise_i8x16_s:
3004
295
        compileVectorExtAddPairwise(Context.Int8x16Ty, true);
3005
295
        break;
3006
329
      case OpCode::I16x8__extadd_pairwise_i8x16_u:
3007
329
        compileVectorExtAddPairwise(Context.Int8x16Ty, false);
3008
329
        break;
3009
67
      case OpCode::I32x4__abs:
3010
67
        compileVectorAbs(Context.Int32x4Ty);
3011
67
        break;
3012
179
      case OpCode::I32x4__neg:
3013
179
        compileVectorNeg(Context.Int32x4Ty);
3014
179
        break;
3015
174
      case OpCode::I32x4__all_true:
3016
174
        compileVectorAllTrue(Context.Int32x4Ty);
3017
174
        break;
3018
82
      case OpCode::I32x4__bitmask:
3019
82
        compileVectorBitMask(Context.Int32x4Ty);
3020
82
        break;
3021
155
      case OpCode::I32x4__extend_low_i16x8_s:
3022
155
        compileVectorExtend(Context.Int16x8Ty, true, true);
3023
155
        break;
3024
510
      case OpCode::I32x4__extend_high_i16x8_s:
3025
510
        compileVectorExtend(Context.Int16x8Ty, true, false);
3026
510
        break;
3027
1.89k
      case OpCode::I32x4__extend_low_i16x8_u:
3028
1.89k
        compileVectorExtend(Context.Int16x8Ty, false, true);
3029
1.89k
        break;
3030
140
      case OpCode::I32x4__extend_high_i16x8_u:
3031
140
        compileVectorExtend(Context.Int16x8Ty, false, false);
3032
140
        break;
3033
1.02k
      case OpCode::I32x4__shl:
3034
1.02k
        compileVectorShl(Context.Int32x4Ty);
3035
1.02k
        break;
3036
170
      case OpCode::I32x4__shr_s:
3037
170
        compileVectorAShr(Context.Int32x4Ty);
3038
170
        break;
3039
100
      case OpCode::I32x4__shr_u:
3040
100
        compileVectorLShr(Context.Int32x4Ty);
3041
100
        break;
3042
100
      case OpCode::I32x4__add:
3043
100
        compileVectorVectorAdd(Context.Int32x4Ty);
3044
100
        break;
3045
142
      case OpCode::I32x4__sub:
3046
142
        compileVectorVectorSub(Context.Int32x4Ty);
3047
142
        break;
3048
214
      case OpCode::I32x4__mul:
3049
214
        compileVectorVectorMul(Context.Int32x4Ty);
3050
214
        break;
3051
101
      case OpCode::I32x4__min_s:
3052
101
        compileVectorVectorSMin(Context.Int32x4Ty);
3053
101
        break;
3054
64
      case OpCode::I32x4__min_u:
3055
64
        compileVectorVectorUMin(Context.Int32x4Ty);
3056
64
        break;
3057
66
      case OpCode::I32x4__max_s:
3058
66
        compileVectorVectorSMax(Context.Int32x4Ty);
3059
66
        break;
3060
96
      case OpCode::I32x4__max_u:
3061
96
        compileVectorVectorUMax(Context.Int32x4Ty);
3062
96
        break;
3063
102
      case OpCode::I32x4__extmul_low_i16x8_s:
3064
102
        compileVectorExtMul(Context.Int16x8Ty, true, true);
3065
102
        break;
3066
49
      case OpCode::I32x4__extmul_high_i16x8_s:
3067
49
        compileVectorExtMul(Context.Int16x8Ty, true, false);
3068
49
        break;
3069
223
      case OpCode::I32x4__extmul_low_i16x8_u:
3070
223
        compileVectorExtMul(Context.Int16x8Ty, false, true);
3071
223
        break;
3072
44
      case OpCode::I32x4__extmul_high_i16x8_u:
3073
44
        compileVectorExtMul(Context.Int16x8Ty, false, false);
3074
44
        break;
3075
1.14k
      case OpCode::I32x4__extadd_pairwise_i16x8_s:
3076
1.14k
        compileVectorExtAddPairwise(Context.Int16x8Ty, true);
3077
1.14k
        break;
3078
453
      case OpCode::I32x4__extadd_pairwise_i16x8_u:
3079
453
        compileVectorExtAddPairwise(Context.Int16x8Ty, false);
3080
453
        break;
3081
114
      case OpCode::I32x4__dot_i16x8_s: {
3082
114
        auto ExtendTy = Context.Int16x8Ty.getExtendedElementVectorType();
3083
114
        auto Undef = LLVM::Value::getUndef(ExtendTy);
3084
114
        auto LHS = Builder.createSExt(
3085
114
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3086
114
        auto RHS = Builder.createSExt(
3087
114
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3088
114
        auto M = Builder.createMul(LHS, RHS);
3089
114
        auto L = Builder.createShuffleVector(
3090
114
            M, Undef,
3091
114
            LLVM::Value::getConstVector32(LLContext, {0U, 2U, 4U, 6U}));
3092
114
        auto R = Builder.createShuffleVector(
3093
114
            M, Undef,
3094
114
            LLVM::Value::getConstVector32(LLContext, {1U, 3U, 5U, 7U}));
3095
114
        auto V = Builder.createAdd(L, R);
3096
114
        stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
3097
114
        break;
3098
841
      }
3099
876
      case OpCode::I64x2__abs:
3100
876
        compileVectorAbs(Context.Int64x2Ty);
3101
876
        break;
3102
527
      case OpCode::I64x2__neg:
3103
527
        compileVectorNeg(Context.Int64x2Ty);
3104
527
        break;
3105
286
      case OpCode::I64x2__all_true:
3106
286
        compileVectorAllTrue(Context.Int64x2Ty);
3107
286
        break;
3108
231
      case OpCode::I64x2__bitmask:
3109
231
        compileVectorBitMask(Context.Int64x2Ty);
3110
231
        break;
3111
116
      case OpCode::I64x2__extend_low_i32x4_s:
3112
116
        compileVectorExtend(Context.Int32x4Ty, true, true);
3113
116
        break;
3114
681
      case OpCode::I64x2__extend_high_i32x4_s:
3115
681
        compileVectorExtend(Context.Int32x4Ty, true, false);
3116
681
        break;
3117
179
      case OpCode::I64x2__extend_low_i32x4_u:
3118
179
        compileVectorExtend(Context.Int32x4Ty, false, true);
3119
179
        break;
3120
504
      case OpCode::I64x2__extend_high_i32x4_u:
3121
504
        compileVectorExtend(Context.Int32x4Ty, false, false);
3122
504
        break;
3123
119
      case OpCode::I64x2__shl:
3124
119
        compileVectorShl(Context.Int64x2Ty);
3125
119
        break;
3126
273
      case OpCode::I64x2__shr_s:
3127
273
        compileVectorAShr(Context.Int64x2Ty);
3128
273
        break;
3129
82
      case OpCode::I64x2__shr_u:
3130
82
        compileVectorLShr(Context.Int64x2Ty);
3131
82
        break;
3132
35
      case OpCode::I64x2__add:
3133
35
        compileVectorVectorAdd(Context.Int64x2Ty);
3134
35
        break;
3135
248
      case OpCode::I64x2__sub:
3136
248
        compileVectorVectorSub(Context.Int64x2Ty);
3137
248
        break;
3138
74
      case OpCode::I64x2__mul:
3139
74
        compileVectorVectorMul(Context.Int64x2Ty);
3140
74
        break;
3141
38
      case OpCode::I64x2__extmul_low_i32x4_s:
3142
38
        compileVectorExtMul(Context.Int32x4Ty, true, true);
3143
38
        break;
3144
284
      case OpCode::I64x2__extmul_high_i32x4_s:
3145
284
        compileVectorExtMul(Context.Int32x4Ty, true, false);
3146
284
        break;
3147
34
      case OpCode::I64x2__extmul_low_i32x4_u:
3148
34
        compileVectorExtMul(Context.Int32x4Ty, false, true);
3149
34
        break;
3150
118
      case OpCode::I64x2__extmul_high_i32x4_u:
3151
118
        compileVectorExtMul(Context.Int32x4Ty, false, false);
3152
118
        break;
3153
106
      case OpCode::F32x4__abs:
3154
106
        compileVectorFAbs(Context.Floatx4Ty);
3155
106
        break;
3156
140
      case OpCode::F32x4__neg:
3157
140
        compileVectorFNeg(Context.Floatx4Ty);
3158
140
        break;
3159
188
      case OpCode::F32x4__sqrt:
3160
188
        compileVectorFSqrt(Context.Floatx4Ty);
3161
188
        break;
3162
133
      case OpCode::F32x4__add:
3163
133
        compileVectorVectorFAdd(Context.Floatx4Ty);
3164
133
        break;
3165
235
      case OpCode::F32x4__sub:
3166
235
        compileVectorVectorFSub(Context.Floatx4Ty);
3167
235
        break;
3168
41
      case OpCode::F32x4__mul:
3169
41
        compileVectorVectorFMul(Context.Floatx4Ty);
3170
41
        break;
3171
185
      case OpCode::F32x4__div:
3172
185
        compileVectorVectorFDiv(Context.Floatx4Ty);
3173
185
        break;
3174
123
      case OpCode::F32x4__min:
3175
123
        compileVectorVectorFMin(Context.Floatx4Ty);
3176
123
        break;
3177
36
      case OpCode::F32x4__max:
3178
36
        compileVectorVectorFMax(Context.Floatx4Ty);
3179
36
        break;
3180
54
      case OpCode::F32x4__pmin:
3181
54
        compileVectorVectorFPMin(Context.Floatx4Ty);
3182
54
        break;
3183
246
      case OpCode::F32x4__pmax:
3184
246
        compileVectorVectorFPMax(Context.Floatx4Ty);
3185
246
        break;
3186
734
      case OpCode::F32x4__ceil:
3187
734
        compileVectorFCeil(Context.Floatx4Ty);
3188
734
        break;
3189
1.44k
      case OpCode::F32x4__floor:
3190
1.44k
        compileVectorFFloor(Context.Floatx4Ty);
3191
1.44k
        break;
3192
1.53k
      case OpCode::F32x4__trunc:
3193
1.53k
        compileVectorFTrunc(Context.Floatx4Ty);
3194
1.53k
        break;
3195
202
      case OpCode::F32x4__nearest:
3196
202
        compileVectorFNearest(Context.Floatx4Ty);
3197
202
        break;
3198
438
      case OpCode::F64x2__abs:
3199
438
        compileVectorFAbs(Context.Doublex2Ty);
3200
438
        break;
3201
797
      case OpCode::F64x2__neg:
3202
797
        compileVectorFNeg(Context.Doublex2Ty);
3203
797
        break;
3204
127
      case OpCode::F64x2__sqrt:
3205
127
        compileVectorFSqrt(Context.Doublex2Ty);
3206
127
        break;
3207
52
      case OpCode::F64x2__add:
3208
52
        compileVectorVectorFAdd(Context.Doublex2Ty);
3209
52
        break;
3210
211
      case OpCode::F64x2__sub:
3211
211
        compileVectorVectorFSub(Context.Doublex2Ty);
3212
211
        break;
3213
140
      case OpCode::F64x2__mul:
3214
140
        compileVectorVectorFMul(Context.Doublex2Ty);
3215
140
        break;
3216
37
      case OpCode::F64x2__div:
3217
37
        compileVectorVectorFDiv(Context.Doublex2Ty);
3218
37
        break;
3219
161
      case OpCode::F64x2__min:
3220
161
        compileVectorVectorFMin(Context.Doublex2Ty);
3221
161
        break;
3222
199
      case OpCode::F64x2__max:
3223
199
        compileVectorVectorFMax(Context.Doublex2Ty);
3224
199
        break;
3225
245
      case OpCode::F64x2__pmin:
3226
245
        compileVectorVectorFPMin(Context.Doublex2Ty);
3227
245
        break;
3228
68
      case OpCode::F64x2__pmax:
3229
68
        compileVectorVectorFPMax(Context.Doublex2Ty);
3230
68
        break;
3231
522
      case OpCode::F64x2__ceil:
3232
522
        compileVectorFCeil(Context.Doublex2Ty);
3233
522
        break;
3234
632
      case OpCode::F64x2__floor:
3235
632
        compileVectorFFloor(Context.Doublex2Ty);
3236
632
        break;
3237
115
      case OpCode::F64x2__trunc:
3238
115
        compileVectorFTrunc(Context.Doublex2Ty);
3239
115
        break;
3240
152
      case OpCode::F64x2__nearest:
3241
152
        compileVectorFNearest(Context.Doublex2Ty);
3242
152
        break;
3243
210
      case OpCode::I32x4__trunc_sat_f32x4_s:
3244
210
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3245
210
        break;
3246
3.68k
      case OpCode::I32x4__trunc_sat_f32x4_u:
3247
3.68k
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3248
3.68k
        break;
3249
315
      case OpCode::F32x4__convert_i32x4_s:
3250
315
        compileVectorConvertS(Context.Int32x4Ty, Context.Floatx4Ty, false);
3251
315
        break;
3252
711
      case OpCode::F32x4__convert_i32x4_u:
3253
711
        compileVectorConvertU(Context.Int32x4Ty, Context.Floatx4Ty, false);
3254
711
        break;
3255
740
      case OpCode::I32x4__trunc_sat_f64x2_s_zero:
3256
740
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3257
740
        break;
3258
2.11k
      case OpCode::I32x4__trunc_sat_f64x2_u_zero:
3259
2.11k
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3260
2.11k
        break;
3261
329
      case OpCode::F64x2__convert_low_i32x4_s:
3262
329
        compileVectorConvertS(Context.Int32x4Ty, Context.Doublex2Ty, true);
3263
329
        break;
3264
1.23k
      case OpCode::F64x2__convert_low_i32x4_u:
3265
1.23k
        compileVectorConvertU(Context.Int32x4Ty, Context.Doublex2Ty, true);
3266
1.23k
        break;
3267
563
      case OpCode::F32x4__demote_f64x2_zero:
3268
563
        compileVectorDemote();
3269
563
        break;
3270
553
      case OpCode::F64x2__promote_low_f32x4:
3271
553
        compileVectorPromote();
3272
553
        break;
3273
3274
      // Relaxed SIMD Instructions
3275
0
      case OpCode::I8x16__relaxed_swizzle:
3276
0
        compileVectorSwizzle();
3277
0
        break;
3278
0
      case OpCode::I32x4__relaxed_trunc_f32x4_s:
3279
0
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3280
0
        break;
3281
0
      case OpCode::I32x4__relaxed_trunc_f32x4_u:
3282
0
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3283
0
        break;
3284
0
      case OpCode::I32x4__relaxed_trunc_f64x2_s_zero:
3285
0
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3286
0
        break;
3287
0
      case OpCode::I32x4__relaxed_trunc_f64x2_u_zero:
3288
0
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3289
0
        break;
3290
0
      case OpCode::F32x4__relaxed_madd:
3291
0
        compileVectorVectorMAdd(Context.Floatx4Ty);
3292
0
        break;
3293
0
      case OpCode::F32x4__relaxed_nmadd:
3294
0
        compileVectorVectorNMAdd(Context.Floatx4Ty);
3295
0
        break;
3296
0
      case OpCode::F64x2__relaxed_madd:
3297
0
        compileVectorVectorMAdd(Context.Doublex2Ty);
3298
0
        break;
3299
0
      case OpCode::F64x2__relaxed_nmadd:
3300
0
        compileVectorVectorNMAdd(Context.Doublex2Ty);
3301
0
        break;
3302
0
      case OpCode::I8x16__relaxed_laneselect:
3303
0
      case OpCode::I16x8__relaxed_laneselect:
3304
0
      case OpCode::I32x4__relaxed_laneselect:
3305
0
      case OpCode::I64x2__relaxed_laneselect: {
3306
0
        auto C = stackPop();
3307
0
        auto V2 = stackPop();
3308
0
        auto V1 = stackPop();
3309
0
        stackPush(Builder.createXor(
3310
0
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
3311
0
        break;
3312
0
      }
3313
0
      case OpCode::F32x4__relaxed_min:
3314
0
        compileVectorVectorFMin(Context.Floatx4Ty);
3315
0
        break;
3316
0
      case OpCode::F32x4__relaxed_max:
3317
0
        compileVectorVectorFMax(Context.Floatx4Ty);
3318
0
        break;
3319
0
      case OpCode::F64x2__relaxed_min:
3320
0
        compileVectorVectorFMin(Context.Doublex2Ty);
3321
0
        break;
3322
0
      case OpCode::F64x2__relaxed_max:
3323
0
        compileVectorVectorFMax(Context.Doublex2Ty);
3324
0
        break;
3325
0
      case OpCode::I16x8__relaxed_q15mulr_s:
3326
0
        compileVectorVectorQ15MulSat();
3327
0
        break;
3328
0
      case OpCode::I16x8__relaxed_dot_i8x16_i7x16_s:
3329
0
        compileVectorRelaxedIntegerDotProduct();
3330
0
        break;
3331
0
      case OpCode::I32x4__relaxed_dot_i8x16_i7x16_add_s:
3332
0
        compileVectorRelaxedIntegerDotProductAdd();
3333
0
        break;
3334
3335
      // Atomic Instructions
3336
188
      case OpCode::Atomic__fence:
3337
188
        return compileMemoryFence();
3338
33
      case OpCode::Memory__atomic__notify:
3339
33
        return compileAtomicNotify(Instr.getTargetIndex(),
3340
33
                                   Instr.getMemoryOffset());
3341
5
      case OpCode::Memory__atomic__wait32:
3342
5
        return compileAtomicWait(Instr.getTargetIndex(),
3343
5
                                 Instr.getMemoryOffset(), Context.Int32Ty, 32);
3344
2
      case OpCode::Memory__atomic__wait64:
3345
2
        return compileAtomicWait(Instr.getTargetIndex(),
3346
2
                                 Instr.getMemoryOffset(), Context.Int64Ty, 64);
3347
0
      case OpCode::I32__atomic__load:
3348
0
        return compileAtomicLoad(
3349
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3350
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3351
0
      case OpCode::I64__atomic__load:
3352
0
        return compileAtomicLoad(
3353
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3354
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3355
0
      case OpCode::I32__atomic__load8_u:
3356
0
        return compileAtomicLoad(
3357
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3358
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3359
0
      case OpCode::I32__atomic__load16_u:
3360
0
        return compileAtomicLoad(
3361
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3362
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3363
0
      case OpCode::I64__atomic__load8_u:
3364
0
        return compileAtomicLoad(
3365
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3366
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3367
0
      case OpCode::I64__atomic__load16_u:
3368
0
        return compileAtomicLoad(
3369
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3370
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3371
0
      case OpCode::I64__atomic__load32_u:
3372
0
        return compileAtomicLoad(
3373
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3374
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3375
0
      case OpCode::I32__atomic__store:
3376
0
        return compileAtomicStore(
3377
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3378
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3379
0
      case OpCode::I64__atomic__store:
3380
0
        return compileAtomicStore(
3381
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3382
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3383
0
      case OpCode::I32__atomic__store8:
3384
0
        return compileAtomicStore(
3385
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3386
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty, true);
3387
0
      case OpCode::I32__atomic__store16:
3388
0
        return compileAtomicStore(
3389
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3390
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty, true);
3391
0
      case OpCode::I64__atomic__store8:
3392
0
        return compileAtomicStore(
3393
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3394
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty, true);
3395
0
      case OpCode::I64__atomic__store16:
3396
0
        return compileAtomicStore(
3397
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3398
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty, true);
3399
0
      case OpCode::I64__atomic__store32:
3400
0
        return compileAtomicStore(
3401
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3402
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty, true);
3403
0
      case OpCode::I32__atomic__rmw__add:
3404
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3405
0
                                  Instr.getMemoryOffset(),
3406
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3407
0
                                  Context.Int32Ty, Context.Int32Ty, true);
3408
0
      case OpCode::I64__atomic__rmw__add:
3409
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3410
0
                                  Instr.getMemoryOffset(),
3411
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3412
0
                                  Context.Int64Ty, Context.Int64Ty, true);
3413
0
      case OpCode::I32__atomic__rmw8__add_u:
3414
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3415
0
                                  Instr.getMemoryOffset(),
3416
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3417
0
                                  Context.Int32Ty, Context.Int8Ty);
3418
0
      case OpCode::I32__atomic__rmw16__add_u:
3419
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3420
0
                                  Instr.getMemoryOffset(),
3421
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3422
0
                                  Context.Int32Ty, Context.Int16Ty);
3423
0
      case OpCode::I64__atomic__rmw8__add_u:
3424
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3425
0
                                  Instr.getMemoryOffset(),
3426
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3427
0
                                  Context.Int64Ty, Context.Int8Ty);
3428
0
      case OpCode::I64__atomic__rmw16__add_u:
3429
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3430
0
                                  Instr.getMemoryOffset(),
3431
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3432
0
                                  Context.Int64Ty, Context.Int16Ty);
3433
0
      case OpCode::I64__atomic__rmw32__add_u:
3434
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3435
0
                                  Instr.getMemoryOffset(),
3436
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3437
0
                                  Context.Int64Ty, Context.Int32Ty);
3438
0
      case OpCode::I32__atomic__rmw__sub:
3439
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3440
0
                                  Instr.getMemoryOffset(),
3441
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3442
0
                                  Context.Int32Ty, Context.Int32Ty, true);
3443
0
      case OpCode::I64__atomic__rmw__sub:
3444
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3445
0
                                  Instr.getMemoryOffset(),
3446
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3447
0
                                  Context.Int64Ty, Context.Int64Ty, true);
3448
0
      case OpCode::I32__atomic__rmw8__sub_u:
3449
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3450
0
                                  Instr.getMemoryOffset(),
3451
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3452
0
                                  Context.Int32Ty, Context.Int8Ty);
3453
0
      case OpCode::I32__atomic__rmw16__sub_u:
3454
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3455
0
                                  Instr.getMemoryOffset(),
3456
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3457
0
                                  Context.Int32Ty, Context.Int16Ty);
3458
0
      case OpCode::I64__atomic__rmw8__sub_u:
3459
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3460
0
                                  Instr.getMemoryOffset(),
3461
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3462
0
                                  Context.Int64Ty, Context.Int8Ty);
3463
0
      case OpCode::I64__atomic__rmw16__sub_u:
3464
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3465
0
                                  Instr.getMemoryOffset(),
3466
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3467
0
                                  Context.Int64Ty, Context.Int16Ty);
3468
0
      case OpCode::I64__atomic__rmw32__sub_u:
3469
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3470
0
                                  Instr.getMemoryOffset(),
3471
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3472
0
                                  Context.Int64Ty, Context.Int32Ty);
3473
0
      case OpCode::I32__atomic__rmw__and:
3474
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3475
0
                                  Instr.getMemoryOffset(),
3476
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3477
0
                                  Context.Int32Ty, Context.Int32Ty, true);
3478
0
      case OpCode::I64__atomic__rmw__and:
3479
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3480
0
                                  Instr.getMemoryOffset(),
3481
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3482
0
                                  Context.Int64Ty, Context.Int64Ty, true);
3483
0
      case OpCode::I32__atomic__rmw8__and_u:
3484
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3485
0
                                  Instr.getMemoryOffset(),
3486
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3487
0
                                  Context.Int32Ty, Context.Int8Ty);
3488
0
      case OpCode::I32__atomic__rmw16__and_u:
3489
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3490
0
                                  Instr.getMemoryOffset(),
3491
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3492
0
                                  Context.Int32Ty, Context.Int16Ty);
3493
0
      case OpCode::I64__atomic__rmw8__and_u:
3494
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3495
0
                                  Instr.getMemoryOffset(),
3496
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3497
0
                                  Context.Int64Ty, Context.Int8Ty);
3498
0
      case OpCode::I64__atomic__rmw16__and_u:
3499
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3500
0
                                  Instr.getMemoryOffset(),
3501
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3502
0
                                  Context.Int64Ty, Context.Int16Ty);
3503
0
      case OpCode::I64__atomic__rmw32__and_u:
3504
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3505
0
                                  Instr.getMemoryOffset(),
3506
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3507
0
                                  Context.Int64Ty, Context.Int32Ty);
3508
0
      case OpCode::I32__atomic__rmw__or:
3509
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3510
0
                                  Instr.getMemoryOffset(),
3511
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3512
0
                                  Context.Int32Ty, Context.Int32Ty, true);
3513
0
      case OpCode::I64__atomic__rmw__or:
3514
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3515
0
                                  Instr.getMemoryOffset(),
3516
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3517
0
                                  Context.Int64Ty, Context.Int64Ty, true);
3518
0
      case OpCode::I32__atomic__rmw8__or_u:
3519
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3520
0
                                  Instr.getMemoryOffset(),
3521
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3522
0
                                  Context.Int32Ty, Context.Int8Ty);
3523
0
      case OpCode::I32__atomic__rmw16__or_u:
3524
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3525
0
                                  Instr.getMemoryOffset(),
3526
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3527
0
                                  Context.Int32Ty, Context.Int16Ty);
3528
0
      case OpCode::I64__atomic__rmw8__or_u:
3529
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3530
0
                                  Instr.getMemoryOffset(),
3531
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3532
0
                                  Context.Int64Ty, Context.Int8Ty);
3533
0
      case OpCode::I64__atomic__rmw16__or_u:
3534
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3535
0
                                  Instr.getMemoryOffset(),
3536
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3537
0
                                  Context.Int64Ty, Context.Int16Ty);
3538
0
      case OpCode::I64__atomic__rmw32__or_u:
3539
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3540
0
                                  Instr.getMemoryOffset(),
3541
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3542
0
                                  Context.Int64Ty, Context.Int32Ty);
3543
0
      case OpCode::I32__atomic__rmw__xor:
3544
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3545
0
                                  Instr.getMemoryOffset(),
3546
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3547
0
                                  Context.Int32Ty, Context.Int32Ty, true);
3548
0
      case OpCode::I64__atomic__rmw__xor:
3549
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3550
0
                                  Instr.getMemoryOffset(),
3551
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3552
0
                                  Context.Int64Ty, Context.Int64Ty, true);
3553
0
      case OpCode::I32__atomic__rmw8__xor_u:
3554
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3555
0
                                  Instr.getMemoryOffset(),
3556
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3557
0
                                  Context.Int32Ty, Context.Int8Ty);
3558
0
      case OpCode::I32__atomic__rmw16__xor_u:
3559
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3560
0
                                  Instr.getMemoryOffset(),
3561
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3562
0
                                  Context.Int32Ty, Context.Int16Ty);
3563
0
      case OpCode::I64__atomic__rmw8__xor_u:
3564
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3565
0
                                  Instr.getMemoryOffset(),
3566
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3567
0
                                  Context.Int64Ty, Context.Int8Ty);
3568
0
      case OpCode::I64__atomic__rmw16__xor_u:
3569
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3570
0
                                  Instr.getMemoryOffset(),
3571
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3572
0
                                  Context.Int64Ty, Context.Int16Ty);
3573
0
      case OpCode::I64__atomic__rmw32__xor_u:
3574
0
        return compileAtomicRMWOp(Instr.getTargetIndex(),
3575
0
                                  Instr.getMemoryOffset(),
3576
0
                                  Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3577
0
                                  Context.Int64Ty, Context.Int32Ty);
3578
0
      case OpCode::I32__atomic__rmw__xchg:
3579
0
        return compileAtomicRMWOp(
3580
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3581
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int32Ty,
3582
0
            Context.Int32Ty, true);
3583
0
      case OpCode::I64__atomic__rmw__xchg:
3584
0
        return compileAtomicRMWOp(
3585
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3586
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int64Ty,
3587
0
            Context.Int64Ty, true);
3588
0
      case OpCode::I32__atomic__rmw8__xchg_u:
3589
0
        return compileAtomicRMWOp(
3590
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3591
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int32Ty,
3592
0
            Context.Int8Ty);
3593
0
      case OpCode::I32__atomic__rmw16__xchg_u:
3594
0
        return compileAtomicRMWOp(
3595
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3596
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int32Ty,
3597
0
            Context.Int16Ty);
3598
0
      case OpCode::I64__atomic__rmw8__xchg_u:
3599
0
        return compileAtomicRMWOp(
3600
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3601
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int64Ty,
3602
0
            Context.Int8Ty);
3603
0
      case OpCode::I64__atomic__rmw16__xchg_u:
3604
0
        return compileAtomicRMWOp(
3605
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3606
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int64Ty,
3607
0
            Context.Int16Ty);
3608
0
      case OpCode::I64__atomic__rmw32__xchg_u:
3609
0
        return compileAtomicRMWOp(
3610
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3611
0
            Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg, Context.Int64Ty,
3612
0
            Context.Int32Ty);
3613
0
      case OpCode::I32__atomic__rmw__cmpxchg:
3614
0
        return compileAtomicCompareExchange(
3615
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3616
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3617
0
      case OpCode::I64__atomic__rmw__cmpxchg:
3618
0
        return compileAtomicCompareExchange(
3619
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3620
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3621
0
      case OpCode::I32__atomic__rmw8__cmpxchg_u:
3622
0
        return compileAtomicCompareExchange(
3623
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3624
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3625
0
      case OpCode::I32__atomic__rmw16__cmpxchg_u:
3626
0
        return compileAtomicCompareExchange(
3627
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3628
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3629
0
      case OpCode::I64__atomic__rmw8__cmpxchg_u:
3630
0
        return compileAtomicCompareExchange(
3631
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3632
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3633
0
      case OpCode::I64__atomic__rmw16__cmpxchg_u:
3634
0
        return compileAtomicCompareExchange(
3635
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3636
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3637
0
      case OpCode::I64__atomic__rmw32__cmpxchg_u:
3638
0
        return compileAtomicCompareExchange(
3639
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3640
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3641
3642
0
      default:
3643
0
        assumingUnreachable();
3644
979k
      }
3645
979k
      return;
3646
979k
    };
3647
1.44M
    for (const auto &Instr : Instrs) {
3648
      // Update instruction count
3649
1.44M
      if (LocalInstrCount) {
3650
0
        Builder.createStore(
3651
0
            Builder.createAdd(
3652
0
                Builder.createLoad(Context.Int64Ty, LocalInstrCount),
3653
0
                LLContext.getInt64(1)),
3654
0
            LocalInstrCount);
3655
0
      }
3656
1.44M
      if (LocalGas) {
3657
0
        auto NewGas = Builder.createAdd(
3658
0
            Builder.createLoad(Context.Int64Ty, LocalGas),
3659
0
            Builder.createLoad(
3660
0
                Context.Int64Ty,
3661
0
                Builder.createConstInBoundsGEP2_64(
3662
0
                    LLVM::Type::getArrayType(Context.Int64Ty, UINT16_MAX + 1),
3663
0
                    Context.getCostTable(Builder, ExecCtx), 0,
3664
0
                    uint16_t(Instr.getOpCode()))));
3665
0
        Builder.createStore(NewGas, LocalGas);
3666
0
      }
3667
3668
      // Make the instruction node according to Code.
3669
1.44M
      Dispatch(Instr);
3670
1.44M
    }
3671
9.83k
  }
3672
2.01k
  // Emit a trapping float -> signed integer truncation.
  //
  // Pops one operand (asserted to be float or double), rounds it toward zero
  // with the Trunc intrinsic and pushes the FPToSI result of type IntType.
  // Control branches to the InvalidConvToInt trap block when the operand is
  // NaN, and to the IntegerOverflow trap block when the rounded value lies
  // outside the signed range of IntType. Only 32- and 64-bit IntType are
  // handled.
  void compileSignedTrunc(LLVM::Type IntType) noexcept {
    auto OrderedBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.norm");
    auto AboveMinBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmin");
    auto InRangeBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmax");
    auto Operand = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t LowerBound = 0;
    int64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<int32_t>::min();
      UpperBound = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<int64_t>::min();
      UpperBound = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    // When the integer width fits in the mantissa the upper bound is compared
    // with <=, otherwise with < (presumably because the bound is then not
    // exactly representable in FloatTy and rounds up on conversion).
    const bool Exact = IntBits <= FloatTy.getFPMantissaWidth();
    const auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    // NaN is the only value that is unordered with itself.
    auto IsOrdered =
        Builder.createLikely(Builder.createFCmpORD(Operand, Operand));
    auto InvalidConvBB = getTrapBB(ErrCode::Value::InvalidConvToInt);
    Builder.createCondBr(IsOrdered, OrderedBB, InvalidConvBB);

    // Lower-bound check on the value rounded toward zero.
    Builder.positionAtEnd(OrderedBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Rounded = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveMin =
        Builder.createLikely(Builder.createFCmpOGE(Rounded, LowerFp));
    auto UnderflowBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(AboveMin, AboveMinBB, UnderflowBB);

    // Upper-bound check.
    Builder.positionAtEnd(AboveMinBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Rounded, UpperFp));
    auto OverflowBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(BelowMax, InRangeBB, OverflowBB);

    // In range: perform the actual conversion.
    Builder.positionAtEnd(InRangeBB);
    stackPush(Builder.createFPToSI(Rounded, IntType));
  }
3720
1.20k
  // Emit a saturating (non-trapping) float -> signed integer truncation.
  //
  // Pops one operand (asserted to be float or double) and pushes a PHI of
  // type IntType that merges four outcomes:
  //   - 0 when the operand is NaN,
  //   - the minimum of IntType when the rounded value is below the range,
  //   - the maximum of IntType when the rounded value is above the range,
  //   - the FPToSI result otherwise.
  // Only 32- and 64-bit IntType are handled.
  void compileSignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto OrderedBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.norm");
    auto AboveMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmin");
    auto InRangeBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.end");
    auto Operand = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t LowerBound = 0;
    int64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<int32_t>::min();
      UpperBound = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<int64_t>::min();
      UpperBound = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    // Selects <= versus < for the upper-bound comparison below.
    const bool Exact = IntBits <= FloatTy.getFPMantissaWidth();
    // Bit patterns of the saturation values, passed to getConstInt with the
    // sign-extend flag set.
    const auto LowerBits = static_cast<uint64_t>(LowerBound);
    const auto UpperBits = static_cast<uint64_t>(UpperBound);
    const auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    // NaN skips straight to the join block.
    auto IsOrdered =
        Builder.createLikely(Builder.createFCmpORD(Operand, Operand));
    Builder.createCondBr(IsOrdered, OrderedBB, JoinBB);

    // Below the range: saturate to the minimum.
    Builder.positionAtEnd(OrderedBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Rounded = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveMin =
        Builder.createLikely(Builder.createFCmpOGE(Rounded, LowerFp));
    Builder.createCondBr(AboveMin, AboveMinBB, JoinBB);

    // Above the range: saturate to the maximum.
    Builder.positionAtEnd(AboveMinBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Rounded, UpperFp));
    Builder.createCondBr(BelowMax, InRangeBB, JoinBB);

    // In range: perform the actual conversion.
    Builder.positionAtEnd(InRangeBB);
    auto Converted = Builder.createFPToSI(Rounded, IntType);
    Builder.createBr(JoinBB);

    // Merge the four predecessors; each edge carries its own result.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, 0, true), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, LowerBits, true),
                       OrderedBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, UpperBits, true),
                       AboveMinBB);
    Result.addIncoming(Converted, InRangeBB);

    stackPush(Result);
  }
3780
3.68k
  // Emit a trapping float -> unsigned integer truncation.
  //
  // Pops one operand (asserted to be float or double), rounds it toward zero
  // with the Trunc intrinsic and pushes the FPToUI result of type IntType.
  // Control branches to the InvalidConvToInt trap block when the operand is
  // NaN, and to the IntegerOverflow trap block when the rounded value lies
  // outside the unsigned range of IntType. Only 32- and 64-bit IntType are
  // handled.
  void compileUnsignedTrunc(LLVM::Type IntType) noexcept {
    auto OrderedBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.norm");
    auto AboveMinBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmin");
    auto InRangeBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmax");
    auto Operand = stackPop();

    // Unsigned range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    // When the integer width fits in the mantissa the upper bound is compared
    // with <=, otherwise with < (presumably because the bound is then not
    // exactly representable in FloatTy and rounds up on conversion).
    const bool Exact = IntBits <= FloatTy.getFPMantissaWidth();
    const auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    // NaN is the only value that is unordered with itself.
    auto IsOrdered =
        Builder.createLikely(Builder.createFCmpORD(Operand, Operand));
    auto InvalidConvBB = getTrapBB(ErrCode::Value::InvalidConvToInt);
    Builder.createCondBr(IsOrdered, OrderedBB, InvalidConvBB);

    // Lower-bound check on the value rounded toward zero.
    Builder.positionAtEnd(OrderedBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Rounded = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveMin =
        Builder.createLikely(Builder.createFCmpOGE(Rounded, LowerFp));
    auto UnderflowBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(AboveMin, AboveMinBB, UnderflowBB);

    // Upper-bound check.
    Builder.positionAtEnd(AboveMinBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Rounded, UpperFp));
    auto OverflowBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(BelowMax, InRangeBB, OverflowBB);

    // In range: perform the actual conversion.
    Builder.positionAtEnd(InRangeBB);
    stackPush(Builder.createFPToUI(Rounded, IntType));
  }
3828
1.07k
  // Emit a saturating (non-trapping) float -> unsigned integer truncation.
  //
  // Pops one operand (asserted to be float or double) and pushes a PHI of
  // type IntType that merges three outcomes:
  //   - the minimum of IntType (0) when the rounded value fails the ordered
  //     >= comparison against the minimum,
  //   - the maximum of IntType when the rounded value is above the range,
  //   - the FPToUI result otherwise.
  // There is no separate NaN branch here: an ordered comparison is false for
  // NaN, so NaN takes the first edge. Only 32- and 64-bit IntType are handled.
  void compileUnsignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto AboveMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.norm");
    auto InRangeBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.end");
    auto Operand = stackPop();

    // Unsigned range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    // Selects <= versus < for the upper-bound comparison below.
    const bool Exact = IntBits <= FloatTy.getFPMantissaWidth();
    const auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    // Below the range (or NaN): saturate to the minimum.
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Rounded = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveMin =
        Builder.createLikely(Builder.createFCmpOGE(Rounded, LowerFp));
    Builder.createCondBr(AboveMin, AboveMinBB, JoinBB);

    // Above the range: saturate to the maximum.
    Builder.positionAtEnd(AboveMinBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Rounded, UpperFp));
    Builder.createCondBr(BelowMax, InRangeBB, JoinBB);

    // In range: perform the actual conversion.
    Builder.positionAtEnd(InRangeBB);
    auto Converted = Builder.createFPToUI(Rounded, IntType);
    Builder.createBr(JoinBB);

    // Merge the three predecessors; each edge carries its own result.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, LowerBound), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, UpperBound),
                       AboveMinBB);
    Result.addIncoming(Converted, InRangeBB);

    stackPush(Result);
  }
3880
3881
  // Emit a runtime alignment check for an atomic memory access.
  //
  // Offset is ANDed with an i64 mask of (access size in bytes - 1), where the
  // access size is derived from the bit width of IntType. If any masked bit
  // is set, control branches to the UnalignedAtomicAccess trap block;
  // otherwise it continues in a fresh "address_align_ok" block, where the
  // builder is left positioned on return.
  void compileAtomicCheckOffsetAlignment(LLVM::Value Offset,
                                         LLVM::Type IntType) noexcept {
    const auto AccessBytes = IntType.getIntegerBitWidth() / 8;
    auto AlignMask = LLContext.getInt64(AccessBytes - 1);
    auto LowBits = Builder.createAnd(Offset, AlignMask);
    auto AlignedBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "address_align_ok");
    auto NoLowBits = Builder.createICmpEQ(LowBits, LLContext.getInt64(0));
    auto IsAligned = Builder.createLikely(NoLowBits);
    auto UnalignedBB = getTrapBB(ErrCode::Value::UnalignedAtomicAccess);
    Builder.createCondBr(IsAligned, AlignedBB, UnalignedBB);

    Builder.positionAtEnd(AlignedBB);
  }
3894
3895
188
  void compileMemoryFence() noexcept {
3896
188
    Builder.createFence(LLVMAtomicOrderingSequentiallyConsistent);
3897
188
  }
3898
  void compileAtomicNotify(unsigned MemoryIndex,
3899
33
                           unsigned MemoryOffset) noexcept {
3900
33
    auto Count = stackPop();
3901
33
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3902
33
    if (MemoryOffset != 0) {
3903
26
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3904
26
    }
3905
33
    compileAtomicCheckOffsetAlignment(Addr, Context.Int32Ty);
3906
33
    auto Offset = stackPop();
3907
3908
33
    stackPush(Builder.createCall(
3909
33
        Context.getIntrinsic(
3910
33
            Builder, Executable::Intrinsics::kMemAtomicNotify,
3911
33
            LLVM::Type::getFunctionType(
3912
33
                Context.Int32Ty,
3913
33
                {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
3914
33
        {LLContext.getInt32(MemoryIndex), Offset, Count}));
3915
33
  }
3916
  void compileAtomicWait(unsigned MemoryIndex, unsigned MemoryOffset,
3917
7
                         LLVM::Type TargetType, uint32_t BitWidth) noexcept {
3918
7
    auto Timeout = stackPop();
3919
7
    auto ExpectedValue = Builder.createZExtOrTrunc(stackPop(), Context.Int64Ty);
3920
7
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3921
7
    if (MemoryOffset != 0) {
3922
3
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3923
3
    }
3924
7
    compileAtomicCheckOffsetAlignment(Addr, TargetType);
3925
7
    auto Offset = stackPop();
3926
3927
7
    stackPush(Builder.createCall(
3928
7
        Context.getIntrinsic(
3929
7
            Builder, Executable::Intrinsics::kMemAtomicWait,
3930
7
            LLVM::Type::getFunctionType(Context.Int32Ty,
3931
7
                                        {Context.Int32Ty, Context.Int32Ty,
3932
7
                                         Context.Int64Ty, Context.Int64Ty,
3933
7
                                         Context.Int32Ty},
3934
7
                                        false)),
3935
7
        {LLContext.getInt32(MemoryIndex), Offset, ExpectedValue, Timeout,
3936
7
         LLContext.getInt32(BitWidth)}));
3937
7
  }
3938
  void compileAtomicLoad(unsigned MemoryIndex, unsigned MemoryOffset,
3939
                         unsigned Alignment, LLVM::Type IntType,
3940
0
                         LLVM::Type TargetType, bool Signed = false) noexcept {
3941
3942
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3943
0
    if (MemoryOffset != 0) {
3944
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3945
0
    }
3946
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3947
0
    auto VPtr = Builder.createInBoundsGEP1(
3948
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3949
0
        Offset);
3950
3951
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
3952
0
    auto Load = Builder.createLoad(TargetType, Ptr, true);
3953
0
    Load.setAlignment(1 << Alignment);
3954
0
    Load.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
3955
3956
0
    if (Signed) {
3957
0
      Stack.back() = Builder.createSExt(Load, IntType);
3958
0
    } else {
3959
0
      Stack.back() = Builder.createZExt(Load, IntType);
3960
0
    }
3961
0
  }
3962
  void compileAtomicStore(unsigned MemoryIndex, unsigned MemoryOffset,
3963
                          unsigned Alignment, LLVM::Type, LLVM::Type TargetType,
3964
0
                          bool Signed = false) noexcept {
3965
0
    auto V = stackPop();
3966
3967
0
    if (Signed) {
3968
0
      V = Builder.createSExtOrTrunc(V, TargetType);
3969
0
    } else {
3970
0
      V = Builder.createZExtOrTrunc(V, TargetType);
3971
0
    }
3972
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3973
0
    if (MemoryOffset != 0) {
3974
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3975
0
    }
3976
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3977
0
    auto VPtr = Builder.createInBoundsGEP1(
3978
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3979
0
        Offset);
3980
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
3981
0
    auto Store = Builder.createStore(V, Ptr, true);
3982
0
    Store.setAlignment(1 << Alignment);
3983
0
    Store.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
3984
0
  }
3985
3986
  void compileAtomicRMWOp(unsigned MemoryIndex, unsigned MemoryOffset,
3987
                          [[maybe_unused]] unsigned Alignment,
3988
                          LLVMAtomicRMWBinOp BinOp, LLVM::Type IntType,
3989
0
                          LLVM::Type TargetType, bool Signed = false) noexcept {
3990
0
    auto Value = Builder.createSExtOrTrunc(stackPop(), TargetType);
3991
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3992
0
    if (MemoryOffset != 0) {
3993
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3994
0
    }
3995
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3996
0
    auto VPtr = Builder.createInBoundsGEP1(
3997
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3998
0
        Offset);
3999
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4000
4001
0
    auto Ret = Builder.createAtomicRMW(
4002
0
        BinOp, Ptr, Value, LLVMAtomicOrderingSequentiallyConsistent);
4003
#if LLVM_VERSION_MAJOR >= 13
4004
    Ret.setAlignment(1 << Alignment);
4005
#endif
4006
0
    if (Signed) {
4007
0
      Stack.back() = Builder.createSExt(Ret, IntType);
4008
0
    } else {
4009
0
      Stack.back() = Builder.createZExt(Ret, IntType);
4010
0
    }
4011
0
  }
4012
  void compileAtomicCompareExchange(unsigned MemoryIndex, unsigned MemoryOffset,
4013
                                    [[maybe_unused]] unsigned Alignment,
4014
                                    LLVM::Type IntType, LLVM::Type TargetType,
4015
0
                                    bool Signed = false) noexcept {
4016
4017
0
    auto Replacement = Builder.createSExtOrTrunc(stackPop(), TargetType);
4018
0
    auto Expected = Builder.createSExtOrTrunc(stackPop(), TargetType);
4019
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4020
0
    if (MemoryOffset != 0) {
4021
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4022
0
    }
4023
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4024
0
    auto VPtr = Builder.createInBoundsGEP1(
4025
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4026
0
        Offset);
4027
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4028
4029
0
    auto Ret = Builder.createAtomicCmpXchg(
4030
0
        Ptr, Expected, Replacement, LLVMAtomicOrderingSequentiallyConsistent,
4031
0
        LLVMAtomicOrderingSequentiallyConsistent);
4032
#if LLVM_VERSION_MAJOR >= 13
4033
    Ret.setAlignment(1 << Alignment);
4034
#endif
4035
0
    auto OldVal = Builder.createExtractValue(Ret, 0);
4036
0
    if (Signed) {
4037
0
      Stack.back() = Builder.createSExt(OldVal, IntType);
4038
0
    } else {
4039
0
      Stack.back() = Builder.createZExt(OldVal, IntType);
4040
0
    }
4041
0
  }
4042
4043
10.4k
  void compileReturn() noexcept {
4044
10.4k
    updateInstrCount();
4045
10.4k
    updateGas();
4046
10.4k
    auto Ty = F.Ty.getReturnType();
4047
10.4k
    if (Ty.isVoidTy()) {
4048
1.85k
      Builder.createRetVoid();
4049
8.64k
    } else if (Ty.isStructTy()) {
4050
289
      const auto Count = Ty.getStructNumElements();
4051
289
      std::vector<LLVM::Value> Ret(Count);
4052
1.09k
      for (unsigned I = 0; I < Count; ++I) {
4053
806
        const unsigned J = Count - 1 - I;
4054
806
        Ret[J] = stackPop();
4055
806
      }
4056
289
      Builder.createAggregateRet(Ret);
4057
8.35k
    } else {
4058
8.35k
      Builder.createRet(stackPop());
4059
8.35k
    }
4060
10.4k
  }
4061
4062
17.5k
  void updateInstrCount() noexcept {
4063
17.5k
    if (LocalInstrCount) {
4064
0
      auto Store [[maybe_unused]] = Builder.createAtomicRMW(
4065
0
          LLVMAtomicRMWBinOpAdd, Context.getInstrCount(Builder, ExecCtx),
4066
0
          Builder.createLoad(Context.Int64Ty, LocalInstrCount),
4067
0
          LLVMAtomicOrderingMonotonic);
4068
#if LLVM_VERSION_MAJOR >= 13
4069
      Store.setAlignment(8);
4070
#endif
4071
0
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
4072
0
    }
4073
17.5k
  }
4074
4075
19.4k
  void updateGas() noexcept {
4076
19.4k
    if (LocalGas) {
4077
0
      auto CurrBB = Builder.getInsertBlock();
4078
0
      auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_check");
4079
0
      auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_ok");
4080
0
      auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_end");
4081
4082
0
      auto Cost = Builder.createLoad(Context.Int64Ty, LocalGas);
4083
0
      Cost.setAlignment(64);
4084
0
      auto GasPtr = Context.getGas(Builder, ExecCtx);
4085
0
      auto GasLimit = Context.getGasLimit(Builder, ExecCtx);
4086
0
      auto Gas = Builder.createLoad(Context.Int64Ty, GasPtr);
4087
0
      Gas.setAlignment(64);
4088
0
      Gas.setOrdering(LLVMAtomicOrderingMonotonic);
4089
0
      Builder.createBr(CheckBB);
4090
0
      Builder.positionAtEnd(CheckBB);
4091
4092
0
      auto PHIOldGas = Builder.createPHI(Context.Int64Ty);
4093
0
      auto NewGas = Builder.createAdd(PHIOldGas, Cost);
4094
0
      auto IsGasRemain =
4095
0
          Builder.createLikely(Builder.createICmpULE(NewGas, GasLimit));
4096
0
      Builder.createCondBr(IsGasRemain, OkBB,
4097
0
                           getTrapBB(ErrCode::Value::CostLimitExceeded));
4098
0
      Builder.positionAtEnd(OkBB);
4099
4100
0
      auto RGasAndSucceed = Builder.createAtomicCmpXchg(
4101
0
          GasPtr, PHIOldGas, NewGas, LLVMAtomicOrderingMonotonic,
4102
0
          LLVMAtomicOrderingMonotonic);
4103
#if LLVM_VERSION_MAJOR >= 13
4104
      RGasAndSucceed.setAlignment(8);
4105
#endif
4106
0
      RGasAndSucceed.setWeak(true);
4107
0
      auto RGas = Builder.createExtractValue(RGasAndSucceed, 0);
4108
0
      auto Succeed = Builder.createExtractValue(RGasAndSucceed, 1);
4109
0
      Builder.createCondBr(Builder.createLikely(Succeed), EndBB, CheckBB);
4110
0
      Builder.positionAtEnd(EndBB);
4111
4112
0
      Builder.createStore(LLContext.getInt64(0), LocalGas);
4113
4114
0
      PHIOldGas.addIncoming(Gas, CurrBB);
4115
0
      PHIOldGas.addIncoming(RGas, OkBB);
4116
0
    }
4117
19.4k
  }
4118
4119
2.80k
  void updateGasAtTrap() noexcept {
4120
2.80k
    if (LocalGas) {
4121
0
      auto Update [[maybe_unused]] = Builder.createAtomicRMW(
4122
0
          LLVMAtomicRMWBinOpAdd, Context.getGas(Builder, ExecCtx),
4123
0
          Builder.createLoad(Context.Int64Ty, LocalGas),
4124
0
          LLVMAtomicOrderingMonotonic);
4125
#if LLVM_VERSION_MAJOR >= 13
4126
      Update.setAlignment(8);
4127
#endif
4128
0
    }
4129
2.80k
  }
4130
4131
private:
4132
3.61k
  void compileCallOp(const unsigned int FuncIndex) noexcept {
4133
3.61k
    const auto &FuncType =
4134
3.61k
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4135
3.61k
            ->getFuncType();
4136
3.61k
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4137
3.61k
    const auto &ParamTypes = FuncType.getParamTypes();
4138
4139
3.61k
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4140
3.61k
    Args[0] = F.Fn.getFirstParam();
4141
4.38k
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4142
774
      const size_t J = ParamTypes.size() - 1 - I;
4143
774
      Args[J + 1] = stackPop();
4144
774
    }
4145
4146
3.61k
    auto Ret = Builder.createCall(Function, Args);
4147
3.61k
    auto Ty = Ret.getType();
4148
3.61k
    if (Ty.isVoidTy()) {
4149
      // nothing to do
4150
1.85k
    } else if (Ty.isStructTy()) {
4151
157
      for (auto Val : unpackStruct(Builder, Ret)) {
4152
157
        stackPush(Val);
4153
157
      }
4154
1.78k
    } else {
4155
1.78k
      stackPush(Ret);
4156
1.78k
    }
4157
3.61k
  }
4158
4159
  void compileIndirectCallOp(const uint32_t TableIndex,
4160
593
                             const uint32_t FuncTypeIndex) noexcept {
4161
593
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4162
593
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4163
593
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4164
4165
593
    LLVM::Value FuncIndex = stackPop();
4166
593
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4167
593
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4168
593
    auto RTy = FTy.getReturnType();
4169
4170
593
    const size_t ArgSize = FuncType.getParamTypes().size();
4171
593
    const size_t RetSize =
4172
593
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4173
593
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4174
593
    ArgsVec[0] = F.Fn.getFirstParam();
4175
1.18k
    for (size_t I = 0; I < ArgSize; ++I) {
4176
590
      const size_t J = ArgSize - I;
4177
590
      ArgsVec[J] = stackPop();
4178
590
    }
4179
4180
593
    std::vector<LLVM::Value> FPtrRetsVec;
4181
593
    FPtrRetsVec.reserve(RetSize);
4182
593
    {
4183
593
      auto FPtr = Builder.createCall(
4184
593
          Context.getIntrinsic(
4185
593
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4186
593
              LLVM::Type::getFunctionType(
4187
593
                  FTy.getPointerTo(),
4188
593
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4189
593
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4190
593
           FuncIndex});
4191
593
      Builder.createCondBr(
4192
593
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4193
593
          NotNullBB, IsNullBB);
4194
593
      Builder.positionAtEnd(NotNullBB);
4195
4196
593
      auto FPtrRet =
4197
593
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4198
593
      if (RetSize == 0) {
4199
        // nothing to do
4200
456
      } else if (RetSize == 1) {
4201
441
        FPtrRetsVec.push_back(FPtrRet);
4202
441
      } else {
4203
30
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4204
30
          FPtrRetsVec.push_back(Val);
4205
30
        }
4206
15
      }
4207
593
    }
4208
4209
593
    Builder.createBr(EndBB);
4210
593
    Builder.positionAtEnd(IsNullBB);
4211
4212
593
    std::vector<LLVM::Value> RetsVec;
4213
593
    {
4214
593
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4215
593
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4216
593
      Builder.createArrayPtrStore(
4217
593
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4218
593
          kValSize);
4219
4220
593
      Builder.createCall(
4221
593
          Context.getIntrinsic(
4222
593
              Builder, Executable::Intrinsics::kCallIndirect,
4223
593
              LLVM::Type::getFunctionType(Context.VoidTy,
4224
593
                                          {Context.Int32Ty, Context.Int32Ty,
4225
593
                                           Context.Int32Ty, Context.Int8PtrTy,
4226
593
                                           Context.Int8PtrTy},
4227
593
                                          false)),
4228
593
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4229
593
           FuncIndex, Args, Rets});
4230
4231
593
      if (RetSize == 0) {
4232
        // nothing to do
4233
456
      } else if (RetSize == 1) {
4234
441
        RetsVec.push_back(
4235
441
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4236
441
      } else {
4237
15
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4238
15
                                             kValSize);
4239
15
      }
4240
593
      Builder.createBr(EndBB);
4241
593
      Builder.positionAtEnd(EndBB);
4242
593
    }
4243
4244
1.06k
    for (unsigned I = 0; I < RetSize; ++I) {
4245
471
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4246
471
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4247
471
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4248
471
      stackPush(PHIRet);
4249
471
    }
4250
593
  }
4251
4252
0
  void compileReturnCallOp(const unsigned int FuncIndex) noexcept {
4253
0
    const auto &FuncType =
4254
0
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4255
0
            ->getFuncType();
4256
0
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4257
0
    const auto &ParamTypes = FuncType.getParamTypes();
4258
4259
0
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4260
0
    Args[0] = F.Fn.getFirstParam();
4261
0
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4262
0
      const size_t J = ParamTypes.size() - 1 - I;
4263
0
      Args[J + 1] = stackPop();
4264
0
    }
4265
4266
0
    auto Ret = Builder.createCall(Function, Args);
4267
0
    auto Ty = Ret.getType();
4268
0
    if (Ty.isVoidTy()) {
4269
0
      Builder.createRetVoid();
4270
0
    } else {
4271
0
      Builder.createRet(Ret);
4272
0
    }
4273
0
  }
4274
4275
  void compileReturnIndirectCallOp(const uint32_t TableIndex,
4276
0
                                   const uint32_t FuncTypeIndex) noexcept {
4277
0
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4278
0
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4279
4280
0
    LLVM::Value FuncIndex = stackPop();
4281
0
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4282
0
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4283
0
    auto RTy = FTy.getReturnType();
4284
4285
0
    const size_t ArgSize = FuncType.getParamTypes().size();
4286
0
    const size_t RetSize =
4287
0
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4288
0
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4289
0
    ArgsVec[0] = F.Fn.getFirstParam();
4290
0
    for (size_t I = 0; I < ArgSize; ++I) {
4291
0
      const size_t J = ArgSize - I;
4292
0
      ArgsVec[J] = stackPop();
4293
0
    }
4294
4295
0
    {
4296
0
      auto FPtr = Builder.createCall(
4297
0
          Context.getIntrinsic(
4298
0
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4299
0
              LLVM::Type::getFunctionType(
4300
0
                  FTy.getPointerTo(),
4301
0
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4302
0
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4303
0
           FuncIndex});
4304
0
      Builder.createCondBr(
4305
0
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4306
0
          NotNullBB, IsNullBB);
4307
0
      Builder.positionAtEnd(NotNullBB);
4308
4309
0
      auto FPtrRet =
4310
0
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4311
0
      if (RetSize == 0) {
4312
0
        Builder.createRetVoid();
4313
0
      } else {
4314
0
        Builder.createRet(FPtrRet);
4315
0
      }
4316
0
    }
4317
4318
0
    Builder.positionAtEnd(IsNullBB);
4319
4320
0
    {
4321
0
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4322
0
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4323
0
      Builder.createArrayPtrStore(
4324
0
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4325
0
          kValSize);
4326
4327
0
      Builder.createCall(
4328
0
          Context.getIntrinsic(
4329
0
              Builder, Executable::Intrinsics::kCallIndirect,
4330
0
              LLVM::Type::getFunctionType(Context.VoidTy,
4331
0
                                          {Context.Int32Ty, Context.Int32Ty,
4332
0
                                           Context.Int32Ty, Context.Int8PtrTy,
4333
0
                                           Context.Int8PtrTy},
4334
0
                                          false)),
4335
0
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4336
0
           FuncIndex, Args, Rets});
4337
4338
0
      if (RetSize == 0) {
4339
0
        Builder.createRetVoid();
4340
0
      } else if (RetSize == 1) {
4341
0
        Builder.createRet(
4342
0
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4343
0
      } else {
4344
0
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4345
0
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4346
0
      }
4347
0
    }
4348
0
  }
4349
4350
0
  void compileCallRefOp(const unsigned int TypeIndex) noexcept {
4351
0
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4352
0
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4353
0
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4354
4355
0
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4356
0
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4357
0
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4358
0
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4359
0
        LLContext.getInt64(0)));
4360
0
    Builder.createCondBr(IsRefNotNull, OkBB,
4361
0
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4362
0
    Builder.positionAtEnd(OkBB);
4363
4364
0
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4365
0
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4366
0
    auto RTy = FTy.getReturnType();
4367
4368
0
    const size_t ArgSize = FuncType.getParamTypes().size();
4369
0
    const size_t RetSize =
4370
0
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4371
0
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4372
0
    ArgsVec[0] = F.Fn.getFirstParam();
4373
0
    for (size_t I = 0; I < ArgSize; ++I) {
4374
0
      const size_t J = ArgSize - I;
4375
0
      ArgsVec[J] = stackPop();
4376
0
    }
4377
4378
0
    std::vector<LLVM::Value> FPtrRetsVec;
4379
0
    FPtrRetsVec.reserve(RetSize);
4380
0
    {
4381
0
      auto FPtr = Builder.createCall(
4382
0
          Context.getIntrinsic(
4383
0
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4384
0
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4385
0
                                          {Context.Int64x2Ty}, false)),
4386
0
          {Ref});
4387
0
      Builder.createCondBr(
4388
0
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4389
0
          NotNullBB, IsNullBB);
4390
0
      Builder.positionAtEnd(NotNullBB);
4391
4392
0
      auto FPtrRet =
4393
0
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4394
0
      if (RetSize == 0) {
4395
        // nothing to do
4396
0
      } else if (RetSize == 1) {
4397
0
        FPtrRetsVec.push_back(FPtrRet);
4398
0
      } else {
4399
0
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4400
0
          FPtrRetsVec.push_back(Val);
4401
0
        }
4402
0
      }
4403
0
    }
4404
4405
0
    Builder.createBr(EndBB);
4406
0
    Builder.positionAtEnd(IsNullBB);
4407
4408
0
    std::vector<LLVM::Value> RetsVec;
4409
0
    {
4410
0
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4411
0
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4412
0
      Builder.createArrayPtrStore(
4413
0
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4414
0
          kValSize);
4415
4416
0
      Builder.createCall(
4417
0
          Context.getIntrinsic(
4418
0
              Builder, Executable::Intrinsics::kCallRef,
4419
0
              LLVM::Type::getFunctionType(
4420
0
                  Context.VoidTy,
4421
0
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4422
0
                  false)),
4423
0
          {Ref, Args, Rets});
4424
4425
0
      if (RetSize == 0) {
4426
        // nothing to do
4427
0
      } else if (RetSize == 1) {
4428
0
        RetsVec.push_back(
4429
0
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4430
0
      } else {
4431
0
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4432
0
                                             kValSize);
4433
0
      }
4434
0
      Builder.createBr(EndBB);
4435
0
      Builder.positionAtEnd(EndBB);
4436
0
    }
4437
4438
0
    for (unsigned I = 0; I < RetSize; ++I) {
4439
0
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4440
0
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4441
0
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4442
0
      stackPush(PHIRet);
4443
0
    }
4444
0
  }
4445
4446
0
  void compileReturnCallRefOp(const unsigned int TypeIndex) noexcept {
4447
0
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4448
0
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4449
4450
0
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4451
0
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4452
0
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4453
0
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4454
0
        LLContext.getInt64(0)));
4455
0
    Builder.createCondBr(IsRefNotNull, OkBB,
4456
0
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4457
0
    Builder.positionAtEnd(OkBB);
4458
4459
0
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4460
0
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4461
0
    auto RTy = FTy.getReturnType();
4462
4463
0
    const size_t ArgSize = FuncType.getParamTypes().size();
4464
0
    const size_t RetSize =
4465
0
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4466
0
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4467
0
    ArgsVec[0] = F.Fn.getFirstParam();
4468
0
    for (size_t I = 0; I < ArgSize; ++I) {
4469
0
      const size_t J = ArgSize - I;
4470
0
      ArgsVec[J] = stackPop();
4471
0
    }
4472
4473
0
    {
4474
0
      auto FPtr = Builder.createCall(
4475
0
          Context.getIntrinsic(
4476
0
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4477
0
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4478
0
                                          {Context.Int64x2Ty}, false)),
4479
0
          {Ref});
4480
0
      Builder.createCondBr(
4481
0
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4482
0
          NotNullBB, IsNullBB);
4483
0
      Builder.positionAtEnd(NotNullBB);
4484
4485
0
      auto FPtrRet =
4486
0
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4487
0
      if (RetSize == 0) {
4488
0
        Builder.createRetVoid();
4489
0
      } else {
4490
0
        Builder.createRet(FPtrRet);
4491
0
      }
4492
0
    }
4493
4494
0
    Builder.positionAtEnd(IsNullBB);
4495
4496
0
    {
4497
0
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4498
0
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4499
0
      Builder.createArrayPtrStore(
4500
0
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4501
0
          kValSize);
4502
4503
0
      Builder.createCall(
4504
0
          Context.getIntrinsic(
4505
0
              Builder, Executable::Intrinsics::kCallRef,
4506
0
              LLVM::Type::getFunctionType(
4507
0
                  Context.VoidTy,
4508
0
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4509
0
                  false)),
4510
0
          {Ref, Args, Rets});
4511
4512
0
      if (RetSize == 0) {
4513
0
        Builder.createRetVoid();
4514
0
      } else if (RetSize == 1) {
4515
0
        Builder.createRet(
4516
0
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4517
0
      } else {
4518
0
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4519
0
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4520
0
      }
4521
0
    }
4522
0
  }
4523
4524
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4525
17.6k
                     LLVM::Type LoadTy) noexcept {
4526
17.6k
    if constexpr (kForceUnalignment) {
4527
17.6k
      Alignment = 0;
4528
17.6k
    }
4529
17.6k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4530
17.6k
    if (Offset != 0) {
4531
11.4k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4532
11.4k
    }
4533
4534
17.6k
    auto VPtr = Builder.createInBoundsGEP1(
4535
17.6k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4536
17.6k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4537
17.6k
    auto LoadInst = Builder.createLoad(LoadTy, Ptr, true);
4538
17.6k
    LoadInst.setAlignment(1 << Alignment);
4539
17.6k
    stackPush(LoadInst);
4540
17.6k
  }
4541
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4542
                     LLVM::Type LoadTy, LLVM::Type ExtendTy,
4543
7.04k
                     bool Signed) noexcept {
4544
7.04k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4545
7.04k
    if (Signed) {
4546
2.94k
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4547
4.10k
    } else {
4548
4.10k
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4549
4.10k
    }
4550
7.04k
  }
4551
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4552
4.85k
                           unsigned Alignment, LLVM::Type LoadTy) noexcept {
4553
4.85k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4554
4.85k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4555
4.85k
  }
4556
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4557
                           unsigned Alignment, LLVM::Type LoadTy,
4558
1.55k
                           LLVM::Type ExtendTy, bool Signed) noexcept {
4559
1.55k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy, ExtendTy, Signed);
4560
1.55k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4561
1.55k
  }
4562
  void compileSplatLoadOp(unsigned MemoryIndex, unsigned Offset,
4563
                          unsigned Alignment, LLVM::Type LoadTy,
4564
521
                          LLVM::Type VectorTy) noexcept {
4565
521
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4566
521
    compileSplatOp(VectorTy);
4567
521
  }
4568
  void compileLoadLaneOp(unsigned MemoryIndex, unsigned Offset,
4569
                         unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4570
480
                         LLVM::Type VectorTy) noexcept {
4571
480
    auto Vector = stackPop();
4572
480
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4573
480
    auto Value = Stack.back();
4574
480
    Stack.back() = Builder.createBitCast(
4575
480
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4576
480
                                    Value, LLContext.getInt64(Index)),
4577
480
        Context.Int64x2Ty);
4578
480
  }
4579
  // Emit a store of the value on top of the stack to linear memory.
  //
  // Stack effect: pops the value to store, then pops the address operand.
  //   MemoryIndex  memory instance passed to Context.getMemory.
  //   Offset       static offset added to the address operand when non-zero.
  //   Alignment    log2 alignment hint; overridden to 0 while
  //                kForceUnalignment is set.
  //   LoadTy       type of the stored value (and pointee type of the store).
  //   Trunc        if true, truncate the value to LoadTy before storing.
  //   BitCast      if true, bitcast the value to LoadTy before storing.
  void compileStoreOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4580
                      LLVM::Type LoadTy, bool Trunc = false,
4581
3.21k
                      bool BitCast = false) noexcept {
4582
3.21k
    // kForceUnalignment is a compile-time constant; when set, the caller's
    // alignment hint is discarded, so the store below gets alignment 1 << 0.
    if constexpr (kForceUnalignment) {
4583
3.21k
      Alignment = 0;
4584
3.21k
    }
4585
3.21k
    auto V = stackPop();
4586
3.21k
    // The address operand is widened to 64 bits before the static offset is
    // added.
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4587
3.21k
    if (Offset != 0) {
4588
2.40k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4589
2.40k
    }
4590
4591
3.21k
    if (Trunc) {
4592
640
      V = Builder.createTrunc(V, LoadTy);
4593
640
    }
4594
3.21k
    if (BitCast) {
4595
219
      V = Builder.createBitCast(V, LoadTy);
4596
219
    }
4597
3.21k
    // Index the memory base with an Int8Ty element type, i.e. Off is a byte
    // offset, then reinterpret the resulting pointer as LoadTy*.
    auto VPtr = Builder.createInBoundsGEP1(
4598
3.21k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4599
3.21k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4600
3.21k
    // NOTE(review): the trailing `true` is presumably the volatile flag of
    // the wrapper's createStore -- confirm against the LLVM::Builder API.
    auto StoreInst = Builder.createStore(V, Ptr, true);
4601
3.21k
    StoreInst.setAlignment(1 << Alignment);
4602
3.21k
  }
4603
  // Store a single lane of the vector on top of the stack to memory.
  // The vector is replaced in place by element `Index` of its VectorTy view,
  // and the resulting scalar is then stored through compileStoreOp (with the
  // default Trunc/BitCast flags).
  void compileStoreLaneOp(unsigned MemoryIndex, unsigned Offset,
4604
                          unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4605
318
                          LLVM::Type VectorTy) noexcept {
4606
318
    auto Vector = Stack.back();
4607
318
    Stack.back() = Builder.createExtractElement(
4608
318
        Builder.createBitCast(Vector, VectorTy), LLContext.getInt64(Index));
4609
318
    compileStoreOp(MemoryIndex, Offset, Alignment, LoadTy);
4610
318
  }
4611
45.4k
  // Broadcast the scalar on top of the stack into every lane of VectorTy and
  // replace it with the resulting vector, cast to Int64x2Ty.
  void compileSplatOp(LLVM::Type VectorTy) noexcept {
4612
45.4k
    auto Undef = LLVM::Value::getUndef(VectorTy);
4613
45.4k
    // Shuffle mask: one Int32 zero per lane of VectorTy, so every result
    // lane selects element 0 of the first shuffle operand.
    auto Zeros = LLVM::Value::getConstNull(
4614
45.4k
        LLVM::Type::getVectorType(Context.Int32Ty, VectorTy.getVectorSize()));
4615
45.4k
    // Narrow the scalar to the lane type before inserting it.
    auto Value = Builder.createTrunc(Stack.back(), VectorTy.getElementType());
4616
45.4k
    auto Vector =
4617
45.4k
        Builder.createInsertElement(Undef, Value, LLContext.getInt64(0));
4618
45.4k
    Vector = Builder.createShuffleVector(Vector, Undef, Zeros);
4619
4620
45.4k
    Stack.back() = Builder.createBitCast(Vector, Context.Int64x2Ty);
4621
45.4k
  }
4622
1.38k
  // Replace the vector on top of the stack with lane `Index` of its VectorTy
  // view. The extracted scalar keeps the lane's element type; no extension is
  // applied here.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
4623
1.38k
    auto Vector = Builder.createBitCast(Stack.back(), VectorTy);
4624
1.38k
    Stack.back() =
4625
1.38k
        Builder.createExtractElement(Vector, LLContext.getInt64(Index));
4626
1.38k
  }
4627
  // Extract lane `Index` (via the two-argument overload) and widen the
  // resulting scalar to ExtendTy: sign-extended when Signed is true,
  // zero-extended otherwise.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index,
4628
1.04k
                            LLVM::Type ExtendTy, bool Signed) noexcept {
4629
1.04k
    compileExtractLaneOp(VectorTy, Index);
4630
1.04k
    if (Signed) {
4631
494
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4632
555
    } else {
4633
555
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4634
555
    }
4635
1.04k
  }
4636
669
  // Replace lane `Index` of a vector with a scalar.
  // Stack effect: pops the scalar (truncated to the lane element type), then
  // overwrites the vector now on top of the stack with the updated vector,
  // cast back to Int64x2Ty.
  void compileReplaceLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
4637
669
    auto Value = Builder.createTrunc(stackPop(), VectorTy.getElementType());
4638
669
    auto Vector = Stack.back();
4639
669
    Stack.back() = Builder.createBitCast(
4640
669
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4641
669
                                    Value, LLContext.getInt64(Index)),
4642
669
        Context.Int64x2Ty);
4643
669
  }
4644
  // Lane-wise integer comparison of two vectors.
  // Stack effect: pops RHS, then LHS; pushes the result as Int64x2Ty.
  // Both operands are viewed as VectorTy and compared with `Predicate`; the
  // per-lane boolean result is sign-extended back to VectorTy, so a true
  // lane becomes all-ones and a false lane becomes zero.
  void compileVectorCompareOp(LLVM::Type VectorTy,
4645
4.95k
                              LLVMIntPredicate Predicate) noexcept {
4646
4.95k
    auto RHS = stackPop();
4647
4.95k
    auto LHS = stackPop();
4648
4.95k
    auto Result = Builder.createSExt(
4649
4.95k
        Builder.createICmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4650
4.95k
                           Builder.createBitCast(RHS, VectorTy)),
4651
4.95k
        VectorTy);
4652
4.95k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4653
4.95k
  }
4654
  // Lane-wise floating-point comparison of two vectors.
  // Stack effect: pops RHS, then LHS; pushes the result as Int64x2Ty.
  // Operands are viewed as VectorTy and compared with `Predicate`; the
  // per-lane boolean result is sign-extended to ResultTy.
  // NOTE(review): ResultTy is presumably the integer vector type with the
  // same lane count and lane width as VectorTy -- confirm at the call sites.
  void compileVectorCompareOp(LLVM::Type VectorTy, LLVMRealPredicate Predicate,
4655
3.45k
                              LLVM::Type ResultTy) noexcept {
4656
3.45k
    auto RHS = stackPop();
4657
3.45k
    auto LHS = stackPop();
4658
3.45k
    auto Result = Builder.createSExt(
4659
3.45k
        Builder.createFCmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4660
3.45k
                           Builder.createBitCast(RHS, VectorTy)),
4661
3.45k
        ResultTy);
4662
3.45k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4663
3.45k
  }
4664
  // Generic unary vector operation.
  // Views the value on top of the stack as VectorTy, applies `Op` to it, and
  // overwrites the stack top with Op's result cast to Int64x2Ty.
  // `Op` is any callable taking the bitcast value and returning a value that
  // can be bitcast to Int64x2Ty.
  template <typename Func>
4665
24.2k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
24.2k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
24.2k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
24.2k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
1.98k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
1.98k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
1.98k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
1.98k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
2.39k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
2.39k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
2.39k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
2.39k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
104
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
104
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
104
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
104
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
2.21k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
2.21k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
2.21k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
2.21k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
544
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
544
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
544
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
544
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
937
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
937
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
937
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
937
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
315
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
315
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
315
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
315
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
1.25k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
1.25k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
1.25k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
1.25k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
2.08k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
2.08k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
2.08k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
2.08k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
1.65k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
1.65k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
1.65k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
1.65k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
354
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
354
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
354
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
354
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
950
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
950
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
950
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
950
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
5.79k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
5.79k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
5.79k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
5.79k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
644
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
644
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
644
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
644
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
1.94k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
1.94k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
1.94k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
1.94k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
563
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
563
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
563
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
563
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4665
553
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4666
553
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4667
553
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4668
553
  }
4669
1.98k
  // Lane-wise integer absolute value of the vector on top of the stack.
  // Emitted as select(V < 0, -V, V) using a signed compare against a zero
  // vector of VectorTy.
  void compileVectorAbs(LLVM::Type VectorTy) noexcept {
4670
1.98k
    compileVectorOp(VectorTy, [this, VectorTy](auto V) noexcept {
4671
1.98k
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4672
1.98k
      auto C = Builder.createICmpSLT(V, Zero);
4673
1.98k
      return Builder.createSelect(C, Builder.createNeg(V), V);
4674
1.98k
    });
4675
1.98k
  }
4676
2.39k
  // Lane-wise integer negation of the vector on top of the stack, emitted
  // through compileVectorOp with Builder.createNeg.
  void compileVectorNeg(LLVM::Type VectorTy) noexcept {
4677
2.39k
    compileVectorOp(VectorTy,
4678
2.39k
                    [this](auto V) noexcept { return Builder.createNeg(V); });
4679
2.39k
  }
4680
104
  // Per-lane population count on the Int8x16Ty view of the vector on top of
  // the stack, emitted with the Ctpop intrinsic.
  void compileVectorPopcnt() noexcept {
4681
104
    compileVectorOp(Context.Int8x16Ty, [this](auto V) noexcept {
4682
104
      // NOTE(review): `assuming` is a project macro; presumably it asserts
      // that the Ctpop intrinsic ID was resolved -- confirm its definition.
      assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
4683
104
      return Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, V);
4684
104
    });
4685
104
  }
4686
  // Generic vector-to-integer reduction.
  // Views the value on top of the stack as VectorTy, applies `Op`, and
  // overwrites the stack top with Op's result zero-extended to Int32Ty.
  // `Op` must therefore return an integer value no wider than 32 bits.
  template <typename Func>
4687
1.90k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4688
1.90k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4689
1.90k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4690
1.90k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}&&)
Line
Count
Source
4687
107
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4688
107
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4689
107
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4690
107
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4687
882
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4688
882
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4689
882
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4690
882
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4687
912
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4688
912
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4689
912
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4690
912
  }
4691
107
  // Reduce the vector on top of the stack to an Int32 flag that is non-zero
  // when the vector is not all-zero.
  // The vector is viewed as Context.Int128x1Ty and compared for inequality
  // with a zero constant of that type; the comparison result is bitcast to
  // i1 and then zero-extended by compileVectorReduceIOp.
  // NOTE(review): Int128x1Ty is presumably a one-lane 128-bit integer
  // vector, which makes the compare result a one-lane i1 vector -- confirm.
  void compileVectorAnyTrue() noexcept {
4692
107
    compileVectorReduceIOp(Context.Int128x1Ty, [this](auto V) noexcept {
4693
107
      auto Zero = LLVM::Value::getConstNull(Context.Int128x1Ty);
4694
107
      return Builder.createBitCast(Builder.createICmpNE(V, Zero),
4695
107
                                   LLContext.getInt1Ty());
4696
107
    });
4697
107
  }
4698
882
  // Reduce the vector on top of the stack to an Int32 flag that is 1 when
  // every lane of its VectorTy view is non-zero, and 0 otherwise.
  void compileVectorAllTrue(LLVM::Type VectorTy) noexcept {
4699
882
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4700
882
      // One bit per lane: IntType has as many bits as VectorTy has lanes.
      const auto Size = VectorTy.getVectorSize();
4701
882
      auto IntType = LLContext.getIntNTy(Size);
4702
882
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4703
882
      // Cmp has a bit set for each lane that IS zero...
      auto Cmp = Builder.createBitCast(Builder.createICmpEQ(V, Zero), IntType);
4704
882
      auto CmpZero = LLVM::Value::getConstInt(IntType, 0);
4705
882
      // ...so "all lanes true" means that mask is empty.
      return Builder.createICmpEQ(Cmp, CmpZero);
4706
882
    });
4707
882
  }
4708
912
  // Reduce the vector on top of the stack to an Int32 bitmask with one bit
  // per lane of VectorTy. A lane's bit is set when the lane compares
  // signed-less-than zero; the per-lane result is bitcast to an integer with
  // as many bits as there are lanes and zero-extended by
  // compileVectorReduceIOp.
  void compileVectorBitMask(LLVM::Type VectorTy) noexcept {
4709
912
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4710
912
      const auto Size = VectorTy.getVectorSize();
4711
912
      auto IntType = LLContext.getIntNTy(Size);
4712
912
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4713
912
      return Builder.createBitCast(Builder.createICmpSLT(V, Zero), IntType);
4714
912
    });
4715
912
  }
4716
  // Generic vector-by-scalar shift.
  // Stack effect: pops the shift count, then the vector; pushes
  // Op(vector, splatted count) cast to Int64x2Ty.
  // `Op` receives the vector viewed as VectorTy and a same-typed vector whose
  // lanes all hold the masked shift count.
  template <typename Func>
4717
3.35k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4718
3.35k
    // Lanes narrower than 32 bits need the count truncated; otherwise it is
    // zero-extended (or left as is) to the lane width.
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4719
3.35k
    // Reduce the count modulo the lane width (width - 1 used as an AND mask).
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4720
3.35k
    // NOTE(review): the mask is built with getInt32, so the popped count is
    // presumably a 32-bit integer value -- confirm against the callers.
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4721
3.35k
    auto RHS = Builder.createVectorSplat(
4722
3.35k
        VectorTy.getVectorSize(),
4723
3.35k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4724
3.35k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4725
3.35k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4726
3.35k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4727
3.35k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4717
1.32k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4718
1.32k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4719
1.32k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4720
1.32k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4721
1.32k
    auto RHS = Builder.createVectorSplat(
4722
1.32k
        VectorTy.getVectorSize(),
4723
1.32k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4724
1.32k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4725
1.32k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4726
1.32k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4727
1.32k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4717
1.73k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4718
1.73k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4719
1.73k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4720
1.73k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4721
1.73k
    auto RHS = Builder.createVectorSplat(
4722
1.73k
        VectorTy.getVectorSize(),
4723
1.73k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4724
1.73k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4725
1.73k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4726
1.73k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4727
1.73k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4717
291
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4718
291
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4719
291
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4720
291
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4721
291
    auto RHS = Builder.createVectorSplat(
4722
291
        VectorTy.getVectorSize(),
4723
291
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4724
291
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4725
291
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4726
291
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4727
291
  }
4728
1.32k
  // Lane-wise left shift: compileVectorShiftOp with Builder.createShl.
  void compileVectorShl(LLVM::Type VectorTy) noexcept {
4729
1.32k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4730
1.32k
      return Builder.createShl(LHS, RHS);
4731
1.32k
    });
4732
1.32k
  }
4733
291
  // Lane-wise logical (zero-filling) right shift: compileVectorShiftOp with
  // Builder.createLShr.
  void compileVectorLShr(LLVM::Type VectorTy) noexcept {
4734
291
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4735
291
      return Builder.createLShr(LHS, RHS);
4736
291
    });
4737
291
  }
4738
1.73k
  // Lane-wise arithmetic (sign-propagating) right shift:
  // compileVectorShiftOp with Builder.createAShr.
  void compileVectorAShr(LLVM::Type VectorTy) noexcept {
4739
1.73k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4740
1.73k
      return Builder.createAShr(LHS, RHS);
4741
1.73k
    });
4742
1.73k
  }
4743
  // Generic binary vector operation.
  // Stack effect: pops RHS, then LHS (each viewed as VectorTy); pushes
  // Op(LHS, RHS) cast to Int64x2Ty.
  template <typename Func>
4744
7.30k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
7.30k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
7.30k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
7.30k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
7.30k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
302
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
302
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
302
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
302
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
302
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
1.22k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
1.22k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
1.22k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
1.22k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
1.22k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
795
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
795
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
795
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
795
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
795
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
387
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
387
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
387
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
387
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
387
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
267
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
267
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
267
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
267
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
267
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
249
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
249
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
249
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
249
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
249
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
401
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
401
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
401
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
401
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
401
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
755
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
755
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
755
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
755
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
755
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
222
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
222
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
222
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
222
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
222
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
403
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
403
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
403
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
403
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
403
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
134
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
134
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
134
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
134
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
134
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
185
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
185
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
185
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
185
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
185
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
446
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
446
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
446
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
446
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
446
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
181
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
181
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
181
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
181
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
181
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
222
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
222
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
222
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
222
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
222
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
284
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
284
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
284
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
284
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
284
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
235
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
235
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
235
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
235
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
235
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
299
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
299
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
299
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
299
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
299
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4744
314
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4745
314
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4746
314
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4747
314
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4748
314
  }
4749
302
  // Emits a lane-wise integer add of two vector operands viewed as VectorTy.
  // compileVectorVectorOp pops the right-hand operand first, then the
  // left-hand one, bitcasts both to VectorTy, and pushes the lambda's result
  // back as Int64x2Ty.
  void compileVectorVectorAdd(LLVM::Type VectorTy) noexcept {
4750
302
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4751
302
      return Builder.createAdd(LHS, RHS);
4752
302
    });
4753
302
  }
4754
1.22k
  // Emits a lane-wise saturating add of two vector operands.
  //   VectorTy: lane layout both operands are bitcast to.
  //   Signed:   selects the SAddSat intrinsic when true, UAddSat otherwise.
  void compileVectorVectorAddSat(LLVM::Type VectorTy, bool Signed) noexcept {
4755
1.22k
    auto ID = Signed ? LLVM::Core::SAddSat : LLVM::Core::UAddSat;
4756
1.22k
    assuming(ID != LLVM::Core::NotIntrinsic);
4757
1.22k
    compileVectorVectorOp(
4758
1.22k
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4759
1.22k
          // The intrinsic is overloaded on the vector type.
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4760
1.22k
        });
4761
1.22k
  }
4762
795
  // Emits a lane-wise integer subtract (LHS - RHS) of two vector operands
  // viewed as VectorTy. LHS is the operand that was pushed first (it is popped
  // second by compileVectorVectorOp).
  void compileVectorVectorSub(LLVM::Type VectorTy) noexcept {
4763
795
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4764
795
      return Builder.createSub(LHS, RHS);
4765
795
    });
4766
795
  }
4767
387
  // Emits a lane-wise saturating subtract (LHS - RHS) of two vector operands.
  //   VectorTy: lane layout both operands are bitcast to.
  //   Signed:   selects the SSubSat intrinsic when true, USubSat otherwise.
  void compileVectorVectorSubSat(LLVM::Type VectorTy, bool Signed) noexcept {
4768
387
    auto ID = Signed ? LLVM::Core::SSubSat : LLVM::Core::USubSat;
4769
387
    assuming(ID != LLVM::Core::NotIntrinsic);
4770
387
    compileVectorVectorOp(
4771
387
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4772
387
          // The intrinsic is overloaded on the vector type.
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4773
387
        });
4774
387
  }
4775
403
  // Emits a lane-wise integer multiply of two vector operands viewed as
  // VectorTy; stack handling and casts are done by compileVectorVectorOp.
  void compileVectorVectorMul(LLVM::Type VectorTy) noexcept {
4776
403
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4777
403
      return Builder.createMul(LHS, RHS);
4778
403
    });
4779
403
  }
4780
64
  // Emits a byte swizzle: pops the index vector, then the data vector (both
  // viewed as 16 x i8), and pushes a vector whose lane I is Vector[Index[I]],
  // or zero when Index[I] is greater than 15 (see the generic fallback below,
  // which spells this out). Target-specific intrinsics are used when the
  // corresponding feature flag is set on Context.
  void compileVectorSwizzle() noexcept {
4781
64
    auto Index = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4782
64
    auto Vector = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4783
4784
64
#if defined(__x86_64__)
4785
64
    if (Context.SupportSSSE3) {
4786
64
      // Adding 112 maps indices 0..15 to 112..127 (bit 7 clear, low nibble
      // unchanged) and indices 16..143 to 128..255 (bit 7 set).
      auto Magic = Builder.createVectorSplat(16, LLContext.getInt8(112));
4787
64
      auto Added = Builder.createAdd(Index, Magic);
4788
64
      // Indices >= 144 wrap around in 8 bits; the unsigned Index > Added test
      // detects that wraparound and forces those lanes to 0xFF instead.
      // NOTE(review): presumably this relies on PSHUFB zeroing every lane whose
      // selector byte has bit 7 set -- confirm against the ISA reference.
      auto NewIndex = Builder.createSelect(
4789
64
          Builder.createICmpUGT(Index, Added),
4790
64
          LLVM::Value::getConstAllOnes(Context.Int8x16Ty), Added);
4791
64
      assuming(LLVM::Core::X86SSSE3PShufB128 != LLVM::Core::NotIntrinsic);
4792
64
      stackPush(Builder.createBitCast(
4793
64
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PShufB128, {},
4794
64
                                  {Vector, NewIndex}),
4795
64
          Context.Int64x2Ty));
4796
64
      return;
4797
64
    }
4798
0
#endif
4799
4800
#if defined(__aarch64__)
4801
    if (Context.SupportNEON) {
4802
      assuming(LLVM::Core::AArch64NeonTbl1 != LLVM::Core::NotIntrinsic);
4803
      // The index vector is passed through unmodified here.
      // NOTE(review): presumably TBL already yields zero for out-of-range
      // indices -- confirm.
      stackPush(Builder.createBitCast(
4804
          Builder.createIntrinsic(LLVM::Core::AArch64NeonTbl1,
4805
                                  {Context.Int8x16Ty}, {Vector, Index}),
4806
          Context.Int64x2Ty));
4807
      return;
4808
    }
4809
#endif
4810
4811
    // Fallback case.
4812
    // If the SSSE3 is not supported on the x86_64 platform or
4813
    // the NEON is not supported on the aarch64 platform,
4814
    // then fallback to this.
4815
0
    auto Mask = Builder.createVectorSplat(16, LLContext.getInt8(15));
4816
0
    auto Zero = Builder.createVectorSplat(16, LLContext.getInt8(0));
4817
0
    // IsOver marks lanes whose index is out of range (unsigned > 15);
    // InboundIndex keeps only the low four bits so every lookup below stays
    // inside the 16-entry scratch array.
    auto IsOver = Builder.createICmpUGT(Index, Mask);
4818
0
    auto InboundIndex = Builder.createAnd(Index, Mask);
4819
0
    // Scratch array that the 16 bytes of Vector are spilled into.
    // NOTE(review): createArray(16, 1) presumably means 16 elements of 1 byte
    // each -- confirm against the Builder wrapper.
    auto Array = Builder.createArray(16, 1);
4820
0
    for (size_t I = 0; I < 16; ++I) {
4821
0
      Builder.createStore(
4822
0
          Builder.createExtractElement(Vector, LLContext.getInt64(I)),
4823
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array,
4824
0
                                     LLContext.getInt64(I)));
4825
0
    }
4826
0
    // Rebuild the result one lane at a time by loading Array[InboundIndex[I]].
    LLVM::Value Ret = LLVM::Value::getUndef(Context.Int8x16Ty);
4827
0
    for (size_t I = 0; I < 16; ++I) {
4828
0
      auto Idx =
4829
0
          Builder.createExtractElement(InboundIndex, LLContext.getInt64(I));
4830
0
      auto Value = Builder.createLoad(
4831
0
          Context.Int8Ty,
4832
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array, Idx));
4833
0
      Ret = Builder.createInsertElement(Ret, Value, LLContext.getInt64(I));
4834
0
    }
4835
0
    // Lanes with an out-of-range index are replaced by zero.
    Ret = Builder.createSelect(IsOver, Zero, Ret);
4836
0
    stackPush(Builder.createBitCast(Ret, Context.Int64x2Ty));
4837
0
  }
4838
4839
134
  // Emits a rounding Q15 multiply of two 8 x i16 operands. In the generic
  // fallback each lane is computed as (LHS * RHS + 0x4000) >> 15 in 32-bit
  // arithmetic and truncated back to 16 bits; a lane that comes out as 0x8000
  // is then rewritten to 0x7FFF. The x86 path applies the same 0x8000 fix-up
  // to the PMULHRSW result; the AArch64 path returns the SQRDMULH result
  // directly.
  void compileVectorVectorQ15MulSat() noexcept {
4840
134
    compileVectorVectorOp(
4841
134
        Context.Int16x8Ty, [this](auto LHS, auto RHS) noexcept -> LLVM::Value {
4842
134
#if defined(__x86_64__)
4843
134
          if (Context.SupportSSSE3) {
4844
134
            assuming(LLVM::Core::X86SSSE3PMulHrSw128 !=
4845
134
                     LLVM::Core::NotIntrinsic);
4846
134
            auto Result = Builder.createIntrinsic(
4847
134
                LLVM::Core::X86SSSE3PMulHrSw128, {}, {LHS, RHS});
4848
134
            // Despite its name, IntMaxV is a splat of 0x8000 (the i16 minimum
            // bit pattern), and NotOver is all-ones exactly in the lanes where
            // Result equals 0x8000. XOR-ing with that mask turns 0x8000 into
            // 0x7FFF and leaves every other lane untouched.
            auto IntMaxV = Builder.createVectorSplat(
4849
134
                8, LLContext.getInt16(UINT16_C(0x8000)));
4850
134
            auto NotOver = Builder.createSExt(
4851
134
                Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4852
134
            return Builder.createXor(Result, NotOver);
4853
134
          }
4854
0
#endif
4855
4856
#if defined(__aarch64__)
4857
          if (Context.SupportNEON) {
4858
            assuming(LLVM::Core::AArch64NeonSQRDMulH !=
4859
                     LLVM::Core::NotIntrinsic);
4860
            // No fix-up is applied on this path.
            // NOTE(review): presumably SQRDMULH saturates by itself -- confirm.
            return Builder.createBinaryIntrinsic(
4861
                LLVM::Core::AArch64NeonSQRDMulH, LHS, RHS);
4862
          }
4863
#endif
4864
4865
          // Fallback case.
4866
          // If the SSSE3 is not supported on the x86_64 platform or
4867
          // the NEON is not supported on the aarch64 platform,
4868
          // then fallback to this.
4869
0
          auto ExtTy = Context.Int16x8Ty.getExtendedElementVectorType();
4870
0
          // 0x4000 is half of 1 << 15, i.e. the rounding term added before the
          // arithmetic shift right by 15.
          auto Offset = Builder.createVectorSplat(
4871
0
              8, LLContext.getInt32(UINT32_C(0x4000)));
4872
0
          auto Shift =
4873
0
              Builder.createVectorSplat(8, LLContext.getInt32(UINT32_C(15)));
4874
0
          auto ExtLHS = Builder.createSExt(LHS, ExtTy);
4875
0
          auto ExtRHS = Builder.createSExt(RHS, ExtTy);
4876
0
          auto Result = Builder.createTrunc(
4877
0
              Builder.createAShr(
4878
0
                  Builder.createAdd(Builder.createMul(ExtLHS, ExtRHS), Offset),
4879
0
                  Shift),
4880
0
              Context.Int16x8Ty);
4881
0
          // Same 0x8000 -> 0x7FFF fix-up as in the SSSE3 path above.
          auto IntMaxV = Builder.createVectorSplat(
4882
0
              8, LLContext.getInt16(UINT16_C(0x8000)));
4883
0
          auto NotOver = Builder.createSExt(
4884
0
              Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4885
0
          return Builder.createXor(Result, NotOver);
4886
134
        });
4887
134
  }
4888
267
  // Emits a lane-wise signed minimum: each lane takes LHS when LHS <= RHS
  // (signed compare) and RHS otherwise.
  void compileVectorVectorSMin(LLVM::Type VectorTy) noexcept {
4889
267
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4890
267
      auto C = Builder.createICmpSLE(LHS, RHS);
4891
267
      return Builder.createSelect(C, LHS, RHS);
4892
267
    });
4893
267
  }
4894
249
  // Emits a lane-wise unsigned minimum: each lane takes LHS when LHS <= RHS
  // (unsigned compare) and RHS otherwise.
  void compileVectorVectorUMin(LLVM::Type VectorTy) noexcept {
4895
249
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4896
249
      auto C = Builder.createICmpULE(LHS, RHS);
4897
249
      return Builder.createSelect(C, LHS, RHS);
4898
249
    });
4899
249
  }
4900
401
  // Emits a lane-wise signed maximum: each lane takes LHS when LHS >= RHS
  // (signed compare) and RHS otherwise.
  void compileVectorVectorSMax(LLVM::Type VectorTy) noexcept {
4901
401
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4902
401
      auto C = Builder.createICmpSGE(LHS, RHS);
4903
401
      return Builder.createSelect(C, LHS, RHS);
4904
401
    });
4905
401
  }
4906
755
  // Emits a lane-wise unsigned maximum: each lane takes LHS when LHS >= RHS
  // (unsigned compare) and RHS otherwise.
  void compileVectorVectorUMax(LLVM::Type VectorTy) noexcept {
4907
755
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4908
755
      auto C = Builder.createICmpUGE(LHS, RHS);
4909
755
      return Builder.createSelect(C, LHS, RHS);
4910
755
    });
4911
755
  }
4912
222
  // Emits a lane-wise unsigned rounding average of two vector operands. The
  // generic fallback computes (LHS + RHS + 1) >> 1 in lanes of twice the
  // width so the sum cannot overflow, then truncates back to VectorTy. The
  // x86 path only handles 8-bit and 16-bit lanes (any other width hits
  // assumingUnreachable).
  void compileVectorVectorUAvgr(LLVM::Type VectorTy) noexcept {
4913
222
    auto ExtendTy = VectorTy.getExtendedElementVectorType();
4914
222
    compileVectorVectorOp(
4915
222
        VectorTy,
4916
222
        [this, VectorTy, ExtendTy](auto LHS, auto RHS) noexcept -> LLVM::Value {
4917
222
#if defined(__x86_64__)
4918
222
          if (Context.SupportSSE2) {
4919
222
            // Pick the SSE2 averaging intrinsic that matches the lane width.
            const auto ID = [VectorTy]() noexcept {
4920
222
              switch (VectorTy.getElementType().getIntegerBitWidth()) {
4921
117
              case 8:
4922
117
                return LLVM::Core::X86SSE2PAvgB;
4923
105
              case 16:
4924
105
                return LLVM::Core::X86SSE2PAvgW;
4925
0
              default:
4926
0
                assumingUnreachable();
4927
222
              }
4928
222
            }();
4929
222
            assuming(ID != LLVM::Core::NotIntrinsic);
4930
222
            return Builder.createIntrinsic(ID, {}, {LHS, RHS});
4931
222
          }
4932
0
#endif
4933
4934
#if defined(__aarch64__)
4935
          if (Context.SupportNEON) {
4936
            assuming(LLVM::Core::AArch64NeonURHAdd != LLVM::Core::NotIntrinsic);
4937
            return Builder.createBinaryIntrinsic(LLVM::Core::AArch64NeonURHAdd,
4938
                                                 LHS, RHS);
4939
          }
4940
#endif
4941
4942
          // Fallback case.
4943
          // If the SSE2 is not supported on the x86_64 platform or
4944
          // the NEON is not supported on the aarch64 platform,
4945
          // then fallback to this.
4946
0
          auto EL = Builder.createZExt(LHS, ExtendTy);
4947
0
          auto ER = Builder.createZExt(RHS, ExtendTy);
4948
0
          // A splat of the i1 constant true, zero-extended to ExtendTy, gives
          // a vector of ones. It serves both as the rounding addend and as the
          // shift amount below.
          auto One = Builder.createZExt(
4949
0
              Builder.createVectorSplat(ExtendTy.getVectorSize(),
4950
0
                                        LLContext.getTrue()),
4951
0
              ExtendTy);
4952
0
          return Builder.createTrunc(
4953
0
              Builder.createLShr(
4954
0
                  Builder.createAdd(Builder.createAdd(EL, ER), One), One),
4955
0
              VectorTy);
4956
222
        });
4957
222
  }
4958
644
  // Emits a saturating narrow: pops two vectors viewed as FromTy, clamps every
  // lane to the range of the half-width target type, truncates both, and
  // pushes their concatenation (first-pushed operand in the low lanes).
  //   FromTy: source lane layout; only 16-bit and 32-bit lanes are handled
  //           (any other width hits assumingUnreachable).
  //   Signed: clamp to the signed range of the narrow type when true, to its
  //           unsigned range otherwise. The clamping compares are signed in
  //           both cases.
  void compileVectorNarrow(LLVM::Type FromTy, bool Signed) noexcept {
4959
644
    // Build the clamp bounds as scalar constants of the source lane width.
    auto [MinInt,
4960
644
          MaxInt] = [&]() noexcept -> std::tuple<LLVM::Value, LLVM::Value> {
4961
644
      switch (FromTy.getElementType().getIntegerBitWidth()) {
4962
250
      case 16: {
4963
250
        const auto Min =
4964
250
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::min()
4965
250
                                        : std::numeric_limits<uint8_t>::min());
4966
250
        const auto Max =
4967
250
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::max()
4968
250
                                        : std::numeric_limits<uint8_t>::max());
4969
250
        return {LLContext.getInt16(static_cast<uint16_t>(Min)),
4970
250
                LLContext.getInt16(static_cast<uint16_t>(Max))};
4971
0
      }
4972
394
      case 32: {
4973
394
        const auto Min =
4974
394
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::min()
4975
394
                                        : std::numeric_limits<uint16_t>::min());
4976
394
        const auto Max =
4977
394
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::max()
4978
394
                                        : std::numeric_limits<uint16_t>::max());
4979
394
        return {LLContext.getInt32(static_cast<uint32_t>(Min)),
4980
394
                LLContext.getInt32(static_cast<uint32_t>(Max))};
4981
0
      }
4982
0
      default:
4983
0
        assumingUnreachable();
4984
644
      }
4985
644
    }();
4986
644
    const auto Count = FromTy.getVectorSize();
4987
644
    auto VMin = Builder.createVectorSplat(Count, MinInt);
4988
644
    auto VMax = Builder.createVectorSplat(Count, MaxInt);
4989
4990
644
    auto TruncTy = FromTy.getTruncatedElementVectorType();
4991
4992
644
    // F2 is the operand on top of the stack (pushed last); clamp it, then
    // truncate.
    auto F2 = Builder.createBitCast(stackPop(), FromTy);
4993
644
    F2 = Builder.createSelect(Builder.createICmpSLT(F2, VMin), VMin, F2);
4994
644
    F2 = Builder.createSelect(Builder.createICmpSGT(F2, VMax), VMax, F2);
4995
644
    F2 = Builder.createTrunc(F2, TruncTy);
4996
4997
644
    // F1 is the operand pushed first; same clamp-and-truncate sequence.
    auto F1 = Builder.createBitCast(stackPop(), FromTy);
4998
644
    F1 = Builder.createSelect(Builder.createICmpSLT(F1, VMin), VMin, F1);
4999
644
    F1 = Builder.createSelect(Builder.createICmpSGT(F1, VMax), VMax, F1);
5000
644
    F1 = Builder.createTrunc(F1, TruncTy);
5001
5002
644
    // Identity mask 0 .. 2*Count-1: the shuffle concatenates F1 followed by
    // F2 into a single vector with twice as many lanes.
    std::vector<uint32_t> Mask(Count * 2);
5003
644
    std::iota(Mask.begin(), Mask.end(), 0);
5004
644
    stackPush(Builder.createBitCast(
5005
644
        Builder.createShuffleVector(
5006
644
            F1, F2, LLVM::Value::getConstVector32(LLContext, Mask)),
5007
644
        Context.Int64x2Ty));
5008
644
  }
5009
5.55k
  // Emits a widening of one half of the vector on top of the stack, replacing
  // that stack slot in place (nothing is popped or pushed).
  //   FromTy: lane layout of the source vector.
  //   Signed: sign-extend when true, zero-extend otherwise.
  //   Low:    keep lanes [0, Count/2) when true, lanes [Count/2, Count)
  //           otherwise.
  void compileVectorExtend(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5010
5.55k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5011
5.55k
    const auto Count = FromTy.getVectorSize();
5012
5.55k
    // Mask holds Count/2 consecutive lane indices starting at the chosen half.
    std::vector<uint32_t> Mask(Count / 2);
5013
5.55k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5014
5.55k
    auto R = Builder.createBitCast(Stack.back(), FromTy);
5015
5.55k
    if (Signed) {
5016
2.34k
      R = Builder.createSExt(R, ExtTy);
5017
3.21k
    } else {
5018
3.21k
      R = Builder.createZExt(R, ExtTy);
5019
3.21k
    }
5020
5.55k
    // All lanes were widened above; the shuffle now keeps only the selected
    // half. The second shuffle operand is undef because Mask never indexes
    // past the first operand.
    R = Builder.createShuffleVector(
5021
5.55k
        R, LLVM::Value::getUndef(ExtTy),
5022
5.55k
        LLVM::Value::getConstVector32(LLContext, Mask));
5023
5.55k
    Stack.back() = Builder.createBitCast(R, Context.Int64x2Ty);
5024
5.55k
  }
5025
1.79k
  // Emits an extended multiply: pops two vectors, widens the selected half of
  // each to double-width lanes, multiplies them lane-wise, and pushes the
  // product.
  //   FromTy: lane layout of the source vectors.
  //   Signed: sign-extend the inputs when true, zero-extend otherwise.
  //   Low:    use lanes [0, Count/2) when true, lanes [Count/2, Count)
  //           otherwise.
  void compileVectorExtMul(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5026
1.79k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5027
1.79k
    const auto Count = FromTy.getVectorSize();
5028
1.79k
    std::vector<uint32_t> Mask(Count / 2);
5029
1.79k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5030
3.59k
    // Helper applied to both operands: view as FromTy, widen every lane, then
    // keep only the half selected by Mask.
    auto Extend = [this, FromTy, Signed, ExtTy, &Mask](LLVM::Value R) noexcept {
5031
3.59k
      R = Builder.createBitCast(R, FromTy);
5032
3.59k
      if (Signed) {
5033
1.66k
        R = Builder.createSExt(R, ExtTy);
5034
1.93k
      } else {
5035
1.93k
        R = Builder.createZExt(R, ExtTy);
5036
1.93k
      }
5037
3.59k
      return Builder.createShuffleVector(
5038
3.59k
          R, LLVM::Value::getUndef(ExtTy),
5039
3.59k
          LLVM::Value::getConstVector32(LLContext, Mask));
5040
3.59k
    };
5041
1.79k
    // The right-hand operand is on top of the stack, so it is popped first.
    auto RHS = Extend(stackPop());
5042
1.79k
    auto LHS = Extend(stackPop());
5043
1.79k
    stackPush(
5044
1.79k
        Builder.createBitCast(Builder.createMul(RHS, LHS), Context.Int64x2Ty));
5045
1.79k
  }
5046
2.21k
  // Emits an extending pairwise add: each pair of adjacent lanes of the input
  // is widened to twice the lane width and summed, giving a vector with half
  // as many lanes (ExtTy). The generic fallback at the bottom spells out the
  // computation; the target-specific paths use dedicated intrinsics.
  //   VectorTy: lane layout of the input.
  //   Signed:   treat input lanes as signed when true, unsigned otherwise.
  // NOTE(review): compileVectorOp is defined outside this excerpt; presumably
  // it pops one operand, bitcasts it to VectorTy, and pushes the lambda's
  // result -- confirm.
  void compileVectorExtAddPairwise(LLVM::Type VectorTy, bool Signed) noexcept {
5047
2.21k
    compileVectorOp(
5048
2.21k
        VectorTy, [this, VectorTy, Signed](auto V) noexcept -> LLVM::Value {
5049
2.21k
          auto ExtTy = VectorTy.getExtendedElementVectorType()
5050
2.21k
                           .getHalfElementsVectorType();
5051
2.21k
#if defined(__x86_64__)
5052
2.21k
          const auto Count = VectorTy.getVectorSize();
5053
2.21k
          if (Context.SupportXOP) {
5054
0
            // XOP has one horizontal-add intrinsic per lane count and
            // signedness combination.
            const auto ID = [Count, Signed]() noexcept {
5055
0
              switch (Count) {
5056
0
              case 8:
5057
0
                return Signed ? LLVM::Core::X86XOpVPHAddWD
5058
0
                              : LLVM::Core::X86XOpVPHAddUWD;
5059
0
              case 16:
5060
0
                return Signed ? LLVM::Core::X86XOpVPHAddBW
5061
0
                              : LLVM::Core::X86XOpVPHAddUBW;
5062
0
              default:
5063
0
                assumingUnreachable();
5064
0
              }
5065
0
            }();
5066
0
            assuming(ID != LLVM::Core::NotIntrinsic);
5067
0
            return Builder.createUnaryIntrinsic(ID, V);
5068
0
          }
5069
2.21k
          if (Context.SupportSSSE3 && Count == 16) {
5070
624
            assuming(LLVM::Core::X86SSSE3PMAddUbSw128 !=
5071
624
                     LLVM::Core::NotIntrinsic);
5072
624
            // The two branches differ only in operand order: V goes second for
            // the signed case and first for the unsigned case, paired with a
            // splat of 1.
            // NOTE(review): presumably PMADDUBSW treats its first operand as
            // unsigned bytes and its second as signed bytes -- confirm against
            // the ISA reference.
            if (Signed) {
5073
295
              return Builder.createIntrinsic(
5074
295
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5075
295
                  {Builder.createVectorSplat(16, LLContext.getInt8(1)), V});
5076
329
            } else {
5077
329
              return Builder.createIntrinsic(
5078
329
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5079
329
                  {V, Builder.createVectorSplat(16, LLContext.getInt8(1))});
5080
329
            }
5081
624
          }
5082
1.59k
          if (Context.SupportSSE2 && Count == 8) {
5083
1.59k
            assuming(LLVM::Core::X86SSE2PMAddWd != LLVM::Core::NotIntrinsic);
5084
1.59k
            if (Signed) {
5085
1.14k
              // Multiplying by a splat of 1 turns the multiply-add into a
              // plain add of adjacent lanes.
              return Builder.createIntrinsic(
5086
1.14k
                  LLVM::Core::X86SSE2PMAddWd, {},
5087
1.14k
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5088
1.14k
            } else {
5089
453
              // Unsigned case built on the same intrinsic: flipping bit 15
              // subtracts 0x8000 from each lane's unsigned value when read as
              // signed, so every pair sum is short by 2 * 0x8000 = 0x10000,
              // which is added back afterwards.
              // NOTE(review): assumes PMADDWD operates on signed 16-bit lanes
              // -- confirm.
              V = Builder.createXor(
5090
453
                  V, Builder.createVectorSplat(8, LLContext.getInt16(0x8000)));
5091
453
              V = Builder.createIntrinsic(
5092
453
                  LLVM::Core::X86SSE2PMAddWd, {},
5093
453
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5094
453
              return Builder.createAdd(
5095
453
                  V, Builder.createVectorSplat(4, LLContext.getInt32(0x10000)));
5096
453
            }
5097
1.59k
          }
5098
0
#endif
5099
5100
#if defined(__aarch64__)
5101
          if (Context.SupportNEON) {
5102
            const auto ID = Signed ? LLVM::Core::AArch64NeonSAddLP
5103
                                   : LLVM::Core::AArch64NeonUAddLP;
5104
            assuming(ID != LLVM::Core::NotIntrinsic);
5105
            return Builder.createIntrinsic(ID, {ExtTy, VectorTy}, {V});
5106
          }
5107
#endif
5108
5109
          // Fallback case.
5110
          // If the XOP, SSSE3, or SSE2 is not supported on the x86_64 platform
5111
          // or the NEON is not supported on the aarch64 platform,
5112
          // then fallback to this.
5113
0
          // Width is the source lane width in bits, splatted as the shift
          // amount for the double-width lanes.
          auto Width = LLVM::Value::getConstInt(
5114
0
              ExtTy.getElementType(),
5115
0
              VectorTy.getElementType().getIntegerBitWidth());
5116
0
          Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5117
0
          // Reinterpret the input so each wide lane holds one pair of source
          // lanes. L is the upper half of each wide lane (shift right); R is
          // the lower half (shift left, then right). Arithmetic shifts
          // sign-extend the halves, logical shifts zero-extend them.
          auto EV = Builder.createBitCast(V, ExtTy);
5118
0
          LLVM::Value L, R;
5119
0
          if (Signed) {
5120
0
            L = Builder.createAShr(EV, Width);
5121
0
            R = Builder.createAShr(Builder.createShl(EV, Width), Width);
5122
0
          } else {
5123
0
            L = Builder.createLShr(EV, Width);
5124
0
            R = Builder.createLShr(Builder.createShl(EV, Width), Width);
5125
0
          }
5126
0
          return Builder.createAdd(L, R);
5127
1.59k
        });
5128
2.21k
  }
5129
544
  // Emits the Fabs intrinsic on a vector operand viewed as VectorTy.
  // NOTE(review): compileVectorOp is defined outside this excerpt; presumably
  // it pops the operand and pushes the lambda's result -- confirm.
  void compileVectorFAbs(LLVM::Type VectorTy) noexcept {
5130
544
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5131
544
      assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
5132
544
      return Builder.createUnaryIntrinsic(LLVM::Core::Fabs, V);
5133
544
    });
5134
544
  }
5135
937
  // Emits a floating-point negate (createFNeg) on a vector operand viewed as
  // VectorTy, via compileVectorOp.
  void compileVectorFNeg(LLVM::Type VectorTy) noexcept {
5136
937
    compileVectorOp(VectorTy,
5137
937
                    [this](auto V) noexcept { return Builder.createFNeg(V); });
5138
937
  }
5139
315
  // Emits the Sqrt intrinsic on a vector operand viewed as VectorTy, via
  // compileVectorOp.
  void compileVectorFSqrt(LLVM::Type VectorTy) noexcept {
5140
315
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5141
315
      assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
5142
315
      return Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, V);
5143
315
    });
5144
315
  }
5145
1.25k
  // Emits the Ceil intrinsic on a vector operand viewed as VectorTy, via
  // compileVectorOp.
  void compileVectorFCeil(LLVM::Type VectorTy) noexcept {
5146
1.25k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5147
1.25k
      assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
5148
1.25k
      return Builder.createUnaryIntrinsic(LLVM::Core::Ceil, V);
5149
1.25k
    });
5150
1.25k
  }
5151
2.08k
  // Emits the Floor intrinsic on a vector operand viewed as VectorTy, via
  // compileVectorOp.
  void compileVectorFFloor(LLVM::Type VectorTy) noexcept {
5152
2.08k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5153
2.08k
      assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
5154
2.08k
      return Builder.createUnaryIntrinsic(LLVM::Core::Floor, V);
5155
2.08k
    });
5156
2.08k
  }
5157
1.65k
  // Emits the Trunc intrinsic on a vector operand viewed as VectorTy, via
  // compileVectorOp.
  void compileVectorFTrunc(LLVM::Type VectorTy) noexcept {
5158
1.65k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5159
1.65k
      assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
5160
1.65k
      return Builder.createUnaryIntrinsic(LLVM::Core::Trunc, V);
5161
1.65k
    });
5162
1.65k
  }
5163
354
  // Emits a floating-point round-to-nearest on a vector operand viewed as
  // VectorTy. Candidates are tried in this order: the Roundeven intrinsic
  // (only compiled in for LLVM 12 and later), an SSE4.1 rounding intrinsic on
  // x86_64, a NEON rounding intrinsic on aarch64, and finally Nearbyint.
  void compileVectorFNearest(LLVM::Type VectorTy) noexcept {
5164
354
    compileVectorOp(VectorTy, [&](auto V) noexcept {
5165
354
#if LLVM_VERSION_MAJOR >= 12
5166
354
      // The assuming() and the if below test the same condition.
      assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
5167
354
      if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
5168
354
        return Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, V);
5169
354
      }
5170
0
#endif
5171
5172
0
#if defined(__x86_64__)
5173
0
      if (Context.SupportSSE4_1) {
5174
0
        // Choose the single- or double-precision variant from the lane type.
        const bool IsFloat = VectorTy.getElementType().isFloatTy();
5175
0
        auto ID =
5176
0
            IsFloat ? LLVM::Core::X86SSE41RoundPs : LLVM::Core::X86SSE41RoundPd;
5177
0
        assuming(ID != LLVM::Core::NotIntrinsic);
5178
0
        // NOTE(review): the immediate 8 presumably selects round-to-nearest
        // with floating-point exceptions suppressed -- confirm against the
        // ROUNDPS/ROUNDPD encoding.
        return Builder.createIntrinsic(ID, {}, {V, LLContext.getInt32(8)});
5179
0
      }
5180
0
#endif
5181
5182
#if defined(__aarch64__)
5183
      if (Context.SupportNEON &&
5184
          LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
5185
        return Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, V);
5186
      }
5187
#endif
5188
5189
      // Fallback case.
5190
      // If the SSE4.1 is not supported on the x86_64 platform or
5191
      // the NEON is not supported on the aarch64 platform,
5192
      // then fallback to this.
5193
0
      assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
5194
0
      return Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, V);
5195
0
    });
5196
354
  }
5197
185
  // Emits a lane-wise floating-point add of two vector operands viewed as
  // VectorTy; stack handling and casts are done by compileVectorVectorOp.
  void compileVectorVectorFAdd(LLVM::Type VectorTy) noexcept {
5198
185
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5199
185
      return Builder.createFAdd(LHS, RHS);
5200
185
    });
5201
185
  }
5202
446
  // Emits a lane-wise floating-point subtract (LHS - RHS) of two vector
  // operands viewed as VectorTy. LHS is the operand that was pushed first.
  void compileVectorVectorFSub(LLVM::Type VectorTy) noexcept {
5203
446
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5204
446
      return Builder.createFSub(LHS, RHS);
5205
446
    });
5206
446
  }
5207
181
  // Emits a lane-wise floating-point multiply of two vector operands viewed
  // as VectorTy; stack handling and casts are done by compileVectorVectorOp.
  void compileVectorVectorFMul(LLVM::Type VectorTy) noexcept {
5208
181
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5209
181
      return Builder.createFMul(LHS, RHS);
5210
181
    });
5211
181
  }
5212
222
  // Emits a lane-wise floating-point divide (LHS / RHS) of two vector
  // operands viewed as VectorTy. LHS is the operand that was pushed first.
  void compileVectorVectorFDiv(LLVM::Type VectorTy) noexcept {
5213
222
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5214
222
      return Builder.createFDiv(LHS, RHS);
5215
222
    });
5216
222
  }
5217
284
  // Emits a lane-wise floating-point minimum built from a chain of selects.
  // A later select overrides an earlier one, so the effective priority per
  // lane is:
  //   1. LHS is NaN             -> LHS
  //   2. RHS is NaN             -> RHS
  //   3. LHS < RHS (ordered)    -> LHS
  //   4. LHS > RHS (ordered)    -> RHS
  //   5. otherwise (they compare equal) -> bitwise OR of both operands
  void compileVectorVectorFMin(LLVM::Type VectorTy) noexcept {
5218
284
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5219
284
      // An unordered self-compare is true exactly for NaN lanes.
      auto LNaN = Builder.createFCmpUNO(LHS, LHS);
5220
284
      auto RNaN = Builder.createFCmpUNO(RHS, RHS);
5221
284
      auto OLT = Builder.createFCmpOLT(LHS, RHS);
5222
284
      auto OGT = Builder.createFCmpOGT(LHS, RHS);
5223
284
      // Starting value for lanes where neither compare holds.
      // NOTE(review): the OR presumably makes min(+0.0, -0.0) come out as
      // -0.0 by merging the sign bits -- confirm.
      auto Ret = Builder.createBitCast(
5224
284
          Builder.createOr(Builder.createBitCast(LHS, Context.Int64x2Ty),
5225
284
                           Builder.createBitCast(RHS, Context.Int64x2Ty)),
5226
284
          LHS.getType());
5227
284
      Ret = Builder.createSelect(OGT, RHS, Ret);
5228
284
      Ret = Builder.createSelect(OLT, LHS, Ret);
5229
284
      Ret = Builder.createSelect(RNaN, RHS, Ret);
5230
284
      Ret = Builder.createSelect(LNaN, LHS, Ret);
5231
284
      return Ret;
5232
284
    });
5233
284
  }
5234
235
  // Emits a lane-wise floating-point maximum built from a chain of selects.
  // A later select overrides an earlier one, so the effective priority per
  // lane is:
  //   1. LHS is NaN             -> LHS
  //   2. RHS is NaN             -> RHS
  //   3. LHS > RHS (ordered)    -> LHS
  //   4. LHS < RHS (ordered)    -> RHS
  //   5. otherwise (they compare equal) -> bitwise AND of both operands
  void compileVectorVectorFMax(LLVM::Type VectorTy) noexcept {
5235
235
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5236
235
      // An unordered self-compare is true exactly for NaN lanes.
      auto LNaN = Builder.createFCmpUNO(LHS, LHS);
5237
235
      auto RNaN = Builder.createFCmpUNO(RHS, RHS);
5238
235
      auto OLT = Builder.createFCmpOLT(LHS, RHS);
5239
235
      auto OGT = Builder.createFCmpOGT(LHS, RHS);
5240
235
      // Starting value for lanes where neither compare holds.
      // NOTE(review): the AND presumably makes max(+0.0, -0.0) come out as
      // +0.0 by clearing the sign bit unless both are negative -- confirm.
      auto Ret = Builder.createBitCast(
5241
235
          Builder.createAnd(Builder.createBitCast(LHS, Context.Int64x2Ty),
5242
235
                            Builder.createBitCast(RHS, Context.Int64x2Ty)),
5243
235
          LHS.getType());
5244
235
      Ret = Builder.createSelect(OLT, RHS, Ret);
5245
235
      Ret = Builder.createSelect(OGT, LHS, Ret);
5246
235
      Ret = Builder.createSelect(RNaN, RHS, Ret);
5247
235
      Ret = Builder.createSelect(LNaN, LHS, Ret);
5248
235
      return Ret;
5249
235
    });
5250
235
  }
5251
299
  // Emits a lane-wise "pseudo minimum": each lane takes RHS when RHS < LHS
  // (ordered compare) and LHS otherwise, so LHS is kept whenever the compare
  // is false, including when either input is NaN.
  void compileVectorVectorFPMin(LLVM::Type VectorTy) noexcept {
5252
299
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5253
299
      auto Cmp = Builder.createFCmpOLT(RHS, LHS);
5254
299
      return Builder.createSelect(Cmp, RHS, LHS);
5255
299
    });
5256
299
  }
5257
314
  // Emits a lane-wise "pseudo maximum": each lane takes RHS when RHS > LHS
  // (ordered compare) and LHS otherwise, so LHS is kept whenever the compare
  // is false, including when either input is NaN.
  void compileVectorVectorFPMax(LLVM::Type VectorTy) noexcept {
5258
314
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5259
314
      auto Cmp = Builder.createFCmpOGT(RHS, LHS);
5260
314
      return Builder.createSelect(Cmp, RHS, LHS);
5261
314
    });
5262
314
  }
5263
950
  // Emits a saturating float-to-signed-i32 conversion on a vector operand.
  // Per lane: NaN becomes 0, values below the int32 minimum become the int32
  // minimum, values not below the converted int32 maximum become the int32
  // maximum, and everything else is converted with createFPToSI.
  //   VectorTy: floating-point lane layout of the input.
  //   PadZero:  when true, the result is extended to twice as many lanes by
  //             appending zero lanes.
  void compileVectorTruncSatS32(LLVM::Type VectorTy, bool PadZero) noexcept {
5264
950
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto V) noexcept {
5265
950
      const auto Size = VectorTy.getVectorSize();
5266
950
      auto FPTy = VectorTy.getElementType();
5267
950
      auto IntMin = LLContext.getInt32(
5268
950
          static_cast<uint32_t>(std::numeric_limits<int32_t>::min()));
5269
950
      auto IntMax = LLContext.getInt32(
5270
950
          static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
5271
950
      auto IntMinV = Builder.createVectorSplat(Size, IntMin);
5272
950
      auto IntMaxV = Builder.createVectorSplat(Size, IntMax);
5273
950
      auto IntZeroV = LLVM::Value::getConstNull(IntMinV.getType());
5274
950
      // The integer bounds converted to the input's floating-point type, used
      // for the range checks below.
      auto FPMin = Builder.createSIToFP(IntMin, FPTy);
5275
950
      auto FPMax = Builder.createSIToFP(IntMax, FPTy);
5276
950
      auto FPMinV = Builder.createVectorSplat(Size, FPMin);
5277
950
      auto FPMaxV = Builder.createVectorSplat(Size, FPMax);
5278
5279
950
      // Normal is false only for NaN lanes. The two range checks are
      // unordered compares, so they are also true for NaN lanes and leave the
      // zero chosen by the Normal select in place.
      auto Normal = Builder.createFCmpORD(V, V);
5280
950
      auto NotUnder = Builder.createFCmpUGE(V, FPMinV);
5281
950
      auto NotOver = Builder.createFCmpULT(V, FPMaxV);
5282
950
      V = Builder.createFPToSI(
5283
950
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Size));
5284
950
      V = Builder.createSelect(Normal, V, IntZeroV);
5285
950
      V = Builder.createSelect(NotUnder, V, IntMinV);
5286
950
      V = Builder.createSelect(NotOver, V, IntMaxV);
5287
950
      if (PadZero) {
5288
740
        // Identity mask 0 .. 2*Size-1: concatenates V with a zero vector.
        std::vector<uint32_t> Mask(Size * 2);
5289
740
        std::iota(Mask.begin(), Mask.end(), 0);
5290
740
        V = Builder.createShuffleVector(
5291
740
            V, IntZeroV, LLVM::Value::getConstVector32(LLContext, Mask));
5292
740
      }
5293
950
      return V;
5294
950
    });
5295
950
  }
5296
5.79k
  // Emits a saturating float-to-unsigned-i32 conversion on a vector operand.
  // Per lane: NaN and values below 0 become 0, values not below the converted
  // uint32 maximum become the uint32 maximum, and everything else is converted
  // with createFPToUI.
  //   VectorTy: floating-point lane layout of the input.
  //   PadZero:  when true, the result is extended to twice as many lanes by
  //             appending zero lanes.
  void compileVectorTruncSatU32(LLVM::Type VectorTy, bool PadZero) noexcept {
5297
5.79k
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto V) noexcept {
5298
5.79k
      const auto Size = VectorTy.getVectorSize();
5299
5.79k
      auto FPTy = VectorTy.getElementType();
5300
5.79k
      auto IntMin = LLContext.getInt32(std::numeric_limits<uint32_t>::min());
5301
5.79k
      auto IntMax = LLContext.getInt32(std::numeric_limits<uint32_t>::max());
5302
5.79k
      auto IntMinV = Builder.createVectorSplat(Size, IntMin);
5303
5.79k
      auto IntMaxV = Builder.createVectorSplat(Size, IntMax);
5304
5.79k
      // The integer bounds converted to the input's floating-point type, used
      // for the range checks below.
      auto FPMin = Builder.createUIToFP(IntMin, FPTy);
5305
5.79k
      auto FPMax = Builder.createUIToFP(IntMax, FPTy);
5306
5.79k
      auto FPMinV = Builder.createVectorSplat(Size, FPMin);
5307
5.79k
      auto FPMaxV = Builder.createVectorSplat(Size, FPMax);
5308
5309
5.79k
      // NotUnder is an ordered compare, so it is false for NaN lanes and they
      // receive IntMinV (zero). NotOver is unordered, so it stays true for
      // NaN lanes and does not overwrite that zero.
      auto NotUnder = Builder.createFCmpOGE(V, FPMinV);
5310
5.79k
      auto NotOver = Builder.createFCmpULT(V, FPMaxV);
5311
5.79k
      V = Builder.createFPToUI(
5312
5.79k
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Size));
5313
5.79k
      V = Builder.createSelect(NotUnder, V, IntMinV);
5314
5.79k
      V = Builder.createSelect(NotOver, V, IntMaxV);
5315
5.79k
      if (PadZero) {
5316
2.11k
        // Identity mask 0 .. 2*Size-1: concatenates V with a zero vector.
        auto IntZeroV = LLVM::Value::getConstNull(IntMinV.getType());
5317
2.11k
        std::vector<uint32_t> Mask(Size * 2);
5318
2.11k
        std::iota(Mask.begin(), Mask.end(), 0);
5319
2.11k
        V = Builder.createShuffleVector(
5320
2.11k
            V, IntZeroV, LLVM::Value::getConstVector32(LLContext, Mask));
5321
2.11k
      }
5322
5.79k
      return V;
5323
5.79k
    });
5324
5.79k
  }
5325
  void compileVectorConvertS(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5326
644
                             bool Low) noexcept {
5327
644
    compileVectorOp(VectorTy,
5328
644
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5329
644
                      if (Low) {
5330
329
                        const auto Size = VectorTy.getVectorSize() / 2;
5331
329
                        std::vector<uint32_t> Mask(Size);
5332
329
                        std::iota(Mask.begin(), Mask.end(), 0);
5333
329
                        V = Builder.createShuffleVector(
5334
329
                            V, LLVM::Value::getUndef(VectorTy),
5335
329
                            LLVM::Value::getConstVector32(LLContext, Mask));
5336
329
                      }
5337
644
                      return Builder.createSIToFP(V, FPVectorTy);
5338
644
                    });
5339
644
  }
5340
  void compileVectorConvertU(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5341
1.94k
                             bool Low) noexcept {
5342
1.94k
    compileVectorOp(VectorTy,
5343
1.94k
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5344
1.94k
                      if (Low) {
5345
1.23k
                        const auto Size = VectorTy.getVectorSize() / 2;
5346
1.23k
                        std::vector<uint32_t> Mask(Size);
5347
1.23k
                        std::iota(Mask.begin(), Mask.end(), 0);
5348
1.23k
                        V = Builder.createShuffleVector(
5349
1.23k
                            V, LLVM::Value::getUndef(VectorTy),
5350
1.23k
                            LLVM::Value::getConstVector32(LLContext, Mask));
5351
1.23k
                      }
5352
1.94k
                      return Builder.createUIToFP(V, FPVectorTy);
5353
1.94k
                    });
5354
1.94k
  }
5355
563
  void compileVectorDemote() noexcept {
5356
563
    compileVectorOp(Context.Doublex2Ty, [this](auto V) noexcept {
5357
563
      auto Demoted = Builder.createFPTrunc(
5358
563
          V, LLVM::Type::getVectorType(Context.FloatTy, 2));
5359
563
      auto ZeroV = LLVM::Value::getConstNull(Demoted.getType());
5360
563
      return Builder.createShuffleVector(
5361
563
          Demoted, ZeroV,
5362
563
          LLVM::Value::getConstVector32(LLContext, {0u, 1u, 2u, 3u}));
5363
563
    });
5364
563
  }
5365
553
  void compileVectorPromote() noexcept {
5366
553
    compileVectorOp(Context.Floatx4Ty, [this](auto V) noexcept {
5367
553
      auto UndefV = LLVM::Value::getUndef(V.getType());
5368
553
      auto Low = Builder.createShuffleVector(
5369
553
          V, UndefV, LLVM::Value::getConstVector32(LLContext, {0u, 1u}));
5370
553
      return Builder.createFPExt(
5371
553
          Low, LLVM::Type::getVectorType(Context.DoubleTy, 2));
5372
553
    });
5373
553
  }
5374
5375
0
  // Lowers a vector multiply-add: pops C, RHS and LHS (in that order), views
  // each as `VectorTy`, and pushes (LHS * RHS) + C bit-cast to Int64x2Ty. The
  // multiply and the add are emitted as two separate instructions.
  void compileVectorVectorMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Right = Builder.createBitCast(stackPop(), VectorTy);
    auto Left = Builder.createBitCast(stackPop(), VectorTy);
    auto Product = Builder.createFMul(Left, Right);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5383
5384
0
  // Lowers a negated vector multiply-add: pops C, RHS and LHS (in that
  // order), views each as `VectorTy`, and pushes (-LHS * RHS) + C bit-cast to
  // Int64x2Ty. Negate, multiply and add are emitted as separate instructions.
  void compileVectorVectorNMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Right = Builder.createBitCast(stackPop(), VectorTy);
    auto Left = Builder.createBitCast(stackPop(), VectorTy);
    auto NegLeft = Builder.createFNeg(Left);
    auto Product = Builder.createFMul(NegLeft, Right);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5392
5393
0
  void compileVectorRelaxedIntegerDotProduct() noexcept {
5394
0
    auto OriTy = Context.Int8x16Ty;
5395
0
    auto ExtTy = Context.Int16x8Ty;
5396
0
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5397
0
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5398
0
#if defined(__x86_64__)
5399
0
    if (Context.SupportSSSE3) {
5400
0
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5401
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5402
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5403
      // side to match the WebAssembly spec
5404
0
      return stackPush(Builder.createBitCast(
5405
0
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5406
0
                                  {RHS, LHS}),
5407
0
          Context.Int64x2Ty));
5408
0
    }
5409
0
#endif
5410
0
    auto Width = LLVM::Value::getConstInt(
5411
0
        ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5412
0
    Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5413
0
    auto EA = Builder.createBitCast(LHS, ExtTy);
5414
0
    auto EB = Builder.createBitCast(RHS, ExtTy);
5415
5416
0
    LLVM::Value AL, AR, BL, BR;
5417
0
    AL = Builder.createAShr(EA, Width);
5418
0
    AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5419
0
    BL = Builder.createAShr(EB, Width);
5420
0
    BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5421
5422
0
    return stackPush(Builder.createBitCast(
5423
0
        Builder.createAdd(Builder.createMul(AL, BL), Builder.createMul(AR, BR)),
5424
0
        Context.Int64x2Ty));
5425
0
  }
5426
5427
0
  void compileVectorRelaxedIntegerDotProductAdd() noexcept {
5428
0
    auto OriTy = Context.Int8x16Ty;
5429
0
    auto ExtTy = Context.Int16x8Ty;
5430
0
    auto FinTy = Context.Int32x4Ty;
5431
0
    auto VC = Builder.createBitCast(stackPop(), FinTy);
5432
0
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5433
0
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5434
0
    LLVM::Value IM;
5435
0
#if defined(__x86_64__)
5436
0
    if (Context.SupportSSSE3) {
5437
0
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5438
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5439
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5440
      // side to match the WebAssembly spec
5441
0
      IM = Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5442
0
                                   {RHS, LHS});
5443
0
    } else
5444
0
#endif
5445
0
    {
5446
0
      auto Width = LLVM::Value::getConstInt(
5447
0
          ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5448
0
      Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5449
0
      auto EA = Builder.createBitCast(LHS, ExtTy);
5450
0
      auto EB = Builder.createBitCast(RHS, ExtTy);
5451
5452
0
      LLVM::Value AL, AR, BL, BR;
5453
0
      AL = Builder.createAShr(EA, Width);
5454
0
      AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5455
0
      BL = Builder.createAShr(EB, Width);
5456
0
      BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5457
0
      IM = Builder.createAdd(Builder.createMul(AL, BL),
5458
0
                             Builder.createMul(AR, BR));
5459
0
    }
5460
5461
0
    auto Width = LLVM::Value::getConstInt(
5462
0
        FinTy.getElementType(), ExtTy.getElementType().getIntegerBitWidth());
5463
0
    Width = Builder.createVectorSplat(FinTy.getVectorSize(), Width);
5464
0
    auto IME = Builder.createBitCast(IM, FinTy);
5465
0
    auto L = Builder.createAShr(IME, Width);
5466
0
    auto R = Builder.createAShr(Builder.createShl(IME, Width), Width);
5467
5468
0
    return stackPush(Builder.createBitCast(
5469
0
        Builder.createAdd(Builder.createAdd(L, R), VC), Context.Int64x2Ty));
5470
0
  }
5471
5472
  void
5473
  enterBlock(LLVM::BasicBlock JumpBlock, LLVM::BasicBlock NextBlock,
5474
             LLVM::BasicBlock ElseBlock, std::vector<LLVM::Value> Args,
5475
             std::pair<std::vector<ValType>, std::vector<ValType>> Type,
5476
             std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5477
18.9k
                 ReturnPHI = {}) noexcept {
5478
18.9k
    assuming(Type.first.size() == Args.size());
5479
18.9k
    for (auto &Value : Args) {
5480
3.89k
      stackPush(Value);
5481
3.89k
    }
5482
18.9k
    const auto Unreachable = isUnreachable();
5483
18.9k
    ControlStack.emplace_back(Stack.size() - Args.size(), Unreachable,
5484
18.9k
                              JumpBlock, NextBlock, ElseBlock, std::move(Args),
5485
18.9k
                              std::move(Type), std::move(ReturnPHI));
5486
18.9k
  }
5487
5488
18.9k
  // Closes the innermost control frame and returns it.
  //
  // If the frame is still reachable, its result values (if any) are popped
  // and recorded in the frame's ReturnPHI list together with the current
  // insert block, and a branch to the continuation block is emitted;
  // otherwise an `unreachable` terminator is emitted. The builder is then
  // positioned at the continuation block (NextBlock when set, JumpBlock
  // otherwise) and the value stack is cut back to the height recorded when
  // the frame was entered.
  //
  // Precondition: at least one frame is open.
  Control leaveBlock() noexcept {
    // back()/pop_back() on an empty vector is undefined behaviour; state the
    // precondition the same way stackPop and setLableJumpPHI do.
    assuming(!ControlStack.empty());
    Control Entry = std::move(ControlStack.back());
    ControlStack.pop_back();

    auto NextBlock = Entry.NextBlock ? Entry.NextBlock : Entry.JumpBlock;
    if (!Entry.Unreachable) {
      const auto &ReturnType = Entry.Type.second;
      if (!ReturnType.empty()) {
        // Values come off the stack last-result-first, so fill back to front.
        std::vector<LLVM::Value> Rets(ReturnType.size());
        for (auto It = Rets.rbegin(); It != Rets.rend(); ++It) {
          *It = stackPop();
        }
        Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
      }
      Builder.createBr(NextBlock);
    } else {
      Builder.createUnreachable();
    }
    Builder.positionAtEnd(NextBlock);
    // Erasing from an iterator past end() would be undefined behaviour.
    assuming(Stack.size() >= Entry.StackSize);
    Stack.erase(Stack.begin() + static_cast<int64_t>(Entry.StackSize),
                Stack.end());
    return Entry;
  }
5512
5513
4.70k
  void checkStop() noexcept {
5514
4.70k
    if (!Interruptible) {
5515
4.70k
      return;
5516
4.70k
    }
5517
0
    auto NotStopBB = LLVM::BasicBlock::create(LLContext, F.Fn, "NotStop");
5518
0
    auto StopToken = Builder.createAtomicRMW(
5519
0
        LLVMAtomicRMWBinOpXchg, Context.getStopToken(Builder, ExecCtx),
5520
0
        LLContext.getInt32(0), LLVMAtomicOrderingMonotonic);
5521
#if LLVM_VERSION_MAJOR >= 13
5522
    StopToken.setAlignment(32);
5523
#endif
5524
0
    auto NotStop = Builder.createLikely(
5525
0
        Builder.createICmpEQ(StopToken, LLContext.getInt32(0)));
5526
0
    Builder.createCondBr(NotStop, NotStopBB,
5527
0
                         getTrapBB(ErrCode::Value::Interrupted));
5528
5529
0
    Builder.positionAtEnd(NotStopBB);
5530
0
  }
5531
5532
5.06k
  void setUnreachable() noexcept {
5533
5.06k
    if (ControlStack.empty()) {
5534
0
      IsUnreachable = true;
5535
5.06k
    } else {
5536
5.06k
      ControlStack.back().Unreachable = true;
5537
5.06k
    }
5538
5.06k
  }
5539
5540
1.44M
  bool isUnreachable() const noexcept {
5541
1.44M
    if (ControlStack.empty()) {
5542
9.83k
      return IsUnreachable;
5543
1.43M
    } else {
5544
1.43M
      return ControlStack.back().Unreachable;
5545
1.43M
    }
5546
1.44M
  }
5547
5548
  void
5549
  buildPHI(Span<const ValType> RetType,
5550
           Span<const std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5551
16.7k
               Incomings) noexcept {
5552
16.7k
    if (isVoidReturn(RetType)) {
5553
5.06k
      return;
5554
5.06k
    }
5555
11.6k
    std::vector<LLVM::Value> Nodes;
5556
11.6k
    if (Incomings.size() == 0) {
5557
2.31k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5558
2.31k
      Nodes.reserve(Types.size());
5559
2.62k
      for (LLVM::Type Type : Types) {
5560
2.62k
        Nodes.push_back(LLVM::Value::getUndef(Type));
5561
2.62k
      }
5562
9.36k
    } else if (Incomings.size() == 1) {
5563
8.36k
      Nodes = std::move(std::get<0>(Incomings.front()));
5564
8.36k
    } else {
5565
993
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5566
993
      Nodes.reserve(Types.size());
5567
2.07k
      for (size_t I = 0; I < Types.size(); ++I) {
5568
1.08k
        auto PHIRet = Builder.createPHI(Types[I]);
5569
2.91k
        for (auto &[Value, BB] : Incomings) {
5570
2.91k
          assuming(Value.size() == Types.size());
5571
2.91k
          PHIRet.addIncoming(Value[I], BB);
5572
2.91k
        }
5573
1.08k
        Nodes.push_back(PHIRet);
5574
1.08k
      }
5575
993
    }
5576
12.3k
    for (auto &Val : Nodes) {
5577
12.3k
      stackPush(Val);
5578
12.3k
    }
5579
11.6k
  }
5580
5581
37.7k
  void setLableJumpPHI(unsigned int Index) noexcept {
5582
37.7k
    assuming(Index < ControlStack.size());
5583
37.7k
    auto &Entry = *(ControlStack.rbegin() + Index);
5584
37.7k
    if (Entry.NextBlock) { // is loop
5585
2.44k
      std::vector<LLVM::Value> Args(Entry.Type.first.size());
5586
4.26k
      for (size_t I = 0; I < Args.size(); ++I) {
5587
1.81k
        const size_t J = Args.size() - 1 - I;
5588
1.81k
        Args[J] = stackPop();
5589
1.81k
      }
5590
4.26k
      for (size_t I = 0; I < Args.size(); ++I) {
5591
1.81k
        Entry.Args[I].addIncoming(Args[I], Builder.getInsertBlock());
5592
1.81k
        stackPush(Args[I]);
5593
1.81k
      }
5594
35.2k
    } else if (!Entry.Type.second.empty()) { // has return value
5595
1.94k
      std::vector<LLVM::Value> Rets(Entry.Type.second.size());
5596
4.00k
      for (size_t I = 0; I < Rets.size(); ++I) {
5597
2.06k
        const size_t J = Rets.size() - 1 - I;
5598
2.06k
        Rets[J] = stackPop();
5599
2.06k
      }
5600
4.00k
      for (size_t I = 0; I < Rets.size(); ++I) {
5601
2.06k
        stackPush(Rets[I]);
5602
2.06k
      }
5603
1.94k
      Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5604
1.94k
    }
5605
37.7k
  }
5606
5607
37.7k
  // Returns the jump target (JumpBlock) of the control frame `Index` levels
  // up from the innermost one (0 = innermost).
  //
  // Precondition: Index < ControlStack.size().
  LLVM::BasicBlock getLabel(unsigned int Index) const noexcept {
    // Same bound setLableJumpPHI checks; dereferencing rbegin() + Index
    // beyond the stack depth would be undefined behaviour.
    assuming(Index < ControlStack.size());
    return (ControlStack.rbegin() + Index)->JumpBlock;
  }
5610
5611
863k
  // Pushes `Value` on top of the value stack.
  void stackPush(LLVM::Value Value) noexcept {
    Stack.push_back(Value);
  }
5612
330k
  // Removes and returns the value on top of the value stack.
  //
  // Preconditions: outside any control frame the stack must be non-empty;
  // inside a frame the stack must be higher than the height recorded when
  // the innermost frame was entered.
  LLVM::Value stackPop() noexcept {
    assuming(!(ControlStack.empty() && Stack.empty()));
    assuming(ControlStack.empty() ||
             ControlStack.back().StackSize < Stack.size());
    LLVM::Value Top = Stack.back();
    Stack.pop_back();
    return Top;
  }
5620
5621
  LLVM::Compiler::CompileContext &Context;
5622
  LLVM::Context LLContext;
5623
  std::vector<std::pair<LLVM::Type, LLVM::Value>> Local;
5624
  std::vector<LLVM::Value> Stack;
5625
  LLVM::Value LocalInstrCount = nullptr;
5626
  LLVM::Value LocalGas = nullptr;
5627
  std::unordered_map<ErrCode::Value, LLVM::BasicBlock> TrapBB;
5628
  bool IsUnreachable = false;
5629
  bool Interruptible = false;
5630
  struct Control {
5631
    size_t StackSize;
5632
    bool Unreachable;
5633
    LLVM::BasicBlock JumpBlock;
5634
    LLVM::BasicBlock NextBlock;
5635
    LLVM::BasicBlock ElseBlock;
5636
    std::vector<LLVM::Value> Args;
5637
    std::pair<std::vector<ValType>, std::vector<ValType>> Type;
5638
    std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5639
        ReturnPHI;
5640
    Control(size_t S, bool U, LLVM::BasicBlock J, LLVM::BasicBlock N,
5641
            LLVM::BasicBlock E, std::vector<LLVM::Value> A,
5642
            std::pair<std::vector<ValType>, std::vector<ValType>> T,
5643
            std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5644
                R) noexcept
5645
18.9k
        : StackSize(S), Unreachable(U), JumpBlock(J), NextBlock(N),
5646
18.9k
          ElseBlock(E), Args(std::move(A)), Type(std::move(T)),
5647
18.9k
          ReturnPHI(std::move(R)) {}
5648
    Control(const Control &) = default;
5649
23.5k
    Control(Control &&) = default;
5650
    Control &operator=(const Control &) = default;
5651
894
    Control &operator=(Control &&) = default;
5652
  };
5653
  std::vector<Control> ControlStack;
5654
  LLVM::FunctionCallee F;
5655
  LLVM::Value ExecCtx;
5656
  LLVM::Builder Builder;
5657
};
5658
5659
// Splits the struct-typed value `Struct` into its elements by emitting one
// extractvalue per element, in index order, through `Builder`.
std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
                                      LLVM::Value Struct) noexcept {
  const auto Count = Struct.getType().getStructNumElements();
  std::vector<LLVM::Value> Fields;
  Fields.reserve(Count);
  unsigned Index = 0;
  while (Index < Count) {
    Fields.push_back(Builder.createExtractValue(Struct, Index));
    ++Index;
  }
  return Fields;
}
5669
5670
} // namespace
5671
5672
namespace WasmEdge {
5673
namespace LLVM {
5674
5675
1.96k
// Validates the configuration against what this backend supports.
// Returns InvalidConfigure (after logging) when the ExceptionHandling
// proposal is enabled, success otherwise.
Expect<void> Compiler::checkConfigure() noexcept {
  if (!Conf.hasProposal(Proposal::ExceptionHandling)) {
    return {};
  }
  spdlog::error(ErrCode::Value::InvalidConfigure);
  spdlog::error(
      "    Proposal ExceptionHandling is not yet supported in LLVM backend");
  return Unexpect(ErrCode::Value::InvalidConfigure);
}
5684
5685
1.96k
// Compiles a validated module into LLVM IR and optimizes it.
//
// Steps: reject non-validated modules, take the compiler mutex, create the
// LLVM data/module, compile each section through the section overloads
// (with `Context` pointing at a function-local CompileContext for the
// duration), verify, create the target machine, run the optimization
// pipeline, and finally give the "intrinsics" global an initializer.
//
// Returns the populated Data on success, NotValidated for a non-validated
// module, or IllegalPath when the target lookup for the triple fails.
Expect<Data> Compiler::compile(const AST::Module &Module) noexcept {
  // Check the module is validated.
  if (unlikely(!Module.getIsValidated())) {
    spdlog::error(ErrCode::Value::NotValidated);
    return Unexpect(ErrCode::Value::NotValidated);
  }

  std::unique_lock Lock(Mutex);
  spdlog::info("compile start"sv);

  LLVM::Core::init();

  LLVM::Data D;
  auto LLContext = D.extract().LLContext();
  auto &LLModule = D.extract().LLModule;
  LLModule.setTarget(LLVM::getDefaultTargetTriple().unwrap());
  LLModule.addFlag(LLVMModuleFlagBehaviorError, "PIC Level"sv, 2);

  const auto &CompilerConf = Conf.getCompilerConfigure();

  CompileContext NewContext(LLContext, LLModule,
                            CompilerConf.isGenericBinary());
  // Publishes NewContext through the member pointer for the section
  // compilers and resets the pointer on every exit path.
  struct ContextScope {
    ContextScope(CompileContext *&Slot, CompileContext &Active) : Slot(Slot) {
      Slot = &Active;
    }
    ~ContextScope() { Slot = nullptr; }
    CompileContext *&Slot;
  };
  ContextScope Scope(Context, NewContext);

  // Compile Function Types
  compile(Module.getTypeSection());
  // Compile ImportSection
  compile(Module.getImportSection());
  // Compile GlobalSection
  compile(Module.getGlobalSection());
  // Compile MemorySection (MemorySec, DataSec)
  compile(Module.getMemorySection(), Module.getDataSection());
  // Compile TableSection (TableSec, ElemSec)
  compile(Module.getTableSection(), Module.getElementSection());
  // compile Functions in module. (FunctionSec, CodeSec)
  compile(Module.getFunctionSection(), Module.getCodeSection());
  // Compile ExportSection
  compile(Module.getExportSection());
  // StartSection is not required to compile

  spdlog::info("verify start"sv);
  LLModule.verify(LLVMPrintMessageAction);

  spdlog::info("optimize start"sv);
  auto &TM = D.extract().TM;
  {
    auto Triple = LLModule.getTarget();
    auto [TheTarget, ErrorMessage] = LLVM::Target::getFromTriple(Triple);
    if (ErrorMessage) {
      spdlog::error("getFromTriple failed:{}"sv, ErrorMessage.string_view());
      return Unexpect(ErrCode::Value::IllegalPath);
    }

    // Pick the CPU model to generate code for.
    std::string CPUName;
#if defined(__riscv) && __riscv_xlen == 64
    CPUName = "generic-rv64"s;
#else
    if (CompilerConf.isGenericBinary()) {
      CPUName = "generic"s;
    } else {
      CPUName = LLVM::getHostCPUName().string_view();
    }
#endif

    TM = LLVM::TargetMachine::create(
        TheTarget, Triple, CPUName.c_str(),
        LLVM::getHostCPUFeatures().unwrap(),
        toLLVMCodeGenLevel(CompilerConf.getOptimizationLevel()), LLVMRelocPIC,
        LLVMCodeModelDefault);

#if LLVM_VERSION_MAJOR >= 13
    // New pass manager: run the pipeline selected by the optimization level.
    auto PBO = LLVM::PassBuilderOptions::create();
    if (auto Error = PBO.runPasses(
            LLModule, toLLVMLevel(CompilerConf.getOptimizationLevel()), TM)) {
      spdlog::error("{}"sv, Error.message().string_view());
    }
#else
    // Legacy pass managers: one per-function, one per-module.
    auto FP = LLVM::PassManager::createForModule(LLModule);
    auto MP = LLVM::PassManager::create();

    TM.addAnalysisPasses(MP);
    TM.addAnalysisPasses(FP);
    {
      auto PMB = LLVM::PassManagerBuilder::create();
      auto [OptLevel, SizeLevel] =
          toLLVMLevel(CompilerConf.getOptimizationLevel());
      PMB.setOptLevel(OptLevel);
      PMB.setSizeLevel(SizeLevel);
      PMB.populateFunctionPassManager(FP);
      PMB.populateModulePassManager(MP);
    }
    // Tail call elimination is added explicitly for O0 and O1 only.
    const auto RequestedLevel = CompilerConf.getOptimizationLevel();
    if (RequestedLevel == CompilerConfigure::OptimizationLevel::O0 ||
        RequestedLevel == CompilerConfigure::OptimizationLevel::O1) {
      FP.addTailCallEliminationPass();
    }

    FP.initializeFunctionPassManager();
    for (auto Fn = LLModule.getFirstFunction(); Fn; Fn = Fn.getNextFunction()) {
      FP.runFunctionPassManager(Fn);
    }
    FP.finalizeFunctionPassManager();
    MP.runPassManager(LLModule);
#endif
  }

  // Set initializer for constant value
  if (auto IntrinsicsTable = LLModule.getNamedGlobal("intrinsics")) {
    IntrinsicsTable.setInitializer(
        LLVM::Value::getConstNull(IntrinsicsTable.getType()));
    IntrinsicsTable.setGlobalConstant(false);
  } else {
    // The module never referenced the table: add it so the symbol exists.
    auto IntrinsicsTableTy = LLVM::Type::getArrayType(
        LLContext.getInt8Ty().getPointerTo(),
        static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax));
    LLModule.addGlobal(
        IntrinsicsTableTy.getPointerTo(), false, LLVMExternalLinkage,
        LLVM::Value::getConstNull(IntrinsicsTableTy), "intrinsics");
  }

  spdlog::info("optimize done"sv);
  return Expect<Data>{std::move(D)};
}
5820
5821
1.96k
// Compiles the type section: for every type index I an externally visible
// wrapper symbol "t<I>" with the signature
//   void(ExecCtx *, i8 *RawFunc, i8 *RawArgs, i8 *RawRets)
// is emitted, and the composite type and wrapper are recorded in
// Context->CompositeTypes / Context->FunctionWrappers at index I.
//
//   - A function type equal to an earlier function type reuses that
//     wrapper: an alias named "t<I>" is emitted instead of a new function.
//   - A new function type gets a wrapper that loads each argument from
//     RawArgs at a stride of kValSize, calls RawFunc (cast to the real
//     function type) with the execution context plus those arguments, and
//     stores the result(s) to RawRets.
//   - A non-function type gets a wrapper that just returns.
void Compiler::compile(const AST::TypeSection &TypeSec) noexcept {
  auto WrapperTy =
      LLVM::Type::getFunctionType(Context->VoidTy,
                                  {Context->ExecCtxPtrTy, Context->Int8PtrTy,
                                   Context->Int8PtrTy, Context->Int8PtrTy},
                                  false);
  auto SubTypes = TypeSec.getContent();
  const auto Size = SubTypes.size();
  if (Size == 0) {
    return;
  }
  Context->CompositeTypes.reserve(Size);
  Context->FunctionWrappers.reserve(Size);

  // Declares a wrapper function called `Name` and applies the linkage,
  // visibility and attribute set shared by all wrappers. This used to be
  // duplicated in the function and non-function branches.
  const auto CreateWrapper = [this, &WrapperTy](const std::string &Name) {
    auto F = Context->LLModule.addFunction(WrapperTy, LLVMExternalLinkage,
                                           Name.c_str());
    F.setVisibility(LLVMProtectedVisibility);
    F.setDSOLocal(true);
    F.setDLLStorageClass(LLVMDLLExportStorageClass);
    F.addFnAttr(Context->NoStackArgProbe);
    F.addFnAttr(Context->StrictFP);
    F.addFnAttr(Context->UWTable);
    F.addParamAttr(0, Context->ReadOnly);
    F.addParamAttr(0, Context->NoAlias);
    F.addParamAttr(1, Context->NoAlias);
    F.addParamAttr(2, Context->NoAlias);
    F.addParamAttr(3, Context->NoAlias);
    return F;
  };

  // Iterate and compile types.
  for (size_t I = 0; I < Size; ++I) {
    const auto &CompType = SubTypes[I].getCompositeType();
    const auto Name = fmt::format("t{}"sv, Context->CompositeTypes.size());

    if (CompType.isFunc()) {
      // Check function type is unique
      bool Unique = true;
      for (size_t J = 0; J < I; ++J) {
        if (Context->CompositeTypes[J] &&
            Context->CompositeTypes[J]->isFunc()) {
          const auto &OldFuncType = Context->CompositeTypes[J]->getFuncType();
          if (OldFuncType == CompType.getFuncType()) {
            // Same signature as type J: share its wrapper through an alias.
            Unique = false;
            Context->CompositeTypes.push_back(Context->CompositeTypes[J]);
            auto F = Context->FunctionWrappers[J];
            Context->FunctionWrappers.push_back(F);
            auto A = Context->LLModule.addAlias(WrapperTy, F, Name.c_str());
            A.setLinkage(LLVMExternalLinkage);
            A.setVisibility(LLVMProtectedVisibility);
            A.setDSOLocal(true);
            A.setDLLStorageClass(LLVMDLLExportStorageClass);
            break;
          }
        }
      }
      if (!Unique) {
        continue;
      }
    }

    // Create Wrapper
    auto F = CreateWrapper(Name);
    {
      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, F, "entry"));

      if (CompType.isFunc()) {
        auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy,
                              CompType.getFuncType());
        auto RTy = FTy.getReturnType();
        std::vector<LLVM::Type> FPTy(FTy.getNumParams());
        FTy.getParamTypes(FPTy);

        // Parameter 0 of the real function is the execution context, so it
        // is not loaded from RawArgs.
        const size_t ArgCount = FPTy.size() - 1;
        auto ExecCtxPtr = F.getFirstParam();
        auto RawFunc = LLVM::FunctionCallee{
            FTy, Builder.createBitCast(ExecCtxPtr.getNextParam(),
                                       FTy.getPointerTo())};
        auto RawArgs = ExecCtxPtr.getNextParam().getNextParam();
        auto RawRets = RawArgs.getNextParam();

        std::vector<LLVM::Value> Args;
        Args.reserve(FTy.getNumParams());
        Args.push_back(ExecCtxPtr);
        for (size_t J = 0; J < ArgCount; ++J) {
          Args.push_back(Builder.createValuePtrLoad(
              FPTy[J + 1], RawArgs, Context->Int8Ty, J * kValSize));
        }

        auto Ret = Builder.createCall(RawFunc, Args);
        if (RTy.isVoidTy()) {
          // nothing to do
        } else if (RTy.isStructTy()) {
          // Multiple results come back as a struct; store each element.
          auto Rets = unpackStruct(Builder, Ret);
          Builder.createArrayPtrStore(Rets, RawRets, Context->Int8Ty, kValSize);
        } else {
          Builder.createValuePtrStore(Ret, RawRets, Context->Int8Ty);
        }
      }
      // Non function type case: the wrapper body is just this return.
      Builder.createRetVoid();
    }
    // Copy wrapper, param and return lists to module instance.
    Context->FunctionWrappers.push_back(F);
    Context->CompositeTypes.push_back(&CompType);
  }
}
5948
5949
1.96k
// Compile the import section.
//
// Function imports: an internal-linkage LLVM function named "f<FuncID>" is
// added to the module and given a body that
//   1. stores every parameter after the first one into the `Args` array at
//      offset `index * kValSize`,
//   2. calls the `kCall` intrinsic with (FuncID, Args, Rets), and
//   3. returns void, a single value, or an aggregate loaded back from `Rets`,
//      depending on the number of return values.
// The function is then appended to `Context->Functions` with a null code
// pointer.
// Global imports: the LLVM type of the global is appended to
// `Context->Globals`.
// Every other import kind is skipped.
void Compiler::compile(const AST::ImportSection &ImportSec) noexcept {
  for (const auto &ImpDesc : ImportSec.getContent()) {
    const auto &Kind = ImpDesc.getExternalType();

    if (Kind == ExternalType::Global) {
      // Global type. External type checked in validation.
      const auto &ImportedGlobal = ImpDesc.getExternalGlobalType();
      Context->Globals.push_back(
          toLLVMType(Context->LLContext, ImportedGlobal.getValType()));
      continue;
    }
    if (Kind != ExternalType::Function) {
      // Table, memory and any other kind: nothing to do.
      continue;
    }

    // The new function takes the next free index in the function list.
    const auto FuncID = static_cast<uint32_t>(Context->Functions.size());

    // Resolve the function type through its type index in the module.
    uint32_t TypeIdx = ImpDesc.getExternalFuncTypeIdx();
    assuming(TypeIdx < Context->CompositeTypes.size());
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
    auto RTy = FTy.getReturnType();

    // Declare the function and attach its attributes.
    const auto FuncName = fmt::format("f{}"sv, FuncID);
    LLVM::FunctionCallee F{
        FTy, Context->LLModule.addFunction(FTy, LLVMInternalLinkage,
                                           FuncName.c_str())};
    F.Fn.setDSOLocal(true);
    F.Fn.addFnAttr(Context->NoStackArgProbe);
    F.Fn.addFnAttr(Context->StrictFP);
    F.Fn.addFnAttr(Context->UWTable);
    F.Fn.addParamAttr(0, Context->ReadOnly);
    F.Fn.addParamAttr(0, Context->NoAlias);

    LLVM::Builder Builder(Context->LLContext);
    auto EntryBlock =
        LLVM::BasicBlock::create(Context->LLContext, F.Fn, "entry");
    Builder.positionAtEnd(EntryBlock);

    // A void LLVM return type means there is nothing to read back.
    const auto ArgSize = FuncType.getParamTypes().size();
    const auto RetSize = RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();

    LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
    LLVM::Value Rets = Builder.createArray(RetSize, kValSize);

    // Skip the first parameter, then spill the remaining ones into `Args`.
    auto Param = F.Fn.getFirstParam();
    for (unsigned Idx = 0; Idx < ArgSize; ++Idx) {
      Param = Param.getNextParam();
      Builder.createValuePtrStore(Param, Args, Context->Int8Ty,
                                  Idx * kValSize);
    }

    // Dispatch through the `kCall` intrinsic: void(i32, i8*, i8*).
    auto CallTy = LLVM::Type::getFunctionType(
        Context->VoidTy,
        {Context->Int32Ty, Context->Int8PtrTy, Context->Int8PtrTy}, false);
    auto CallIntrinsic =
        Context->getIntrinsic(Builder, Executable::Intrinsics::kCall, CallTy);
    Builder.createCall(CallIntrinsic,
                       {Context->LLContext.getInt32(FuncID), Args, Rets});

    // Hand the results back to the caller.
    switch (RetSize) {
    case 0:
      Builder.createRetVoid();
      break;
    case 1:
      Builder.createRet(Builder.createValuePtrLoad(RTy, Rets, Context->Int8Ty));
      break;
    default:
      Builder.createAggregateRet(Builder.createArrayPtrLoad(
          RetSize, RTy, Rets, Context->Int8Ty, kValSize));
      break;
    }

    Context->Functions.emplace_back(TypeIdx, F, nullptr);
  }
}
6042
6043
1.96k
// Export section overload. The body is empty: no work is performed for this
// section here.
void Compiler::compile(const AST::ExportSection &) noexcept {
  // Nothing to do.
}
6044
6045
1.96k
// Compile the global section: for every global segment, convert its value
// type to the matching LLVM type and append it to `Context->Globals`.
void Compiler::compile(const AST::GlobalSection &GlobalSec) noexcept {
  for (const auto &GlobalSeg : GlobalSec.getContent()) {
    const auto &GlobalValType = GlobalSeg.getGlobalType().getValType();
    Context->Globals.push_back(toLLVMType(Context->LLContext, GlobalValType));
  }
}
6052
6053
void Compiler::compile(const AST::MemorySection &,
6054
1.96k
                       const AST::DataSection &) noexcept {}
6055
6056
void Compiler::compile(const AST::TableSection &,
6057
1.96k
                       const AST::ElementSection &) noexcept {}
6058
6059
// Compile the function and code sections.
//
// Works in two passes:
//   1. Declaration: for each (type index, code segment) pair, add an
//      external-linkage LLVM function named "f<FuncID>" to the module, attach
//      its attributes and record it in `Context->Functions` together with a
//      pointer to its code segment. Only the first
//      min(#type indices, #code segments) pairs are processed.
//   2. Definition: for every recorded function that has a code segment,
//      expand its local declarations into a flat list of value types, run a
//      `FunctionCompiler` over the body and drop unreachable basic blocks.
//      Entries without a code segment (null pointer) are skipped.
//
// Returns immediately when either section is empty.
void Compiler::compile(const AST::FunctionSection &FuncSec,
                       const AST::CodeSection &CodeSec) noexcept {
  const auto &TypeIdxs = FuncSec.getContent();
  const auto &CodeSegs = CodeSec.getContent();

  // Neither container changes below, so the bound can be computed once.
  const size_t FuncCount = std::min<size_t>(TypeIdxs.size(), CodeSegs.size());
  if (FuncCount == 0) {
    return;
  }

  // Pass 1: declare every function, so that all of them are recorded in
  // `Context->Functions` before any body is compiled.
  for (size_t I = 0; I < FuncCount; ++I) {
    const auto &TypeIdx = TypeIdxs[I];
    // Keep a reference: the address of this segment is stored below.
    const auto &Code = CodeSegs[I];
    assuming(TypeIdx < Context->CompositeTypes.size());
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
    const auto FuncID = Context->Functions.size();
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
    LLVM::FunctionCallee F = {FTy, Context->LLModule.addFunction(
                                       FTy, LLVMExternalLinkage,
                                       fmt::format("f{}"sv, FuncID).c_str())};
    F.Fn.setVisibility(LLVMProtectedVisibility);
    F.Fn.setDSOLocal(true);
    F.Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
    F.Fn.addFnAttr(Context->NoStackArgProbe);
    F.Fn.addFnAttr(Context->StrictFP);
    F.Fn.addFnAttr(Context->UWTable);
    F.Fn.addParamAttr(0, Context->ReadOnly);
    F.Fn.addParamAttr(0, Context->NoAlias);

    Context->Functions.emplace_back(TypeIdx, F, &Code);
  }

  // Pass 2: compile the bodies.
  for (auto [T, F, Code] : Context->Functions) {
    if (!Code) {
      // No code segment recorded for this entry; nothing to compile.
      continue;
    }

    // Expand the (count, type) local declarations into one entry per local.
    // The fill overload of insert appends `Count` copies in a single call
    // instead of one push_back per local.
    std::vector<ValType> Locals;
    for (const auto &Local : Code->getLocals()) {
      Locals.insert(Locals.end(), Local.first, Local.second);
    }

    FunctionCompiler FC(*Context, F, Locals,
                        Conf.getCompilerConfigure().isInterruptible(),
                        Conf.getStatisticsConfigure().isInstructionCounting(),
                        Conf.getStatisticsConfigure().isCostMeasuring());
    auto Type = Context->resolveBlockType(T);
    FC.compile(*Code, std::move(Type));
    F.Fn.eliminateUnreachableBlocks();
  }
}
6110
6111
} // namespace LLVM
6112
} // namespace WasmEdge