Coverage Report

Created: 2025-11-24 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/WasmEdge/lib/llvm/compiler.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: 2019-2024 Second State INC
3
4
#include "llvm/compiler.h"
5
6
#include "aot/version.h"
7
#include "common/defines.h"
8
#include "common/filesystem.h"
9
#include "common/spdlog.h"
10
#include "data.h"
11
#include "llvm.h"
12
#include "system/allocator.h"
13
14
#include <algorithm>
15
#include <array>
16
#include <cinttypes>
17
#include <cstdint>
18
#include <cstdlib>
19
#include <limits>
20
#include <memory>
21
#include <numeric>
22
#include <string>
23
#include <string_view>
24
#include <system_error>
25
26
namespace LLVM = WasmEdge::LLVM;
27
using namespace std::literals;
28
29
namespace {
30
31
static bool
32
isVoidReturn(WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
33
static LLVM::Type toLLVMType(LLVM::Context LLContext,
34
                             const WasmEdge::ValType &ValType) noexcept;
35
static std::vector<LLVM::Type>
36
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
37
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
38
static LLVM::Type
39
toLLVMRetsType(LLVM::Context LLContext,
40
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
41
static LLVM::Type
42
toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
43
           const WasmEdge::AST::FunctionType &FuncType) noexcept;
44
static LLVM::Value
45
toLLVMConstantZero(LLVM::Context LLContext,
46
                   const WasmEdge::ValType &ValType) noexcept;
47
static std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
48
                                             LLVM::Value Struct) noexcept;
49
class FunctionCompiler;
50
51
// XXX: Misalignment handler not implemented yet, forcing unalignment
52
// force unalignment load/store
53
static inline constexpr const bool kForceUnalignment = true;
54
55
// force checking div/rem on zero
56
static inline constexpr const bool kForceDivCheck = true;
57
58
// Size of a ValVariant
59
static inline constexpr const uint32_t kValSize = sizeof(WasmEdge::ValVariant);
60
61
// Translate Compiler::OptimizationLevel to llvm::PassBuilder version
62
#if LLVM_VERSION_MAJOR >= 13
63
static inline const char *
64
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
65
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
66
  switch (Level) {
67
  case OL::O0:
68
    return "default<O0>,function(tailcallelim)";
69
  case OL::O1:
70
    return "default<O1>,function(tailcallelim)";
71
  case OL::O2:
72
    return "default<O2>";
73
  case OL::O3:
74
    return "default<O3>";
75
  case OL::Os:
76
    return "default<Os>";
77
  case OL::Oz:
78
    return "default<Oz>";
79
  default:
80
    assumingUnreachable();
81
  }
82
}
83
#else
84
static inline std::pair<unsigned int, unsigned int>
85
2.32k
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
86
2.32k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
87
2.32k
  switch (Level) {
88
0
  case OL::O0:
89
0
    return {0, 0};
90
0
  case OL::O1:
91
0
    return {1, 0};
92
0
  case OL::O2:
93
0
    return {2, 0};
94
2.32k
  case OL::O3:
95
2.32k
    return {3, 0};
96
0
  case OL::Os:
97
0
    return {2, 1};
98
0
  case OL::Oz:
99
0
    return {2, 2};
100
0
  default:
101
0
    assumingUnreachable();
102
2.32k
  }
103
2.32k
}
104
#endif
105
106
static inline LLVMCodeGenOptLevel toLLVMCodeGenLevel(
107
2.32k
    WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
108
2.32k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
109
2.32k
  switch (Level) {
110
0
  case OL::O0:
111
0
    return LLVMCodeGenLevelNone;
112
0
  case OL::O1:
113
0
    return LLVMCodeGenLevelLess;
114
0
  case OL::O2:
115
0
    return LLVMCodeGenLevelDefault;
116
2.32k
  case OL::O3:
117
2.32k
    return LLVMCodeGenLevelAggressive;
118
0
  case OL::Os:
119
0
    return LLVMCodeGenLevelDefault;
120
0
  case OL::Oz:
121
0
    return LLVMCodeGenLevelDefault;
122
0
  default:
123
0
    assumingUnreachable();
124
2.32k
  }
125
2.32k
}
126
} // namespace
127
128
struct LLVM::Compiler::CompileContext {
129
  LLVM::Context LLContext;
130
  LLVM::Module &LLModule;
131
  LLVM::Attribute Cold;
132
  LLVM::Attribute NoAlias;
133
  LLVM::Attribute NoInline;
134
  LLVM::Attribute NoReturn;
135
  LLVM::Attribute ReadOnly;
136
  LLVM::Attribute StrictFP;
137
  LLVM::Attribute UWTable;
138
  LLVM::Attribute NoStackArgProbe;
139
  LLVM::Type VoidTy;
140
  LLVM::Type Int8Ty;
141
  LLVM::Type Int16Ty;
142
  LLVM::Type Int32Ty;
143
  LLVM::Type Int64Ty;
144
  LLVM::Type Int128Ty;
145
  LLVM::Type FloatTy;
146
  LLVM::Type DoubleTy;
147
  LLVM::Type Int8x16Ty;
148
  LLVM::Type Int16x8Ty;
149
  LLVM::Type Int32x4Ty;
150
  LLVM::Type Floatx4Ty;
151
  LLVM::Type Int64x2Ty;
152
  LLVM::Type Doublex2Ty;
153
  LLVM::Type Int128x1Ty;
154
  LLVM::Type Int8PtrTy;
155
  LLVM::Type Int32PtrTy;
156
  LLVM::Type Int64PtrTy;
157
  LLVM::Type Int128PtrTy;
158
  LLVM::Type Int8PtrPtrTy;
159
  LLVM::Type ExecCtxTy;
160
  LLVM::Type ExecCtxPtrTy;
161
  LLVM::Type IntrinsicsTableTy;
162
  LLVM::Type IntrinsicsTablePtrTy;
163
  LLVM::Message SubtargetFeatures;
164
165
#if defined(__x86_64__)
166
#if defined(__XOP__)
167
  bool SupportXOP = true;
168
#else
169
  bool SupportXOP = false;
170
#endif
171
172
#if defined(__SSE4_1__)
173
  bool SupportSSE4_1 = true;
174
#else
175
  bool SupportSSE4_1 = false;
176
#endif
177
178
#if defined(__SSSE3__)
179
  bool SupportSSSE3 = true;
180
#else
181
  bool SupportSSSE3 = false;
182
#endif
183
184
#if defined(__SSE2__)
185
  bool SupportSSE2 = true;
186
#else
187
  bool SupportSSE2 = false;
188
#endif
189
#endif
190
191
#if defined(__aarch64__)
192
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__ARM_NEON_FP)
193
  bool SupportNEON = true;
194
#else
195
  bool SupportNEON = false;
196
#endif
197
#endif
198
199
  std::vector<const AST::CompositeType *> CompositeTypes;
200
  std::vector<LLVM::Value> FunctionWrappers;
201
  std::vector<std::tuple<uint32_t, LLVM::FunctionCallee,
202
                         const WasmEdge::AST::CodeSegment *>>
203
      Functions;
204
  std::vector<LLVM::Type> Globals;
205
  LLVM::Value IntrinsicsTable;
206
  LLVM::FunctionCallee Trap;
207
  CompileContext(LLVM::Context C, LLVM::Module &M,
208
                 bool IsGenericBinary) noexcept
209
2.33k
      : LLContext(C), LLModule(M),
210
2.33k
        Cold(LLVM::Attribute::createEnum(C, LLVM::Core::Cold, 0)),
211
2.33k
        NoAlias(LLVM::Attribute::createEnum(C, LLVM::Core::NoAlias, 0)),
212
2.33k
        NoInline(LLVM::Attribute::createEnum(C, LLVM::Core::NoInline, 0)),
213
2.33k
        NoReturn(LLVM::Attribute::createEnum(C, LLVM::Core::NoReturn, 0)),
214
2.33k
        ReadOnly(LLVM::Attribute::createEnum(C, LLVM::Core::ReadOnly, 0)),
215
2.33k
        StrictFP(LLVM::Attribute::createEnum(C, LLVM::Core::StrictFP, 0)),
216
2.33k
        UWTable(LLVM::Attribute::createEnum(C, LLVM::Core::UWTable,
217
2.33k
                                            LLVM::Core::UWTableDefault)),
218
        NoStackArgProbe(
219
2.33k
            LLVM::Attribute::createString(C, "no-stack-arg-probe"sv, {})),
220
2.33k
        VoidTy(LLContext.getVoidTy()), Int8Ty(LLContext.getInt8Ty()),
221
2.33k
        Int16Ty(LLContext.getInt16Ty()), Int32Ty(LLContext.getInt32Ty()),
222
2.33k
        Int64Ty(LLContext.getInt64Ty()), Int128Ty(LLContext.getInt128Ty()),
223
2.33k
        FloatTy(LLContext.getFloatTy()), DoubleTy(LLContext.getDoubleTy()),
224
2.33k
        Int8x16Ty(LLVM::Type::getVectorType(Int8Ty, 16)),
225
2.33k
        Int16x8Ty(LLVM::Type::getVectorType(Int16Ty, 8)),
226
2.33k
        Int32x4Ty(LLVM::Type::getVectorType(Int32Ty, 4)),
227
2.33k
        Floatx4Ty(LLVM::Type::getVectorType(FloatTy, 4)),
228
2.33k
        Int64x2Ty(LLVM::Type::getVectorType(Int64Ty, 2)),
229
2.33k
        Doublex2Ty(LLVM::Type::getVectorType(DoubleTy, 2)),
230
2.33k
        Int128x1Ty(LLVM::Type::getVectorType(Int128Ty, 1)),
231
2.33k
        Int8PtrTy(Int8Ty.getPointerTo()), Int32PtrTy(Int32Ty.getPointerTo()),
232
2.33k
        Int64PtrTy(Int64Ty.getPointerTo()),
233
2.33k
        Int128PtrTy(Int128Ty.getPointerTo()),
234
2.33k
        Int8PtrPtrTy(Int8PtrTy.getPointerTo()),
235
2.33k
        ExecCtxTy(LLVM::Type::getStructType(
236
2.33k
            "ExecCtx",
237
2.33k
            std::initializer_list<LLVM::Type>{
238
                // Memory
239
2.33k
                Int8PtrTy.getPointerTo(),
240
                // Globals
241
2.33k
                Int128PtrTy.getPointerTo(),
242
                // InstrCount
243
2.33k
                Int64PtrTy,
244
                // CostTable
245
2.33k
                LLVM::Type::getArrayType(Int64Ty, UINT16_MAX + 1)
246
2.33k
                    .getPointerTo(),
247
                // Gas
248
2.33k
                Int64PtrTy,
249
                // GasLimit
250
2.33k
                Int64Ty,
251
                // StopToken
252
2.33k
                Int32PtrTy,
253
2.33k
            })),
254
2.33k
        ExecCtxPtrTy(ExecCtxTy.getPointerTo()),
255
2.33k
        IntrinsicsTableTy(LLVM::Type::getArrayType(
256
2.33k
            Int8PtrTy,
257
2.33k
            static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax))),
258
2.33k
        IntrinsicsTablePtrTy(IntrinsicsTableTy.getPointerTo()),
259
2.33k
        IntrinsicsTable(LLModule.addGlobal(IntrinsicsTablePtrTy, true,
260
2.33k
                                           LLVMExternalLinkage, LLVM::Value(),
261
2.33k
                                           "intrinsics")) {
262
2.33k
    Trap.Ty = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
263
2.33k
    Trap.Fn = LLModule.addFunction(Trap.Ty, LLVMPrivateLinkage, "trap");
264
2.33k
    Trap.Fn.setDSOLocal(true);
265
2.33k
    Trap.Fn.addFnAttr(NoStackArgProbe);
266
2.33k
    Trap.Fn.addFnAttr(StrictFP);
267
2.33k
    Trap.Fn.addFnAttr(UWTable);
268
2.33k
    Trap.Fn.addFnAttr(NoReturn);
269
2.33k
    Trap.Fn.addFnAttr(Cold);
270
2.33k
    Trap.Fn.addFnAttr(NoInline);
271
272
2.33k
    LLModule.addGlobal(Int32Ty, true, LLVMExternalLinkage,
273
2.33k
                       LLVM::Value::getConstInt(Int32Ty, AOT::kBinaryVersion),
274
2.33k
                       "version");
275
276
2.33k
    if (!IsGenericBinary) {
277
2.33k
      SubtargetFeatures = LLVM::getHostCPUFeatures();
278
2.33k
      auto Features = SubtargetFeatures.string_view();
279
203k
      while (!Features.empty()) {
280
200k
        std::string_view Feature;
281
200k
        if (auto Pos = Features.find(','); Pos != std::string_view::npos) {
282
198k
          Feature = Features.substr(0, Pos);
283
198k
          Features = Features.substr(Pos + 1);
284
198k
        } else {
285
2.33k
          Feature = std::exchange(Features, std::string_view());
286
2.33k
        }
287
200k
        if (Feature[0] != '+') {
288
123k
          continue;
289
123k
        }
290
77.1k
        Feature = Feature.substr(1);
291
292
77.1k
#if defined(__x86_64__)
293
77.1k
        if (!SupportXOP && Feature == "xop"sv) {
294
0
          SupportXOP = true;
295
0
        }
296
77.1k
        if (!SupportSSE4_1 && Feature == "sse4.1"sv) {
297
2.33k
          SupportSSE4_1 = true;
298
2.33k
        }
299
77.1k
        if (!SupportSSSE3 && Feature == "ssse3"sv) {
300
2.33k
          SupportSSSE3 = true;
301
2.33k
        }
302
77.1k
        if (!SupportSSE2 && Feature == "sse2"sv) {
303
0
          SupportSSE2 = true;
304
0
        }
305
#elif defined(__aarch64__)
306
        if (!SupportNEON && Feature == "neon"sv) {
307
          SupportNEON = true;
308
        }
309
#endif
310
77.1k
      }
311
2.33k
    }
312
313
2.33k
    {
314
      // create trap
315
2.33k
      LLVM::Builder Builder(LLContext);
316
2.33k
      Builder.positionAtEnd(
317
2.33k
          LLVM::BasicBlock::create(LLContext, Trap.Fn, "entry"));
318
2.33k
      auto FnTy = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
319
2.33k
      auto CallTrap = Builder.createCall(
320
2.33k
          getIntrinsic(Builder, Executable::Intrinsics::kTrap, FnTy),
321
2.33k
          {Trap.Fn.getFirstParam()});
322
2.33k
      CallTrap.addCallSiteAttribute(NoReturn);
323
2.33k
      Builder.createUnreachable();
324
2.33k
    }
325
2.33k
  }
326
  LLVM::Value getMemory(LLVM::Builder &Builder, LLVM::Value ExecCtx,
327
23.2k
                        uint32_t Index) noexcept {
328
23.2k
    auto Array = Builder.createExtractValue(ExecCtx, 0);
329
#if WASMEDGE_ALLOCATOR_IS_STABLE
330
    auto VPtr = Builder.createLoad(
331
        Int8PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
332
                                              LLContext.getInt64(Index)));
333
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
334
                     LLVM::Metadata(LLContext, {}));
335
#else
336
23.2k
    auto VPtrPtr = Builder.createLoad(
337
23.2k
        Int8PtrPtrTy, Builder.createInBoundsGEP1(Int8PtrPtrTy, Array,
338
23.2k
                                                 LLContext.getInt64(Index)));
339
23.2k
    VPtrPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
340
23.2k
                        LLVM::Metadata(LLContext, {}));
341
23.2k
    auto VPtr = Builder.createLoad(
342
23.2k
        Int8PtrTy,
343
23.2k
        Builder.createInBoundsGEP1(Int8PtrTy, VPtrPtr, LLContext.getInt64(0)));
344
23.2k
#endif
345
23.2k
    return Builder.createBitCast(VPtr, Int8PtrTy);
346
23.2k
  }
347
  std::pair<LLVM::Type, LLVM::Value> getGlobal(LLVM::Builder &Builder,
348
                                               LLVM::Value ExecCtx,
349
378
                                               uint32_t Index) noexcept {
350
378
    auto Ty = Globals[Index];
351
378
    auto Array = Builder.createExtractValue(ExecCtx, 1);
352
378
    auto VPtr = Builder.createLoad(
353
378
        Int128PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
354
378
                                                LLContext.getInt64(Index)));
355
378
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
356
378
                     LLVM::Metadata(LLContext, {}));
357
378
    auto Ptr = Builder.createBitCast(VPtr, Ty.getPointerTo());
358
378
    return {Ty, Ptr};
359
378
  }
360
  LLVM::Value getInstrCount(LLVM::Builder &Builder,
361
0
                            LLVM::Value ExecCtx) noexcept {
362
0
    return Builder.createExtractValue(ExecCtx, 2);
363
0
  }
364
  LLVM::Value getCostTable(LLVM::Builder &Builder,
365
0
                           LLVM::Value ExecCtx) noexcept {
366
0
    return Builder.createExtractValue(ExecCtx, 3);
367
0
  }
368
0
  LLVM::Value getGas(LLVM::Builder &Builder, LLVM::Value ExecCtx) noexcept {
369
0
    return Builder.createExtractValue(ExecCtx, 4);
370
0
  }
371
  LLVM::Value getGasLimit(LLVM::Builder &Builder,
372
0
                          LLVM::Value ExecCtx) noexcept {
373
0
    return Builder.createExtractValue(ExecCtx, 5);
374
0
  }
375
  LLVM::Value getStopToken(LLVM::Builder &Builder,
376
0
                           LLVM::Value ExecCtx) noexcept {
377
0
    return Builder.createExtractValue(ExecCtx, 6);
378
0
  }
379
  LLVM::FunctionCallee getIntrinsic(LLVM::Builder &Builder,
380
                                    Executable::Intrinsics Index,
381
8.26k
                                    LLVM::Type Ty) noexcept {
382
8.26k
    const auto Value = static_cast<uint32_t>(Index);
383
8.26k
    auto PtrTy = Ty.getPointerTo();
384
8.26k
    auto PtrPtrTy = PtrTy.getPointerTo();
385
8.26k
    auto IT = Builder.createLoad(IntrinsicsTablePtrTy, IntrinsicsTable);
386
8.26k
    IT.setMetadata(LLContext, LLVM::Core::InvariantGroup,
387
8.26k
                   LLVM::Metadata(LLContext, {}));
388
8.26k
    auto VPtr =
389
8.26k
        Builder.createInBoundsGEP2(IntrinsicsTableTy, IT, LLContext.getInt64(0),
390
8.26k
                                   LLContext.getInt64(Value));
391
8.26k
    auto Ptr = Builder.createBitCast(VPtr, PtrPtrTy);
392
8.26k
    return {Ty, Builder.createLoad(PtrTy, Ptr)};
393
8.26k
  }
394
  std::pair<std::vector<ValType>, std::vector<ValType>>
395
19.8k
  resolveBlockType(const BlockType &BType) const noexcept {
396
19.8k
    using VecT = std::vector<ValType>;
397
19.8k
    using RetT = std::pair<VecT, VecT>;
398
19.8k
    if (BType.isEmpty()) {
399
2.47k
      return RetT{};
400
2.47k
    }
401
17.3k
    if (BType.isValType()) {
402
2.68k
      return RetT{{}, {BType.getValType()}};
403
14.6k
    } else {
404
      // Type index case. t2* = type[index].returns
405
14.6k
      const uint32_t TypeIdx = BType.getTypeIndex();
406
14.6k
      const auto &FType = CompositeTypes[TypeIdx]->getFuncType();
407
14.6k
      return RetT{
408
14.6k
          VecT(FType.getParamTypes().begin(), FType.getParamTypes().end()),
409
14.6k
          VecT(FType.getReturnTypes().begin(), FType.getReturnTypes().end())};
410
14.6k
    }
411
17.3k
  }
412
};
413
414
namespace {
415
416
using namespace WasmEdge;
417
418
36.9k
static bool isVoidReturn(Span<const ValType> ValTypes) noexcept {
419
36.9k
  return ValTypes.empty();
420
36.9k
}
421
422
static LLVM::Type toLLVMType(LLVM::Context LLContext,
423
2.46M
                             const ValType &ValType) noexcept {
424
2.46M
  switch (ValType.getCode()) {
425
63.3k
  case TypeCode::I32:
426
63.3k
    return LLContext.getInt32Ty();
427
433k
  case TypeCode::I64:
428
433k
    return LLContext.getInt64Ty();
429
119k
  case TypeCode::Ref:
430
215k
  case TypeCode::RefNull:
431
1.89M
  case TypeCode::V128:
432
1.89M
    return LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2);
433
51.1k
  case TypeCode::F32:
434
51.1k
    return LLContext.getFloatTy();
435
20.9k
  case TypeCode::F64:
436
20.9k
    return LLContext.getDoubleTy();
437
0
  default:
438
0
    assumingUnreachable();
439
2.46M
  }
440
2.46M
}
441
442
static std::vector<LLVM::Type>
443
toLLVMTypeVector(LLVM::Context LLContext,
444
21.1k
                 Span<const ValType> ValTypes) noexcept {
445
21.1k
  std::vector<LLVM::Type> Result;
446
21.1k
  Result.reserve(ValTypes.size());
447
21.1k
  for (const auto &Type : ValTypes) {
448
20.3k
    Result.push_back(toLLVMType(LLContext, Type));
449
20.3k
  }
450
21.1k
  return Result;
451
21.1k
}
452
453
static std::vector<LLVM::Type>
454
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
455
17.1k
               Span<const ValType> ValTypes) noexcept {
456
17.1k
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
457
17.1k
  Result.insert(Result.begin(), ExecCtxPtrTy);
458
17.1k
  return Result;
459
17.1k
}
460
461
static LLVM::Type toLLVMRetsType(LLVM::Context LLContext,
462
17.1k
                                 Span<const ValType> ValTypes) noexcept {
463
17.1k
  if (isVoidReturn(ValTypes)) {
464
4.36k
    return LLContext.getVoidTy();
465
4.36k
  }
466
12.8k
  if (ValTypes.size() == 1) {
467
12.0k
    return toLLVMType(LLContext, ValTypes.front());
468
12.0k
  }
469
739
  std::vector<LLVM::Type> Result;
470
739
  Result.reserve(ValTypes.size());
471
2.02k
  for (const auto &Type : ValTypes) {
472
2.02k
    Result.push_back(toLLVMType(LLContext, Type));
473
2.02k
  }
474
739
  return LLVM::Type::getStructType(Result);
475
12.8k
}
476
477
static LLVM::Type toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
478
17.1k
                             const AST::FunctionType &FuncType) noexcept {
479
17.1k
  auto ArgsTy =
480
17.1k
      toLLVMArgsType(LLContext, ExecCtxPtrTy, FuncType.getParamTypes());
481
17.1k
  auto RetTy = toLLVMRetsType(LLContext, FuncType.getReturnTypes());
482
17.1k
  return LLVM::Type::getFunctionType(RetTy, ArgsTy);
483
17.1k
}
484
485
static LLVM::Value toLLVMConstantZero(LLVM::Context LLContext,
486
2.42M
                                      const ValType &ValType) noexcept {
487
2.42M
  switch (ValType.getCode()) {
488
44.2k
  case TypeCode::I32:
489
44.2k
    return LLVM::Value::getConstNull(LLContext.getInt32Ty());
490
429k
  case TypeCode::I64:
491
429k
    return LLVM::Value::getConstNull(LLContext.getInt64Ty());
492
119k
  case TypeCode::Ref:
493
215k
  case TypeCode::RefNull: {
494
215k
    std::array<uint8_t, 16> Data{};
495
215k
    const auto Raw = ValType.getRawData();
496
215k
    std::copy(Raw.begin(), Raw.end(), Data.begin());
497
215k
    return LLVM::Value::getConstVector8(LLContext, Data);
498
119k
  }
499
1.67M
  case TypeCode::V128:
500
1.67M
    return LLVM::Value::getConstNull(
501
1.67M
        LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2));
502
48.4k
  case TypeCode::F32:
503
48.4k
    return LLVM::Value::getConstNull(LLContext.getFloatTy());
504
17.7k
  case TypeCode::F64:
505
17.7k
    return LLVM::Value::getConstNull(LLContext.getDoubleTy());
506
0
  default:
507
0
    assumingUnreachable();
508
2.42M
  }
509
2.42M
}
510
511
class FunctionCompiler {
512
  struct Control;
513
514
public:
515
  FunctionCompiler(LLVM::Compiler::CompileContext &Context,
516
                   LLVM::FunctionCallee F, Span<const ValType> Locals,
517
                   bool Interruptible, bool InstructionCounting,
518
                   bool GasMeasuring) noexcept
519
11.0k
      : Context(Context), LLContext(Context.LLContext),
520
11.0k
        Interruptible(Interruptible), F(F), Builder(LLContext) {
521
11.0k
    if (F.Fn) {
522
11.0k
      Builder.positionAtEnd(LLVM::BasicBlock::create(LLContext, F.Fn, "entry"));
523
11.0k
      ExecCtx = Builder.createLoad(Context.ExecCtxTy, F.Fn.getFirstParam());
524
525
11.0k
      if (InstructionCounting) {
526
0
        LocalInstrCount = Builder.createAlloca(Context.Int64Ty);
527
0
        Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
528
0
      }
529
530
11.0k
      if (GasMeasuring) {
531
0
        LocalGas = Builder.createAlloca(Context.Int64Ty);
532
0
        Builder.createStore(LLContext.getInt64(0), LocalGas);
533
0
      }
534
535
20.8k
      for (LLVM::Value Arg = F.Fn.getFirstParam().getNextParam(); Arg;
536
11.0k
           Arg = Arg.getNextParam()) {
537
9.81k
        LLVM::Type Ty = Arg.getType();
538
9.81k
        LLVM::Value ArgPtr = Builder.createAlloca(Ty);
539
9.81k
        Builder.createStore(Arg, ArgPtr);
540
9.81k
        Local.emplace_back(Ty, ArgPtr);
541
9.81k
      }
542
543
2.42M
      for (const auto &Type : Locals) {
544
2.42M
        LLVM::Type Ty = toLLVMType(LLContext, Type);
545
2.42M
        LLVM::Value ArgPtr = Builder.createAlloca(Ty);
546
2.42M
        Builder.createStore(toLLVMConstantZero(LLContext, Type), ArgPtr);
547
2.42M
        Local.emplace_back(Ty, ArgPtr);
548
2.42M
      }
549
11.0k
    }
550
11.0k
  }
551
552
32.0k
  LLVM::BasicBlock getTrapBB(ErrCode::Value Error) noexcept {
553
32.0k
    if (auto Iter = TrapBB.find(Error); Iter != TrapBB.end()) {
554
28.6k
      return Iter->second;
555
28.6k
    }
556
3.34k
    auto BB = LLVM::BasicBlock::create(LLContext, F.Fn, "trap");
557
3.34k
    TrapBB.emplace(Error, BB);
558
3.34k
    return BB;
559
32.0k
  }
560
561
  Expect<void>
562
  compile(const AST::CodeSegment &Code,
563
11.0k
          std::pair<std::vector<ValType>, std::vector<ValType>> Type) noexcept {
564
11.0k
    auto RetBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ret");
565
11.0k
    Type.first.clear();
566
11.0k
    enterBlock(RetBB, {}, {}, {}, std::move(Type));
567
11.0k
    EXPECTED_TRY(compile(Code.getExpr().getInstrs()));
568
10.9k
    assuming(ControlStack.empty());
569
10.9k
    compileReturn();
570
571
10.9k
    for (auto &[Error, BB] : TrapBB) {
572
3.33k
      Builder.positionAtEnd(BB);
573
3.33k
      updateInstrCount();
574
3.33k
      updateGasAtTrap();
575
3.33k
      auto CallTrap = Builder.createCall(
576
3.33k
          Context.Trap, {LLContext.getInt32(static_cast<uint32_t>(Error))});
577
3.33k
      CallTrap.addCallSiteAttribute(Context.NoReturn);
578
3.33k
      Builder.createUnreachable();
579
3.33k
    }
580
10.9k
    return {};
581
10.9k
  }
582
583
11.0k
  Expect<void> compile(AST::InstrView Instrs) noexcept {
584
1.59M
    auto Dispatch = [this](const AST::Instruction &Instr) -> Expect<void> {
585
1.59M
      switch (Instr.getOpCode()) {
586
      // Control instructions (for blocks)
587
3.98k
      case OpCode::Block: {
588
3.98k
        auto Block = LLVM::BasicBlock::create(LLContext, F.Fn, "block");
589
3.98k
        auto EndBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "block.end");
590
3.98k
        Builder.createBr(Block);
591
592
3.98k
        Builder.positionAtEnd(Block);
593
3.98k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
594
3.98k
        const auto Arity = Type.first.size();
595
3.98k
        std::vector<LLVM::Value> Args(Arity);
596
3.98k
        if (isUnreachable()) {
597
1.41k
          for (size_t I = 0; I < Arity; ++I) {
598
374
            auto Ty = toLLVMType(LLContext, Type.first[I]);
599
374
            Args[I] = LLVM::Value::getUndef(Ty);
600
374
          }
601
2.94k
        } else {
602
3.38k
          for (size_t I = 0; I < Arity; ++I) {
603
433
            const size_t J = Arity - 1 - I;
604
433
            Args[J] = stackPop();
605
433
          }
606
2.94k
        }
607
3.98k
        enterBlock(EndBlock, {}, {}, std::move(Args), std::move(Type));
608
3.98k
        checkStop();
609
3.98k
        updateGas();
610
3.98k
        return {};
611
0
      }
612
1.85k
      case OpCode::Loop: {
613
1.85k
        auto Curr = Builder.getInsertBlock();
614
1.85k
        auto Loop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop");
615
1.85k
        auto EndLoop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop.end");
616
1.85k
        Builder.createBr(Loop);
617
618
1.85k
        Builder.positionAtEnd(Loop);
619
1.85k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
620
1.85k
        const auto Arity = Type.first.size();
621
1.85k
        std::vector<LLVM::Value> Args(Arity);
622
1.85k
        if (isUnreachable()) {
623
1.18k
          for (size_t I = 0; I < Arity; ++I) {
624
517
            auto Ty = toLLVMType(LLContext, Type.first[I]);
625
517
            auto Value = LLVM::Value::getUndef(Ty);
626
517
            auto PHINode = Builder.createPHI(Ty);
627
517
            PHINode.addIncoming(Value, Curr);
628
517
            Args[I] = PHINode;
629
517
          }
630
1.18k
        } else {
631
1.65k
          for (size_t I = 0; I < Arity; ++I) {
632
475
            const size_t J = Arity - 1 - I;
633
475
            auto Value = stackPop();
634
475
            auto PHINode = Builder.createPHI(Value.getType());
635
475
            PHINode.addIncoming(Value, Curr);
636
475
            Args[J] = PHINode;
637
475
          }
638
1.18k
        }
639
1.85k
        enterBlock(Loop, EndLoop, {}, std::move(Args), std::move(Type));
640
1.85k
        checkStop();
641
1.85k
        updateGas();
642
1.85k
        return {};
643
0
      }
644
2.98k
      case OpCode::If: {
645
2.98k
        auto Then = LLVM::BasicBlock::create(LLContext, F.Fn, "then");
646
2.98k
        auto Else = LLVM::BasicBlock::create(LLContext, F.Fn, "else");
647
2.98k
        auto EndIf = LLVM::BasicBlock::create(LLContext, F.Fn, "if.end");
648
2.98k
        LLVM::Value Cond;
649
2.98k
        if (isUnreachable()) {
650
574
          Cond = LLVM::Value::getUndef(LLContext.getInt1Ty());
651
2.41k
        } else {
652
2.41k
          Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
653
2.41k
        }
654
2.98k
        Builder.createCondBr(Cond, Then, Else);
655
656
2.98k
        Builder.positionAtEnd(Then);
657
2.98k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
658
2.98k
        const auto Arity = Type.first.size();
659
2.98k
        std::vector<LLVM::Value> Args(Arity);
660
2.98k
        if (isUnreachable()) {
661
1.02k
          for (size_t I = 0; I < Arity; ++I) {
662
448
            auto Ty = toLLVMType(LLContext, Type.first[I]);
663
448
            Args[I] = LLVM::Value::getUndef(Ty);
664
448
          }
665
2.41k
        } else {
666
3.29k
          for (size_t I = 0; I < Arity; ++I) {
667
879
            const size_t J = Arity - 1 - I;
668
879
            Args[J] = stackPop();
669
879
          }
670
2.41k
        }
671
2.98k
        enterBlock(EndIf, {}, Else, std::move(Args), std::move(Type));
672
2.98k
        return {};
673
0
      }
674
11
      case OpCode::Try_table:
675
        // TODO: EXCEPTION - implement the AOT.
676
11
        return Unexpect(ErrCode::Value::AOTNotImpl);
677
19.7k
      case OpCode::End: {
678
19.7k
        auto Entry = leaveBlock();
679
19.7k
        if (Entry.ElseBlock) {
680
1.16k
          auto Block = Builder.getInsertBlock();
681
1.16k
          Builder.positionAtEnd(Entry.ElseBlock);
682
1.16k
          enterBlock(Block, {}, {}, std::move(Entry.Args),
683
1.16k
                     std::move(Entry.Type), std::move(Entry.ReturnPHI));
684
1.16k
          Entry = leaveBlock();
685
1.16k
        }
686
19.7k
        buildPHI(Entry.Type.second, Entry.ReturnPHI);
687
19.7k
        return {};
688
0
      }
689
1.81k
      case OpCode::Else: {
690
1.81k
        auto Entry = leaveBlock();
691
1.81k
        Builder.positionAtEnd(Entry.ElseBlock);
692
1.81k
        enterBlock(Entry.JumpBlock, {}, {}, std::move(Entry.Args),
693
1.81k
                   std::move(Entry.Type), std::move(Entry.ReturnPHI));
694
1.81k
        return {};
695
0
      }
696
1.56M
      default:
697
1.56M
        break;
698
1.59M
      }
699
700
1.56M
      if (isUnreachable()) {
701
472k
        return {};
702
472k
      }
703
704
1.09M
      switch (Instr.getOpCode()) {
705
      // Control instructions
706
3.53k
      case OpCode::Unreachable:
707
3.53k
        Builder.createBr(getTrapBB(ErrCode::Value::Unreachable));
708
3.53k
        setUnreachable();
709
3.53k
        Builder.positionAtEnd(
710
3.53k
            LLVM::BasicBlock::create(LLContext, F.Fn, "unreachable.end"));
711
3.53k
        break;
712
46.7k
      case OpCode::Nop:
713
46.7k
        break;
714
1
      case OpCode::Throw:
715
2
      case OpCode::Throw_ref:
716
        // TODO: EXCEPTION - implement the AOT.
717
2
        return Unexpect(ErrCode::Value::AOTNotImpl);
718
763
      case OpCode::Br: {
719
763
        const auto Label = Instr.getJump().TargetIndex;
720
763
        setLableJumpPHI(Label);
721
763
        Builder.createBr(getLabel(Label));
722
763
        setUnreachable();
723
763
        Builder.positionAtEnd(
724
763
            LLVM::BasicBlock::create(LLContext, F.Fn, "br.end"));
725
763
        break;
726
1
      }
727
357
      case OpCode::Br_if: {
728
357
        const auto Label = Instr.getJump().TargetIndex;
729
357
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
730
357
        setLableJumpPHI(Label);
731
357
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_if.end");
732
357
        Builder.createCondBr(Cond, getLabel(Label), Next);
733
357
        Builder.positionAtEnd(Next);
734
357
        break;
735
1
      }
736
1.00k
      case OpCode::Br_table: {
737
1.00k
        auto LabelTable = Instr.getLabelList();
738
1.00k
        assuming(LabelTable.size() <= std::numeric_limits<uint32_t>::max());
739
1.00k
        const auto LabelTableSize =
740
1.00k
            static_cast<uint32_t>(LabelTable.size() - 1);
741
1.00k
        auto Value = stackPop();
742
1.00k
        setLableJumpPHI(LabelTable[LabelTableSize].TargetIndex);
743
1.00k
        auto Switch = Builder.createSwitch(
744
1.00k
            Value, getLabel(LabelTable[LabelTableSize].TargetIndex),
745
1.00k
            LabelTableSize);
746
36.4k
        for (uint32_t I = 0; I < LabelTableSize; ++I) {
747
35.4k
          setLableJumpPHI(LabelTable[I].TargetIndex);
748
35.4k
          Switch.addCase(LLContext.getInt32(I),
749
35.4k
                         getLabel(LabelTable[I].TargetIndex));
750
35.4k
        }
751
1.00k
        setUnreachable();
752
1.00k
        Builder.positionAtEnd(
753
1.00k
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_table.end"));
754
1.00k
        break;
755
1.00k
      }
756
57
      case OpCode::Br_on_null: {
757
57
        const auto Label = Instr.getJump().TargetIndex;
758
57
        auto Value = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
759
57
        auto Cond = Builder.createICmpEQ(
760
57
            Builder.createExtractElement(Value, LLContext.getInt64(1)),
761
57
            LLContext.getInt64(0));
762
57
        setLableJumpPHI(Label);
763
57
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_null.end");
764
57
        Builder.createCondBr(Cond, getLabel(Label), Next);
765
57
        Builder.positionAtEnd(Next);
766
57
        stackPush(Value);
767
57
        break;
768
1.00k
      }
769
7
      case OpCode::Br_on_non_null: {
770
7
        const auto Label = Instr.getJump().TargetIndex;
771
7
        auto Cond = Builder.createICmpNE(
772
7
            Builder.createExtractElement(
773
7
                Builder.createBitCast(Stack.back(), Context.Int64x2Ty),
774
7
                LLContext.getInt64(1)),
775
7
            LLContext.getInt64(0));
776
7
        setLableJumpPHI(Label);
777
7
        auto Next =
778
7
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_non_null.end");
779
7
        Builder.createCondBr(Cond, getLabel(Label), Next);
780
7
        Builder.positionAtEnd(Next);
781
7
        stackPop();
782
7
        break;
783
1.00k
      }
784
0
      case OpCode::Br_on_cast:
785
0
      case OpCode::Br_on_cast_fail: {
786
0
        auto Ref = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
787
0
        const auto Label = Instr.getBrCast().Jump.TargetIndex;
788
0
        std::array<uint8_t, 16> Buf = {0};
789
0
        std::copy_n(Instr.getBrCast().RType2.getRawData().cbegin(), 8,
790
0
                    Buf.begin());
791
0
        auto VType = Builder.createExtractElement(
792
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
793
0
                                  Context.Int64x2Ty),
794
0
            LLContext.getInt64(0));
795
0
        auto IsRefTest = Builder.createCall(
796
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
797
0
                                 LLVM::Type::getFunctionType(
798
0
                                     Context.Int32Ty,
799
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
800
0
                                     false)),
801
0
            {Ref, VType});
802
0
        auto Cond =
803
0
            (Instr.getOpCode() == OpCode::Br_on_cast)
804
0
                ? Builder.createICmpNE(IsRefTest, LLContext.getInt32(0))
805
0
                : Builder.createICmpEQ(IsRefTest, LLContext.getInt32(0));
806
0
        setLableJumpPHI(Label);
807
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_cast.end");
808
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
809
0
        Builder.positionAtEnd(Next);
810
0
        break;
811
0
      }
812
738
      case OpCode::Return:
813
738
        compileReturn();
814
738
        setUnreachable();
815
738
        Builder.positionAtEnd(
816
738
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret.end"));
817
738
        break;
818
3.61k
      case OpCode::Call:
819
3.61k
        updateInstrCount();
820
3.61k
        updateGas();
821
3.61k
        compileCallOp(Instr.getTargetIndex());
822
3.61k
        break;
823
1.15k
      case OpCode::Call_indirect:
824
1.15k
        updateInstrCount();
825
1.15k
        updateGas();
826
1.15k
        compileIndirectCallOp(Instr.getSourceIndex(), Instr.getTargetIndex());
827
1.15k
        break;
828
63
      case OpCode::Return_call:
829
63
        updateInstrCount();
830
63
        updateGas();
831
63
        compileReturnCallOp(Instr.getTargetIndex());
832
63
        setUnreachable();
833
63
        Builder.positionAtEnd(
834
63
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call.end"));
835
63
        break;
836
141
      case OpCode::Return_call_indirect:
837
141
        updateInstrCount();
838
141
        updateGas();
839
141
        compileReturnIndirectCallOp(Instr.getSourceIndex(),
840
141
                                    Instr.getTargetIndex());
841
141
        setUnreachable();
842
141
        Builder.positionAtEnd(
843
141
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_indir.end"));
844
141
        break;
845
1
      case OpCode::Call_ref:
846
1
        updateInstrCount();
847
1
        updateGas();
848
1
        compileCallRefOp(Instr.getTargetIndex());
849
1
        break;
850
1
      case OpCode::Return_call_ref:
851
1
        updateInstrCount();
852
1
        updateGas();
853
1
        compileReturnCallRefOp(Instr.getTargetIndex());
854
1
        setUnreachable();
855
1
        Builder.positionAtEnd(
856
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_ref.end"));
857
1
        break;
858
0
      case OpCode::Try_table:
859
        // TODO: EXCEPTION - implement the AOT.
860
0
        return Unexpect(ErrCode::Value::AOTNotImpl);
861
862
      // Reference Instructions
863
7.41k
      case OpCode::Ref__null: {
864
7.41k
        std::array<uint8_t, 16> Buf = {0};
865
        // Null references are narrowed to their hierarchy's Null* type code.
866
7.41k
        ValType VType;
867
7.41k
        if (Instr.getValType().isAbsHeapType()) {
868
7.33k
          switch (Instr.getValType().getHeapTypeCode()) {
869
41
          case TypeCode::NullFuncRef:
870
3.11k
          case TypeCode::FuncRef:
871
3.11k
            VType = TypeCode::NullFuncRef;
872
3.11k
            break;
873
64
          case TypeCode::NullExternRef:
874
3.65k
          case TypeCode::ExternRef:
875
3.65k
            VType = TypeCode::NullExternRef;
876
3.65k
            break;
877
19
          case TypeCode::NullExnRef:
878
37
          case TypeCode::ExnRef:
879
37
            VType = TypeCode::NullExnRef;
880
37
            break;
881
277
          case TypeCode::NullRef:
882
316
          case TypeCode::AnyRef:
883
374
          case TypeCode::EqRef:
884
452
          case TypeCode::I31Ref:
885
471
          case TypeCode::StructRef:
886
520
          case TypeCode::ArrayRef:
887
520
            VType = TypeCode::NullRef;
888
520
            break;
889
0
          default:
890
0
            assumingUnreachable();
891
7.33k
          }
892
7.33k
        } else {
893
84
          assuming(Instr.getValType().getTypeIndex() <
894
84
                   Context.CompositeTypes.size());
895
84
          const auto *CompType =
896
84
              Context.CompositeTypes[Instr.getValType().getTypeIndex()];
897
84
          assuming(CompType != nullptr);
898
84
          if (CompType->isFunc()) {
899
32
            VType = TypeCode::NullFuncRef;
900
52
          } else {
901
52
            VType = TypeCode::NullRef;
902
52
          }
903
84
        }
904
7.41k
        std::copy_n(VType.getRawData().cbegin(), 8, Buf.begin());
905
7.41k
        stackPush(Builder.createBitCast(
906
7.41k
            LLVM::Value::getConstVector8(LLContext, Buf), Context.Int64x2Ty));
907
7.41k
        break;
908
7.41k
      }
909
3.58k
      case OpCode::Ref__is_null:
910
3.58k
        stackPush(Builder.createZExt(
911
3.58k
            Builder.createICmpEQ(
912
3.58k
                Builder.createExtractElement(
913
3.58k
                    Builder.createBitCast(stackPop(), Context.Int64x2Ty),
914
3.58k
                    LLContext.getInt64(1)),
915
3.58k
                LLContext.getInt64(0)),
916
3.58k
            Context.Int32Ty));
917
3.58k
        break;
918
26
      case OpCode::Ref__func:
919
26
        stackPush(Builder.createCall(
920
26
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefFunc,
921
26
                                 LLVM::Type::getFunctionType(Context.Int64x2Ty,
922
26
                                                             {Context.Int32Ty},
923
26
                                                             false)),
924
26
            {LLContext.getInt32(Instr.getTargetIndex())}));
925
26
        break;
926
1
      case OpCode::Ref__eq: {
927
1
        LLVM::Value RHS = stackPop();
928
1
        LLVM::Value LHS = stackPop();
929
1
        stackPush(Builder.createZExt(
930
1
            Builder.createICmpEQ(
931
1
                Builder.createExtractElement(LHS, LLContext.getInt64(1)),
932
1
                Builder.createExtractElement(RHS, LLContext.getInt64(1))),
933
1
            Context.Int32Ty));
934
1
        break;
935
7.41k
      }
936
529
      case OpCode::Ref__as_non_null: {
937
529
        auto Next =
938
529
            LLVM::BasicBlock::create(LLContext, F.Fn, "ref_as_non_null.ok");
939
529
        Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
940
529
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
941
529
            Builder.createExtractElement(Stack.back(), LLContext.getInt64(1)),
942
529
            LLContext.getInt64(0)));
943
529
        Builder.createCondBr(IsNotNull, Next,
944
529
                             getTrapBB(ErrCode::Value::CastNullToNonNull));
945
529
        Builder.positionAtEnd(Next);
946
529
        break;
947
7.41k
      }
948
949
      // Reference Instructions (GC proposal)
950
25
      case OpCode::Struct__new:
951
43
      case OpCode::Struct__new_default: {
952
43
        LLVM::Value Args = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
953
43
        assuming(Instr.getTargetIndex() < Context.CompositeTypes.size());
954
43
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
955
43
        assuming(CompType != nullptr && !CompType->isFunc());
956
43
        auto ArgSize = CompType->getFieldTypes().size();
957
43
        if (Instr.getOpCode() == OpCode::Struct__new) {
958
25
          std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
959
25
          for (size_t I = 0; I < ArgSize; ++I) {
960
0
            ArgsVec[ArgSize - I - 1] = stackPop();
961
0
          }
962
25
          Args = Builder.createArray(ArgSize, kValSize);
963
25
          Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
964
25
        } else {
965
18
          ArgSize = 0;
966
18
        }
967
43
        stackPush(Builder.createCall(
968
43
            Context.getIntrinsic(
969
43
                Builder, Executable::Intrinsics::kStructNew,
970
43
                LLVM::Type::getFunctionType(
971
43
                    Context.Int64x2Ty,
972
43
                    {Context.Int32Ty, Context.Int8PtrTy, Context.Int32Ty},
973
43
                    false)),
974
43
            {LLContext.getInt32(Instr.getTargetIndex()), Args,
975
43
             LLContext.getInt32(static_cast<uint32_t>(ArgSize))}));
976
43
        break;
977
43
      }
978
0
      case OpCode::Struct__get:
979
0
      case OpCode::Struct__get_u:
980
0
      case OpCode::Struct__get_s: {
981
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
982
0
                 Context.CompositeTypes.size());
983
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
984
0
        assuming(CompType != nullptr && !CompType->isFunc());
985
0
        assuming(static_cast<size_t>(Instr.getSourceIndex()) <
986
0
                 CompType->getFieldTypes().size());
987
0
        const auto &StorageType =
988
0
            CompType->getFieldTypes()[Instr.getSourceIndex()].getStorageType();
989
0
        auto Ref = stackPop();
990
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Struct__get_s)
991
0
                            ? LLContext.getInt8(1)
992
0
                            : LLContext.getInt8(0);
993
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
994
0
        Builder.createCall(
995
0
            Context.getIntrinsic(
996
0
                Builder, Executable::Intrinsics::kStructGet,
997
0
                LLVM::Type::getFunctionType(Context.VoidTy,
998
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
999
0
                                             Context.Int32Ty, Context.Int8Ty,
1000
0
                                             Context.Int8PtrTy},
1001
0
                                            false)),
1002
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1003
0
             LLContext.getInt32(Instr.getSourceIndex()), IsSigned, Ret});
1004
1005
0
        switch (StorageType.getCode()) {
1006
0
        case TypeCode::I8:
1007
0
        case TypeCode::I16:
1008
0
        case TypeCode::I32: {
1009
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1010
0
                                               Context.Int64x2Ty));
1011
0
          break;
1012
0
        }
1013
0
        case TypeCode::I64: {
1014
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1015
0
                                               Context.Int64x2Ty));
1016
0
          break;
1017
0
        }
1018
0
        case TypeCode::F32: {
1019
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1020
0
                                               Context.Int64x2Ty));
1021
0
          break;
1022
0
        }
1023
0
        case TypeCode::F64: {
1024
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1025
0
                                               Context.Int64x2Ty));
1026
0
          break;
1027
0
        }
1028
0
        case TypeCode::V128:
1029
0
        case TypeCode::Ref:
1030
0
        case TypeCode::RefNull: {
1031
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1032
0
                                               Context.Int64x2Ty));
1033
0
          break;
1034
0
        }
1035
0
        default:
1036
0
          assumingUnreachable();
1037
0
        }
1038
0
        break;
1039
0
      }
1040
0
      case OpCode::Struct__set: {
1041
0
        auto Val = stackPop();
1042
0
        auto Ref = stackPop();
1043
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1044
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1045
0
        Builder.createCall(
1046
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kStructSet,
1047
0
                                 LLVM::Type::getFunctionType(
1048
0
                                     Context.VoidTy,
1049
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1050
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1051
0
                                     false)),
1052
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1053
0
             LLContext.getInt32(Instr.getSourceIndex()), Arg});
1054
0
        break;
1055
0
      }
1056
36
      case OpCode::Array__new: {
1057
36
        auto Length = stackPop();
1058
36
        auto Val = stackPop();
1059
36
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1060
36
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1061
36
        stackPush(Builder.createCall(
1062
36
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1063
36
                                 LLVM::Type::getFunctionType(
1064
36
                                     Context.Int64x2Ty,
1065
36
                                     {Context.Int32Ty, Context.Int32Ty,
1066
36
                                      Context.Int8PtrTy, Context.Int32Ty},
1067
36
                                     false)),
1068
36
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1069
36
             LLContext.getInt32(1)}));
1070
36
        break;
1071
0
      }
1072
49
      case OpCode::Array__new_default: {
1073
49
        auto Length = stackPop();
1074
49
        LLVM::Value Arg = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1075
49
        stackPush(Builder.createCall(
1076
49
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1077
49
                                 LLVM::Type::getFunctionType(
1078
49
                                     Context.Int64x2Ty,
1079
49
                                     {Context.Int32Ty, Context.Int32Ty,
1080
49
                                      Context.Int8PtrTy, Context.Int32Ty},
1081
49
                                     false)),
1082
49
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1083
49
             LLContext.getInt32(0)}));
1084
49
        break;
1085
0
      }
1086
44
      case OpCode::Array__new_fixed: {
1087
44
        const auto ArgSize = Instr.getSourceIndex();
1088
44
        std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1089
194
        for (size_t I = 0; I < ArgSize; ++I) {
1090
150
          ArgsVec[ArgSize - I - 1] = stackPop();
1091
150
        }
1092
44
        LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
1093
44
        Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1094
44
        stackPush(Builder.createCall(
1095
44
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1096
44
                                 LLVM::Type::getFunctionType(
1097
44
                                     Context.Int64x2Ty,
1098
44
                                     {Context.Int32Ty, Context.Int32Ty,
1099
44
                                      Context.Int8PtrTy, Context.Int32Ty},
1100
44
                                     false)),
1101
44
            {LLContext.getInt32(Instr.getTargetIndex()),
1102
44
             LLContext.getInt32(ArgSize), Args, LLContext.getInt32(ArgSize)}));
1103
44
        break;
1104
0
      }
1105
0
      case OpCode::Array__new_data:
1106
0
      case OpCode::Array__new_elem: {
1107
0
        auto Length = stackPop();
1108
0
        auto Start = stackPop();
1109
0
        stackPush(Builder.createCall(
1110
0
            Context.getIntrinsic(
1111
0
                Builder,
1112
0
                ((Instr.getOpCode() == OpCode::Array__new_data)
1113
0
                     ? Executable::Intrinsics::kArrayNewData
1114
0
                     : Executable::Intrinsics::kArrayNewElem),
1115
0
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1116
0
                                            {Context.Int32Ty, Context.Int32Ty,
1117
0
                                             Context.Int32Ty, Context.Int32Ty},
1118
0
                                            false)),
1119
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1120
0
             LLContext.getInt32(Instr.getSourceIndex()), Start, Length}));
1121
0
        break;
1122
0
      }
1123
109
      case OpCode::Array__get:
1124
141
      case OpCode::Array__get_u:
1125
171
      case OpCode::Array__get_s: {
1126
171
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1127
171
                 Context.CompositeTypes.size());
1128
171
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1129
171
        assuming(CompType != nullptr && !CompType->isFunc());
1130
171
        assuming(static_cast<size_t>(1) == CompType->getFieldTypes().size());
1131
171
        const auto &StorageType = CompType->getFieldTypes()[0].getStorageType();
1132
171
        auto Idx = stackPop();
1133
171
        auto Ref = stackPop();
1134
171
        auto IsSigned = (Instr.getOpCode() == OpCode::Array__get_s)
1135
171
                            ? LLContext.getInt8(1)
1136
171
                            : LLContext.getInt8(0);
1137
171
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1138
171
        Builder.createCall(
1139
171
            Context.getIntrinsic(
1140
171
                Builder, Executable::Intrinsics::kArrayGet,
1141
171
                LLVM::Type::getFunctionType(Context.VoidTy,
1142
171
                                            {Context.Int64x2Ty, Context.Int32Ty,
1143
171
                                             Context.Int32Ty, Context.Int8Ty,
1144
171
                                             Context.Int8PtrTy},
1145
171
                                            false)),
1146
171
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, IsSigned,
1147
171
             Ret});
1148
1149
171
        switch (StorageType.getCode()) {
1150
38
        case TypeCode::I8:
1151
62
        case TypeCode::I16:
1152
80
        case TypeCode::I32: {
1153
80
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1154
80
                                               Context.Int64x2Ty));
1155
80
          break;
1156
62
        }
1157
19
        case TypeCode::I64: {
1158
19
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1159
19
                                               Context.Int64x2Ty));
1160
19
          break;
1161
62
        }
1162
18
        case TypeCode::F32: {
1163
18
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1164
18
                                               Context.Int64x2Ty));
1165
18
          break;
1166
62
        }
1167
18
        case TypeCode::F64: {
1168
18
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1169
18
                                               Context.Int64x2Ty));
1170
18
          break;
1171
62
        }
1172
18
        case TypeCode::V128:
1173
18
        case TypeCode::Ref:
1174
36
        case TypeCode::RefNull: {
1175
36
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1176
36
                                               Context.Int64x2Ty));
1177
36
          break;
1178
18
        }
1179
0
        default:
1180
0
          assumingUnreachable();
1181
171
        }
1182
171
        break;
1183
171
      }
1184
171
      case OpCode::Array__set: {
1185
0
        auto Val = stackPop();
1186
0
        auto Idx = stackPop();
1187
0
        auto Ref = stackPop();
1188
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1189
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1190
0
        Builder.createCall(
1191
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArraySet,
1192
0
                                 LLVM::Type::getFunctionType(
1193
0
                                     Context.VoidTy,
1194
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1195
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1196
0
                                     false)),
1197
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, Arg});
1198
0
        break;
1199
171
      }
1200
52
      case OpCode::Array__len: {
1201
52
        auto Ref = stackPop();
1202
52
        stackPush(Builder.createCall(
1203
52
            Context.getIntrinsic(
1204
52
                Builder, Executable::Intrinsics::kArrayLen,
1205
52
                LLVM::Type::getFunctionType(Context.Int32Ty,
1206
52
                                            {Context.Int64x2Ty}, false)),
1207
52
            {Ref}));
1208
52
        break;
1209
171
      }
1210
0
      case OpCode::Array__fill: {
1211
0
        auto Cnt = stackPop();
1212
0
        auto Val = stackPop();
1213
0
        auto Off = stackPop();
1214
0
        auto Ref = stackPop();
1215
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1216
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1217
0
        Builder.createCall(
1218
0
            Context.getIntrinsic(
1219
0
                Builder, Executable::Intrinsics::kArrayFill,
1220
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1221
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1222
0
                                             Context.Int32Ty, Context.Int32Ty,
1223
0
                                             Context.Int8PtrTy},
1224
0
                                            false)),
1225
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Off, Cnt, Arg});
1226
0
        break;
1227
171
      }
1228
0
      case OpCode::Array__copy: {
1229
0
        auto Cnt = stackPop();
1230
0
        auto SrcOff = stackPop();
1231
0
        auto SrcRef = stackPop();
1232
0
        auto DstOff = stackPop();
1233
0
        auto DstRef = stackPop();
1234
0
        Builder.createCall(
1235
0
            Context.getIntrinsic(
1236
0
                Builder, Executable::Intrinsics::kArrayCopy,
1237
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1238
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1239
0
                                             Context.Int32Ty, Context.Int64x2Ty,
1240
0
                                             Context.Int32Ty, Context.Int32Ty,
1241
0
                                             Context.Int32Ty},
1242
0
                                            false)),
1243
0
            {DstRef, LLContext.getInt32(Instr.getTargetIndex()), DstOff, SrcRef,
1244
0
             LLContext.getInt32(Instr.getSourceIndex()), SrcOff, Cnt});
1245
0
        break;
1246
171
      }
1247
0
      case OpCode::Array__init_data:
1248
0
      case OpCode::Array__init_elem: {
1249
0
        auto Cnt = stackPop();
1250
0
        auto SrcOff = stackPop();
1251
0
        auto DstOff = stackPop();
1252
0
        auto Ref = stackPop();
1253
0
        Builder.createCall(
1254
0
            Context.getIntrinsic(
1255
0
                Builder,
1256
0
                ((Instr.getOpCode() == OpCode::Array__init_data)
1257
0
                     ? Executable::Intrinsics::kArrayInitData
1258
0
                     : Executable::Intrinsics::kArrayInitElem),
1259
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1260
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1261
0
                                             Context.Int32Ty, Context.Int32Ty,
1262
0
                                             Context.Int32Ty, Context.Int32Ty},
1263
0
                                            false)),
1264
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1265
0
             LLContext.getInt32(Instr.getSourceIndex()), DstOff, SrcOff, Cnt});
1266
0
        break;
1267
0
      }
1268
10
      case OpCode::Ref__test:
1269
25
      case OpCode::Ref__test_null: {
1270
25
        auto Ref = stackPop();
1271
25
        std::array<uint8_t, 16> Buf = {0};
1272
25
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1273
25
        auto VType = Builder.createExtractElement(
1274
25
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1275
25
                                  Context.Int64x2Ty),
1276
25
            LLContext.getInt64(0));
1277
25
        stackPush(Builder.createCall(
1278
25
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
1279
25
                                 LLVM::Type::getFunctionType(
1280
25
                                     Context.Int32Ty,
1281
25
                                     {Context.Int64x2Ty, Context.Int64Ty},
1282
25
                                     false)),
1283
25
            {Ref, VType}));
1284
25
        break;
1285
10
      }
1286
12
      case OpCode::Ref__cast:
1287
32
      case OpCode::Ref__cast_null: {
1288
32
        auto Ref = stackPop();
1289
32
        std::array<uint8_t, 16> Buf = {0};
1290
32
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1291
32
        auto VType = Builder.createExtractElement(
1292
32
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1293
32
                                  Context.Int64x2Ty),
1294
32
            LLContext.getInt64(0));
1295
32
        stackPush(Builder.createCall(
1296
32
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefCast,
1297
32
                                 LLVM::Type::getFunctionType(
1298
32
                                     Context.Int64x2Ty,
1299
32
                                     {Context.Int64x2Ty, Context.Int64Ty},
1300
32
                                     false)),
1301
32
            {Ref, VType}));
1302
32
        break;
1303
12
      }
1304
1
      case OpCode::Any__convert_extern: {
1305
1
        std::array<uint8_t, 16> RawRef = {0};
1306
1
        auto Ref = stackPop();
1307
1
        auto PtrVal = Builder.createExtractElement(Ref, LLContext.getInt64(1));
1308
1
        auto IsNullBB =
1309
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.null");
1310
1
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1311
1
                                                  "any_conv_extern.not_null");
1312
1
        auto IsExtrefBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1313
1
                                                   "any_conv_extern.is_extref");
1314
1
        auto EndBB =
1315
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.end");
1316
1
        auto CondIsNull = Builder.createICmpEQ(PtrVal, LLContext.getInt64(0));
1317
1
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1318
1319
1
        Builder.positionAtEnd(IsNullBB);
1320
1
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullRef);
1321
1
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1322
1
        auto Ret1 = Builder.createBitCast(
1323
1
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1324
1
        Builder.createBr(EndBB);
1325
1326
1
        Builder.positionAtEnd(NotNullBB);
1327
1
        auto Ret2 = Builder.createBitCast(
1328
1
            Builder.createInsertElement(
1329
1
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1330
1
                LLContext.getInt8(0), LLContext.getInt64(1)),
1331
1
            Context.Int64x2Ty);
1332
1
        auto HType = Builder.createExtractElement(
1333
1
            Builder.createBitCast(Ret2, Context.Int8x16Ty),
1334
1
            LLContext.getInt64(3));
1335
1
        auto CondIsExtref = Builder.createOr(
1336
1
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1337
1
                                            TypeCode::ExternRef))),
1338
1
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1339
1
                                            TypeCode::NullExternRef))));
1340
1
        Builder.createCondBr(CondIsExtref, IsExtrefBB, EndBB);
1341
1342
1
        Builder.positionAtEnd(IsExtrefBB);
1343
1
        VT = ValType(TypeCode::Ref, TypeCode::AnyRef);
1344
1
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1345
1
        auto Ret3 = Builder.createInsertElement(
1346
1
            Builder.createBitCast(
1347
1
                LLVM::Value::getConstVector8(LLContext, RawRef),
1348
1
                Context.Int64x2Ty),
1349
1
            PtrVal, LLContext.getInt64(1));
1350
1
        Builder.createBr(EndBB);
1351
1352
1
        Builder.positionAtEnd(EndBB);
1353
1
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1354
1
        Ret.addIncoming(Ret1, IsNullBB);
1355
1
        Ret.addIncoming(Ret2, NotNullBB);
1356
1
        Ret.addIncoming(Ret3, IsExtrefBB);
1357
1
        stackPush(Ret);
1358
1
        break;
1359
12
      }
1360
24
      case OpCode::Extern__convert_any: {
1361
24
        std::array<uint8_t, 16> RawRef = {0};
1362
24
        auto Ref = stackPop();
1363
24
        auto IsNullBB =
1364
24
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.null");
1365
24
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1366
24
                                                  "extern_conv_any.not_null");
1367
24
        auto EndBB =
1368
24
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.end");
1369
24
        auto CondIsNull = Builder.createICmpEQ(
1370
24
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1371
24
            LLContext.getInt64(0));
1372
24
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1373
1374
24
        Builder.positionAtEnd(IsNullBB);
1375
24
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullExternRef);
1376
24
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1377
24
        auto Ret1 = Builder.createBitCast(
1378
24
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1379
24
        Builder.createBr(EndBB);
1380
1381
24
        Builder.positionAtEnd(NotNullBB);
1382
24
        auto Ret2 = Builder.createBitCast(
1383
24
            Builder.createInsertElement(
1384
24
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1385
24
                LLContext.getInt8(1), LLContext.getInt64(1)),
1386
24
            Context.Int64x2Ty);
1387
24
        Builder.createBr(EndBB);
1388
1389
24
        Builder.positionAtEnd(EndBB);
1390
24
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1391
24
        Ret.addIncoming(Ret1, IsNullBB);
1392
24
        Ret.addIncoming(Ret2, NotNullBB);
1393
24
        stackPush(Ret);
1394
24
        break;
1395
12
      }
1396
83
      case OpCode::Ref__i31: {
1397
83
        std::array<uint8_t, 16> RawRef = {0};
1398
83
        auto VT = ValType(TypeCode::Ref, TypeCode::I31Ref);
1399
83
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1400
83
        auto Ref = Builder.createBitCast(
1401
83
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1402
83
        auto Val = Builder.createZExt(
1403
83
            Builder.createOr(
1404
83
                Builder.createAnd(stackPop(), LLContext.getInt32(0x7FFFFFFFU)),
1405
83
                LLContext.getInt32(0x80000000U)),
1406
83
            Context.Int64Ty);
1407
83
        stackPush(Builder.createInsertElement(Ref, Val, LLContext.getInt64(1)));
1408
83
        break;
1409
12
      }
1410
22
      case OpCode::I31__get_s: {
1411
22
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1412
22
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1413
22
        auto Val = Builder.createTrunc(
1414
22
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1415
22
            Context.Int32Ty);
1416
22
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1417
22
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1418
22
            LLContext.getInt32(0)));
1419
22
        Builder.createCondBr(IsNotNull, Next,
1420
22
                             getTrapBB(ErrCode::Value::AccessNullI31));
1421
22
        Builder.positionAtEnd(Next);
1422
22
        Val = Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU));
1423
22
        stackPush(Builder.createOr(
1424
22
            Val, Builder.createShl(
1425
22
                     Builder.createAnd(Val, LLContext.getInt32(0x40000000U)),
1426
22
                     LLContext.getInt32(1))));
1427
22
        break;
1428
12
      }
1429
10
      case OpCode::I31__get_u: {
1430
10
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1431
10
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1432
10
        auto Val = Builder.createTrunc(
1433
10
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1434
10
            Context.Int32Ty);
1435
10
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1436
10
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1437
10
            LLContext.getInt32(0)));
1438
10
        Builder.createCondBr(IsNotNull, Next,
1439
10
                             getTrapBB(ErrCode::Value::AccessNullI31));
1440
10
        Builder.positionAtEnd(Next);
1441
10
        stackPush(Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU)));
1442
10
        break;
1443
12
      }
1444
1445
      // Parametric Instructions
1446
3.58k
      case OpCode::Drop:
1447
3.58k
        stackPop();
1448
3.58k
        break;
1449
703
      case OpCode::Select:
1450
1.18k
      case OpCode::Select_t: {
1451
1.18k
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
1452
1.18k
        auto False = stackPop();
1453
1.18k
        auto True = stackPop();
1454
1.18k
        stackPush(Builder.createSelect(Cond, True, False));
1455
1.18k
        break;
1456
703
      }
1457
1458
      // Variable Instructions
1459
11.2k
      case OpCode::Local__get: {
1460
11.2k
        const auto &L = Local[Instr.getTargetIndex()];
1461
11.2k
        stackPush(Builder.createLoad(L.first, L.second));
1462
11.2k
        break;
1463
703
      }
1464
4.38k
      case OpCode::Local__set:
1465
4.38k
        Builder.createStore(stackPop(), Local[Instr.getTargetIndex()].second);
1466
4.38k
        break;
1467
830
      case OpCode::Local__tee:
1468
830
        Builder.createStore(Stack.back(), Local[Instr.getTargetIndex()].second);
1469
830
        break;
1470
319
      case OpCode::Global__get: {
1471
319
        const auto G =
1472
319
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex());
1473
319
        stackPush(Builder.createLoad(G.first, G.second));
1474
319
        break;
1475
703
      }
1476
59
      case OpCode::Global__set:
1477
59
        Builder.createStore(
1478
59
            stackPop(),
1479
59
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex()).second);
1480
59
        break;
1481
1482
      // Table Instructions
1483
32
      case OpCode::Table__get: {
1484
32
        auto Idx = stackPop();
1485
32
        stackPush(Builder.createCall(
1486
32
            Context.getIntrinsic(
1487
32
                Builder, Executable::Intrinsics::kTableGet,
1488
32
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1489
32
                                            {Context.Int32Ty, Context.Int32Ty},
1490
32
                                            false)),
1491
32
            {LLContext.getInt32(Instr.getTargetIndex()), Idx}));
1492
32
        break;
1493
703
      }
1494
26
      case OpCode::Table__set: {
1495
26
        auto Ref = stackPop();
1496
26
        auto Idx = stackPop();
1497
26
        Builder.createCall(
1498
26
            Context.getIntrinsic(
1499
26
                Builder, Executable::Intrinsics::kTableSet,
1500
26
                LLVM::Type::getFunctionType(
1501
26
                    Context.Int64Ty,
1502
26
                    {Context.Int32Ty, Context.Int32Ty, Context.Int64x2Ty},
1503
26
                    false)),
1504
26
            {LLContext.getInt32(Instr.getTargetIndex()), Idx, Ref});
1505
26
        break;
1506
703
      }
1507
25
      case OpCode::Table__init: {
1508
25
        auto Len = stackPop();
1509
25
        auto Src = stackPop();
1510
25
        auto Dst = stackPop();
1511
25
        Builder.createCall(
1512
25
            Context.getIntrinsic(
1513
25
                Builder, Executable::Intrinsics::kTableInit,
1514
25
                LLVM::Type::getFunctionType(Context.VoidTy,
1515
25
                                            {Context.Int32Ty, Context.Int32Ty,
1516
25
                                             Context.Int32Ty, Context.Int32Ty,
1517
25
                                             Context.Int32Ty},
1518
25
                                            false)),
1519
25
            {LLContext.getInt32(Instr.getTargetIndex()),
1520
25
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1521
25
        break;
1522
703
      }
1523
33
      case OpCode::Elem__drop: {
1524
33
        Builder.createCall(
1525
33
            Context.getIntrinsic(Builder, Executable::Intrinsics::kElemDrop,
1526
33
                                 LLVM::Type::getFunctionType(
1527
33
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1528
33
            {LLContext.getInt32(Instr.getTargetIndex())});
1529
33
        break;
1530
703
      }
1531
16
      case OpCode::Table__copy: {
1532
16
        auto Len = stackPop();
1533
16
        auto Src = stackPop();
1534
16
        auto Dst = stackPop();
1535
16
        Builder.createCall(
1536
16
            Context.getIntrinsic(
1537
16
                Builder, Executable::Intrinsics::kTableCopy,
1538
16
                LLVM::Type::getFunctionType(Context.VoidTy,
1539
16
                                            {Context.Int32Ty, Context.Int32Ty,
1540
16
                                             Context.Int32Ty, Context.Int32Ty,
1541
16
                                             Context.Int32Ty},
1542
16
                                            false)),
1543
16
            {LLContext.getInt32(Instr.getTargetIndex()),
1544
16
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1545
16
        break;
1546
703
      }
1547
17
      case OpCode::Table__grow: {
1548
17
        auto NewSize = stackPop();
1549
17
        auto Val = stackPop();
1550
17
        stackPush(Builder.createCall(
1551
17
            Context.getIntrinsic(
1552
17
                Builder, Executable::Intrinsics::kTableGrow,
1553
17
                LLVM::Type::getFunctionType(
1554
17
                    Context.Int32Ty,
1555
17
                    {Context.Int32Ty, Context.Int64x2Ty, Context.Int32Ty},
1556
17
                    false)),
1557
17
            {LLContext.getInt32(Instr.getTargetIndex()), Val, NewSize}));
1558
17
        break;
1559
703
      }
1560
21
      case OpCode::Table__size: {
1561
21
        stackPush(Builder.createCall(
1562
21
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableSize,
1563
21
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1564
21
                                                             {Context.Int32Ty},
1565
21
                                                             false)),
1566
21
            {LLContext.getInt32(Instr.getTargetIndex())}));
1567
21
        break;
1568
703
      }
1569
3
      case OpCode::Table__fill: {
1570
3
        auto Len = stackPop();
1571
3
        auto Val = stackPop();
1572
3
        auto Off = stackPop();
1573
3
        Builder.createCall(
1574
3
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableFill,
1575
3
                                 LLVM::Type::getFunctionType(
1576
3
                                     Context.Int32Ty,
1577
3
                                     {Context.Int32Ty, Context.Int32Ty,
1578
3
                                      Context.Int64x2Ty, Context.Int32Ty},
1579
3
                                     false)),
1580
3
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1581
3
        break;
1582
703
      }
1583
1584
      // Memory Instructions
1585
1.40k
      case OpCode::I32__load:
1586
1.40k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1587
1.40k
                      Instr.getMemoryAlign(), Context.Int32Ty);
1588
1.40k
        break;
1589
3.74k
      case OpCode::I64__load:
1590
3.74k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1591
3.74k
                      Instr.getMemoryAlign(), Context.Int64Ty);
1592
3.74k
        break;
1593
114
      case OpCode::F32__load:
1594
114
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1595
114
                      Instr.getMemoryAlign(), Context.FloatTy);
1596
114
        break;
1597
263
      case OpCode::F64__load:
1598
263
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1599
263
                      Instr.getMemoryAlign(), Context.DoubleTy);
1600
263
        break;
1601
671
      case OpCode::I32__load8_s:
1602
671
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1603
671
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1604
671
                      true);
1605
671
        break;
1606
231
      case OpCode::I32__load8_u:
1607
231
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1608
231
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1609
231
                      false);
1610
231
        break;
1611
381
      case OpCode::I32__load16_s:
1612
381
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1613
381
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1614
381
                      true);
1615
381
        break;
1616
1.65k
      case OpCode::I32__load16_u:
1617
1.65k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1618
1.65k
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1619
1.65k
                      false);
1620
1.65k
        break;
1621
718
      case OpCode::I64__load8_s:
1622
718
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1623
718
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1624
718
                      true);
1625
718
        break;
1626
471
      case OpCode::I64__load8_u:
1627
471
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1628
471
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1629
471
                      false);
1630
471
        break;
1631
467
      case OpCode::I64__load16_s:
1632
467
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1633
467
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1634
467
                      true);
1635
467
        break;
1636
671
      case OpCode::I64__load16_u:
1637
671
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1638
671
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1639
671
                      false);
1640
671
        break;
1641
468
      case OpCode::I64__load32_s:
1642
468
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1643
468
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1644
468
                      true);
1645
468
        break;
1646
545
      case OpCode::I64__load32_u:
1647
545
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1648
545
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1649
545
                      false);
1650
545
        break;
1651
444
      case OpCode::I32__store:
1652
444
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1653
444
                       Instr.getMemoryAlign(), Context.Int32Ty);
1654
444
        break;
1655
1.49k
      case OpCode::I64__store:
1656
1.49k
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1657
1.49k
                       Instr.getMemoryAlign(), Context.Int64Ty);
1658
1.49k
        break;
1659
67
      case OpCode::F32__store:
1660
67
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1661
67
                       Instr.getMemoryAlign(), Context.FloatTy);
1662
67
        break;
1663
46
      case OpCode::F64__store:
1664
46
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1665
46
                       Instr.getMemoryAlign(), Context.DoubleTy);
1666
46
        break;
1667
377
      case OpCode::I32__store8:
1668
397
      case OpCode::I64__store8:
1669
397
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1670
397
                       Instr.getMemoryAlign(), Context.Int8Ty, true);
1671
397
        break;
1672
222
      case OpCode::I32__store16:
1673
292
      case OpCode::I64__store16:
1674
292
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1675
292
                       Instr.getMemoryAlign(), Context.Int16Ty, true);
1676
292
        break;
1677
36
      case OpCode::I64__store32:
1678
36
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1679
36
                       Instr.getMemoryAlign(), Context.Int32Ty, true);
1680
36
        break;
1681
800
      case OpCode::Memory__size:
1682
800
        stackPush(Builder.createCall(
1683
800
            Context.getIntrinsic(Builder, Executable::Intrinsics::kMemSize,
1684
800
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1685
800
                                                             {Context.Int32Ty},
1686
800
                                                             false)),
1687
800
            {LLContext.getInt32(Instr.getTargetIndex())}));
1688
800
        break;
1689
659
      case OpCode::Memory__grow: {
1690
659
        auto Diff = stackPop();
1691
659
        stackPush(Builder.createCall(
1692
659
            Context.getIntrinsic(
1693
659
                Builder, Executable::Intrinsics::kMemGrow,
1694
659
                LLVM::Type::getFunctionType(Context.Int32Ty,
1695
659
                                            {Context.Int32Ty, Context.Int32Ty},
1696
659
                                            false)),
1697
659
            {LLContext.getInt32(Instr.getTargetIndex()), Diff}));
1698
659
        break;
1699
222
      }
1700
23
      case OpCode::Memory__init: {
1701
23
        auto Len = stackPop();
1702
23
        auto Src = stackPop();
1703
23
        auto Dst = stackPop();
1704
23
        Builder.createCall(
1705
23
            Context.getIntrinsic(
1706
23
                Builder, Executable::Intrinsics::kMemInit,
1707
23
                LLVM::Type::getFunctionType(Context.VoidTy,
1708
23
                                            {Context.Int32Ty, Context.Int32Ty,
1709
23
                                             Context.Int32Ty, Context.Int32Ty,
1710
23
                                             Context.Int32Ty},
1711
23
                                            false)),
1712
23
            {LLContext.getInt32(Instr.getTargetIndex()),
1713
23
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1714
23
        break;
1715
222
      }
1716
22
      case OpCode::Data__drop: {
1717
22
        Builder.createCall(
1718
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kDataDrop,
1719
22
                                 LLVM::Type::getFunctionType(
1720
22
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1721
22
            {LLContext.getInt32(Instr.getTargetIndex())});
1722
22
        break;
1723
222
      }
1724
266
      case OpCode::Memory__copy: {
1725
266
        auto Len = stackPop();
1726
266
        auto Src = stackPop();
1727
266
        auto Dst = stackPop();
1728
266
        Builder.createCall(
1729
266
            Context.getIntrinsic(
1730
266
                Builder, Executable::Intrinsics::kMemCopy,
1731
266
                LLVM::Type::getFunctionType(Context.VoidTy,
1732
266
                                            {Context.Int32Ty, Context.Int32Ty,
1733
266
                                             Context.Int32Ty, Context.Int32Ty,
1734
266
                                             Context.Int32Ty},
1735
266
                                            false)),
1736
266
            {LLContext.getInt32(Instr.getTargetIndex()),
1737
266
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1738
266
        break;
1739
222
      }
1740
566
      case OpCode::Memory__fill: {
1741
566
        auto Len = stackPop();
1742
566
        auto Val = Builder.createTrunc(stackPop(), Context.Int8Ty);
1743
566
        auto Off = stackPop();
1744
566
        Builder.createCall(
1745
566
            Context.getIntrinsic(
1746
566
                Builder, Executable::Intrinsics::kMemFill,
1747
566
                LLVM::Type::getFunctionType(Context.VoidTy,
1748
566
                                            {Context.Int32Ty, Context.Int32Ty,
1749
566
                                             Context.Int8Ty, Context.Int32Ty},
1750
566
                                            false)),
1751
566
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1752
566
        break;
1753
222
      }
1754
1755
      // Const Numeric Instructions
1756
597k
      case OpCode::I32__const:
1757
597k
        stackPush(LLContext.getInt32(Instr.getNum().get<uint32_t>()));
1758
597k
        break;
1759
83.5k
      case OpCode::I64__const:
1760
83.5k
        stackPush(LLContext.getInt64(Instr.getNum().get<uint64_t>()));
1761
83.5k
        break;
1762
14.5k
      case OpCode::F32__const:
1763
14.5k
        stackPush(LLContext.getFloat(Instr.getNum().get<float>()));
1764
14.5k
        break;
1765
6.78k
      case OpCode::F64__const:
1766
6.78k
        stackPush(LLContext.getDouble(Instr.getNum().get<double>()));
1767
6.78k
        break;
1768
1769
      // Unary Numeric Instructions
1770
7.94k
      case OpCode::I32__eqz:
1771
7.94k
        stackPush(Builder.createZExt(
1772
7.94k
            Builder.createICmpEQ(stackPop(), LLContext.getInt32(0)),
1773
7.94k
            Context.Int32Ty));
1774
7.94k
        break;
1775
1.23k
      case OpCode::I64__eqz:
1776
1.23k
        stackPush(Builder.createZExt(
1777
1.23k
            Builder.createICmpEQ(stackPop(), LLContext.getInt64(0)),
1778
1.23k
            Context.Int32Ty));
1779
1.23k
        break;
1780
2.29k
      case OpCode::I32__clz:
1781
2.29k
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1782
2.29k
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int32Ty},
1783
2.29k
                                          {stackPop(), LLContext.getFalse()}));
1784
2.29k
        break;
1785
298
      case OpCode::I64__clz:
1786
298
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1787
298
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int64Ty},
1788
298
                                          {stackPop(), LLContext.getFalse()}));
1789
298
        break;
1790
1.78k
      case OpCode::I32__ctz:
1791
1.78k
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1792
1.78k
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int32Ty},
1793
1.78k
                                          {stackPop(), LLContext.getFalse()}));
1794
1.78k
        break;
1795
418
      case OpCode::I64__ctz:
1796
418
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1797
418
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int64Ty},
1798
418
                                          {stackPop(), LLContext.getFalse()}));
1799
418
        break;
1800
17.7k
      case OpCode::I32__popcnt:
1801
19.6k
      case OpCode::I64__popcnt:
1802
19.6k
        assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
1803
19.6k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, stackPop()));
1804
19.6k
        break;
1805
835
      case OpCode::F32__abs:
1806
1.88k
      case OpCode::F64__abs:
1807
1.88k
        assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
1808
1.88k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Fabs, stackPop()));
1809
1.88k
        break;
1810
1.06k
      case OpCode::F32__neg:
1811
1.85k
      case OpCode::F64__neg:
1812
1.85k
        stackPush(Builder.createFNeg(stackPop()));
1813
1.85k
        break;
1814
1.95k
      case OpCode::F32__ceil:
1815
4.13k
      case OpCode::F64__ceil:
1816
4.13k
        assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
1817
4.13k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ceil, stackPop()));
1818
4.13k
        break;
1819
881
      case OpCode::F32__floor:
1820
1.26k
      case OpCode::F64__floor:
1821
1.26k
        assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
1822
1.26k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Floor, stackPop()));
1823
1.26k
        break;
1824
554
      case OpCode::F32__trunc:
1825
856
      case OpCode::F64__trunc:
1826
856
        assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
1827
856
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Trunc, stackPop()));
1828
856
        break;
1829
829
      case OpCode::F32__nearest:
1830
1.21k
      case OpCode::F64__nearest: {
1831
1.21k
        const bool IsFloat = Instr.getOpCode() == OpCode::F32__nearest;
1832
1.21k
        LLVM::Value Value = stackPop();
1833
1834
1.21k
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
1835
1.21k
        assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
1836
1.21k
        if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
1837
1.21k
          stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Value));
1838
1.21k
          break;
1839
1.21k
        }
1840
0
#endif
1841
1842
        // The VectorSize is only used when SSE4_1 or NEON is supported.
1843
0
        [[maybe_unused]] const uint32_t VectorSize = IsFloat ? 4 : 2;
1844
0
#if defined(__x86_64__)
1845
0
        if (Context.SupportSSE4_1) {
1846
0
          auto Zero = LLContext.getInt64(0);
1847
0
          auto VectorTy =
1848
0
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1849
0
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1850
0
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1851
0
          auto ID = IsFloat ? LLVM::Core::X86SSE41RoundSs
1852
0
                            : LLVM::Core::X86SSE41RoundSd;
1853
0
          assuming(ID != LLVM::Core::NotIntrinsic);
1854
0
          Ret = Builder.createIntrinsic(ID, {},
1855
0
                                        {Ret, Ret, LLContext.getInt32(8)});
1856
0
          Ret = Builder.createExtractElement(Ret, Zero);
1857
0
          stackPush(Ret);
1858
0
          break;
1859
0
        }
1860
0
#endif
1861
1862
#if defined(__aarch64__)
1863
        if (Context.SupportNEON &&
1864
            LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
1865
          auto Zero = LLContext.getInt64(0);
1866
          auto VectorTy =
1867
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1868
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1869
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1870
          Ret =
1871
              Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, Ret);
1872
          Ret = Builder.createExtractElement(Ret, Zero);
1873
          stackPush(Ret);
1874
          break;
1875
        }
1876
#endif
1877
1878
        // Fallback case.
1879
        // If SSE4.1 is not supported on the x86_64 platform or
1880
        // NEON is not supported on the aarch64 platform,
1881
        // then fall back to this.
1882
0
        assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
1883
0
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Value));
1884
0
        break;
1885
0
      }
1886
401
      case OpCode::F32__sqrt:
1887
1.61k
      case OpCode::F64__sqrt:
1888
1.61k
        assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
1889
1.61k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, stackPop()));
1890
1.61k
        break;
1891
339
      case OpCode::I32__wrap_i64:
1892
339
        stackPush(Builder.createTrunc(stackPop(), Context.Int32Ty));
1893
339
        break;
1894
1.34k
      case OpCode::I32__trunc_f32_s:
1895
1.34k
        compileSignedTrunc(Context.Int32Ty);
1896
1.34k
        break;
1897
308
      case OpCode::I32__trunc_f64_s:
1898
308
        compileSignedTrunc(Context.Int32Ty);
1899
308
        break;
1900
200
      case OpCode::I32__trunc_f32_u:
1901
200
        compileUnsignedTrunc(Context.Int32Ty);
1902
200
        break;
1903
1.15k
      case OpCode::I32__trunc_f64_u:
1904
1.15k
        compileUnsignedTrunc(Context.Int32Ty);
1905
1.15k
        break;
1906
2.34k
      case OpCode::I64__extend_i32_s:
1907
2.34k
        stackPush(Builder.createSExt(stackPop(), Context.Int64Ty));
1908
2.34k
        break;
1909
398
      case OpCode::I64__extend_i32_u:
1910
398
        stackPush(Builder.createZExt(stackPop(), Context.Int64Ty));
1911
398
        break;
1912
57
      case OpCode::I64__trunc_f32_s:
1913
57
        compileSignedTrunc(Context.Int64Ty);
1914
57
        break;
1915
399
      case OpCode::I64__trunc_f64_s:
1916
399
        compileSignedTrunc(Context.Int64Ty);
1917
399
        break;
1918
959
      case OpCode::I64__trunc_f32_u:
1919
959
        compileUnsignedTrunc(Context.Int64Ty);
1920
959
        break;
1921
1.29k
      case OpCode::I64__trunc_f64_u:
1922
1.29k
        compileUnsignedTrunc(Context.Int64Ty);
1923
1.29k
        break;
1924
1.69k
      case OpCode::F32__convert_i32_s:
1925
2.08k
      case OpCode::F32__convert_i64_s:
1926
2.08k
        stackPush(Builder.createSIToFP(stackPop(), Context.FloatTy));
1927
2.08k
        break;
1928
652
      case OpCode::F32__convert_i32_u:
1929
1.77k
      case OpCode::F32__convert_i64_u:
1930
1.77k
        stackPush(Builder.createUIToFP(stackPop(), Context.FloatTy));
1931
1.77k
        break;
1932
1.63k
      case OpCode::F64__convert_i32_s:
1933
5.76k
      case OpCode::F64__convert_i64_s:
1934
5.76k
        stackPush(Builder.createSIToFP(stackPop(), Context.DoubleTy));
1935
5.76k
        break;
1936
1.68k
      case OpCode::F64__convert_i32_u:
1937
1.87k
      case OpCode::F64__convert_i64_u:
1938
1.87k
        stackPush(Builder.createUIToFP(stackPop(), Context.DoubleTy));
1939
1.87k
        break;
1940
221
      case OpCode::F32__demote_f64:
1941
221
        stackPush(Builder.createFPTrunc(stackPop(), Context.FloatTy));
1942
221
        break;
1943
104
      case OpCode::F64__promote_f32:
1944
104
        stackPush(Builder.createFPExt(stackPop(), Context.DoubleTy));
1945
104
        break;
1946
650
      case OpCode::I32__reinterpret_f32:
1947
650
        stackPush(Builder.createBitCast(stackPop(), Context.Int32Ty));
1948
650
        break;
1949
674
      case OpCode::I64__reinterpret_f64:
1950
674
        stackPush(Builder.createBitCast(stackPop(), Context.Int64Ty));
1951
674
        break;
1952
4.39k
      case OpCode::F32__reinterpret_i32:
1953
4.39k
        stackPush(Builder.createBitCast(stackPop(), Context.FloatTy));
1954
4.39k
        break;
1955
1.15k
      case OpCode::F64__reinterpret_i64:
1956
1.15k
        stackPush(Builder.createBitCast(stackPop(), Context.DoubleTy));
1957
1.15k
        break;
1958
2.42k
      case OpCode::I32__extend8_s:
1959
2.42k
        stackPush(Builder.createSExt(
1960
2.42k
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int32Ty));
1961
2.42k
        break;
1962
3.13k
      case OpCode::I32__extend16_s:
1963
3.13k
        stackPush(Builder.createSExt(
1964
3.13k
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int32Ty));
1965
3.13k
        break;
1966
366
      case OpCode::I64__extend8_s:
1967
366
        stackPush(Builder.createSExt(
1968
366
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int64Ty));
1969
366
        break;
1970
625
      case OpCode::I64__extend16_s:
1971
625
        stackPush(Builder.createSExt(
1972
625
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int64Ty));
1973
625
        break;
1974
745
      case OpCode::I64__extend32_s:
1975
745
        stackPush(Builder.createSExt(
1976
745
            Builder.createTrunc(stackPop(), Context.Int32Ty), Context.Int64Ty));
1977
745
        break;
1978
1979
      // Binary Numeric Instructions
1980
1.20k
      case OpCode::I32__eq:
1981
1.43k
      case OpCode::I64__eq: {
1982
1.43k
        LLVM::Value RHS = stackPop();
1983
1.43k
        LLVM::Value LHS = stackPop();
1984
1.43k
        stackPush(Builder.createZExt(Builder.createICmpEQ(LHS, RHS),
1985
1.43k
                                     Context.Int32Ty));
1986
1.43k
        break;
1987
1.20k
      }
1988
660
      case OpCode::I32__ne:
1989
681
      case OpCode::I64__ne: {
1990
681
        LLVM::Value RHS = stackPop();
1991
681
        LLVM::Value LHS = stackPop();
1992
681
        stackPush(Builder.createZExt(Builder.createICmpNE(LHS, RHS),
1993
681
                                     Context.Int32Ty));
1994
681
        break;
1995
660
      }
1996
4.34k
      case OpCode::I32__lt_s:
1997
4.96k
      case OpCode::I64__lt_s: {
1998
4.96k
        LLVM::Value RHS = stackPop();
1999
4.96k
        LLVM::Value LHS = stackPop();
2000
4.96k
        stackPush(Builder.createZExt(Builder.createICmpSLT(LHS, RHS),
2001
4.96k
                                     Context.Int32Ty));
2002
4.96k
        break;
2003
4.34k
      }
2004
6.25k
      case OpCode::I32__lt_u:
2005
6.60k
      case OpCode::I64__lt_u: {
2006
6.60k
        LLVM::Value RHS = stackPop();
2007
6.60k
        LLVM::Value LHS = stackPop();
2008
6.60k
        stackPush(Builder.createZExt(Builder.createICmpULT(LHS, RHS),
2009
6.60k
                                     Context.Int32Ty));
2010
6.60k
        break;
2011
6.25k
      }
2012
1.13k
      case OpCode::I32__gt_s:
2013
1.57k
      case OpCode::I64__gt_s: {
2014
1.57k
        LLVM::Value RHS = stackPop();
2015
1.57k
        LLVM::Value LHS = stackPop();
2016
1.57k
        stackPush(Builder.createZExt(Builder.createICmpSGT(LHS, RHS),
2017
1.57k
                                     Context.Int32Ty));
2018
1.57k
        break;
2019
1.13k
      }
2020
7.40k
      case OpCode::I32__gt_u:
2021
7.58k
      case OpCode::I64__gt_u: {
2022
7.58k
        LLVM::Value RHS = stackPop();
2023
7.58k
        LLVM::Value LHS = stackPop();
2024
7.58k
        stackPush(Builder.createZExt(Builder.createICmpUGT(LHS, RHS),
2025
7.58k
                                     Context.Int32Ty));
2026
7.58k
        break;
2027
7.40k
      }
2028
2.25k
      case OpCode::I32__le_s:
2029
3.07k
      case OpCode::I64__le_s: {
2030
3.07k
        LLVM::Value RHS = stackPop();
2031
3.07k
        LLVM::Value LHS = stackPop();
2032
3.07k
        stackPush(Builder.createZExt(Builder.createICmpSLE(LHS, RHS),
2033
3.07k
                                     Context.Int32Ty));
2034
3.07k
        break;
2035
2.25k
      }
2036
780
      case OpCode::I32__le_u:
2037
2.07k
      case OpCode::I64__le_u: {
2038
2.07k
        LLVM::Value RHS = stackPop();
2039
2.07k
        LLVM::Value LHS = stackPop();
2040
2.07k
        stackPush(Builder.createZExt(Builder.createICmpULE(LHS, RHS),
2041
2.07k
                                     Context.Int32Ty));
2042
2.07k
        break;
2043
780
      }
2044
1.24k
      case OpCode::I32__ge_s:
2045
1.27k
      case OpCode::I64__ge_s: {
2046
1.27k
        LLVM::Value RHS = stackPop();
2047
1.27k
        LLVM::Value LHS = stackPop();
2048
1.27k
        stackPush(Builder.createZExt(Builder.createICmpSGE(LHS, RHS),
2049
1.27k
                                     Context.Int32Ty));
2050
1.27k
        break;
2051
1.24k
      }
2052
2.74k
      case OpCode::I32__ge_u:
2053
3.38k
      case OpCode::I64__ge_u: {
2054
3.38k
        LLVM::Value RHS = stackPop();
2055
3.38k
        LLVM::Value LHS = stackPop();
2056
3.38k
        stackPush(Builder.createZExt(Builder.createICmpUGE(LHS, RHS),
2057
3.38k
                                     Context.Int32Ty));
2058
3.38k
        break;
2059
2.74k
      }
2060
159
      case OpCode::F32__eq:
2061
259
      case OpCode::F64__eq: {
2062
259
        LLVM::Value RHS = stackPop();
2063
259
        LLVM::Value LHS = stackPop();
2064
259
        stackPush(Builder.createZExt(Builder.createFCmpOEQ(LHS, RHS),
2065
259
                                     Context.Int32Ty));
2066
259
        break;
2067
159
      }
2068
89
      case OpCode::F32__ne:
2069
117
      case OpCode::F64__ne: {
2070
117
        LLVM::Value RHS = stackPop();
2071
117
        LLVM::Value LHS = stackPop();
2072
117
        stackPush(Builder.createZExt(Builder.createFCmpUNE(LHS, RHS),
2073
117
                                     Context.Int32Ty));
2074
117
        break;
2075
89
      }
2076
183
      case OpCode::F32__lt:
2077
310
      case OpCode::F64__lt: {
2078
310
        LLVM::Value RHS = stackPop();
2079
310
        LLVM::Value LHS = stackPop();
2080
310
        stackPush(Builder.createZExt(Builder.createFCmpOLT(LHS, RHS),
2081
310
                                     Context.Int32Ty));
2082
310
        break;
2083
183
      }
2084
152
      case OpCode::F32__gt:
2085
211
      case OpCode::F64__gt: {
2086
211
        LLVM::Value RHS = stackPop();
2087
211
        LLVM::Value LHS = stackPop();
2088
211
        stackPush(Builder.createZExt(Builder.createFCmpOGT(LHS, RHS),
2089
211
                                     Context.Int32Ty));
2090
211
        break;
2091
152
      }
2092
76
      case OpCode::F32__le:
2093
179
      case OpCode::F64__le: {
2094
179
        LLVM::Value RHS = stackPop();
2095
179
        LLVM::Value LHS = stackPop();
2096
179
        stackPush(Builder.createZExt(Builder.createFCmpOLE(LHS, RHS),
2097
179
                                     Context.Int32Ty));
2098
179
        break;
2099
76
      }
2100
238
      case OpCode::F32__ge:
2101
267
      case OpCode::F64__ge: {
2102
267
        LLVM::Value RHS = stackPop();
2103
267
        LLVM::Value LHS = stackPop();
2104
267
        stackPush(Builder.createZExt(Builder.createFCmpOGE(LHS, RHS),
2105
267
                                     Context.Int32Ty));
2106
267
        break;
2107
238
      }
2108
750
      case OpCode::I32__add:
2109
1.23k
      case OpCode::I64__add: {
2110
1.23k
        LLVM::Value RHS = stackPop();
2111
1.23k
        LLVM::Value LHS = stackPop();
2112
1.23k
        stackPush(Builder.createAdd(LHS, RHS));
2113
1.23k
        break;
2114
750
      }
2115
1.97k
      case OpCode::I32__sub:
2116
2.40k
      case OpCode::I64__sub: {
2117
2.40k
        LLVM::Value RHS = stackPop();
2118
2.40k
        LLVM::Value LHS = stackPop();
2119
2120
2.40k
        stackPush(Builder.createSub(LHS, RHS));
2121
2.40k
        break;
2122
1.97k
      }
2123
610
      case OpCode::I32__mul:
2124
1.23k
      case OpCode::I64__mul: {
2125
1.23k
        LLVM::Value RHS = stackPop();
2126
1.23k
        LLVM::Value LHS = stackPop();
2127
1.23k
        stackPush(Builder.createMul(LHS, RHS));
2128
1.23k
        break;
2129
610
      }
2130
1.27k
      case OpCode::I32__div_s:
2131
1.73k
      case OpCode::I64__div_s: {
2132
1.73k
        LLVM::Value RHS = stackPop();
2133
1.73k
        LLVM::Value LHS = stackPop();
2134
1.73k
        if constexpr (kForceDivCheck) {
2135
1.73k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_s;
2136
1.73k
          LLVM::Value IntZero =
2137
1.73k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2138
1.73k
          LLVM::Value IntMinusOne =
2139
1.73k
              Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2140
1.73k
                   : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2141
1.73k
          LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2142
1.27k
                                          std::numeric_limits<int32_t>::min()))
2143
1.73k
                                    : LLContext.getInt64(static_cast<uint64_t>(
2144
457
                                          std::numeric_limits<int64_t>::min()));
2145
2146
1.73k
          auto NoZeroBB =
2147
1.73k
              LLVM::BasicBlock::create(LLContext, F.Fn, "div.nozero");
2148
1.73k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2149
2150
1.73k
          auto IsNotZero =
2151
1.73k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2152
1.73k
          Builder.createCondBr(IsNotZero, NoZeroBB,
2153
1.73k
                               getTrapBB(ErrCode::Value::DivideByZero));
2154
2155
1.73k
          Builder.positionAtEnd(NoZeroBB);
2156
1.73k
          auto NotOverflow = Builder.createLikely(
2157
1.73k
              Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2158
1.73k
                               Builder.createICmpNE(RHS, IntMinusOne)));
2159
1.73k
          Builder.createCondBr(NotOverflow, OkBB,
2160
1.73k
                               getTrapBB(ErrCode::Value::IntegerOverflow));
2161
2162
1.73k
          Builder.positionAtEnd(OkBB);
2163
1.73k
        }
2164
1.73k
        stackPush(Builder.createSDiv(LHS, RHS));
2165
1.73k
        break;
2166
1.27k
      }
2167
3.34k
      case OpCode::I32__div_u:
2168
3.65k
      case OpCode::I64__div_u: {
2169
3.65k
        LLVM::Value RHS = stackPop();
2170
3.65k
        LLVM::Value LHS = stackPop();
2171
3.65k
        if constexpr (kForceDivCheck) {
2172
3.65k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_u;
2173
3.65k
          LLVM::Value IntZero =
2174
3.65k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2175
3.65k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2176
2177
3.65k
          auto IsNotZero =
2178
3.65k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2179
3.65k
          Builder.createCondBr(IsNotZero, OkBB,
2180
3.65k
                               getTrapBB(ErrCode::Value::DivideByZero));
2181
3.65k
          Builder.positionAtEnd(OkBB);
2182
3.65k
        }
2183
3.65k
        stackPush(Builder.createUDiv(LHS, RHS));
2184
3.65k
        break;
2185
3.34k
      }
2186
1.13k
      case OpCode::I32__rem_s:
2187
1.60k
      case OpCode::I64__rem_s: {
2188
1.60k
        LLVM::Value RHS = stackPop();
2189
1.60k
        LLVM::Value LHS = stackPop();
2190
        // Handle INT32_MIN % -1 (or INT64_MIN % -1 for i64): skip the srem and yield 0.
2191
1.60k
        const bool Is32 = Instr.getOpCode() == OpCode::I32__rem_s;
2192
1.60k
        LLVM::Value IntMinusOne =
2193
1.60k
            Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2194
1.60k
                 : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2195
1.60k
        LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2196
1.13k
                                        std::numeric_limits<int32_t>::min()))
2197
1.60k
                                  : LLContext.getInt64(static_cast<uint64_t>(
2198
463
                                        std::numeric_limits<int64_t>::min()));
2199
1.60k
        LLVM::Value IntZero =
2200
1.60k
            Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2201
2202
1.60k
        auto NoOverflowBB =
2203
1.60k
            LLVM::BasicBlock::create(LLContext, F.Fn, "no.overflow");
2204
1.60k
        auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "end.overflow");
2205
2206
1.60k
        if constexpr (kForceDivCheck) {
2207
1.60k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2208
2209
1.60k
          auto IsNotZero =
2210
1.60k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2211
1.60k
          Builder.createCondBr(IsNotZero, OkBB,
2212
1.60k
                               getTrapBB(ErrCode::Value::DivideByZero));
2213
1.60k
          Builder.positionAtEnd(OkBB);
2214
1.60k
        }
2215
2216
1.60k
        auto CurrBB = Builder.getInsertBlock();
2217
2218
1.60k
        auto NotOverflow = Builder.createLikely(
2219
1.60k
            Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2220
1.60k
                             Builder.createICmpNE(RHS, IntMinusOne)));
2221
1.60k
        Builder.createCondBr(NotOverflow, NoOverflowBB, EndBB);
2222
2223
1.60k
        Builder.positionAtEnd(NoOverflowBB);
2224
1.60k
        auto Ret1 = Builder.createSRem(LHS, RHS);
2225
1.60k
        Builder.createBr(EndBB);
2226
2227
1.60k
        Builder.positionAtEnd(EndBB);
2228
1.60k
        auto Ret = Builder.createPHI(Ret1.getType());
2229
1.60k
        Ret.addIncoming(Ret1, NoOverflowBB);
2230
1.60k
        Ret.addIncoming(IntZero, CurrBB);
2231
2232
1.60k
        stackPush(Ret);
2233
1.60k
        break;
2234
1.13k
      }
2235
1.43k
      case OpCode::I32__rem_u:
2236
2.00k
      case OpCode::I64__rem_u: {
2237
2.00k
        LLVM::Value RHS = stackPop();
2238
2.00k
        LLVM::Value LHS = stackPop();
2239
2.00k
        if constexpr (kForceDivCheck) {
2240
2.00k
          LLVM::Value IntZero = Instr.getOpCode() == OpCode::I32__rem_u
2241
2.00k
                                    ? LLContext.getInt32(0)
2242
2.00k
                                    : LLContext.getInt64(0);
2243
2.00k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2244
2245
2.00k
          auto IsNotZero =
2246
2.00k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2247
2.00k
          Builder.createCondBr(IsNotZero, OkBB,
2248
2.00k
                               getTrapBB(ErrCode::Value::DivideByZero));
2249
2.00k
          Builder.positionAtEnd(OkBB);
2250
2.00k
        }
2251
2.00k
        stackPush(Builder.createURem(LHS, RHS));
2252
2.00k
        break;
2253
1.43k
      }
2254
726
      case OpCode::I32__and:
2255
2.07k
      case OpCode::I64__and: {
2256
2.07k
        LLVM::Value RHS = stackPop();
2257
2.07k
        LLVM::Value LHS = stackPop();
2258
2.07k
        stackPush(Builder.createAnd(LHS, RHS));
2259
2.07k
        break;
2260
726
      }
2261
1.30k
      case OpCode::I32__or:
2262
1.65k
      case OpCode::I64__or: {
2263
1.65k
        LLVM::Value RHS = stackPop();
2264
1.65k
        LLVM::Value LHS = stackPop();
2265
1.65k
        stackPush(Builder.createOr(LHS, RHS));
2266
1.65k
        break;
2267
1.30k
      }
2268
1.62k
      case OpCode::I32__xor:
2269
2.37k
      case OpCode::I64__xor: {
2270
2.37k
        LLVM::Value RHS = stackPop();
2271
2.37k
        LLVM::Value LHS = stackPop();
2272
2.37k
        stackPush(Builder.createXor(LHS, RHS));
2273
2.37k
        break;
2274
1.62k
      }
2275
1.81k
      case OpCode::I32__shl:
2276
2.39k
      case OpCode::I64__shl: {
2277
2.39k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shl
2278
2.39k
                               ? LLContext.getInt32(31)
2279
2.39k
                               : LLContext.getInt64(63);
2280
2.39k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2281
2.39k
        LLVM::Value LHS = stackPop();
2282
2.39k
        stackPush(Builder.createShl(LHS, RHS));
2283
2.39k
        break;
2284
1.81k
      }
2285
1.91k
      case OpCode::I32__shr_s:
2286
2.46k
      case OpCode::I64__shr_s: {
2287
2.46k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_s
2288
2.46k
                               ? LLContext.getInt32(31)
2289
2.46k
                               : LLContext.getInt64(63);
2290
2.46k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2291
2.46k
        LLVM::Value LHS = stackPop();
2292
2.46k
        stackPush(Builder.createAShr(LHS, RHS));
2293
2.46k
        break;
2294
1.91k
      }
2295
4.75k
      case OpCode::I32__shr_u:
2296
5.04k
      case OpCode::I64__shr_u: {
2297
5.04k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_u
2298
5.04k
                               ? LLContext.getInt32(31)
2299
5.04k
                               : LLContext.getInt64(63);
2300
5.04k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2301
5.04k
        LLVM::Value LHS = stackPop();
2302
5.04k
        stackPush(Builder.createLShr(LHS, RHS));
2303
5.04k
        break;
2304
4.75k
      }
2305
2.86k
      case OpCode::I32__rotl: {
2306
2.86k
        LLVM::Value RHS = stackPop();
2307
2.86k
        LLVM::Value LHS = stackPop();
2308
2.86k
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2309
2.86k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int32Ty},
2310
2.86k
                                          {LHS, LHS, RHS}));
2311
2.86k
        break;
2312
2.86k
      }
2313
803
      case OpCode::I32__rotr: {
2314
803
        LLVM::Value RHS = stackPop();
2315
803
        LLVM::Value LHS = stackPop();
2316
803
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2317
803
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int32Ty},
2318
803
                                          {LHS, LHS, RHS}));
2319
803
        break;
2320
803
      }
2321
967
      case OpCode::I64__rotl: {
2322
967
        LLVM::Value RHS = stackPop();
2323
967
        LLVM::Value LHS = stackPop();
2324
967
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2325
967
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int64Ty},
2326
967
                                          {LHS, LHS, RHS}));
2327
967
        break;
2328
967
      }
2329
1.30k
      case OpCode::I64__rotr: {
2330
1.30k
        LLVM::Value RHS = stackPop();
2331
1.30k
        LLVM::Value LHS = stackPop();
2332
1.30k
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2333
1.30k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int64Ty},
2334
1.30k
                                          {LHS, LHS, RHS}));
2335
1.30k
        break;
2336
1.30k
      }
2337
280
      case OpCode::F32__add:
2338
584
      case OpCode::F64__add: {
2339
584
        LLVM::Value RHS = stackPop();
2340
584
        LLVM::Value LHS = stackPop();
2341
584
        stackPush(Builder.createFAdd(LHS, RHS));
2342
584
        break;
2343
280
      }
2344
136
      case OpCode::F32__sub:
2345
429
      case OpCode::F64__sub: {
2346
429
        LLVM::Value RHS = stackPop();
2347
429
        LLVM::Value LHS = stackPop();
2348
429
        stackPush(Builder.createFSub(LHS, RHS));
2349
429
        break;
2350
136
      }
2351
538
      case OpCode::F32__mul:
2352
683
      case OpCode::F64__mul: {
2353
683
        LLVM::Value RHS = stackPop();
2354
683
        LLVM::Value LHS = stackPop();
2355
683
        stackPush(Builder.createFMul(LHS, RHS));
2356
683
        break;
2357
538
      }
2358
224
      case OpCode::F32__div:
2359
564
      case OpCode::F64__div: {
2360
564
        LLVM::Value RHS = stackPop();
2361
564
        LLVM::Value LHS = stackPop();
2362
564
        stackPush(Builder.createFDiv(LHS, RHS));
2363
564
        break;
2364
224
      }
2365
308
      case OpCode::F32__min:
2366
673
      case OpCode::F64__min: {
2367
673
        LLVM::Value RHS = stackPop();
2368
673
        LLVM::Value LHS = stackPop();
2369
673
        auto FpTy = Instr.getOpCode() == OpCode::F32__min ? Context.FloatTy
2370
673
                                                          : Context.DoubleTy;
2371
673
        auto IntTy = Instr.getOpCode() == OpCode::F32__min ? Context.Int32Ty
2372
673
                                                           : Context.Int64Ty;
2373
2374
673
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2375
673
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2376
2377
673
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2378
673
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2379
673
        auto OrInt = Builder.createOr(LHSInt, RHSInt);
2380
673
        auto OrFp = Builder.createBitCast(OrInt, FpTy);
2381
2382
673
        auto AddFp = Builder.createFAdd(LHS, RHS);
2383
2384
673
        assuming(LLVM::Core::MinNum != LLVM::Core::NotIntrinsic);
2385
673
        auto MinFp = Builder.createIntrinsic(LLVM::Core::MinNum,
2386
673
                                             {LHS.getType()}, {LHS, RHS});
2387
2388
673
        auto Ret = Builder.createSelect(
2389
673
            UEQ, Builder.createSelect(UNO, AddFp, OrFp), MinFp);
2390
673
        stackPush(Ret);
2391
673
        break;
2392
673
      }
2393
342
      case OpCode::F32__max:
2394
778
      case OpCode::F64__max: {
2395
778
        LLVM::Value RHS = stackPop();
2396
778
        LLVM::Value LHS = stackPop();
2397
778
        auto FpTy = Instr.getOpCode() == OpCode::F32__max ? Context.FloatTy
2398
778
                                                          : Context.DoubleTy;
2399
778
        auto IntTy = Instr.getOpCode() == OpCode::F32__max ? Context.Int32Ty
2400
778
                                                           : Context.Int64Ty;
2401
2402
778
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2403
778
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2404
2405
778
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2406
778
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2407
778
        auto AndInt = Builder.createAnd(LHSInt, RHSInt);
2408
778
        auto AndFp = Builder.createBitCast(AndInt, FpTy);
2409
2410
778
        auto AddFp = Builder.createFAdd(LHS, RHS);
2411
2412
778
        assuming(LLVM::Core::MaxNum != LLVM::Core::NotIntrinsic);
2413
778
        auto MaxFp = Builder.createIntrinsic(LLVM::Core::MaxNum,
2414
778
                                             {LHS.getType()}, {LHS, RHS});
2415
2416
778
        auto Ret = Builder.createSelect(
2417
778
            UEQ, Builder.createSelect(UNO, AddFp, AndFp), MaxFp);
2418
778
        stackPush(Ret);
2419
778
        break;
2420
778
      }
2421
443
      case OpCode::F32__copysign:
2422
821
      case OpCode::F64__copysign: {
2423
821
        LLVM::Value RHS = stackPop();
2424
821
        LLVM::Value LHS = stackPop();
2425
821
        assuming(LLVM::Core::CopySign != LLVM::Core::NotIntrinsic);
2426
821
        stackPush(Builder.createIntrinsic(LLVM::Core::CopySign, {LHS.getType()},
2427
821
                                          {LHS, RHS}));
2428
821
        break;
2429
821
      }
2430
2431
      // Saturating Truncation Numeric Instructions
2432
207
      case OpCode::I32__trunc_sat_f32_s:
2433
207
        compileSignedTruncSat(Context.Int32Ty);
2434
207
        break;
2435
112
      case OpCode::I32__trunc_sat_f32_u:
2436
112
        compileUnsignedTruncSat(Context.Int32Ty);
2437
112
        break;
2438
410
      case OpCode::I32__trunc_sat_f64_s:
2439
410
        compileSignedTruncSat(Context.Int32Ty);
2440
410
        break;
2441
303
      case OpCode::I32__trunc_sat_f64_u:
2442
303
        compileUnsignedTruncSat(Context.Int32Ty);
2443
303
        break;
2444
385
      case OpCode::I64__trunc_sat_f32_s:
2445
385
        compileSignedTruncSat(Context.Int64Ty);
2446
385
        break;
2447
373
      case OpCode::I64__trunc_sat_f32_u:
2448
373
        compileUnsignedTruncSat(Context.Int64Ty);
2449
373
        break;
2450
291
      case OpCode::I64__trunc_sat_f64_s:
2451
291
        compileSignedTruncSat(Context.Int64Ty);
2452
291
        break;
2453
383
      case OpCode::I64__trunc_sat_f64_u:
2454
383
        compileUnsignedTruncSat(Context.Int64Ty);
2455
383
        break;
2456
2457
      // SIMD Memory Instructions
2458
5.11k
      case OpCode::V128__load:
2459
5.11k
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2460
5.11k
                            Instr.getMemoryAlign(), Context.Int128x1Ty);
2461
5.11k
        break;
2462
222
      case OpCode::V128__load8x8_s:
2463
222
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2464
222
                            Instr.getMemoryAlign(),
2465
222
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2466
222
                            Context.Int16x8Ty, true);
2467
222
        break;
2468
44
      case OpCode::V128__load8x8_u:
2469
44
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2470
44
                            Instr.getMemoryAlign(),
2471
44
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2472
44
                            Context.Int16x8Ty, false);
2473
44
        break;
2474
367
      case OpCode::V128__load16x4_s:
2475
367
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2476
367
                            Instr.getMemoryAlign(),
2477
367
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2478
367
                            Context.Int32x4Ty, true);
2479
367
        break;
2480
542
      case OpCode::V128__load16x4_u:
2481
542
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2482
542
                            Instr.getMemoryAlign(),
2483
542
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2484
542
                            Context.Int32x4Ty, false);
2485
542
        break;
2486
174
      case OpCode::V128__load32x2_s:
2487
174
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2488
174
                            Instr.getMemoryAlign(),
2489
174
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2490
174
                            Context.Int64x2Ty, true);
2491
174
        break;
2492
189
      case OpCode::V128__load32x2_u:
2493
189
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2494
189
                            Instr.getMemoryAlign(),
2495
189
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2496
189
                            Context.Int64x2Ty, false);
2497
189
        break;
2498
69
      case OpCode::V128__load8_splat:
2499
69
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2500
69
                           Instr.getMemoryAlign(), Context.Int8Ty,
2501
69
                           Context.Int8x16Ty);
2502
69
        break;
2503
188
      case OpCode::V128__load16_splat:
2504
188
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2505
188
                           Instr.getMemoryAlign(), Context.Int16Ty,
2506
188
                           Context.Int16x8Ty);
2507
188
        break;
2508
230
      case OpCode::V128__load32_splat:
2509
230
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2510
230
                           Instr.getMemoryAlign(), Context.Int32Ty,
2511
230
                           Context.Int32x4Ty);
2512
230
        break;
2513
194
      case OpCode::V128__load64_splat:
2514
194
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2515
194
                           Instr.getMemoryAlign(), Context.Int64Ty,
2516
194
                           Context.Int64x2Ty);
2517
194
        break;
2518
83
      case OpCode::V128__load32_zero:
2519
83
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2520
83
                            Instr.getMemoryAlign(), Context.Int32Ty,
2521
83
                            Context.Int128Ty, false);
2522
83
        break;
2523
143
      case OpCode::V128__load64_zero:
2524
143
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2525
143
                            Instr.getMemoryAlign(), Context.Int64Ty,
2526
143
                            Context.Int128Ty, false);
2527
143
        break;
2528
235
      case OpCode::V128__store:
2529
235
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2530
235
                       Instr.getMemoryAlign(), Context.Int128x1Ty, false, true);
2531
235
        break;
2532
197
      case OpCode::V128__load8_lane:
2533
197
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2534
197
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2535
197
                          Context.Int8Ty, Context.Int8x16Ty);
2536
197
        break;
2537
149
      case OpCode::V128__load16_lane:
2538
149
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2539
149
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2540
149
                          Context.Int16Ty, Context.Int16x8Ty);
2541
149
        break;
2542
133
      case OpCode::V128__load32_lane:
2543
133
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2544
133
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2545
133
                          Context.Int32Ty, Context.Int32x4Ty);
2546
133
        break;
2547
22
      case OpCode::V128__load64_lane:
2548
22
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2549
22
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2550
22
                          Context.Int64Ty, Context.Int64x2Ty);
2551
22
        break;
2552
131
      case OpCode::V128__store8_lane:
2553
131
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2554
131
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2555
131
                           Context.Int8Ty, Context.Int8x16Ty);
2556
131
        break;
2557
63
      case OpCode::V128__store16_lane:
2558
63
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2559
63
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2560
63
                           Context.Int16Ty, Context.Int16x8Ty);
2561
63
        break;
2562
118
      case OpCode::V128__store32_lane:
2563
118
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2564
118
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2565
118
                           Context.Int32Ty, Context.Int32x4Ty);
2566
118
        break;
2567
35
      case OpCode::V128__store64_lane:
2568
35
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2569
35
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2570
35
                           Context.Int64Ty, Context.Int64x2Ty);
2571
35
        break;
2572
2573
      // SIMD Const Instructions
2574
360
      case OpCode::V128__const: {
2575
360
        const auto Value = Instr.getNum().get<uint64x2_t>();
2576
360
        auto Vector =
2577
360
            LLVM::Value::getConstVector64(LLContext, {Value[0], Value[1]});
2578
360
        stackPush(Builder.createBitCast(Vector, Context.Int64x2Ty));
2579
360
        break;
2580
821
      }
2581
2582
      // SIMD Shuffle Instructions
2583
15
      case OpCode::I8x16__shuffle: {
2584
15
        auto V2 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2585
15
        auto V1 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2586
15
        const auto V3 = Instr.getNum().get<uint128_t>();
2587
15
        std::array<uint8_t, 16> Mask;
2588
255
        for (size_t I = 0; I < 16; ++I) {
2589
240
          auto Num = static_cast<uint8_t>(V3 >> (I * 8));
2590
240
          if constexpr (Endian::native == Endian::little) {
2591
240
            Mask[I] = Num;
2592
          } else {
2593
            Mask[15 - I] = Num < 16 ? 15 - Num : 47 - Num;
2594
          }
2595
240
        }
2596
15
        stackPush(Builder.createBitCast(
2597
15
            Builder.createShuffleVector(
2598
15
                V1, V2, LLVM::Value::getConstVector8(LLContext, Mask)),
2599
15
            Context.Int64x2Ty));
2600
15
        break;
2601
821
      }
2602
2603
      // SIMD Lane Instructions
2604
67
      case OpCode::I8x16__extract_lane_s:
2605
67
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2606
67
                             Context.Int32Ty, true);
2607
67
        break;
2608
28
      case OpCode::I8x16__extract_lane_u:
2609
28
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2610
28
                             Context.Int32Ty, false);
2611
28
        break;
2612
180
      case OpCode::I8x16__replace_lane:
2613
180
        compileReplaceLaneOp(Context.Int8x16Ty, Instr.getMemoryLane());
2614
180
        break;
2615
423
      case OpCode::I16x8__extract_lane_s:
2616
423
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2617
423
                             Context.Int32Ty, true);
2618
423
        break;
2619
456
      case OpCode::I16x8__extract_lane_u:
2620
456
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2621
456
                             Context.Int32Ty, false);
2622
456
        break;
2623
783
      case OpCode::I16x8__replace_lane:
2624
783
        compileReplaceLaneOp(Context.Int16x8Ty, Instr.getMemoryLane());
2625
783
        break;
2626
66
      case OpCode::I32x4__extract_lane:
2627
66
        compileExtractLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2628
66
        break;
2629
287
      case OpCode::I32x4__replace_lane:
2630
287
        compileReplaceLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2631
287
        break;
2632
143
      case OpCode::I64x2__extract_lane:
2633
143
        compileExtractLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2634
143
        break;
2635
14
      case OpCode::I64x2__replace_lane:
2636
14
        compileReplaceLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2637
14
        break;
2638
63
      case OpCode::F32x4__extract_lane:
2639
63
        compileExtractLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2640
63
        break;
2641
23
      case OpCode::F32x4__replace_lane:
2642
23
        compileReplaceLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2643
23
        break;
2644
92
      case OpCode::F64x2__extract_lane:
2645
92
        compileExtractLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2646
92
        break;
2647
7
      case OpCode::F64x2__replace_lane:
2648
7
        compileReplaceLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2649
7
        break;
2650
2651
      // SIMD Numeric Instructions
2652
64
      case OpCode::I8x16__swizzle:
2653
64
        compileVectorSwizzle();
2654
64
        break;
2655
40.4k
      case OpCode::I8x16__splat:
2656
40.4k
        compileSplatOp(Context.Int8x16Ty);
2657
40.4k
        break;
2658
9.79k
      case OpCode::I16x8__splat:
2659
9.79k
        compileSplatOp(Context.Int16x8Ty);
2660
9.79k
        break;
2661
1.36k
      case OpCode::I32x4__splat:
2662
1.36k
        compileSplatOp(Context.Int32x4Ty);
2663
1.36k
        break;
2664
412
      case OpCode::I64x2__splat:
2665
412
        compileSplatOp(Context.Int64x2Ty);
2666
412
        break;
2667
353
      case OpCode::F32x4__splat:
2668
353
        compileSplatOp(Context.Floatx4Ty);
2669
353
        break;
2670
169
      case OpCode::F64x2__splat:
2671
169
        compileSplatOp(Context.Doublex2Ty);
2672
169
        break;
2673
98
      case OpCode::I8x16__eq:
2674
98
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntEQ);
2675
98
        break;
2676
308
      case OpCode::I8x16__ne:
2677
308
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntNE);
2678
308
        break;
2679
66
      case OpCode::I8x16__lt_s:
2680
66
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLT);
2681
66
        break;
2682
110
      case OpCode::I8x16__lt_u:
2683
110
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULT);
2684
110
        break;
2685
136
      case OpCode::I8x16__gt_s:
2686
136
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGT);
2687
136
        break;
2688
211
      case OpCode::I8x16__gt_u:
2689
211
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGT);
2690
211
        break;
2691
88
      case OpCode::I8x16__le_s:
2692
88
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLE);
2693
88
        break;
2694
132
      case OpCode::I8x16__le_u:
2695
132
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULE);
2696
132
        break;
2697
662
      case OpCode::I8x16__ge_s:
2698
662
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGE);
2699
662
        break;
2700
110
      case OpCode::I8x16__ge_u:
2701
110
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGE);
2702
110
        break;
2703
81
      case OpCode::I16x8__eq:
2704
81
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntEQ);
2705
81
        break;
2706
210
      case OpCode::I16x8__ne:
2707
210
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntNE);
2708
210
        break;
2709
59
      case OpCode::I16x8__lt_s:
2710
59
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLT);
2711
59
        break;
2712
237
      case OpCode::I16x8__lt_u:
2713
237
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULT);
2714
237
        break;
2715
282
      case OpCode::I16x8__gt_s:
2716
282
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGT);
2717
282
        break;
2718
139
      case OpCode::I16x8__gt_u:
2719
139
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGT);
2720
139
        break;
2721
107
      case OpCode::I16x8__le_s:
2722
107
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLE);
2723
107
        break;
2724
96
      case OpCode::I16x8__le_u:
2725
96
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULE);
2726
96
        break;
2727
152
      case OpCode::I16x8__ge_s:
2728
152
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGE);
2729
152
        break;
2730
64
      case OpCode::I16x8__ge_u:
2731
64
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGE);
2732
64
        break;
2733
66
      case OpCode::I32x4__eq:
2734
66
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntEQ);
2735
66
        break;
2736
131
      case OpCode::I32x4__ne:
2737
131
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntNE);
2738
131
        break;
2739
40
      case OpCode::I32x4__lt_s:
2740
40
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLT);
2741
40
        break;
2742
138
      case OpCode::I32x4__lt_u:
2743
138
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULT);
2744
138
        break;
2745
106
      case OpCode::I32x4__gt_s:
2746
106
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGT);
2747
106
        break;
2748
228
      case OpCode::I32x4__gt_u:
2749
228
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGT);
2750
228
        break;
2751
282
      case OpCode::I32x4__le_s:
2752
282
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLE);
2753
282
        break;
2754
247
      case OpCode::I32x4__le_u:
2755
247
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULE);
2756
247
        break;
2757
63
      case OpCode::I32x4__ge_s:
2758
63
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGE);
2759
63
        break;
2760
99
      case OpCode::I32x4__ge_u:
2761
99
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGE);
2762
99
        break;
2763
125
      case OpCode::I64x2__eq:
2764
125
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntEQ);
2765
125
        break;
2766
53
      case OpCode::I64x2__ne:
2767
53
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntNE);
2768
53
        break;
2769
50
      case OpCode::I64x2__lt_s:
2770
50
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLT);
2771
50
        break;
2772
177
      case OpCode::I64x2__gt_s:
2773
177
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGT);
2774
177
        break;
2775
33
      case OpCode::I64x2__le_s:
2776
33
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLE);
2777
33
        break;
2778
92
      case OpCode::I64x2__ge_s:
2779
92
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGE);
2780
92
        break;
2781
1.38k
      case OpCode::F32x4__eq:
2782
1.38k
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOEQ,
2783
1.38k
                               Context.Int32x4Ty);
2784
1.38k
        break;
2785
37
      case OpCode::F32x4__ne:
2786
37
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealUNE,
2787
37
                               Context.Int32x4Ty);
2788
37
        break;
2789
829
      case OpCode::F32x4__lt:
2790
829
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLT,
2791
829
                               Context.Int32x4Ty);
2792
829
        break;
2793
76
      case OpCode::F32x4__gt:
2794
76
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGT,
2795
76
                               Context.Int32x4Ty);
2796
76
        break;
2797
358
      case OpCode::F32x4__le:
2798
358
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLE,
2799
358
                               Context.Int32x4Ty);
2800
358
        break;
2801
69
      case OpCode::F32x4__ge:
2802
69
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGE,
2803
69
                               Context.Int32x4Ty);
2804
69
        break;
2805
59
      case OpCode::F64x2__eq:
2806
59
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOEQ,
2807
59
                               Context.Int64x2Ty);
2808
59
        break;
2809
109
      case OpCode::F64x2__ne:
2810
109
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealUNE,
2811
109
                               Context.Int64x2Ty);
2812
109
        break;
2813
174
      case OpCode::F64x2__lt:
2814
174
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLT,
2815
174
                               Context.Int64x2Ty);
2816
174
        break;
2817
58
      case OpCode::F64x2__gt:
2818
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGT,
2819
58
                               Context.Int64x2Ty);
2820
58
        break;
2821
189
      case OpCode::F64x2__le:
2822
189
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLE,
2823
189
                               Context.Int64x2Ty);
2824
189
        break;
2825
85
      case OpCode::F64x2__ge:
2826
85
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGE,
2827
85
                               Context.Int64x2Ty);
2828
85
        break;
2829
133
      case OpCode::V128__not:
2830
133
        Stack.back() = Builder.createNot(Stack.back());
2831
133
        break;
2832
66
      case OpCode::V128__and: {
2833
66
        auto RHS = stackPop();
2834
66
        auto LHS = stackPop();
2835
66
        stackPush(Builder.createAnd(LHS, RHS));
2836
66
        break;
2837
821
      }
2838
82
      case OpCode::V128__andnot: {
2839
82
        auto RHS = stackPop();
2840
82
        auto LHS = stackPop();
2841
82
        stackPush(Builder.createAnd(LHS, Builder.createNot(RHS)));
2842
82
        break;
2843
821
      }
2844
114
      case OpCode::V128__or: {
2845
114
        auto RHS = stackPop();
2846
114
        auto LHS = stackPop();
2847
114
        stackPush(Builder.createOr(LHS, RHS));
2848
114
        break;
2849
821
      }
2850
54
      case OpCode::V128__xor: {
2851
54
        auto RHS = stackPop();
2852
54
        auto LHS = stackPop();
2853
54
        stackPush(Builder.createXor(LHS, RHS));
2854
54
        break;
2855
821
      }
2856
111
      case OpCode::V128__bitselect: {
2857
111
        auto C = stackPop();
2858
111
        auto V2 = stackPop();
2859
111
        auto V1 = stackPop();
2860
111
        stackPush(Builder.createXor(
2861
111
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
2862
111
        break;
2863
821
      }
2864
106
      case OpCode::V128__any_true:
2865
106
        compileVectorAnyTrue();
2866
106
        break;
2867
968
      case OpCode::I8x16__abs:
2868
968
        compileVectorAbs(Context.Int8x16Ty);
2869
968
        break;
2870
1.85k
      case OpCode::I8x16__neg:
2871
1.85k
        compileVectorNeg(Context.Int8x16Ty);
2872
1.85k
        break;
2873
147
      case OpCode::I8x16__popcnt:
2874
147
        compileVectorPopcnt();
2875
147
        break;
2876
349
      case OpCode::I8x16__all_true:
2877
349
        compileVectorAllTrue(Context.Int8x16Ty);
2878
349
        break;
2879
906
      case OpCode::I8x16__bitmask:
2880
906
        compileVectorBitMask(Context.Int8x16Ty);
2881
906
        break;
2882
89
      case OpCode::I8x16__narrow_i16x8_s:
2883
89
        compileVectorNarrow(Context.Int16x8Ty, true);
2884
89
        break;
2885
166
      case OpCode::I8x16__narrow_i16x8_u:
2886
166
        compileVectorNarrow(Context.Int16x8Ty, false);
2887
166
        break;
2888
153
      case OpCode::I8x16__shl:
2889
153
        compileVectorShl(Context.Int8x16Ty);
2890
153
        break;
2891
1.05k
      case OpCode::I8x16__shr_s:
2892
1.05k
        compileVectorAShr(Context.Int8x16Ty);
2893
1.05k
        break;
2894
71
      case OpCode::I8x16__shr_u:
2895
71
        compileVectorLShr(Context.Int8x16Ty);
2896
71
        break;
2897
50
      case OpCode::I8x16__add:
2898
50
        compileVectorVectorAdd(Context.Int8x16Ty);
2899
50
        break;
2900
1.03k
      case OpCode::I8x16__add_sat_s:
2901
1.03k
        compileVectorVectorAddSat(Context.Int8x16Ty, true);
2902
1.03k
        break;
2903
81
      case OpCode::I8x16__add_sat_u:
2904
81
        compileVectorVectorAddSat(Context.Int8x16Ty, false);
2905
81
        break;
2906
86
      case OpCode::I8x16__sub:
2907
86
        compileVectorVectorSub(Context.Int8x16Ty);
2908
86
        break;
2909
187
      case OpCode::I8x16__sub_sat_s:
2910
187
        compileVectorVectorSubSat(Context.Int8x16Ty, true);
2911
187
        break;
2912
83
      case OpCode::I8x16__sub_sat_u:
2913
83
        compileVectorVectorSubSat(Context.Int8x16Ty, false);
2914
83
        break;
2915
77
      case OpCode::I8x16__min_s:
2916
77
        compileVectorVectorSMin(Context.Int8x16Ty);
2917
77
        break;
2918
179
      case OpCode::I8x16__min_u:
2919
179
        compileVectorVectorUMin(Context.Int8x16Ty);
2920
179
        break;
2921
322
      case OpCode::I8x16__max_s:
2922
322
        compileVectorVectorSMax(Context.Int8x16Ty);
2923
322
        break;
2924
98
      case OpCode::I8x16__max_u:
2925
98
        compileVectorVectorUMax(Context.Int8x16Ty);
2926
98
        break;
2927
125
      case OpCode::I8x16__avgr_u:
2928
125
        compileVectorVectorUAvgr(Context.Int8x16Ty);
2929
125
        break;
2930
337
      case OpCode::I16x8__abs:
2931
337
        compileVectorAbs(Context.Int16x8Ty);
2932
337
        break;
2933
204
      case OpCode::I16x8__neg:
2934
204
        compileVectorNeg(Context.Int16x8Ty);
2935
204
        break;
2936
151
      case OpCode::I16x8__all_true:
2937
151
        compileVectorAllTrue(Context.Int16x8Ty);
2938
151
        break;
2939
131
      case OpCode::I16x8__bitmask:
2940
131
        compileVectorBitMask(Context.Int16x8Ty);
2941
131
        break;
2942
46
      case OpCode::I16x8__narrow_i32x4_s:
2943
46
        compileVectorNarrow(Context.Int32x4Ty, true);
2944
46
        break;
2945
381
      case OpCode::I16x8__narrow_i32x4_u:
2946
381
        compileVectorNarrow(Context.Int32x4Ty, false);
2947
381
        break;
2948
1.03k
      case OpCode::I16x8__extend_low_i8x16_s:
2949
1.03k
        compileVectorExtend(Context.Int8x16Ty, true, true);
2950
1.03k
        break;
2951
100
      case OpCode::I16x8__extend_high_i8x16_s:
2952
100
        compileVectorExtend(Context.Int8x16Ty, true, false);
2953
100
        break;
2954
393
      case OpCode::I16x8__extend_low_i8x16_u:
2955
393
        compileVectorExtend(Context.Int8x16Ty, false, true);
2956
393
        break;
2957
13
      case OpCode::I16x8__extend_high_i8x16_u:
2958
13
        compileVectorExtend(Context.Int8x16Ty, false, false);
2959
13
        break;
2960
115
      case OpCode::I16x8__shl:
2961
115
        compileVectorShl(Context.Int16x8Ty);
2962
115
        break;
2963
541
      case OpCode::I16x8__shr_s:
2964
541
        compileVectorAShr(Context.Int16x8Ty);
2965
541
        break;
2966
120
      case OpCode::I16x8__shr_u:
2967
120
        compileVectorLShr(Context.Int16x8Ty);
2968
120
        break;
2969
149
      case OpCode::I16x8__add:
2970
149
        compileVectorVectorAdd(Context.Int16x8Ty);
2971
149
        break;
2972
19
      case OpCode::I16x8__add_sat_s:
2973
19
        compileVectorVectorAddSat(Context.Int16x8Ty, true);
2974
19
        break;
2975
474
      case OpCode::I16x8__add_sat_u:
2976
474
        compileVectorVectorAddSat(Context.Int16x8Ty, false);
2977
474
        break;
2978
346
      case OpCode::I16x8__sub:
2979
346
        compileVectorVectorSub(Context.Int16x8Ty);
2980
346
        break;
2981
29
      case OpCode::I16x8__sub_sat_s:
2982
29
        compileVectorVectorSubSat(Context.Int16x8Ty, true);
2983
29
        break;
2984
95
      case OpCode::I16x8__sub_sat_u:
2985
95
        compileVectorVectorSubSat(Context.Int16x8Ty, false);
2986
95
        break;
2987
113
      case OpCode::I16x8__mul:
2988
113
        compileVectorVectorMul(Context.Int16x8Ty);
2989
113
        break;
2990
158
      case OpCode::I16x8__min_s:
2991
158
        compileVectorVectorSMin(Context.Int16x8Ty);
2992
158
        break;
2993
123
      case OpCode::I16x8__min_u:
2994
123
        compileVectorVectorUMin(Context.Int16x8Ty);
2995
123
        break;
2996
87
      case OpCode::I16x8__max_s:
2997
87
        compileVectorVectorSMax(Context.Int16x8Ty);
2998
87
        break;
2999
792
      case OpCode::I16x8__max_u:
3000
792
        compileVectorVectorUMax(Context.Int16x8Ty);
3001
792
        break;
3002
168
      case OpCode::I16x8__avgr_u:
3003
168
        compileVectorVectorUAvgr(Context.Int16x8Ty);
3004
168
        break;
3005
80
      case OpCode::I16x8__extmul_low_i8x16_s:
3006
80
        compileVectorExtMul(Context.Int8x16Ty, true, true);
3007
80
        break;
3008
221
      case OpCode::I16x8__extmul_high_i8x16_s:
3009
221
        compileVectorExtMul(Context.Int8x16Ty, true, false);
3010
221
        break;
3011
122
      case OpCode::I16x8__extmul_low_i8x16_u:
3012
122
        compileVectorExtMul(Context.Int8x16Ty, false, true);
3013
122
        break;
3014
505
      case OpCode::I16x8__extmul_high_i8x16_u:
3015
505
        compileVectorExtMul(Context.Int8x16Ty, false, false);
3016
505
        break;
3017
181
      case OpCode::I16x8__q15mulr_sat_s:
3018
181
        compileVectorVectorQ15MulSat();
3019
181
        break;
3020
368
      case OpCode::I16x8__extadd_pairwise_i8x16_s:
3021
368
        compileVectorExtAddPairwise(Context.Int8x16Ty, true);
3022
368
        break;
3023
329
      case OpCode::I16x8__extadd_pairwise_i8x16_u:
3024
329
        compileVectorExtAddPairwise(Context.Int8x16Ty, false);
3025
329
        break;
3026
57
      case OpCode::I32x4__abs:
3027
57
        compileVectorAbs(Context.Int32x4Ty);
3028
57
        break;
3029
206
      case OpCode::I32x4__neg:
3030
206
        compileVectorNeg(Context.Int32x4Ty);
3031
206
        break;
3032
182
      case OpCode::I32x4__all_true:
3033
182
        compileVectorAllTrue(Context.Int32x4Ty);
3034
182
        break;
3035
87
      case OpCode::I32x4__bitmask:
3036
87
        compileVectorBitMask(Context.Int32x4Ty);
3037
87
        break;
3038
114
      case OpCode::I32x4__extend_low_i16x8_s:
3039
114
        compileVectorExtend(Context.Int16x8Ty, true, true);
3040
114
        break;
3041
519
      case OpCode::I32x4__extend_high_i16x8_s:
3042
519
        compileVectorExtend(Context.Int16x8Ty, true, false);
3043
519
        break;
3044
1.89k
      case OpCode::I32x4__extend_low_i16x8_u:
3045
1.89k
        compileVectorExtend(Context.Int16x8Ty, false, true);
3046
1.89k
        break;
3047
142
      case OpCode::I32x4__extend_high_i16x8_u:
3048
142
        compileVectorExtend(Context.Int16x8Ty, false, false);
3049
142
        break;
3050
1.61k
      case OpCode::I32x4__shl:
3051
1.61k
        compileVectorShl(Context.Int32x4Ty);
3052
1.61k
        break;
3053
449
      case OpCode::I32x4__shr_s:
3054
449
        compileVectorAShr(Context.Int32x4Ty);
3055
449
        break;
3056
431
      case OpCode::I32x4__shr_u:
3057
431
        compileVectorLShr(Context.Int32x4Ty);
3058
431
        break;
3059
154
      case OpCode::I32x4__add:
3060
154
        compileVectorVectorAdd(Context.Int32x4Ty);
3061
154
        break;
3062
158
      case OpCode::I32x4__sub:
3063
158
        compileVectorVectorSub(Context.Int32x4Ty);
3064
158
        break;
3065
255
      case OpCode::I32x4__mul:
3066
255
        compileVectorVectorMul(Context.Int32x4Ty);
3067
255
        break;
3068
90
      case OpCode::I32x4__min_s:
3069
90
        compileVectorVectorSMin(Context.Int32x4Ty);
3070
90
        break;
3071
70
      case OpCode::I32x4__min_u:
3072
70
        compileVectorVectorUMin(Context.Int32x4Ty);
3073
70
        break;
3074
60
      case OpCode::I32x4__max_s:
3075
60
        compileVectorVectorSMax(Context.Int32x4Ty);
3076
60
        break;
3077
88
      case OpCode::I32x4__max_u:
3078
88
        compileVectorVectorUMax(Context.Int32x4Ty);
3079
88
        break;
3080
129
      case OpCode::I32x4__extmul_low_i16x8_s:
3081
129
        compileVectorExtMul(Context.Int16x8Ty, true, true);
3082
129
        break;
3083
60
      case OpCode::I32x4__extmul_high_i16x8_s:
3084
60
        compileVectorExtMul(Context.Int16x8Ty, true, false);
3085
60
        break;
3086
230
      case OpCode::I32x4__extmul_low_i16x8_u:
3087
230
        compileVectorExtMul(Context.Int16x8Ty, false, true);
3088
230
        break;
3089
116
      case OpCode::I32x4__extmul_high_i16x8_u:
3090
116
        compileVectorExtMul(Context.Int16x8Ty, false, false);
3091
116
        break;
3092
1.34k
      case OpCode::I32x4__extadd_pairwise_i16x8_s:
3093
1.34k
        compileVectorExtAddPairwise(Context.Int16x8Ty, true);
3094
1.34k
        break;
3095
688
      case OpCode::I32x4__extadd_pairwise_i16x8_u:
3096
688
        compileVectorExtAddPairwise(Context.Int16x8Ty, false);
3097
688
        break;
3098
146
      case OpCode::I32x4__dot_i16x8_s: {
3099
146
        auto ExtendTy = Context.Int16x8Ty.getExtendedElementVectorType();
3100
146
        auto Undef = LLVM::Value::getUndef(ExtendTy);
3101
146
        auto LHS = Builder.createSExt(
3102
146
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3103
146
        auto RHS = Builder.createSExt(
3104
146
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3105
146
        auto M = Builder.createMul(LHS, RHS);
3106
146
        auto L = Builder.createShuffleVector(
3107
146
            M, Undef,
3108
146
            LLVM::Value::getConstVector32(LLContext, {0U, 2U, 4U, 6U}));
3109
146
        auto R = Builder.createShuffleVector(
3110
146
            M, Undef,
3111
146
            LLVM::Value::getConstVector32(LLContext, {1U, 3U, 5U, 7U}));
3112
146
        auto V = Builder.createAdd(L, R);
3113
146
        stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
3114
146
        break;
3115
821
      }
3116
885
      case OpCode::I64x2__abs:
3117
885
        compileVectorAbs(Context.Int64x2Ty);
3118
885
        break;
3119
525
      case OpCode::I64x2__neg:
3120
525
        compileVectorNeg(Context.Int64x2Ty);
3121
525
        break;
3122
314
      case OpCode::I64x2__all_true:
3123
314
        compileVectorAllTrue(Context.Int64x2Ty);
3124
314
        break;
3125
246
      case OpCode::I64x2__bitmask:
3126
246
        compileVectorBitMask(Context.Int64x2Ty);
3127
246
        break;
3128
387
      case OpCode::I64x2__extend_low_i32x4_s:
3129
387
        compileVectorExtend(Context.Int32x4Ty, true, true);
3130
387
        break;
3131
687
      case OpCode::I64x2__extend_high_i32x4_s:
3132
687
        compileVectorExtend(Context.Int32x4Ty, true, false);
3133
687
        break;
3134
193
      case OpCode::I64x2__extend_low_i32x4_u:
3135
193
        compileVectorExtend(Context.Int32x4Ty, false, true);
3136
193
        break;
3137
714
      case OpCode::I64x2__extend_high_i32x4_u:
3138
714
        compileVectorExtend(Context.Int32x4Ty, false, false);
3139
714
        break;
3140
125
      case OpCode::I64x2__shl:
3141
125
        compileVectorShl(Context.Int64x2Ty);
3142
125
        break;
3143
337
      case OpCode::I64x2__shr_s:
3144
337
        compileVectorAShr(Context.Int64x2Ty);
3145
337
        break;
3146
101
      case OpCode::I64x2__shr_u:
3147
101
        compileVectorLShr(Context.Int64x2Ty);
3148
101
        break;
3149
44
      case OpCode::I64x2__add:
3150
44
        compileVectorVectorAdd(Context.Int64x2Ty);
3151
44
        break;
3152
284
      case OpCode::I64x2__sub:
3153
284
        compileVectorVectorSub(Context.Int64x2Ty);
3154
284
        break;
3155
73
      case OpCode::I64x2__mul:
3156
73
        compileVectorVectorMul(Context.Int64x2Ty);
3157
73
        break;
3158
37
      case OpCode::I64x2__extmul_low_i32x4_s:
3159
37
        compileVectorExtMul(Context.Int32x4Ty, true, true);
3160
37
        break;
3161
300
      case OpCode::I64x2__extmul_high_i32x4_s:
3162
300
        compileVectorExtMul(Context.Int32x4Ty, true, false);
3163
300
        break;
3164
31
      case OpCode::I64x2__extmul_low_i32x4_u:
3165
31
        compileVectorExtMul(Context.Int32x4Ty, false, true);
3166
31
        break;
3167
123
      case OpCode::I64x2__extmul_high_i32x4_u:
3168
123
        compileVectorExtMul(Context.Int32x4Ty, false, false);
3169
123
        break;
3170
103
      case OpCode::F32x4__abs:
3171
103
        compileVectorFAbs(Context.Floatx4Ty);
3172
103
        break;
3173
155
      case OpCode::F32x4__neg:
3174
155
        compileVectorFNeg(Context.Floatx4Ty);
3175
155
        break;
3176
206
      case OpCode::F32x4__sqrt:
3177
206
        compileVectorFSqrt(Context.Floatx4Ty);
3178
206
        break;
3179
132
      case OpCode::F32x4__add:
3180
132
        compileVectorVectorFAdd(Context.Floatx4Ty);
3181
132
        break;
3182
259
      case OpCode::F32x4__sub:
3183
259
        compileVectorVectorFSub(Context.Floatx4Ty);
3184
259
        break;
3185
39
      case OpCode::F32x4__mul:
3186
39
        compileVectorVectorFMul(Context.Floatx4Ty);
3187
39
        break;
3188
176
      case OpCode::F32x4__div:
3189
176
        compileVectorVectorFDiv(Context.Floatx4Ty);
3190
176
        break;
3191
120
      case OpCode::F32x4__min:
3192
120
        compileVectorVectorFMin(Context.Floatx4Ty);
3193
120
        break;
3194
36
      case OpCode::F32x4__max:
3195
36
        compileVectorVectorFMax(Context.Floatx4Ty);
3196
36
        break;
3197
51
      case OpCode::F32x4__pmin:
3198
51
        compileVectorVectorFPMin(Context.Floatx4Ty);
3199
51
        break;
3200
227
      case OpCode::F32x4__pmax:
3201
227
        compileVectorVectorFPMax(Context.Floatx4Ty);
3202
227
        break;
3203
944
      case OpCode::F32x4__ceil:
3204
944
        compileVectorFCeil(Context.Floatx4Ty);
3205
944
        break;
3206
2.04k
      case OpCode::F32x4__floor:
3207
2.04k
        compileVectorFFloor(Context.Floatx4Ty);
3208
2.04k
        break;
3209
1.89k
      case OpCode::F32x4__trunc:
3210
1.89k
        compileVectorFTrunc(Context.Floatx4Ty);
3211
1.89k
        break;
3212
264
      case OpCode::F32x4__nearest:
3213
264
        compileVectorFNearest(Context.Floatx4Ty);
3214
264
        break;
3215
440
      case OpCode::F64x2__abs:
3216
440
        compileVectorFAbs(Context.Doublex2Ty);
3217
440
        break;
3218
735
      case OpCode::F64x2__neg:
3219
735
        compileVectorFNeg(Context.Doublex2Ty);
3220
735
        break;
3221
126
      case OpCode::F64x2__sqrt:
3222
126
        compileVectorFSqrt(Context.Doublex2Ty);
3223
126
        break;
3224
48
      case OpCode::F64x2__add:
3225
48
        compileVectorVectorFAdd(Context.Doublex2Ty);
3226
48
        break;
3227
215
      case OpCode::F64x2__sub:
3228
215
        compileVectorVectorFSub(Context.Doublex2Ty);
3229
215
        break;
3230
211
      case OpCode::F64x2__mul:
3231
211
        compileVectorVectorFMul(Context.Doublex2Ty);
3232
211
        break;
3233
37
      case OpCode::F64x2__div:
3234
37
        compileVectorVectorFDiv(Context.Doublex2Ty);
3235
37
        break;
3236
166
      case OpCode::F64x2__min:
3237
166
        compileVectorVectorFMin(Context.Doublex2Ty);
3238
166
        break;
3239
159
      case OpCode::F64x2__max:
3240
159
        compileVectorVectorFMax(Context.Doublex2Ty);
3241
159
        break;
3242
263
      case OpCode::F64x2__pmin:
3243
263
        compileVectorVectorFPMin(Context.Doublex2Ty);
3244
263
        break;
3245
107
      case OpCode::F64x2__pmax:
3246
107
        compileVectorVectorFPMax(Context.Doublex2Ty);
3247
107
        break;
3248
659
      case OpCode::F64x2__ceil:
3249
659
        compileVectorFCeil(Context.Doublex2Ty);
3250
659
        break;
3251
793
      case OpCode::F64x2__floor:
3252
793
        compileVectorFFloor(Context.Doublex2Ty);
3253
793
        break;
3254
110
      case OpCode::F64x2__trunc:
3255
110
        compileVectorFTrunc(Context.Doublex2Ty);
3256
110
        break;
3257
153
      case OpCode::F64x2__nearest:
3258
153
        compileVectorFNearest(Context.Doublex2Ty);
3259
153
        break;
3260
211
      case OpCode::I32x4__trunc_sat_f32x4_s:
3261
211
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3262
211
        break;
3263
3.70k
      case OpCode::I32x4__trunc_sat_f32x4_u:
3264
3.70k
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3265
3.70k
        break;
3266
335
      case OpCode::F32x4__convert_i32x4_s:
3267
335
        compileVectorConvertS(Context.Int32x4Ty, Context.Floatx4Ty, false);
3268
335
        break;
3269
729
      case OpCode::F32x4__convert_i32x4_u:
3270
729
        compileVectorConvertU(Context.Int32x4Ty, Context.Floatx4Ty, false);
3271
729
        break;
3272
747
      case OpCode::I32x4__trunc_sat_f64x2_s_zero:
3273
747
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3274
747
        break;
3275
2.13k
      case OpCode::I32x4__trunc_sat_f64x2_u_zero:
3276
2.13k
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3277
2.13k
        break;
3278
351
      case OpCode::F64x2__convert_low_i32x4_s:
3279
351
        compileVectorConvertS(Context.Int32x4Ty, Context.Doublex2Ty, true);
3280
351
        break;
3281
1.28k
      case OpCode::F64x2__convert_low_i32x4_u:
3282
1.28k
        compileVectorConvertU(Context.Int32x4Ty, Context.Doublex2Ty, true);
3283
1.28k
        break;
3284
729
      case OpCode::F32x4__demote_f64x2_zero:
3285
729
        compileVectorDemote();
3286
729
        break;
3287
758
      case OpCode::F64x2__promote_low_f32x4:
3288
758
        compileVectorPromote();
3289
758
        break;
3290
3291
      // Relaxed SIMD Instructions
3292
11
      case OpCode::I8x16__relaxed_swizzle:
3293
11
        compileVectorSwizzle();
3294
11
        break;
3295
18
      case OpCode::I32x4__relaxed_trunc_f32x4_s:
3296
18
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3297
18
        break;
3298
13
      case OpCode::I32x4__relaxed_trunc_f32x4_u:
3299
13
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3300
13
        break;
3301
10
      case OpCode::I32x4__relaxed_trunc_f64x2_s_zero:
3302
10
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3303
10
        break;
3304
15
      case OpCode::I32x4__relaxed_trunc_f64x2_u_zero:
3305
15
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3306
15
        break;
3307
27
      case OpCode::F32x4__relaxed_madd:
3308
27
        compileVectorVectorMAdd(Context.Floatx4Ty);
3309
27
        break;
3310
34
      case OpCode::F32x4__relaxed_nmadd:
3311
34
        compileVectorVectorNMAdd(Context.Floatx4Ty);
3312
34
        break;
3313
49
      case OpCode::F64x2__relaxed_madd:
3314
49
        compileVectorVectorMAdd(Context.Doublex2Ty);
3315
49
        break;
3316
16
      case OpCode::F64x2__relaxed_nmadd:
3317
16
        compileVectorVectorNMAdd(Context.Doublex2Ty);
3318
16
        break;
3319
10
      case OpCode::I8x16__relaxed_laneselect:
3320
13
      case OpCode::I16x8__relaxed_laneselect:
3321
23
      case OpCode::I32x4__relaxed_laneselect:
3322
24
      case OpCode::I64x2__relaxed_laneselect: {
3323
24
        auto C = stackPop();
3324
24
        auto V2 = stackPop();
3325
24
        auto V1 = stackPop();
3326
24
        stackPush(Builder.createXor(
3327
24
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
3328
24
        break;
3329
23
      }
3330
12
      case OpCode::F32x4__relaxed_min:
3331
12
        compileVectorVectorFMin(Context.Floatx4Ty);
3332
12
        break;
3333
13
      case OpCode::F32x4__relaxed_max:
3334
13
        compileVectorVectorFMax(Context.Floatx4Ty);
3335
13
        break;
3336
10
      case OpCode::F64x2__relaxed_min:
3337
10
        compileVectorVectorFMin(Context.Doublex2Ty);
3338
10
        break;
3339
21
      case OpCode::F64x2__relaxed_max:
3340
21
        compileVectorVectorFMax(Context.Doublex2Ty);
3341
21
        break;
3342
15
      case OpCode::I16x8__relaxed_q15mulr_s:
3343
15
        compileVectorVectorQ15MulSat();
3344
15
        break;
3345
11
      case OpCode::I16x8__relaxed_dot_i8x16_i7x16_s:
3346
11
        compileVectorRelaxedIntegerDotProduct();
3347
11
        break;
3348
12
      case OpCode::I32x4__relaxed_dot_i8x16_i7x16_add_s:
3349
12
        compileVectorRelaxedIntegerDotProductAdd();
3350
12
        break;
3351
3352
      // Atomic Instructions
3353
192
      case OpCode::Atomic__fence:
3354
192
        compileMemoryFence();
3355
192
        break;
3356
37
      case OpCode::Memory__atomic__notify:
3357
37
        compileAtomicNotify(Instr.getTargetIndex(), Instr.getMemoryOffset());
3358
37
        break;
3359
5
      case OpCode::Memory__atomic__wait32:
3360
5
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3361
5
                          Context.Int32Ty, 32);
3362
5
        break;
3363
2
      case OpCode::Memory__atomic__wait64:
3364
2
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3365
2
                          Context.Int64Ty, 64);
3366
2
        break;
3367
0
      case OpCode::I32__atomic__load:
3368
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3369
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3370
0
                          Context.Int32Ty, true);
3371
0
        break;
3372
0
      case OpCode::I64__atomic__load:
3373
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3374
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3375
0
                          Context.Int64Ty, true);
3376
0
        break;
3377
0
      case OpCode::I32__atomic__load8_u:
3378
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3379
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3380
0
                          Context.Int8Ty);
3381
0
        break;
3382
0
      case OpCode::I32__atomic__load16_u:
3383
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3384
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3385
0
                          Context.Int16Ty);
3386
0
        break;
3387
0
      case OpCode::I64__atomic__load8_u:
3388
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3389
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3390
0
                          Context.Int8Ty);
3391
0
        break;
3392
0
      case OpCode::I64__atomic__load16_u:
3393
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3394
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3395
0
                          Context.Int16Ty);
3396
0
        break;
3397
0
      case OpCode::I64__atomic__load32_u:
3398
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3399
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3400
0
                          Context.Int32Ty);
3401
0
        break;
3402
0
      case OpCode::I32__atomic__store:
3403
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3404
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3405
0
                           Context.Int32Ty, true);
3406
0
        break;
3407
0
      case OpCode::I64__atomic__store:
3408
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3409
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3410
0
                           Context.Int64Ty, true);
3411
0
        break;
3412
0
      case OpCode::I32__atomic__store8:
3413
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3414
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3415
0
                           Context.Int8Ty, true);
3416
0
        break;
3417
0
      case OpCode::I32__atomic__store16:
3418
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3419
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3420
0
                           Context.Int16Ty, true);
3421
0
        break;
3422
0
      case OpCode::I64__atomic__store8:
3423
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3424
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3425
0
                           Context.Int8Ty, true);
3426
0
        break;
3427
0
      case OpCode::I64__atomic__store16:
3428
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3429
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3430
0
                           Context.Int16Ty, true);
3431
0
        break;
3432
0
      case OpCode::I64__atomic__store32:
3433
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3434
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3435
0
                           Context.Int32Ty, true);
3436
0
        break;
3437
0
      case OpCode::I32__atomic__rmw__add:
3438
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3439
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3440
0
                           Context.Int32Ty, Context.Int32Ty, true);
3441
0
        break;
3442
0
      case OpCode::I64__atomic__rmw__add:
3443
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3444
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3445
0
                           Context.Int64Ty, Context.Int64Ty, true);
3446
0
        break;
3447
0
      case OpCode::I32__atomic__rmw8__add_u:
3448
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3449
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3450
0
                           Context.Int32Ty, Context.Int8Ty);
3451
0
        break;
3452
0
      case OpCode::I32__atomic__rmw16__add_u:
3453
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3454
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3455
0
                           Context.Int32Ty, Context.Int16Ty);
3456
0
        break;
3457
0
      case OpCode::I64__atomic__rmw8__add_u:
3458
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3459
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3460
0
                           Context.Int64Ty, Context.Int8Ty);
3461
0
        break;
3462
0
      case OpCode::I64__atomic__rmw16__add_u:
3463
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3464
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3465
0
                           Context.Int64Ty, Context.Int16Ty);
3466
0
        break;
3467
0
      case OpCode::I64__atomic__rmw32__add_u:
3468
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3469
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3470
0
                           Context.Int64Ty, Context.Int32Ty);
3471
0
        break;
3472
0
      case OpCode::I32__atomic__rmw__sub:
3473
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3474
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3475
0
                           Context.Int32Ty, Context.Int32Ty, true);
3476
0
        break;
3477
0
      case OpCode::I64__atomic__rmw__sub:
3478
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3479
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3480
0
                           Context.Int64Ty, Context.Int64Ty, true);
3481
0
        break;
3482
0
      case OpCode::I32__atomic__rmw8__sub_u:
3483
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3484
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3485
0
                           Context.Int32Ty, Context.Int8Ty);
3486
0
        break;
3487
0
      case OpCode::I32__atomic__rmw16__sub_u:
3488
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3489
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3490
0
                           Context.Int32Ty, Context.Int16Ty);
3491
0
        break;
3492
0
      case OpCode::I64__atomic__rmw8__sub_u:
3493
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3494
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3495
0
                           Context.Int64Ty, Context.Int8Ty);
3496
0
        break;
3497
0
      case OpCode::I64__atomic__rmw16__sub_u:
3498
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3499
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3500
0
                           Context.Int64Ty, Context.Int16Ty);
3501
0
        break;
3502
0
      case OpCode::I64__atomic__rmw32__sub_u:
3503
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3504
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3505
0
                           Context.Int64Ty, Context.Int32Ty);
3506
0
        break;
3507
0
      case OpCode::I32__atomic__rmw__and:
3508
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3509
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3510
0
                           Context.Int32Ty, Context.Int32Ty, true);
3511
0
        break;
3512
0
      case OpCode::I64__atomic__rmw__and:
3513
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3514
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3515
0
                           Context.Int64Ty, Context.Int64Ty, true);
3516
0
        break;
3517
0
      case OpCode::I32__atomic__rmw8__and_u:
3518
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3519
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3520
0
                           Context.Int32Ty, Context.Int8Ty);
3521
0
        break;
3522
0
      case OpCode::I32__atomic__rmw16__and_u:
3523
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3524
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3525
0
                           Context.Int32Ty, Context.Int16Ty);
3526
0
        break;
3527
0
      case OpCode::I64__atomic__rmw8__and_u:
3528
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3529
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3530
0
                           Context.Int64Ty, Context.Int8Ty);
3531
0
        break;
3532
0
      case OpCode::I64__atomic__rmw16__and_u:
3533
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3534
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3535
0
                           Context.Int64Ty, Context.Int16Ty);
3536
0
        break;
3537
0
      case OpCode::I64__atomic__rmw32__and_u:
3538
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3539
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3540
0
                           Context.Int64Ty, Context.Int32Ty);
3541
0
        break;
3542
0
      case OpCode::I32__atomic__rmw__or:
3543
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3544
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3545
0
                           Context.Int32Ty, Context.Int32Ty, true);
3546
0
        break;
3547
0
      case OpCode::I64__atomic__rmw__or:
3548
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3549
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3550
0
                           Context.Int64Ty, Context.Int64Ty, true);
3551
0
        break;
3552
0
      case OpCode::I32__atomic__rmw8__or_u:
3553
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3554
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3555
0
                           Context.Int32Ty, Context.Int8Ty);
3556
0
        break;
3557
0
      case OpCode::I32__atomic__rmw16__or_u:
3558
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3559
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3560
0
                           Context.Int32Ty, Context.Int16Ty);
3561
0
        break;
3562
0
      case OpCode::I64__atomic__rmw8__or_u:
3563
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3564
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3565
0
                           Context.Int64Ty, Context.Int8Ty);
3566
0
        break;
3567
0
      case OpCode::I64__atomic__rmw16__or_u:
3568
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3569
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3570
0
                           Context.Int64Ty, Context.Int16Ty);
3571
0
        break;
3572
0
      case OpCode::I64__atomic__rmw32__or_u:
3573
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3574
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3575
0
                           Context.Int64Ty, Context.Int32Ty);
3576
0
        break;
3577
0
      case OpCode::I32__atomic__rmw__xor:
3578
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3579
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3580
0
                           Context.Int32Ty, Context.Int32Ty, true);
3581
0
        break;
3582
0
      case OpCode::I64__atomic__rmw__xor:
3583
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3584
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3585
0
                           Context.Int64Ty, Context.Int64Ty, true);
3586
0
        break;
3587
0
      case OpCode::I32__atomic__rmw8__xor_u:
3588
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3589
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3590
0
                           Context.Int32Ty, Context.Int8Ty);
3591
0
        break;
3592
0
      case OpCode::I32__atomic__rmw16__xor_u:
3593
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3594
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3595
0
                           Context.Int32Ty, Context.Int16Ty);
3596
0
        break;
3597
0
      case OpCode::I64__atomic__rmw8__xor_u:
3598
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3599
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3600
0
                           Context.Int64Ty, Context.Int8Ty);
3601
0
        break;
3602
0
      case OpCode::I64__atomic__rmw16__xor_u:
3603
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3604
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3605
0
                           Context.Int64Ty, Context.Int16Ty);
3606
0
        break;
3607
0
      case OpCode::I64__atomic__rmw32__xor_u:
3608
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3609
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3610
0
                           Context.Int64Ty, Context.Int32Ty);
3611
0
        break;
3612
0
      case OpCode::I32__atomic__rmw__xchg:
3613
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3614
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3615
0
                           Context.Int32Ty, Context.Int32Ty, true);
3616
0
        break;
3617
0
      case OpCode::I64__atomic__rmw__xchg:
3618
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3619
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3620
0
                           Context.Int64Ty, Context.Int64Ty, true);
3621
0
        break;
3622
0
      case OpCode::I32__atomic__rmw8__xchg_u:
3623
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3624
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3625
0
                           Context.Int32Ty, Context.Int8Ty);
3626
0
        break;
3627
0
      case OpCode::I32__atomic__rmw16__xchg_u:
3628
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3629
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3630
0
                           Context.Int32Ty, Context.Int16Ty);
3631
0
        break;
3632
0
      case OpCode::I64__atomic__rmw8__xchg_u:
3633
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3634
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3635
0
                           Context.Int64Ty, Context.Int8Ty);
3636
0
        break;
3637
0
      case OpCode::I64__atomic__rmw16__xchg_u:
3638
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3639
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3640
0
                           Context.Int64Ty, Context.Int16Ty);
3641
0
        break;
3642
0
      case OpCode::I64__atomic__rmw32__xchg_u:
3643
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3644
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3645
0
                           Context.Int64Ty, Context.Int32Ty);
3646
0
        break;
3647
0
      case OpCode::I32__atomic__rmw__cmpxchg:
3648
0
        compileAtomicCompareExchange(
3649
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3650
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3651
0
        break;
3652
0
      case OpCode::I64__atomic__rmw__cmpxchg:
3653
0
        compileAtomicCompareExchange(
3654
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3655
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3656
0
        break;
3657
0
      case OpCode::I32__atomic__rmw8__cmpxchg_u:
3658
0
        compileAtomicCompareExchange(
3659
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3660
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3661
0
        break;
3662
0
      case OpCode::I32__atomic__rmw16__cmpxchg_u:
3663
0
        compileAtomicCompareExchange(
3664
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3665
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3666
0
        break;
3667
0
      case OpCode::I64__atomic__rmw8__cmpxchg_u:
3668
0
        compileAtomicCompareExchange(
3669
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3670
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3671
0
        break;
3672
0
      case OpCode::I64__atomic__rmw16__cmpxchg_u:
3673
0
        compileAtomicCompareExchange(
3674
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3675
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3676
0
        break;
3677
0
      case OpCode::I64__atomic__rmw32__cmpxchg_u:
3678
0
        compileAtomicCompareExchange(
3679
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3680
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3681
0
        break;
3682
3683
0
      default:
3684
0
        assumingUnreachable();
3685
1.09M
      }
3686
1.09M
      return {};
3687
1.09M
    };
3688
3689
1.59M
    for (const auto &Instr : Instrs) {
3690
      // Update instruction count
3691
1.59M
      if (LocalInstrCount) {
3692
0
        Builder.createStore(
3693
0
            Builder.createAdd(
3694
0
                Builder.createLoad(Context.Int64Ty, LocalInstrCount),
3695
0
                LLContext.getInt64(1)),
3696
0
            LocalInstrCount);
3697
0
      }
3698
1.59M
      if (LocalGas) {
3699
0
        auto NewGas = Builder.createAdd(
3700
0
            Builder.createLoad(Context.Int64Ty, LocalGas),
3701
0
            Builder.createLoad(
3702
0
                Context.Int64Ty,
3703
0
                Builder.createConstInBoundsGEP2_64(
3704
0
                    LLVM::Type::getArrayType(Context.Int64Ty, UINT16_MAX + 1),
3705
0
                    Context.getCostTable(Builder, ExecCtx), 0,
3706
0
                    uint16_t(Instr.getOpCode()))));
3707
0
        Builder.createStore(NewGas, LocalGas);
3708
0
      }
3709
3710
      // Make the instruction node according to Code.
3711
1.59M
      EXPECTED_TRY(Dispatch(Instr));
3712
1.59M
    }
3713
10.9k
    return {};
3714
11.0k
  }
3715
2.10k
  /// Emit IR that converts the floating-point value popped from the operand
  /// stack into a signed integer of type `IntType`, trapping on bad inputs.
  ///
  /// The generated control flow is a chain of three checks:
  ///   1. NaN            -> trap with ErrCode::Value::InvalidConvToInt
  ///   2. value < min    -> trap with ErrCode::Value::IntegerOverflow
  ///   3. value > max    -> trap with ErrCode::Value::IntegerOverflow
  /// and, when all pass, the truncated value is converted with FPToSI and
  /// pushed back onto the operand stack.
  ///
  /// \param IntType destination integer type; only bit widths 32 and 64 are
  ///                handled (any other width reaches assumingUnreachable()).
  void compileSignedTrunc(LLVM::Type IntType) noexcept {
3716
2.10k
    // One basic block per successfully passed check.
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.norm");
3717
2.10k
    auto NotMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmin");
3718
2.10k
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmax");
3719
2.10k
    auto Value = stackPop();
3720
2.10k
    // Compute, once at compile time, the integer range expressed as constants
    // of the operand's floating-point type, plus whether the integer width
    // fits within the FP mantissa width (`Precise`).
    const auto [Precise, MinFp, MaxFp] =
3721
2.10k
        [IntType, Value]() -> std::tuple<bool, LLVM::Value, LLVM::Value> {
3722
2.10k
      const auto BitWidth = IntType.getIntegerBitWidth();
3723
2.10k
      // Signed limits of the destination type, widened to int64_t.
      const auto [Min, Max] = [BitWidth]() -> std::tuple<int64_t, int64_t> {
3724
2.10k
        switch (BitWidth) {
3725
1.65k
        case 32:
3726
1.65k
          return {std::numeric_limits<int32_t>::min(),
3727
1.65k
                  std::numeric_limits<int32_t>::max()};
3728
456
        case 64:
3729
456
          return {std::numeric_limits<int64_t>::min(),
3730
456
                  std::numeric_limits<int64_t>::max()};
3731
0
        default:
3732
0
          assumingUnreachable();
3733
2.10k
        }
3734
2.10k
      }();
3735
2.10k
      auto FPType = Value.getType();
3736
2.10k
      // Only scalar float and double operands are expected here.
      assuming(FPType.isFloatTy() || FPType.isDoubleTy());
3737
2.10k
      const auto FPWidth = FPType.getFPMantissaWidth();
3738
2.10k
      return {BitWidth <= FPWidth, LLVM::Value::getConstReal(FPType, Min),
3739
2.10k
              LLVM::Value::getConstReal(FPType, Max)};
3740
2.10k
    }();
3741
3742
2.10k
    // Check 1: an ordered self-comparison is used as the "is not NaN" test;
    // the non-NaN outcome is marked as the likely one.
    auto IsNotNan = Builder.createLikely(Builder.createFCmpORD(Value, Value));
3743
2.10k
    Builder.createCondBr(IsNotNan, NormBB,
3744
2.10k
                         getTrapBB(ErrCode::Value::InvalidConvToInt));
3745
3746
2.10k
    Builder.positionAtEnd(NormBB);
3747
2.10k
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
3748
2.10k
    // Drop the fractional part first so the range checks below operate on
    // the truncated value rather than on the raw input.
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
3749
2.10k
    // Check 2: lower bound, inclusive (Trunc >= MinFp).
    auto IsNotUnderflow =
3750
2.10k
        Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
3751
2.10k
    Builder.createCondBr(IsNotUnderflow, NotMinBB,
3752
2.10k
                         getTrapBB(ErrCode::Value::IntegerOverflow));
3753
3754
2.10k
    Builder.positionAtEnd(NotMinBB);
3755
2.10k
    // Check 3: upper bound. Inclusive (<=) when `Precise`, exclusive (<)
    // otherwise.
    // NOTE(review): presumably when the integer is wider than the mantissa,
    // Max is not exactly representable and MaxFp rounds up past the real
    // limit, which is why the bound becomes exclusive -- confirm.
    auto IsNotOverflow = Builder.createLikely(
3756
2.10k
        Builder.createFCmp(Precise ? LLVMRealOLE : LLVMRealOLT, Trunc, MaxFp));
3757
2.10k
    Builder.createCondBr(IsNotOverflow, NotMaxBB,
3758
2.10k
                         getTrapBB(ErrCode::Value::IntegerOverflow));
3759
3760
2.10k
    Builder.positionAtEnd(NotMaxBB);
3761
2.10k
    // All checks passed: convert and push the result. The builder is left
    // positioned at NotMaxBB for whatever is emitted next.
    stackPush(Builder.createFPToSI(Trunc, IntType));
3762
2.10k
  }
3763
1.29k
  /// Emit IR that converts the floating-point value popped from the operand
  /// stack into a signed integer of type `IntType` with saturation: no path
  /// traps; every failed check branches to a common end block instead.
  ///
  /// The result pushed onto the operand stack is a PHI in the end block:
  ///   - 0                 when the input is NaN,
  ///   - the integer min   when the truncated input is below the minimum,
  ///   - the integer max   when the truncated input is above the maximum,
  ///   - FPToSI(trunc(x))  otherwise.
  ///
  /// \param IntType destination integer type; only bit widths 32 and 64 are
  ///                handled (any other width reaches assumingUnreachable()).
  void compileSignedTruncSat(LLVM::Type IntType) noexcept {
3764
1.29k
    // Remember the block we start in: it is the predecessor of EndBB on the
    // NaN path and is needed as a PHI incoming block below.
    auto CurrBB = Builder.getInsertBlock();
3765
1.29k
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.norm");
3766
1.29k
    auto NotMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmin");
3767
1.29k
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmax");
3768
1.29k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.end");
3769
1.29k
    auto Value = stackPop();
3770
1.29k
    // Compute, once at compile time: whether the integer width fits within
    // the FP mantissa width (`Precise`), the integer limits as raw 64-bit
    // patterns (for the saturated PHI constants), and the same limits as
    // constants of the operand's floating-point type (for the comparisons).
    const auto [Precise, MinInt, MaxInt, MinFp, MaxFp] = [IntType, Value]()
3771
1.29k
        -> std::tuple<bool, uint64_t, uint64_t, LLVM::Value, LLVM::Value> {
3772
1.29k
      const auto BitWidth = IntType.getIntegerBitWidth();
3773
1.29k
      // Signed limits of the destination type, widened to int64_t.
      const auto [Min, Max] = [BitWidth]() -> std::tuple<int64_t, int64_t> {
3774
1.29k
        switch (BitWidth) {
3775
617
        case 32:
3776
617
          return {std::numeric_limits<int32_t>::min(),
3777
617
                  std::numeric_limits<int32_t>::max()};
3778
676
        case 64:
3779
676
          return {std::numeric_limits<int64_t>::min(),
3780
676
                  std::numeric_limits<int64_t>::max()};
3781
0
        default:
3782
0
          assumingUnreachable();
3783
1.29k
        }
3784
1.29k
      }();
3785
1.29k
      auto FPType = Value.getType();
3786
1.29k
      // Only scalar float and double operands are expected here.
      assuming(FPType.isFloatTy() || FPType.isDoubleTy());
3787
1.29k
      const auto FPWidth = FPType.getFPMantissaWidth();
3788
1.29k
      return {BitWidth <= FPWidth, static_cast<uint64_t>(Min),
3789
1.29k
              static_cast<uint64_t>(Max),
3790
1.29k
              LLVM::Value::getConstReal(FPType, Min),
3791
1.29k
              LLVM::Value::getConstReal(FPType, Max)};
3792
1.29k
    }();
3793
3794
1.29k
    // Check 1: an ordered self-comparison is used as the "is not NaN" test.
    // On NaN, jump straight to EndBB (PHI yields 0 for the CurrBB edge).
    auto IsNotNan = Builder.createLikely(Builder.createFCmpORD(Value, Value));
3795
1.29k
    Builder.createCondBr(IsNotNan, NormBB, EndBB);
3796
3797
1.29k
    Builder.positionAtEnd(NormBB);
3798
1.29k
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
3799
1.29k
    // Drop the fractional part first so the range checks below operate on
    // the truncated value rather than on the raw input.
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
3800
1.29k
    // Check 2: lower bound, inclusive. On failure, EndBB is entered from
    // NormBB and the PHI yields MinInt.
    auto IsNotUnderflow =
3801
1.29k
        Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
3802
1.29k
    Builder.createCondBr(IsNotUnderflow, NotMinBB, EndBB);
3803
3804
1.29k
    Builder.positionAtEnd(NotMinBB);
3805
1.29k
    // Check 3: upper bound. Inclusive (<=) when `Precise`, exclusive (<)
    // otherwise. On failure, EndBB is entered from NotMinBB and the PHI
    // yields MaxInt.
    // NOTE(review): presumably when the integer is wider than the mantissa,
    // Max is not exactly representable and MaxFp rounds up past the real
    // limit, which is why the bound becomes exclusive -- confirm.
    auto IsNotOverflow = Builder.createLikely(
3806
1.29k
        Builder.createFCmp(Precise ? LLVMRealOLE : LLVMRealOLT, Trunc, MaxFp));
3807
1.29k
    Builder.createCondBr(IsNotOverflow, NotMaxBB, EndBB);
3808
3809
1.29k
    Builder.positionAtEnd(NotMaxBB);
3810
1.29k
    // In-range path: perform the actual conversion, then join EndBB.
    auto IntValue = Builder.createFPToSI(Trunc, IntType);
3811
1.29k
    Builder.createBr(EndBB);
3812
3813
1.29k
    Builder.positionAtEnd(EndBB);
3814
1.29k
    // Merge the four possible outcomes; each incoming block below is the
    // block whose branch (emitted above) targets EndBB for that outcome.
    auto PHIRet = Builder.createPHI(IntType);
3815
1.29k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, 0, true), CurrBB);
3816
1.29k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, MinInt, true), NormBB);
3817
1.29k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, MaxInt, true),
3818
1.29k
                       NotMinBB);
3819
1.29k
    PHIRet.addIncoming(IntValue, NotMaxBB);
3820
3821
1.29k
    stackPush(PHIRet);
3822
1.29k
  }
3823
3.60k
  /// Emit a trapping floating-point to unsigned integer conversion.
  ///
  /// Pops one operand (asserted to be float or double), applies the
  /// LLVM::Core::Trunc intrinsic to it and pushes the FPToUI result of type
  /// \p IntType. A NaN operand branches to the InvalidConvToInt trap block; a
  /// truncated value outside the range of \p IntType branches to the
  /// IntegerOverflow trap block.
  ///
  /// \param IntType destination integer type; only 32 and 64 bit widths are
  ///        handled.
  void compileUnsignedTrunc(LLVM::Type IntType) noexcept {
    auto LowerCheckBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.norm");
    auto UpperCheckBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmin");
    auto ConvertBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmax");
    auto Operand = stackPop();

    // Integer range of the destination type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerInt = 0;
    uint64_t UpperInt = 0;
    switch (IntBits) {
    case 32:
      LowerInt = std::numeric_limits<uint32_t>::min();
      UpperInt = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerInt = std::numeric_limits<uint64_t>::min();
      UpperInt = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto OperandTy = Operand.getType();
    assuming(OperandTy.isFloatTy() || OperandTy.isDoubleTy());
    const auto MantissaBits = OperandTy.getFPMantissaWidth();
    // If the integer width fits in the mantissa the upper bound constant is
    // compared inclusively, otherwise exclusively.
    const bool BoundIsExact = IntBits <= MantissaBits;
    const auto LowerFp = LLVM::Value::getConstReal(OperandTy, LowerInt);
    const auto UpperFp = LLVM::Value::getConstReal(OperandTy, UpperInt);

    // An ordered self-compare fails only for NaN.
    auto Ordered = Builder.createFCmpORD(Operand, Operand);
    auto IsNotNan = Builder.createLikely(Ordered);
    Builder.createCondBr(IsNotNan, LowerCheckBB,
                         getTrapBB(ErrCode::Value::InvalidConvToInt));

    // Lower bound check on the truncated value.
    Builder.positionAtEnd(LowerCheckBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AtLeastMin = Builder.createFCmpOGE(Truncated, LowerFp);
    auto IsNotUnderflow = Builder.createLikely(AtLeastMin);
    Builder.createCondBr(IsNotUnderflow, UpperCheckBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    // Upper bound check on the truncated value.
    Builder.positionAtEnd(UpperCheckBB);
    const auto UpperPredicate = BoundIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto WithinMax = Builder.createFCmp(UpperPredicate, Truncated, UpperFp);
    auto IsNotOverflow = Builder.createLikely(WithinMax);
    Builder.createCondBr(IsNotOverflow, ConvertBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    // In range: perform the actual conversion.
    Builder.positionAtEnd(ConvertBB);
    stackPush(Builder.createFPToUI(Truncated, IntType));
  }
3871
1.17k
  /// Emit a saturating floating-point to unsigned integer conversion.
  ///
  /// Pops one operand (asserted to be float or double), applies the
  /// LLVM::Core::Trunc intrinsic and pushes a PHI of type \p IntType that is
  ///  - the minimum of \p IntType when the ordered ">= min" compare fails
  ///    (an ordered compare also fails when the operand is NaN),
  ///  - the maximum of \p IntType when the upper bound compare fails,
  ///  - the FPToUI result otherwise.
  ///
  /// \param IntType destination integer type; only 32 and 64 bit widths are
  ///        handled.
  void compileUnsignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto UpperCheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.norm");
    auto ConvertBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.end");
    auto Operand = stackPop();

    // Integer range of the destination type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerInt = 0;
    uint64_t UpperInt = 0;
    switch (IntBits) {
    case 32:
      LowerInt = std::numeric_limits<uint32_t>::min();
      UpperInt = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerInt = std::numeric_limits<uint64_t>::min();
      UpperInt = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto OperandTy = Operand.getType();
    assuming(OperandTy.isFloatTy() || OperandTy.isDoubleTy());
    const auto MantissaBits = OperandTy.getFPMantissaWidth();
    // If the integer width fits in the mantissa the upper bound constant is
    // compared inclusively, otherwise exclusively.
    const bool BoundIsExact = IntBits <= MantissaBits;
    const auto LowerFp = LLVM::Value::getConstReal(OperandTy, LowerInt);
    const auto UpperFp = LLVM::Value::getConstReal(OperandTy, UpperInt);

    // Lower bound check, emitted in the block that was current on entry.
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AtLeastMin = Builder.createFCmpOGE(Truncated, LowerFp);
    auto IsNotUnderflow = Builder.createLikely(AtLeastMin);
    Builder.createCondBr(IsNotUnderflow, UpperCheckBB, JoinBB);

    // Upper bound check.
    Builder.positionAtEnd(UpperCheckBB);
    const auto UpperPredicate = BoundIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto WithinMax = Builder.createFCmp(UpperPredicate, Truncated, UpperFp);
    auto IsNotOverflow = Builder.createLikely(WithinMax);
    Builder.createCondBr(IsNotOverflow, ConvertBB, JoinBB);

    // In range: perform the actual conversion.
    Builder.positionAtEnd(ConvertBB);
    auto Converted = Builder.createFPToUI(Truncated, IntType);
    Builder.createBr(JoinBB);

    // Merge the saturated constants with the converted value.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, LowerInt), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, UpperInt),
                       UpperCheckBB);
    Result.addIncoming(Converted, ConvertBB);

    stackPush(Result);
  }
3923
3924
  /// Emit a natural-alignment check for an atomic access.
  ///
  /// Masks \p Offset with (byte width of \p IntType) - 1 and branches to the
  /// UnalignedAtomicAccess trap block when the masked value is non-zero. On
  /// return the builder is positioned in the newly created "aligned" block.
  ///
  /// \param Offset  64-bit address value to test (it is AND-ed with an i64
  ///                constant).
  /// \param IntType integer type whose width defines the required alignment.
  void compileAtomicCheckOffsetAlignment(LLVM::Value Offset,
                                         LLVM::Type IntType) noexcept {
    const auto AccessBits = IntType.getIntegerBitWidth();
    // Byte width minus one: the low address bits that must all be zero.
    auto LowBitsMask = LLContext.getInt64(AccessBits / 8 - 1);
    auto LowBits = Builder.createAnd(Offset, LowBitsMask);
    auto AlignedBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "address_align_ok");
    auto Zero = LLContext.getInt64(0);
    auto LowBitsClear = Builder.createICmpEQ(LowBits, Zero);
    auto IsAddressAligned = Builder.createLikely(LowBitsClear);
    Builder.createCondBr(IsAddressAligned, AlignedBB,
                         getTrapBB(ErrCode::Value::UnalignedAtomicAccess));

    Builder.positionAtEnd(AlignedBB);
  }
3937
3938
192
  void compileMemoryFence() noexcept {
3939
192
    Builder.createFence(LLVMAtomicOrderingSequentiallyConsistent);
3940
192
  }
3941
  void compileAtomicNotify(unsigned MemoryIndex,
3942
37
                           unsigned MemoryOffset) noexcept {
3943
37
    auto Count = stackPop();
3944
37
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3945
37
    if (MemoryOffset != 0) {
3946
30
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3947
30
    }
3948
37
    compileAtomicCheckOffsetAlignment(Addr, Context.Int32Ty);
3949
37
    auto Offset = stackPop();
3950
3951
37
    stackPush(Builder.createCall(
3952
37
        Context.getIntrinsic(
3953
37
            Builder, Executable::Intrinsics::kMemAtomicNotify,
3954
37
            LLVM::Type::getFunctionType(
3955
37
                Context.Int32Ty,
3956
37
                {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
3957
37
        {LLContext.getInt32(MemoryIndex), Offset, Count}));
3958
37
  }
3959
  void compileAtomicWait(unsigned MemoryIndex, unsigned MemoryOffset,
3960
7
                         LLVM::Type TargetType, uint32_t BitWidth) noexcept {
3961
7
    auto Timeout = stackPop();
3962
7
    auto ExpectedValue = Builder.createZExtOrTrunc(stackPop(), Context.Int64Ty);
3963
7
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3964
7
    if (MemoryOffset != 0) {
3965
3
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3966
3
    }
3967
7
    compileAtomicCheckOffsetAlignment(Addr, TargetType);
3968
7
    auto Offset = stackPop();
3969
3970
7
    stackPush(Builder.createCall(
3971
7
        Context.getIntrinsic(
3972
7
            Builder, Executable::Intrinsics::kMemAtomicWait,
3973
7
            LLVM::Type::getFunctionType(Context.Int32Ty,
3974
7
                                        {Context.Int32Ty, Context.Int32Ty,
3975
7
                                         Context.Int64Ty, Context.Int64Ty,
3976
7
                                         Context.Int32Ty},
3977
7
                                        false)),
3978
7
        {LLContext.getInt32(MemoryIndex), Offset, ExpectedValue, Timeout,
3979
7
         LLContext.getInt32(BitWidth)}));
3980
7
  }
3981
  void compileAtomicLoad(unsigned MemoryIndex, unsigned MemoryOffset,
3982
                         unsigned Alignment, LLVM::Type IntType,
3983
0
                         LLVM::Type TargetType, bool Signed = false) noexcept {
3984
3985
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3986
0
    if (MemoryOffset != 0) {
3987
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3988
0
    }
3989
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3990
0
    auto VPtr = Builder.createInBoundsGEP1(
3991
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3992
0
        Offset);
3993
3994
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
3995
0
    auto Load = switchEndian(Builder.createLoad(TargetType, Ptr, true));
3996
0
    Load.setAlignment(1 << Alignment);
3997
0
    Load.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
3998
3999
0
    if (Signed) {
4000
0
      Stack.back() = Builder.createSExt(Load, IntType);
4001
0
    } else {
4002
0
      Stack.back() = Builder.createZExt(Load, IntType);
4003
0
    }
4004
0
  }
4005
  void compileAtomicStore(unsigned MemoryIndex, unsigned MemoryOffset,
4006
                          unsigned Alignment, LLVM::Type, LLVM::Type TargetType,
4007
0
                          bool Signed = false) noexcept {
4008
0
    auto V = stackPop();
4009
4010
0
    if (Signed) {
4011
0
      V = Builder.createSExtOrTrunc(V, TargetType);
4012
0
    } else {
4013
0
      V = Builder.createZExtOrTrunc(V, TargetType);
4014
0
    }
4015
0
    V = switchEndian(V);
4016
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4017
0
    if (MemoryOffset != 0) {
4018
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4019
0
    }
4020
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4021
0
    auto VPtr = Builder.createInBoundsGEP1(
4022
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4023
0
        Offset);
4024
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4025
0
    auto Store = Builder.createStore(V, Ptr, true);
4026
0
    Store.setAlignment(1 << Alignment);
4027
0
    Store.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
4028
0
  }
4029
4030
  void compileAtomicRMWOp(unsigned MemoryIndex, unsigned MemoryOffset,
4031
                          [[maybe_unused]] unsigned Alignment,
4032
                          LLVMAtomicRMWBinOp BinOp, LLVM::Type IntType,
4033
0
                          LLVM::Type TargetType, bool Signed = false) noexcept {
4034
0
    auto Value = Builder.createSExtOrTrunc(stackPop(), TargetType);
4035
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4036
0
    if (MemoryOffset != 0) {
4037
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4038
0
    }
4039
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4040
0
    auto VPtr = Builder.createInBoundsGEP1(
4041
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4042
0
        Offset);
4043
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4044
4045
0
    LLVM::Value Ret;
4046
    if constexpr (Endian::native == Endian::big) {
4047
      if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd ||
4048
          BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4049
        auto AtomicBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw");
4050
        auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw.ok");
4051
        Builder.createBr(AtomicBB);
4052
        Builder.positionAtEnd(AtomicBB);
4053
4054
        auto Load = Builder.createLoad(TargetType, Ptr, true);
4055
        Load.setOrdering(LLVMAtomicOrderingMonotonic);
4056
        Load.setAlignment(1 << Alignment);
4057
4058
        LLVM::Value New;
4059
        if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd)
4060
          New = Builder.createAdd(switchEndian(Load), Value);
4061
        else if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4062
          New = Builder.createSub(switchEndian(Load), Value);
4063
        } else {
4064
          assumingUnreachable();
4065
        }
4066
        New = switchEndian(New);
4067
4068
        auto Exchange = Builder.createAtomicCmpXchg(
4069
            Ptr, Load, New, LLVMAtomicOrderingSequentiallyConsistent,
4070
            LLVMAtomicOrderingSequentiallyConsistent);
4071
4072
        Ret = Builder.createExtractValue(Exchange, 0);
4073
        auto Success = Builder.createExtractValue(Exchange, 1);
4074
        Builder.createCondBr(Success, OkBB, AtomicBB);
4075
        Builder.positionAtEnd(OkBB);
4076
      } else {
4077
        Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4078
                                      LLVMAtomicOrderingSequentiallyConsistent);
4079
      }
4080
0
    } else {
4081
0
      Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4082
0
                                    LLVMAtomicOrderingSequentiallyConsistent);
4083
0
    }
4084
0
    Ret = switchEndian(Ret);
4085
#if LLVM_VERSION_MAJOR >= 13
4086
    Ret.setAlignment(1 << Alignment);
4087
#endif
4088
0
    if (Signed) {
4089
0
      Stack.back() = Builder.createSExt(Ret, IntType);
4090
0
    } else {
4091
0
      Stack.back() = Builder.createZExt(Ret, IntType);
4092
0
    }
4093
0
  }
4094
  void compileAtomicCompareExchange(unsigned MemoryIndex, unsigned MemoryOffset,
4095
                                    [[maybe_unused]] unsigned Alignment,
4096
                                    LLVM::Type IntType, LLVM::Type TargetType,
4097
0
                                    bool Signed = false) noexcept {
4098
4099
0
    auto Replacement = Builder.createSExtOrTrunc(stackPop(), TargetType);
4100
0
    auto Expected = Builder.createSExtOrTrunc(stackPop(), TargetType);
4101
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4102
0
    if (MemoryOffset != 0) {
4103
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4104
0
    }
4105
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4106
0
    auto VPtr = Builder.createInBoundsGEP1(
4107
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4108
0
        Offset);
4109
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4110
4111
0
    auto Ret = Builder.createAtomicCmpXchg(
4112
0
        Ptr, switchEndian(Expected), switchEndian(Replacement),
4113
0
        LLVMAtomicOrderingSequentiallyConsistent,
4114
0
        LLVMAtomicOrderingSequentiallyConsistent);
4115
#if LLVM_VERSION_MAJOR >= 13
4116
    Ret.setAlignment(1 << Alignment);
4117
#endif
4118
0
    auto OldVal = Builder.createExtractValue(Ret, 0);
4119
0
    OldVal = switchEndian(OldVal);
4120
0
    if (Signed) {
4121
0
      Stack.back() = Builder.createSExt(OldVal, IntType);
4122
0
    } else {
4123
0
      Stack.back() = Builder.createZExt(OldVal, IntType);
4124
0
    }
4125
0
  }
4126
4127
11.7k
  void compileReturn() noexcept {
4128
11.7k
    updateInstrCount();
4129
11.7k
    updateGas();
4130
11.7k
    auto Ty = F.Ty.getReturnType();
4131
11.7k
    if (Ty.isVoidTy()) {
4132
2.17k
      Builder.createRetVoid();
4133
9.55k
    } else if (Ty.isStructTy()) {
4134
362
      const auto Count = Ty.getStructNumElements();
4135
362
      std::vector<LLVM::Value> Ret(Count);
4136
1.37k
      for (unsigned I = 0; I < Count; ++I) {
4137
1.01k
        const unsigned J = Count - 1 - I;
4138
1.01k
        Ret[J] = stackPop();
4139
1.01k
      }
4140
362
      Builder.createAggregateRet(Ret);
4141
9.19k
    } else {
4142
9.19k
      Builder.createRet(stackPop());
4143
9.19k
    }
4144
11.7k
  }
4145
4146
20.0k
  void updateInstrCount() noexcept {
4147
20.0k
    if (LocalInstrCount) {
4148
0
      auto Store [[maybe_unused]] = Builder.createAtomicRMW(
4149
0
          LLVMAtomicRMWBinOpAdd, Context.getInstrCount(Builder, ExecCtx),
4150
0
          Builder.createLoad(Context.Int64Ty, LocalInstrCount),
4151
0
          LLVMAtomicOrderingMonotonic);
4152
#if LLVM_VERSION_MAJOR >= 13
4153
      Store.setAlignment(8);
4154
#endif
4155
0
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
4156
0
    }
4157
20.0k
  }
4158
4159
22.5k
  void updateGas() noexcept {
4160
22.5k
    if (LocalGas) {
4161
0
      auto CurrBB = Builder.getInsertBlock();
4162
0
      auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_check");
4163
0
      auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_ok");
4164
0
      auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_end");
4165
4166
0
      auto Cost = Builder.createLoad(Context.Int64Ty, LocalGas);
4167
0
      Cost.setAlignment(64);
4168
0
      auto GasPtr = Context.getGas(Builder, ExecCtx);
4169
0
      auto GasLimit = Context.getGasLimit(Builder, ExecCtx);
4170
0
      auto Gas = Builder.createLoad(Context.Int64Ty, GasPtr);
4171
0
      Gas.setAlignment(64);
4172
0
      Gas.setOrdering(LLVMAtomicOrderingMonotonic);
4173
0
      Builder.createBr(CheckBB);
4174
0
      Builder.positionAtEnd(CheckBB);
4175
4176
0
      auto PHIOldGas = Builder.createPHI(Context.Int64Ty);
4177
0
      auto NewGas = Builder.createAdd(PHIOldGas, Cost);
4178
0
      auto IsGasRemain =
4179
0
          Builder.createLikely(Builder.createICmpULE(NewGas, GasLimit));
4180
0
      Builder.createCondBr(IsGasRemain, OkBB,
4181
0
                           getTrapBB(ErrCode::Value::CostLimitExceeded));
4182
0
      Builder.positionAtEnd(OkBB);
4183
4184
0
      auto RGasAndSucceed = Builder.createAtomicCmpXchg(
4185
0
          GasPtr, PHIOldGas, NewGas, LLVMAtomicOrderingMonotonic,
4186
0
          LLVMAtomicOrderingMonotonic);
4187
#if LLVM_VERSION_MAJOR >= 13
4188
      RGasAndSucceed.setAlignment(8);
4189
#endif
4190
0
      RGasAndSucceed.setWeak(true);
4191
0
      auto RGas = Builder.createExtractValue(RGasAndSucceed, 0);
4192
0
      auto Succeed = Builder.createExtractValue(RGasAndSucceed, 1);
4193
0
      Builder.createCondBr(Builder.createLikely(Succeed), EndBB, CheckBB);
4194
0
      Builder.positionAtEnd(EndBB);
4195
4196
0
      Builder.createStore(LLContext.getInt64(0), LocalGas);
4197
4198
0
      PHIOldGas.addIncoming(Gas, CurrBB);
4199
0
      PHIOldGas.addIncoming(RGas, OkBB);
4200
0
    }
4201
22.5k
  }
4202
4203
3.33k
  void updateGasAtTrap() noexcept {
4204
3.33k
    if (LocalGas) {
4205
0
      auto Update [[maybe_unused]] = Builder.createAtomicRMW(
4206
0
          LLVMAtomicRMWBinOpAdd, Context.getGas(Builder, ExecCtx),
4207
0
          Builder.createLoad(Context.Int64Ty, LocalGas),
4208
0
          LLVMAtomicOrderingMonotonic);
4209
#if LLVM_VERSION_MAJOR >= 13
4210
      Update.setAlignment(8);
4211
#endif
4212
0
    }
4213
3.33k
  }
4214
4215
private:
4216
3.61k
  void compileCallOp(const unsigned int FuncIndex) noexcept {
4217
3.61k
    const auto &FuncType =
4218
3.61k
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4219
3.61k
            ->getFuncType();
4220
3.61k
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4221
3.61k
    const auto &ParamTypes = FuncType.getParamTypes();
4222
4223
3.61k
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4224
3.61k
    Args[0] = F.Fn.getFirstParam();
4225
4.43k
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4226
817
      const size_t J = ParamTypes.size() - 1 - I;
4227
817
      Args[J + 1] = stackPop();
4228
817
    }
4229
4230
3.61k
    auto Ret = Builder.createCall(Function, Args);
4231
3.61k
    auto Ty = Ret.getType();
4232
3.61k
    if (Ty.isVoidTy()) {
4233
      // nothing to do
4234
1.93k
    } else if (Ty.isStructTy()) {
4235
161
      for (auto Val : unpackStruct(Builder, Ret)) {
4236
161
        stackPush(Val);
4237
161
      }
4238
1.61k
    } else {
4239
1.61k
      stackPush(Ret);
4240
1.61k
    }
4241
3.61k
  }
4242
4243
  void compileIndirectCallOp(const uint32_t TableIndex,
4244
1.15k
                             const uint32_t FuncTypeIndex) noexcept {
4245
1.15k
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4246
1.15k
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4247
1.15k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4248
4249
1.15k
    LLVM::Value FuncIndex = stackPop();
4250
1.15k
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4251
1.15k
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4252
1.15k
    auto RTy = FTy.getReturnType();
4253
4254
1.15k
    const size_t ArgSize = FuncType.getParamTypes().size();
4255
1.15k
    const size_t RetSize =
4256
1.15k
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4257
1.15k
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4258
1.15k
    ArgsVec[0] = F.Fn.getFirstParam();
4259
1.97k
    for (size_t I = 0; I < ArgSize; ++I) {
4260
816
      const size_t J = ArgSize - I;
4261
816
      ArgsVec[J] = stackPop();
4262
816
    }
4263
4264
1.15k
    std::vector<LLVM::Value> FPtrRetsVec;
4265
1.15k
    FPtrRetsVec.reserve(RetSize);
4266
1.15k
    {
4267
1.15k
      auto FPtr = Builder.createCall(
4268
1.15k
          Context.getIntrinsic(
4269
1.15k
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4270
1.15k
              LLVM::Type::getFunctionType(
4271
1.15k
                  FTy.getPointerTo(),
4272
1.15k
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4273
1.15k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4274
1.15k
           FuncIndex});
4275
1.15k
      Builder.createCondBr(
4276
1.15k
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4277
1.15k
          NotNullBB, IsNullBB);
4278
1.15k
      Builder.positionAtEnd(NotNullBB);
4279
4280
1.15k
      auto FPtrRet =
4281
1.15k
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4282
1.15k
      if (RetSize == 0) {
4283
        // nothing to do
4284
721
      } else if (RetSize == 1) {
4285
706
        FPtrRetsVec.push_back(FPtrRet);
4286
706
      } else {
4287
30
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4288
30
          FPtrRetsVec.push_back(Val);
4289
30
        }
4290
15
      }
4291
1.15k
    }
4292
4293
1.15k
    Builder.createBr(EndBB);
4294
1.15k
    Builder.positionAtEnd(IsNullBB);
4295
4296
1.15k
    std::vector<LLVM::Value> RetsVec;
4297
1.15k
    {
4298
1.15k
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4299
1.15k
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4300
1.15k
      Builder.createArrayPtrStore(
4301
1.15k
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4302
1.15k
          kValSize);
4303
4304
1.15k
      Builder.createCall(
4305
1.15k
          Context.getIntrinsic(
4306
1.15k
              Builder, Executable::Intrinsics::kCallIndirect,
4307
1.15k
              LLVM::Type::getFunctionType(Context.VoidTy,
4308
1.15k
                                          {Context.Int32Ty, Context.Int32Ty,
4309
1.15k
                                           Context.Int32Ty, Context.Int8PtrTy,
4310
1.15k
                                           Context.Int8PtrTy},
4311
1.15k
                                          false)),
4312
1.15k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4313
1.15k
           FuncIndex, Args, Rets});
4314
4315
1.15k
      if (RetSize == 0) {
4316
        // nothing to do
4317
721
      } else if (RetSize == 1) {
4318
706
        RetsVec.push_back(
4319
706
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4320
706
      } else {
4321
15
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4322
15
                                             kValSize);
4323
15
      }
4324
1.15k
      Builder.createBr(EndBB);
4325
1.15k
      Builder.positionAtEnd(EndBB);
4326
1.15k
    }
4327
4328
1.89k
    for (unsigned I = 0; I < RetSize; ++I) {
4329
736
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4330
736
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4331
736
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4332
736
      stackPush(PHIRet);
4333
736
    }
4334
1.15k
  }
4335
4336
63
  void compileReturnCallOp(const unsigned int FuncIndex) noexcept {
4337
63
    const auto &FuncType =
4338
63
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4339
63
            ->getFuncType();
4340
63
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4341
63
    const auto &ParamTypes = FuncType.getParamTypes();
4342
4343
63
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4344
63
    Args[0] = F.Fn.getFirstParam();
4345
141
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4346
78
      const size_t J = ParamTypes.size() - 1 - I;
4347
78
      Args[J + 1] = stackPop();
4348
78
    }
4349
4350
63
    auto Ret = Builder.createCall(Function, Args);
4351
63
    auto Ty = Ret.getType();
4352
63
    if (Ty.isVoidTy()) {
4353
10
      Builder.createRetVoid();
4354
53
    } else {
4355
53
      Builder.createRet(Ret);
4356
53
    }
4357
63
  }
4358
4359
  void compileReturnIndirectCallOp(const uint32_t TableIndex,
4360
141
                                   const uint32_t FuncTypeIndex) noexcept {
4361
141
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4362
141
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4363
4364
141
    LLVM::Value FuncIndex = stackPop();
4365
141
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4366
141
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4367
141
    auto RTy = FTy.getReturnType();
4368
4369
141
    const size_t ArgSize = FuncType.getParamTypes().size();
4370
141
    const size_t RetSize =
4371
141
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4372
141
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4373
141
    ArgsVec[0] = F.Fn.getFirstParam();
4374
266
    for (size_t I = 0; I < ArgSize; ++I) {
4375
125
      const size_t J = ArgSize - I;
4376
125
      ArgsVec[J] = stackPop();
4377
125
    }
4378
4379
141
    {
4380
141
      auto FPtr = Builder.createCall(
4381
141
          Context.getIntrinsic(
4382
141
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4383
141
              LLVM::Type::getFunctionType(
4384
141
                  FTy.getPointerTo(),
4385
141
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4386
141
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4387
141
           FuncIndex});
4388
141
      Builder.createCondBr(
4389
141
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4390
141
          NotNullBB, IsNullBB);
4391
141
      Builder.positionAtEnd(NotNullBB);
4392
4393
141
      auto FPtrRet =
4394
141
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4395
141
      if (RetSize == 0) {
4396
52
        Builder.createRetVoid();
4397
89
      } else {
4398
89
        Builder.createRet(FPtrRet);
4399
89
      }
4400
141
    }
4401
4402
141
    Builder.positionAtEnd(IsNullBB);
4403
4404
141
    {
4405
141
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4406
141
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4407
141
      Builder.createArrayPtrStore(
4408
141
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4409
141
          kValSize);
4410
4411
141
      Builder.createCall(
4412
141
          Context.getIntrinsic(
4413
141
              Builder, Executable::Intrinsics::kCallIndirect,
4414
141
              LLVM::Type::getFunctionType(Context.VoidTy,
4415
141
                                          {Context.Int32Ty, Context.Int32Ty,
4416
141
                                           Context.Int32Ty, Context.Int8PtrTy,
4417
141
                                           Context.Int8PtrTy},
4418
141
                                          false)),
4419
141
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4420
141
           FuncIndex, Args, Rets});
4421
4422
141
      if (RetSize == 0) {
4423
52
        Builder.createRetVoid();
4424
89
      } else if (RetSize == 1) {
4425
83
        Builder.createRet(
4426
83
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4427
83
      } else {
4428
6
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4429
6
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4430
6
      }
4431
141
    }
4432
141
  }
4433
4434
1
  void compileCallRefOp(const unsigned int TypeIndex) noexcept {
4435
1
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4436
1
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4437
1
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4438
4439
1
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4440
1
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4441
1
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4442
1
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4443
1
        LLContext.getInt64(0)));
4444
1
    Builder.createCondBr(IsRefNotNull, OkBB,
4445
1
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4446
1
    Builder.positionAtEnd(OkBB);
4447
4448
1
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4449
1
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4450
1
    auto RTy = FTy.getReturnType();
4451
4452
1
    const size_t ArgSize = FuncType.getParamTypes().size();
4453
1
    const size_t RetSize =
4454
1
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4455
1
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4456
1
    ArgsVec[0] = F.Fn.getFirstParam();
4457
1
    for (size_t I = 0; I < ArgSize; ++I) {
4458
0
      const size_t J = ArgSize - I;
4459
0
      ArgsVec[J] = stackPop();
4460
0
    }
4461
4462
1
    std::vector<LLVM::Value> FPtrRetsVec;
4463
1
    FPtrRetsVec.reserve(RetSize);
4464
1
    {
4465
1
      auto FPtr = Builder.createCall(
4466
1
          Context.getIntrinsic(
4467
1
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4468
1
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4469
1
                                          {Context.Int64x2Ty}, false)),
4470
1
          {Ref});
4471
1
      Builder.createCondBr(
4472
1
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4473
1
          NotNullBB, IsNullBB);
4474
1
      Builder.positionAtEnd(NotNullBB);
4475
4476
1
      auto FPtrRet =
4477
1
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4478
1
      if (RetSize == 0) {
4479
        // nothing to do
4480
1
      } else if (RetSize == 1) {
4481
0
        FPtrRetsVec.push_back(FPtrRet);
4482
0
      } else {
4483
0
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4484
0
          FPtrRetsVec.push_back(Val);
4485
0
        }
4486
0
      }
4487
1
    }
4488
4489
1
    Builder.createBr(EndBB);
4490
1
    Builder.positionAtEnd(IsNullBB);
4491
4492
1
    std::vector<LLVM::Value> RetsVec;
4493
1
    {
4494
1
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4495
1
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4496
1
      Builder.createArrayPtrStore(
4497
1
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4498
1
          kValSize);
4499
4500
1
      Builder.createCall(
4501
1
          Context.getIntrinsic(
4502
1
              Builder, Executable::Intrinsics::kCallRef,
4503
1
              LLVM::Type::getFunctionType(
4504
1
                  Context.VoidTy,
4505
1
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4506
1
                  false)),
4507
1
          {Ref, Args, Rets});
4508
4509
1
      if (RetSize == 0) {
4510
        // nothing to do
4511
1
      } else if (RetSize == 1) {
4512
0
        RetsVec.push_back(
4513
0
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4514
0
      } else {
4515
0
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4516
0
                                             kValSize);
4517
0
      }
4518
1
      Builder.createBr(EndBB);
4519
1
      Builder.positionAtEnd(EndBB);
4520
1
    }
4521
4522
1
    for (unsigned I = 0; I < RetSize; ++I) {
4523
0
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4524
0
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4525
0
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4526
0
      stackPush(PHIRet);
4527
0
    }
4528
1
  }
4529
4530
1
  void compileReturnCallRefOp(const unsigned int TypeIndex) noexcept {
4531
1
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4532
1
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4533
4534
1
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4535
1
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4536
1
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4537
1
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4538
1
        LLContext.getInt64(0)));
4539
1
    Builder.createCondBr(IsRefNotNull, OkBB,
4540
1
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4541
1
    Builder.positionAtEnd(OkBB);
4542
4543
1
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4544
1
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4545
1
    auto RTy = FTy.getReturnType();
4546
4547
1
    const size_t ArgSize = FuncType.getParamTypes().size();
4548
1
    const size_t RetSize =
4549
1
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4550
1
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4551
1
    ArgsVec[0] = F.Fn.getFirstParam();
4552
1
    for (size_t I = 0; I < ArgSize; ++I) {
4553
0
      const size_t J = ArgSize - I;
4554
0
      ArgsVec[J] = stackPop();
4555
0
    }
4556
4557
1
    {
4558
1
      auto FPtr = Builder.createCall(
4559
1
          Context.getIntrinsic(
4560
1
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4561
1
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4562
1
                                          {Context.Int64x2Ty}, false)),
4563
1
          {Ref});
4564
1
      Builder.createCondBr(
4565
1
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4566
1
          NotNullBB, IsNullBB);
4567
1
      Builder.positionAtEnd(NotNullBB);
4568
4569
1
      auto FPtrRet =
4570
1
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4571
1
      if (RetSize == 0) {
4572
1
        Builder.createRetVoid();
4573
1
      } else {
4574
0
        Builder.createRet(FPtrRet);
4575
0
      }
4576
1
    }
4577
4578
1
    Builder.positionAtEnd(IsNullBB);
4579
4580
1
    {
4581
1
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4582
1
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4583
1
      Builder.createArrayPtrStore(
4584
1
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4585
1
          kValSize);
4586
4587
1
      Builder.createCall(
4588
1
          Context.getIntrinsic(
4589
1
              Builder, Executable::Intrinsics::kCallRef,
4590
1
              LLVM::Type::getFunctionType(
4591
1
                  Context.VoidTy,
4592
1
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4593
1
                  false)),
4594
1
          {Ref, Args, Rets});
4595
4596
1
      if (RetSize == 0) {
4597
1
        Builder.createRetVoid();
4598
1
      } else if (RetSize == 1) {
4599
0
        Builder.createRet(
4600
0
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4601
0
      } else {
4602
0
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4603
0
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4604
0
      }
4605
1
    }
4606
1
  }
4607
4608
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4609
19.8k
                     LLVM::Type LoadTy) noexcept {
4610
19.8k
    if constexpr (kForceUnalignment) {
4611
19.8k
      Alignment = 0;
4612
19.8k
    }
4613
19.8k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4614
19.8k
    if (Offset != 0) {
4615
13.1k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4616
13.1k
    }
4617
4618
19.8k
    auto VPtr = Builder.createInBoundsGEP1(
4619
19.8k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4620
19.8k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4621
19.8k
    auto LoadInst = Builder.createLoad(LoadTy, Ptr, true);
4622
19.8k
    LoadInst.setAlignment(1 << Alignment);
4623
19.8k
    stackPush(switchEndian(LoadInst));
4624
19.8k
  }
4625
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4626
                     LLVM::Type LoadTy, LLVM::Type ExtendTy,
4627
8.03k
                     bool Signed) noexcept {
4628
8.03k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4629
8.03k
    if (Signed) {
4630
3.46k
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4631
4.57k
    } else {
4632
4.57k
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4633
4.57k
    }
4634
8.03k
  }
4635
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4636
5.11k
                           unsigned Alignment, LLVM::Type LoadTy) noexcept {
4637
5.11k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4638
5.11k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4639
5.11k
  }
4640
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4641
                           unsigned Alignment, LLVM::Type LoadTy,
4642
1.76k
                           LLVM::Type ExtendTy, bool Signed) noexcept {
4643
1.76k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy, ExtendTy, Signed);
4644
1.76k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4645
1.76k
  }
4646
  void compileSplatLoadOp(unsigned MemoryIndex, unsigned Offset,
4647
                          unsigned Alignment, LLVM::Type LoadTy,
4648
681
                          LLVM::Type VectorTy) noexcept {
4649
681
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4650
681
    compileSplatOp(VectorTy);
4651
681
  }
4652
  void compileLoadLaneOp(unsigned MemoryIndex, unsigned Offset,
4653
                         unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4654
501
                         LLVM::Type VectorTy) noexcept {
4655
501
    auto Vector = stackPop();
4656
501
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4657
    if constexpr (Endian::native == Endian::big) {
4658
      Index = VectorTy.getVectorSize() - 1 - Index;
4659
    }
4660
501
    auto Value = Stack.back();
4661
501
    Stack.back() = Builder.createBitCast(
4662
501
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4663
501
                                    Value, LLContext.getInt64(Index)),
4664
501
        Context.Int64x2Ty);
4665
501
  }
4666
  void compileStoreOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4667
                      LLVM::Type LoadTy, bool Trunc = false,
4668
3.35k
                      bool BitCast = false) noexcept {
4669
3.35k
    if constexpr (kForceUnalignment) {
4670
3.35k
      Alignment = 0;
4671
3.35k
    }
4672
3.35k
    auto V = stackPop();
4673
3.35k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4674
3.35k
    if (Offset != 0) {
4675
2.53k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4676
2.53k
    }
4677
4678
3.35k
    if (Trunc) {
4679
725
      V = Builder.createTrunc(V, LoadTy);
4680
725
    }
4681
3.35k
    if (BitCast) {
4682
235
      V = Builder.createBitCast(V, LoadTy);
4683
235
    }
4684
3.35k
    V = switchEndian(V);
4685
3.35k
    auto VPtr = Builder.createInBoundsGEP1(
4686
3.35k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4687
3.35k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4688
3.35k
    auto StoreInst = Builder.createStore(V, Ptr, true);
4689
3.35k
    StoreInst.setAlignment(1 << Alignment);
4690
3.35k
  }
4691
  void compileStoreLaneOp(unsigned MemoryIndex, unsigned Offset,
4692
                          unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4693
347
                          LLVM::Type VectorTy) noexcept {
4694
347
    auto Vector = Stack.back();
4695
    if constexpr (Endian::native == Endian::big) {
4696
      Index = VectorTy.getVectorSize() - Index - 1;
4697
    }
4698
347
    Stack.back() = Builder.createExtractElement(
4699
347
        Builder.createBitCast(Vector, VectorTy), LLContext.getInt64(Index));
4700
347
    compileStoreOp(MemoryIndex, Offset, Alignment, LoadTy);
4701
347
  }
4702
53.2k
  // Replaces the scalar on top of the stack with a `VectorTy` vector whose
  // every lane holds that scalar, bitcast to Int64x2Ty.
  //
  // The scalar is truncated to the element type, inserted into lane 0 of an
  // undef vector, and broadcast with a shuffle whose mask is all zeros.
  void compileSplatOp(LLVM::Type VectorTy) noexcept {
    auto UndefVec = LLVM::Value::getUndef(VectorTy);
    auto BroadcastMask = LLVM::Value::getConstNull(
        LLVM::Type::getVectorType(Context.Int32Ty, VectorTy.getVectorSize()));

    auto Scalar = Builder.createTrunc(Stack.back(), VectorTy.getElementType());
    auto Seeded =
        Builder.createInsertElement(UndefVec, Scalar, LLContext.getInt64(0));
    auto Splatted = Builder.createShuffleVector(Seeded, UndefVec, BroadcastMask);

    Stack.back() = Builder.createBitCast(Splatted, Context.Int64x2Ty);
  }
4713
1.33k
  // Replaces the vector on top of the stack with lane `Index` of its
  // `VectorTy` view. When Endian::native is big the lane index is mirrored.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto Lanes = Builder.createBitCast(Stack.back(), VectorTy);
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    auto LaneIndex = LLContext.getInt64(Index);
    Stack.back() = Builder.createExtractElement(Lanes, LaneIndex);
  }
4721
  // Extracts lane `Index` of the `VectorTy` view of the top-of-stack vector
  // and widens it to `ExtendTy`, sign-extending when `Signed` is true and
  // zero-extending otherwise.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index,
                            LLVM::Type ExtendTy, bool Signed) noexcept {
    compileExtractLaneOp(VectorTy, Index);
    auto Lane = Stack.back();
    Stack.back() = Signed ? Builder.createSExt(Lane, ExtendTy)
                          : Builder.createZExt(Lane, ExtendTy);
  }
4730
1.29k
  // Pops a scalar, truncates it to the element type of `VectorTy`, and writes
  // it into lane `Index` of the vector now on top of the stack. The updated
  // vector replaces the top of the stack as Int64x2Ty. When Endian::native is
  // big the lane index is mirrored.
  void compileReplaceLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto Scalar = Builder.createTrunc(stackPop(), VectorTy.getElementType());
    auto Packed = Stack.back();
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    auto Lanes = Builder.createBitCast(Packed, VectorTy);
    auto Updated =
        Builder.createInsertElement(Lanes, Scalar, LLContext.getInt64(Index));
    Stack.back() = Builder.createBitCast(Updated, Context.Int64x2Ty);
  }
4741
  void compileVectorCompareOp(LLVM::Type VectorTy,
4742
5.27k
                              LLVMIntPredicate Predicate) noexcept {
4743
5.27k
    auto RHS = stackPop();
4744
5.27k
    auto LHS = stackPop();
4745
5.27k
    auto Result = Builder.createSExt(
4746
5.27k
        Builder.createICmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4747
5.27k
                           Builder.createBitCast(RHS, VectorTy)),
4748
5.27k
        VectorTy);
4749
5.27k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4750
5.27k
  }
4751
  // Pops the right then the left operand, compares their `VectorTy` views
  // lane-wise with the floating-point `Predicate`, sign-extends the per-lane
  // result to `ResultTy`, and pushes it as Int64x2Ty.
  void compileVectorCompareOp(LLVM::Type VectorTy, LLVMRealPredicate Predicate,
                              LLVM::Type ResultTy) noexcept {
    auto Right = stackPop();
    auto Left = stackPop();
    // Both operand casts stay nested in the call, as in the original.
    auto LaneFlags =
        Builder.createFCmp(Predicate, Builder.createBitCast(Left, VectorTy),
                           Builder.createBitCast(Right, VectorTy));
    auto LaneMask = Builder.createSExt(LaneFlags, ResultTy);
    stackPush(Builder.createBitCast(LaneMask, Context.Int64x2Ty));
  }
4761
  template <typename Func>
4762
27.5k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
27.5k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
27.5k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
27.5k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.24k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.24k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.24k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.24k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.78k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.78k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.78k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.78k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
147
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
147
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
147
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
147
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.72k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.72k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.72k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.72k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
543
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
543
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
543
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
543
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
890
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
890
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
890
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
890
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
332
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
332
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
332
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
332
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
1.60k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
1.60k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
1.60k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
1.60k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.83k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.83k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.83k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.83k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.00k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.00k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.00k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.00k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
417
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
417
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
417
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
417
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
986
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
986
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
986
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
986
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
5.86k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
5.86k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
5.86k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
5.86k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
686
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
686
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
686
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
686
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.01k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.01k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.01k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.01k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
729
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
729
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
729
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
729
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
758
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
758
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
758
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
758
  }
4766
2.24k
  // Lane-wise integer absolute value on the top-of-stack vector: lanes that
  // compare signed-less-than zero are replaced by their negation.
  void compileVectorAbs(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this, VectorTy](auto Lanes) noexcept {
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto IsNegative = Builder.createICmpSLT(Lanes, AllZero);
      auto Negated = Builder.createNeg(Lanes);
      return Builder.createSelect(IsNegative, Negated, Lanes);
    });
  }
4773
2.78k
  // Lane-wise integer negation of the top-of-stack vector viewed as
  // `VectorTy`.
  void compileVectorNeg(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Lanes) noexcept {
      return Builder.createNeg(Lanes);
    });
  }
4777
147
  void compileVectorPopcnt() noexcept {
4778
147
    compileVectorOp(Context.Int8x16Ty, [this](auto V) noexcept {
4779
147
      assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
4780
147
      return Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, V);
4781
147
    });
4782
147
  }
4783
  template <typename Func>
4784
2.47k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
2.47k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
2.47k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
2.47k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
106
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
106
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
106
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
106
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
996
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
996
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
996
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
996
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
1.37k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
1.37k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
1.37k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
1.37k
  }
4788
106
  void compileVectorAnyTrue() noexcept {
4789
106
    compileVectorReduceIOp(Context.Int128x1Ty, [this](auto V) noexcept {
4790
106
      auto Zero = LLVM::Value::getConstNull(Context.Int128x1Ty);
4791
106
      return Builder.createBitCast(Builder.createICmpNE(V, Zero),
4792
106
                                   LLContext.getInt1Ty());
4793
106
    });
4794
106
  }
4795
996
  // Replaces the top-of-stack vector with an i32 flag that is set when no lane
  // of its `VectorTy` view equals zero.
  void compileVectorAllTrue(LLVM::Type VectorTy) noexcept {
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto Lanes) noexcept {
      // One bit per lane: the integer is as wide as the vector has lanes.
      const auto LaneCount = VectorTy.getVectorSize();
      auto BitsTy = LLContext.getIntNTy(LaneCount);
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto LaneIsZero = Builder.createICmpEQ(Lanes, AllZero);
      auto ZeroLaneBits = Builder.createBitCast(LaneIsZero, BitsTy);
      auto NoBits = LLVM::Value::getConstInt(BitsTy, 0);
      // True when the set of zero lanes is empty.
      return Builder.createICmpEQ(ZeroLaneBits, NoBits);
    });
  }
4805
1.37k
  // Replaces the top-of-stack vector with an i32 whose low bits record, one
  // bit per lane, whether that lane of the `VectorTy` view is signed-less-than
  // zero.
  void compileVectorBitMask(LLVM::Type VectorTy) noexcept {
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto Lanes) noexcept {
      const auto LaneCount = VectorTy.getVectorSize();
      auto BitsTy = LLContext.getIntNTy(LaneCount);
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto LaneIsNegative = Builder.createICmpSLT(Lanes, AllZero);
      return Builder.createBitCast(LaneIsNegative, BitsTy);
    });
  }
4813
  template <typename Func>
4814
5.10k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
5.10k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
5.10k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
5.10k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
5.10k
    auto RHS = Builder.createVectorSplat(
4819
5.10k
        VectorTy.getVectorSize(),
4820
5.10k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
5.10k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
5.10k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
5.10k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
5.10k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
2.00k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
2.00k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
2.00k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
2.00k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
2.00k
    auto RHS = Builder.createVectorSplat(
4819
2.00k
        VectorTy.getVectorSize(),
4820
2.00k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
2.00k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
2.00k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
2.00k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
2.00k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
2.38k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
2.38k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
2.38k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
2.38k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
2.38k
    auto RHS = Builder.createVectorSplat(
4819
2.38k
        VectorTy.getVectorSize(),
4820
2.38k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
2.38k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
2.38k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
2.38k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
2.38k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
723
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
723
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
723
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
723
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
723
    auto RHS = Builder.createVectorSplat(
4819
723
        VectorTy.getVectorSize(),
4820
723
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
723
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
723
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
723
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
723
  }
4825
2.00k
  // Lane-wise left shift, lowered through compileVectorShiftOp.
  void compileVectorShl(LLVM::Type VectorTy) noexcept {
    compileVectorShiftOp(VectorTy, [this](auto Operand, auto Amounts) noexcept {
      auto Shifted = Builder.createShl(Operand, Amounts);
      return Shifted;
    });
  }
4830
723
  // Lane-wise logical right shift, lowered through compileVectorShiftOp.
  void compileVectorLShr(LLVM::Type VectorTy) noexcept {
    compileVectorShiftOp(VectorTy, [this](auto Operand, auto Amounts) noexcept {
      auto Shifted = Builder.createLShr(Operand, Amounts);
      return Shifted;
    });
  }
4835
2.38k
  // Compiles a vector arithmetic shift-right. All stack handling is delegated
  // to compileVectorShiftOp; the callback only combines the two operands it is
  // handed with Builder.createAShr.
  void compileVectorAShr(LLVM::Type VectorTy) noexcept {
4836
2.38k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4837
2.38k
      return Builder.createAShr(LHS, RHS);
4838
2.38k
    });
4839
2.38k
  }
4840
  // Shared driver for two-operand vector instructions.
  //
  // Pops two values from the stack, reinterprets both as VectorTy, calls
  // Op(LHS, RHS) and pushes the result reinterpreted as Context.Int64x2Ty.
  //
  // VectorTy: lane layout both operands are bitcast to before calling Op.
  // Op:       callable taking (LHS, RHS) and returning the value to push.
  template <typename Func>
4841
8.64k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
8.64k
    // The value popped first is the right-hand operand.
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
8.64k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
8.64k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
8.64k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
397
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
397
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
397
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
397
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
397
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
1.60k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
1.60k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
1.60k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
1.60k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
1.60k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
874
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
874
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
874
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
874
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
874
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
394
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
394
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
394
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
394
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
394
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
325
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
325
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
325
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
325
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
325
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
372
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
372
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
372
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
372
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
372
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
469
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
469
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
469
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
469
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
469
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
978
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
978
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
978
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
978
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
978
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
293
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
293
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
293
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
293
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
293
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
441
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
441
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
441
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
441
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
441
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
196
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
196
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
196
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
196
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
196
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
180
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
180
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
180
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
180
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
180
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
474
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
474
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
474
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
474
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
474
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
250
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
250
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
250
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
250
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
250
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
213
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
213
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
213
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
213
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
213
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
308
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
308
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
308
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
308
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
308
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
229
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
229
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
229
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
229
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
229
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
314
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
314
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
314
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
314
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
314
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
334
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
334
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
334
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
334
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
334
  }
4846
397
  // Compiles a lane-wise vector addition: runs compileVectorVectorOp with a
  // callback that emits Builder.createAdd(LHS, RHS).
  void compileVectorVectorAdd(LLVM::Type VectorTy) noexcept {
4847
397
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4848
397
      return Builder.createAdd(LHS, RHS);
4849
397
    });
4850
397
  }
4851
1.60k
  // Compiles a saturating vector addition through an intrinsic call.
  //
  // Signed selects LLVM::Core::SAddSat, otherwise LLVM::Core::UAddSat is used.
  // The intrinsic is instantiated for VectorTy and applied to the two operands
  // supplied by compileVectorVectorOp.
  void compileVectorVectorAddSat(LLVM::Type VectorTy, bool Signed) noexcept {
4852
1.60k
    auto ID = Signed ? LLVM::Core::SAddSat : LLVM::Core::UAddSat;
4853
1.60k
    // The chosen intrinsic ID must be a real one before it is used below.
    assuming(ID != LLVM::Core::NotIntrinsic);
4854
1.60k
    compileVectorVectorOp(
4855
1.60k
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4856
1.60k
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4857
1.60k
        });
4858
1.60k
  }
4859
874
  // Compiles a lane-wise vector subtraction: runs compileVectorVectorOp with a
  // callback that emits Builder.createSub(LHS, RHS).
  void compileVectorVectorSub(LLVM::Type VectorTy) noexcept {
4860
874
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4861
874
      return Builder.createSub(LHS, RHS);
4862
874
    });
4863
874
  }
4864
394
  // Compiles a saturating vector subtraction through an intrinsic call.
  //
  // Signed selects LLVM::Core::SSubSat, otherwise LLVM::Core::USubSat is used.
  // The intrinsic is instantiated for VectorTy and applied to the two operands
  // supplied by compileVectorVectorOp.
  void compileVectorVectorSubSat(LLVM::Type VectorTy, bool Signed) noexcept {
4865
394
    auto ID = Signed ? LLVM::Core::SSubSat : LLVM::Core::USubSat;
4866
394
    // The chosen intrinsic ID must be a real one before it is used below.
    assuming(ID != LLVM::Core::NotIntrinsic);
4867
394
    compileVectorVectorOp(
4868
394
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4869
394
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4870
394
        });
4871
394
  }
4872
441
  // Compiles a lane-wise vector multiplication: runs compileVectorVectorOp with
  // a callback that emits Builder.createMul(LHS, RHS).
  void compileVectorVectorMul(LLVM::Type VectorTy) noexcept {
4873
441
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4874
441
      return Builder.createMul(LHS, RHS);
4875
441
    });
4876
441
  }
4877
75
  // Compiles a byte swizzle.
  //
  // Pops the index vector first and the data vector second, both viewed as
  // Context.Int8x16Ty, and pushes (as Context.Int64x2Ty) a vector whose lanes
  // are data bytes selected by the corresponding index byte. The generic path
  // at the bottom spells out the result: lanes whose index is above 15 become
  // 0, every other lane I receives Vector[Index[I]].
  //
  // Target-specific paths are chosen at build time by the preprocessor and at
  // run time by the Context.Support* flags; each of them returns early.
  void compileVectorSwizzle() noexcept {
4878
75
    auto Index = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4879
75
    auto Vector = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4880
4881
75
#if defined(__x86_64__)
4882
75
    if (Context.SupportSSSE3) {
4883
75
      // 112 == 0x70. After the add, indices 0..15 become 0x70..0x7F (low
      // nibble unchanged, bit 7 clear) and indices 16..143 become 0x80..0xFF
      // (bit 7 set). Indices 144..255 wrap around; the unsigned compare
      // Index > Added detects the wrap and forces those lanes to all-ones.
      // NOTE(review): presumably relies on pshufb yielding 0 for selector
      // bytes with bit 7 set and using only the low 4 bits otherwise --
      // confirm against the PSHUFB definition.
      auto Magic = Builder.createVectorSplat(16, LLContext.getInt8(112));
4884
75
      auto Added = Builder.createAdd(Index, Magic);
4885
75
      auto NewIndex = Builder.createSelect(
4886
75
          Builder.createICmpUGT(Index, Added),
4887
75
          LLVM::Value::getConstAllOnes(Context.Int8x16Ty), Added);
4888
75
      assuming(LLVM::Core::X86SSSE3PShufB128 != LLVM::Core::NotIntrinsic);
4889
75
      stackPush(Builder.createBitCast(
4890
75
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PShufB128, {},
4891
75
                                  {Vector, NewIndex}),
4892
75
          Context.Int64x2Ty));
4893
75
      return;
4894
75
    }
4895
0
#endif
4896
4897
#if defined(__aarch64__)
4898
    if (Context.SupportNEON) {
4899
      // The index vector is passed to the table-lookup intrinsic unmodified.
      assuming(LLVM::Core::AArch64NeonTbl1 != LLVM::Core::NotIntrinsic);
4900
      stackPush(Builder.createBitCast(
4901
          Builder.createIntrinsic(LLVM::Core::AArch64NeonTbl1,
4902
                                  {Context.Int8x16Ty}, {Vector, Index}),
4903
          Context.Int64x2Ty));
4904
      return;
4905
    }
4906
#endif
4907
4908
0
    // Constants shared by the s390x path and the generic path below.
    auto Mask = Builder.createVectorSplat(16, LLContext.getInt8(15));
4909
0
    auto Zero = Builder.createVectorSplat(16, LLContext.getInt8(0));
4910
4911
#if defined(__s390x__)
4912
    assuming(LLVM::Core::S390VPerm != LLVM::Core::NotIntrinsic);
4913
    // Despite its name, Exceed is true for lanes whose index is <= 15, i.e.
    // the lanes that keep the permute result; all others are replaced by Zero.
    auto Exceed = Builder.createICmpULE(Index, Mask);
4914
    // Index becomes 15 - Index before the permute.
    // NOTE(review): presumably compensates for big-endian lane numbering --
    // confirm.
    Index = Builder.createSub(Mask, Index);
4915
    auto Result = Builder.createIntrinsic(LLVM::Core::S390VPerm, {},
4916
                                          {Vector, Zero, Index});
4917
    Result = Builder.createSelect(Exceed, Result, Zero);
4918
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4919
    // Unconditional: when this branch is compiled in, the generic code below
    // is never reached.
    return;
4920
#endif
4921
4922
    // Fallback case.
4923
    // If the SSSE3 is not supported on the x86_64 platform or
4924
    // the NEON is not supported on the aarch64 platform,
4925
    // then fallback to this.
4926
0
    auto IsOver = Builder.createICmpUGT(Index, Mask);
4927
0
    // Index & 15 keeps every gather below within the 16 stored entries.
    auto InboundIndex = Builder.createAnd(Index, Mask);
4928
0
    // Scratch storage the 16 data bytes are spilled to, one byte per entry.
    auto Array = Builder.createArray(16, 1);
4929
0
    for (size_t I = 0; I < 16; ++I) {
4930
0
      Builder.createStore(
4931
0
          Builder.createExtractElement(Vector, LLContext.getInt64(I)),
4932
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array,
4933
0
                                     LLContext.getInt64(I)));
4934
0
    }
4935
0
    // Gather lane by lane: load Array[InboundIndex[I]] and insert it at I.
    LLVM::Value Ret = LLVM::Value::getUndef(Context.Int8x16Ty);
4936
0
    for (size_t I = 0; I < 16; ++I) {
4937
0
      auto Idx =
4938
0
          Builder.createExtractElement(InboundIndex, LLContext.getInt64(I));
4939
0
      auto Value = Builder.createLoad(
4940
0
          Context.Int8Ty,
4941
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array, Idx));
4942
0
      Ret = Builder.createInsertElement(Ret, Value, LLContext.getInt64(I));
4943
0
    }
4944
0
    // Lanes whose original index was above 15 are zeroed.
    Ret = Builder.createSelect(IsOver, Zero, Ret);
4945
0
    stackPush(Builder.createBitCast(Ret, Context.Int64x2Ty));
4946
0
  }
4947
4948
196
  // Compiles a 16-bit-lane multiply on Context.Int16x8Ty operands via
  // compileVectorVectorOp.
  //
  // The generic path computes (LHS * RHS + 0x4000) >> 15 in widened lanes,
  // truncates back to 16 bits, and then replaces every result lane equal to
  // 0x8000 with 0x7FFF. The x86_64/SSSE3 path applies the same 0x8000 fix-up
  // to the intrinsic's result; the aarch64/NEON path returns the intrinsic's
  // result directly.
  void compileVectorVectorQ15MulSat() noexcept {
4949
196
    compileVectorVectorOp(
4950
196
        Context.Int16x8Ty, [this](auto LHS, auto RHS) noexcept -> LLVM::Value {
4951
196
#if defined(__x86_64__)
4952
196
          if (Context.SupportSSSE3) {
4953
196
            assuming(LLVM::Core::X86SSSE3PMulHrSw128 !=
4954
196
                     LLVM::Core::NotIntrinsic);
4955
196
            auto Result = Builder.createIntrinsic(
4956
196
                LLVM::Core::X86SSSE3PMulHrSw128, {}, {LHS, RHS});
4957
196
            // Naming caveat: IntMaxV holds 0x8000 in every lane, and NotOver
            // is all-ones exactly in the lanes where Result == 0x8000. The XOR
            // therefore turns 0x8000 into 0x7FFF and leaves other lanes as is.
            auto IntMaxV = Builder.createVectorSplat(
4958
196
                8, LLContext.getInt16(UINT16_C(0x8000)));
4959
196
            auto NotOver = Builder.createSExt(
4960
196
                Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4961
196
            return Builder.createXor(Result, NotOver);
4962
196
          }
4963
0
#endif
4964
4965
#if defined(__aarch64__)
4966
          if (Context.SupportNEON) {
4967
            assuming(LLVM::Core::AArch64NeonSQRDMulH !=
4968
                     LLVM::Core::NotIntrinsic);
4969
            return Builder.createBinaryIntrinsic(
4970
                LLVM::Core::AArch64NeonSQRDMulH, LHS, RHS);
4971
          }
4972
#endif
4973
4974
          // Fallback case.
4975
          // If the SSSE3 is not supported on the x86_64 platform or
4976
          // the NEON is not supported on the aarch64 platform,
4977
          // then fallback to this.
4978
0
          // NOTE(review): ExtTy is presumably a vector of 8 x 32-bit lanes,
          // matching the Int32 splat constants below -- confirm.
          auto ExtTy = Context.Int16x8Ty.getExtendedElementVectorType();
4979
0
          // 0x4000 is half of 1 << 15, added before the shift right by 15.
          auto Offset = Builder.createVectorSplat(
4980
0
              8, LLContext.getInt32(UINT32_C(0x4000)));
4981
0
          auto Shift =
4982
0
              Builder.createVectorSplat(8, LLContext.getInt32(UINT32_C(15)));
4983
0
          auto ExtLHS = Builder.createSExt(LHS, ExtTy);
4984
0
          auto ExtRHS = Builder.createSExt(RHS, ExtTy);
4985
0
          auto Result = Builder.createTrunc(
4986
0
              Builder.createAShr(
4987
0
                  Builder.createAdd(Builder.createMul(ExtLHS, ExtRHS), Offset),
4988
0
                  Shift),
4989
0
              Context.Int16x8Ty);
4990
0
          // Same 0x8000 -> 0x7FFF fix-up as in the SSSE3 path above.
          auto IntMaxV = Builder.createVectorSplat(
4991
0
              8, LLContext.getInt16(UINT16_C(0x8000)));
4992
0
          auto NotOver = Builder.createSExt(
4993
0
              Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4994
0
          return Builder.createXor(Result, NotOver);
4995
196
        });
4996
196
  }
4997
325
  // Compiles a lane-wise signed minimum: for each lane selects LHS when
  // LHS <= RHS (signed compare), otherwise RHS.
  void compileVectorVectorSMin(LLVM::Type VectorTy) noexcept {
4998
325
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4999
325
      auto C = Builder.createICmpSLE(LHS, RHS);
5000
325
      return Builder.createSelect(C, LHS, RHS);
5001
325
    });
5002
325
  }
5003
372
  // Compiles a lane-wise unsigned minimum: for each lane selects LHS when
  // LHS <= RHS (unsigned compare), otherwise RHS.
  void compileVectorVectorUMin(LLVM::Type VectorTy) noexcept {
5004
372
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5005
372
      auto C = Builder.createICmpULE(LHS, RHS);
5006
372
      return Builder.createSelect(C, LHS, RHS);
5007
372
    });
5008
372
  }
5009
469
  // Compiles a lane-wise signed maximum: for each lane selects LHS when
  // LHS >= RHS (signed compare), otherwise RHS.
  void compileVectorVectorSMax(LLVM::Type VectorTy) noexcept {
5010
469
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5011
469
      auto C = Builder.createICmpSGE(LHS, RHS);
5012
469
      return Builder.createSelect(C, LHS, RHS);
5013
469
    });
5014
469
  }
5015
978
  // Compiles a lane-wise unsigned maximum: for each lane selects LHS when
  // LHS >= RHS (unsigned compare), otherwise RHS.
  void compileVectorVectorUMax(LLVM::Type VectorTy) noexcept {
5016
978
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5017
978
      auto C = Builder.createICmpUGE(LHS, RHS);
5018
978
      return Builder.createSelect(C, LHS, RHS);
5019
978
    });
5020
978
  }
5021
293
  // Compiles an unsigned rounding average of two vectors.
  //
  // The generic path computes (LHS + RHS + 1) >> 1 in zero-extended lanes and
  // truncates the result back to VectorTy. On x86_64 with SSE2 and on aarch64
  // with NEON a target intrinsic is emitted instead.
  //
  // VectorTy: lane layout of the operands; the SSE2 path only handles 8-bit
  //           and 16-bit element types and treats anything else as
  //           unreachable.
  void compileVectorVectorUAvgr(LLVM::Type VectorTy) noexcept {
5022
293
    auto ExtendTy = VectorTy.getExtendedElementVectorType();
5023
293
    compileVectorVectorOp(
5024
293
        VectorTy,
5025
293
        [this, VectorTy, ExtendTy](auto LHS, auto RHS) noexcept -> LLVM::Value {
5026
293
#if defined(__x86_64__)
5027
293
          if (Context.SupportSSE2) {
5028
293
            // Pick the intrinsic by element width of VectorTy.
            const auto ID = [VectorTy]() noexcept {
5029
293
              switch (VectorTy.getElementType().getIntegerBitWidth()) {
5030
125
              case 8:
5031
125
                return LLVM::Core::X86SSE2PAvgB;
5032
168
              case 16:
5033
168
                return LLVM::Core::X86SSE2PAvgW;
5034
0
              default:
5035
0
                assumingUnreachable();
5036
293
              }
5037
293
            }();
5038
293
            assuming(ID != LLVM::Core::NotIntrinsic);
5039
293
            return Builder.createIntrinsic(ID, {}, {LHS, RHS});
5040
293
          }
5041
0
#endif
5042
5043
#if defined(__aarch64__)
5044
          if (Context.SupportNEON) {
5045
            assuming(LLVM::Core::AArch64NeonURHAdd != LLVM::Core::NotIntrinsic);
5046
            return Builder.createBinaryIntrinsic(LLVM::Core::AArch64NeonURHAdd,
5047
                                                 LHS, RHS);
5048
          }
5049
#endif
5050
5051
          // Fallback case.
5052
          // If the SSE2 is not supported on the x86_64 platform or
5053
          // the NEON is not supported on the aarch64 platform,
5054
          // then fallback to this.
5055
0
          auto EL = Builder.createZExt(LHS, ExtendTy);
5056
0
          auto ER = Builder.createZExt(RHS, ExtendTy);
5057
0
          // One is a splat of `true` zero-extended to ExtendTy, i.e. the value
          // 1 in every lane. It serves both as the rounding term and as the
          // shift amount below.
          auto One = Builder.createZExt(
5058
0
              Builder.createVectorSplat(ExtendTy.getVectorSize(),
5059
0
                                        LLContext.getTrue()),
5060
0
              ExtendTy);
5061
0
          return Builder.createTrunc(
5062
0
              Builder.createLShr(
5063
0
                  Builder.createAdd(Builder.createAdd(EL, ER), One), One),
5064
0
              VectorTy);
5065
293
        });
5066
293
  }
5067
682
  // Compiles a saturating narrow of two vectors into one.
  //
  // Pops two operands, views each as FromTy, clamps every lane to the value
  // range of the half-width element type, truncates to
  // FromTy.getTruncatedElementVectorType(), concatenates the two halves and
  // pushes the result as Context.Int64x2Ty.
  //
  // FromTy: source lane layout; only 16-bit and 32-bit element types are
  //         handled, anything else is treated as unreachable.
  // Signed: true clamps to the signed range of the half-width type, false to
  //         its unsigned range.
  void compileVectorNarrow(LLVM::Type FromTy, bool Signed) noexcept {
5068
682
    // Clamp bounds, expressed as constants of the source element width.
    auto [MinInt,
5069
682
          MaxInt] = [&]() noexcept -> std::tuple<LLVM::Value, LLVM::Value> {
5070
682
      switch (FromTy.getElementType().getIntegerBitWidth()) {
5071
255
      case 16: {
5072
255
        const auto Min =
5073
255
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::min()
5074
255
                                        : std::numeric_limits<uint8_t>::min());
5075
255
        const auto Max =
5076
255
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::max()
5077
255
                                        : std::numeric_limits<uint8_t>::max());
5078
255
        return {LLContext.getInt16(static_cast<uint16_t>(Min)),
5079
255
                LLContext.getInt16(static_cast<uint16_t>(Max))};
5080
0
      }
5081
427
      case 32: {
5082
427
        const auto Min =
5083
427
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::min()
5084
427
                                        : std::numeric_limits<uint16_t>::min());
5085
427
        const auto Max =
5086
427
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::max()
5087
427
                                        : std::numeric_limits<uint16_t>::max());
5088
427
        return {LLContext.getInt32(static_cast<uint32_t>(Min)),
5089
427
                LLContext.getInt32(static_cast<uint32_t>(Max))};
5090
0
      }
5091
0
      default:
5092
0
        assumingUnreachable();
5093
682
      }
5094
682
    }();
5095
682
    const auto Count = FromTy.getVectorSize();
5096
682
    auto VMin = Builder.createVectorSplat(Count, MinInt);
5097
682
    auto VMax = Builder.createVectorSplat(Count, MaxInt);
5098
5099
682
    auto TruncTy = FromTy.getTruncatedElementVectorType();
5100
5101
682
    // F2 is the operand popped first. The clamp always uses signed compares
    // (SLT/SGT), also when Signed is false; in that case the lower bound is 0,
    // so negative source lanes clamp to 0.
    auto F2 = Builder.createBitCast(stackPop(), FromTy);
5102
682
    F2 = Builder.createSelect(Builder.createICmpSLT(F2, VMin), VMin, F2);
5103
682
    F2 = Builder.createSelect(Builder.createICmpSGT(F2, VMax), VMax, F2);
5104
682
    F2 = Builder.createTrunc(F2, TruncTy);
5105
5106
682
    // F1 is the operand popped second; it gets the same clamp and truncation.
    auto F1 = Builder.createBitCast(stackPop(), FromTy);
5107
682
    F1 = Builder.createSelect(Builder.createICmpSLT(F1, VMin), VMin, F1);
5108
682
    F1 = Builder.createSelect(Builder.createICmpSGT(F1, VMax), VMax, F1);
5109
682
    F1 = Builder.createTrunc(F1, TruncTy);
5110
5111
682
    // Identity mask 0 .. 2*Count-1: the shuffle concatenates its two inputs.
    // The inputs are (F1, F2) on little-endian hosts and (F2, F1) otherwise.
    std::vector<uint32_t> Mask(Count * 2);
5112
682
    std::iota(Mask.begin(), Mask.end(), 0);
5113
682
    auto V = Endian::native == Endian::little
5114
682
                 ? Builder.createShuffleVector(
5115
682
                       F1, F2, LLVM::Value::getConstVector32(LLContext, Mask))
5116
682
                 : Builder.createShuffleVector(
5117
0
                       F2, F1, LLVM::Value::getConstVector32(LLContext, Mask));
5118
682
    stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
5119
682
  }
5120
6.19k
  // Compiles a widening of one half of a vector, rewriting the top stack slot
  // in place (Stack.back() is read and overwritten; nothing is popped or
  // pushed).
  //
  // FromTy: lane layout the stack value is viewed as before widening.
  // Signed: true sign-extends each lane, false zero-extends.
  // Low:    true keeps lanes [0, Count/2), false keeps lanes
  //         [Count/2, Count); the choice is inverted on big-endian hosts.
  void compileVectorExtend(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5121
6.19k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5122
6.19k
    const auto Count = FromTy.getVectorSize();
5123
6.19k
    std::vector<uint32_t> Mask(Count / 2);
5124
    if constexpr (Endian::native == Endian::big) {
5125
      Low = !Low;
5126
    }
5127
6.19k
    // Mask holds Count/2 consecutive lane indices starting at the chosen half.
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5128
6.19k
    auto R = Builder.createBitCast(Stack.back(), FromTy);
5129
6.19k
    if (Signed) {
5130
2.84k
      R = Builder.createSExt(R, ExtTy);
5131
3.35k
    } else {
5132
3.35k
      R = Builder.createZExt(R, ExtTy);
5133
3.35k
    }
5134
6.19k
    // All lanes are widened first; the shuffle then keeps the selected half.
    R = Builder.createShuffleVector(
5135
6.19k
        R, LLVM::Value::getUndef(ExtTy),
5136
6.19k
        LLVM::Value::getConstVector32(LLContext, Mask));
5137
6.19k
    Stack.back() = Builder.createBitCast(R, Context.Int64x2Ty);
5138
6.19k
  }
5139
1.95k
  // Compiles a widening multiply of one half of two vectors.
  //
  // Pops two operands, widens each lane of both (sign- or zero-extension),
  // keeps Count/2 lanes of each, multiplies them lane-wise and pushes the
  // product as Context.Int64x2Ty.
  //
  // FromTy: lane layout the operands are viewed as before widening.
  // Signed: true sign-extends each lane, false zero-extends.
  // Low:    true keeps lanes [0, Count/2), false keeps lanes [Count/2, Count).
  //
  // NOTE(review): unlike compileVectorExtend, Low is not inverted when
  // Endian::native == Endian::big -- confirm whether that is intentional.
  void compileVectorExtMul(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5140
1.95k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5141
1.95k
    const auto Count = FromTy.getVectorSize();
5142
1.95k
    std::vector<uint32_t> Mask(Count / 2);
5143
1.95k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5144
3.90k
    // Helper applied to each operand: view as FromTy, widen every lane, then
    // keep the lanes listed in Mask.
    auto Extend = [this, FromTy, Signed, ExtTy, &Mask](LLVM::Value R) noexcept {
5145
3.90k
      R = Builder.createBitCast(R, FromTy);
5146
3.90k
      if (Signed) {
5147
1.65k
        R = Builder.createSExt(R, ExtTy);
5148
2.25k
      } else {
5149
2.25k
        R = Builder.createZExt(R, ExtTy);
5150
2.25k
      }
5151
3.90k
      return Builder.createShuffleVector(
5152
3.90k
          R, LLVM::Value::getUndef(ExtTy),
5153
3.90k
          LLVM::Value::getConstVector32(LLContext, Mask));
5154
3.90k
    };
5155
1.95k
    auto RHS = Extend(stackPop());
5156
1.95k
    auto LHS = Extend(stackPop());
5157
1.95k
    stackPush(
5158
1.95k
        Builder.createBitCast(Builder.createMul(RHS, LHS), Context.Int64x2Ty));
5159
1.95k
  }
5160
2.72k
  // Compiles a pairwise widening add: adjacent lane pairs of the input are
  // summed into lanes of twice the width.
  //
  // The work is done by a callback handed to compileVectorOp together with
  // VectorTy. The generic path at the bottom shows the computation: the input
  // is viewed as ExtTy (double-width lanes, half as many), each wide lane is
  // split into its upper and lower half, both halves are sign- or
  // zero-extended according to Signed, and the two are added.
  //
  // VectorTy: lane layout of the input; the x86_64 paths distinguish vectors
  //           of 16 and of 8 lanes.
  // Signed:   true treats the input lanes as signed, false as unsigned.
  void compileVectorExtAddPairwise(LLVM::Type VectorTy, bool Signed) noexcept {
5161
2.72k
    compileVectorOp(
5162
2.72k
        VectorTy, [this, VectorTy, Signed](auto V) noexcept -> LLVM::Value {
5163
2.72k
          auto ExtTy = VectorTy.getExtendedElementVectorType()
5164
2.72k
                           .getHalfElementsVectorType();
5165
2.72k
#if defined(__x86_64__)
5166
2.72k
          // Count only exists on x86_64 builds; the later paths do not use it.
          const auto Count = VectorTy.getVectorSize();
5167
2.72k
          if (Context.SupportXOP) {
5168
0
            // Choose the horizontal-add intrinsic by lane count and signedness.
            const auto ID = [Count, Signed]() noexcept {
5169
0
              switch (Count) {
5170
0
              case 8:
5171
0
                return Signed ? LLVM::Core::X86XOpVPHAddWD
5172
0
                              : LLVM::Core::X86XOpVPHAddUWD;
5173
0
              case 16:
5174
0
                return Signed ? LLVM::Core::X86XOpVPHAddBW
5175
0
                              : LLVM::Core::X86XOpVPHAddUBW;
5176
0
              default:
5177
0
                assumingUnreachable();
5178
0
              }
5179
0
            }();
5180
0
            assuming(ID != LLVM::Core::NotIntrinsic);
5181
0
            return Builder.createUnaryIntrinsic(ID, V);
5182
0
          }
5183
2.72k
          if (Context.SupportSSSE3 && Count == 16) {
5184
697
            assuming(LLVM::Core::X86SSSE3PMAddUbSw128 !=
5185
697
                     LLVM::Core::NotIntrinsic);
5186
697
            // Multiply-add against a vector of ones; only the operand order
            // differs between the signed and unsigned case.
            // NOTE(review): presumably because the intrinsic reads its first
            // operand as unsigned bytes and its second as signed bytes --
            // confirm against the PMADDUBSW definition.
            if (Signed) {
5187
368
              return Builder.createIntrinsic(
5188
368
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5189
368
                  {Builder.createVectorSplat(16, LLContext.getInt8(1)), V});
5190
368
            } else {
5191
329
              return Builder.createIntrinsic(
5192
329
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5193
329
                  {V, Builder.createVectorSplat(16, LLContext.getInt8(1))});
5194
329
            }
5195
697
          }
5196
2.03k
          if (Context.SupportSSE2 && Count == 8) {
5197
2.03k
            assuming(LLVM::Core::X86SSE2PMAddWd != LLVM::Core::NotIntrinsic);
5198
2.03k
            if (Signed) {
5199
1.34k
              return Builder.createIntrinsic(
5200
1.34k
                  LLVM::Core::X86SSE2PMAddWd, {},
5201
1.34k
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5202
1.34k
            } else {
5203
688
              // Unsigned case: flipping bit 15 of a lane x gives x - 0x8000
              // when read as signed, so each pair sums to a + b - 0x10000;
              // adding 0x10000 to every 32-bit lane afterwards removes the
              // bias.
              // NOTE(review): assumes the multiply-add intrinsic treats its
              // lanes as signed -- confirm against the PMADDWD definition.
              V = Builder.createXor(
5204
688
                  V, Builder.createVectorSplat(8, LLContext.getInt16(0x8000)));
5205
688
              V = Builder.createIntrinsic(
5206
688
                  LLVM::Core::X86SSE2PMAddWd, {},
5207
688
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5208
688
              return Builder.createAdd(
5209
688
                  V, Builder.createVectorSplat(4, LLContext.getInt32(0x10000)));
5210
688
            }
5211
2.03k
          }
5212
0
#endif
5213
5214
#if defined(__aarch64__)
5215
          if (Context.SupportNEON) {
5216
            const auto ID = Signed ? LLVM::Core::AArch64NeonSAddLP
5217
                                   : LLVM::Core::AArch64NeonUAddLP;
5218
            assuming(ID != LLVM::Core::NotIntrinsic);
5219
            return Builder.createIntrinsic(ID, {ExtTy, VectorTy}, {V});
5220
          }
5221
#endif
5222
5223
          // Fallback case.
5224
          // If the XOP, SSSE3, or SSE2 is not supported on the x86_64 platform
5225
          // or the NEON is not supported on the aarch64 platform,
5226
          // then fallback to this.
5227
0
          // Width is the bit width of one source lane, splatted over ExtTy so
          // it can be used as a per-lane shift amount.
          auto Width = LLVM::Value::getConstInt(
5228
0
              ExtTy.getElementType(),
5229
0
              VectorTy.getElementType().getIntegerBitWidth());
5230
0
          Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5231
0
          auto EV = Builder.createBitCast(V, ExtTy);
5232
0
          // L is the upper half of each wide lane shifted down; R is the lower
          // half, moved up and back down so it is extended the same way.
          LLVM::Value L, R;
5233
0
          if (Signed) {
5234
0
            L = Builder.createAShr(EV, Width);
5235
0
            R = Builder.createAShr(Builder.createShl(EV, Width), Width);
5236
0
          } else {
5237
0
            L = Builder.createLShr(EV, Width);
5238
0
            R = Builder.createLShr(Builder.createShl(EV, Width), Width);
5239
0
          }
5240
0
          return Builder.createAdd(L, R);
5241
2.03k
        });
5242
2.72k
  }
5243
543
  // Applies the intrinsic identified by LLVM::Core::Fabs to the operand that
  // compileVectorOp supplies for VectorTy.
  void compileVectorFAbs(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto ID = LLVM::Core::Fabs;
      // The intrinsic must have been resolved before code generation.
      assuming(ID != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(ID, Operand);
    });
  }
5249
890
  // Emits a floating-point negation of the operand that compileVectorOp
  // supplies for VectorTy.
  void compileVectorFNeg(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      return Builder.createFNeg(Operand);
    });
  }
5253
332
  // Applies the intrinsic identified by LLVM::Core::Sqrt to the operand that
  // compileVectorOp supplies for VectorTy.
  void compileVectorFSqrt(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto ID = LLVM::Core::Sqrt;
      assuming(ID != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(ID, Operand);
    });
  }
5259
1.60k
  // Applies the intrinsic identified by LLVM::Core::Ceil to the operand that
  // compileVectorOp supplies for VectorTy.
  void compileVectorFCeil(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto ID = LLVM::Core::Ceil;
      assuming(ID != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(ID, Operand);
    });
  }
5265
2.83k
  // Applies the intrinsic identified by LLVM::Core::Floor to the operand that
  // compileVectorOp supplies for VectorTy.
  void compileVectorFFloor(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto ID = LLVM::Core::Floor;
      assuming(ID != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(ID, Operand);
    });
  }
5271
2.00k
  // Applies the intrinsic identified by LLVM::Core::Trunc to the operand that
  // compileVectorOp supplies for VectorTy.
  void compileVectorFTrunc(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto ID = LLVM::Core::Trunc;
      assuming(ID != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(ID, Operand);
    });
  }
5277
417
  // Emits the "nearest" rounding of the operand supplied by compileVectorOp.
  // Candidates are tried in order: the Roundeven intrinsic (LLVM >= 12 and not
  // s390x), a target-specific intrinsic (SSE4.1 on x86_64, NEON on aarch64),
  // and finally the Nearbyint intrinsic.
  void compileVectorFNearest(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [&](auto Operand) noexcept {
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
      assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
      if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Operand);
      }
#endif

#if defined(__x86_64__)
      if (Context.SupportSSE4_1) {
        // Pick the packed-single or packed-double variant from the lane type.
        const auto ID = VectorTy.getElementType().isFloatTy()
                            ? LLVM::Core::X86SSE41RoundPs
                            : LLVM::Core::X86SSE41RoundPd;
        assuming(ID != LLVM::Core::NotIntrinsic);
        // The constant 8 is forwarded as the intrinsic's second operand.
        return Builder.createIntrinsic(ID, {},
                                       {Operand, LLContext.getInt32(8)});
      }
#endif

#if defined(__aarch64__)
      if (Context.SupportNEON &&
          LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN,
                                            Operand);
      }
#endif

      // Fallback case.
      // Reached when SSE4.1 is unavailable on x86_64 or NEON is unavailable
      // on aarch64.
      assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Operand);
    });
  }
5311
180
  // Emits a floating-point add of the two operands that
  // compileVectorVectorOp supplies for VectorTy.
  void compileVectorVectorFAdd(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      return Builder.createFAdd(A, B);
    });
  }
5316
474
  // Emits a floating-point subtract (first operand minus second) of the two
  // operands that compileVectorVectorOp supplies for VectorTy.
  void compileVectorVectorFSub(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      return Builder.createFSub(A, B);
    });
  }
5321
250
  // Emits a floating-point multiply of the two operands that
  // compileVectorVectorOp supplies for VectorTy.
  void compileVectorVectorFMul(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      return Builder.createFMul(A, B);
    });
  }
5326
213
  // Emits a floating-point divide (first operand over second) of the two
  // operands that compileVectorVectorOp supplies for VectorTy.
  void compileVectorVectorFDiv(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      return Builder.createFDiv(A, B);
    });
  }
5331
308
  // Emits a floating-point minimum built from compares and selects. Selects
  // emitted later take priority, so the effective order is: first operand if
  // it is NaN, else second operand if it is NaN, else the strictly smaller
  // operand, else the bitwise OR of both operands.
  void compileVectorVectorFMin(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      auto AIsNaN = Builder.createFCmpUNO(A, A);
      auto BIsNaN = Builder.createFCmpUNO(B, B);
      auto ALess = Builder.createFCmpOLT(A, B);
      auto AGreater = Builder.createFCmpOGT(A, B);

      // Value used when none of the compares picks an operand: OR of the raw
      // bits, so a sign bit set on either side is kept.
      auto ABits = Builder.createBitCast(A, Context.Int64x2Ty);
      auto BBits = Builder.createBitCast(B, Context.Int64x2Ty);
      auto Result =
          Builder.createBitCast(Builder.createOr(ABits, BBits), A.getType());

      Result = Builder.createSelect(AGreater, B, Result);
      Result = Builder.createSelect(ALess, A, Result);
      Result = Builder.createSelect(BIsNaN, B, Result);
      Result = Builder.createSelect(AIsNaN, A, Result);
      return Result;
    });
  }
5348
229
  // Emits a floating-point maximum built from compares and selects. Selects
  // emitted later take priority, so the effective order is: first operand if
  // it is NaN, else second operand if it is NaN, else the strictly larger
  // operand, else the bitwise AND of both operands.
  void compileVectorVectorFMax(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      auto AIsNaN = Builder.createFCmpUNO(A, A);
      auto BIsNaN = Builder.createFCmpUNO(B, B);
      auto ALess = Builder.createFCmpOLT(A, B);
      auto AGreater = Builder.createFCmpOGT(A, B);

      // Value used when none of the compares picks an operand: AND of the
      // raw bits, so a sign bit survives only if set on both sides.
      auto ABits = Builder.createBitCast(A, Context.Int64x2Ty);
      auto BBits = Builder.createBitCast(B, Context.Int64x2Ty);
      auto Result =
          Builder.createBitCast(Builder.createAnd(ABits, BBits), A.getType());

      Result = Builder.createSelect(ALess, B, Result);
      Result = Builder.createSelect(AGreater, A, Result);
      Result = Builder.createSelect(BIsNaN, B, Result);
      Result = Builder.createSelect(AIsNaN, A, Result);
      return Result;
    });
  }
5365
314
  // Pseudo-minimum: yields the second operand where it compares ordered
  // less-than the first, and the first operand otherwise.
  void compileVectorVectorFPMin(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      auto BIsLess = Builder.createFCmpOLT(B, A);
      return Builder.createSelect(BIsLess, B, A);
    });
  }
5371
334
  // Pseudo-maximum: yields the second operand where it compares ordered
  // greater-than the first, and the first operand otherwise.
  void compileVectorVectorFPMax(LLVM::Type VectorTy) noexcept {
    compileVectorVectorOp(VectorTy, [this](auto A, auto B) noexcept {
      auto BIsGreater = Builder.createFCmpOGT(B, A);
      return Builder.createSelect(BIsGreater, B, A);
    });
  }
5377
986
  // Saturating float -> signed 32-bit conversion of a vector of type VectorTy.
  // NaN lanes become 0, lanes below INT32_MIN clamp to INT32_MIN, and lanes
  // not below the converted INT32_MAX bound clamp to INT32_MAX.
  // When PadZero is set, the result is widened to twice as many lanes, the
  // extra lanes being zero.
  void compileVectorTruncSatS32(LLVM::Type VectorTy, bool PadZero) noexcept {
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto Val) noexcept {
      const auto Lanes = VectorTy.getVectorSize();
      auto ElemFPTy = VectorTy.getElementType();

      // Integer bounds, as scalars and as splatted vectors.
      auto MinInt = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::min()));
      auto MaxInt = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
      auto MinIntVec = Builder.createVectorSplat(Lanes, MinInt);
      auto MaxIntVec = Builder.createVectorSplat(Lanes, MaxInt);
      auto ZeroIntVec = LLVM::Value::getConstNull(MinIntVec.getType());

      // The same bounds converted to the source floating-point type.
      auto MinFP = Builder.createSIToFP(MinInt, ElemFPTy);
      auto MaxFP = Builder.createSIToFP(MaxInt, ElemFPTy);
      auto MinFPVec = Builder.createVectorSplat(Lanes, MinFP);
      auto MaxFPVec = Builder.createVectorSplat(Lanes, MaxFP);

      // Classify on the original input before converting it.
      auto IsOrdered = Builder.createFCmpORD(Val, Val);
      auto InLowerRange = Builder.createFCmpUGE(Val, MinFPVec);
      auto InUpperRange = Builder.createFCmpULT(Val, MaxFPVec);

      auto IntVecTy = LLVM::Type::getVectorType(LLContext.getInt32Ty(), Lanes);
      Val = Builder.createFPToSI(Val, IntVecTy);
      Val = Builder.createSelect(IsOrdered, Val, ZeroIntVec);
      Val = Builder.createSelect(InLowerRange, Val, MinIntVec);
      Val = Builder.createSelect(InUpperRange, Val, MaxIntVec);

      if (PadZero) {
        // Identity mask over the concatenation of both shuffle inputs.
        std::vector<uint32_t> Identity(Lanes * 2);
        std::iota(Identity.begin(), Identity.end(), 0);
        auto MaskVec = LLVM::Value::getConstVector32(LLContext, Identity);
        if constexpr (Endian::native == Endian::little) {
          Val = Builder.createShuffleVector(Val, ZeroIntVec, MaskVec);
        } else {
          Val = Builder.createShuffleVector(ZeroIntVec, Val, MaskVec);
        }
      }
      return Val;
    });
  }
5415
5.86k
  // Saturating float -> unsigned 32-bit conversion of a vector of type
  // VectorTy. Lanes that are NaN or below 0 become 0, and lanes not below the
  // converted UINT32_MAX bound clamp to UINT32_MAX.
  // When PadZero is set, the result is widened to twice as many lanes, the
  // extra lanes being zero.
  void compileVectorTruncSatU32(LLVM::Type VectorTy, bool PadZero) noexcept {
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto Val) noexcept {
      const auto Lanes = VectorTy.getVectorSize();
      auto ElemFPTy = VectorTy.getElementType();

      // Integer bounds, as scalars and as splatted vectors.
      auto MinInt = LLContext.getInt32(std::numeric_limits<uint32_t>::min());
      auto MaxInt = LLContext.getInt32(std::numeric_limits<uint32_t>::max());
      auto MinIntVec = Builder.createVectorSplat(Lanes, MinInt);
      auto MaxIntVec = Builder.createVectorSplat(Lanes, MaxInt);

      // The same bounds converted to the source floating-point type.
      auto MinFP = Builder.createUIToFP(MinInt, ElemFPTy);
      auto MaxFP = Builder.createUIToFP(MaxInt, ElemFPTy);
      auto MinFPVec = Builder.createVectorSplat(Lanes, MinFP);
      auto MaxFPVec = Builder.createVectorSplat(Lanes, MaxFP);

      // The ordered compare is false for NaN, so NaN lanes take the lower
      // bound (0) below.
      auto InLowerRange = Builder.createFCmpOGE(Val, MinFPVec);
      auto InUpperRange = Builder.createFCmpULT(Val, MaxFPVec);

      auto IntVecTy = LLVM::Type::getVectorType(LLContext.getInt32Ty(), Lanes);
      Val = Builder.createFPToUI(Val, IntVecTy);
      Val = Builder.createSelect(InLowerRange, Val, MinIntVec);
      Val = Builder.createSelect(InUpperRange, Val, MaxIntVec);

      if (PadZero) {
        auto ZeroIntVec = LLVM::Value::getConstNull(MinIntVec.getType());
        // Identity mask over the concatenation of both shuffle inputs.
        std::vector<uint32_t> Identity(Lanes * 2);
        std::iota(Identity.begin(), Identity.end(), 0);
        auto MaskVec = LLVM::Value::getConstVector32(LLContext, Identity);
        if constexpr (Endian::native == Endian::little) {
          Val = Builder.createShuffleVector(Val, ZeroIntVec, MaskVec);
        } else {
          Val = Builder.createShuffleVector(ZeroIntVec, Val, MaskVec);
        }
      }
      return Val;
    });
  }
5449
  void compileVectorConvertS(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5450
686
                             bool Low) noexcept {
5451
686
    compileVectorOp(VectorTy,
5452
686
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5453
686
                      if (Low) {
5454
351
                        const auto Size = VectorTy.getVectorSize() / 2;
5455
351
                        std::vector<uint32_t> Mask(Size);
5456
351
                        if constexpr (Endian::native == Endian::little) {
5457
351
                          std::iota(Mask.begin(), Mask.end(), 0);
5458
                        } else {
5459
                          std::iota(Mask.begin(), Mask.end(), Size);
5460
                        }
5461
351
                        V = Builder.createShuffleVector(
5462
351
                            V, LLVM::Value::getUndef(VectorTy),
5463
351
                            LLVM::Value::getConstVector32(LLContext, Mask));
5464
351
                      }
5465
686
                      return Builder.createSIToFP(V, FPVectorTy);
5466
686
                    });
5467
686
  }
5468
  void compileVectorConvertU(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5469
2.01k
                             bool Low) noexcept {
5470
2.01k
    compileVectorOp(VectorTy,
5471
2.01k
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5472
2.01k
                      if (Low) {
5473
1.28k
                        const auto Size = VectorTy.getVectorSize() / 2;
5474
1.28k
                        std::vector<uint32_t> Mask(Size);
5475
1.28k
                        if constexpr (Endian::native == Endian::little) {
5476
1.28k
                          std::iota(Mask.begin(), Mask.end(), 0);
5477
                        } else {
5478
                          std::iota(Mask.begin(), Mask.end(), Size);
5479
                        }
5480
1.28k
                        V = Builder.createShuffleVector(
5481
1.28k
                            V, LLVM::Value::getUndef(VectorTy),
5482
1.28k
                            LLVM::Value::getConstVector32(LLContext, Mask));
5483
1.28k
                      }
5484
2.01k
                      return Builder.createUIToFP(V, FPVectorTy);
5485
2.01k
                    });
5486
2.01k
  }
5487
729
  void compileVectorDemote() noexcept {
5488
729
    compileVectorOp(Context.Doublex2Ty, [this](auto V) noexcept {
5489
729
      auto Demoted = Builder.createFPTrunc(
5490
729
          V, LLVM::Type::getVectorType(Context.FloatTy, 2));
5491
729
      auto ZeroV = LLVM::Value::getConstNull(Demoted.getType());
5492
729
      if constexpr (Endian::native == Endian::little) {
5493
729
        return Builder.createShuffleVector(
5494
729
            Demoted, ZeroV,
5495
729
            LLVM::Value::getConstVector32(LLContext, {0u, 1u, 2u, 3u}));
5496
      } else {
5497
        return Builder.createShuffleVector(
5498
            Demoted, ZeroV,
5499
            LLVM::Value::getConstVector32(LLContext, {3u, 2u, 1u, 0u}));
5500
      }
5501
729
    });
5502
729
  }
5503
758
  void compileVectorPromote() noexcept {
5504
758
    compileVectorOp(Context.Floatx4Ty, [this](auto V) noexcept {
5505
758
      auto UndefV = LLVM::Value::getUndef(V.getType());
5506
758
      auto Low = Builder.createShuffleVector(
5507
758
          V, UndefV, LLVM::Value::getConstVector32(LLContext, {0u, 1u}));
5508
758
      return Builder.createFPExt(
5509
758
          Low, LLVM::Type::getVectorType(Context.DoubleTy, 2));
5510
758
    });
5511
758
  }
5512
5513
76
  // Pops three operands (addend on top, then the two factors), computes
  // factor1 * factor2 + addend as a separate multiply and add in VectorTy,
  // and pushes the result bit-cast to Int64x2Ty.
  void compileVectorVectorMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Factor2 = Builder.createBitCast(stackPop(), VectorTy);
    auto Factor1 = Builder.createBitCast(stackPop(), VectorTy);
    auto Product = Builder.createFMul(Factor1, Factor2);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5521
5522
50
  // Pops three operands (addend on top, then the two factors), computes
  // (-factor1) * factor2 + addend in VectorTy, and pushes the result bit-cast
  // to Int64x2Ty.
  void compileVectorVectorNMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Factor2 = Builder.createBitCast(stackPop(), VectorTy);
    auto Factor1 = Builder.createBitCast(stackPop(), VectorTy);
    auto Negated = Builder.createFNeg(Factor1);
    auto Product = Builder.createFMul(Negated, Factor2);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5530
5531
11
  void compileVectorRelaxedIntegerDotProduct() noexcept {
5532
11
    auto OriTy = Context.Int8x16Ty;
5533
11
    auto ExtTy = Context.Int16x8Ty;
5534
11
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5535
11
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5536
11
#if defined(__x86_64__)
5537
11
    if (Context.SupportSSSE3) {
5538
11
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5539
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5540
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5541
      // side to match the WebAssembly spec
5542
11
      return stackPush(Builder.createBitCast(
5543
11
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5544
11
                                  {RHS, LHS}),
5545
11
          Context.Int64x2Ty));
5546
11
    }
5547
0
#endif
5548
0
    auto Width = LLVM::Value::getConstInt(
5549
0
        ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5550
0
    Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5551
0
    auto EA = Builder.createBitCast(LHS, ExtTy);
5552
0
    auto EB = Builder.createBitCast(RHS, ExtTy);
5553
5554
0
    LLVM::Value AL, AR, BL, BR;
5555
0
    AL = Builder.createAShr(EA, Width);
5556
0
    AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5557
0
    BL = Builder.createAShr(EB, Width);
5558
0
    BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5559
5560
0
    return stackPush(Builder.createBitCast(
5561
0
        Builder.createAdd(Builder.createMul(AL, BL), Builder.createMul(AR, BR)),
5562
0
        Context.Int64x2Ty));
5563
11
  }
5564
5565
12
  void compileVectorRelaxedIntegerDotProductAdd() noexcept {
5566
12
    auto OriTy = Context.Int8x16Ty;
5567
12
    auto ExtTy = Context.Int16x8Ty;
5568
12
    auto FinTy = Context.Int32x4Ty;
5569
12
    auto VC = Builder.createBitCast(stackPop(), FinTy);
5570
12
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5571
12
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5572
12
    LLVM::Value IM;
5573
12
#if defined(__x86_64__)
5574
12
    if (Context.SupportSSSE3) {
5575
12
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5576
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5577
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5578
      // side to match the WebAssembly spec
5579
12
      IM = Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5580
12
                                   {RHS, LHS});
5581
12
    } else
5582
0
#endif
5583
0
    {
5584
0
      auto Width = LLVM::Value::getConstInt(
5585
0
          ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5586
0
      Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5587
0
      auto EA = Builder.createBitCast(LHS, ExtTy);
5588
0
      auto EB = Builder.createBitCast(RHS, ExtTy);
5589
5590
0
      LLVM::Value AL, AR, BL, BR;
5591
0
      AL = Builder.createAShr(EA, Width);
5592
0
      AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5593
0
      BL = Builder.createAShr(EB, Width);
5594
0
      BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5595
0
      IM = Builder.createAdd(Builder.createMul(AL, BL),
5596
0
                             Builder.createMul(AR, BR));
5597
0
    }
5598
5599
12
    auto Width = LLVM::Value::getConstInt(
5600
12
        FinTy.getElementType(), ExtTy.getElementType().getIntegerBitWidth());
5601
12
    Width = Builder.createVectorSplat(FinTy.getVectorSize(), Width);
5602
12
    auto IME = Builder.createBitCast(IM, FinTy);
5603
12
    auto L = Builder.createAShr(IME, Width);
5604
12
    auto R = Builder.createAShr(Builder.createShl(IME, Width), Width);
5605
5606
12
    return stackPush(Builder.createBitCast(
5607
12
        Builder.createAdd(Builder.createAdd(L, R), VC), Context.Int64x2Ty));
5608
12
  }
5609
5610
  void
5611
  enterBlock(LLVM::BasicBlock JumpBlock, LLVM::BasicBlock NextBlock,
5612
             LLVM::BasicBlock ElseBlock, std::vector<LLVM::Value> Args,
5613
             std::pair<std::vector<ValType>, std::vector<ValType>> Type,
5614
             std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5615
22.8k
                 ReturnPHI = {}) noexcept {
5616
22.8k
    assuming(Type.first.size() == Args.size());
5617
22.8k
    for (auto &Value : Args) {
5618
4.44k
      stackPush(Value);
5619
4.44k
    }
5620
22.8k
    const auto Unreachable = isUnreachable();
5621
22.8k
    ControlStack.emplace_back(Stack.size() - Args.size(), Unreachable,
5622
22.8k
                              JumpBlock, NextBlock, ElseBlock, std::move(Args),
5623
22.8k
                              std::move(Type), std::move(ReturnPHI));
5624
22.8k
  }
5625
5626
22.7k
  // Closes the innermost control frame and returns it. If the frame is
  // reachable, its result values are popped and recorded as a ReturnPHI
  // incoming, and a branch to the continuation block is emitted; otherwise an
  // unreachable terminator is emitted. The builder is then positioned at the
  // continuation block and the value stack is cut back to the frame's base.
  Control leaveBlock() noexcept {
    Control Frame = std::move(ControlStack.back());
    ControlStack.pop_back();

    // Continue at NextBlock when the frame has one, else at JumpBlock.
    auto Continuation = Frame.NextBlock ? Frame.NextBlock : Frame.JumpBlock;
    if (Frame.Unreachable) {
      Builder.createUnreachable();
    } else {
      const auto &ResultTypes = Frame.Type.second;
      if (!ResultTypes.empty()) {
        // Values come off the stack last-first, so fill back to front.
        std::vector<LLVM::Value> Results(ResultTypes.size());
        for (auto It = Results.rbegin(); It != Results.rend(); ++It) {
          *It = stackPop();
        }
        Frame.ReturnPHI.emplace_back(std::move(Results),
                                     Builder.getInsertBlock());
      }
      Builder.createBr(Continuation);
    }
    Builder.positionAtEnd(Continuation);
    Stack.erase(Stack.begin() + static_cast<int64_t>(Frame.StackSize),
                Stack.end());
    return Frame;
  }
5650
5651
5.83k
  void checkStop() noexcept {
5652
5.83k
    if (!Interruptible) {
5653
5.83k
      return;
5654
5.83k
    }
5655
0
    auto NotStopBB = LLVM::BasicBlock::create(LLContext, F.Fn, "NotStop");
5656
0
    auto StopToken = Builder.createAtomicRMW(
5657
0
        LLVMAtomicRMWBinOpXchg, Context.getStopToken(Builder, ExecCtx),
5658
0
        LLContext.getInt32(0), LLVMAtomicOrderingMonotonic);
5659
#if LLVM_VERSION_MAJOR >= 13
5660
    StopToken.setAlignment(32);
5661
#endif
5662
0
    auto NotStop = Builder.createLikely(
5663
0
        Builder.createICmpEQ(StopToken, LLContext.getInt32(0)));
5664
0
    Builder.createCondBr(NotStop, NotStopBB,
5665
0
                         getTrapBB(ErrCode::Value::Interrupted));
5666
5667
0
    Builder.positionAtEnd(NotStopBB);
5668
0
  }
5669
5670
6.24k
  void setUnreachable() noexcept {
5671
6.24k
    if (ControlStack.empty()) {
5672
0
      IsUnreachable = true;
5673
6.24k
    } else {
5674
6.24k
      ControlStack.back().Unreachable = true;
5675
6.24k
    }
5676
6.24k
  }
5677
5678
1.60M
  bool isUnreachable() const noexcept {
5679
1.60M
    if (ControlStack.empty()) {
5680
11.0k
      return IsUnreachable;
5681
1.59M
    } else {
5682
1.59M
      return ControlStack.back().Unreachable;
5683
1.59M
    }
5684
1.60M
  }
5685
5686
  void
5687
  buildPHI(Span<const ValType> RetType,
5688
           Span<const std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5689
19.7k
               Incomings) noexcept {
5690
19.7k
    if (isVoidReturn(RetType)) {
5691
6.71k
      return;
5692
6.71k
    }
5693
13.0k
    std::vector<LLVM::Value> Nodes;
5694
13.0k
    if (Incomings.size() == 0) {
5695
2.80k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5696
2.80k
      Nodes.reserve(Types.size());
5697
3.19k
      for (LLVM::Type Type : Types) {
5698
3.19k
        Nodes.push_back(LLVM::Value::getUndef(Type));
5699
3.19k
      }
5700
10.2k
    } else if (Incomings.size() == 1) {
5701
9.07k
      Nodes = std::move(std::get<0>(Incomings.front()));
5702
9.07k
    } else {
5703
1.19k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5704
1.19k
      Nodes.reserve(Types.size());
5705
2.51k
      for (size_t I = 0; I < Types.size(); ++I) {
5706
1.31k
        auto PHIRet = Builder.createPHI(Types[I]);
5707
3.36k
        for (auto &[Value, BB] : Incomings) {
5708
3.36k
          assuming(Value.size() == Types.size());
5709
3.36k
          PHIRet.addIncoming(Value[I], BB);
5710
3.36k
        }
5711
1.31k
        Nodes.push_back(PHIRet);
5712
1.31k
      }
5713
1.19k
    }
5714
13.9k
    for (auto &Val : Nodes) {
5715
13.9k
      stackPush(Val);
5716
13.9k
    }
5717
13.0k
  }
5718
5719
37.6k
  void setLableJumpPHI(unsigned int Index) noexcept {
5720
37.6k
    assuming(Index < ControlStack.size());
5721
37.6k
    auto &Entry = *(ControlStack.rbegin() + Index);
5722
37.6k
    if (Entry.NextBlock) { // is loop
5723
2.11k
      std::vector<LLVM::Value> Args(Entry.Type.first.size());
5724
3.88k
      for (size_t I = 0; I < Args.size(); ++I) {
5725
1.77k
        const size_t J = Args.size() - 1 - I;
5726
1.77k
        Args[J] = stackPop();
5727
1.77k
      }
5728
3.88k
      for (size_t I = 0; I < Args.size(); ++I) {
5729
1.77k
        Entry.Args[I].addIncoming(Args[I], Builder.getInsertBlock());
5730
1.77k
        stackPush(Args[I]);
5731
1.77k
      }
5732
35.5k
    } else if (!Entry.Type.second.empty()) { // has return value
5733
2.01k
      std::vector<LLVM::Value> Rets(Entry.Type.second.size());
5734
4.16k
      for (size_t I = 0; I < Rets.size(); ++I) {
5735
2.15k
        const size_t J = Rets.size() - 1 - I;
5736
2.15k
        Rets[J] = stackPop();
5737
2.15k
      }
5738
4.16k
      for (size_t I = 0; I < Rets.size(); ++I) {
5739
2.15k
        stackPush(Rets[I]);
5740
2.15k
      }
5741
2.01k
      Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5742
2.01k
    }
5743
37.6k
  }
5744
5745
37.6k
  // Returns the jump target block of the control frame Index levels out from
  // the innermost one (Index 0 is the innermost frame).
  LLVM::BasicBlock getLabel(unsigned int Index) const noexcept {
    // Same precondition as setLableJumpPHI: the frame must exist, otherwise
    // the reverse iterator below would run past the start of ControlStack.
    assuming(Index < ControlStack.size());
    return (ControlStack.rbegin() + Index)->JumpBlock;
  }
5748
5749
957k
  // Appends Value to the top of the value stack.
  void stackPush(LLVM::Value Value) noexcept {
    Stack.emplace_back(Value);
  }
5750
366k
  // Removes and returns the top of the value stack. The stack must not be
  // popped below the base height of the innermost control frame.
  LLVM::Value stackPop() noexcept {
    assuming(!(ControlStack.empty() && Stack.empty()));
    assuming(ControlStack.empty() ||
             Stack.size() > ControlStack.back().StackSize);
    LLVM::Value Top = Stack.back();
    Stack.pop_back();
    return Top;
  }
5758
5759
23.2k
  // On big-endian hosts, emits a byte swap of Value; on little-endian hosts
  // returns Value unchanged. Integers wider than 8 bits and single-element
  // vectors are swapped directly. Other vectors, floats and doubles are
  // bit-cast to an integer type, swapped, and cast back. Any other type is
  // returned as-is.
  LLVM::Value switchEndian(LLVM::Value Value) {
    if constexpr (Endian::native == Endian::big) {
      auto Ty = Value.getType();
      if ((Ty.isIntegerTy() && Ty.getIntegerBitWidth() > 8) ||
          (Ty.isVectorTy() && Ty.getVectorSize() == 1)) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
      }

      // Byte-swaps Value through the integer type CarrierTy.
      const auto SwapThrough = [&](LLVM::Type CarrierTy) -> LLVM::Value {
        LLVM::Value Raw = Builder.createBitCast(Value, CarrierTy);
        Raw = Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Raw);
        return Builder.createBitCast(Raw, Ty);
      };

      if (Ty.isVectorTy()) {
        return SwapThrough(Ty.getElementType().getIntegerBitWidth() == 128
                               ? Context.Int128Ty
                               : Context.Int64Ty);
      }
      if (Ty.isFloatTy()) {
        return SwapThrough(Context.Int32Ty);
      }
      if (Ty.isDoubleTy()) {
        return SwapThrough(Context.Int64Ty);
      }
    }
    return Value;
  }
5784
5785
  LLVM::Compiler::CompileContext &Context;
5786
  LLVM::Context LLContext;
5787
  std::vector<std::pair<LLVM::Type, LLVM::Value>> Local;
5788
  std::vector<LLVM::Value> Stack;
5789
  LLVM::Value LocalInstrCount = nullptr;
5790
  LLVM::Value LocalGas = nullptr;
5791
  std::unordered_map<ErrCode::Value, LLVM::BasicBlock> TrapBB;
5792
  bool IsUnreachable = false;
5793
  bool Interruptible = false;
5794
  struct Control {
5795
    size_t StackSize;
5796
    bool Unreachable;
5797
    LLVM::BasicBlock JumpBlock;
5798
    LLVM::BasicBlock NextBlock;
5799
    LLVM::BasicBlock ElseBlock;
5800
    std::vector<LLVM::Value> Args;
5801
    std::pair<std::vector<ValType>, std::vector<ValType>> Type;
5802
    std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5803
        ReturnPHI;
5804
    Control(size_t S, bool U, LLVM::BasicBlock J, LLVM::BasicBlock N,
5805
            LLVM::BasicBlock E, std::vector<LLVM::Value> A,
5806
            std::pair<std::vector<ValType>, std::vector<ValType>> T,
5807
            std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5808
                R) noexcept
5809
22.8k
        : StackSize(S), Unreachable(U), JumpBlock(J), NextBlock(N),
5810
22.8k
          ElseBlock(E), Args(std::move(A)), Type(std::move(T)),
5811
22.8k
          ReturnPHI(std::move(R)) {}
5812
    Control(const Control &) = default;
5813
28.0k
    Control(Control &&) = default;
5814
    Control &operator=(const Control &) = default;
5815
1.16k
    Control &operator=(Control &&) = default;
5816
  };
5817
  std::vector<Control> ControlStack;
5818
  LLVM::FunctionCallee F;
5819
  LLVM::Value ExecCtx;
5820
  LLVM::Builder Builder;
5821
};
5822
5823
// Extracts every element of the struct-typed value Struct, in index order,
// emitting one extractvalue per element through Builder.
std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
                                      LLVM::Value Struct) noexcept {
  const auto FieldCount = Struct.getType().getStructNumElements();
  std::vector<LLVM::Value> Fields;
  Fields.reserve(FieldCount);
  unsigned Idx = 0;
  while (Idx < FieldCount) {
    Fields.push_back(Builder.createExtractValue(Struct, Idx));
    ++Idx;
  }
  return Fields;
}
5833
5834
} // namespace
5835
5836
namespace WasmEdge {
5837
namespace LLVM {
5838
5839
2.33k
// Checks the proposals enabled in Conf against what the AOT/JIT compiler
// handles.
//
// Exception Handling and Memory64 only produce a warning; Custom Annotation
// Syntax is rejected with InvalidAOTConfigure.
Expect<void> Compiler::checkConfigure() noexcept {
  // Note: The exception handling and memory64 proposals are not implemented
  // in AOT yet, but they must not be rejected here: the default configuration
  // is now WASM 3.0, which contains both proposals.
  if (Conf.hasProposal(Proposal::ExceptionHandling)) {
    spdlog::warn("Proposal Exception Handling is not yet supported in WasmEdge "
                 "AOT/JIT. The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }

  if (Conf.hasProposal(Proposal::Memory64)) {
    spdlog::warn("Proposal Memory64 is not yet supported in WasmEdge AOT/JIT. "
                 "The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }

  // Everything else is acceptable unless annotations are requested.
  if (!Conf.hasProposal(Proposal::Annotations)) {
    return {};
  }

  spdlog::error(ErrCode::Value::InvalidAOTConfigure);
  spdlog::error("    Proposal Custom Annotation Syntax is not yet supported "
                "in WasmEdge AOT/JIT.");
  return Unexpect(ErrCode::Value::InvalidAOTConfigure);
}
5861
5862
2.33k
// Compiles a validated WASM module into an LLVM module, verifies it and runs
// the optimization passes selected by the compiler configuration.
//
// Returns the populated Data on success. Fails with NotValidated when the
// module was not validated, with IllegalPath when no LLVM target is found for
// the module triple, and propagates any failure from compiling the
// function/code sections.
Expect<Data> Compiler::compile(const AST::Module &Module) noexcept {
  // Refuse modules that did not pass validation.
  if (unlikely(!Module.getIsValidated())) {
    spdlog::error(ErrCode::Value::NotValidated);
    return Unexpect(ErrCode::Value::NotValidated);
  }

  // Held until this function returns.
  std::unique_lock Guard(Mutex);
  spdlog::info("compile start"sv);

  LLVM::Core::init();

  LLVM::Data D;
  auto LLContext = D.extract().getLLContext();
  auto &LLModule = D.extract().LLModule;
  LLModule.setTarget(LLVM::getDefaultTargetTriple().unwrap());
  LLModule.addFlag(LLVMModuleFlagBehaviorError, "PIC Level"sv, 2);

  CompileContext ActiveContext(LLContext, LLModule,
                               Conf.getCompilerConfigure().isGenericBinary());

  // Points the member Context at ActiveContext for the duration of this call
  // and resets it to nullptr on every exit path.
  struct ScopedContext {
    ScopedContext(CompileContext *&Target, CompileContext &Active)
        : Slot(Target) {
      Slot = &Active;
    }
    ~ScopedContext() { Slot = nullptr; }
    CompileContext *&Slot;
  };
  ScopedContext Binding(Context, ActiveContext);

  // Compile Function Types
  compile(Module.getTypeSection());
  // Compile ImportSection
  compile(Module.getImportSection());
  // Compile GlobalSection
  compile(Module.getGlobalSection());
  // Compile MemorySection (MemorySec, DataSec)
  compile(Module.getMemorySection(), Module.getDataSection());
  // Compile TableSection (TableSec, ElemSec)
  compile(Module.getTableSection(), Module.getElementSection());
  // Compile Functions in module. (FunctionSec, CodeSec)
  EXPECTED_TRY(compile(Module.getFunctionSection(), Module.getCodeSection()));
  // Compile ExportSection
  compile(Module.getExportSection());
  // StartSection is not required to compile

  spdlog::info("verify start"sv);
  // The verifier result is not inspected here; diagnostics are printed by the
  // requested action.
  LLModule.verify(LLVMPrintMessageAction);

  spdlog::info("optimize start"sv);
  auto &TM = D.extract().TM;
  {
    auto Triple = LLModule.getTarget();
    auto [TheTarget, ErrorMessage] = LLVM::Target::getFromTriple(Triple);
    if (ErrorMessage) {
      spdlog::error("getFromTriple failed:{}"sv, ErrorMessage.string_view());
      return Unexpect(ErrCode::Value::IllegalPath);
    }

    const auto Level = Conf.getCompilerConfigure().getOptimizationLevel();

    {
      // Pick the CPU model the target machine is tuned for.
      std::string CPUName;
#if defined(__riscv) && __riscv_xlen == 64
      CPUName = "generic-rv64"s;
#else
      if (Conf.getCompilerConfigure().isGenericBinary()) {
        CPUName = "generic"s;
      } else {
        CPUName = LLVM::getHostCPUName().string_view();
      }
#endif

      // NOTE(review): the host CPU feature string is passed even when CPUName
      // is "generic" — confirm this is intended for generic binaries.
      TM = LLVM::TargetMachine::create(TheTarget, Triple, CPUName.c_str(),
                                       LLVM::getHostCPUFeatures().unwrap(),
                                       toLLVMCodeGenLevel(Level), LLVMRelocPIC,
                                       LLVMCodeModelDefault);
    }

#if LLVM_VERSION_MAJOR >= 13
    // New pass manager: a failure is logged, compilation still continues.
    auto PBO = LLVM::PassBuilderOptions::create();
    if (auto Error = PBO.runPasses(LLModule, toLLVMLevel(Level), TM)) {
      spdlog::error("{}"sv, Error.message().string_view());
    }
#else
    // Legacy pass managers: one per-function, one per-module.
    auto FP = LLVM::PassManager::createForModule(LLModule);
    auto MP = LLVM::PassManager::create();

    TM.addAnalysisPasses(MP);
    TM.addAnalysisPasses(FP);
    {
      auto PMB = LLVM::PassManagerBuilder::create();
      auto [OptLevel, SizeLevel] = toLLVMLevel(Level);
      PMB.setOptLevel(OptLevel);
      PMB.setSizeLevel(SizeLevel);
      PMB.populateFunctionPassManager(FP);
      PMB.populateModulePassManager(MP);
    }
    // At O0 and O1 the tail call elimination pass is added explicitly.
    if (Level == CompilerConfigure::OptimizationLevel::O0 ||
        Level == CompilerConfigure::OptimizationLevel::O1) {
      FP.addTailCallEliminationPass();
    }

    FP.initializeFunctionPassManager();
    for (auto Func = LLModule.getFirstFunction(); Func;
         Func = Func.getNextFunction()) {
      FP.runFunctionPassManager(Func);
    }
    FP.finalizeFunctionPassManager();
    MP.runPassManager(LLModule);
#endif
  }

  // Set initializer for constant value
  if (auto Table = LLModule.getNamedGlobal("intrinsics")) {
    // The global already exists: give it a null initializer and make it
    // non-constant.
    Table.setInitializer(LLVM::Value::getConstNull(Table.getType()));
    Table.setGlobalConstant(false);
  } else {
    // No such global in the module: add one.
    auto TableTy = LLVM::Type::getArrayType(
        LLContext.getInt8Ty().getPointerTo(),
        static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax));
    LLModule.addGlobal(TableTy.getPointerTo(), false, LLVMExternalLinkage,
                       LLVM::Value::getConstNull(TableTy), "intrinsics");
  }

  spdlog::info("optimize done"sv);
  return Expect<Data>{std::move(D)};
}
5997
5998
2.33k
// Emits one exported wrapper symbol "t<index>" per entry of the type section
// and records the composite type and wrapper in the compile context.
//
// - Function types get a wrapper that loads the arguments from a raw argument
//   buffer, calls the raw function pointer and stores the results into a raw
//   result buffer.
// - A function type equal to an earlier one reuses that wrapper through an
//   alias instead of a new function.
// - Non-function types get a wrapper that only returns.
void Compiler::compile(const AST::TypeSection &TypeSec) noexcept {
  // Signature shared by every wrapper:
  //   void (exec ctx ptr, i8 ptr, i8 ptr, i8 ptr)
  auto WrapperTy = LLVM::Type::getFunctionType(
      Context->VoidTy,
      {Context->ExecCtxPtrTy, Context->Int8PtrTy, Context->Int8PtrTy,
       Context->Int8PtrTy},
      false);
  auto SubTypes = TypeSec.getContent();
  const auto Size = SubTypes.size();
  if (Size == 0) {
    return;
  }
  Context->CompositeTypes.reserve(Size);
  Context->FunctionWrappers.reserve(Size);

  // Declares a wrapper function named Symbol and applies the linkage,
  // visibility and attribute set common to all wrappers.
  const auto DeclareWrapper = [this, &WrapperTy](const std::string &Symbol) {
    auto Fn = Context->LLModule.addFunction(WrapperTy, LLVMExternalLinkage,
                                            Symbol.c_str());
    Fn.setVisibility(LLVMProtectedVisibility);
    Fn.setDSOLocal(true);
    Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
    Fn.addFnAttr(Context->NoStackArgProbe);
    Fn.addFnAttr(Context->StrictFP);
    Fn.addFnAttr(Context->UWTable);
    Fn.addParamAttr(0, Context->ReadOnly);
    Fn.addParamAttr(0, Context->NoAlias);
    Fn.addParamAttr(1, Context->NoAlias);
    Fn.addParamAttr(2, Context->NoAlias);
    Fn.addParamAttr(3, Context->NoAlias);
    return Fn;
  };

  // Iterate and compile types.
  for (size_t I = 0; I < Size; ++I) {
    const auto &CompType = SubTypes[I].getCompositeType();
    const auto Name = fmt::format("t{}"sv, Context->CompositeTypes.size());

    if (!CompType.isFunc()) {
      // Non function type case. Create empty wrapper.
      auto Stub = DeclareWrapper(Name);
      {
        LLVM::Builder Builder(Context->LLContext);
        Builder.positionAtEnd(
            LLVM::BasicBlock::create(Context->LLContext, Stub, "entry"));
        Builder.createRetVoid();
      }
      Context->FunctionWrappers.push_back(Stub);
      Context->CompositeTypes.push_back(&CompType);
      continue;
    }

    // Look for an earlier, identical function type. Earlier == I means "none".
    size_t Earlier = I;
    for (size_t J = 0; J < I; ++J) {
      const auto &Known = Context->CompositeTypes[J];
      if (Known && Known->isFunc() &&
          Known->getFuncType() == CompType.getFuncType()) {
        Earlier = J;
        break;
      }
    }

    if (Earlier != I) {
      // Not unique: share the earlier type and wrapper, export an alias under
      // this entry's name.
      Context->CompositeTypes.push_back(Context->CompositeTypes[Earlier]);
      auto Shared = Context->FunctionWrappers[Earlier];
      Context->FunctionWrappers.push_back(Shared);
      auto Alias = Context->LLModule.addAlias(WrapperTy, Shared, Name.c_str());
      Alias.setLinkage(LLVMExternalLinkage);
      Alias.setVisibility(LLVMProtectedVisibility);
      Alias.setDSOLocal(true);
      Alias.setDLLStorageClass(LLVMDLLExportStorageClass);
      continue;
    }

    // Create Wrapper
    auto Wrapper = DeclareWrapper(Name);
    {
      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, Wrapper, "entry"));

      auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy,
                            CompType.getFuncType());
      auto RTy = FTy.getReturnType();
      std::vector<LLVM::Type> FPTy(FTy.getNumParams());
      FTy.getParamTypes(FPTy);

      // The first LLVM parameter is the execution context, not a WASM value.
      const size_t ArgCount = FPTy.size() - 1;

      // Wrapper parameters, in order: exec ctx, raw function, args, rets.
      auto ExecCtxPtr = Wrapper.getFirstParam();
      auto FuncPtrParam = ExecCtxPtr.getNextParam();
      auto RawFunc = LLVM::FunctionCallee{
          FTy, Builder.createBitCast(FuncPtrParam, FTy.getPointerTo())};
      auto RawArgs = FuncPtrParam.getNextParam();
      auto RawRets = RawArgs.getNextParam();

      // Load each argument from its kValSize-strided slot.
      std::vector<LLVM::Value> Args;
      Args.reserve(FTy.getNumParams());
      Args.push_back(ExecCtxPtr);
      for (size_t J = 0; J < ArgCount; ++J) {
        Args.push_back(Builder.createValuePtrLoad(
            FPTy[J + 1], RawArgs, Context->Int8Ty, J * kValSize));
      }

      auto Ret = Builder.createCall(RawFunc, Args);
      // A void result needs no store.
      if (!RTy.isVoidTy()) {
        if (RTy.isStructTy()) {
          auto Rets = unpackStruct(Builder, Ret);
          Builder.createArrayPtrStore(Rets, RawRets, Context->Int8Ty, kValSize);
        } else {
          Builder.createValuePtrStore(Ret, RawRets, Context->Int8Ty);
        }
      }
      Builder.createRetVoid();
    }
    // Copy wrapper, param and return lists to module instance.
    Context->FunctionWrappers.push_back(Wrapper);
    Context->CompositeTypes.push_back(&CompType);
  }
}
6125
6126
2.33k
// Walks the import section and registers what the compile context needs:
// - function imports get an internal function "f<id>" that packs its
//   arguments into a buffer, calls the kCall intrinsic with the function id,
//   and unpacks the results;
// - global imports contribute their LLVM type to Context->Globals;
// - table, memory and tag imports need no action here.
void Compiler::compile(const AST::ImportSection &ImportSec) noexcept {
  // Builds the forwarding function for one imported function.
  const auto EmitFunctionImport = [this](const auto &ImpDesc) {
    const auto FuncID = static_cast<uint32_t>(Context->Functions.size());
    // Get the function type index in module.
    uint32_t TypeIdx = ImpDesc.getExternalFuncTypeIdx();
    assuming(TypeIdx < Context->CompositeTypes.size());
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
    auto RTy = FTy.getReturnType();

    auto F = LLVM::FunctionCallee{
        FTy,
        Context->LLModule.addFunction(FTy, LLVMInternalLinkage,
                                      fmt::format("f{}"sv, FuncID).c_str())};
    F.Fn.setDSOLocal(true);
    F.Fn.addFnAttr(Context->NoStackArgProbe);
    F.Fn.addFnAttr(Context->StrictFP);
    F.Fn.addFnAttr(Context->UWTable);
    F.Fn.addParamAttr(0, Context->ReadOnly);
    F.Fn.addParamAttr(0, Context->NoAlias);

    LLVM::Builder Builder(Context->LLContext);
    Builder.positionAtEnd(
        LLVM::BasicBlock::create(Context->LLContext, F.Fn, "entry"));

    const auto ArgSize = FuncType.getParamTypes().size();
    const auto RetSize = RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();

    // Buffers with one kValSize slot per argument / result.
    LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
    LLVM::Value Rets = Builder.createArray(RetSize, kValSize);

    // Skip the leading execution-context parameter, then spill every
    // remaining parameter into its slot.
    auto Param = F.Fn.getFirstParam();
    for (unsigned I = 0; I < ArgSize; ++I) {
      Param = Param.getNextParam();
      Builder.createValuePtrStore(Param, Args, Context->Int8Ty, I * kValSize);
    }

    // void kCall(i32 function id, i8 ptr args, i8 ptr rets)
    auto CallTy = LLVM::Type::getFunctionType(
        Context->VoidTy,
        {Context->Int32Ty, Context->Int8PtrTy, Context->Int8PtrTy}, false);
    auto Callee =
        Context->getIntrinsic(Builder, Executable::Intrinsics::kCall, CallTy);
    Builder.createCall(Callee,
                       {Context->LLContext.getInt32(FuncID), Args, Rets});

    switch (RetSize) {
    case 0:
      Builder.createRetVoid();
      break;
    case 1:
      Builder.createRet(Builder.createValuePtrLoad(RTy, Rets, Context->Int8Ty));
      break;
    default:
      Builder.createAggregateRet(Builder.createArrayPtrLoad(
          RetSize, RTy, Rets, Context->Int8Ty, kValSize));
      break;
    }

    // Imported functions carry no code segment.
    Context->Functions.emplace_back(TypeIdx, F, nullptr);
  };

  // Records the LLVM type of one imported global.
  const auto RegisterGlobalImport = [this](const auto &ImpDesc) {
    // Get global type. External type checked in validation.
    const auto &ValType = ImpDesc.getExternalGlobalType().getValType();
    Context->Globals.push_back(toLLVMType(Context->LLContext, ValType));
  };

  // Iterate and compile import descriptions.
  for (const auto &ImpDesc : ImportSec.getContent()) {
    // Add the imports into module instance.
    switch (ImpDesc.getExternalType()) {
    case ExternalType::Function: // Function type index
      EmitFunctionImport(ImpDesc);
      break;
    case ExternalType::Global: // Global type
      RegisterGlobalImport(ImpDesc);
      break;
    case ExternalType::Table:  // Table type
    case ExternalType::Memory: // Memory type
      // Nothing to do.
      break;
    case ExternalType::Tag: // Tag type
      // TODO: EXCEPTION - implement the AOT.
      break;
    default:
      assumingUnreachable();
    }
  }
}
6224
6225
2.32k
// Export section overload: the body is empty, nothing is emitted for it.
void Compiler::compile(const AST::ExportSection & /*ExportSec*/) noexcept {
  // Nothing to do.
}
6226
6227
2.33k
// Appends the LLVM type of every global defined in the global section to
// Context->Globals, in section order.
void Compiler::compile(const AST::GlobalSection &GlobalSec) noexcept {
  for (const auto &Segment : GlobalSec.getContent()) {
    Context->Globals.push_back(toLLVMType(
        Context->LLContext, Segment.getGlobalType().getValType()));
  }
}
6234
6235
void Compiler::compile(const AST::MemorySection &,
6236
2.33k
                       const AST::DataSection &) noexcept {}
6237
6238
void Compiler::compile(const AST::TableSection &,
6239
2.33k
                       const AST::ElementSection &) noexcept {}
6240
6241
// Compiles the functions defined by the module.
//
// Works in two passes:
//  1. Declare an LLVM function "f<id>" for every (type index, code segment)
//     pair and append it to Context->Functions, so every function is already
//     declared when bodies are compiled.
//  2. Compile the body of every entry of Context->Functions that has a code
//     segment (entries without one are skipped).
//
// Returns the first error reported by FunctionCompiler::compile, if any.
Expect<void> Compiler::compile(const AST::FunctionSection &FuncSec,
                               const AST::CodeSection &CodeSec) noexcept {
  const auto &TypeIdxs = FuncSec.getContent();
  const auto &CodeSegs = CodeSec.getContent();
  assuming(TypeIdxs.size() == CodeSegs.size());

  // Pass 1: declarations.
  for (size_t I = 0; I < CodeSegs.size(); ++I) {
    const auto &TypeIdx = TypeIdxs[I];
    const auto &Code = CodeSegs[I];
    assuming(TypeIdx < Context->CompositeTypes.size());
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
    const auto FuncID = Context->Functions.size();
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
    LLVM::FunctionCallee F = {FTy, Context->LLModule.addFunction(
                                       FTy, LLVMExternalLinkage,
                                       fmt::format("f{}"sv, FuncID).c_str())};
    F.Fn.setVisibility(LLVMProtectedVisibility);
    F.Fn.setDSOLocal(true);
    F.Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
    F.Fn.addFnAttr(Context->NoStackArgProbe);
    F.Fn.addFnAttr(Context->StrictFP);
    F.Fn.addFnAttr(Context->UWTable);
    F.Fn.addParamAttr(0, Context->ReadOnly);
    F.Fn.addParamAttr(0, Context->NoAlias);

    Context->Functions.emplace_back(TypeIdx, F, &Code);
  }

  // Pass 2: bodies.
  for (auto [T, F, Code] : Context->Functions) {
    if (!Code) {
      // No code segment attached to this entry: nothing to compile.
      continue;
    }

    // Expand the (count, type) groups of the code segment into a flat list
    // with one entry per local. The total is computed first so the vector is
    // allocated once, and each group is appended with a single bulk insert
    // instead of one push_back per local.
    size_t LocalCount = 0;
    for (const auto &Group : Code->getLocals()) {
      LocalCount += Group.first;
    }
    std::vector<ValType> Locals;
    Locals.reserve(LocalCount);
    for (const auto &Group : Code->getLocals()) {
      Locals.insert(Locals.end(), Group.first, Group.second);
    }

    FunctionCompiler FC(*Context, F, Locals,
                        Conf.getCompilerConfigure().isInterruptible(),
                        Conf.getStatisticsConfigure().isInstructionCounting(),
                        Conf.getStatisticsConfigure().isCostMeasuring());
    auto Type = Context->resolveBlockType(T);
    EXPECTED_TRY(FC.compile(*Code, std::move(Type)));
    F.Fn.eliminateUnreachableBlocks();
  }
  return {};
}
6291
6292
} // namespace LLVM
6293
} // namespace WasmEdge