Coverage Report

Created: 2025-12-14 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/WasmEdge/lib/llvm/compiler.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: 2019-2024 Second State INC
3
4
#include "llvm/compiler.h"
5
6
#include "aot/version.h"
7
#include "common/defines.h"
8
#include "common/filesystem.h"
9
#include "common/spdlog.h"
10
#include "data.h"
11
#include "llvm.h"
12
#include "system/allocator.h"
13
14
#include <algorithm>
15
#include <array>
16
#include <cinttypes>
17
#include <cstdint>
18
#include <cstdlib>
19
#include <limits>
20
#include <memory>
21
#include <numeric>
22
#include <string>
23
#include <string_view>
24
#include <system_error>
25
26
namespace LLVM = WasmEdge::LLVM;
27
using namespace std::literals;
28
29
namespace {
30
31
static bool
32
isVoidReturn(WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
33
static LLVM::Type toLLVMType(LLVM::Context LLContext,
34
                             const WasmEdge::ValType &ValType) noexcept;
35
static std::vector<LLVM::Type>
36
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
37
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
38
static LLVM::Type
39
toLLVMRetsType(LLVM::Context LLContext,
40
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
41
static LLVM::Type
42
toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
43
           const WasmEdge::AST::FunctionType &FuncType) noexcept;
44
static LLVM::Value
45
toLLVMConstantZero(LLVM::Context LLContext,
46
                   const WasmEdge::ValType &ValType) noexcept;
47
static std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
48
                                             LLVM::Value Struct) noexcept;
49
class FunctionCompiler;
50
51
// XXX: The misalignment handler is not implemented yet, so all memory accesses
// are forced to be treated as unaligned.
52
// Force unaligned loads/stores.
53
static inline constexpr const bool kForceUnalignment = true;
54
55
// force checking div/rem on zero
56
static inline constexpr const bool kForceDivCheck = true;
57
58
// Size of a ValVariant
59
static inline constexpr const uint32_t kValSize = sizeof(WasmEdge::ValVariant);
60
61
// Translate Compiler::OptimizationLevel to llvm::PassBuilder version
62
#if LLVM_VERSION_MAJOR >= 13
63
static inline const char *
64
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
65
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
66
  switch (Level) {
67
  case OL::O0:
68
    return "default<O0>,function(tailcallelim)";
69
  case OL::O1:
70
    return "default<O1>,function(tailcallelim)";
71
  case OL::O2:
72
    return "default<O2>";
73
  case OL::O3:
74
    return "default<O3>";
75
  case OL::Os:
76
    return "default<Os>";
77
  case OL::Oz:
78
    return "default<Oz>";
79
  default:
80
    assumingUnreachable();
81
  }
82
}
83
#else
84
static inline std::pair<unsigned int, unsigned int>
85
2.31k
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
86
2.31k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
87
2.31k
  switch (Level) {
88
0
  case OL::O0:
89
0
    return {0, 0};
90
0
  case OL::O1:
91
0
    return {1, 0};
92
0
  case OL::O2:
93
0
    return {2, 0};
94
2.31k
  case OL::O3:
95
2.31k
    return {3, 0};
96
0
  case OL::Os:
97
0
    return {2, 1};
98
0
  case OL::Oz:
99
0
    return {2, 2};
100
0
  default:
101
0
    assumingUnreachable();
102
2.31k
  }
103
2.31k
}
104
#endif
105
106
/// Map the requested WasmEdge optimization level onto the LLVM-C code
/// generation level.
///
/// O2 and both size-oriented levels (Os, Oz) share the default code
/// generation level.
static inline LLVMCodeGenOptLevel toLLVMCodeGenLevel(
    WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
  using OptLevel = WasmEdge::CompilerConfigure::OptimizationLevel;
  switch (Level) {
  case OptLevel::O0:
    return LLVMCodeGenLevelNone;
  case OptLevel::O1:
    return LLVMCodeGenLevelLess;
  case OptLevel::O3:
    return LLVMCodeGenLevelAggressive;
  case OptLevel::O2:
  case OptLevel::Os:
  case OptLevel::Oz:
    return LLVMCodeGenLevelDefault;
  default:
    // Every enumerator is handled above.
    assumingUnreachable();
  }
}
126
} // namespace
127
128
/// Per-module compilation state shared by every function being compiled.
///
/// Caches the LLVM attributes and types that the code generator uses, records
/// which SIMD-related CPU features may be assumed, and owns the module-level
/// bookkeeping (composite types, function wrappers, functions, globals, the
/// intrinsics table and the shared `trap` function).
struct LLVM::Compiler::CompileContext {
  LLVM::Context LLContext;
  LLVM::Module &LLModule;

  // Cached attributes.
  LLVM::Attribute Cold;
  LLVM::Attribute NoAlias;
  LLVM::Attribute NoInline;
  LLVM::Attribute NoReturn;
  LLVM::Attribute ReadOnly;
  LLVM::Attribute StrictFP;
  LLVM::Attribute UWTable;
  LLVM::Attribute NoStackArgProbe;

  // Cached scalar types.
  LLVM::Type VoidTy;
  LLVM::Type Int8Ty;
  LLVM::Type Int16Ty;
  LLVM::Type Int32Ty;
  LLVM::Type Int64Ty;
  LLVM::Type Int128Ty;
  LLVM::Type FloatTy;
  LLVM::Type DoubleTy;

  // Cached 128-bit vector types.
  LLVM::Type Int8x16Ty;
  LLVM::Type Int16x8Ty;
  LLVM::Type Int32x4Ty;
  LLVM::Type Floatx4Ty;
  LLVM::Type Int64x2Ty;
  LLVM::Type Doublex2Ty;
  LLVM::Type Int128x1Ty;

  // Cached pointer types.
  LLVM::Type Int8PtrTy;
  LLVM::Type Int32PtrTy;
  LLVM::Type Int64PtrTy;
  LLVM::Type Int128PtrTy;
  LLVM::Type Int8PtrPtrTy;

  // Execution-context struct and intrinsics-table types.
  LLVM::Type ExecCtxTy;
  LLVM::Type ExecCtxPtrTy;
  LLVM::Type IntrinsicsTableTy;
  LLVM::Type IntrinsicsTablePtrTy;

  // Host CPU feature string; only populated for non-generic binaries.
  LLVM::Message SubtargetFeatures;

  // The Support* flags start from what the compiler building this file
  // targets and may be switched on by the constructor after inspecting the
  // host CPU features.
#if defined(__x86_64__)
#if defined(__XOP__)
  bool SupportXOP = true;
#else
  bool SupportXOP = false;
#endif

#if defined(__SSE4_1__)
  bool SupportSSE4_1 = true;
#else
  bool SupportSSE4_1 = false;
#endif

#if defined(__SSSE3__)
  bool SupportSSSE3 = true;
#else
  bool SupportSSSE3 = false;
#endif

#if defined(__SSE2__)
  bool SupportSSE2 = true;
#else
  bool SupportSSE2 = false;
#endif
#endif

#if defined(__aarch64__)
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__ARM_NEON_FP)
  bool SupportNEON = true;
#else
  bool SupportNEON = false;
#endif
#endif

  std::vector<const AST::CompositeType *> CompositeTypes;
  std::vector<LLVM::Value> FunctionWrappers;
  std::vector<std::tuple<uint32_t, LLVM::FunctionCallee,
                         const WasmEdge::AST::CodeSegment *>>
      Functions;
  std::vector<LLVM::Type> Globals;
  LLVM::Value IntrinsicsTable;
  LLVM::FunctionCallee Trap;

  /// Build the context for module \p M inside LLVM context \p C.
  ///
  /// Besides caching attributes and types, the constructor:
  ///  - adds the external `intrinsics` global and the external `version`
  ///    global (initialised with AOT::kBinaryVersion) to the module;
  ///  - declares and defines the private `trap` function, which forwards its
  ///    i32 argument to the kTrap intrinsic and never returns;
  ///  - when \p IsGenericBinary is false, reads the host CPU feature string
  ///    and turns on the matching Support* flags.
  CompileContext(LLVM::Context C, LLVM::Module &M,
                 bool IsGenericBinary) noexcept
      : LLContext(C), LLModule(M),
        Cold(LLVM::Attribute::createEnum(C, LLVM::Core::Cold, 0)),
        NoAlias(LLVM::Attribute::createEnum(C, LLVM::Core::NoAlias, 0)),
        NoInline(LLVM::Attribute::createEnum(C, LLVM::Core::NoInline, 0)),
        NoReturn(LLVM::Attribute::createEnum(C, LLVM::Core::NoReturn, 0)),
        ReadOnly(LLVM::Attribute::createEnum(C, LLVM::Core::ReadOnly, 0)),
        StrictFP(LLVM::Attribute::createEnum(C, LLVM::Core::StrictFP, 0)),
        UWTable(LLVM::Attribute::createEnum(C, LLVM::Core::UWTable,
                                            LLVM::Core::UWTableDefault)),
        NoStackArgProbe(
            LLVM::Attribute::createString(C, "no-stack-arg-probe"sv, {})),
        VoidTy(LLContext.getVoidTy()), Int8Ty(LLContext.getInt8Ty()),
        Int16Ty(LLContext.getInt16Ty()), Int32Ty(LLContext.getInt32Ty()),
        Int64Ty(LLContext.getInt64Ty()), Int128Ty(LLContext.getInt128Ty()),
        FloatTy(LLContext.getFloatTy()), DoubleTy(LLContext.getDoubleTy()),
        Int8x16Ty(LLVM::Type::getVectorType(Int8Ty, 16)),
        Int16x8Ty(LLVM::Type::getVectorType(Int16Ty, 8)),
        Int32x4Ty(LLVM::Type::getVectorType(Int32Ty, 4)),
        Floatx4Ty(LLVM::Type::getVectorType(FloatTy, 4)),
        Int64x2Ty(LLVM::Type::getVectorType(Int64Ty, 2)),
        Doublex2Ty(LLVM::Type::getVectorType(DoubleTy, 2)),
        Int128x1Ty(LLVM::Type::getVectorType(Int128Ty, 1)),
        Int8PtrTy(Int8Ty.getPointerTo()), Int32PtrTy(Int32Ty.getPointerTo()),
        Int64PtrTy(Int64Ty.getPointerTo()),
        Int128PtrTy(Int128Ty.getPointerTo()),
        Int8PtrPtrTy(Int8PtrTy.getPointerTo()),
        ExecCtxTy(LLVM::Type::getStructType(
            "ExecCtx",
            std::initializer_list<LLVM::Type>{
                // Memory
                Int8PtrTy.getPointerTo(),
                // Globals
                Int128PtrTy.getPointerTo(),
                // InstrCount
                Int64PtrTy,
                // CostTable
                LLVM::Type::getArrayType(Int64Ty, UINT16_MAX + 1)
                    .getPointerTo(),
                // Gas
                Int64PtrTy,
                // GasLimit
                Int64Ty,
                // StopToken
                Int32PtrTy,
            })),
        ExecCtxPtrTy(ExecCtxTy.getPointerTo()),
        IntrinsicsTableTy(LLVM::Type::getArrayType(
            Int8PtrTy,
            static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax))),
        IntrinsicsTablePtrTy(IntrinsicsTableTy.getPointerTo()),
        IntrinsicsTable(LLModule.addGlobal(IntrinsicsTablePtrTy, true,
                                           LLVMExternalLinkage, LLVM::Value(),
                                           "intrinsics")) {
    // Declare the module-private trap function: void trap(i32).
    Trap.Ty = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
    Trap.Fn = LLModule.addFunction(Trap.Ty, LLVMPrivateLinkage, "trap");
    Trap.Fn.setDSOLocal(true);
    Trap.Fn.addFnAttr(NoStackArgProbe);
    Trap.Fn.addFnAttr(StrictFP);
    Trap.Fn.addFnAttr(UWTable);
    Trap.Fn.addFnAttr(NoReturn);
    Trap.Fn.addFnAttr(Cold);
    Trap.Fn.addFnAttr(NoInline);

    // Embed the AOT binary version as an external global.
    LLModule.addGlobal(Int32Ty, true, LLVMExternalLinkage,
                       LLVM::Value::getConstInt(Int32Ty, AOT::kBinaryVersion),
                       "version");

    if (!IsGenericBinary) {
      // The feature string is a comma-separated list; only entries that start
      // with '+' are considered here.
      SubtargetFeatures = LLVM::getHostCPUFeatures();
      auto Features = SubtargetFeatures.string_view();
      while (!Features.empty()) {
        std::string_view Feature;
        if (auto Pos = Features.find(','); Pos != std::string_view::npos) {
          Feature = Features.substr(0, Pos);
          Features = Features.substr(Pos + 1);
        } else {
          Feature = std::exchange(Features, std::string_view());
        }
        // A leading or doubled comma yields an empty token; indexing an empty
        // string_view is undefined, so skip it explicitly.
        if (Feature.empty() || Feature[0] != '+') {
          continue;
        }
        Feature = Feature.substr(1);

#if defined(__x86_64__)
        if (!SupportXOP && Feature == "xop"sv) {
          SupportXOP = true;
        }
        if (!SupportSSE4_1 && Feature == "sse4.1"sv) {
          SupportSSE4_1 = true;
        }
        if (!SupportSSSE3 && Feature == "ssse3"sv) {
          SupportSSSE3 = true;
        }
        if (!SupportSSE2 && Feature == "sse2"sv) {
          SupportSSE2 = true;
        }
#elif defined(__aarch64__)
        if (!SupportNEON && Feature == "neon"sv) {
          SupportNEON = true;
        }
#endif
      }
    }

    {
      // create trap
      // Body of trap(): call the kTrap intrinsic with the incoming error code
      // and mark the rest as unreachable.
      LLVM::Builder Builder(LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(LLContext, Trap.Fn, "entry"));
      auto FnTy = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
      auto CallTrap = Builder.createCall(
          getIntrinsic(Builder, Executable::Intrinsics::kTrap, FnTy),
          {Trap.Fn.getFirstParam()});
      CallTrap.addCallSiteAttribute(NoReturn);
      Builder.createUnreachable();
    }
  }

  /// Emit the loads that fetch the base pointer of memory \p Index from the
  /// `Memory` field (field 0) of the loaded execution context \p ExecCtx.
  ///
  /// When WASMEDGE_ALLOCATOR_IS_STABLE is not set, the slot is loaded as a
  /// pointer to a pointer and then loaded through once more.
  LLVM::Value getMemory(LLVM::Builder &Builder, LLVM::Value ExecCtx,
                        uint32_t Index) noexcept {
    auto Array = Builder.createExtractValue(ExecCtx, 0);
#if WASMEDGE_ALLOCATOR_IS_STABLE
    auto VPtr = Builder.createLoad(
        Int8PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
                                              LLContext.getInt64(Index)));
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                     LLVM::Metadata(LLContext, {}));
#else
    auto VPtrPtr = Builder.createLoad(
        Int8PtrPtrTy, Builder.createInBoundsGEP1(Int8PtrPtrTy, Array,
                                                 LLContext.getInt64(Index)));
    VPtrPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                        LLVM::Metadata(LLContext, {}));
    auto VPtr = Builder.createLoad(
        Int8PtrTy,
        Builder.createInBoundsGEP1(Int8PtrTy, VPtrPtr, LLContext.getInt64(0)));
#endif
    return Builder.createBitCast(VPtr, Int8PtrTy);
  }

  /// Emit the load that fetches the address of global \p Index from the
  /// `Globals` field (field 1) of \p ExecCtx.
  ///
  /// \returns the LLVM type recorded for that global in `Globals`, paired with
  /// a pointer cast to that type.
  std::pair<LLVM::Type, LLVM::Value> getGlobal(LLVM::Builder &Builder,
                                               LLVM::Value ExecCtx,
                                               uint32_t Index) noexcept {
    auto Ty = Globals[Index];
    auto Array = Builder.createExtractValue(ExecCtx, 1);
    auto VPtr = Builder.createLoad(
        Int128PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
                                                LLContext.getInt64(Index)));
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                     LLVM::Metadata(LLContext, {}));
    auto Ptr = Builder.createBitCast(VPtr, Ty.getPointerTo());
    return {Ty, Ptr};
  }

  /// Extract the `InstrCount` field (field 2) of \p ExecCtx.
  LLVM::Value getInstrCount(LLVM::Builder &Builder,
                            LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 2);
  }

  /// Extract the `CostTable` field (field 3) of \p ExecCtx.
  LLVM::Value getCostTable(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 3);
  }

  /// Extract the `Gas` field (field 4) of \p ExecCtx.
  LLVM::Value getGas(LLVM::Builder &Builder, LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 4);
  }

  /// Extract the `GasLimit` field (field 5) of \p ExecCtx.
  LLVM::Value getGasLimit(LLVM::Builder &Builder,
                          LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 5);
  }

  /// Extract the `StopToken` field (field 6) of \p ExecCtx.
  LLVM::Value getStopToken(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    return Builder.createExtractValue(ExecCtx, 6);
  }

  /// Emit the loads that fetch entry \p Index of the intrinsics table and
  /// return it as a callee of function type \p Ty.
  LLVM::FunctionCallee getIntrinsic(LLVM::Builder &Builder,
                                    Executable::Intrinsics Index,
                                    LLVM::Type Ty) noexcept {
    const auto Value = static_cast<uint32_t>(Index);
    auto PtrTy = Ty.getPointerTo();
    auto PtrPtrTy = PtrTy.getPointerTo();
    // The `intrinsics` global holds a pointer to the table, so load it first.
    auto IT = Builder.createLoad(IntrinsicsTablePtrTy, IntrinsicsTable);
    IT.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                   LLVM::Metadata(LLContext, {}));
    auto VPtr =
        Builder.createInBoundsGEP2(IntrinsicsTableTy, IT, LLContext.getInt64(0),
                                   LLContext.getInt64(Value));
    auto Ptr = Builder.createBitCast(VPtr, PtrPtrTy);
    return {Ty, Builder.createLoad(PtrTy, Ptr)};
  }

  /// Resolve a block type into its {parameter types, result types}.
  ///
  /// An empty block type yields two empty lists, a value-type block yields no
  /// parameters and that single result, and a type-index block copies the
  /// parameter and return types of the referenced function type.
  std::pair<std::vector<ValType>, std::vector<ValType>>
  resolveBlockType(const BlockType &BType) const noexcept {
    using VecT = std::vector<ValType>;
    using RetT = std::pair<VecT, VecT>;
    if (BType.isEmpty()) {
      return RetT{};
    }
    if (BType.isValType()) {
      return RetT{{}, {BType.getValType()}};
    } else {
      // Type index case. t2* = type[index].returns
      const uint32_t TypeIdx = BType.getTypeIndex();
      const auto &FType = CompositeTypes[TypeIdx]->getFuncType();
      return RetT{
          VecT(FType.getParamTypes().begin(), FType.getParamTypes().end()),
          VecT(FType.getReturnTypes().begin(), FType.getReturnTypes().end())};
    }
  }
};
413
414
namespace {
415
416
using namespace WasmEdge;
417
418
38.2k
/// Tell whether a result-type list describes a void return, i.e. contains no
/// value types at all.
static bool isVoidReturn(Span<const ValType> ValTypes) noexcept {
  return ValTypes.size() == 0;
}
421
422
/// Select the LLVM type used to hold a value of the given WebAssembly value
/// type.
///
/// i32/i64/f32/f64 map to the matching scalar type; v128 and both reference
/// kinds are held in a 2 x i64 vector.
static LLVM::Type toLLVMType(LLVM::Context LLContext,
                             const ValType &ValType) noexcept {
  const auto Code = ValType.getCode();
  if (Code == TypeCode::I32) {
    return LLContext.getInt32Ty();
  }
  if (Code == TypeCode::I64) {
    return LLContext.getInt64Ty();
  }
  if (Code == TypeCode::F32) {
    return LLContext.getFloatTy();
  }
  if (Code == TypeCode::F64) {
    return LLContext.getDoubleTy();
  }
  if (Code == TypeCode::Ref || Code == TypeCode::RefNull ||
      Code == TypeCode::V128) {
    return LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2);
  }
  // No other type code is expected here.
  assumingUnreachable();
}
441
442
static std::vector<LLVM::Type>
443
toLLVMTypeVector(LLVM::Context LLContext,
444
22.1k
                 Span<const ValType> ValTypes) noexcept {
445
22.1k
  std::vector<LLVM::Type> Result;
446
22.1k
  Result.reserve(ValTypes.size());
447
22.1k
  for (const auto &Type : ValTypes) {
448
20.9k
    Result.push_back(toLLVMType(LLContext, Type));
449
20.9k
  }
450
22.1k
  return Result;
451
22.1k
}
452
453
static std::vector<LLVM::Type>
454
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
455
17.9k
               Span<const ValType> ValTypes) noexcept {
456
17.9k
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
457
17.9k
  Result.insert(Result.begin(), ExecCtxPtrTy);
458
17.9k
  return Result;
459
17.9k
}
460
461
/// Choose the LLVM return type for a list of WebAssembly result types:
/// void for no results, the plain LLVM type for exactly one result, and a
/// struct of the individual LLVM types for several results.
static LLVM::Type toLLVMRetsType(LLVM::Context LLContext,
                                 Span<const ValType> ValTypes) noexcept {
  if (isVoidReturn(ValTypes)) {
    return LLContext.getVoidTy();
  }
  if (ValTypes.size() != 1) {
    // Multiple results are returned packed into one struct value.
    auto Members = toLLVMTypeVector(LLContext, ValTypes);
    return LLVM::Type::getStructType(Members);
  }
  return toLLVMType(LLContext, ValTypes.front());
}
476
477
/// Build the LLVM function type for a WebAssembly function type. The
/// parameter list is prefixed with the execution context pointer type and the
/// result list is lowered by toLLVMRetsType.
static LLVM::Type toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
                             const AST::FunctionType &FuncType) noexcept {
  const auto &Params = FuncType.getParamTypes();
  auto LoweredArgs = toLLVMArgsType(LLContext, ExecCtxPtrTy, Params);
  const auto &Results = FuncType.getReturnTypes();
  auto LoweredRet = toLLVMRetsType(LLContext, Results);
  return LLVM::Type::getFunctionType(LoweredRet, LoweredArgs);
}
484
485
/// Produce the constant used as the initial value of a local of the given
/// WebAssembly value type.
///
/// Numeric and v128 types get the null constant of their LLVM type.
/// Reference types get a 16-byte vector constant whose leading bytes are the
/// raw data of the value type and whose remaining bytes are zero.
static LLVM::Value toLLVMConstantZero(LLVM::Context LLContext,
                                      const ValType &ValType) noexcept {
  switch (ValType.getCode()) {
  case TypeCode::Ref:
  case TypeCode::RefNull: {
    std::array<uint8_t, 16> Bytes{};
    const auto TypeBytes = ValType.getRawData();
    std::copy(TypeBytes.begin(), TypeBytes.end(), Bytes.begin());
    return LLVM::Value::getConstVector8(LLContext, Bytes);
  }
  case TypeCode::I32:
  case TypeCode::I64:
  case TypeCode::F32:
  case TypeCode::F64:
  case TypeCode::V128:
    // toLLVMType yields the matching scalar type, or 2 x i64 for v128.
    return LLVM::Value::getConstNull(toLLVMType(LLContext, ValType));
  default:
    // No other type code is expected here.
    assumingUnreachable();
  }
}
510
511
class FunctionCompiler {
512
  struct Control;
513
514
public:
515
  /// Prepare to emit code into function \p F.
  ///
  /// When \p F has an LLVM function, this creates its "entry" block, loads the
  /// execution context from the first parameter, optionally allocates
  /// zero-initialised local counters for instruction counting and gas
  /// measuring, and gives every remaining parameter and every declared local
  /// its own stack slot recorded in `Local`. Parameters are stored into their
  /// slots; declared locals start from toLLVMConstantZero.
  FunctionCompiler(LLVM::Compiler::CompileContext &Context,
                   LLVM::FunctionCallee F, Span<const ValType> Locals,
                   bool Interruptible, bool InstructionCounting,
                   bool GasMeasuring) noexcept
      : Context(Context), LLContext(Context.LLContext),
        Interruptible(Interruptible), F(F), Builder(LLContext) {
    // Nothing to emit into when no LLVM function was supplied.
    if (!F.Fn) {
      return;
    }

    Builder.positionAtEnd(LLVM::BasicBlock::create(LLContext, F.Fn, "entry"));
    ExecCtx = Builder.createLoad(Context.ExecCtxTy, F.Fn.getFirstParam());

    if (InstructionCounting) {
      LocalInstrCount = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
    }

    if (GasMeasuring) {
      LocalGas = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalGas);
    }

    // Skip the first parameter (the execution context pointer) and spill
    // every remaining parameter to its own stack slot.
    LLVM::Value Param = F.Fn.getFirstParam().getNextParam();
    while (Param) {
      LLVM::Type ParamTy = Param.getType();
      LLVM::Value Slot = Builder.createAlloca(ParamTy);
      Builder.createStore(Param, Slot);
      Local.emplace_back(ParamTy, Slot);
      Param = Param.getNextParam();
    }

    // Declared locals follow the parameters and start from their zero value.
    for (const auto &LocalType : Locals) {
      LLVM::Type SlotTy = toLLVMType(LLContext, LocalType);
      LLVM::Value Slot = Builder.createAlloca(SlotTy);
      Builder.createStore(toLLVMConstantZero(LLContext, LocalType), Slot);
      Local.emplace_back(SlotTy, Slot);
    }
  }
551
552
33.4k
  /// Return the basic block that traps with \p Error, creating a new "trap"
  /// block and recording it in `TrapBB` the first time a given error code is
  /// requested.
  LLVM::BasicBlock getTrapBB(ErrCode::Value Error) noexcept {
    const auto Found = TrapBB.find(Error);
    if (Found != TrapBB.end()) {
      return Found->second;
    }
    auto NewBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "trap");
    TrapBB.emplace(Error, NewBlock);
    return NewBlock;
  }
560
561
  /// Compile the body of one function.
  ///
  /// Opens the outermost block that jumps to a fresh "ret" block, compiles the
  /// instruction sequence of \p Code, emits the return, and finally fills in
  /// every trap block recorded in `TrapBB` with a call to the shared trap
  /// function. The parameter half of \p Type is cleared before the outermost
  /// block is entered.
  ///
  /// \returns the error from compiling the instructions, if any.
  Expect<void>
  compile(const AST::CodeSegment &Code,
          std::pair<std::vector<ValType>, std::vector<ValType>> Type) noexcept {
    auto ReturnBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "ret");
    Type.first.clear();
    enterBlock(ReturnBlock, {}, {}, {}, std::move(Type));
    EXPECTED_TRY(compile(Code.getExpr().getInstrs()));
    assuming(ControlStack.empty());
    compileReturn();

    // Fill in each trap block requested while compiling the body.
    for (auto &TrapEntry : TrapBB) {
      Builder.positionAtEnd(TrapEntry.second);
      updateInstrCount();
      updateGasAtTrap();
      const auto ErrorCode = static_cast<uint32_t>(TrapEntry.first);
      auto TrapCall =
          Builder.createCall(Context.Trap, {LLContext.getInt32(ErrorCode)});
      TrapCall.addCallSiteAttribute(Context.NoReturn);
      Builder.createUnreachable();
    }
    return {};
  }
582
583
11.4k
  Expect<void> compile(AST::InstrView Instrs) noexcept {
584
1.61M
    auto Dispatch = [this](const AST::Instruction &Instr) -> Expect<void> {
585
1.61M
      switch (Instr.getOpCode()) {
586
      // Control instructions (for blocks)
587
4.00k
      case OpCode::Block: {
588
4.00k
        auto Block = LLVM::BasicBlock::create(LLContext, F.Fn, "block");
589
4.00k
        auto EndBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "block.end");
590
4.00k
        Builder.createBr(Block);
591
592
4.00k
        Builder.positionAtEnd(Block);
593
4.00k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
594
4.00k
        const auto Arity = Type.first.size();
595
4.00k
        std::vector<LLVM::Value> Args(Arity);
596
4.00k
        if (isUnreachable()) {
597
1.14k
          for (size_t I = 0; I < Arity; ++I) {
598
337
            auto Ty = toLLVMType(LLContext, Type.first[I]);
599
337
            Args[I] = LLVM::Value::getUndef(Ty);
600
337
          }
601
3.19k
        } else {
602
3.61k
          for (size_t I = 0; I < Arity; ++I) {
603
420
            const size_t J = Arity - 1 - I;
604
420
            Args[J] = stackPop();
605
420
          }
606
3.19k
        }
607
4.00k
        enterBlock(EndBlock, {}, {}, std::move(Args), std::move(Type));
608
4.00k
        checkStop();
609
4.00k
        updateGas();
610
4.00k
        return {};
611
0
      }
612
1.99k
      case OpCode::Loop: {
613
1.99k
        auto Curr = Builder.getInsertBlock();
614
1.99k
        auto Loop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop");
615
1.99k
        auto EndLoop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop.end");
616
1.99k
        Builder.createBr(Loop);
617
618
1.99k
        Builder.positionAtEnd(Loop);
619
1.99k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
620
1.99k
        const auto Arity = Type.first.size();
621
1.99k
        std::vector<LLVM::Value> Args(Arity);
622
1.99k
        if (isUnreachable()) {
623
995
          for (size_t I = 0; I < Arity; ++I) {
624
431
            auto Ty = toLLVMType(LLContext, Type.first[I]);
625
431
            auto Value = LLVM::Value::getUndef(Ty);
626
431
            auto PHINode = Builder.createPHI(Ty);
627
431
            PHINode.addIncoming(Value, Curr);
628
431
            Args[I] = PHINode;
629
431
          }
630
1.43k
        } else {
631
2.21k
          for (size_t I = 0; I < Arity; ++I) {
632
778
            const size_t J = Arity - 1 - I;
633
778
            auto Value = stackPop();
634
778
            auto PHINode = Builder.createPHI(Value.getType());
635
778
            PHINode.addIncoming(Value, Curr);
636
778
            Args[J] = PHINode;
637
778
          }
638
1.43k
        }
639
1.99k
        enterBlock(Loop, EndLoop, {}, std::move(Args), std::move(Type));
640
1.99k
        checkStop();
641
1.99k
        updateGas();
642
1.99k
        return {};
643
0
      }
644
2.86k
      case OpCode::If: {
645
2.86k
        auto Then = LLVM::BasicBlock::create(LLContext, F.Fn, "then");
646
2.86k
        auto Else = LLVM::BasicBlock::create(LLContext, F.Fn, "else");
647
2.86k
        auto EndIf = LLVM::BasicBlock::create(LLContext, F.Fn, "if.end");
648
2.86k
        LLVM::Value Cond;
649
2.86k
        if (isUnreachable()) {
650
556
          Cond = LLVM::Value::getUndef(LLContext.getInt1Ty());
651
2.31k
        } else {
652
2.31k
          Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
653
2.31k
        }
654
2.86k
        Builder.createCondBr(Cond, Then, Else);
655
656
2.86k
        Builder.positionAtEnd(Then);
657
2.86k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
658
2.86k
        const auto Arity = Type.first.size();
659
2.86k
        std::vector<LLVM::Value> Args(Arity);
660
2.86k
        if (isUnreachable()) {
661
1.00k
          for (size_t I = 0; I < Arity; ++I) {
662
448
            auto Ty = toLLVMType(LLContext, Type.first[I]);
663
448
            Args[I] = LLVM::Value::getUndef(Ty);
664
448
          }
665
2.31k
        } else {
666
3.13k
          for (size_t I = 0; I < Arity; ++I) {
667
820
            const size_t J = Arity - 1 - I;
668
820
            Args[J] = stackPop();
669
820
          }
670
2.31k
        }
671
2.86k
        enterBlock(EndIf, {}, Else, std::move(Args), std::move(Type));
672
2.86k
        return {};
673
0
      }
674
9
      case OpCode::Try_table:
675
        // TODO: EXCEPTION - implement the AOT.
676
9
        return Unexpect(ErrCode::Value::AOTNotImpl);
677
20.2k
      case OpCode::End: {
678
20.2k
        auto Entry = leaveBlock();
679
20.2k
        if (Entry.ElseBlock) {
680
1.17k
          auto Block = Builder.getInsertBlock();
681
1.17k
          Builder.positionAtEnd(Entry.ElseBlock);
682
1.17k
          enterBlock(Block, {}, {}, std::move(Entry.Args),
683
1.17k
                     std::move(Entry.Type), std::move(Entry.ReturnPHI));
684
1.17k
          Entry = leaveBlock();
685
1.17k
        }
686
20.2k
        buildPHI(Entry.Type.second, Entry.ReturnPHI);
687
20.2k
        return {};
688
0
      }
689
1.68k
      case OpCode::Else: {
690
1.68k
        auto Entry = leaveBlock();
691
1.68k
        Builder.positionAtEnd(Entry.ElseBlock);
692
1.68k
        enterBlock(Entry.JumpBlock, {}, {}, std::move(Entry.Args),
693
1.68k
                   std::move(Entry.Type), std::move(Entry.ReturnPHI));
694
1.68k
        return {};
695
0
      }
696
1.58M
      default:
697
1.58M
        break;
698
1.61M
      }
699
700
1.58M
      if (isUnreachable()) {
701
491k
        return {};
702
491k
      }
703
704
1.09M
      switch (Instr.getOpCode()) {
705
      // Control instructions
706
3.37k
      case OpCode::Unreachable:
707
3.37k
        Builder.createBr(getTrapBB(ErrCode::Value::Unreachable));
708
3.37k
        setUnreachable();
709
3.37k
        Builder.positionAtEnd(
710
3.37k
            LLVM::BasicBlock::create(LLContext, F.Fn, "unreachable.end"));
711
3.37k
        break;
712
45.3k
      case OpCode::Nop:
713
45.3k
        break;
714
1
      case OpCode::Throw:
715
2
      case OpCode::Throw_ref:
716
        // TODO: EXCEPTION - implement the AOT.
717
2
        return Unexpect(ErrCode::Value::AOTNotImpl);
718
783
      case OpCode::Br: {
719
783
        const auto Label = Instr.getJump().TargetIndex;
720
783
        setLableJumpPHI(Label);
721
783
        Builder.createBr(getLabel(Label));
722
783
        setUnreachable();
723
783
        Builder.positionAtEnd(
724
783
            LLVM::BasicBlock::create(LLContext, F.Fn, "br.end"));
725
783
        break;
726
1
      }
727
372
      case OpCode::Br_if: {
728
372
        const auto Label = Instr.getJump().TargetIndex;
729
372
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
730
372
        setLableJumpPHI(Label);
731
372
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_if.end");
732
372
        Builder.createCondBr(Cond, getLabel(Label), Next);
733
372
        Builder.positionAtEnd(Next);
734
372
        break;
735
1
      }
736
1.11k
      case OpCode::Br_table: {
737
1.11k
        auto LabelTable = Instr.getLabelList();
738
1.11k
        assuming(LabelTable.size() <= std::numeric_limits<uint32_t>::max());
739
1.11k
        const auto LabelTableSize =
740
1.11k
            static_cast<uint32_t>(LabelTable.size() - 1);
741
1.11k
        auto Value = stackPop();
742
1.11k
        setLableJumpPHI(LabelTable[LabelTableSize].TargetIndex);
743
1.11k
        auto Switch = Builder.createSwitch(
744
1.11k
            Value, getLabel(LabelTable[LabelTableSize].TargetIndex),
745
1.11k
            LabelTableSize);
746
37.1k
        for (uint32_t I = 0; I < LabelTableSize; ++I) {
747
36.0k
          setLableJumpPHI(LabelTable[I].TargetIndex);
748
36.0k
          Switch.addCase(LLContext.getInt32(I),
749
36.0k
                         getLabel(LabelTable[I].TargetIndex));
750
36.0k
        }
751
1.11k
        setUnreachable();
752
1.11k
        Builder.positionAtEnd(
753
1.11k
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_table.end"));
754
1.11k
        break;
755
1.11k
      }
756
17
      case OpCode::Br_on_null: {
757
17
        const auto Label = Instr.getJump().TargetIndex;
758
17
        auto Value = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
759
17
        auto Cond = Builder.createICmpEQ(
760
17
            Builder.createExtractElement(Value, LLContext.getInt64(1)),
761
17
            LLContext.getInt64(0));
762
17
        setLableJumpPHI(Label);
763
17
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_null.end");
764
17
        Builder.createCondBr(Cond, getLabel(Label), Next);
765
17
        Builder.positionAtEnd(Next);
766
17
        stackPush(Value);
767
17
        break;
768
1.11k
      }
769
10
      case OpCode::Br_on_non_null: {
770
10
        const auto Label = Instr.getJump().TargetIndex;
771
10
        auto Cond = Builder.createICmpNE(
772
10
            Builder.createExtractElement(
773
10
                Builder.createBitCast(Stack.back(), Context.Int64x2Ty),
774
10
                LLContext.getInt64(1)),
775
10
            LLContext.getInt64(0));
776
10
        setLableJumpPHI(Label);
777
10
        auto Next =
778
10
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_non_null.end");
779
10
        Builder.createCondBr(Cond, getLabel(Label), Next);
780
10
        Builder.positionAtEnd(Next);
781
10
        stackPop();
782
10
        break;
783
1.11k
      }
784
0
      case OpCode::Br_on_cast:
785
0
      case OpCode::Br_on_cast_fail: {
786
0
        auto Ref = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
787
0
        const auto Label = Instr.getBrCast().Jump.TargetIndex;
788
0
        std::array<uint8_t, 16> Buf = {0};
789
0
        std::copy_n(Instr.getBrCast().RType2.getRawData().cbegin(), 8,
790
0
                    Buf.begin());
791
0
        auto VType = Builder.createExtractElement(
792
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
793
0
                                  Context.Int64x2Ty),
794
0
            LLContext.getInt64(0));
795
0
        auto IsRefTest = Builder.createCall(
796
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
797
0
                                 LLVM::Type::getFunctionType(
798
0
                                     Context.Int32Ty,
799
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
800
0
                                     false)),
801
0
            {Ref, VType});
802
0
        auto Cond =
803
0
            (Instr.getOpCode() == OpCode::Br_on_cast)
804
0
                ? Builder.createICmpNE(IsRefTest, LLContext.getInt32(0))
805
0
                : Builder.createICmpEQ(IsRefTest, LLContext.getInt32(0));
806
0
        setLableJumpPHI(Label);
807
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_cast.end");
808
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
809
0
        Builder.positionAtEnd(Next);
810
0
        break;
811
0
      }
812
707
      case OpCode::Return:
813
707
        compileReturn();
814
707
        setUnreachable();
815
707
        Builder.positionAtEnd(
816
707
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret.end"));
817
707
        break;
818
3.34k
      case OpCode::Call:
819
3.34k
        updateInstrCount();
820
3.34k
        updateGas();
821
3.34k
        compileCallOp(Instr.getTargetIndex());
822
3.34k
        break;
823
1.21k
      case OpCode::Call_indirect:
824
1.21k
        updateInstrCount();
825
1.21k
        updateGas();
826
1.21k
        compileIndirectCallOp(Instr.getSourceIndex(), Instr.getTargetIndex());
827
1.21k
        break;
828
78
      case OpCode::Return_call:
829
78
        updateInstrCount();
830
78
        updateGas();
831
78
        compileReturnCallOp(Instr.getTargetIndex());
832
78
        setUnreachable();
833
78
        Builder.positionAtEnd(
834
78
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call.end"));
835
78
        break;
836
209
      case OpCode::Return_call_indirect:
837
209
        updateInstrCount();
838
209
        updateGas();
839
209
        compileReturnIndirectCallOp(Instr.getSourceIndex(),
840
209
                                    Instr.getTargetIndex());
841
209
        setUnreachable();
842
209
        Builder.positionAtEnd(
843
209
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_indir.end"));
844
209
        break;
845
15
      case OpCode::Call_ref:
846
15
        updateInstrCount();
847
15
        updateGas();
848
15
        compileCallRefOp(Instr.getTargetIndex());
849
15
        break;
850
32
      case OpCode::Return_call_ref:
851
32
        updateInstrCount();
852
32
        updateGas();
853
32
        compileReturnCallRefOp(Instr.getTargetIndex());
854
32
        setUnreachable();
855
32
        Builder.positionAtEnd(
856
32
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_ref.end"));
857
32
        break;
858
0
      case OpCode::Try_table:
859
        // TODO: EXCEPTION - implement the AOT.
860
0
        return Unexpect(ErrCode::Value::AOTNotImpl);
861
862
      // Reference Instructions
863
2.82k
      case OpCode::Ref__null: {
864
2.82k
        std::array<uint8_t, 16> Buf = {0};
865
        // Null references take the bottom (null) type of their type hierarchy.
866
2.82k
        ValType VType;
867
2.82k
        if (Instr.getValType().isAbsHeapType()) {
868
2.74k
          switch (Instr.getValType().getHeapTypeCode()) {
869
8
          case TypeCode::NullFuncRef:
870
1.06k
          case TypeCode::FuncRef:
871
1.06k
            VType = TypeCode::NullFuncRef;
872
1.06k
            break;
873
10
          case TypeCode::NullExternRef:
874
1.19k
          case TypeCode::ExternRef:
875
1.19k
            VType = TypeCode::NullExternRef;
876
1.19k
            break;
877
25
          case TypeCode::NullExnRef:
878
28
          case TypeCode::ExnRef:
879
28
            VType = TypeCode::NullExnRef;
880
28
            break;
881
335
          case TypeCode::NullRef:
882
348
          case TypeCode::AnyRef:
883
361
          case TypeCode::EqRef:
884
424
          case TypeCode::I31Ref:
885
426
          case TypeCode::StructRef:
886
450
          case TypeCode::ArrayRef:
887
450
            VType = TypeCode::NullRef;
888
450
            break;
889
0
          default:
890
0
            assumingUnreachable();
891
2.74k
          }
892
2.74k
        } else {
893
80
          assuming(Instr.getValType().getTypeIndex() <
894
80
                   Context.CompositeTypes.size());
895
80
          const auto *CompType =
896
80
              Context.CompositeTypes[Instr.getValType().getTypeIndex()];
897
80
          assuming(CompType != nullptr);
898
80
          if (CompType->isFunc()) {
899
53
            VType = TypeCode::NullFuncRef;
900
53
          } else {
901
27
            VType = TypeCode::NullRef;
902
27
          }
903
80
        }
904
2.82k
        std::copy_n(VType.getRawData().cbegin(), 8, Buf.begin());
905
2.82k
        stackPush(Builder.createBitCast(
906
2.82k
            LLVM::Value::getConstVector8(LLContext, Buf), Context.Int64x2Ty));
907
2.82k
        break;
908
2.82k
      }
909
1.17k
      case OpCode::Ref__is_null:
910
1.17k
        stackPush(Builder.createZExt(
911
1.17k
            Builder.createICmpEQ(
912
1.17k
                Builder.createExtractElement(
913
1.17k
                    Builder.createBitCast(stackPop(), Context.Int64x2Ty),
914
1.17k
                    LLContext.getInt64(1)),
915
1.17k
                LLContext.getInt64(0)),
916
1.17k
            Context.Int32Ty));
917
1.17k
        break;
918
27
      case OpCode::Ref__func:
919
27
        stackPush(Builder.createCall(
920
27
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefFunc,
921
27
                                 LLVM::Type::getFunctionType(Context.Int64x2Ty,
922
27
                                                             {Context.Int32Ty},
923
27
                                                             false)),
924
27
            {LLContext.getInt32(Instr.getTargetIndex())}));
925
27
        break;
926
13
      case OpCode::Ref__eq: {
927
13
        LLVM::Value RHS = stackPop();
928
13
        LLVM::Value LHS = stackPop();
929
13
        stackPush(Builder.createZExt(
930
13
            Builder.createICmpEQ(
931
13
                Builder.createExtractElement(LHS, LLContext.getInt64(1)),
932
13
                Builder.createExtractElement(RHS, LLContext.getInt64(1))),
933
13
            Context.Int32Ty));
934
13
        break;
935
2.82k
      }
936
330
      case OpCode::Ref__as_non_null: {
937
330
        auto Next =
938
330
            LLVM::BasicBlock::create(LLContext, F.Fn, "ref_as_non_null.ok");
939
330
        Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
940
330
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
941
330
            Builder.createExtractElement(Stack.back(), LLContext.getInt64(1)),
942
330
            LLContext.getInt64(0)));
943
330
        Builder.createCondBr(IsNotNull, Next,
944
330
                             getTrapBB(ErrCode::Value::CastNullToNonNull));
945
330
        Builder.positionAtEnd(Next);
946
330
        break;
947
2.82k
      }
948
949
      // Reference Instructions (GC proposal)
950
34
      case OpCode::Struct__new:
951
83
      case OpCode::Struct__new_default: {
952
83
        LLVM::Value Args = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
953
83
        assuming(Instr.getTargetIndex() < Context.CompositeTypes.size());
954
83
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
955
83
        assuming(CompType != nullptr && !CompType->isFunc());
956
83
        auto ArgSize = CompType->getFieldTypes().size();
957
83
        if (Instr.getOpCode() == OpCode::Struct__new) {
958
34
          std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
959
34
          for (size_t I = 0; I < ArgSize; ++I) {
960
0
            ArgsVec[ArgSize - I - 1] = stackPop();
961
0
          }
962
34
          Args = Builder.createArray(ArgSize, kValSize);
963
34
          Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
964
49
        } else {
965
49
          ArgSize = 0;
966
49
        }
967
83
        stackPush(Builder.createCall(
968
83
            Context.getIntrinsic(
969
83
                Builder, Executable::Intrinsics::kStructNew,
970
83
                LLVM::Type::getFunctionType(
971
83
                    Context.Int64x2Ty,
972
83
                    {Context.Int32Ty, Context.Int8PtrTy, Context.Int32Ty},
973
83
                    false)),
974
83
            {LLContext.getInt32(Instr.getTargetIndex()), Args,
975
83
             LLContext.getInt32(static_cast<uint32_t>(ArgSize))}));
976
83
        break;
977
83
      }
978
0
      case OpCode::Struct__get:
979
0
      case OpCode::Struct__get_u:
980
0
      case OpCode::Struct__get_s: {
981
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
982
0
                 Context.CompositeTypes.size());
983
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
984
0
        assuming(CompType != nullptr && !CompType->isFunc());
985
0
        assuming(static_cast<size_t>(Instr.getSourceIndex()) <
986
0
                 CompType->getFieldTypes().size());
987
0
        const auto &StorageType =
988
0
            CompType->getFieldTypes()[Instr.getSourceIndex()].getStorageType();
989
0
        auto Ref = stackPop();
990
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Struct__get_s)
991
0
                            ? LLContext.getInt8(1)
992
0
                            : LLContext.getInt8(0);
993
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
994
0
        Builder.createCall(
995
0
            Context.getIntrinsic(
996
0
                Builder, Executable::Intrinsics::kStructGet,
997
0
                LLVM::Type::getFunctionType(Context.VoidTy,
998
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
999
0
                                             Context.Int32Ty, Context.Int8Ty,
1000
0
                                             Context.Int8PtrTy},
1001
0
                                            false)),
1002
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1003
0
             LLContext.getInt32(Instr.getSourceIndex()), IsSigned, Ret});
1004
1005
0
        switch (StorageType.getCode()) {
1006
0
        case TypeCode::I8:
1007
0
        case TypeCode::I16:
1008
0
        case TypeCode::I32: {
1009
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1010
0
                                               Context.Int64x2Ty));
1011
0
          break;
1012
0
        }
1013
0
        case TypeCode::I64: {
1014
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1015
0
                                               Context.Int64x2Ty));
1016
0
          break;
1017
0
        }
1018
0
        case TypeCode::F32: {
1019
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1020
0
                                               Context.Int64x2Ty));
1021
0
          break;
1022
0
        }
1023
0
        case TypeCode::F64: {
1024
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1025
0
                                               Context.Int64x2Ty));
1026
0
          break;
1027
0
        }
1028
0
        case TypeCode::V128:
1029
0
        case TypeCode::Ref:
1030
0
        case TypeCode::RefNull: {
1031
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1032
0
                                               Context.Int64x2Ty));
1033
0
          break;
1034
0
        }
1035
0
        default:
1036
0
          assumingUnreachable();
1037
0
        }
1038
0
        break;
1039
0
      }
1040
0
      case OpCode::Struct__set: {
1041
0
        auto Val = stackPop();
1042
0
        auto Ref = stackPop();
1043
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1044
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1045
0
        Builder.createCall(
1046
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kStructSet,
1047
0
                                 LLVM::Type::getFunctionType(
1048
0
                                     Context.VoidTy,
1049
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1050
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1051
0
                                     false)),
1052
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1053
0
             LLContext.getInt32(Instr.getSourceIndex()), Arg});
1054
0
        break;
1055
0
      }
1056
66
      case OpCode::Array__new: {
1057
66
        auto Length = stackPop();
1058
66
        auto Val = stackPop();
1059
66
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1060
66
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1061
66
        stackPush(Builder.createCall(
1062
66
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1063
66
                                 LLVM::Type::getFunctionType(
1064
66
                                     Context.Int64x2Ty,
1065
66
                                     {Context.Int32Ty, Context.Int32Ty,
1066
66
                                      Context.Int8PtrTy, Context.Int32Ty},
1067
66
                                     false)),
1068
66
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1069
66
             LLContext.getInt32(1)}));
1070
66
        break;
1071
0
      }
1072
66
      case OpCode::Array__new_default: {
1073
66
        auto Length = stackPop();
1074
66
        LLVM::Value Arg = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1075
66
        stackPush(Builder.createCall(
1076
66
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1077
66
                                 LLVM::Type::getFunctionType(
1078
66
                                     Context.Int64x2Ty,
1079
66
                                     {Context.Int32Ty, Context.Int32Ty,
1080
66
                                      Context.Int8PtrTy, Context.Int32Ty},
1081
66
                                     false)),
1082
66
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1083
66
             LLContext.getInt32(0)}));
1084
66
        break;
1085
0
      }
1086
63
      case OpCode::Array__new_fixed: {
1087
63
        const auto ArgSize = Instr.getSourceIndex();
1088
63
        std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1089
211
        for (size_t I = 0; I < ArgSize; ++I) {
1090
148
          ArgsVec[ArgSize - I - 1] = stackPop();
1091
148
        }
1092
63
        LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
1093
63
        Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1094
63
        stackPush(Builder.createCall(
1095
63
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1096
63
                                 LLVM::Type::getFunctionType(
1097
63
                                     Context.Int64x2Ty,
1098
63
                                     {Context.Int32Ty, Context.Int32Ty,
1099
63
                                      Context.Int8PtrTy, Context.Int32Ty},
1100
63
                                     false)),
1101
63
            {LLContext.getInt32(Instr.getTargetIndex()),
1102
63
             LLContext.getInt32(ArgSize), Args, LLContext.getInt32(ArgSize)}));
1103
63
        break;
1104
0
      }
1105
0
      case OpCode::Array__new_data:
1106
0
      case OpCode::Array__new_elem: {
1107
0
        auto Length = stackPop();
1108
0
        auto Start = stackPop();
1109
0
        stackPush(Builder.createCall(
1110
0
            Context.getIntrinsic(
1111
0
                Builder,
1112
0
                ((Instr.getOpCode() == OpCode::Array__new_data)
1113
0
                     ? Executable::Intrinsics::kArrayNewData
1114
0
                     : Executable::Intrinsics::kArrayNewElem),
1115
0
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1116
0
                                            {Context.Int32Ty, Context.Int32Ty,
1117
0
                                             Context.Int32Ty, Context.Int32Ty},
1118
0
                                            false)),
1119
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1120
0
             LLContext.getInt32(Instr.getSourceIndex()), Start, Length}));
1121
0
        break;
1122
0
      }
1123
131
      case OpCode::Array__get:
1124
151
      case OpCode::Array__get_u:
1125
186
      case OpCode::Array__get_s: {
1126
186
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1127
186
                 Context.CompositeTypes.size());
1128
186
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1129
186
        assuming(CompType != nullptr && !CompType->isFunc());
1130
186
        assuming(static_cast<size_t>(1) == CompType->getFieldTypes().size());
1131
186
        const auto &StorageType = CompType->getFieldTypes()[0].getStorageType();
1132
186
        auto Idx = stackPop();
1133
186
        auto Ref = stackPop();
1134
186
        auto IsSigned = (Instr.getOpCode() == OpCode::Array__get_s)
1135
186
                            ? LLContext.getInt8(1)
1136
186
                            : LLContext.getInt8(0);
1137
186
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1138
186
        Builder.createCall(
1139
186
            Context.getIntrinsic(
1140
186
                Builder, Executable::Intrinsics::kArrayGet,
1141
186
                LLVM::Type::getFunctionType(Context.VoidTy,
1142
186
                                            {Context.Int64x2Ty, Context.Int32Ty,
1143
186
                                             Context.Int32Ty, Context.Int8Ty,
1144
186
                                             Context.Int8PtrTy},
1145
186
                                            false)),
1146
186
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, IsSigned,
1147
186
             Ret});
1148
1149
186
        switch (StorageType.getCode()) {
1150
22
        case TypeCode::I8:
1151
55
        case TypeCode::I16:
1152
77
        case TypeCode::I32: {
1153
77
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1154
77
                                               Context.Int64x2Ty));
1155
77
          break;
1156
55
        }
1157
18
        case TypeCode::I64: {
1158
18
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1159
18
                                               Context.Int64x2Ty));
1160
18
          break;
1161
55
        }
1162
18
        case TypeCode::F32: {
1163
18
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1164
18
                                               Context.Int64x2Ty));
1165
18
          break;
1166
55
        }
1167
19
        case TypeCode::F64: {
1168
19
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1169
19
                                               Context.Int64x2Ty));
1170
19
          break;
1171
55
        }
1172
19
        case TypeCode::V128:
1173
19
        case TypeCode::Ref:
1174
54
        case TypeCode::RefNull: {
1175
54
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1176
54
                                               Context.Int64x2Ty));
1177
54
          break;
1178
19
        }
1179
0
        default:
1180
0
          assumingUnreachable();
1181
186
        }
1182
186
        break;
1183
186
      }
1184
186
      case OpCode::Array__set: {
1185
0
        auto Val = stackPop();
1186
0
        auto Idx = stackPop();
1187
0
        auto Ref = stackPop();
1188
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1189
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1190
0
        Builder.createCall(
1191
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArraySet,
1192
0
                                 LLVM::Type::getFunctionType(
1193
0
                                     Context.VoidTy,
1194
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1195
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1196
0
                                     false)),
1197
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, Arg});
1198
0
        break;
1199
186
      }
1200
49
      case OpCode::Array__len: {
1201
49
        auto Ref = stackPop();
1202
49
        stackPush(Builder.createCall(
1203
49
            Context.getIntrinsic(
1204
49
                Builder, Executable::Intrinsics::kArrayLen,
1205
49
                LLVM::Type::getFunctionType(Context.Int32Ty,
1206
49
                                            {Context.Int64x2Ty}, false)),
1207
49
            {Ref}));
1208
49
        break;
1209
186
      }
1210
10
      case OpCode::Array__fill: {
1211
10
        auto Cnt = stackPop();
1212
10
        auto Val = stackPop();
1213
10
        auto Off = stackPop();
1214
10
        auto Ref = stackPop();
1215
10
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1216
10
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1217
10
        Builder.createCall(
1218
10
            Context.getIntrinsic(
1219
10
                Builder, Executable::Intrinsics::kArrayFill,
1220
10
                LLVM::Type::getFunctionType(Context.VoidTy,
1221
10
                                            {Context.Int64x2Ty, Context.Int32Ty,
1222
10
                                             Context.Int32Ty, Context.Int32Ty,
1223
10
                                             Context.Int8PtrTy},
1224
10
                                            false)),
1225
10
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Off, Cnt, Arg});
1226
10
        break;
1227
186
      }
1228
0
      case OpCode::Array__copy: {
1229
0
        auto Cnt = stackPop();
1230
0
        auto SrcOff = stackPop();
1231
0
        auto SrcRef = stackPop();
1232
0
        auto DstOff = stackPop();
1233
0
        auto DstRef = stackPop();
1234
0
        Builder.createCall(
1235
0
            Context.getIntrinsic(
1236
0
                Builder, Executable::Intrinsics::kArrayCopy,
1237
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1238
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1239
0
                                             Context.Int32Ty, Context.Int64x2Ty,
1240
0
                                             Context.Int32Ty, Context.Int32Ty,
1241
0
                                             Context.Int32Ty},
1242
0
                                            false)),
1243
0
            {DstRef, LLContext.getInt32(Instr.getTargetIndex()), DstOff, SrcRef,
1244
0
             LLContext.getInt32(Instr.getSourceIndex()), SrcOff, Cnt});
1245
0
        break;
1246
186
      }
1247
0
      case OpCode::Array__init_data:
1248
0
      case OpCode::Array__init_elem: {
1249
0
        auto Cnt = stackPop();
1250
0
        auto SrcOff = stackPop();
1251
0
        auto DstOff = stackPop();
1252
0
        auto Ref = stackPop();
1253
0
        Builder.createCall(
1254
0
            Context.getIntrinsic(
1255
0
                Builder,
1256
0
                ((Instr.getOpCode() == OpCode::Array__init_data)
1257
0
                     ? Executable::Intrinsics::kArrayInitData
1258
0
                     : Executable::Intrinsics::kArrayInitElem),
1259
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1260
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1261
0
                                             Context.Int32Ty, Context.Int32Ty,
1262
0
                                             Context.Int32Ty, Context.Int32Ty},
1263
0
                                            false)),
1264
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1265
0
             LLContext.getInt32(Instr.getSourceIndex()), DstOff, SrcOff, Cnt});
1266
0
        break;
1267
0
      }
1268
10
      case OpCode::Ref__test:
1269
22
      case OpCode::Ref__test_null: {
1270
22
        auto Ref = stackPop();
1271
22
        std::array<uint8_t, 16> Buf = {0};
1272
22
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1273
22
        auto VType = Builder.createExtractElement(
1274
22
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1275
22
                                  Context.Int64x2Ty),
1276
22
            LLContext.getInt64(0));
1277
22
        stackPush(Builder.createCall(
1278
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
1279
22
                                 LLVM::Type::getFunctionType(
1280
22
                                     Context.Int32Ty,
1281
22
                                     {Context.Int64x2Ty, Context.Int64Ty},
1282
22
                                     false)),
1283
22
            {Ref, VType}));
1284
22
        break;
1285
10
      }
1286
22
      case OpCode::Ref__cast:
1287
40
      case OpCode::Ref__cast_null: {
1288
40
        auto Ref = stackPop();
1289
40
        std::array<uint8_t, 16> Buf = {0};
1290
40
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1291
40
        auto VType = Builder.createExtractElement(
1292
40
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1293
40
                                  Context.Int64x2Ty),
1294
40
            LLContext.getInt64(0));
1295
40
        stackPush(Builder.createCall(
1296
40
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefCast,
1297
40
                                 LLVM::Type::getFunctionType(
1298
40
                                     Context.Int64x2Ty,
1299
40
                                     {Context.Int64x2Ty, Context.Int64Ty},
1300
40
                                     false)),
1301
40
            {Ref, VType}));
1302
40
        break;
1303
22
      }
1304
1
      case OpCode::Any__convert_extern: {
1305
1
        std::array<uint8_t, 16> RawRef = {0};
1306
1
        auto Ref = stackPop();
1307
1
        auto PtrVal = Builder.createExtractElement(Ref, LLContext.getInt64(1));
1308
1
        auto IsNullBB =
1309
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.null");
1310
1
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1311
1
                                                  "any_conv_extern.not_null");
1312
1
        auto IsExtrefBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1313
1
                                                   "any_conv_extern.is_extref");
1314
1
        auto EndBB =
1315
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.end");
1316
1
        auto CondIsNull = Builder.createICmpEQ(PtrVal, LLContext.getInt64(0));
1317
1
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1318
1319
1
        Builder.positionAtEnd(IsNullBB);
1320
1
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullRef);
1321
1
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1322
1
        auto Ret1 = Builder.createBitCast(
1323
1
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1324
1
        Builder.createBr(EndBB);
1325
1326
1
        Builder.positionAtEnd(NotNullBB);
1327
1
        auto Ret2 = Builder.createBitCast(
1328
1
            Builder.createInsertElement(
1329
1
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1330
1
                LLContext.getInt8(0), LLContext.getInt64(1)),
1331
1
            Context.Int64x2Ty);
1332
1
        auto HType = Builder.createExtractElement(
1333
1
            Builder.createBitCast(Ret2, Context.Int8x16Ty),
1334
1
            LLContext.getInt64(3));
1335
1
        auto CondIsExtref = Builder.createOr(
1336
1
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1337
1
                                            TypeCode::ExternRef))),
1338
1
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1339
1
                                            TypeCode::NullExternRef))));
1340
1
        Builder.createCondBr(CondIsExtref, IsExtrefBB, EndBB);
1341
1342
1
        Builder.positionAtEnd(IsExtrefBB);
1343
1
        VT = ValType(TypeCode::Ref, TypeCode::AnyRef);
1344
1
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1345
1
        auto Ret3 = Builder.createInsertElement(
1346
1
            Builder.createBitCast(
1347
1
                LLVM::Value::getConstVector8(LLContext, RawRef),
1348
1
                Context.Int64x2Ty),
1349
1
            PtrVal, LLContext.getInt64(1));
1350
1
        Builder.createBr(EndBB);
1351
1352
1
        Builder.positionAtEnd(EndBB);
1353
1
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1354
1
        Ret.addIncoming(Ret1, IsNullBB);
1355
1
        Ret.addIncoming(Ret2, NotNullBB);
1356
1
        Ret.addIncoming(Ret3, IsExtrefBB);
1357
1
        stackPush(Ret);
1358
1
        break;
1359
22
      }
1360
48
      case OpCode::Extern__convert_any: {
1361
48
        std::array<uint8_t, 16> RawRef = {0};
1362
48
        auto Ref = stackPop();
1363
48
        auto IsNullBB =
1364
48
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.null");
1365
48
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1366
48
                                                  "extern_conv_any.not_null");
1367
48
        auto EndBB =
1368
48
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.end");
1369
48
        auto CondIsNull = Builder.createICmpEQ(
1370
48
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1371
48
            LLContext.getInt64(0));
1372
48
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1373
1374
48
        Builder.positionAtEnd(IsNullBB);
1375
48
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullExternRef);
1376
48
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1377
48
        auto Ret1 = Builder.createBitCast(
1378
48
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1379
48
        Builder.createBr(EndBB);
1380
1381
48
        Builder.positionAtEnd(NotNullBB);
1382
48
        auto Ret2 = Builder.createBitCast(
1383
48
            Builder.createInsertElement(
1384
48
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1385
48
                LLContext.getInt8(1), LLContext.getInt64(1)),
1386
48
            Context.Int64x2Ty);
1387
48
        Builder.createBr(EndBB);
1388
1389
48
        Builder.positionAtEnd(EndBB);
1390
48
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1391
48
        Ret.addIncoming(Ret1, IsNullBB);
1392
48
        Ret.addIncoming(Ret2, NotNullBB);
1393
48
        stackPush(Ret);
1394
48
        break;
1395
22
      }
1396
86
      case OpCode::Ref__i31: {
1397
86
        std::array<uint8_t, 16> RawRef = {0};
1398
86
        auto VT = ValType(TypeCode::Ref, TypeCode::I31Ref);
1399
86
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1400
86
        auto Ref = Builder.createBitCast(
1401
86
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1402
86
        auto Val = Builder.createZExt(
1403
86
            Builder.createOr(
1404
86
                Builder.createAnd(stackPop(), LLContext.getInt32(0x7FFFFFFFU)),
1405
86
                LLContext.getInt32(0x80000000U)),
1406
86
            Context.Int64Ty);
1407
86
        stackPush(Builder.createInsertElement(Ref, Val, LLContext.getInt64(1)));
1408
86
        break;
1409
22
      }
1410
26
      case OpCode::I31__get_s: {
1411
26
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1412
26
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1413
26
        auto Val = Builder.createTrunc(
1414
26
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1415
26
            Context.Int32Ty);
1416
26
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1417
26
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1418
26
            LLContext.getInt32(0)));
1419
26
        Builder.createCondBr(IsNotNull, Next,
1420
26
                             getTrapBB(ErrCode::Value::AccessNullI31));
1421
26
        Builder.positionAtEnd(Next);
1422
26
        Val = Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU));
1423
26
        stackPush(Builder.createOr(
1424
26
            Val, Builder.createShl(
1425
26
                     Builder.createAnd(Val, LLContext.getInt32(0x40000000U)),
1426
26
                     LLContext.getInt32(1))));
1427
26
        break;
1428
22
      }
1429
23
      case OpCode::I31__get_u: {
1430
23
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1431
23
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1432
23
        auto Val = Builder.createTrunc(
1433
23
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1434
23
            Context.Int32Ty);
1435
23
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1436
23
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1437
23
            LLContext.getInt32(0)));
1438
23
        Builder.createCondBr(IsNotNull, Next,
1439
23
                             getTrapBB(ErrCode::Value::AccessNullI31));
1440
23
        Builder.positionAtEnd(Next);
1441
23
        stackPush(Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU)));
1442
23
        break;
1443
22
      }
1444
1445
      // Parametric Instructions
1446
3.87k
      case OpCode::Drop:
1447
3.87k
        stackPop();
1448
3.87k
        break;
1449
764
      case OpCode::Select:
1450
1.19k
      case OpCode::Select_t: {
1451
1.19k
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
1452
1.19k
        auto False = stackPop();
1453
1.19k
        auto True = stackPop();
1454
1.19k
        stackPush(Builder.createSelect(Cond, True, False));
1455
1.19k
        break;
1456
764
      }
1457
1458
      // Variable Instructions
1459
11.4k
      case OpCode::Local__get: {
1460
11.4k
        const auto &L = Local[Instr.getTargetIndex()];
1461
11.4k
        stackPush(Builder.createLoad(L.first, L.second));
1462
11.4k
        break;
1463
764
      }
1464
3.91k
      case OpCode::Local__set:
1465
3.91k
        Builder.createStore(stackPop(), Local[Instr.getTargetIndex()].second);
1466
3.91k
        break;
1467
847
      case OpCode::Local__tee:
1468
847
        Builder.createStore(Stack.back(), Local[Instr.getTargetIndex()].second);
1469
847
        break;
1470
347
      case OpCode::Global__get: {
1471
347
        const auto G =
1472
347
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex());
1473
347
        stackPush(Builder.createLoad(G.first, G.second));
1474
347
        break;
1475
764
      }
1476
67
      case OpCode::Global__set:
1477
67
        Builder.createStore(
1478
67
            stackPop(),
1479
67
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex()).second);
1480
67
        break;
1481
1482
      // Table Instructions
1483
28
      case OpCode::Table__get: {
1484
28
        auto Idx = stackPop();
1485
28
        stackPush(Builder.createCall(
1486
28
            Context.getIntrinsic(
1487
28
                Builder, Executable::Intrinsics::kTableGet,
1488
28
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1489
28
                                            {Context.Int32Ty, Context.Int32Ty},
1490
28
                                            false)),
1491
28
            {LLContext.getInt32(Instr.getTargetIndex()), Idx}));
1492
28
        break;
1493
764
      }
1494
23
      case OpCode::Table__set: {
1495
23
        auto Ref = stackPop();
1496
23
        auto Idx = stackPop();
1497
23
        Builder.createCall(
1498
23
            Context.getIntrinsic(
1499
23
                Builder, Executable::Intrinsics::kTableSet,
1500
23
                LLVM::Type::getFunctionType(
1501
23
                    Context.Int64Ty,
1502
23
                    {Context.Int32Ty, Context.Int32Ty, Context.Int64x2Ty},
1503
23
                    false)),
1504
23
            {LLContext.getInt32(Instr.getTargetIndex()), Idx, Ref});
1505
23
        break;
1506
764
      }
1507
26
      case OpCode::Table__init: {
1508
26
        auto Len = stackPop();
1509
26
        auto Src = stackPop();
1510
26
        auto Dst = stackPop();
1511
26
        Builder.createCall(
1512
26
            Context.getIntrinsic(
1513
26
                Builder, Executable::Intrinsics::kTableInit,
1514
26
                LLVM::Type::getFunctionType(Context.VoidTy,
1515
26
                                            {Context.Int32Ty, Context.Int32Ty,
1516
26
                                             Context.Int32Ty, Context.Int32Ty,
1517
26
                                             Context.Int32Ty},
1518
26
                                            false)),
1519
26
            {LLContext.getInt32(Instr.getTargetIndex()),
1520
26
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1521
26
        break;
1522
764
      }
1523
33
      case OpCode::Elem__drop: {
1524
33
        Builder.createCall(
1525
33
            Context.getIntrinsic(Builder, Executable::Intrinsics::kElemDrop,
1526
33
                                 LLVM::Type::getFunctionType(
1527
33
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1528
33
            {LLContext.getInt32(Instr.getTargetIndex())});
1529
33
        break;
1530
764
      }
1531
16
      case OpCode::Table__copy: {
1532
16
        auto Len = stackPop();
1533
16
        auto Src = stackPop();
1534
16
        auto Dst = stackPop();
1535
16
        Builder.createCall(
1536
16
            Context.getIntrinsic(
1537
16
                Builder, Executable::Intrinsics::kTableCopy,
1538
16
                LLVM::Type::getFunctionType(Context.VoidTy,
1539
16
                                            {Context.Int32Ty, Context.Int32Ty,
1540
16
                                             Context.Int32Ty, Context.Int32Ty,
1541
16
                                             Context.Int32Ty},
1542
16
                                            false)),
1543
16
            {LLContext.getInt32(Instr.getTargetIndex()),
1544
16
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1545
16
        break;
1546
764
      }
1547
13
      case OpCode::Table__grow: {
1548
13
        auto NewSize = stackPop();
1549
13
        auto Val = stackPop();
1550
13
        stackPush(Builder.createCall(
1551
13
            Context.getIntrinsic(
1552
13
                Builder, Executable::Intrinsics::kTableGrow,
1553
13
                LLVM::Type::getFunctionType(
1554
13
                    Context.Int32Ty,
1555
13
                    {Context.Int32Ty, Context.Int64x2Ty, Context.Int32Ty},
1556
13
                    false)),
1557
13
            {LLContext.getInt32(Instr.getTargetIndex()), Val, NewSize}));
1558
13
        break;
1559
764
      }
1560
17
      case OpCode::Table__size: {
1561
17
        stackPush(Builder.createCall(
1562
17
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableSize,
1563
17
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1564
17
                                                             {Context.Int32Ty},
1565
17
                                                             false)),
1566
17
            {LLContext.getInt32(Instr.getTargetIndex())}));
1567
17
        break;
1568
764
      }
1569
3
      case OpCode::Table__fill: {
1570
3
        auto Len = stackPop();
1571
3
        auto Val = stackPop();
1572
3
        auto Off = stackPop();
1573
3
        Builder.createCall(
1574
3
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableFill,
1575
3
                                 LLVM::Type::getFunctionType(
1576
3
                                     Context.Int32Ty,
1577
3
                                     {Context.Int32Ty, Context.Int32Ty,
1578
3
                                      Context.Int64x2Ty, Context.Int32Ty},
1579
3
                                     false)),
1580
3
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1581
3
        break;
1582
764
      }
1583
1584
      // Memory Instructions
1585
1.37k
      case OpCode::I32__load:
1586
1.37k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1587
1.37k
                      Instr.getMemoryAlign(), Context.Int32Ty);
1588
1.37k
        break;
1589
3.24k
      case OpCode::I64__load:
1590
3.24k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1591
3.24k
                      Instr.getMemoryAlign(), Context.Int64Ty);
1592
3.24k
        break;
1593
108
      case OpCode::F32__load:
1594
108
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1595
108
                      Instr.getMemoryAlign(), Context.FloatTy);
1596
108
        break;
1597
238
      case OpCode::F64__load:
1598
238
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1599
238
                      Instr.getMemoryAlign(), Context.DoubleTy);
1600
238
        break;
1601
642
      case OpCode::I32__load8_s:
1602
642
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1603
642
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1604
642
                      true);
1605
642
        break;
1606
182
      case OpCode::I32__load8_u:
1607
182
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1608
182
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1609
182
                      false);
1610
182
        break;
1611
449
      case OpCode::I32__load16_s:
1612
449
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1613
449
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1614
449
                      true);
1615
449
        break;
1616
1.65k
      case OpCode::I32__load16_u:
1617
1.65k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1618
1.65k
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1619
1.65k
                      false);
1620
1.65k
        break;
1621
732
      case OpCode::I64__load8_s:
1622
732
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1623
732
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1624
732
                      true);
1625
732
        break;
1626
469
      case OpCode::I64__load8_u:
1627
469
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1628
469
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1629
469
                      false);
1630
469
        break;
1631
465
      case OpCode::I64__load16_s:
1632
465
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1633
465
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1634
465
                      true);
1635
465
        break;
1636
694
      case OpCode::I64__load16_u:
1637
694
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1638
694
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1639
694
                      false);
1640
694
        break;
1641
456
      case OpCode::I64__load32_s:
1642
456
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1643
456
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1644
456
                      true);
1645
456
        break;
1646
514
      case OpCode::I64__load32_u:
1647
514
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1648
514
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1649
514
                      false);
1650
514
        break;
1651
458
      case OpCode::I32__store:
1652
458
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1653
458
                       Instr.getMemoryAlign(), Context.Int32Ty);
1654
458
        break;
1655
1.40k
      case OpCode::I64__store:
1656
1.40k
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1657
1.40k
                       Instr.getMemoryAlign(), Context.Int64Ty);
1658
1.40k
        break;
1659
83
      case OpCode::F32__store:
1660
83
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1661
83
                       Instr.getMemoryAlign(), Context.FloatTy);
1662
83
        break;
1663
51
      case OpCode::F64__store:
1664
51
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1665
51
                       Instr.getMemoryAlign(), Context.DoubleTy);
1666
51
        break;
1667
321
      case OpCode::I32__store8:
1668
341
      case OpCode::I64__store8:
1669
341
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1670
341
                       Instr.getMemoryAlign(), Context.Int8Ty, true);
1671
341
        break;
1672
223
      case OpCode::I32__store16:
1673
324
      case OpCode::I64__store16:
1674
324
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1675
324
                       Instr.getMemoryAlign(), Context.Int16Ty, true);
1676
324
        break;
1677
35
      case OpCode::I64__store32:
1678
35
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1679
35
                       Instr.getMemoryAlign(), Context.Int32Ty, true);
1680
35
        break;
1681
815
      case OpCode::Memory__size:
1682
815
        stackPush(Builder.createCall(
1683
815
            Context.getIntrinsic(Builder, Executable::Intrinsics::kMemSize,
1684
815
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1685
815
                                                             {Context.Int32Ty},
1686
815
                                                             false)),
1687
815
            {LLContext.getInt32(Instr.getTargetIndex())}));
1688
815
        break;
1689
693
      case OpCode::Memory__grow: {
1690
693
        auto Diff = stackPop();
1691
693
        stackPush(Builder.createCall(
1692
693
            Context.getIntrinsic(
1693
693
                Builder, Executable::Intrinsics::kMemGrow,
1694
693
                LLVM::Type::getFunctionType(Context.Int32Ty,
1695
693
                                            {Context.Int32Ty, Context.Int32Ty},
1696
693
                                            false)),
1697
693
            {LLContext.getInt32(Instr.getTargetIndex()), Diff}));
1698
693
        break;
1699
223
      }
1700
24
      case OpCode::Memory__init: {
1701
24
        auto Len = stackPop();
1702
24
        auto Src = stackPop();
1703
24
        auto Dst = stackPop();
1704
24
        Builder.createCall(
1705
24
            Context.getIntrinsic(
1706
24
                Builder, Executable::Intrinsics::kMemInit,
1707
24
                LLVM::Type::getFunctionType(Context.VoidTy,
1708
24
                                            {Context.Int32Ty, Context.Int32Ty,
1709
24
                                             Context.Int32Ty, Context.Int32Ty,
1710
24
                                             Context.Int32Ty},
1711
24
                                            false)),
1712
24
            {LLContext.getInt32(Instr.getTargetIndex()),
1713
24
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1714
24
        break;
1715
223
      }
1716
22
      case OpCode::Data__drop: {
1717
22
        Builder.createCall(
1718
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kDataDrop,
1719
22
                                 LLVM::Type::getFunctionType(
1720
22
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1721
22
            {LLContext.getInt32(Instr.getTargetIndex())});
1722
22
        break;
1723
223
      }
1724
351
      case OpCode::Memory__copy: {
1725
351
        auto Len = stackPop();
1726
351
        auto Src = stackPop();
1727
351
        auto Dst = stackPop();
1728
351
        Builder.createCall(
1729
351
            Context.getIntrinsic(
1730
351
                Builder, Executable::Intrinsics::kMemCopy,
1731
351
                LLVM::Type::getFunctionType(Context.VoidTy,
1732
351
                                            {Context.Int32Ty, Context.Int32Ty,
1733
351
                                             Context.Int32Ty, Context.Int32Ty,
1734
351
                                             Context.Int32Ty},
1735
351
                                            false)),
1736
351
            {LLContext.getInt32(Instr.getTargetIndex()),
1737
351
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1738
351
        break;
1739
223
      }
1740
666
      case OpCode::Memory__fill: {
1741
666
        auto Len = stackPop();
1742
666
        auto Val = Builder.createTrunc(stackPop(), Context.Int8Ty);
1743
666
        auto Off = stackPop();
1744
666
        Builder.createCall(
1745
666
            Context.getIntrinsic(
1746
666
                Builder, Executable::Intrinsics::kMemFill,
1747
666
                LLVM::Type::getFunctionType(Context.VoidTy,
1748
666
                                            {Context.Int32Ty, Context.Int32Ty,
1749
666
                                             Context.Int8Ty, Context.Int32Ty},
1750
666
                                            false)),
1751
666
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1752
666
        break;
1753
223
      }
1754
1755
      // Const Numeric Instructions
1756
608k
      case OpCode::I32__const:
1757
608k
        stackPush(LLContext.getInt32(Instr.getNum().get<uint32_t>()));
1758
608k
        break;
1759
80.1k
      case OpCode::I64__const:
1760
80.1k
        stackPush(LLContext.getInt64(Instr.getNum().get<uint64_t>()));
1761
80.1k
        break;
1762
16.1k
      case OpCode::F32__const:
1763
16.1k
        stackPush(LLContext.getFloat(Instr.getNum().get<float>()));
1764
16.1k
        break;
1765
7.47k
      case OpCode::F64__const:
1766
7.47k
        stackPush(LLContext.getDouble(Instr.getNum().get<double>()));
1767
7.47k
        break;
1768
1769
      // Unary Numeric Instructions
1770
8.14k
      case OpCode::I32__eqz:
1771
8.14k
        stackPush(Builder.createZExt(
1772
8.14k
            Builder.createICmpEQ(stackPop(), LLContext.getInt32(0)),
1773
8.14k
            Context.Int32Ty));
1774
8.14k
        break;
1775
1.35k
      case OpCode::I64__eqz:
1776
1.35k
        stackPush(Builder.createZExt(
1777
1.35k
            Builder.createICmpEQ(stackPop(), LLContext.getInt64(0)),
1778
1.35k
            Context.Int32Ty));
1779
1.35k
        break;
1780
2.75k
      case OpCode::I32__clz:
1781
2.75k
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1782
2.75k
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int32Ty},
1783
2.75k
                                          {stackPop(), LLContext.getFalse()}));
1784
2.75k
        break;
1785
298
      case OpCode::I64__clz:
1786
298
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1787
298
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int64Ty},
1788
298
                                          {stackPop(), LLContext.getFalse()}));
1789
298
        break;
1790
1.83k
      case OpCode::I32__ctz:
1791
1.83k
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1792
1.83k
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int32Ty},
1793
1.83k
                                          {stackPop(), LLContext.getFalse()}));
1794
1.83k
        break;
1795
421
      case OpCode::I64__ctz:
1796
421
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1797
421
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int64Ty},
1798
421
                                          {stackPop(), LLContext.getFalse()}));
1799
421
        break;
1800
15.5k
      case OpCode::I32__popcnt:
1801
17.4k
      case OpCode::I64__popcnt:
1802
17.4k
        assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
1803
17.4k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, stackPop()));
1804
17.4k
        break;
1805
824
      case OpCode::F32__abs:
1806
1.87k
      case OpCode::F64__abs:
1807
1.87k
        assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
1808
1.87k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Fabs, stackPop()));
1809
1.87k
        break;
1810
1.06k
      case OpCode::F32__neg:
1811
1.86k
      case OpCode::F64__neg:
1812
1.86k
        stackPush(Builder.createFNeg(stackPop()));
1813
1.86k
        break;
1814
2.99k
      case OpCode::F32__ceil:
1815
5.33k
      case OpCode::F64__ceil:
1816
5.33k
        assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
1817
5.33k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ceil, stackPop()));
1818
5.33k
        break;
1819
876
      case OpCode::F32__floor:
1820
1.29k
      case OpCode::F64__floor:
1821
1.29k
        assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
1822
1.29k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Floor, stackPop()));
1823
1.29k
        break;
1824
555
      case OpCode::F32__trunc:
1825
877
      case OpCode::F64__trunc:
1826
877
        assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
1827
877
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Trunc, stackPop()));
1828
877
        break;
1829
830
      case OpCode::F32__nearest:
1830
1.23k
      case OpCode::F64__nearest: {
1831
1.23k
        const bool IsFloat = Instr.getOpCode() == OpCode::F32__nearest;
1832
1.23k
        LLVM::Value Value = stackPop();
1833
1834
1.23k
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
1835
1.23k
        assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
1836
1.23k
        if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
1837
1.23k
          stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Value));
1838
1.23k
          break;
1839
1.23k
        }
1840
0
#endif
1841
1842
        // The VectorSize is only used when SSE4_1 or NEON is supported.
1843
0
        [[maybe_unused]] const uint32_t VectorSize = IsFloat ? 4 : 2;
1844
0
#if defined(__x86_64__)
1845
0
        if (Context.SupportSSE4_1) {
1846
0
          auto Zero = LLContext.getInt64(0);
1847
0
          auto VectorTy =
1848
0
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1849
0
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1850
0
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1851
0
          auto ID = IsFloat ? LLVM::Core::X86SSE41RoundSs
1852
0
                            : LLVM::Core::X86SSE41RoundSd;
1853
0
          assuming(ID != LLVM::Core::NotIntrinsic);
1854
0
          Ret = Builder.createIntrinsic(ID, {},
1855
0
                                        {Ret, Ret, LLContext.getInt32(8)});
1856
0
          Ret = Builder.createExtractElement(Ret, Zero);
1857
0
          stackPush(Ret);
1858
0
          break;
1859
0
        }
1860
0
#endif
1861
1862
#if defined(__aarch64__)
1863
        if (Context.SupportNEON &&
1864
            LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
1865
          auto Zero = LLContext.getInt64(0);
1866
          auto VectorTy =
1867
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1868
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1869
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1870
          Ret =
1871
              Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, Ret);
1872
          Ret = Builder.createExtractElement(Ret, Zero);
1873
          stackPush(Ret);
1874
          break;
1875
        }
1876
#endif
1877
1878
        // Fallback case.
1879
        // If SSE4.1 is not supported on the x86_64 platform, or
1880
        // NEON is not supported on the aarch64 platform,
1881
        // then fall back to this.
1882
0
        assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
1883
0
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Value));
1884
0
        break;
1885
0
      }
1886
405
      case OpCode::F32__sqrt:
1887
1.61k
      case OpCode::F64__sqrt:
1888
1.61k
        assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
1889
1.61k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, stackPop()));
1890
1.61k
        break;
1891
335
      case OpCode::I32__wrap_i64:
1892
335
        stackPush(Builder.createTrunc(stackPop(), Context.Int32Ty));
1893
335
        break;
1894
1.37k
      case OpCode::I32__trunc_f32_s:
1895
1.37k
        compileSignedTrunc(Context.Int32Ty);
1896
1.37k
        break;
1897
307
      case OpCode::I32__trunc_f64_s:
1898
307
        compileSignedTrunc(Context.Int32Ty);
1899
307
        break;
1900
247
      case OpCode::I32__trunc_f32_u:
1901
247
        compileUnsignedTrunc(Context.Int32Ty);
1902
247
        break;
1903
1.38k
      case OpCode::I32__trunc_f64_u:
1904
1.38k
        compileUnsignedTrunc(Context.Int32Ty);
1905
1.38k
        break;
1906
2.34k
      case OpCode::I64__extend_i32_s:
1907
2.34k
        stackPush(Builder.createSExt(stackPop(), Context.Int64Ty));
1908
2.34k
        break;
1909
428
      case OpCode::I64__extend_i32_u:
1910
428
        stackPush(Builder.createZExt(stackPop(), Context.Int64Ty));
1911
428
        break;
1912
62
      case OpCode::I64__trunc_f32_s:
1913
62
        compileSignedTrunc(Context.Int64Ty);
1914
62
        break;
1915
404
      case OpCode::I64__trunc_f64_s:
1916
404
        compileSignedTrunc(Context.Int64Ty);
1917
404
        break;
1918
1.12k
      case OpCode::I64__trunc_f32_u:
1919
1.12k
        compileUnsignedTrunc(Context.Int64Ty);
1920
1.12k
        break;
1921
1.48k
      case OpCode::I64__trunc_f64_u:
1922
1.48k
        compileUnsignedTrunc(Context.Int64Ty);
1923
1.48k
        break;
1924
2.03k
      case OpCode::F32__convert_i32_s:
1925
2.42k
      case OpCode::F32__convert_i64_s:
1926
2.42k
        stackPush(Builder.createSIToFP(stackPop(), Context.FloatTy));
1927
2.42k
        break;
1928
665
      case OpCode::F32__convert_i32_u:
1929
1.92k
      case OpCode::F32__convert_i64_u:
1930
1.92k
        stackPush(Builder.createUIToFP(stackPop(), Context.FloatTy));
1931
1.92k
        break;
1932
1.60k
      case OpCode::F64__convert_i32_s:
1933
5.99k
      case OpCode::F64__convert_i64_s:
1934
5.99k
        stackPush(Builder.createSIToFP(stackPop(), Context.DoubleTy));
1935
5.99k
        break;
1936
2.03k
      case OpCode::F64__convert_i32_u:
1937
2.22k
      case OpCode::F64__convert_i64_u:
1938
2.22k
        stackPush(Builder.createUIToFP(stackPop(), Context.DoubleTy));
1939
2.22k
        break;
1940
209
      case OpCode::F32__demote_f64:
1941
209
        stackPush(Builder.createFPTrunc(stackPop(), Context.FloatTy));
1942
209
        break;
1943
90
      case OpCode::F64__promote_f32:
1944
90
        stackPush(Builder.createFPExt(stackPop(), Context.DoubleTy));
1945
90
        break;
1946
1.02k
      case OpCode::I32__reinterpret_f32:
1947
1.02k
        stackPush(Builder.createBitCast(stackPop(), Context.Int32Ty));
1948
1.02k
        break;
1949
672
      case OpCode::I64__reinterpret_f64:
1950
672
        stackPush(Builder.createBitCast(stackPop(), Context.Int64Ty));
1951
672
        break;
1952
4.49k
      case OpCode::F32__reinterpret_i32:
1953
4.49k
        stackPush(Builder.createBitCast(stackPop(), Context.FloatTy));
1954
4.49k
        break;
1955
1.23k
      case OpCode::F64__reinterpret_i64:
1956
1.23k
        stackPush(Builder.createBitCast(stackPop(), Context.DoubleTy));
1957
1.23k
        break;
1958
2.31k
      case OpCode::I32__extend8_s:
1959
2.31k
        stackPush(Builder.createSExt(
1960
2.31k
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int32Ty));
1961
2.31k
        break;
1962
3.09k
      case OpCode::I32__extend16_s:
1963
3.09k
        stackPush(Builder.createSExt(
1964
3.09k
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int32Ty));
1965
3.09k
        break;
1966
368
      case OpCode::I64__extend8_s:
1967
368
        stackPush(Builder.createSExt(
1968
368
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int64Ty));
1969
368
        break;
1970
621
      case OpCode::I64__extend16_s:
1971
621
        stackPush(Builder.createSExt(
1972
621
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int64Ty));
1973
621
        break;
1974
746
      case OpCode::I64__extend32_s:
1975
746
        stackPush(Builder.createSExt(
1976
746
            Builder.createTrunc(stackPop(), Context.Int32Ty), Context.Int64Ty));
1977
746
        break;
1978
1979
      // Binary Numeric Instructions
1980
1.18k
      case OpCode::I32__eq:
1981
1.42k
      case OpCode::I64__eq: {
1982
1.42k
        LLVM::Value RHS = stackPop();
1983
1.42k
        LLVM::Value LHS = stackPop();
1984
1.42k
        stackPush(Builder.createZExt(Builder.createICmpEQ(LHS, RHS),
1985
1.42k
                                     Context.Int32Ty));
1986
1.42k
        break;
1987
1.18k
      }
1988
689
      case OpCode::I32__ne:
1989
709
      case OpCode::I64__ne: {
1990
709
        LLVM::Value RHS = stackPop();
1991
709
        LLVM::Value LHS = stackPop();
1992
709
        stackPush(Builder.createZExt(Builder.createICmpNE(LHS, RHS),
1993
709
                                     Context.Int32Ty));
1994
709
        break;
1995
689
      }
1996
4.55k
      case OpCode::I32__lt_s:
1997
5.18k
      case OpCode::I64__lt_s: {
1998
5.18k
        LLVM::Value RHS = stackPop();
1999
5.18k
        LLVM::Value LHS = stackPop();
2000
5.18k
        stackPush(Builder.createZExt(Builder.createICmpSLT(LHS, RHS),
2001
5.18k
                                     Context.Int32Ty));
2002
5.18k
        break;
2003
4.55k
      }
2004
6.55k
      case OpCode::I32__lt_u:
2005
6.89k
      case OpCode::I64__lt_u: {
2006
6.89k
        LLVM::Value RHS = stackPop();
2007
6.89k
        LLVM::Value LHS = stackPop();
2008
6.89k
        stackPush(Builder.createZExt(Builder.createICmpULT(LHS, RHS),
2009
6.89k
                                     Context.Int32Ty));
2010
6.89k
        break;
2011
6.55k
      }
2012
1.18k
      case OpCode::I32__gt_s:
2013
1.69k
      case OpCode::I64__gt_s: {
2014
1.69k
        LLVM::Value RHS = stackPop();
2015
1.69k
        LLVM::Value LHS = stackPop();
2016
1.69k
        stackPush(Builder.createZExt(Builder.createICmpSGT(LHS, RHS),
2017
1.69k
                                     Context.Int32Ty));
2018
1.69k
        break;
2019
1.18k
      }
2020
7.07k
      case OpCode::I32__gt_u:
2021
7.25k
      case OpCode::I64__gt_u: {
2022
7.25k
        LLVM::Value RHS = stackPop();
2023
7.25k
        LLVM::Value LHS = stackPop();
2024
7.25k
        stackPush(Builder.createZExt(Builder.createICmpUGT(LHS, RHS),
2025
7.25k
                                     Context.Int32Ty));
2026
7.25k
        break;
2027
7.07k
      }
2028
2.25k
      case OpCode::I32__le_s:
2029
3.13k
      case OpCode::I64__le_s: {
2030
3.13k
        LLVM::Value RHS = stackPop();
2031
3.13k
        LLVM::Value LHS = stackPop();
2032
3.13k
        stackPush(Builder.createZExt(Builder.createICmpSLE(LHS, RHS),
2033
3.13k
                                     Context.Int32Ty));
2034
3.13k
        break;
2035
2.25k
      }
2036
468
      case OpCode::I32__le_u:
2037
1.79k
      case OpCode::I64__le_u: {
2038
1.79k
        LLVM::Value RHS = stackPop();
2039
1.79k
        LLVM::Value LHS = stackPop();
2040
1.79k
        stackPush(Builder.createZExt(Builder.createICmpULE(LHS, RHS),
2041
1.79k
                                     Context.Int32Ty));
2042
1.79k
        break;
2043
468
      }
2044
1.37k
      case OpCode::I32__ge_s:
2045
1.39k
      case OpCode::I64__ge_s: {
2046
1.39k
        LLVM::Value RHS = stackPop();
2047
1.39k
        LLVM::Value LHS = stackPop();
2048
1.39k
        stackPush(Builder.createZExt(Builder.createICmpSGE(LHS, RHS),
2049
1.39k
                                     Context.Int32Ty));
2050
1.39k
        break;
2051
1.37k
      }
2052
2.00k
      case OpCode::I32__ge_u:
2053
2.72k
      case OpCode::I64__ge_u: {
2054
2.72k
        LLVM::Value RHS = stackPop();
2055
2.72k
        LLVM::Value LHS = stackPop();
2056
2.72k
        stackPush(Builder.createZExt(Builder.createICmpUGE(LHS, RHS),
2057
2.72k
                                     Context.Int32Ty));
2058
2.72k
        break;
2059
2.00k
      }
2060
167
      case OpCode::F32__eq:
2061
227
      case OpCode::F64__eq: {
2062
227
        LLVM::Value RHS = stackPop();
2063
227
        LLVM::Value LHS = stackPop();
2064
227
        stackPush(Builder.createZExt(Builder.createFCmpOEQ(LHS, RHS),
2065
227
                                     Context.Int32Ty));
2066
227
        break;
2067
167
      }
2068
102
      case OpCode::F32__ne:
2069
129
      case OpCode::F64__ne: {
2070
129
        LLVM::Value RHS = stackPop();
2071
129
        LLVM::Value LHS = stackPop();
2072
129
        stackPush(Builder.createZExt(Builder.createFCmpUNE(LHS, RHS),
2073
129
                                     Context.Int32Ty));
2074
129
        break;
2075
102
      }
2076
182
      case OpCode::F32__lt:
2077
305
      case OpCode::F64__lt: {
2078
305
        LLVM::Value RHS = stackPop();
2079
305
        LLVM::Value LHS = stackPop();
2080
305
        stackPush(Builder.createZExt(Builder.createFCmpOLT(LHS, RHS),
2081
305
                                     Context.Int32Ty));
2082
305
        break;
2083
182
      }
2084
154
      case OpCode::F32__gt:
2085
209
      case OpCode::F64__gt: {
2086
209
        LLVM::Value RHS = stackPop();
2087
209
        LLVM::Value LHS = stackPop();
2088
209
        stackPush(Builder.createZExt(Builder.createFCmpOGT(LHS, RHS),
2089
209
                                     Context.Int32Ty));
2090
209
        break;
2091
154
      }
2092
83
      case OpCode::F32__le:
2093
188
      case OpCode::F64__le: {
2094
188
        LLVM::Value RHS = stackPop();
2095
188
        LLVM::Value LHS = stackPop();
2096
188
        stackPush(Builder.createZExt(Builder.createFCmpOLE(LHS, RHS),
2097
188
                                     Context.Int32Ty));
2098
188
        break;
2099
83
      }
2100
281
      case OpCode::F32__ge:
2101
308
      case OpCode::F64__ge: {
2102
308
        LLVM::Value RHS = stackPop();
2103
308
        LLVM::Value LHS = stackPop();
2104
308
        stackPush(Builder.createZExt(Builder.createFCmpOGE(LHS, RHS),
2105
308
                                     Context.Int32Ty));
2106
308
        break;
2107
281
      }
2108
804
      case OpCode::I32__add:
2109
1.28k
      case OpCode::I64__add: {
2110
1.28k
        LLVM::Value RHS = stackPop();
2111
1.28k
        LLVM::Value LHS = stackPop();
2112
1.28k
        stackPush(Builder.createAdd(LHS, RHS));
2113
1.28k
        break;
2114
804
      }
2115
2.17k
      case OpCode::I32__sub:
2116
2.62k
      case OpCode::I64__sub: {
2117
2.62k
        LLVM::Value RHS = stackPop();
2118
2.62k
        LLVM::Value LHS = stackPop();
2119
2120
2.62k
        stackPush(Builder.createSub(LHS, RHS));
2121
2.62k
        break;
2122
2.17k
      }
2123
646
      case OpCode::I32__mul:
2124
1.35k
      case OpCode::I64__mul: {
2125
1.35k
        LLVM::Value RHS = stackPop();
2126
1.35k
        LLVM::Value LHS = stackPop();
2127
1.35k
        stackPush(Builder.createMul(LHS, RHS));
2128
1.35k
        break;
2129
646
      }
2130
1.22k
      case OpCode::I32__div_s:
2131
1.57k
      case OpCode::I64__div_s: {
2132
1.57k
        LLVM::Value RHS = stackPop();
2133
1.57k
        LLVM::Value LHS = stackPop();
2134
1.57k
        if constexpr (kForceDivCheck) {
2135
1.57k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_s;
2136
1.57k
          LLVM::Value IntZero =
2137
1.57k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2138
1.57k
          LLVM::Value IntMinusOne =
2139
1.57k
              Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2140
1.57k
                   : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2141
1.57k
          LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2142
1.22k
                                          std::numeric_limits<int32_t>::min()))
2143
1.57k
                                    : LLContext.getInt64(static_cast<uint64_t>(
2144
344
                                          std::numeric_limits<int64_t>::min()));
2145
2146
1.57k
          auto NoZeroBB =
2147
1.57k
              LLVM::BasicBlock::create(LLContext, F.Fn, "div.nozero");
2148
1.57k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2149
2150
1.57k
          auto IsNotZero =
2151
1.57k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2152
1.57k
          Builder.createCondBr(IsNotZero, NoZeroBB,
2153
1.57k
                               getTrapBB(ErrCode::Value::DivideByZero));
2154
2155
1.57k
          Builder.positionAtEnd(NoZeroBB);
2156
1.57k
          auto NotOverflow = Builder.createLikely(
2157
1.57k
              Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2158
1.57k
                               Builder.createICmpNE(RHS, IntMinusOne)));
2159
1.57k
          Builder.createCondBr(NotOverflow, OkBB,
2160
1.57k
                               getTrapBB(ErrCode::Value::IntegerOverflow));
2161
2162
1.57k
          Builder.positionAtEnd(OkBB);
2163
1.57k
        }
2164
1.57k
        stackPush(Builder.createSDiv(LHS, RHS));
2165
1.57k
        break;
2166
1.22k
      }
2167
3.47k
      case OpCode::I32__div_u:
2168
3.78k
      case OpCode::I64__div_u: {
2169
3.78k
        LLVM::Value RHS = stackPop();
2170
3.78k
        LLVM::Value LHS = stackPop();
2171
3.78k
        if constexpr (kForceDivCheck) {
2172
3.78k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_u;
2173
3.78k
          LLVM::Value IntZero =
2174
3.78k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2175
3.78k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2176
2177
3.78k
          auto IsNotZero =
2178
3.78k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2179
3.78k
          Builder.createCondBr(IsNotZero, OkBB,
2180
3.78k
                               getTrapBB(ErrCode::Value::DivideByZero));
2181
3.78k
          Builder.positionAtEnd(OkBB);
2182
3.78k
        }
2183
3.78k
        stackPush(Builder.createUDiv(LHS, RHS));
2184
3.78k
        break;
2185
3.47k
      }
2186
1.12k
      case OpCode::I32__rem_s:
2187
1.58k
      case OpCode::I64__rem_s: {
2188
1.58k
        LLVM::Value RHS = stackPop();
2189
1.58k
        LLVM::Value LHS = stackPop();
2190
        // Handle INT_MIN % -1 (i32 or i64): skip the srem and yield 0 instead.
2191
1.58k
        const bool Is32 = Instr.getOpCode() == OpCode::I32__rem_s;
2192
1.58k
        LLVM::Value IntMinusOne =
2193
1.58k
            Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2194
1.58k
                 : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2195
1.58k
        LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2196
1.12k
                                        std::numeric_limits<int32_t>::min()))
2197
1.58k
                                  : LLContext.getInt64(static_cast<uint64_t>(
2198
457
                                        std::numeric_limits<int64_t>::min()));
2199
1.58k
        LLVM::Value IntZero =
2200
1.58k
            Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2201
2202
1.58k
        auto NoOverflowBB =
2203
1.58k
            LLVM::BasicBlock::create(LLContext, F.Fn, "no.overflow");
2204
1.58k
        auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "end.overflow");
2205
2206
1.58k
        if constexpr (kForceDivCheck) {
2207
1.58k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2208
2209
1.58k
          auto IsNotZero =
2210
1.58k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2211
1.58k
          Builder.createCondBr(IsNotZero, OkBB,
2212
1.58k
                               getTrapBB(ErrCode::Value::DivideByZero));
2213
1.58k
          Builder.positionAtEnd(OkBB);
2214
1.58k
        }
2215
2216
1.58k
        auto CurrBB = Builder.getInsertBlock();
2217
2218
1.58k
        auto NotOverflow = Builder.createLikely(
2219
1.58k
            Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2220
1.58k
                             Builder.createICmpNE(RHS, IntMinusOne)));
2221
1.58k
        Builder.createCondBr(NotOverflow, NoOverflowBB, EndBB);
2222
2223
1.58k
        Builder.positionAtEnd(NoOverflowBB);
2224
1.58k
        auto Ret1 = Builder.createSRem(LHS, RHS);
2225
1.58k
        Builder.createBr(EndBB);
2226
2227
1.58k
        Builder.positionAtEnd(EndBB);
2228
1.58k
        auto Ret = Builder.createPHI(Ret1.getType());
2229
1.58k
        Ret.addIncoming(Ret1, NoOverflowBB);
2230
1.58k
        Ret.addIncoming(IntZero, CurrBB);
2231
2232
1.58k
        stackPush(Ret);
2233
1.58k
        break;
2234
1.12k
      }
2235
1.18k
      case OpCode::I32__rem_u:
2236
1.88k
      case OpCode::I64__rem_u: {
2237
1.88k
        LLVM::Value RHS = stackPop();
2238
1.88k
        LLVM::Value LHS = stackPop();
2239
1.88k
        if constexpr (kForceDivCheck) {
2240
1.88k
          LLVM::Value IntZero = Instr.getOpCode() == OpCode::I32__rem_u
2241
1.88k
                                    ? LLContext.getInt32(0)
2242
1.88k
                                    : LLContext.getInt64(0);
2243
1.88k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2244
2245
1.88k
          auto IsNotZero =
2246
1.88k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2247
1.88k
          Builder.createCondBr(IsNotZero, OkBB,
2248
1.88k
                               getTrapBB(ErrCode::Value::DivideByZero));
2249
1.88k
          Builder.positionAtEnd(OkBB);
2250
1.88k
        }
2251
1.88k
        stackPush(Builder.createURem(LHS, RHS));
2252
1.88k
        break;
2253
1.18k
      }
2254
691
      case OpCode::I32__and:
2255
2.03k
      case OpCode::I64__and: {
2256
2.03k
        LLVM::Value RHS = stackPop();
2257
2.03k
        LLVM::Value LHS = stackPop();
2258
2.03k
        stackPush(Builder.createAnd(LHS, RHS));
2259
2.03k
        break;
2260
691
      }
2261
1.35k
      case OpCode::I32__or:
2262
1.72k
      case OpCode::I64__or: {
2263
1.72k
        LLVM::Value RHS = stackPop();
2264
1.72k
        LLVM::Value LHS = stackPop();
2265
1.72k
        stackPush(Builder.createOr(LHS, RHS));
2266
1.72k
        break;
2267
1.35k
      }
2268
1.53k
      case OpCode::I32__xor:
2269
2.15k
      case OpCode::I64__xor: {
2270
2.15k
        LLVM::Value RHS = stackPop();
2271
2.15k
        LLVM::Value LHS = stackPop();
2272
2.15k
        stackPush(Builder.createXor(LHS, RHS));
2273
2.15k
        break;
2274
1.53k
      }
2275
1.97k
      case OpCode::I32__shl:
2276
2.39k
      case OpCode::I64__shl: {
2277
2.39k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shl
2278
2.39k
                               ? LLContext.getInt32(31)
2279
2.39k
                               : LLContext.getInt64(63);
2280
2.39k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2281
2.39k
        LLVM::Value LHS = stackPop();
2282
2.39k
        stackPush(Builder.createShl(LHS, RHS));
2283
2.39k
        break;
2284
1.97k
      }
2285
1.89k
      case OpCode::I32__shr_s:
2286
2.29k
      case OpCode::I64__shr_s: {
2287
2.29k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_s
2288
2.29k
                               ? LLContext.getInt32(31)
2289
2.29k
                               : LLContext.getInt64(63);
2290
2.29k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2291
2.29k
        LLVM::Value LHS = stackPop();
2292
2.29k
        stackPush(Builder.createAShr(LHS, RHS));
2293
2.29k
        break;
2294
1.89k
      }
2295
4.45k
      case OpCode::I32__shr_u:
2296
4.74k
      case OpCode::I64__shr_u: {
2297
4.74k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_u
2298
4.74k
                               ? LLContext.getInt32(31)
2299
4.74k
                               : LLContext.getInt64(63);
2300
4.74k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2301
4.74k
        LLVM::Value LHS = stackPop();
2302
4.74k
        stackPush(Builder.createLShr(LHS, RHS));
2303
4.74k
        break;
2304
4.45k
      }
2305
2.61k
      case OpCode::I32__rotl: {
2306
2.61k
        LLVM::Value RHS = stackPop();
2307
2.61k
        LLVM::Value LHS = stackPop();
2308
2.61k
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2309
2.61k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int32Ty},
2310
2.61k
                                          {LHS, LHS, RHS}));
2311
2.61k
        break;
2312
2.61k
      }
2313
786
      case OpCode::I32__rotr: {
2314
786
        LLVM::Value RHS = stackPop();
2315
786
        LLVM::Value LHS = stackPop();
2316
786
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2317
786
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int32Ty},
2318
786
                                          {LHS, LHS, RHS}));
2319
786
        break;
2320
786
      }
2321
892
      case OpCode::I64__rotl: {
2322
892
        LLVM::Value RHS = stackPop();
2323
892
        LLVM::Value LHS = stackPop();
2324
892
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2325
892
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int64Ty},
2326
892
                                          {LHS, LHS, RHS}));
2327
892
        break;
2328
892
      }
2329
1.30k
      case OpCode::I64__rotr: {
2330
1.30k
        LLVM::Value RHS = stackPop();
2331
1.30k
        LLVM::Value LHS = stackPop();
2332
1.30k
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2333
1.30k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int64Ty},
2334
1.30k
                                          {LHS, LHS, RHS}));
2335
1.30k
        break;
2336
1.30k
      }
2337
283
      case OpCode::F32__add:
2338
586
      case OpCode::F64__add: {
2339
586
        LLVM::Value RHS = stackPop();
2340
586
        LLVM::Value LHS = stackPop();
2341
586
        stackPush(Builder.createFAdd(LHS, RHS));
2342
586
        break;
2343
283
      }
2344
150
      case OpCode::F32__sub:
2345
468
      case OpCode::F64__sub: {
2346
468
        LLVM::Value RHS = stackPop();
2347
468
        LLVM::Value LHS = stackPop();
2348
468
        stackPush(Builder.createFSub(LHS, RHS));
2349
468
        break;
2350
150
      }
2351
538
      case OpCode::F32__mul:
2352
682
      case OpCode::F64__mul: {
2353
682
        LLVM::Value RHS = stackPop();
2354
682
        LLVM::Value LHS = stackPop();
2355
682
        stackPush(Builder.createFMul(LHS, RHS));
2356
682
        break;
2357
538
      }
2358
226
      case OpCode::F32__div:
2359
565
      case OpCode::F64__div: {
2360
565
        LLVM::Value RHS = stackPop();
2361
565
        LLVM::Value LHS = stackPop();
2362
565
        stackPush(Builder.createFDiv(LHS, RHS));
2363
565
        break;
2364
226
      }
2365
309
      case OpCode::F32__min:
2366
672
      case OpCode::F64__min: {
2367
672
        LLVM::Value RHS = stackPop();
2368
672
        LLVM::Value LHS = stackPop();
2369
672
        auto FpTy = Instr.getOpCode() == OpCode::F32__min ? Context.FloatTy
2370
672
                                                          : Context.DoubleTy;
2371
672
        auto IntTy = Instr.getOpCode() == OpCode::F32__min ? Context.Int32Ty
2372
672
                                                           : Context.Int64Ty;
2373
2374
672
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2375
672
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2376
2377
672
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2378
672
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2379
672
        auto OrInt = Builder.createOr(LHSInt, RHSInt);
2380
672
        auto OrFp = Builder.createBitCast(OrInt, FpTy);
2381
2382
672
        auto AddFp = Builder.createFAdd(LHS, RHS);
2383
2384
672
        assuming(LLVM::Core::MinNum != LLVM::Core::NotIntrinsic);
2385
672
        auto MinFp = Builder.createIntrinsic(LLVM::Core::MinNum,
2386
672
                                             {LHS.getType()}, {LHS, RHS});
2387
2388
672
        auto Ret = Builder.createSelect(
2389
672
            UEQ, Builder.createSelect(UNO, AddFp, OrFp), MinFp);
2390
672
        stackPush(Ret);
2391
672
        break;
2392
672
      }
2393
341
      case OpCode::F32__max:
2394
778
      case OpCode::F64__max: {
2395
778
        LLVM::Value RHS = stackPop();
2396
778
        LLVM::Value LHS = stackPop();
2397
778
        auto FpTy = Instr.getOpCode() == OpCode::F32__max ? Context.FloatTy
2398
778
                                                          : Context.DoubleTy;
2399
778
        auto IntTy = Instr.getOpCode() == OpCode::F32__max ? Context.Int32Ty
2400
778
                                                           : Context.Int64Ty;
2401
2402
778
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2403
778
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2404
2405
778
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2406
778
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2407
778
        auto AndInt = Builder.createAnd(LHSInt, RHSInt);
2408
778
        auto AndFp = Builder.createBitCast(AndInt, FpTy);
2409
2410
778
        auto AddFp = Builder.createFAdd(LHS, RHS);
2411
2412
778
        assuming(LLVM::Core::MaxNum != LLVM::Core::NotIntrinsic);
2413
778
        auto MaxFp = Builder.createIntrinsic(LLVM::Core::MaxNum,
2414
778
                                             {LHS.getType()}, {LHS, RHS});
2415
2416
778
        auto Ret = Builder.createSelect(
2417
778
            UEQ, Builder.createSelect(UNO, AddFp, AndFp), MaxFp);
2418
778
        stackPush(Ret);
2419
778
        break;
2420
778
      }
2421
442
      case OpCode::F32__copysign:
2422
826
      case OpCode::F64__copysign: {
2423
826
        LLVM::Value RHS = stackPop();
2424
826
        LLVM::Value LHS = stackPop();
2425
826
        assuming(LLVM::Core::CopySign != LLVM::Core::NotIntrinsic);
2426
826
        stackPush(Builder.createIntrinsic(LLVM::Core::CopySign, {LHS.getType()},
2427
826
                                          {LHS, RHS}));
2428
826
        break;
2429
826
      }
2430
2431
      // Saturating Truncation Numeric Instructions
2432
219
      case OpCode::I32__trunc_sat_f32_s:
2433
219
        compileSignedTruncSat(Context.Int32Ty);
2434
219
        break;
2435
115
      case OpCode::I32__trunc_sat_f32_u:
2436
115
        compileUnsignedTruncSat(Context.Int32Ty);
2437
115
        break;
2438
527
      case OpCode::I32__trunc_sat_f64_s:
2439
527
        compileSignedTruncSat(Context.Int32Ty);
2440
527
        break;
2441
429
      case OpCode::I32__trunc_sat_f64_u:
2442
429
        compileUnsignedTruncSat(Context.Int32Ty);
2443
429
        break;
2444
436
      case OpCode::I64__trunc_sat_f32_s:
2445
436
        compileSignedTruncSat(Context.Int64Ty);
2446
436
        break;
2447
452
      case OpCode::I64__trunc_sat_f32_u:
2448
452
        compileUnsignedTruncSat(Context.Int64Ty);
2449
452
        break;
2450
290
      case OpCode::I64__trunc_sat_f64_s:
2451
290
        compileSignedTruncSat(Context.Int64Ty);
2452
290
        break;
2453
446
      case OpCode::I64__trunc_sat_f64_u:
2454
446
        compileUnsignedTruncSat(Context.Int64Ty);
2455
446
        break;
2456
2457
      // SIMD Memory Instructions
2458
5.20k
      case OpCode::V128__load:
2459
5.20k
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2460
5.20k
                            Instr.getMemoryAlign(), Context.Int128x1Ty);
2461
5.20k
        break;
2462
216
      case OpCode::V128__load8x8_s:
2463
216
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2464
216
                            Instr.getMemoryAlign(),
2465
216
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2466
216
                            Context.Int16x8Ty, true);
2467
216
        break;
2468
41
      case OpCode::V128__load8x8_u:
2469
41
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2470
41
                            Instr.getMemoryAlign(),
2471
41
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2472
41
                            Context.Int16x8Ty, false);
2473
41
        break;
2474
362
      case OpCode::V128__load16x4_s:
2475
362
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2476
362
                            Instr.getMemoryAlign(),
2477
362
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2478
362
                            Context.Int32x4Ty, true);
2479
362
        break;
2480
537
      case OpCode::V128__load16x4_u:
2481
537
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2482
537
                            Instr.getMemoryAlign(),
2483
537
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2484
537
                            Context.Int32x4Ty, false);
2485
537
        break;
2486
168
      case OpCode::V128__load32x2_s:
2487
168
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2488
168
                            Instr.getMemoryAlign(),
2489
168
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2490
168
                            Context.Int64x2Ty, true);
2491
168
        break;
2492
174
      case OpCode::V128__load32x2_u:
2493
174
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2494
174
                            Instr.getMemoryAlign(),
2495
174
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2496
174
                            Context.Int64x2Ty, false);
2497
174
        break;
2498
69
      case OpCode::V128__load8_splat:
2499
69
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2500
69
                           Instr.getMemoryAlign(), Context.Int8Ty,
2501
69
                           Context.Int8x16Ty);
2502
69
        break;
2503
193
      case OpCode::V128__load16_splat:
2504
193
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2505
193
                           Instr.getMemoryAlign(), Context.Int16Ty,
2506
193
                           Context.Int16x8Ty);
2507
193
        break;
2508
233
      case OpCode::V128__load32_splat:
2509
233
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2510
233
                           Instr.getMemoryAlign(), Context.Int32Ty,
2511
233
                           Context.Int32x4Ty);
2512
233
        break;
2513
184
      case OpCode::V128__load64_splat:
2514
184
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2515
184
                           Instr.getMemoryAlign(), Context.Int64Ty,
2516
184
                           Context.Int64x2Ty);
2517
184
        break;
2518
94
      case OpCode::V128__load32_zero:
2519
94
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2520
94
                            Instr.getMemoryAlign(), Context.Int32Ty,
2521
94
                            Context.Int128Ty, false);
2522
94
        break;
2523
143
      case OpCode::V128__load64_zero:
2524
143
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2525
143
                            Instr.getMemoryAlign(), Context.Int64Ty,
2526
143
                            Context.Int128Ty, false);
2527
143
        break;
2528
255
      case OpCode::V128__store:
2529
255
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2530
255
                       Instr.getMemoryAlign(), Context.Int128x1Ty, false, true);
2531
255
        break;
2532
197
      case OpCode::V128__load8_lane:
2533
197
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2534
197
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2535
197
                          Context.Int8Ty, Context.Int8x16Ty);
2536
197
        break;
2537
159
      case OpCode::V128__load16_lane:
2538
159
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2539
159
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2540
159
                          Context.Int16Ty, Context.Int16x8Ty);
2541
159
        break;
2542
133
      case OpCode::V128__load32_lane:
2543
133
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2544
133
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2545
133
                          Context.Int32Ty, Context.Int32x4Ty);
2546
133
        break;
2547
22
      case OpCode::V128__load64_lane:
2548
22
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2549
22
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2550
22
                          Context.Int64Ty, Context.Int64x2Ty);
2551
22
        break;
2552
135
      case OpCode::V128__store8_lane:
2553
135
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2554
135
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2555
135
                           Context.Int8Ty, Context.Int8x16Ty);
2556
135
        break;
2557
94
      case OpCode::V128__store16_lane:
2558
94
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2559
94
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2560
94
                           Context.Int16Ty, Context.Int16x8Ty);
2561
94
        break;
2562
118
      case OpCode::V128__store32_lane:
2563
118
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2564
118
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2565
118
                           Context.Int32Ty, Context.Int32x4Ty);
2566
118
        break;
2567
23
      case OpCode::V128__store64_lane:
2568
23
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2569
23
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2570
23
                           Context.Int64Ty, Context.Int64x2Ty);
2571
23
        break;
2572
2573
      // SIMD Const Instructions
2574
339
      case OpCode::V128__const: {
2575
339
        const auto Value = Instr.getNum().get<uint64x2_t>();
2576
339
        auto Vector =
2577
339
            LLVM::Value::getConstVector64(LLContext, {Value[0], Value[1]});
2578
339
        stackPush(Builder.createBitCast(Vector, Context.Int64x2Ty));
2579
339
        break;
2580
826
      }
2581
2582
      // SIMD Shuffle Instructions
2583
15
      case OpCode::I8x16__shuffle: {
2584
15
        auto V2 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2585
15
        auto V1 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2586
15
        const auto V3 = Instr.getNum().get<uint128_t>();
2587
15
        std::array<uint8_t, 16> Mask;
2588
255
        for (size_t I = 0; I < 16; ++I) {
2589
240
          auto Num = static_cast<uint8_t>(V3 >> (I * 8));
2590
240
          if constexpr (Endian::native == Endian::little) {
2591
240
            Mask[I] = Num;
2592
          } else {
2593
            Mask[15 - I] = Num < 16 ? 15 - Num : 47 - Num;
2594
          }
2595
240
        }
2596
15
        stackPush(Builder.createBitCast(
2597
15
            Builder.createShuffleVector(
2598
15
                V1, V2, LLVM::Value::getConstVector8(LLContext, Mask)),
2599
15
            Context.Int64x2Ty));
2600
15
        break;
2601
826
      }
2602
2603
      // SIMD Lane Instructions
2604
65
      case OpCode::I8x16__extract_lane_s:
2605
65
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2606
65
                             Context.Int32Ty, true);
2607
65
        break;
2608
28
      case OpCode::I8x16__extract_lane_u:
2609
28
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2610
28
                             Context.Int32Ty, false);
2611
28
        break;
2612
181
      case OpCode::I8x16__replace_lane:
2613
181
        compileReplaceLaneOp(Context.Int8x16Ty, Instr.getMemoryLane());
2614
181
        break;
2615
442
      case OpCode::I16x8__extract_lane_s:
2616
442
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2617
442
                             Context.Int32Ty, true);
2618
442
        break;
2619
458
      case OpCode::I16x8__extract_lane_u:
2620
458
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2621
458
                             Context.Int32Ty, false);
2622
458
        break;
2623
459
      case OpCode::I16x8__replace_lane:
2624
459
        compileReplaceLaneOp(Context.Int16x8Ty, Instr.getMemoryLane());
2625
459
        break;
2626
66
      case OpCode::I32x4__extract_lane:
2627
66
        compileExtractLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2628
66
        break;
2629
240
      case OpCode::I32x4__replace_lane:
2630
240
        compileReplaceLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2631
240
        break;
2632
128
      case OpCode::I64x2__extract_lane:
2633
128
        compileExtractLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2634
128
        break;
2635
14
      case OpCode::I64x2__replace_lane:
2636
14
        compileReplaceLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2637
14
        break;
2638
63
      case OpCode::F32x4__extract_lane:
2639
63
        compileExtractLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2640
63
        break;
2641
23
      case OpCode::F32x4__replace_lane:
2642
23
        compileReplaceLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2643
23
        break;
2644
86
      case OpCode::F64x2__extract_lane:
2645
86
        compileExtractLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2646
86
        break;
2647
7
      case OpCode::F64x2__replace_lane:
2648
7
        compileReplaceLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2649
7
        break;
2650
2651
      // SIMD Numeric Instructions
2652
71
      case OpCode::I8x16__swizzle:
2653
71
        compileVectorSwizzle();
2654
71
        break;
2655
40.5k
      case OpCode::I8x16__splat:
2656
40.5k
        compileSplatOp(Context.Int8x16Ty);
2657
40.5k
        break;
2658
9.43k
      case OpCode::I16x8__splat:
2659
9.43k
        compileSplatOp(Context.Int16x8Ty);
2660
9.43k
        break;
2661
1.35k
      case OpCode::I32x4__splat:
2662
1.35k
        compileSplatOp(Context.Int32x4Ty);
2663
1.35k
        break;
2664
446
      case OpCode::I64x2__splat:
2665
446
        compileSplatOp(Context.Int64x2Ty);
2666
446
        break;
2667
348
      case OpCode::F32x4__splat:
2668
348
        compileSplatOp(Context.Floatx4Ty);
2669
348
        break;
2670
58
      case OpCode::F64x2__splat:
2671
58
        compileSplatOp(Context.Doublex2Ty);
2672
58
        break;
2673
100
      case OpCode::I8x16__eq:
2674
100
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntEQ);
2675
100
        break;
2676
467
      case OpCode::I8x16__ne:
2677
467
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntNE);
2678
467
        break;
2679
63
      case OpCode::I8x16__lt_s:
2680
63
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLT);
2681
63
        break;
2682
79
      case OpCode::I8x16__lt_u:
2683
79
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULT);
2684
79
        break;
2685
151
      case OpCode::I8x16__gt_s:
2686
151
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGT);
2687
151
        break;
2688
222
      case OpCode::I8x16__gt_u:
2689
222
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGT);
2690
222
        break;
2691
139
      case OpCode::I8x16__le_s:
2692
139
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLE);
2693
139
        break;
2694
141
      case OpCode::I8x16__le_u:
2695
141
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULE);
2696
141
        break;
2697
1.05k
      case OpCode::I8x16__ge_s:
2698
1.05k
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGE);
2699
1.05k
        break;
2700
114
      case OpCode::I8x16__ge_u:
2701
114
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGE);
2702
114
        break;
2703
90
      case OpCode::I16x8__eq:
2704
90
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntEQ);
2705
90
        break;
2706
213
      case OpCode::I16x8__ne:
2707
213
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntNE);
2708
213
        break;
2709
64
      case OpCode::I16x8__lt_s:
2710
64
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLT);
2711
64
        break;
2712
241
      case OpCode::I16x8__lt_u:
2713
241
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULT);
2714
241
        break;
2715
285
      case OpCode::I16x8__gt_s:
2716
285
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGT);
2717
285
        break;
2718
150
      case OpCode::I16x8__gt_u:
2719
150
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGT);
2720
150
        break;
2721
107
      case OpCode::I16x8__le_s:
2722
107
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLE);
2723
107
        break;
2724
112
      case OpCode::I16x8__le_u:
2725
112
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULE);
2726
112
        break;
2727
160
      case OpCode::I16x8__ge_s:
2728
160
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGE);
2729
160
        break;
2730
67
      case OpCode::I16x8__ge_u:
2731
67
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGE);
2732
67
        break;
2733
57
      case OpCode::I32x4__eq:
2734
57
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntEQ);
2735
57
        break;
2736
120
      case OpCode::I32x4__ne:
2737
120
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntNE);
2738
120
        break;
2739
31
      case OpCode::I32x4__lt_s:
2740
31
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLT);
2741
31
        break;
2742
128
      case OpCode::I32x4__lt_u:
2743
128
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULT);
2744
128
        break;
2745
124
      case OpCode::I32x4__gt_s:
2746
124
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGT);
2747
124
        break;
2748
218
      case OpCode::I32x4__gt_u:
2749
218
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGT);
2750
218
        break;
2751
281
      case OpCode::I32x4__le_s:
2752
281
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLE);
2753
281
        break;
2754
243
      case OpCode::I32x4__le_u:
2755
243
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULE);
2756
243
        break;
2757
55
      case OpCode::I32x4__ge_s:
2758
55
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGE);
2759
55
        break;
2760
105
      case OpCode::I32x4__ge_u:
2761
105
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGE);
2762
105
        break;
2763
123
      case OpCode::I64x2__eq:
2764
123
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntEQ);
2765
123
        break;
2766
72
      case OpCode::I64x2__ne:
2767
72
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntNE);
2768
72
        break;
2769
46
      case OpCode::I64x2__lt_s:
2770
46
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLT);
2771
46
        break;
2772
130
      case OpCode::I64x2__gt_s:
2773
130
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGT);
2774
130
        break;
2775
36
      case OpCode::I64x2__le_s:
2776
36
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLE);
2777
36
        break;
2778
72
      case OpCode::I64x2__ge_s:
2779
72
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGE);
2780
72
        break;
2781
1.44k
      case OpCode::F32x4__eq:
2782
1.44k
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOEQ,
2783
1.44k
                               Context.Int32x4Ty);
2784
1.44k
        break;
2785
37
      case OpCode::F32x4__ne:
2786
37
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealUNE,
2787
37
                               Context.Int32x4Ty);
2788
37
        break;
2789
828
      case OpCode::F32x4__lt:
2790
828
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLT,
2791
828
                               Context.Int32x4Ty);
2792
828
        break;
2793
86
      case OpCode::F32x4__gt:
2794
86
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGT,
2795
86
                               Context.Int32x4Ty);
2796
86
        break;
2797
357
      case OpCode::F32x4__le:
2798
357
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLE,
2799
357
                               Context.Int32x4Ty);
2800
357
        break;
2801
73
      case OpCode::F32x4__ge:
2802
73
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGE,
2803
73
                               Context.Int32x4Ty);
2804
73
        break;
2805
56
      case OpCode::F64x2__eq:
2806
56
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOEQ,
2807
56
                               Context.Int64x2Ty);
2808
56
        break;
2809
109
      case OpCode::F64x2__ne:
2810
109
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealUNE,
2811
109
                               Context.Int64x2Ty);
2812
109
        break;
2813
180
      case OpCode::F64x2__lt:
2814
180
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLT,
2815
180
                               Context.Int64x2Ty);
2816
180
        break;
2817
58
      case OpCode::F64x2__gt:
2818
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGT,
2819
58
                               Context.Int64x2Ty);
2820
58
        break;
2821
189
      case OpCode::F64x2__le:
2822
189
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLE,
2823
189
                               Context.Int64x2Ty);
2824
189
        break;
2825
86
      case OpCode::F64x2__ge:
2826
86
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGE,
2827
86
                               Context.Int64x2Ty);
2828
86
        break;
2829
137
      case OpCode::V128__not:
2830
137
        Stack.back() = Builder.createNot(Stack.back());
2831
137
        break;
2832
75
      case OpCode::V128__and: {
2833
75
        auto RHS = stackPop();
2834
75
        auto LHS = stackPop();
2835
75
        stackPush(Builder.createAnd(LHS, RHS));
2836
75
        break;
2837
826
      }
2838
90
      case OpCode::V128__andnot: {
2839
90
        auto RHS = stackPop();
2840
90
        auto LHS = stackPop();
2841
90
        stackPush(Builder.createAnd(LHS, Builder.createNot(RHS)));
2842
90
        break;
2843
826
      }
2844
123
      case OpCode::V128__or: {
2845
123
        auto RHS = stackPop();
2846
123
        auto LHS = stackPop();
2847
123
        stackPush(Builder.createOr(LHS, RHS));
2848
123
        break;
2849
826
      }
2850
60
      case OpCode::V128__xor: {
2851
60
        auto RHS = stackPop();
2852
60
        auto LHS = stackPop();
2853
60
        stackPush(Builder.createXor(LHS, RHS));
2854
60
        break;
2855
826
      }
2856
127
      case OpCode::V128__bitselect: {
2857
127
        auto C = stackPop();
2858
127
        auto V2 = stackPop();
2859
127
        auto V1 = stackPop();
2860
127
        stackPush(Builder.createXor(
2861
127
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
2862
127
        break;
2863
826
      }
2864
108
      case OpCode::V128__any_true:
2865
108
        compileVectorAnyTrue();
2866
108
        break;
2867
866
      case OpCode::I8x16__abs:
2868
866
        compileVectorAbs(Context.Int8x16Ty);
2869
866
        break;
2870
1.52k
      case OpCode::I8x16__neg:
2871
1.52k
        compileVectorNeg(Context.Int8x16Ty);
2872
1.52k
        break;
2873
145
      case OpCode::I8x16__popcnt:
2874
145
        compileVectorPopcnt();
2875
145
        break;
2876
341
      case OpCode::I8x16__all_true:
2877
341
        compileVectorAllTrue(Context.Int8x16Ty);
2878
341
        break;
2879
635
      case OpCode::I8x16__bitmask:
2880
635
        compileVectorBitMask(Context.Int8x16Ty);
2881
635
        break;
2882
85
      case OpCode::I8x16__narrow_i16x8_s:
2883
85
        compileVectorNarrow(Context.Int16x8Ty, true);
2884
85
        break;
2885
205
      case OpCode::I8x16__narrow_i16x8_u:
2886
205
        compileVectorNarrow(Context.Int16x8Ty, false);
2887
205
        break;
2888
272
      case OpCode::I8x16__shl:
2889
272
        compileVectorShl(Context.Int8x16Ty);
2890
272
        break;
2891
1.19k
      case OpCode::I8x16__shr_s:
2892
1.19k
        compileVectorAShr(Context.Int8x16Ty);
2893
1.19k
        break;
2894
60
      case OpCode::I8x16__shr_u:
2895
60
        compileVectorLShr(Context.Int8x16Ty);
2896
60
        break;
2897
41
      case OpCode::I8x16__add:
2898
41
        compileVectorVectorAdd(Context.Int8x16Ty);
2899
41
        break;
2900
665
      case OpCode::I8x16__add_sat_s:
2901
665
        compileVectorVectorAddSat(Context.Int8x16Ty, true);
2902
665
        break;
2903
81
      case OpCode::I8x16__add_sat_u:
2904
81
        compileVectorVectorAddSat(Context.Int8x16Ty, false);
2905
81
        break;
2906
68
      case OpCode::I8x16__sub:
2907
68
        compileVectorVectorSub(Context.Int8x16Ty);
2908
68
        break;
2909
190
      case OpCode::I8x16__sub_sat_s:
2910
190
        compileVectorVectorSubSat(Context.Int8x16Ty, true);
2911
190
        break;
2912
86
      case OpCode::I8x16__sub_sat_u:
2913
86
        compileVectorVectorSubSat(Context.Int8x16Ty, false);
2914
86
        break;
2915
68
      case OpCode::I8x16__min_s:
2916
68
        compileVectorVectorSMin(Context.Int8x16Ty);
2917
68
        break;
2918
109
      case OpCode::I8x16__min_u:
2919
109
        compileVectorVectorUMin(Context.Int8x16Ty);
2920
109
        break;
2921
275
      case OpCode::I8x16__max_s:
2922
275
        compileVectorVectorSMax(Context.Int8x16Ty);
2923
275
        break;
2924
98
      case OpCode::I8x16__max_u:
2925
98
        compileVectorVectorUMax(Context.Int8x16Ty);
2926
98
        break;
2927
121
      case OpCode::I8x16__avgr_u:
2928
121
        compileVectorVectorUAvgr(Context.Int8x16Ty);
2929
121
        break;
2930
329
      case OpCode::I16x8__abs:
2931
329
        compileVectorAbs(Context.Int16x8Ty);
2932
329
        break;
2933
197
      case OpCode::I16x8__neg:
2934
197
        compileVectorNeg(Context.Int16x8Ty);
2935
197
        break;
2936
142
      case OpCode::I16x8__all_true:
2937
142
        compileVectorAllTrue(Context.Int16x8Ty);
2938
142
        break;
2939
116
      case OpCode::I16x8__bitmask:
2940
116
        compileVectorBitMask(Context.Int16x8Ty);
2941
116
        break;
2942
46
      case OpCode::I16x8__narrow_i32x4_s:
2943
46
        compileVectorNarrow(Context.Int32x4Ty, true);
2944
46
        break;
2945
387
      case OpCode::I16x8__narrow_i32x4_u:
2946
387
        compileVectorNarrow(Context.Int32x4Ty, false);
2947
387
        break;
2948
991
      case OpCode::I16x8__extend_low_i8x16_s:
2949
991
        compileVectorExtend(Context.Int8x16Ty, true, true);
2950
991
        break;
2951
100
      case OpCode::I16x8__extend_high_i8x16_s:
2952
100
        compileVectorExtend(Context.Int8x16Ty, true, false);
2953
100
        break;
2954
365
      case OpCode::I16x8__extend_low_i8x16_u:
2955
365
        compileVectorExtend(Context.Int8x16Ty, false, true);
2956
365
        break;
2957
12
      case OpCode::I16x8__extend_high_i8x16_u:
2958
12
        compileVectorExtend(Context.Int8x16Ty, false, false);
2959
12
        break;
2960
113
      case OpCode::I16x8__shl:
2961
113
        compileVectorShl(Context.Int16x8Ty);
2962
113
        break;
2963
426
      case OpCode::I16x8__shr_s:
2964
426
        compileVectorAShr(Context.Int16x8Ty);
2965
426
        break;
2966
161
      case OpCode::I16x8__shr_u:
2967
161
        compileVectorLShr(Context.Int16x8Ty);
2968
161
        break;
2969
147
      case OpCode::I16x8__add:
2970
147
        compileVectorVectorAdd(Context.Int16x8Ty);
2971
147
        break;
2972
20
      case OpCode::I16x8__add_sat_s:
2973
20
        compileVectorVectorAddSat(Context.Int16x8Ty, true);
2974
20
        break;
2975
408
      case OpCode::I16x8__add_sat_u:
2976
408
        compileVectorVectorAddSat(Context.Int16x8Ty, false);
2977
408
        break;
2978
338
      case OpCode::I16x8__sub:
2979
338
        compileVectorVectorSub(Context.Int16x8Ty);
2980
338
        break;
2981
30
      case OpCode::I16x8__sub_sat_s:
2982
30
        compileVectorVectorSubSat(Context.Int16x8Ty, true);
2983
30
        break;
2984
94
      case OpCode::I16x8__sub_sat_u:
2985
94
        compileVectorVectorSubSat(Context.Int16x8Ty, false);
2986
94
        break;
2987
113
      case OpCode::I16x8__mul:
2988
113
        compileVectorVectorMul(Context.Int16x8Ty);
2989
113
        break;
2990
157
      case OpCode::I16x8__min_s:
2991
157
        compileVectorVectorSMin(Context.Int16x8Ty);
2992
157
        break;
2993
122
      case OpCode::I16x8__min_u:
2994
122
        compileVectorVectorUMin(Context.Int16x8Ty);
2995
122
        break;
2996
90
      case OpCode::I16x8__max_s:
2997
90
        compileVectorVectorSMax(Context.Int16x8Ty);
2998
90
        break;
2999
804
      case OpCode::I16x8__max_u:
3000
804
        compileVectorVectorUMax(Context.Int16x8Ty);
3001
804
        break;
3002
166
      case OpCode::I16x8__avgr_u:
3003
166
        compileVectorVectorUAvgr(Context.Int16x8Ty);
3004
166
        break;
3005
66
      case OpCode::I16x8__extmul_low_i8x16_s:
3006
66
        compileVectorExtMul(Context.Int8x16Ty, true, true);
3007
66
        break;
3008
268
      case OpCode::I16x8__extmul_high_i8x16_s:
3009
268
        compileVectorExtMul(Context.Int8x16Ty, true, false);
3010
268
        break;
3011
115
      case OpCode::I16x8__extmul_low_i8x16_u:
3012
115
        compileVectorExtMul(Context.Int8x16Ty, false, true);
3013
115
        break;
3014
504
      case OpCode::I16x8__extmul_high_i8x16_u:
3015
504
        compileVectorExtMul(Context.Int8x16Ty, false, false);
3016
504
        break;
3017
149
      case OpCode::I16x8__q15mulr_sat_s:
3018
149
        compileVectorVectorQ15MulSat();
3019
149
        break;
3020
314
      case OpCode::I16x8__extadd_pairwise_i8x16_s:
3021
314
        compileVectorExtAddPairwise(Context.Int8x16Ty, true);
3022
314
        break;
3023
327
      case OpCode::I16x8__extadd_pairwise_i8x16_u:
3024
327
        compileVectorExtAddPairwise(Context.Int8x16Ty, false);
3025
327
        break;
3026
57
      case OpCode::I32x4__abs:
3027
57
        compileVectorAbs(Context.Int32x4Ty);
3028
57
        break;
3029
206
      case OpCode::I32x4__neg:
3030
206
        compileVectorNeg(Context.Int32x4Ty);
3031
206
        break;
3032
185
      case OpCode::I32x4__all_true:
3033
185
        compileVectorAllTrue(Context.Int32x4Ty);
3034
185
        break;
3035
83
      case OpCode::I32x4__bitmask:
3036
83
        compileVectorBitMask(Context.Int32x4Ty);
3037
83
        break;
3038
136
      case OpCode::I32x4__extend_low_i16x8_s:
3039
136
        compileVectorExtend(Context.Int16x8Ty, true, true);
3040
136
        break;
3041
510
      case OpCode::I32x4__extend_high_i16x8_s:
3042
510
        compileVectorExtend(Context.Int16x8Ty, true, false);
3043
510
        break;
3044
1.88k
      case OpCode::I32x4__extend_low_i16x8_u:
3045
1.88k
        compileVectorExtend(Context.Int16x8Ty, false, true);
3046
1.88k
        break;
3047
139
      case OpCode::I32x4__extend_high_i16x8_u:
3048
139
        compileVectorExtend(Context.Int16x8Ty, false, false);
3049
139
        break;
3050
1.45k
      case OpCode::I32x4__shl:
3051
1.45k
        compileVectorShl(Context.Int32x4Ty);
3052
1.45k
        break;
3053
289
      case OpCode::I32x4__shr_s:
3054
289
        compileVectorAShr(Context.Int32x4Ty);
3055
289
        break;
3056
651
      case OpCode::I32x4__shr_u:
3057
651
        compileVectorLShr(Context.Int32x4Ty);
3058
651
        break;
3059
196
      case OpCode::I32x4__add:
3060
196
        compileVectorVectorAdd(Context.Int32x4Ty);
3061
196
        break;
3062
158
      case OpCode::I32x4__sub:
3063
158
        compileVectorVectorSub(Context.Int32x4Ty);
3064
158
        break;
3065
293
      case OpCode::I32x4__mul:
3066
293
        compileVectorVectorMul(Context.Int32x4Ty);
3067
293
        break;
3068
90
      case OpCode::I32x4__min_s:
3069
90
        compileVectorVectorSMin(Context.Int32x4Ty);
3070
90
        break;
3071
105
      case OpCode::I32x4__min_u:
3072
105
        compileVectorVectorUMin(Context.Int32x4Ty);
3073
105
        break;
3074
64
      case OpCode::I32x4__max_s:
3075
64
        compileVectorVectorSMax(Context.Int32x4Ty);
3076
64
        break;
3077
87
      case OpCode::I32x4__max_u:
3078
87
        compileVectorVectorUMax(Context.Int32x4Ty);
3079
87
        break;
3080
111
      case OpCode::I32x4__extmul_low_i16x8_s:
3081
111
        compileVectorExtMul(Context.Int16x8Ty, true, true);
3082
111
        break;
3083
71
      case OpCode::I32x4__extmul_high_i16x8_s:
3084
71
        compileVectorExtMul(Context.Int16x8Ty, true, false);
3085
71
        break;
3086
259
      case OpCode::I32x4__extmul_low_i16x8_u:
3087
259
        compileVectorExtMul(Context.Int16x8Ty, false, true);
3088
259
        break;
3089
158
      case OpCode::I32x4__extmul_high_i16x8_u:
3090
158
        compileVectorExtMul(Context.Int16x8Ty, false, false);
3091
158
        break;
3092
1.15k
      case OpCode::I32x4__extadd_pairwise_i16x8_s:
3093
1.15k
        compileVectorExtAddPairwise(Context.Int16x8Ty, true);
3094
1.15k
        break;
3095
652
      case OpCode::I32x4__extadd_pairwise_i16x8_u:
3096
652
        compileVectorExtAddPairwise(Context.Int16x8Ty, false);
3097
652
        break;
3098
98
      case OpCode::I32x4__dot_i16x8_s: {
3099
98
        auto ExtendTy = Context.Int16x8Ty.getExtendedElementVectorType();
3100
98
        auto Undef = LLVM::Value::getUndef(ExtendTy);
3101
98
        auto LHS = Builder.createSExt(
3102
98
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3103
98
        auto RHS = Builder.createSExt(
3104
98
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3105
98
        auto M = Builder.createMul(LHS, RHS);
3106
98
        auto L = Builder.createShuffleVector(
3107
98
            M, Undef,
3108
98
            LLVM::Value::getConstVector32(LLContext, {0U, 2U, 4U, 6U}));
3109
98
        auto R = Builder.createShuffleVector(
3110
98
            M, Undef,
3111
98
            LLVM::Value::getConstVector32(LLContext, {1U, 3U, 5U, 7U}));
3112
98
        auto V = Builder.createAdd(L, R);
3113
98
        stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
3114
98
        break;
3115
826
      }
3116
1.02k
      case OpCode::I64x2__abs:
3117
1.02k
        compileVectorAbs(Context.Int64x2Ty);
3118
1.02k
        break;
3119
590
      case OpCode::I64x2__neg:
3120
590
        compileVectorNeg(Context.Int64x2Ty);
3121
590
        break;
3122
297
      case OpCode::I64x2__all_true:
3123
297
        compileVectorAllTrue(Context.Int64x2Ty);
3124
297
        break;
3125
312
      case OpCode::I64x2__bitmask:
3126
312
        compileVectorBitMask(Context.Int64x2Ty);
3127
312
        break;
3128
345
      case OpCode::I64x2__extend_low_i32x4_s:
3129
345
        compileVectorExtend(Context.Int32x4Ty, true, true);
3130
345
        break;
3131
752
      case OpCode::I64x2__extend_high_i32x4_s:
3132
752
        compileVectorExtend(Context.Int32x4Ty, true, false);
3133
752
        break;
3134
208
      case OpCode::I64x2__extend_low_i32x4_u:
3135
208
        compileVectorExtend(Context.Int32x4Ty, false, true);
3136
208
        break;
3137
644
      case OpCode::I64x2__extend_high_i32x4_u:
3138
644
        compileVectorExtend(Context.Int32x4Ty, false, false);
3139
644
        break;
3140
108
      case OpCode::I64x2__shl:
3141
108
        compileVectorShl(Context.Int64x2Ty);
3142
108
        break;
3143
475
      case OpCode::I64x2__shr_s:
3144
475
        compileVectorAShr(Context.Int64x2Ty);
3145
475
        break;
3146
82
      case OpCode::I64x2__shr_u:
3147
82
        compileVectorLShr(Context.Int64x2Ty);
3148
82
        break;
3149
40
      case OpCode::I64x2__add:
3150
40
        compileVectorVectorAdd(Context.Int64x2Ty);
3151
40
        break;
3152
273
      case OpCode::I64x2__sub:
3153
273
        compileVectorVectorSub(Context.Int64x2Ty);
3154
273
        break;
3155
88
      case OpCode::I64x2__mul:
3156
88
        compileVectorVectorMul(Context.Int64x2Ty);
3157
88
        break;
3158
36
      case OpCode::I64x2__extmul_low_i32x4_s:
3159
36
        compileVectorExtMul(Context.Int32x4Ty, true, true);
3160
36
        break;
3161
327
      case OpCode::I64x2__extmul_high_i32x4_s:
3162
327
        compileVectorExtMul(Context.Int32x4Ty, true, false);
3163
327
        break;
3164
31
      case OpCode::I64x2__extmul_low_i32x4_u:
3165
31
        compileVectorExtMul(Context.Int32x4Ty, false, true);
3166
31
        break;
3167
119
      case OpCode::I64x2__extmul_high_i32x4_u:
3168
119
        compileVectorExtMul(Context.Int32x4Ty, false, false);
3169
119
        break;
3170
111
      case OpCode::F32x4__abs:
3171
111
        compileVectorFAbs(Context.Floatx4Ty);
3172
111
        break;
3173
150
      case OpCode::F32x4__neg:
3174
150
        compileVectorFNeg(Context.Floatx4Ty);
3175
150
        break;
3176
208
      case OpCode::F32x4__sqrt:
3177
208
        compileVectorFSqrt(Context.Floatx4Ty);
3178
208
        break;
3179
132
      case OpCode::F32x4__add:
3180
132
        compileVectorVectorFAdd(Context.Floatx4Ty);
3181
132
        break;
3182
253
      case OpCode::F32x4__sub:
3183
253
        compileVectorVectorFSub(Context.Floatx4Ty);
3184
253
        break;
3185
38
      case OpCode::F32x4__mul:
3186
38
        compileVectorVectorFMul(Context.Floatx4Ty);
3187
38
        break;
3188
176
      case OpCode::F32x4__div:
3189
176
        compileVectorVectorFDiv(Context.Floatx4Ty);
3190
176
        break;
3191
124
      case OpCode::F32x4__min:
3192
124
        compileVectorVectorFMin(Context.Floatx4Ty);
3193
124
        break;
3194
36
      case OpCode::F32x4__max:
3195
36
        compileVectorVectorFMax(Context.Floatx4Ty);
3196
36
        break;
3197
50
      case OpCode::F32x4__pmin:
3198
50
        compileVectorVectorFPMin(Context.Floatx4Ty);
3199
50
        break;
3200
223
      case OpCode::F32x4__pmax:
3201
223
        compileVectorVectorFPMax(Context.Floatx4Ty);
3202
223
        break;
3203
786
      case OpCode::F32x4__ceil:
3204
786
        compileVectorFCeil(Context.Floatx4Ty);
3205
786
        break;
3206
1.62k
      case OpCode::F32x4__floor:
3207
1.62k
        compileVectorFFloor(Context.Floatx4Ty);
3208
1.62k
        break;
3209
1.64k
      case OpCode::F32x4__trunc:
3210
1.64k
        compileVectorFTrunc(Context.Floatx4Ty);
3211
1.64k
        break;
3212
217
      case OpCode::F32x4__nearest:
3213
217
        compileVectorFNearest(Context.Floatx4Ty);
3214
217
        break;
3215
439
      case OpCode::F64x2__abs:
3216
439
        compileVectorFAbs(Context.Doublex2Ty);
3217
439
        break;
3218
734
      case OpCode::F64x2__neg:
3219
734
        compileVectorFNeg(Context.Doublex2Ty);
3220
734
        break;
3221
126
      case OpCode::F64x2__sqrt:
3222
126
        compileVectorFSqrt(Context.Doublex2Ty);
3223
126
        break;
3224
49
      case OpCode::F64x2__add:
3225
49
        compileVectorVectorFAdd(Context.Doublex2Ty);
3226
49
        break;
3227
215
      case OpCode::F64x2__sub:
3228
215
        compileVectorVectorFSub(Context.Doublex2Ty);
3229
215
        break;
3230
211
      case OpCode::F64x2__mul:
3231
211
        compileVectorVectorFMul(Context.Doublex2Ty);
3232
211
        break;
3233
37
      case OpCode::F64x2__div:
3234
37
        compileVectorVectorFDiv(Context.Doublex2Ty);
3235
37
        break;
3236
166
      case OpCode::F64x2__min:
3237
166
        compileVectorVectorFMin(Context.Doublex2Ty);
3238
166
        break;
3239
185
      case OpCode::F64x2__max:
3240
185
        compileVectorVectorFMax(Context.Doublex2Ty);
3241
185
        break;
3242
265
      case OpCode::F64x2__pmin:
3243
265
        compileVectorVectorFPMin(Context.Doublex2Ty);
3244
265
        break;
3245
106
      case OpCode::F64x2__pmax:
3246
106
        compileVectorVectorFPMax(Context.Doublex2Ty);
3247
106
        break;
3248
550
      case OpCode::F64x2__ceil:
3249
550
        compileVectorFCeil(Context.Doublex2Ty);
3250
550
        break;
3251
654
      case OpCode::F64x2__floor:
3252
654
        compileVectorFFloor(Context.Doublex2Ty);
3253
654
        break;
3254
120
      case OpCode::F64x2__trunc:
3255
120
        compileVectorFTrunc(Context.Doublex2Ty);
3256
120
        break;
3257
145
      case OpCode::F64x2__nearest:
3258
145
        compileVectorFNearest(Context.Doublex2Ty);
3259
145
        break;
3260
211
      case OpCode::I32x4__trunc_sat_f32x4_s:
3261
211
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3262
211
        break;
3263
3.68k
      case OpCode::I32x4__trunc_sat_f32x4_u:
3264
3.68k
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3265
3.68k
        break;
3266
334
      case OpCode::F32x4__convert_i32x4_s:
3267
334
        compileVectorConvertS(Context.Int32x4Ty, Context.Floatx4Ty, false);
3268
334
        break;
3269
725
      case OpCode::F32x4__convert_i32x4_u:
3270
725
        compileVectorConvertU(Context.Int32x4Ty, Context.Floatx4Ty, false);
3271
725
        break;
3272
739
      case OpCode::I32x4__trunc_sat_f64x2_s_zero:
3273
739
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3274
739
        break;
3275
2.11k
      case OpCode::I32x4__trunc_sat_f64x2_u_zero:
3276
2.11k
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3277
2.11k
        break;
3278
351
      case OpCode::F64x2__convert_low_i32x4_s:
3279
351
        compileVectorConvertS(Context.Int32x4Ty, Context.Doublex2Ty, true);
3280
351
        break;
3281
1.27k
      case OpCode::F64x2__convert_low_i32x4_u:
3282
1.27k
        compileVectorConvertU(Context.Int32x4Ty, Context.Doublex2Ty, true);
3283
1.27k
        break;
3284
591
      case OpCode::F32x4__demote_f64x2_zero:
3285
591
        compileVectorDemote();
3286
591
        break;
3287
625
      case OpCode::F64x2__promote_low_f32x4:
3288
625
        compileVectorPromote();
3289
625
        break;
3290
3291
      // Relaxed SIMD Instructions
3292
22
      case OpCode::I8x16__relaxed_swizzle:
3293
22
        compileVectorSwizzle();
3294
22
        break;
3295
12
      case OpCode::I32x4__relaxed_trunc_f32x4_s:
3296
12
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3297
12
        break;
3298
14
      case OpCode::I32x4__relaxed_trunc_f32x4_u:
3299
14
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3300
14
        break;
3301
20
      case OpCode::I32x4__relaxed_trunc_f64x2_s_zero:
3302
20
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3303
20
        break;
3304
11
      case OpCode::I32x4__relaxed_trunc_f64x2_u_zero:
3305
11
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3306
11
        break;
3307
10
      case OpCode::F32x4__relaxed_madd:
3308
10
        compileVectorVectorMAdd(Context.Floatx4Ty);
3309
10
        break;
3310
19
      case OpCode::F32x4__relaxed_nmadd:
3311
19
        compileVectorVectorNMAdd(Context.Floatx4Ty);
3312
19
        break;
3313
12
      case OpCode::F64x2__relaxed_madd:
3314
12
        compileVectorVectorMAdd(Context.Doublex2Ty);
3315
12
        break;
3316
13
      case OpCode::F64x2__relaxed_nmadd:
3317
13
        compileVectorVectorNMAdd(Context.Doublex2Ty);
3318
13
        break;
3319
11
      case OpCode::I8x16__relaxed_laneselect:
3320
21
      case OpCode::I16x8__relaxed_laneselect:
3321
31
      case OpCode::I32x4__relaxed_laneselect:
3322
34
      case OpCode::I64x2__relaxed_laneselect: {
3323
34
        auto C = stackPop();
3324
34
        auto V2 = stackPop();
3325
34
        auto V1 = stackPop();
3326
34
        stackPush(Builder.createXor(
3327
34
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
3328
34
        break;
3329
31
      }
3330
10
      case OpCode::F32x4__relaxed_min:
3331
10
        compileVectorVectorFMin(Context.Floatx4Ty);
3332
10
        break;
3333
10
      case OpCode::F32x4__relaxed_max:
3334
10
        compileVectorVectorFMax(Context.Floatx4Ty);
3335
10
        break;
3336
11
      case OpCode::F64x2__relaxed_min:
3337
11
        compileVectorVectorFMin(Context.Doublex2Ty);
3338
11
        break;
3339
29
      case OpCode::F64x2__relaxed_max:
3340
29
        compileVectorVectorFMax(Context.Doublex2Ty);
3341
29
        break;
3342
31
      case OpCode::I16x8__relaxed_q15mulr_s:
3343
31
        compileVectorVectorQ15MulSat();
3344
31
        break;
3345
24
      case OpCode::I16x8__relaxed_dot_i8x16_i7x16_s:
3346
24
        compileVectorRelaxedIntegerDotProduct();
3347
24
        break;
3348
16
      case OpCode::I32x4__relaxed_dot_i8x16_i7x16_add_s:
3349
16
        compileVectorRelaxedIntegerDotProductAdd();
3350
16
        break;
3351
3352
      // Atomic Instructions
3353
192
      case OpCode::Atomic__fence:
3354
192
        compileMemoryFence();
3355
192
        break;
3356
54
      case OpCode::Memory__atomic__notify:
3357
54
        compileAtomicNotify(Instr.getTargetIndex(), Instr.getMemoryOffset());
3358
54
        break;
3359
5
      case OpCode::Memory__atomic__wait32:
3360
5
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3361
5
                          Context.Int32Ty, 32);
3362
5
        break;
3363
2
      case OpCode::Memory__atomic__wait64:
3364
2
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3365
2
                          Context.Int64Ty, 64);
3366
2
        break;
3367
0
      case OpCode::I32__atomic__load:
3368
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3369
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3370
0
                          Context.Int32Ty, true);
3371
0
        break;
3372
0
      case OpCode::I64__atomic__load:
3373
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3374
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3375
0
                          Context.Int64Ty, true);
3376
0
        break;
3377
0
      case OpCode::I32__atomic__load8_u:
3378
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3379
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3380
0
                          Context.Int8Ty);
3381
0
        break;
3382
0
      case OpCode::I32__atomic__load16_u:
3383
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3384
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3385
0
                          Context.Int16Ty);
3386
0
        break;
3387
0
      case OpCode::I64__atomic__load8_u:
3388
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3389
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3390
0
                          Context.Int8Ty);
3391
0
        break;
3392
0
      case OpCode::I64__atomic__load16_u:
3393
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3394
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3395
0
                          Context.Int16Ty);
3396
0
        break;
3397
0
      case OpCode::I64__atomic__load32_u:
3398
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3399
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3400
0
                          Context.Int32Ty);
3401
0
        break;
3402
0
      case OpCode::I32__atomic__store:
3403
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3404
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3405
0
                           Context.Int32Ty, true);
3406
0
        break;
3407
0
      case OpCode::I64__atomic__store:
3408
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3409
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3410
0
                           Context.Int64Ty, true);
3411
0
        break;
3412
0
      case OpCode::I32__atomic__store8:
3413
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3414
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3415
0
                           Context.Int8Ty, true);
3416
0
        break;
3417
0
      case OpCode::I32__atomic__store16:
3418
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3419
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3420
0
                           Context.Int16Ty, true);
3421
0
        break;
3422
0
      case OpCode::I64__atomic__store8:
3423
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3424
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3425
0
                           Context.Int8Ty, true);
3426
0
        break;
3427
0
      case OpCode::I64__atomic__store16:
3428
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3429
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3430
0
                           Context.Int16Ty, true);
3431
0
        break;
3432
0
      case OpCode::I64__atomic__store32:
3433
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3434
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3435
0
                           Context.Int32Ty, true);
3436
0
        break;
3437
0
      case OpCode::I32__atomic__rmw__add:
3438
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3439
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3440
0
                           Context.Int32Ty, Context.Int32Ty, true);
3441
0
        break;
3442
0
      case OpCode::I64__atomic__rmw__add:
3443
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3444
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3445
0
                           Context.Int64Ty, Context.Int64Ty, true);
3446
0
        break;
3447
0
      case OpCode::I32__atomic__rmw8__add_u:
3448
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3449
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3450
0
                           Context.Int32Ty, Context.Int8Ty);
3451
0
        break;
3452
0
      case OpCode::I32__atomic__rmw16__add_u:
3453
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3454
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3455
0
                           Context.Int32Ty, Context.Int16Ty);
3456
0
        break;
3457
0
      case OpCode::I64__atomic__rmw8__add_u:
3458
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3459
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3460
0
                           Context.Int64Ty, Context.Int8Ty);
3461
0
        break;
3462
0
      case OpCode::I64__atomic__rmw16__add_u:
3463
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3464
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3465
0
                           Context.Int64Ty, Context.Int16Ty);
3466
0
        break;
3467
0
      case OpCode::I64__atomic__rmw32__add_u:
3468
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3469
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3470
0
                           Context.Int64Ty, Context.Int32Ty);
3471
0
        break;
3472
0
      case OpCode::I32__atomic__rmw__sub:
3473
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3474
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3475
0
                           Context.Int32Ty, Context.Int32Ty, true);
3476
0
        break;
3477
0
      case OpCode::I64__atomic__rmw__sub:
3478
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3479
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3480
0
                           Context.Int64Ty, Context.Int64Ty, true);
3481
0
        break;
3482
0
      case OpCode::I32__atomic__rmw8__sub_u:
3483
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3484
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3485
0
                           Context.Int32Ty, Context.Int8Ty);
3486
0
        break;
3487
0
      case OpCode::I32__atomic__rmw16__sub_u:
3488
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3489
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3490
0
                           Context.Int32Ty, Context.Int16Ty);
3491
0
        break;
3492
0
      case OpCode::I64__atomic__rmw8__sub_u:
3493
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3494
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3495
0
                           Context.Int64Ty, Context.Int8Ty);
3496
0
        break;
3497
0
      case OpCode::I64__atomic__rmw16__sub_u:
3498
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3499
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3500
0
                           Context.Int64Ty, Context.Int16Ty);
3501
0
        break;
3502
0
      case OpCode::I64__atomic__rmw32__sub_u:
3503
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3504
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3505
0
                           Context.Int64Ty, Context.Int32Ty);
3506
0
        break;
3507
0
      case OpCode::I32__atomic__rmw__and:
3508
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3509
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3510
0
                           Context.Int32Ty, Context.Int32Ty, true);
3511
0
        break;
3512
0
      case OpCode::I64__atomic__rmw__and:
3513
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3514
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3515
0
                           Context.Int64Ty, Context.Int64Ty, true);
3516
0
        break;
3517
0
      case OpCode::I32__atomic__rmw8__and_u:
3518
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3519
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3520
0
                           Context.Int32Ty, Context.Int8Ty);
3521
0
        break;
3522
0
      case OpCode::I32__atomic__rmw16__and_u:
3523
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3524
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3525
0
                           Context.Int32Ty, Context.Int16Ty);
3526
0
        break;
3527
0
      case OpCode::I64__atomic__rmw8__and_u:
3528
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3529
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3530
0
                           Context.Int64Ty, Context.Int8Ty);
3531
0
        break;
3532
0
      case OpCode::I64__atomic__rmw16__and_u:
3533
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3534
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3535
0
                           Context.Int64Ty, Context.Int16Ty);
3536
0
        break;
3537
0
      case OpCode::I64__atomic__rmw32__and_u:
3538
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3539
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3540
0
                           Context.Int64Ty, Context.Int32Ty);
3541
0
        break;
3542
0
      case OpCode::I32__atomic__rmw__or:
3543
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3544
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3545
0
                           Context.Int32Ty, Context.Int32Ty, true);
3546
0
        break;
3547
0
      case OpCode::I64__atomic__rmw__or:
3548
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3549
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3550
0
                           Context.Int64Ty, Context.Int64Ty, true);
3551
0
        break;
3552
0
      case OpCode::I32__atomic__rmw8__or_u:
3553
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3554
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3555
0
                           Context.Int32Ty, Context.Int8Ty);
3556
0
        break;
3557
0
      case OpCode::I32__atomic__rmw16__or_u:
3558
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3559
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3560
0
                           Context.Int32Ty, Context.Int16Ty);
3561
0
        break;
3562
0
      case OpCode::I64__atomic__rmw8__or_u:
3563
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3564
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3565
0
                           Context.Int64Ty, Context.Int8Ty);
3566
0
        break;
3567
0
      case OpCode::I64__atomic__rmw16__or_u:
3568
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3569
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3570
0
                           Context.Int64Ty, Context.Int16Ty);
3571
0
        break;
3572
0
      case OpCode::I64__atomic__rmw32__or_u:
3573
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3574
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3575
0
                           Context.Int64Ty, Context.Int32Ty);
3576
0
        break;
3577
0
      case OpCode::I32__atomic__rmw__xor:
3578
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3579
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3580
0
                           Context.Int32Ty, Context.Int32Ty, true);
3581
0
        break;
3582
0
      case OpCode::I64__atomic__rmw__xor:
3583
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3584
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3585
0
                           Context.Int64Ty, Context.Int64Ty, true);
3586
0
        break;
3587
0
      case OpCode::I32__atomic__rmw8__xor_u:
3588
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3589
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3590
0
                           Context.Int32Ty, Context.Int8Ty);
3591
0
        break;
3592
0
      case OpCode::I32__atomic__rmw16__xor_u:
3593
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3594
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3595
0
                           Context.Int32Ty, Context.Int16Ty);
3596
0
        break;
3597
0
      case OpCode::I64__atomic__rmw8__xor_u:
3598
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3599
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3600
0
                           Context.Int64Ty, Context.Int8Ty);
3601
0
        break;
3602
0
      case OpCode::I64__atomic__rmw16__xor_u:
3603
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3604
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3605
0
                           Context.Int64Ty, Context.Int16Ty);
3606
0
        break;
3607
0
      case OpCode::I64__atomic__rmw32__xor_u:
3608
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3609
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3610
0
                           Context.Int64Ty, Context.Int32Ty);
3611
0
        break;
3612
0
      case OpCode::I32__atomic__rmw__xchg:
3613
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3614
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3615
0
                           Context.Int32Ty, Context.Int32Ty, true);
3616
0
        break;
3617
0
      case OpCode::I64__atomic__rmw__xchg:
3618
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3619
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3620
0
                           Context.Int64Ty, Context.Int64Ty, true);
3621
0
        break;
3622
0
      case OpCode::I32__atomic__rmw8__xchg_u:
3623
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3624
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3625
0
                           Context.Int32Ty, Context.Int8Ty);
3626
0
        break;
3627
0
      case OpCode::I32__atomic__rmw16__xchg_u:
3628
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3629
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3630
0
                           Context.Int32Ty, Context.Int16Ty);
3631
0
        break;
3632
0
      case OpCode::I64__atomic__rmw8__xchg_u:
3633
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3634
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3635
0
                           Context.Int64Ty, Context.Int8Ty);
3636
0
        break;
3637
0
      case OpCode::I64__atomic__rmw16__xchg_u:
3638
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3639
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3640
0
                           Context.Int64Ty, Context.Int16Ty);
3641
0
        break;
3642
0
      case OpCode::I64__atomic__rmw32__xchg_u:
3643
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3644
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3645
0
                           Context.Int64Ty, Context.Int32Ty);
3646
0
        break;
3647
0
      case OpCode::I32__atomic__rmw__cmpxchg:
3648
0
        compileAtomicCompareExchange(
3649
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3650
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3651
0
        break;
3652
0
      case OpCode::I64__atomic__rmw__cmpxchg:
3653
0
        compileAtomicCompareExchange(
3654
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3655
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3656
0
        break;
3657
0
      case OpCode::I32__atomic__rmw8__cmpxchg_u:
3658
0
        compileAtomicCompareExchange(
3659
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3660
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3661
0
        break;
3662
0
      case OpCode::I32__atomic__rmw16__cmpxchg_u:
3663
0
        compileAtomicCompareExchange(
3664
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3665
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3666
0
        break;
3667
0
      case OpCode::I64__atomic__rmw8__cmpxchg_u:
3668
0
        compileAtomicCompareExchange(
3669
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3670
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3671
0
        break;
3672
0
      case OpCode::I64__atomic__rmw16__cmpxchg_u:
3673
0
        compileAtomicCompareExchange(
3674
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3675
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3676
0
        break;
3677
0
      case OpCode::I64__atomic__rmw32__cmpxchg_u:
3678
0
        compileAtomicCompareExchange(
3679
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3680
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3681
0
        break;
3682
3683
0
      default:
3684
0
        assumingUnreachable();
3685
1.09M
      }
3686
1.09M
      return {};
3687
1.09M
    };
3688
3689
1.61M
    for (const auto &Instr : Instrs) {
3690
      // Update instruction count
3691
1.61M
      if (LocalInstrCount) {
3692
0
        Builder.createStore(
3693
0
            Builder.createAdd(
3694
0
                Builder.createLoad(Context.Int64Ty, LocalInstrCount),
3695
0
                LLContext.getInt64(1)),
3696
0
            LocalInstrCount);
3697
0
      }
3698
1.61M
      if (LocalGas) {
3699
0
        auto NewGas = Builder.createAdd(
3700
0
            Builder.createLoad(Context.Int64Ty, LocalGas),
3701
0
            Builder.createLoad(
3702
0
                Context.Int64Ty,
3703
0
                Builder.createConstInBoundsGEP2_64(
3704
0
                    LLVM::Type::getArrayType(Context.Int64Ty, UINT16_MAX + 1),
3705
0
                    Context.getCostTable(Builder, ExecCtx), 0,
3706
0
                    uint16_t(Instr.getOpCode()))));
3707
0
        Builder.createStore(NewGas, LocalGas);
3708
0
      }
3709
3710
      // Make the instruction node according to Code.
3711
1.61M
      EXPECTED_TRY(Dispatch(Instr));
3712
1.61M
    }
3713
11.4k
    return {};
3714
11.4k
  }
3715
2.14k
  // Emit a trapping float -> signed integer truncation.
  //
  // Pops one floating-point operand from the value stack and pushes the
  // operand truncated and converted to `IntType` (32 or 64 bits wide).
  // The emitted code branches to the InvalidConvToInt trap block when the
  // operand is unordered with itself (NaN), and to the IntegerOverflow trap
  // block when the truncated value lies outside the bounds derived from
  // `IntType`.
  void compileSignedTrunc(LLVM::Type IntType) noexcept {
    // All continuation blocks are created first, in this fixed order.
    auto OrderedBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.norm");
    auto LowerOkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmin");
    auto UpperOkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmax");
    auto Operand = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t Lowest = 0;
    int64_t Highest = 0;
    switch (IntBits) {
    case 32:
      Lowest = std::numeric_limits<int32_t>::min();
      Highest = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      Lowest = std::numeric_limits<int64_t>::min();
      Highest = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    // Express both bounds as constants of the operand's floating-point type.
    auto OperandTy = Operand.getType();
    assuming(OperandTy.isFloatTy() || OperandTy.isDoubleTy());
    const auto MantissaBits = OperandTy.getFPMantissaWidth();
    // True when the integer is no wider than the mantissa. It selects an
    // inclusive (OLE) rather than exclusive (OLT) upper-bound comparison below.
    // NOTE(review): presumably the upper bound is not exactly representable
    // otherwise - confirm.
    const bool Exact = IntBits <= MantissaBits;
    auto LowerFp = LLVM::Value::getConstReal(OperandTy, Lowest);
    auto UpperFp = LLVM::Value::getConstReal(OperandTy, Highest);

    // NaN check: an ordered self-comparison fails only for NaN.
    auto Ordered = Builder.createLikely(Builder.createFCmpORD(Operand, Operand));
    auto NanTrapBB = getTrapBB(ErrCode::Value::InvalidConvToInt);
    Builder.createCondBr(Ordered, OrderedBB, NanTrapBB);

    // Lower-bound check on the truncated value.
    Builder.positionAtEnd(OrderedBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveLower =
        Builder.createLikely(Builder.createFCmpOGE(Truncated, LowerFp));
    auto UnderflowTrapBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(AboveLower, LowerOkBB, UnderflowTrapBB);

    // Upper-bound check on the truncated value.
    Builder.positionAtEnd(LowerOkBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowUpper = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Truncated, UpperFp));
    auto OverflowTrapBB = getTrapBB(ErrCode::Value::IntegerOverflow);
    Builder.createCondBr(BelowUpper, UpperOkBB, OverflowTrapBB);

    // In range: convert and push the result.
    Builder.positionAtEnd(UpperOkBB);
    auto Converted = Builder.createFPToSI(Truncated, IntType);
    stackPush(Converted);
  }
3763
1.47k
  // Emit a saturating (non-trapping) float -> signed integer truncation.
  //
  // Pops one floating-point operand from the value stack and pushes a PHI of
  // type `IntType` (32 or 64 bits wide) that selects:
  //   - 0 when the operand is unordered with itself (NaN),
  //   - the integer minimum when the truncated value fails the lower bound,
  //   - the integer maximum when the truncated value fails the upper bound,
  //   - the converted value otherwise.
  void compileSignedTruncSat(LLVM::Type IntType) noexcept {
    // Remember the block the NaN branch originates from, for the PHI below.
    auto EntryBB = Builder.getInsertBlock();
    // All continuation blocks are created first, in this fixed order.
    auto OrderedBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.norm");
    auto LowerOkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmin");
    auto UpperOkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.end");
    auto Operand = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t Lowest = 0;
    int64_t Highest = 0;
    switch (IntBits) {
    case 32:
      Lowest = std::numeric_limits<int32_t>::min();
      Highest = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      Lowest = std::numeric_limits<int64_t>::min();
      Highest = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    // Bounds as floating-point constants of the operand's type (for the
    // comparisons) and as raw 64-bit patterns (for the saturated results).
    auto OperandTy = Operand.getType();
    assuming(OperandTy.isFloatTy() || OperandTy.isDoubleTy());
    const auto MantissaBits = OperandTy.getFPMantissaWidth();
    // True when the integer is no wider than the mantissa. It selects an
    // inclusive (OLE) rather than exclusive (OLT) upper-bound comparison below.
    const bool Exact = IntBits <= MantissaBits;
    const uint64_t LowestBits = static_cast<uint64_t>(Lowest);
    const uint64_t HighestBits = static_cast<uint64_t>(Highest);
    auto LowerFp = LLVM::Value::getConstReal(OperandTy, Lowest);
    auto UpperFp = LLVM::Value::getConstReal(OperandTy, Highest);

    // NaN check: an ordered self-comparison fails only for NaN.
    auto Ordered = Builder.createLikely(Builder.createFCmpORD(Operand, Operand));
    Builder.createCondBr(Ordered, OrderedBB, JoinBB);

    // Lower-bound check on the truncated value.
    Builder.positionAtEnd(OrderedBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveLower =
        Builder.createLikely(Builder.createFCmpOGE(Truncated, LowerFp));
    Builder.createCondBr(AboveLower, LowerOkBB, JoinBB);

    // Upper-bound check on the truncated value.
    Builder.positionAtEnd(LowerOkBB);
    const auto UpperPredicate = Exact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowUpper = Builder.createLikely(
        Builder.createFCmp(UpperPredicate, Truncated, UpperFp));
    Builder.createCondBr(BelowUpper, UpperOkBB, JoinBB);

    // In range: convert, then fall into the join block.
    Builder.positionAtEnd(UpperOkBB);
    auto Converted = Builder.createFPToSI(Truncated, IntType);
    Builder.createBr(JoinBB);

    // Merge the four possible outcomes, keyed by predecessor block.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    auto ZeroInt = LLVM::Value::getConstInt(IntType, 0, true);
    Result.addIncoming(ZeroInt, EntryBB);
    auto LowestInt = LLVM::Value::getConstInt(IntType, LowestBits, true);
    Result.addIncoming(LowestInt, OrderedBB);
    auto HighestInt = LLVM::Value::getConstInt(IntType, HighestBits, true);
    Result.addIncoming(HighestInt, LowerOkBB);
    Result.addIncoming(Converted, UpperOkBB);

    stackPush(Result);
  }
3823
4.24k
  /// Lower a trapping float -> unsigned integer truncation.
  ///
  /// Pops one operand (asserted to be `float` or `double`) and pushes the
  /// `fptoui` of its truncated value to `IntType` (32 or 64 bits wide).
  /// The emitted code branches to the `InvalidConvToInt` trap block when the
  /// operand is NaN, and to the `IntegerOverflow` trap block when the truncated
  /// operand lies outside the unsigned range of `IntType`.
  void compileUnsignedTrunc(LLVM::Type IntType) noexcept {
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.norm");
    auto NotMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmin");
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmax");
    auto Value = stackPop();

    // Integer limits of the destination type.
    const auto BitWidth = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (BitWidth) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    // The same limits as floating-point constants of the operand type.
    auto FPType = Value.getType();
    assuming(FPType.isFloatTy() || FPType.isDoubleTy());
    // When the mantissa is at least as wide as the integer the upper bound is
    // compared inclusively, otherwise exclusively (presumably because the
    // bound then rounds up when converted to floating point).
    const bool Precise = BitWidth <= FPType.getFPMantissaWidth();
    const auto MinFp = LLVM::Value::getConstReal(FPType, LowerBound);
    const auto MaxFp = LLVM::Value::getConstReal(FPType, UpperBound);

    // NaN is the only value that is unordered with itself.
    auto IsOrdered = Builder.createLikely(Builder.createFCmpORD(Value, Value));
    Builder.createCondBr(IsOrdered, NormBB,
                         getTrapBB(ErrCode::Value::InvalidConvToInt));

    Builder.positionAtEnd(NormBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
    auto AboveMin = Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
    Builder.createCondBr(AboveMin, NotMinBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(NotMinBB);
    const auto UpperPredicate = Precise ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax =
        Builder.createLikely(Builder.createFCmp(UpperPredicate, Trunc, MaxFp));
    Builder.createCondBr(BelowMax, NotMaxBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(NotMaxBB);
    stackPush(Builder.createFPToUI(Trunc, IntType));
  }
3871
1.44k
  /// Lower a saturating float -> unsigned integer truncation.
  ///
  /// Pops one operand (asserted to be `float` or `double`) and pushes a PHI of
  /// type `IntType` (32 or 64 bits wide) that selects:
  ///  - the unsigned minimum when the lower-bound comparison fails (an ordered
  ///    compare, so it also fails for NaN),
  ///  - the unsigned maximum when the upper-bound comparison fails,
  ///  - the `fptoui` of the truncated operand otherwise.
  void compileUnsignedTruncSat(LLVM::Type IntType) noexcept {
    auto CurrBB = Builder.getInsertBlock();
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.norm");
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.notmax");
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.end");
    auto Value = stackPop();

    // Saturation limits of the destination type.
    const auto BitWidth = IntType.getIntegerBitWidth();
    uint64_t MinInt = 0;
    uint64_t MaxInt = 0;
    switch (BitWidth) {
    case 32:
      MinInt = std::numeric_limits<uint32_t>::min();
      MaxInt = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      MinInt = std::numeric_limits<uint64_t>::min();
      MaxInt = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    // The same limits as floating-point constants of the operand type.
    auto FPType = Value.getType();
    assuming(FPType.isFloatTy() || FPType.isDoubleTy());
    const bool Precise = BitWidth <= FPType.getFPMantissaWidth();
    const auto MinFp = LLVM::Value::getConstReal(FPType, MinInt);
    const auto MaxFp = LLVM::Value::getConstReal(FPType, MaxInt);

    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
    auto AboveMin = Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
    Builder.createCondBr(AboveMin, NormBB, EndBB);

    Builder.positionAtEnd(NormBB);
    const auto UpperPredicate = Precise ? LLVMRealOLE : LLVMRealOLT;
    auto BelowMax =
        Builder.createLikely(Builder.createFCmp(UpperPredicate, Trunc, MaxFp));
    Builder.createCondBr(BelowMax, NotMaxBB, EndBB);

    Builder.positionAtEnd(NotMaxBB);
    auto Converted = Builder.createFPToUI(Trunc, IntType);
    Builder.createBr(EndBB);

    // Merge the three outcomes; each incoming edge names the block it left.
    Builder.positionAtEnd(EndBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, MinInt), CurrBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, MaxInt), NormBB);
    Result.addIncoming(Converted, NotMaxBB);

    stackPush(Result);
  }
3923
3924
  /// Emit a run-time alignment check for an atomic memory access.
  ///
  /// \param Offset  address value to test; it is masked against an i64
  ///                constant, so callers pass a 64-bit value.
  /// \param IntType integer type of the access; its size in bytes
  ///                (bit width / 8) is the required alignment.
  ///
  /// Branches to the `UnalignedAtomicAccess` trap block when any of the low
  /// address bits is set. Otherwise execution continues in a new
  /// "address_align_ok" block, where the builder is left positioned.
  void compileAtomicCheckOffsetAlignment(LLVM::Value Offset,
                                         LLVM::Type IntType) noexcept {
    // Byte size minus one: a low-bit mask as long as the size is a power of
    // two.
    const auto AccessBytes = IntType.getIntegerBitWidth() >> 3;
    auto LowBits =
        Builder.createAnd(Offset, LLContext.getInt64(AccessBytes - 1));

    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "address_align_ok");
    auto Aligned = Builder.createLikely(
        Builder.createICmpEQ(LowBits, LLContext.getInt64(0)));
    auto TrapBB = getTrapBB(ErrCode::Value::UnalignedAtomicAccess);
    Builder.createCondBr(Aligned, OkBB, TrapBB);

    Builder.positionAtEnd(OkBB);
  }
3937
3938
192
  void compileMemoryFence() noexcept {
3939
192
    Builder.createFence(LLVMAtomicOrderingSequentiallyConsistent);
3940
192
  }
3941
  void compileAtomicNotify(unsigned MemoryIndex,
3942
54
                           unsigned MemoryOffset) noexcept {
3943
54
    auto Count = stackPop();
3944
54
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3945
54
    if (MemoryOffset != 0) {
3946
47
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3947
47
    }
3948
54
    compileAtomicCheckOffsetAlignment(Addr, Context.Int32Ty);
3949
54
    auto Offset = stackPop();
3950
3951
54
    stackPush(Builder.createCall(
3952
54
        Context.getIntrinsic(
3953
54
            Builder, Executable::Intrinsics::kMemAtomicNotify,
3954
54
            LLVM::Type::getFunctionType(
3955
54
                Context.Int32Ty,
3956
54
                {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
3957
54
        {LLContext.getInt32(MemoryIndex), Offset, Count}));
3958
54
  }
3959
  void compileAtomicWait(unsigned MemoryIndex, unsigned MemoryOffset,
3960
7
                         LLVM::Type TargetType, uint32_t BitWidth) noexcept {
3961
7
    auto Timeout = stackPop();
3962
7
    auto ExpectedValue = Builder.createZExtOrTrunc(stackPop(), Context.Int64Ty);
3963
7
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3964
7
    if (MemoryOffset != 0) {
3965
3
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3966
3
    }
3967
7
    compileAtomicCheckOffsetAlignment(Addr, TargetType);
3968
7
    auto Offset = stackPop();
3969
3970
7
    stackPush(Builder.createCall(
3971
7
        Context.getIntrinsic(
3972
7
            Builder, Executable::Intrinsics::kMemAtomicWait,
3973
7
            LLVM::Type::getFunctionType(Context.Int32Ty,
3974
7
                                        {Context.Int32Ty, Context.Int32Ty,
3975
7
                                         Context.Int64Ty, Context.Int64Ty,
3976
7
                                         Context.Int32Ty},
3977
7
                                        false)),
3978
7
        {LLContext.getInt32(MemoryIndex), Offset, ExpectedValue, Timeout,
3979
7
         LLContext.getInt32(BitWidth)}));
3980
7
  }
3981
  void compileAtomicLoad(unsigned MemoryIndex, unsigned MemoryOffset,
3982
                         unsigned Alignment, LLVM::Type IntType,
3983
0
                         LLVM::Type TargetType, bool Signed = false) noexcept {
3984
3985
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3986
0
    if (MemoryOffset != 0) {
3987
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3988
0
    }
3989
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3990
0
    auto VPtr = Builder.createInBoundsGEP1(
3991
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3992
0
        Offset);
3993
3994
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
3995
0
    auto Load = switchEndian(Builder.createLoad(TargetType, Ptr, true));
3996
0
    Load.setAlignment(1 << Alignment);
3997
0
    Load.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
3998
3999
0
    if (Signed) {
4000
0
      Stack.back() = Builder.createSExt(Load, IntType);
4001
0
    } else {
4002
0
      Stack.back() = Builder.createZExt(Load, IntType);
4003
0
    }
4004
0
  }
4005
  void compileAtomicStore(unsigned MemoryIndex, unsigned MemoryOffset,
4006
                          unsigned Alignment, LLVM::Type, LLVM::Type TargetType,
4007
0
                          bool Signed = false) noexcept {
4008
0
    auto V = stackPop();
4009
4010
0
    if (Signed) {
4011
0
      V = Builder.createSExtOrTrunc(V, TargetType);
4012
0
    } else {
4013
0
      V = Builder.createZExtOrTrunc(V, TargetType);
4014
0
    }
4015
0
    V = switchEndian(V);
4016
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4017
0
    if (MemoryOffset != 0) {
4018
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4019
0
    }
4020
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4021
0
    auto VPtr = Builder.createInBoundsGEP1(
4022
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4023
0
        Offset);
4024
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4025
0
    auto Store = Builder.createStore(V, Ptr, true);
4026
0
    Store.setAlignment(1 << Alignment);
4027
0
    Store.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
4028
0
  }
4029
4030
  void compileAtomicRMWOp(unsigned MemoryIndex, unsigned MemoryOffset,
4031
                          [[maybe_unused]] unsigned Alignment,
4032
                          LLVMAtomicRMWBinOp BinOp, LLVM::Type IntType,
4033
0
                          LLVM::Type TargetType, bool Signed = false) noexcept {
4034
0
    auto Value = Builder.createSExtOrTrunc(stackPop(), TargetType);
4035
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4036
0
    if (MemoryOffset != 0) {
4037
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4038
0
    }
4039
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4040
0
    auto VPtr = Builder.createInBoundsGEP1(
4041
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4042
0
        Offset);
4043
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4044
4045
0
    LLVM::Value Ret;
4046
    if constexpr (Endian::native == Endian::big) {
4047
      if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd ||
4048
          BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4049
        auto AtomicBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw");
4050
        auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw.ok");
4051
        Builder.createBr(AtomicBB);
4052
        Builder.positionAtEnd(AtomicBB);
4053
4054
        auto Load = Builder.createLoad(TargetType, Ptr, true);
4055
        Load.setOrdering(LLVMAtomicOrderingMonotonic);
4056
        Load.setAlignment(1 << Alignment);
4057
4058
        LLVM::Value New;
4059
        if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd)
4060
          New = Builder.createAdd(switchEndian(Load), Value);
4061
        else if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4062
          New = Builder.createSub(switchEndian(Load), Value);
4063
        } else {
4064
          assumingUnreachable();
4065
        }
4066
        New = switchEndian(New);
4067
4068
        auto Exchange = Builder.createAtomicCmpXchg(
4069
            Ptr, Load, New, LLVMAtomicOrderingSequentiallyConsistent,
4070
            LLVMAtomicOrderingSequentiallyConsistent);
4071
4072
        Ret = Builder.createExtractValue(Exchange, 0);
4073
        auto Success = Builder.createExtractValue(Exchange, 1);
4074
        Builder.createCondBr(Success, OkBB, AtomicBB);
4075
        Builder.positionAtEnd(OkBB);
4076
      } else {
4077
        Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4078
                                      LLVMAtomicOrderingSequentiallyConsistent);
4079
      }
4080
0
    } else {
4081
0
      Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4082
0
                                    LLVMAtomicOrderingSequentiallyConsistent);
4083
0
    }
4084
0
    Ret = switchEndian(Ret);
4085
#if LLVM_VERSION_MAJOR >= 13
4086
    Ret.setAlignment(1 << Alignment);
4087
#endif
4088
0
    if (Signed) {
4089
0
      Stack.back() = Builder.createSExt(Ret, IntType);
4090
0
    } else {
4091
0
      Stack.back() = Builder.createZExt(Ret, IntType);
4092
0
    }
4093
0
  }
4094
  void compileAtomicCompareExchange(unsigned MemoryIndex, unsigned MemoryOffset,
4095
                                    [[maybe_unused]] unsigned Alignment,
4096
                                    LLVM::Type IntType, LLVM::Type TargetType,
4097
0
                                    bool Signed = false) noexcept {
4098
4099
0
    auto Replacement = Builder.createSExtOrTrunc(stackPop(), TargetType);
4100
0
    auto Expected = Builder.createSExtOrTrunc(stackPop(), TargetType);
4101
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4102
0
    if (MemoryOffset != 0) {
4103
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4104
0
    }
4105
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4106
0
    auto VPtr = Builder.createInBoundsGEP1(
4107
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4108
0
        Offset);
4109
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4110
4111
0
    auto Ret = Builder.createAtomicCmpXchg(
4112
0
        Ptr, switchEndian(Expected), switchEndian(Replacement),
4113
0
        LLVMAtomicOrderingSequentiallyConsistent,
4114
0
        LLVMAtomicOrderingSequentiallyConsistent);
4115
#if LLVM_VERSION_MAJOR >= 13
4116
    Ret.setAlignment(1 << Alignment);
4117
#endif
4118
0
    auto OldVal = Builder.createExtractValue(Ret, 0);
4119
0
    OldVal = switchEndian(OldVal);
4120
0
    if (Signed) {
4121
0
      Stack.back() = Builder.createSExt(OldVal, IntType);
4122
0
    } else {
4123
0
      Stack.back() = Builder.createZExt(OldVal, IntType);
4124
0
    }
4125
0
  }
4126
4127
12.1k
  void compileReturn() noexcept {
4128
12.1k
    updateInstrCount();
4129
12.1k
    updateGas();
4130
12.1k
    auto Ty = F.Ty.getReturnType();
4131
12.1k
    if (Ty.isVoidTy()) {
4132
2.21k
      Builder.createRetVoid();
4133
9.93k
    } else if (Ty.isStructTy()) {
4134
378
      const auto Count = Ty.getStructNumElements();
4135
378
      std::vector<LLVM::Value> Ret(Count);
4136
1.39k
      for (unsigned I = 0; I < Count; ++I) {
4137
1.01k
        const unsigned J = Count - 1 - I;
4138
1.01k
        Ret[J] = stackPop();
4139
1.01k
      }
4140
378
      Builder.createAggregateRet(Ret);
4141
9.55k
    } else {
4142
9.55k
      Builder.createRet(stackPop());
4143
9.55k
    }
4144
12.1k
  }
4145
4146
20.3k
  void updateInstrCount() noexcept {
4147
20.3k
    if (LocalInstrCount) {
4148
0
      auto Store [[maybe_unused]] = Builder.createAtomicRMW(
4149
0
          LLVMAtomicRMWBinOpAdd, Context.getInstrCount(Builder, ExecCtx),
4150
0
          Builder.createLoad(Context.Int64Ty, LocalInstrCount),
4151
0
          LLVMAtomicOrderingMonotonic);
4152
#if LLVM_VERSION_MAJOR >= 13
4153
      Store.setAlignment(8);
4154
#endif
4155
0
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
4156
0
    }
4157
20.3k
  }
4158
4159
23.0k
  void updateGas() noexcept {
4160
23.0k
    if (LocalGas) {
4161
0
      auto CurrBB = Builder.getInsertBlock();
4162
0
      auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_check");
4163
0
      auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_ok");
4164
0
      auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_end");
4165
4166
0
      auto Cost = Builder.createLoad(Context.Int64Ty, LocalGas);
4167
0
      Cost.setAlignment(64);
4168
0
      auto GasPtr = Context.getGas(Builder, ExecCtx);
4169
0
      auto GasLimit = Context.getGasLimit(Builder, ExecCtx);
4170
0
      auto Gas = Builder.createLoad(Context.Int64Ty, GasPtr);
4171
0
      Gas.setAlignment(64);
4172
0
      Gas.setOrdering(LLVMAtomicOrderingMonotonic);
4173
0
      Builder.createBr(CheckBB);
4174
0
      Builder.positionAtEnd(CheckBB);
4175
4176
0
      auto PHIOldGas = Builder.createPHI(Context.Int64Ty);
4177
0
      auto NewGas = Builder.createAdd(PHIOldGas, Cost);
4178
0
      auto IsGasRemain =
4179
0
          Builder.createLikely(Builder.createICmpULE(NewGas, GasLimit));
4180
0
      Builder.createCondBr(IsGasRemain, OkBB,
4181
0
                           getTrapBB(ErrCode::Value::CostLimitExceeded));
4182
0
      Builder.positionAtEnd(OkBB);
4183
4184
0
      auto RGasAndSucceed = Builder.createAtomicCmpXchg(
4185
0
          GasPtr, PHIOldGas, NewGas, LLVMAtomicOrderingMonotonic,
4186
0
          LLVMAtomicOrderingMonotonic);
4187
#if LLVM_VERSION_MAJOR >= 13
4188
      RGasAndSucceed.setAlignment(8);
4189
#endif
4190
0
      RGasAndSucceed.setWeak(true);
4191
0
      auto RGas = Builder.createExtractValue(RGasAndSucceed, 0);
4192
0
      auto Succeed = Builder.createExtractValue(RGasAndSucceed, 1);
4193
0
      Builder.createCondBr(Builder.createLikely(Succeed), EndBB, CheckBB);
4194
0
      Builder.positionAtEnd(EndBB);
4195
4196
0
      Builder.createStore(LLContext.getInt64(0), LocalGas);
4197
4198
0
      PHIOldGas.addIncoming(Gas, CurrBB);
4199
0
      PHIOldGas.addIncoming(RGas, OkBB);
4200
0
    }
4201
23.0k
  }
4202
4203
3.32k
  void updateGasAtTrap() noexcept {
4204
3.32k
    if (LocalGas) {
4205
0
      auto Update [[maybe_unused]] = Builder.createAtomicRMW(
4206
0
          LLVMAtomicRMWBinOpAdd, Context.getGas(Builder, ExecCtx),
4207
0
          Builder.createLoad(Context.Int64Ty, LocalGas),
4208
0
          LLVMAtomicOrderingMonotonic);
4209
#if LLVM_VERSION_MAJOR >= 13
4210
      Update.setAlignment(8);
4211
#endif
4212
0
    }
4213
3.32k
  }
4214
4215
private:
4216
3.34k
  void compileCallOp(const unsigned int FuncIndex) noexcept {
4217
3.34k
    const auto &FuncType =
4218
3.34k
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4219
3.34k
            ->getFuncType();
4220
3.34k
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4221
3.34k
    const auto &ParamTypes = FuncType.getParamTypes();
4222
4223
3.34k
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4224
3.34k
    Args[0] = F.Fn.getFirstParam();
4225
4.21k
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4226
861
      const size_t J = ParamTypes.size() - 1 - I;
4227
861
      Args[J + 1] = stackPop();
4228
861
    }
4229
4230
3.34k
    auto Ret = Builder.createCall(Function, Args);
4231
3.34k
    auto Ty = Ret.getType();
4232
3.34k
    if (Ty.isVoidTy()) {
4233
      // nothing to do
4234
1.88k
    } else if (Ty.isStructTy()) {
4235
181
      for (auto Val : unpackStruct(Builder, Ret)) {
4236
181
        stackPush(Val);
4237
181
      }
4238
1.38k
    } else {
4239
1.38k
      stackPush(Ret);
4240
1.38k
    }
4241
3.34k
  }
4242
4243
  void compileIndirectCallOp(const uint32_t TableIndex,
4244
1.21k
                             const uint32_t FuncTypeIndex) noexcept {
4245
1.21k
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4246
1.21k
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4247
1.21k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4248
4249
1.21k
    LLVM::Value FuncIndex = stackPop();
4250
1.21k
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4251
1.21k
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4252
1.21k
    auto RTy = FTy.getReturnType();
4253
4254
1.21k
    const size_t ArgSize = FuncType.getParamTypes().size();
4255
1.21k
    const size_t RetSize =
4256
1.21k
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4257
1.21k
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4258
1.21k
    ArgsVec[0] = F.Fn.getFirstParam();
4259
2.18k
    for (size_t I = 0; I < ArgSize; ++I) {
4260
963
      const size_t J = ArgSize - I;
4261
963
      ArgsVec[J] = stackPop();
4262
963
    }
4263
4264
1.21k
    std::vector<LLVM::Value> FPtrRetsVec;
4265
1.21k
    FPtrRetsVec.reserve(RetSize);
4266
1.21k
    {
4267
1.21k
      auto FPtr = Builder.createCall(
4268
1.21k
          Context.getIntrinsic(
4269
1.21k
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4270
1.21k
              LLVM::Type::getFunctionType(
4271
1.21k
                  FTy.getPointerTo(),
4272
1.21k
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4273
1.21k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4274
1.21k
           FuncIndex});
4275
1.21k
      Builder.createCondBr(
4276
1.21k
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4277
1.21k
          NotNullBB, IsNullBB);
4278
1.21k
      Builder.positionAtEnd(NotNullBB);
4279
4280
1.21k
      auto FPtrRet =
4281
1.21k
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4282
1.21k
      if (RetSize == 0) {
4283
        // nothing to do
4284
873
      } else if (RetSize == 1) {
4285
852
        FPtrRetsVec.push_back(FPtrRet);
4286
852
      } else {
4287
42
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4288
42
          FPtrRetsVec.push_back(Val);
4289
42
        }
4290
21
      }
4291
1.21k
    }
4292
4293
1.21k
    Builder.createBr(EndBB);
4294
1.21k
    Builder.positionAtEnd(IsNullBB);
4295
4296
1.21k
    std::vector<LLVM::Value> RetsVec;
4297
1.21k
    {
4298
1.21k
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4299
1.21k
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4300
1.21k
      Builder.createArrayPtrStore(
4301
1.21k
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4302
1.21k
          kValSize);
4303
4304
1.21k
      Builder.createCall(
4305
1.21k
          Context.getIntrinsic(
4306
1.21k
              Builder, Executable::Intrinsics::kCallIndirect,
4307
1.21k
              LLVM::Type::getFunctionType(Context.VoidTy,
4308
1.21k
                                          {Context.Int32Ty, Context.Int32Ty,
4309
1.21k
                                           Context.Int32Ty, Context.Int8PtrTy,
4310
1.21k
                                           Context.Int8PtrTy},
4311
1.21k
                                          false)),
4312
1.21k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4313
1.21k
           FuncIndex, Args, Rets});
4314
4315
1.21k
      if (RetSize == 0) {
4316
        // nothing to do
4317
873
      } else if (RetSize == 1) {
4318
852
        RetsVec.push_back(
4319
852
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4320
852
      } else {
4321
21
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4322
21
                                             kValSize);
4323
21
      }
4324
1.21k
      Builder.createBr(EndBB);
4325
1.21k
      Builder.positionAtEnd(EndBB);
4326
1.21k
    }
4327
4328
2.11k
    for (unsigned I = 0; I < RetSize; ++I) {
4329
894
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4330
894
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4331
894
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4332
894
      stackPush(PHIRet);
4333
894
    }
4334
1.21k
  }
4335
4336
78
  void compileReturnCallOp(const unsigned int FuncIndex) noexcept {
4337
78
    const auto &FuncType =
4338
78
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4339
78
            ->getFuncType();
4340
78
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4341
78
    const auto &ParamTypes = FuncType.getParamTypes();
4342
4343
78
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4344
78
    Args[0] = F.Fn.getFirstParam();
4345
149
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4346
71
      const size_t J = ParamTypes.size() - 1 - I;
4347
71
      Args[J + 1] = stackPop();
4348
71
    }
4349
4350
78
    auto Ret = Builder.createCall(Function, Args);
4351
78
    auto Ty = Ret.getType();
4352
78
    if (Ty.isVoidTy()) {
4353
11
      Builder.createRetVoid();
4354
67
    } else {
4355
67
      Builder.createRet(Ret);
4356
67
    }
4357
78
  }
4358
4359
  void compileReturnIndirectCallOp(const uint32_t TableIndex,
4360
209
                                   const uint32_t FuncTypeIndex) noexcept {
4361
209
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4362
209
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4363
4364
209
    LLVM::Value FuncIndex = stackPop();
4365
209
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4366
209
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4367
209
    auto RTy = FTy.getReturnType();
4368
4369
209
    const size_t ArgSize = FuncType.getParamTypes().size();
4370
209
    const size_t RetSize =
4371
209
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4372
209
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4373
209
    ArgsVec[0] = F.Fn.getFirstParam();
4374
402
    for (size_t I = 0; I < ArgSize; ++I) {
4375
193
      const size_t J = ArgSize - I;
4376
193
      ArgsVec[J] = stackPop();
4377
193
    }
4378
4379
209
    {
4380
209
      auto FPtr = Builder.createCall(
4381
209
          Context.getIntrinsic(
4382
209
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4383
209
              LLVM::Type::getFunctionType(
4384
209
                  FTy.getPointerTo(),
4385
209
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4386
209
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4387
209
           FuncIndex});
4388
209
      Builder.createCondBr(
4389
209
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4390
209
          NotNullBB, IsNullBB);
4391
209
      Builder.positionAtEnd(NotNullBB);
4392
4393
209
      auto FPtrRet =
4394
209
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4395
209
      if (RetSize == 0) {
4396
27
        Builder.createRetVoid();
4397
182
      } else {
4398
182
        Builder.createRet(FPtrRet);
4399
182
      }
4400
209
    }
4401
4402
209
    Builder.positionAtEnd(IsNullBB);
4403
4404
209
    {
4405
209
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4406
209
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4407
209
      Builder.createArrayPtrStore(
4408
209
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4409
209
          kValSize);
4410
4411
209
      Builder.createCall(
4412
209
          Context.getIntrinsic(
4413
209
              Builder, Executable::Intrinsics::kCallIndirect,
4414
209
              LLVM::Type::getFunctionType(Context.VoidTy,
4415
209
                                          {Context.Int32Ty, Context.Int32Ty,
4416
209
                                           Context.Int32Ty, Context.Int8PtrTy,
4417
209
                                           Context.Int8PtrTy},
4418
209
                                          false)),
4419
209
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4420
209
           FuncIndex, Args, Rets});
4421
4422
209
      if (RetSize == 0) {
4423
27
        Builder.createRetVoid();
4424
182
      } else if (RetSize == 1) {
4425
169
        Builder.createRet(
4426
169
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4427
169
      } else {
4428
13
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4429
13
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4430
13
      }
4431
209
    }
4432
209
  }
4433
4434
15
  void compileCallRefOp(const unsigned int TypeIndex) noexcept {
4435
15
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4436
15
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4437
15
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4438
4439
15
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4440
15
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4441
15
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4442
15
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4443
15
        LLContext.getInt64(0)));
4444
15
    Builder.createCondBr(IsRefNotNull, OkBB,
4445
15
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4446
15
    Builder.positionAtEnd(OkBB);
4447
4448
15
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4449
15
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4450
15
    auto RTy = FTy.getReturnType();
4451
4452
15
    const size_t ArgSize = FuncType.getParamTypes().size();
4453
15
    const size_t RetSize =
4454
15
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4455
15
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4456
15
    ArgsVec[0] = F.Fn.getFirstParam();
4457
16
    for (size_t I = 0; I < ArgSize; ++I) {
4458
1
      const size_t J = ArgSize - I;
4459
1
      ArgsVec[J] = stackPop();
4460
1
    }
4461
4462
15
    std::vector<LLVM::Value> FPtrRetsVec;
4463
15
    FPtrRetsVec.reserve(RetSize);
4464
15
    {
4465
15
      auto FPtr = Builder.createCall(
4466
15
          Context.getIntrinsic(
4467
15
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4468
15
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4469
15
                                          {Context.Int64x2Ty}, false)),
4470
15
          {Ref});
4471
15
      Builder.createCondBr(
4472
15
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4473
15
          NotNullBB, IsNullBB);
4474
15
      Builder.positionAtEnd(NotNullBB);
4475
4476
15
      auto FPtrRet =
4477
15
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4478
15
      if (RetSize == 0) {
4479
        // nothing to do
4480
11
      } else if (RetSize == 1) {
4481
4
        FPtrRetsVec.push_back(FPtrRet);
4482
4
      } else {
4483
0
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4484
0
          FPtrRetsVec.push_back(Val);
4485
0
        }
4486
0
      }
4487
15
    }
4488
4489
15
    Builder.createBr(EndBB);
4490
15
    Builder.positionAtEnd(IsNullBB);
4491
4492
15
    std::vector<LLVM::Value> RetsVec;
4493
15
    {
4494
15
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4495
15
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4496
15
      Builder.createArrayPtrStore(
4497
15
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4498
15
          kValSize);
4499
4500
15
      Builder.createCall(
4501
15
          Context.getIntrinsic(
4502
15
              Builder, Executable::Intrinsics::kCallRef,
4503
15
              LLVM::Type::getFunctionType(
4504
15
                  Context.VoidTy,
4505
15
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4506
15
                  false)),
4507
15
          {Ref, Args, Rets});
4508
4509
15
      if (RetSize == 0) {
4510
        // nothing to do
4511
11
      } else if (RetSize == 1) {
4512
4
        RetsVec.push_back(
4513
4
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4514
4
      } else {
4515
0
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4516
0
                                             kValSize);
4517
0
      }
4518
15
      Builder.createBr(EndBB);
4519
15
      Builder.positionAtEnd(EndBB);
4520
15
    }
4521
4522
19
    for (unsigned I = 0; I < RetSize; ++I) {
4523
4
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4524
4
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4525
4
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4526
4
      stackPush(PHIRet);
4527
4
    }
4528
15
  }
4529
4530
32
  void compileReturnCallRefOp(const unsigned int TypeIndex) noexcept {
4531
32
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4532
32
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4533
4534
32
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4535
32
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4536
32
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4537
32
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4538
32
        LLContext.getInt64(0)));
4539
32
    Builder.createCondBr(IsRefNotNull, OkBB,
4540
32
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4541
32
    Builder.positionAtEnd(OkBB);
4542
4543
32
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4544
32
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4545
32
    auto RTy = FTy.getReturnType();
4546
4547
32
    const size_t ArgSize = FuncType.getParamTypes().size();
4548
32
    const size_t RetSize =
4549
32
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4550
32
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4551
32
    ArgsVec[0] = F.Fn.getFirstParam();
4552
33
    for (size_t I = 0; I < ArgSize; ++I) {
4553
1
      const size_t J = ArgSize - I;
4554
1
      ArgsVec[J] = stackPop();
4555
1
    }
4556
4557
32
    {
4558
32
      auto FPtr = Builder.createCall(
4559
32
          Context.getIntrinsic(
4560
32
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4561
32
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4562
32
                                          {Context.Int64x2Ty}, false)),
4563
32
          {Ref});
4564
32
      Builder.createCondBr(
4565
32
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4566
32
          NotNullBB, IsNullBB);
4567
32
      Builder.positionAtEnd(NotNullBB);
4568
4569
32
      auto FPtrRet =
4570
32
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4571
32
      if (RetSize == 0) {
4572
30
        Builder.createRetVoid();
4573
30
      } else {
4574
2
        Builder.createRet(FPtrRet);
4575
2
      }
4576
32
    }
4577
4578
32
    Builder.positionAtEnd(IsNullBB);
4579
4580
32
    {
4581
32
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4582
32
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4583
32
      Builder.createArrayPtrStore(
4584
32
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4585
32
          kValSize);
4586
4587
32
      Builder.createCall(
4588
32
          Context.getIntrinsic(
4589
32
              Builder, Executable::Intrinsics::kCallRef,
4590
32
              LLVM::Type::getFunctionType(
4591
32
                  Context.VoidTy,
4592
32
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4593
32
                  false)),
4594
32
          {Ref, Args, Rets});
4595
4596
32
      if (RetSize == 0) {
4597
30
        Builder.createRetVoid();
4598
30
      } else if (RetSize == 1) {
4599
2
        Builder.createRet(
4600
2
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4601
2
      } else {
4602
0
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4603
0
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4604
0
      }
4605
32
    }
4606
32
  }
4607
4608
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4609
19.3k
                     LLVM::Type LoadTy) noexcept {
4610
19.3k
    if constexpr (kForceUnalignment) {
4611
19.3k
      Alignment = 0;
4612
19.3k
    }
4613
19.3k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4614
19.3k
    if (Offset != 0) {
4615
12.5k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4616
12.5k
    }
4617
4618
19.3k
    auto VPtr = Builder.createInBoundsGEP1(
4619
19.3k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4620
19.3k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4621
19.3k
    auto LoadInst = Builder.createLoad(LoadTy, Ptr, true);
4622
19.3k
    LoadInst.setAlignment(1 << Alignment);
4623
19.3k
    stackPush(switchEndian(LoadInst));
4624
19.3k
  }
4625
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4626
                     LLVM::Type LoadTy, LLVM::Type ExtendTy,
4627
7.99k
                     bool Signed) noexcept {
4628
7.99k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4629
7.99k
    if (Signed) {
4630
3.49k
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4631
4.50k
    } else {
4632
4.50k
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4633
4.50k
    }
4634
7.99k
  }
4635
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4636
5.20k
                           unsigned Alignment, LLVM::Type LoadTy) noexcept {
4637
5.20k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4638
5.20k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4639
5.20k
  }
4640
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4641
                           unsigned Alignment, LLVM::Type LoadTy,
4642
1.73k
                           LLVM::Type ExtendTy, bool Signed) noexcept {
4643
1.73k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy, ExtendTy, Signed);
4644
1.73k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4645
1.73k
  }
4646
  void compileSplatLoadOp(unsigned MemoryIndex, unsigned Offset,
4647
                          unsigned Alignment, LLVM::Type LoadTy,
4648
679
                          LLVM::Type VectorTy) noexcept {
4649
679
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4650
679
    compileSplatOp(VectorTy);
4651
679
  }
4652
  void compileLoadLaneOp(unsigned MemoryIndex, unsigned Offset,
4653
                         unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4654
511
                         LLVM::Type VectorTy) noexcept {
4655
511
    auto Vector = stackPop();
4656
511
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4657
    if constexpr (Endian::native == Endian::big) {
4658
      Index = VectorTy.getVectorSize() - 1 - Index;
4659
    }
4660
511
    auto Value = Stack.back();
4661
511
    Stack.back() = Builder.createBitCast(
4662
511
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4663
511
                                    Value, LLContext.getInt64(Index)),
4664
511
        Context.Int64x2Ty);
4665
511
  }
4666
  void compileStoreOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4667
                      LLVM::Type LoadTy, bool Trunc = false,
4668
3.32k
                      bool BitCast = false) noexcept {
4669
3.32k
    if constexpr (kForceUnalignment) {
4670
3.32k
      Alignment = 0;
4671
3.32k
    }
4672
3.32k
    auto V = stackPop();
4673
3.32k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4674
3.32k
    if (Offset != 0) {
4675
2.48k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4676
2.48k
    }
4677
4678
3.32k
    if (Trunc) {
4679
700
      V = Builder.createTrunc(V, LoadTy);
4680
700
    }
4681
3.32k
    if (BitCast) {
4682
255
      V = Builder.createBitCast(V, LoadTy);
4683
255
    }
4684
3.32k
    V = switchEndian(V);
4685
3.32k
    auto VPtr = Builder.createInBoundsGEP1(
4686
3.32k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4687
3.32k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4688
3.32k
    auto StoreInst = Builder.createStore(V, Ptr, true);
4689
3.32k
    StoreInst.setAlignment(1 << Alignment);
4690
3.32k
  }
4691
  void compileStoreLaneOp(unsigned MemoryIndex, unsigned Offset,
4692
                          unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4693
370
                          LLVM::Type VectorTy) noexcept {
4694
370
    auto Vector = Stack.back();
4695
    if constexpr (Endian::native == Endian::big) {
4696
      Index = VectorTy.getVectorSize() - Index - 1;
4697
    }
4698
370
    Stack.back() = Builder.createExtractElement(
4699
370
        Builder.createBitCast(Vector, VectorTy), LLContext.getInt64(Index));
4700
370
    compileStoreOp(MemoryIndex, Offset, Alignment, LoadTy);
4701
370
  }
4702
52.8k
  // Broadcasts the scalar on top of the stack into every lane of VectorTy.
  //
  // The scalar is truncated to the element type, inserted into lane 0 of an
  // undef vector and replicated with a shuffle whose mask is all zeros. The
  // result replaces the scalar on the stack as Int64x2Ty.
  void compileSplatOp(LLVM::Type VectorTy) noexcept {
    auto Placeholder = LLVM::Value::getUndef(VectorTy);
    auto LaneZeroMask = LLVM::Value::getConstNull(
        LLVM::Type::getVectorType(Context.Int32Ty, VectorTy.getVectorSize()));
    auto Scalar = Builder.createTrunc(Stack.back(), VectorTy.getElementType());

    auto Seeded =
        Builder.createInsertElement(Placeholder, Scalar, LLContext.getInt64(0));
    auto Broadcast =
        Builder.createShuffleVector(Seeded, Placeholder, LaneZeroMask);

    Stack.back() = Builder.createBitCast(Broadcast, Context.Int64x2Ty);
  }
4713
1.33k
  // Replaces the vector on top of the stack with its lane Index, viewing the
  // vector as VectorTy. The lane index is mirrored on big-endian hosts.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto Lanes = Builder.createBitCast(Stack.back(), VectorTy);
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    auto Lane = Builder.createExtractElement(Lanes, LLContext.getInt64(Index));
    Stack.back() = Lane;
  }
4721
  // Extracts lane Index of the vector on top of the stack and widens it to
  // ExtendTy, sign-extending when Signed is set and zero-extending otherwise.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index,
                            LLVM::Type ExtendTy, bool Signed) noexcept {
    compileExtractLaneOp(VectorTy, Index);
    auto Lane = Stack.back();
    Stack.back() = Signed ? Builder.createSExt(Lane, ExtendTy)
                          : Builder.createZExt(Lane, ExtendTy);
  }
4730
924
  // Overwrites lane Index of a vector with a scalar.
  //
  // Pops the scalar and truncates it to the element type of VectorTy; the
  // vector beneath it is updated in place on the stack and kept as Int64x2Ty.
  // The lane index is mirrored on big-endian hosts.
  void compileReplaceLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto Scalar = Builder.createTrunc(stackPop(), VectorTy.getElementType());
    auto Packed = Stack.back();
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    auto Lanes = Builder.createBitCast(Packed, VectorTy);
    auto Updated =
        Builder.createInsertElement(Lanes, Scalar, LLContext.getInt64(Index));
    Stack.back() = Builder.createBitCast(Updated, Context.Int64x2Ty);
  }
4741
  void compileVectorCompareOp(LLVM::Type VectorTy,
4742
5.86k
                              LLVMIntPredicate Predicate) noexcept {
4743
5.86k
    auto RHS = stackPop();
4744
5.86k
    auto LHS = stackPop();
4745
5.86k
    auto Result = Builder.createSExt(
4746
5.86k
        Builder.createICmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4747
5.86k
                           Builder.createBitCast(RHS, VectorTy)),
4748
5.86k
        VectorTy);
4749
5.86k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4750
5.86k
  }
4751
  // Emits a lane-wise floating-point comparison of two vectors.
  //
  // Pops RHS then LHS, views both as VectorTy, compares them with Predicate,
  // sign-extends the comparison result to ResultTy and pushes it back as
  // Int64x2Ty.
  void compileVectorCompareOp(LLVM::Type VectorTy, LLVMRealPredicate Predicate,
                              LLVM::Type ResultTy) noexcept {
    auto RHS = stackPop();
    auto LHS = stackPop();
    // Cast the operands in separate statements. As sibling call arguments
    // their evaluation order is unspecified, which let the order of the
    // emitted bitcasts vary with the host compiler.
    auto TypedLHS = Builder.createBitCast(LHS, VectorTy);
    auto TypedRHS = Builder.createBitCast(RHS, VectorTy);
    auto Result = Builder.createSExt(
        Builder.createFCmp(Predicate, TypedLHS, TypedRHS), ResultTy);
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
  }
4761
  template <typename Func>
4762
25.6k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
25.6k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
25.6k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
25.6k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.27k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.27k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.27k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.27k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.52k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.52k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.52k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.52k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
145
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
145
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
145
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
145
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.44k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.44k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.44k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.44k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
550
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
550
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
550
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
550
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
884
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
884
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
884
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
884
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
334
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
334
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
334
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
334
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
1.33k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
1.33k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
1.33k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
1.33k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.28k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.28k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.28k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.28k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
1.76k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
1.76k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
1.76k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
1.76k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
362
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
362
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
362
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
362
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
982
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
982
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
982
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
982
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
5.82k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
5.82k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
5.82k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
5.82k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
685
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
685
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
685
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
685
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
2.00k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
2.00k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
2.00k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
2.00k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
591
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
591
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
591
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
591
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4762
625
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4763
625
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4764
625
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4765
625
  }
4766
2.27k
  // Lane-wise integer absolute value: each lane that compares signed-less-than
  // zero is replaced by its negation, other lanes are kept unchanged.
  void compileVectorAbs(LLVM::Type VectorTy) noexcept {
    auto EmitAbs = [this, VectorTy](auto Lanes) noexcept {
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto IsNegative = Builder.createICmpSLT(Lanes, AllZero);
      auto Negated = Builder.createNeg(Lanes);
      return Builder.createSelect(IsNegative, Negated, Lanes);
    };
    compileVectorOp(VectorTy, EmitAbs);
  }
4773
2.52k
  // Lane-wise integer negation of the vector on top of the stack.
  void compileVectorNeg(LLVM::Type VectorTy) noexcept {
    auto EmitNeg = [this](auto Lanes) noexcept {
      return Builder.createNeg(Lanes);
    };
    compileVectorOp(VectorTy, EmitNeg);
  }
4777
145
  void compileVectorPopcnt() noexcept {
4778
145
    compileVectorOp(Context.Int8x16Ty, [this](auto V) noexcept {
4779
145
      assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
4780
145
      return Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, V);
4781
145
    });
4782
145
  }
4783
  template <typename Func>
4784
2.21k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
2.21k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
2.21k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
2.21k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
108
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
108
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
108
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
108
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
965
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
965
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
965
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
965
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4784
1.14k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4785
1.14k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4786
1.14k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4787
1.14k
  }
4788
108
  void compileVectorAnyTrue() noexcept {
4789
108
    compileVectorReduceIOp(Context.Int128x1Ty, [this](auto V) noexcept {
4790
108
      auto Zero = LLVM::Value::getConstNull(Context.Int128x1Ty);
4791
108
      return Builder.createBitCast(Builder.createICmpNE(V, Zero),
4792
108
                                   LLContext.getInt1Ty());
4793
108
    });
4794
108
  }
4795
965
  // Tests whether every lane of the vector is non-zero.
  //
  // Each lane is compared equal to zero, the comparison result is bit-cast to
  // an integer with one bit per lane, and the answer is whether that integer
  // is zero (i.e. no lane was zero).
  void compileVectorAllTrue(LLVM::Type VectorTy) noexcept {
    auto EmitAllTrue = [this, VectorTy](auto Lanes) noexcept {
      const auto LaneCount = VectorTy.getVectorSize();
      auto MaskTy = LLContext.getIntNTy(LaneCount);
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto LaneIsZero = Builder.createICmpEQ(Lanes, AllZero);
      auto ZeroMask = Builder.createBitCast(LaneIsZero, MaskTy);
      auto NoBits = LLVM::Value::getConstInt(MaskTy, 0);
      return Builder.createICmpEQ(ZeroMask, NoBits);
    };
    compileVectorReduceIOp(VectorTy, EmitAllTrue);
  }
4805
1.14k
  // Collects one bit per lane: each lane is compared signed-less-than zero
  // and the comparison result is bit-cast to an integer with one bit per
  // lane.
  void compileVectorBitMask(LLVM::Type VectorTy) noexcept {
    auto EmitBitMask = [this, VectorTy](auto Lanes) noexcept {
      const auto LaneCount = VectorTy.getVectorSize();
      auto MaskTy = LLContext.getIntNTy(LaneCount);
      auto AllZero = LLVM::Value::getConstNull(VectorTy);
      auto LaneIsNegative = Builder.createICmpSLT(Lanes, AllZero);
      return Builder.createBitCast(LaneIsNegative, MaskTy);
    };
    compileVectorReduceIOp(VectorTy, EmitBitMask);
  }
4813
  template <typename Func>
4814
5.28k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
5.28k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
5.28k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
5.28k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
5.28k
    auto RHS = Builder.createVectorSplat(
4819
5.28k
        VectorTy.getVectorSize(),
4820
5.28k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
5.28k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
5.28k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
5.28k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
5.28k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
1.94k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
1.94k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
1.94k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
1.94k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
1.94k
    auto RHS = Builder.createVectorSplat(
4819
1.94k
        VectorTy.getVectorSize(),
4820
1.94k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
1.94k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
1.94k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
1.94k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
1.94k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
2.38k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
2.38k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
2.38k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
2.38k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
2.38k
    auto RHS = Builder.createVectorSplat(
4819
2.38k
        VectorTy.getVectorSize(),
4820
2.38k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
2.38k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
2.38k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
2.38k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
2.38k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4814
954
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4815
954
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4816
954
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4817
954
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4818
954
    auto RHS = Builder.createVectorSplat(
4819
954
        VectorTy.getVectorSize(),
4820
954
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4821
954
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4822
954
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4823
954
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4824
954
  }
4825
1.94k
  // Lane-wise left shift, built on compileVectorShiftOp.
  void compileVectorShl(LLVM::Type VectorTy) noexcept {
    auto EmitShl = [this](auto Operand, auto Amounts) noexcept {
      return Builder.createShl(Operand, Amounts);
    };
    compileVectorShiftOp(VectorTy, EmitShl);
  }
4830
954
  // Emits a lane-wise logical (zero-filling) right shift on a vector of type
  // VectorTy. Operand handling is delegated to compileVectorShiftOp; only the
  // lshr-building callback is provided here.
  void compileVectorLShr(LLVM::Type VectorTy) noexcept {
4831
954
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4832
954
      return Builder.createLShr(LHS, RHS);
4833
954
    });
4834
954
  }
4835
2.38k
  // Emits a lane-wise arithmetic (sign-propagating) right shift on a vector
  // of type VectorTy. Operand handling is delegated to compileVectorShiftOp;
  // only the ashr-building callback is provided here.
  void compileVectorAShr(LLVM::Type VectorTy) noexcept {
4836
2.38k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4837
2.38k
      return Builder.createAShr(LHS, RHS);
4838
2.38k
    });
4839
2.38k
  }
4840
  // Shared driver for binary vector operations.
  //   VectorTy: the lane layout both operands are reinterpreted as.
  //   Op:       callable invoked as Op(LHS, RHS); its result is pushed.
  // The right-hand operand is popped first, then the left-hand one. Both are
  // bitcast to VectorTy, and the value produced by Op is bitcast to
  // Context.Int64x2Ty before being pushed.
  template <typename Func>
4841
8.19k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
8.19k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
8.19k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
8.19k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
8.19k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
424
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
424
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
424
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
424
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
424
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
1.17k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
1.17k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
1.17k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
1.17k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
1.17k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
837
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
837
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
837
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
837
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
837
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
400
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
400
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
400
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
400
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
400
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
315
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
315
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
315
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
315
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
315
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
336
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
336
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
336
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
336
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
336
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
429
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
429
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
429
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
429
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
429
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
989
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
989
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
989
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
989
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
989
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
287
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
287
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
287
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
287
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
287
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
494
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
494
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
494
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
494
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
494
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
180
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
180
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
180
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
180
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
180
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
181
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
181
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
181
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
181
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
181
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
468
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
468
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
468
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
468
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
468
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
249
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
249
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
249
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
249
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
249
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
213
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
213
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
213
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
213
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
213
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
311
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
311
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
311
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
311
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
311
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
260
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
260
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
260
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
260
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
260
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
315
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
315
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
315
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
315
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
315
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4841
329
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4842
329
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4843
329
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4844
329
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4845
329
  }
4846
424
  // Emits a lane-wise integer addition (Builder.createAdd) of the two topmost
  // stack values, viewed as VectorTy, via compileVectorVectorOp.
  void compileVectorVectorAdd(LLVM::Type VectorTy) noexcept {
4847
424
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4848
424
      return Builder.createAdd(LHS, RHS);
4849
424
    });
4850
424
  }
4851
1.17k
  // Emits a lane-wise saturating addition of the two topmost stack values,
  // viewed as VectorTy.
  //   Signed: selects LLVM::Core::SAddSat when true, LLVM::Core::UAddSat
  //           otherwise.
  // The intrinsic is overloaded on VectorTy and called through
  // compileVectorVectorOp.
  void compileVectorVectorAddSat(LLVM::Type VectorTy, bool Signed) noexcept {
4852
1.17k
    auto ID = Signed ? LLVM::Core::SAddSat : LLVM::Core::UAddSat;
4853
1.17k
    // The chosen intrinsic ID must have been resolved.
    assuming(ID != LLVM::Core::NotIntrinsic);
4854
1.17k
    compileVectorVectorOp(
4855
1.17k
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4856
1.17k
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4857
1.17k
        });
4858
1.17k
  }
4859
837
  // Emits a lane-wise integer subtraction (LHS - RHS, Builder.createSub) of
  // the two topmost stack values, viewed as VectorTy, via
  // compileVectorVectorOp.
  void compileVectorVectorSub(LLVM::Type VectorTy) noexcept {
4860
837
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4861
837
      return Builder.createSub(LHS, RHS);
4862
837
    });
4863
837
  }
4864
400
  // Emits a lane-wise saturating subtraction of the two topmost stack values,
  // viewed as VectorTy.
  //   Signed: selects LLVM::Core::SSubSat when true, LLVM::Core::USubSat
  //           otherwise.
  // The intrinsic is overloaded on VectorTy and called through
  // compileVectorVectorOp.
  void compileVectorVectorSubSat(LLVM::Type VectorTy, bool Signed) noexcept {
4865
400
    auto ID = Signed ? LLVM::Core::SSubSat : LLVM::Core::USubSat;
4866
400
    // The chosen intrinsic ID must have been resolved.
    assuming(ID != LLVM::Core::NotIntrinsic);
4867
400
    compileVectorVectorOp(
4868
400
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
4869
400
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
4870
400
        });
4871
400
  }
4872
494
  // Emits a lane-wise integer multiplication (Builder.createMul) of the two
  // topmost stack values, viewed as VectorTy, via compileVectorVectorOp.
  void compileVectorVectorMul(LLVM::Type VectorTy) noexcept {
4873
494
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4874
494
      return Builder.createMul(LHS, RHS);
4875
494
    });
4876
494
  }
4877
93
  // Emits a byte swizzle. Pops the index vector first, then the source
  // vector (both bitcast to Context.Int8x16Ty), and pushes the selected bytes
  // bitcast to Context.Int64x2Ty.
  // Paths, tried in this order:
  //   1. x86_64 builds with Context.SupportSSSE3: X86SSSE3PShufB128.
  //   2. aarch64 builds with Context.SupportNEON: AArch64NeonTbl1.
  //   3. s390x builds: S390VPerm (unconditional, returns before the fallback).
  //   4. Generic fallback: spill the source bytes to a 16-entry array and
  //      reload them one lane at a time.
  void compileVectorSwizzle() noexcept {
4878
93
    auto Index = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4879
93
    auto Vector = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4880
4881
93
#if defined(__x86_64__)
4882
93
    if (Context.SupportSSSE3) {
4883
93
      // Add 112 to every index byte: 0..15 becomes 112..127, 16..143 becomes
      // 128..255, and larger values wrap around. Wrapped lanes (detected by
      // Index > Added, unsigned) are replaced by all-ones.
      // NOTE(review): presumably this relies on PSHUFB zeroing any lane whose
      // selector byte has its top bit set -- confirm against the Intel
      // instruction reference.
      auto Magic = Builder.createVectorSplat(16, LLContext.getInt8(112));
4884
93
      auto Added = Builder.createAdd(Index, Magic);
4885
93
      auto NewIndex = Builder.createSelect(
4886
93
          Builder.createICmpUGT(Index, Added),
4887
93
          LLVM::Value::getConstAllOnes(Context.Int8x16Ty), Added);
4888
93
      assuming(LLVM::Core::X86SSSE3PShufB128 != LLVM::Core::NotIntrinsic);
4889
93
      stackPush(Builder.createBitCast(
4890
93
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PShufB128, {},
4891
93
                                  {Vector, NewIndex}),
4892
93
          Context.Int64x2Ty));
4893
93
      return;
4894
93
    }
4895
0
#endif
4896
4897
#if defined(__aarch64__)
4898
    if (Context.SupportNEON) {
4899
      assuming(LLVM::Core::AArch64NeonTbl1 != LLVM::Core::NotIntrinsic);
4900
      // The index vector is passed to the intrinsic unmodified.
      stackPush(Builder.createBitCast(
4901
          Builder.createIntrinsic(LLVM::Core::AArch64NeonTbl1,
4902
                                  {Context.Int8x16Ty}, {Vector, Index}),
4903
          Context.Int64x2Ty));
4904
      return;
4905
    }
4906
#endif
4907
4908
0
    // Mask is 15 in every lane (largest valid index); Zero is the value
    // produced for out-of-range lanes.
    auto Mask = Builder.createVectorSplat(16, LLContext.getInt8(15));
4909
0
    auto Zero = Builder.createVectorSplat(16, LLContext.getInt8(0));
4910
4911
#if defined(__s390x__)
4912
    assuming(LLVM::Core::S390VPerm != LLVM::Core::NotIntrinsic);
4913
    // Despite its name, Exceed is true for lanes whose index is <= 15, i.e.
    // the in-range lanes that keep the permuted byte in the select below.
    auto Exceed = Builder.createICmpULE(Index, Mask);
4914
    // Index is rewritten as 15 - Index before the permute.
    // NOTE(review): presumably this compensates for the byte numbering used
    // by the s390x permute instruction -- confirm.
    Index = Builder.createSub(Mask, Index);
4915
    auto Result = Builder.createIntrinsic(LLVM::Core::S390VPerm, {},
4916
                                          {Vector, Zero, Index});
4917
    Result = Builder.createSelect(Exceed, Result, Zero);
4918
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4919
    return;
4920
#endif
4921
4922
    // Fallback case.
4923
    // If the SSSE3 is not supported on the x86_64 platform or
4924
    // the NEON is not supported on the aarch64 platform,
4925
    // then fallback to this.
4926
0
    // IsOver marks lanes with index > 15; InboundIndex keeps only the low
    // four bits so every element access below stays inside the array.
    auto IsOver = Builder.createICmpUGT(Index, Mask);
4927
0
    auto InboundIndex = Builder.createAnd(Index, Mask);
4928
0
    auto Array = Builder.createArray(16, 1);
4929
0
    // Store the 16 source bytes into Array, one element per lane.
    for (size_t I = 0; I < 16; ++I) {
4930
0
      Builder.createStore(
4931
0
          Builder.createExtractElement(Vector, LLContext.getInt64(I)),
4932
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array,
4933
0
                                     LLContext.getInt64(I)));
4934
0
    }
4935
0
    LLVM::Value Ret = LLVM::Value::getUndef(Context.Int8x16Ty);
4936
0
    // Rebuild the result lane by lane: lane I receives Array[InboundIndex[I]].
    for (size_t I = 0; I < 16; ++I) {
4937
0
      auto Idx =
4938
0
          Builder.createExtractElement(InboundIndex, LLContext.getInt64(I));
4939
0
      auto Value = Builder.createLoad(
4940
0
          Context.Int8Ty,
4941
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array, Idx));
4942
0
      Ret = Builder.createInsertElement(Ret, Value, LLContext.getInt64(I));
4943
0
    }
4944
0
    // Lanes whose original index was out of range become zero.
    Ret = Builder.createSelect(IsOver, Zero, Ret);
4945
0
    stackPush(Builder.createBitCast(Ret, Context.Int64x2Ty));
4946
0
  }
4947
4948
180
  // Emits the Q15 rounding, saturating multiply on two Context.Int16x8Ty
  // operands, using compileVectorVectorOp for operand handling.
  // Paths, tried in this order:
  //   1. x86_64 builds with Context.SupportSSSE3: X86SSSE3PMulHrSw128, then
  //      a fix-up of lanes equal to 0x8000.
  //   2. aarch64 builds with Context.SupportNEON: AArch64NeonSQRDMulH.
  //   3. Generic fallback: widen, compute (L * R + 0x4000) >> 15, truncate,
  //      then apply the same 0x8000 fix-up.
  // Naming caveat: IntMaxV holds 0x8000 in every lane, and NotOver is
  // all-ones exactly in the lanes where Result == 0x8000. XOR-ing with it
  // turns those lanes into 0x7FFF and leaves all other lanes unchanged.
  void compileVectorVectorQ15MulSat() noexcept {
4949
180
    compileVectorVectorOp(
4950
180
        Context.Int16x8Ty, [this](auto LHS, auto RHS) noexcept -> LLVM::Value {
4951
180
#if defined(__x86_64__)
4952
180
          if (Context.SupportSSSE3) {
4953
180
            assuming(LLVM::Core::X86SSSE3PMulHrSw128 !=
4954
180
                     LLVM::Core::NotIntrinsic);
4955
180
            auto Result = Builder.createIntrinsic(
4956
180
                LLVM::Core::X86SSSE3PMulHrSw128, {}, {LHS, RHS});
4957
180
            auto IntMaxV = Builder.createVectorSplat(
4958
180
                8, LLContext.getInt16(UINT16_C(0x8000)));
4959
180
            // Sign-extending the i1 comparison yields 0xFFFF where the lane
            // equals 0x8000 and 0 elsewhere.
            auto NotOver = Builder.createSExt(
4960
180
                Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4961
180
            return Builder.createXor(Result, NotOver);
4962
180
          }
4963
0
#endif
4964
4965
#if defined(__aarch64__)
4966
          if (Context.SupportNEON) {
4967
            assuming(LLVM::Core::AArch64NeonSQRDMulH !=
4968
                     LLVM::Core::NotIntrinsic);
4969
            // No fix-up is applied to the result of this intrinsic.
            return Builder.createBinaryIntrinsic(
4970
                LLVM::Core::AArch64NeonSQRDMulH, LHS, RHS);
4971
          }
4972
#endif
4973
4974
          // Fallback case.
4975
          // If the SSSE3 is not supported on the x86_64 platform or
4976
          // the NEON is not supported on the aarch64 platform,
4977
          // then fallback to this.
4978
0
          // ExtTy is the widened lane type; the constants below are built as
          // 32-bit values, so it is presumably an 8 x i32 vector.
          auto ExtTy = Context.Int16x8Ty.getExtendedElementVectorType();
4979
0
          auto Offset = Builder.createVectorSplat(
4980
0
              8, LLContext.getInt32(UINT32_C(0x4000)));
4981
0
          auto Shift =
4982
0
              Builder.createVectorSplat(8, LLContext.getInt32(UINT32_C(15)));
4983
0
          auto ExtLHS = Builder.createSExt(LHS, ExtTy);
4984
0
          auto ExtRHS = Builder.createSExt(RHS, ExtTy);
4985
0
          // (ExtLHS * ExtRHS + 0x4000) >> 15 (arithmetic), truncated back to
          // 16-bit lanes.
          auto Result = Builder.createTrunc(
4986
0
              Builder.createAShr(
4987
0
                  Builder.createAdd(Builder.createMul(ExtLHS, ExtRHS), Offset),
4988
0
                  Shift),
4989
0
              Context.Int16x8Ty);
4990
0
          auto IntMaxV = Builder.createVectorSplat(
4991
0
              8, LLContext.getInt16(UINT16_C(0x8000)));
4992
0
          auto NotOver = Builder.createSExt(
4993
0
              Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4994
0
          return Builder.createXor(Result, NotOver);
4995
180
        });
4996
180
  }
4997
315
  // Emits a lane-wise signed minimum of the two topmost stack values, viewed
  // as VectorTy: each lane takes LHS when LHS <= RHS (signed), RHS otherwise.
  void compileVectorVectorSMin(LLVM::Type VectorTy) noexcept {
4998
315
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4999
315
      auto C = Builder.createICmpSLE(LHS, RHS);
5000
315
      return Builder.createSelect(C, LHS, RHS);
5001
315
    });
5002
315
  }
5003
336
  // Emits a lane-wise unsigned minimum of the two topmost stack values,
  // viewed as VectorTy: each lane takes LHS when LHS <= RHS (unsigned), RHS
  // otherwise.
  void compileVectorVectorUMin(LLVM::Type VectorTy) noexcept {
5004
336
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5005
336
      auto C = Builder.createICmpULE(LHS, RHS);
5006
336
      return Builder.createSelect(C, LHS, RHS);
5007
336
    });
5008
336
  }
5009
429
  // Emits a lane-wise signed maximum of the two topmost stack values, viewed
  // as VectorTy: each lane takes LHS when LHS >= RHS (signed), RHS otherwise.
  void compileVectorVectorSMax(LLVM::Type VectorTy) noexcept {
5010
429
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5011
429
      auto C = Builder.createICmpSGE(LHS, RHS);
5012
429
      return Builder.createSelect(C, LHS, RHS);
5013
429
    });
5014
429
  }
5015
989
  // Emits a lane-wise unsigned maximum of the two topmost stack values,
  // viewed as VectorTy: each lane takes LHS when LHS >= RHS (unsigned), RHS
  // otherwise.
  void compileVectorVectorUMax(LLVM::Type VectorTy) noexcept {
5016
989
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5017
989
      auto C = Builder.createICmpUGE(LHS, RHS);
5018
989
      return Builder.createSelect(C, LHS, RHS);
5019
989
    });
5020
989
  }
5021
287
  // Emits a lane-wise unsigned rounding average of the two topmost stack
  // values, viewed as VectorTy.
  // Paths, tried in this order:
  //   1. x86_64 builds with Context.SupportSSE2: X86SSE2PAvgB for 8-bit lanes
  //      or X86SSE2PAvgW for 16-bit lanes; any other lane width is treated as
  //      unreachable.
  //   2. aarch64 builds with Context.SupportNEON: AArch64NeonURHAdd.
  //   3. Generic fallback: zero-extend both operands to ExtendTy, compute
  //      (L + R + 1) >> 1 with a logical shift, then truncate to VectorTy.
  void compileVectorVectorUAvgr(LLVM::Type VectorTy) noexcept {
5022
287
    auto ExtendTy = VectorTy.getExtendedElementVectorType();
5023
287
    compileVectorVectorOp(
5024
287
        VectorTy,
5025
287
        [this, VectorTy, ExtendTy](auto LHS, auto RHS) noexcept -> LLVM::Value {
5026
287
#if defined(__x86_64__)
5027
287
          if (Context.SupportSSE2) {
5028
287
            // Pick the averaging intrinsic that matches the lane width.
            const auto ID = [VectorTy]() noexcept {
5029
287
              switch (VectorTy.getElementType().getIntegerBitWidth()) {
5030
121
              case 8:
5031
121
                return LLVM::Core::X86SSE2PAvgB;
5032
166
              case 16:
5033
166
                return LLVM::Core::X86SSE2PAvgW;
5034
0
              default:
5035
0
                assumingUnreachable();
5036
287
              }
5037
287
            }();
5038
287
            assuming(ID != LLVM::Core::NotIntrinsic);
5039
287
            return Builder.createIntrinsic(ID, {}, {LHS, RHS});
5040
287
          }
5041
0
#endif
5042
5043
#if defined(__aarch64__)
5044
          if (Context.SupportNEON) {
5045
            assuming(LLVM::Core::AArch64NeonURHAdd != LLVM::Core::NotIntrinsic);
5046
            return Builder.createBinaryIntrinsic(LLVM::Core::AArch64NeonURHAdd,
5047
                                                 LHS, RHS);
5048
          }
5049
#endif
5050
5051
          // Fallback case.
5052
          // If the SSE2 is not supported on the x86_64 platform or
5053
          // the NEON is not supported on the aarch64 platform,
5054
          // then fallback to this.
5055
0
          auto EL = Builder.createZExt(LHS, ExtendTy);
5056
0
          auto ER = Builder.createZExt(RHS, ExtendTy);
5057
0
          // One is a splat of LLContext.getTrue() zero-extended to ExtendTy
          // (presumably the value 1 in every lane). It is used both as the
          // rounding addend and as the shift amount.
          auto One = Builder.createZExt(
5058
0
              Builder.createVectorSplat(ExtendTy.getVectorSize(),
5059
0
                                        LLContext.getTrue()),
5060
0
              ExtendTy);
5061
0
          return Builder.createTrunc(
5062
0
              Builder.createLShr(
5063
0
                  Builder.createAdd(Builder.createAdd(EL, ER), One), One),
5064
0
              VectorTy);
5065
287
        });
5066
287
  }
5067
723
  // Emits a saturating narrow of two vectors into one vector with lanes of
  // half the width.
  //   FromTy: source lane layout; only 16-bit and 32-bit integer lanes are
  //           handled, other widths are treated as unreachable.
  //   Signed: when true, clamp to the signed range of the narrower type
  //           (int8_t / int16_t); otherwise to its unsigned range
  //           (uint8_t / uint16_t).
  // In both modes the source lanes are compared with signed predicates
  // (ICmpSLT / ICmpSGT). The second operand (F2) is popped first, then the
  // first (F1). Each is clamped and truncated, the two halves are joined with
  // a shuffle, and the result is pushed bitcast to Context.Int64x2Ty.
  void compileVectorNarrow(LLVM::Type FromTy, bool Signed) noexcept {
5068
723
    // Build the clamp bounds as constants of the source lane width.
    auto [MinInt,
5069
723
          MaxInt] = [&]() noexcept -> std::tuple<LLVM::Value, LLVM::Value> {
5070
723
      switch (FromTy.getElementType().getIntegerBitWidth()) {
5071
290
      case 16: {
5072
290
        const auto Min =
5073
290
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::min()
5074
290
                                        : std::numeric_limits<uint8_t>::min());
5075
290
        const auto Max =
5076
290
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::max()
5077
290
                                        : std::numeric_limits<uint8_t>::max());
5078
290
        return {LLContext.getInt16(static_cast<uint16_t>(Min)),
5079
290
                LLContext.getInt16(static_cast<uint16_t>(Max))};
5080
0
      }
5081
433
      case 32: {
5082
433
        const auto Min =
5083
433
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::min()
5084
433
                                        : std::numeric_limits<uint16_t>::min());
5085
433
        const auto Max =
5086
433
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::max()
5087
433
                                        : std::numeric_limits<uint16_t>::max());
5088
433
        return {LLContext.getInt32(static_cast<uint32_t>(Min)),
5089
433
                LLContext.getInt32(static_cast<uint32_t>(Max))};
5090
0
      }
5091
0
      default:
5092
0
        assumingUnreachable();
5093
723
      }
5094
723
    }();
5095
723
    const auto Count = FromTy.getVectorSize();
5096
723
    auto VMin = Builder.createVectorSplat(Count, MinInt);
5097
723
    auto VMax = Builder.createVectorSplat(Count, MaxInt);
5098
5099
723
    auto TruncTy = FromTy.getTruncatedElementVectorType();
5100
5101
723
    // Second operand: clamp to [VMin, VMax], then truncate.
    auto F2 = Builder.createBitCast(stackPop(), FromTy);
5102
723
    F2 = Builder.createSelect(Builder.createICmpSLT(F2, VMin), VMin, F2);
5103
723
    F2 = Builder.createSelect(Builder.createICmpSGT(F2, VMax), VMax, F2);
5104
723
    F2 = Builder.createTrunc(F2, TruncTy);
5105
5106
723
    // First operand: same clamp and truncate.
    auto F1 = Builder.createBitCast(stackPop(), FromTy);
5107
723
    F1 = Builder.createSelect(Builder.createICmpSLT(F1, VMin), VMin, F1);
5108
723
    F1 = Builder.createSelect(Builder.createICmpSGT(F1, VMax), VMax, F1);
5109
723
    F1 = Builder.createTrunc(F1, TruncTy);
5110
5111
723
    // Identity mask 0 .. 2*Count-1: the shuffle simply concatenates its two
    // inputs. The input order is F1,F2 on little-endian hosts and F2,F1
    // otherwise.
    std::vector<uint32_t> Mask(Count * 2);
5112
723
    std::iota(Mask.begin(), Mask.end(), 0);
5113
723
    auto V = Endian::native == Endian::little
5114
723
                 ? Builder.createShuffleVector(
5115
723
                       F1, F2, LLVM::Value::getConstVector32(LLContext, Mask))
5116
723
                 : Builder.createShuffleVector(
5117
0
                       F2, F1, LLVM::Value::getConstVector32(LLContext, Mask));
5118
723
    stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
5119
723
  }
5120
6.08k
  // Emits a widening of one half of the vector on top of the stack; the top
  // slot (Stack.back()) is rewritten in place, nothing is popped or pushed.
  //   FromTy: source lane layout.
  //   Signed: sign-extend when true, zero-extend otherwise.
  //   Low:    select lanes [0, Count/2) when true, [Count/2, Count) otherwise;
  //           the choice is inverted on big-endian hosts.
  // The whole vector is extended to ExtTy first, then a shuffle keeps the
  // selected Count/2 lanes. The result is stored bitcast to
  // Context.Int64x2Ty.
  void compileVectorExtend(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5121
6.08k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5122
6.08k
    const auto Count = FromTy.getVectorSize();
5123
6.08k
    std::vector<uint32_t> Mask(Count / 2);
5124
    if constexpr (Endian::native == Endian::big) {
5125
      Low = !Low;
5126
    }
5127
6.08k
    // Mask holds Count/2 consecutive lane indices starting at the chosen half.
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5128
6.08k
    auto R = Builder.createBitCast(Stack.back(), FromTy);
5129
6.08k
    if (Signed) {
5130
2.83k
      R = Builder.createSExt(R, ExtTy);
5131
3.25k
    } else {
5132
3.25k
      R = Builder.createZExt(R, ExtTy);
5133
3.25k
    }
5134
6.08k
    R = Builder.createShuffleVector(
5135
6.08k
        R, LLVM::Value::getUndef(ExtTy),
5136
6.08k
        LLVM::Value::getConstVector32(LLContext, Mask));
5137
6.08k
    Stack.back() = Builder.createBitCast(R, Context.Int64x2Ty);
5138
6.08k
  }
5139
2.06k
  // Emits an extended multiply: one half of each of the two topmost stack
  // vectors is widened, the halves are multiplied lane-wise, and the product
  // is pushed bitcast to Context.Int64x2Ty.
  //   FromTy: source lane layout of both operands.
  //   Signed: sign-extend when true, zero-extend otherwise.
  //   Low:    select lanes [0, Count/2) when true, [Count/2, Count) otherwise.
  // NOTE(review): unlike compileVectorExtend, Low is not inverted on
  // big-endian hosts here -- confirm whether that difference is intentional.
  void compileVectorExtMul(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5140
2.06k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5141
2.06k
    const auto Count = FromTy.getVectorSize();
5142
2.06k
    std::vector<uint32_t> Mask(Count / 2);
5143
2.06k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5144
4.13k
    // Extend: reinterpret as FromTy, widen every lane, keep the lanes named
    // by Mask. Mask is captured by reference and only used within this call.
    auto Extend = [this, FromTy, Signed, ExtTy, &Mask](LLVM::Value R) noexcept {
5145
4.13k
      R = Builder.createBitCast(R, FromTy);
5146
4.13k
      if (Signed) {
5147
1.75k
        R = Builder.createSExt(R, ExtTy);
5148
2.37k
      } else {
5149
2.37k
        R = Builder.createZExt(R, ExtTy);
5150
2.37k
      }
5151
4.13k
      return Builder.createShuffleVector(
5152
4.13k
          R, LLVM::Value::getUndef(ExtTy),
5153
4.13k
          LLVM::Value::getConstVector32(LLContext, Mask));
5154
4.13k
    };
5155
2.06k
    // The right-hand operand is popped first.
    auto RHS = Extend(stackPop());
5156
2.06k
    auto LHS = Extend(stackPop());
5157
2.06k
    stackPush(
5158
2.06k
        Builder.createBitCast(Builder.createMul(RHS, LHS), Context.Int64x2Ty));
5159
2.06k
  }
5160
2.44k
  // Emits an extending pairwise add: adjacent lane pairs of the input are
  // widened and summed, giving a vector with half as many lanes of twice the
  // width (ExtTy).
  //   VectorTy: source lane layout; the x86 paths handle 16 or 8 lanes.
  //   Signed:   treat source lanes as signed when true, unsigned otherwise.
  // Operand handling goes through compileVectorOp, which is defined outside
  // this excerpt (presumably the one-operand counterpart of
  // compileVectorVectorOp -- confirm).
  // Paths, tried in this order:
  //   1. x86_64 with Context.SupportXOP: an X86XOpVPHAdd* intrinsic.
  //   2. x86_64 with Context.SupportSSSE3 and 16 lanes: X86SSSE3PMAddUbSw128
  //      against a splat of 1.
  //   3. x86_64 with Context.SupportSSE2 and 8 lanes: X86SSE2PMAddWd against
  //      a splat of 1, with a bias correction for the unsigned case.
  //   4. aarch64 with Context.SupportNEON: AArch64NeonSAddLP / UAddLP.
  //   5. Generic fallback using shifts on the widened view.
  void compileVectorExtAddPairwise(LLVM::Type VectorTy, bool Signed) noexcept {
5161
2.44k
    compileVectorOp(
5162
2.44k
        VectorTy, [this, VectorTy, Signed](auto V) noexcept -> LLVM::Value {
5163
2.44k
          auto ExtTy = VectorTy.getExtendedElementVectorType()
5164
2.44k
                           .getHalfElementsVectorType();
5165
2.44k
#if defined(__x86_64__)
5166
2.44k
          const auto Count = VectorTy.getVectorSize();
5167
2.44k
          if (Context.SupportXOP) {
5168
0
            // Select the XOP intrinsic by lane count and signedness.
            const auto ID = [Count, Signed]() noexcept {
5169
0
              switch (Count) {
5170
0
              case 8:
5171
0
                return Signed ? LLVM::Core::X86XOpVPHAddWD
5172
0
                              : LLVM::Core::X86XOpVPHAddUWD;
5173
0
              case 16:
5174
0
                return Signed ? LLVM::Core::X86XOpVPHAddBW
5175
0
                              : LLVM::Core::X86XOpVPHAddUBW;
5176
0
              default:
5177
0
                assumingUnreachable();
5178
0
              }
5179
0
            }();
5180
0
            assuming(ID != LLVM::Core::NotIntrinsic);
5181
0
            return Builder.createUnaryIntrinsic(ID, V);
5182
0
          }
5183
2.44k
          if (Context.SupportSSSE3 && Count == 16) {
5184
641
            assuming(LLVM::Core::X86SSSE3PMAddUbSw128 !=
5185
641
                     LLVM::Core::NotIntrinsic);
5186
641
            // The operand order differs by signedness: the splat of 1 comes
            // first for the signed case and second for the unsigned case.
            // NOTE(review): presumably because the instruction reads its
            // first operand as unsigned bytes and its second as signed bytes
            // -- confirm against the Intel instruction reference.
            if (Signed) {
5187
314
              return Builder.createIntrinsic(
5188
314
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5189
314
                  {Builder.createVectorSplat(16, LLContext.getInt8(1)), V});
5190
327
            } else {
5191
327
              return Builder.createIntrinsic(
5192
327
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5193
327
                  {V, Builder.createVectorSplat(16, LLContext.getInt8(1))});
5194
327
            }
5195
641
          }
5196
1.80k
          if (Context.SupportSSE2 && Count == 8) {
5197
1.80k
            assuming(LLVM::Core::X86SSE2PMAddWd != LLVM::Core::NotIntrinsic);
5198
1.80k
            if (Signed) {
5199
1.15k
              return Builder.createIntrinsic(
5200
1.15k
                  LLVM::Core::X86SSE2PMAddWd, {},
5201
1.15k
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5202
1.15k
            } else {
5203
652
              // Unsigned case: flip the top bit of every 16-bit lane, run the
              // multiply-add, then add 0x10000 to every 32-bit result.
              // NOTE(review): presumably the XOR biases each lane by -0x8000
              // so the signed multiply-add can be reused, and 0x10000
              // (2 * 0x8000) undoes the bias of the two summed lanes --
              // confirm.
              V = Builder.createXor(
5204
652
                  V, Builder.createVectorSplat(8, LLContext.getInt16(0x8000)));
5205
652
              V = Builder.createIntrinsic(
5206
652
                  LLVM::Core::X86SSE2PMAddWd, {},
5207
652
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5208
652
              return Builder.createAdd(
5209
652
                  V, Builder.createVectorSplat(4, LLContext.getInt32(0x10000)));
5210
652
            }
5211
1.80k
          }
5212
0
#endif
5213
5214
#if defined(__aarch64__)
5215
          if (Context.SupportNEON) {
5216
            const auto ID = Signed ? LLVM::Core::AArch64NeonSAddLP
5217
                                   : LLVM::Core::AArch64NeonUAddLP;
5218
            assuming(ID != LLVM::Core::NotIntrinsic);
5219
            return Builder.createIntrinsic(ID, {ExtTy, VectorTy}, {V});
5220
          }
5221
#endif
5222
5223
          // Fallback case.
5224
          // If the XOP, SSSE3, or SSE2 is not supported on the x86_64 platform
5225
          // or the NEON is not supported on the aarch64 platform,
5226
          // then fallback to this.
5227
0
          // Width is the source lane bit width, splatted across ExtTy. Each
          // wide lane of EV holds one pair of source lanes: L extracts the
          // upper one (EV >> Width) and R the lower one
          // ((EV << Width) >> Width), using arithmetic shifts when Signed and
          // logical shifts otherwise.
          auto Width = LLVM::Value::getConstInt(
5228
0
              ExtTy.getElementType(),
5229
0
              VectorTy.getElementType().getIntegerBitWidth());
5230
0
          Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5231
0
          auto EV = Builder.createBitCast(V, ExtTy);
5232
0
          LLVM::Value L, R;
5233
0
          if (Signed) {
5234
0
            L = Builder.createAShr(EV, Width);
5235
0
            R = Builder.createAShr(Builder.createShl(EV, Width), Width);
5236
0
          } else {
5237
0
            L = Builder.createLShr(EV, Width);
5238
0
            R = Builder.createLShr(Builder.createShl(EV, Width), Width);
5239
0
          }
5240
0
          return Builder.createAdd(L, R);
5241
1.80k
        });
5242
2.44k
  }
5243
550
  // Compile a lane-wise floating-point absolute value on a vector viewed as
  // `VectorTy`. The operand handling is delegated to compileVectorOp; this
  // function only supplies the per-vector transform (the `Fabs` intrinsic).
  void compileVectorFAbs(LLVM::Type VectorTy) noexcept {
    auto EmitFabs = [this](auto Operand) noexcept {
      // The intrinsic ID must have been resolved before it is used.
      assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Fabs, Operand);
    };
    compileVectorOp(VectorTy, EmitFabs);
  }
5249
884
  // Compile a lane-wise floating-point negation on a vector viewed as
  // `VectorTy`, via compileVectorOp and the builder's FNeg instruction.
  void compileVectorFNeg(LLVM::Type VectorTy) noexcept {
    auto EmitFNeg = [this](auto Operand) noexcept {
      return Builder.createFNeg(Operand);
    };
    compileVectorOp(VectorTy, EmitFNeg);
  }
5253
334
  // Compile a lane-wise floating-point square root on a vector viewed as
  // `VectorTy`, via compileVectorOp and the `Sqrt` intrinsic.
  void compileVectorFSqrt(LLVM::Type VectorTy) noexcept {
    auto EmitSqrt = [this](auto Operand) noexcept {
      // The intrinsic ID must have been resolved before it is used.
      assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, Operand);
    };
    compileVectorOp(VectorTy, EmitSqrt);
  }
5259
1.33k
  // Compile a lane-wise floating-point ceiling on a vector viewed as
  // `VectorTy`, via compileVectorOp and the `Ceil` intrinsic.
  void compileVectorFCeil(LLVM::Type VectorTy) noexcept {
    auto EmitCeil = [this](auto Operand) noexcept {
      // The intrinsic ID must have been resolved before it is used.
      assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Ceil, Operand);
    };
    compileVectorOp(VectorTy, EmitCeil);
  }
5265
2.28k
  // Compile a lane-wise floating-point floor on a vector viewed as
  // `VectorTy`, via compileVectorOp and the `Floor` intrinsic.
  void compileVectorFFloor(LLVM::Type VectorTy) noexcept {
    auto EmitFloor = [this](auto Operand) noexcept {
      // The intrinsic ID must have been resolved before it is used.
      assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Floor, Operand);
    };
    compileVectorOp(VectorTy, EmitFloor);
  }
5271
1.76k
  // Compile a lane-wise floating-point truncation (round toward zero) on a
  // vector viewed as `VectorTy`, via compileVectorOp and the `Trunc` intrinsic.
  void compileVectorFTrunc(LLVM::Type VectorTy) noexcept {
    auto EmitTrunc = [this](auto Operand) noexcept {
      // The intrinsic ID must have been resolved before it is used.
      assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    };
    compileVectorOp(VectorTy, EmitTrunc);
  }
5277
362
  // Compile a lane-wise "round to nearest" on a vector viewed as `VectorTy`.
  //
  // Lowering preference, in the order the code tries them:
  //   1. the generic `Roundeven` intrinsic (LLVM >= 12, non-s390x builds);
  //   2. the SSE4.1 packed round intrinsics on x86_64;
  //   3. the NEON FRINTN intrinsic on aarch64;
  //   4. the generic `Nearbyint` intrinsic.
  void compileVectorFNearest(LLVM::Type VectorTy) noexcept {
    auto RoundLanes = [&](auto Operand) noexcept {
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
      assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
      if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Operand);
      }
#endif

#if defined(__x86_64__)
      if (Context.SupportSSE4_1) {
        // Pick the packed-single or packed-double variant by lane type.
        const auto RoundID = VectorTy.getElementType().isFloatTy()
                                 ? LLVM::Core::X86SSE41RoundPs
                                 : LLVM::Core::X86SSE41RoundPd;
        assuming(RoundID != LLVM::Core::NotIntrinsic);
        // NOTE(review): immediate 8 is presumably "round to nearest even,
        // suppress precision exception" in the ROUNDPS/ROUNDPD encoding.
        return Builder.createIntrinsic(RoundID, {},
                                       {Operand, LLContext.getInt32(8)});
      }
#endif

#if defined(__aarch64__)
      if (Context.SupportNEON &&
          LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN,
                                            Operand);
      }
#endif

      // Generic fallback, used when neither SSE4.1 (x86_64) nor NEON
      // (aarch64) provided a lowering above.
      assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Operand);
    };
    compileVectorOp(VectorTy, RoundLanes);
  }
5311
181
  // Compile a lane-wise floating-point addition of two vectors viewed as
  // `VectorTy`, via compileVectorVectorOp.
  void compileVectorVectorFAdd(LLVM::Type VectorTy) noexcept {
    auto EmitFAdd = [this](auto LHS, auto RHS) noexcept {
      return Builder.createFAdd(LHS, RHS);
    };
    compileVectorVectorOp(VectorTy, EmitFAdd);
  }
5316
468
  // Compile a lane-wise floating-point subtraction (LHS - RHS) of two vectors
  // viewed as `VectorTy`, via compileVectorVectorOp.
  void compileVectorVectorFSub(LLVM::Type VectorTy) noexcept {
    auto EmitFSub = [this](auto LHS, auto RHS) noexcept {
      return Builder.createFSub(LHS, RHS);
    };
    compileVectorVectorOp(VectorTy, EmitFSub);
  }
5321
249
  // Compile a lane-wise floating-point multiplication of two vectors viewed
  // as `VectorTy`, via compileVectorVectorOp.
  void compileVectorVectorFMul(LLVM::Type VectorTy) noexcept {
    auto EmitFMul = [this](auto LHS, auto RHS) noexcept {
      return Builder.createFMul(LHS, RHS);
    };
    compileVectorVectorOp(VectorTy, EmitFMul);
  }
5326
213
  // Compile a lane-wise floating-point division (LHS / RHS) of two vectors
  // viewed as `VectorTy`, via compileVectorVectorOp.
  void compileVectorVectorFDiv(LLVM::Type VectorTy) noexcept {
    auto EmitFDiv = [this](auto LHS, auto RHS) noexcept {
      return Builder.createFDiv(LHS, RHS);
    };
    compileVectorVectorOp(VectorTy, EmitFDiv);
  }
5331
311
  // Compile a lane-wise floating-point minimum of two vectors viewed as
  // `VectorTy`.
  //
  // The result is built as a chain of selects; later selects take priority:
  //   - base value: bitwise OR of both operands (used when the lanes compare
  //     neither less nor greater and neither is NaN; presumably this makes
  //     min(+0, -0) yield -0);
  //   - LHS > RHS  -> RHS;
  //   - LHS < RHS  -> LHS;
  //   - RHS is NaN -> RHS;
  //   - LHS is NaN -> LHS.
  void compileVectorVectorFMin(LLVM::Type VectorTy) noexcept {
    auto EmitMin = [this](auto LHS, auto RHS) noexcept {
      auto LhsIsNaN = Builder.createFCmpUNO(LHS, LHS);
      auto RhsIsNaN = Builder.createFCmpUNO(RHS, RHS);
      auto LhsLess = Builder.createFCmpOLT(LHS, RHS);
      auto LhsGreater = Builder.createFCmpOGT(LHS, RHS);

      // Combine the raw bit patterns for the "compare equal" case.
      auto LhsBits = Builder.createBitCast(LHS, Context.Int64x2Ty);
      auto RhsBits = Builder.createBitCast(RHS, Context.Int64x2Ty);
      auto Result = Builder.createBitCast(Builder.createOr(LhsBits, RhsBits),
                                          LHS.getType());

      Result = Builder.createSelect(LhsGreater, RHS, Result);
      Result = Builder.createSelect(LhsLess, LHS, Result);
      Result = Builder.createSelect(RhsIsNaN, RHS, Result);
      Result = Builder.createSelect(LhsIsNaN, LHS, Result);
      return Result;
    };
    compileVectorVectorOp(VectorTy, EmitMin);
  }
5348
260
  // Compile a lane-wise floating-point maximum of two vectors viewed as
  // `VectorTy`.
  //
  // The result is built as a chain of selects; later selects take priority:
  //   - base value: bitwise AND of both operands (used when the lanes compare
  //     neither less nor greater and neither is NaN; presumably this makes
  //     max(+0, -0) yield +0);
  //   - LHS < RHS  -> RHS;
  //   - LHS > RHS  -> LHS;
  //   - RHS is NaN -> RHS;
  //   - LHS is NaN -> LHS.
  void compileVectorVectorFMax(LLVM::Type VectorTy) noexcept {
    auto EmitMax = [this](auto LHS, auto RHS) noexcept {
      auto LhsIsNaN = Builder.createFCmpUNO(LHS, LHS);
      auto RhsIsNaN = Builder.createFCmpUNO(RHS, RHS);
      auto LhsLess = Builder.createFCmpOLT(LHS, RHS);
      auto LhsGreater = Builder.createFCmpOGT(LHS, RHS);

      // Combine the raw bit patterns for the "compare equal" case.
      auto LhsBits = Builder.createBitCast(LHS, Context.Int64x2Ty);
      auto RhsBits = Builder.createBitCast(RHS, Context.Int64x2Ty);
      auto Result = Builder.createBitCast(Builder.createAnd(LhsBits, RhsBits),
                                          LHS.getType());

      Result = Builder.createSelect(LhsLess, RHS, Result);
      Result = Builder.createSelect(LhsGreater, LHS, Result);
      Result = Builder.createSelect(RhsIsNaN, RHS, Result);
      Result = Builder.createSelect(LhsIsNaN, LHS, Result);
      return Result;
    };
    compileVectorVectorOp(VectorTy, EmitMax);
  }
5365
315
  // Compile the lane-wise "pseudo-minimum" of two vectors viewed as
  // `VectorTy`: each lane is RHS when RHS < LHS (ordered compare), else LHS.
  void compileVectorVectorFPMin(LLVM::Type VectorTy) noexcept {
    auto EmitPMin = [this](auto LHS, auto RHS) noexcept {
      auto RhsIsLess = Builder.createFCmpOLT(RHS, LHS);
      return Builder.createSelect(RhsIsLess, RHS, LHS);
    };
    compileVectorVectorOp(VectorTy, EmitPMin);
  }
5371
329
  // Compile the lane-wise "pseudo-maximum" of two vectors viewed as
  // `VectorTy`: each lane is RHS when RHS > LHS (ordered compare), else LHS.
  void compileVectorVectorFPMax(LLVM::Type VectorTy) noexcept {
    auto EmitPMax = [this](auto LHS, auto RHS) noexcept {
      auto RhsIsGreater = Builder.createFCmpOGT(RHS, LHS);
      return Builder.createSelect(RhsIsGreater, RHS, LHS);
    };
    compileVectorVectorOp(VectorTy, EmitPMax);
  }
5377
982
  // Compile a saturating float -> signed 32-bit integer conversion on a
  // vector viewed as `VectorTy`.
  //
  // Per lane: NaN becomes 0, values below INT32_MIN clamp to INT32_MIN and
  // values not below INT32_MAX (as converted to the lane FP type) clamp to
  // INT32_MAX. When `PadZero` is set the result is widened to twice as many
  // lanes by shuffling with a zero vector (operand order depends on the
  // native endianness).
  void compileVectorTruncSatS32(LLVM::Type VectorTy, bool PadZero) noexcept {
    auto Saturate = [this, VectorTy, PadZero](auto V) noexcept {
      const auto Lanes = VectorTy.getVectorSize();
      auto LaneFPTy = VectorTy.getElementType();

      // Integer bounds, both as scalars and splatted across all lanes.
      auto MinI32 = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::min()));
      auto MaxI32 = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
      auto MinI32V = Builder.createVectorSplat(Lanes, MinI32);
      auto MaxI32V = Builder.createVectorSplat(Lanes, MaxI32);
      auto ZeroI32V = LLVM::Value::getConstNull(MinI32V.getType());

      // The same bounds expressed in the lane floating-point type.
      auto MinFP = Builder.createSIToFP(MinI32, LaneFPTy);
      auto MaxFP = Builder.createSIToFP(MaxI32, LaneFPTy);
      auto MinFPV = Builder.createVectorSplat(Lanes, MinFP);
      auto MaxFPV = Builder.createVectorSplat(Lanes, MaxFP);

      // Classify every lane before converting it.
      auto IsOrdered = Builder.createFCmpORD(V, V);
      auto AtLeastMin = Builder.createFCmpUGE(V, MinFPV);
      auto BelowMax = Builder.createFCmpULT(V, MaxFPV);

      V = Builder.createFPToSI(
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Lanes));
      V = Builder.createSelect(IsOrdered, V, ZeroI32V);
      V = Builder.createSelect(AtLeastMin, V, MinI32V);
      V = Builder.createSelect(BelowMax, V, MaxI32V);

      if (PadZero) {
        // Identity mask over the concatenation of both shuffle operands.
        std::vector<uint32_t> Mask(Lanes * 2);
        std::iota(Mask.begin(), Mask.end(), 0);
        if constexpr (Endian::native == Endian::little) {
          V = Builder.createShuffleVector(
              V, ZeroI32V, LLVM::Value::getConstVector32(LLContext, Mask));
        } else {
          V = Builder.createShuffleVector(
              ZeroI32V, V, LLVM::Value::getConstVector32(LLContext, Mask));
        }
      }
      return V;
    };
    compileVectorOp(VectorTy, Saturate);
  }
5415
5.82k
  // Compile a saturating float -> unsigned 32-bit integer conversion on a
  // vector viewed as `VectorTy`.
  //
  // Per lane: lanes that are not ordered-greater-or-equal to 0 (including
  // NaN) become 0, and lanes not below UINT32_MAX (as converted to the lane
  // FP type) become UINT32_MAX. When `PadZero` is set the result is widened
  // to twice as many lanes by shuffling with a zero vector (operand order
  // depends on the native endianness).
  void compileVectorTruncSatU32(LLVM::Type VectorTy, bool PadZero) noexcept {
    auto Saturate = [this, VectorTy, PadZero](auto V) noexcept {
      const auto Lanes = VectorTy.getVectorSize();
      auto LaneFPTy = VectorTy.getElementType();

      // Integer bounds, both as scalars and splatted across all lanes.
      auto MinU32 = LLContext.getInt32(std::numeric_limits<uint32_t>::min());
      auto MaxU32 = LLContext.getInt32(std::numeric_limits<uint32_t>::max());
      auto MinU32V = Builder.createVectorSplat(Lanes, MinU32);
      auto MaxU32V = Builder.createVectorSplat(Lanes, MaxU32);

      // The same bounds expressed in the lane floating-point type.
      auto MinFP = Builder.createUIToFP(MinU32, LaneFPTy);
      auto MaxFP = Builder.createUIToFP(MaxU32, LaneFPTy);
      auto MinFPV = Builder.createVectorSplat(Lanes, MinFP);
      auto MaxFPV = Builder.createVectorSplat(Lanes, MaxFP);

      // Classify every lane before converting it.
      auto AtLeastMin = Builder.createFCmpOGE(V, MinFPV);
      auto BelowMax = Builder.createFCmpULT(V, MaxFPV);

      V = Builder.createFPToUI(
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Lanes));
      V = Builder.createSelect(AtLeastMin, V, MinU32V);
      V = Builder.createSelect(BelowMax, V, MaxU32V);

      if (PadZero) {
        auto ZeroU32V = LLVM::Value::getConstNull(MinU32V.getType());
        // Identity mask over the concatenation of both shuffle operands.
        std::vector<uint32_t> Mask(Lanes * 2);
        std::iota(Mask.begin(), Mask.end(), 0);
        if constexpr (Endian::native == Endian::little) {
          V = Builder.createShuffleVector(
              V, ZeroU32V, LLVM::Value::getConstVector32(LLContext, Mask));
        } else {
          V = Builder.createShuffleVector(
              ZeroU32V, V, LLVM::Value::getConstVector32(LLContext, Mask));
        }
      }
      return V;
    };
    compileVectorOp(VectorTy, Saturate);
  }
5449
  void compileVectorConvertS(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5450
685
                             bool Low) noexcept {
5451
685
    compileVectorOp(VectorTy,
5452
685
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5453
685
                      if (Low) {
5454
351
                        const auto Size = VectorTy.getVectorSize() / 2;
5455
351
                        std::vector<uint32_t> Mask(Size);
5456
351
                        if constexpr (Endian::native == Endian::little) {
5457
351
                          std::iota(Mask.begin(), Mask.end(), 0);
5458
                        } else {
5459
                          std::iota(Mask.begin(), Mask.end(), Size);
5460
                        }
5461
351
                        V = Builder.createShuffleVector(
5462
351
                            V, LLVM::Value::getUndef(VectorTy),
5463
351
                            LLVM::Value::getConstVector32(LLContext, Mask));
5464
351
                      }
5465
685
                      return Builder.createSIToFP(V, FPVectorTy);
5466
685
                    });
5467
685
  }
5468
  void compileVectorConvertU(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5469
2.00k
                             bool Low) noexcept {
5470
2.00k
    compileVectorOp(VectorTy,
5471
2.00k
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5472
2.00k
                      if (Low) {
5473
1.27k
                        const auto Size = VectorTy.getVectorSize() / 2;
5474
1.27k
                        std::vector<uint32_t> Mask(Size);
5475
1.27k
                        if constexpr (Endian::native == Endian::little) {
5476
1.27k
                          std::iota(Mask.begin(), Mask.end(), 0);
5477
                        } else {
5478
                          std::iota(Mask.begin(), Mask.end(), Size);
5479
                        }
5480
1.27k
                        V = Builder.createShuffleVector(
5481
1.27k
                            V, LLVM::Value::getUndef(VectorTy),
5482
1.27k
                            LLVM::Value::getConstVector32(LLContext, Mask));
5483
1.27k
                      }
5484
2.00k
                      return Builder.createUIToFP(V, FPVectorTy);
5485
2.00k
                    });
5486
2.00k
  }
5487
591
  void compileVectorDemote() noexcept {
5488
591
    compileVectorOp(Context.Doublex2Ty, [this](auto V) noexcept {
5489
591
      auto Demoted = Builder.createFPTrunc(
5490
591
          V, LLVM::Type::getVectorType(Context.FloatTy, 2));
5491
591
      auto ZeroV = LLVM::Value::getConstNull(Demoted.getType());
5492
591
      if constexpr (Endian::native == Endian::little) {
5493
591
        return Builder.createShuffleVector(
5494
591
            Demoted, ZeroV,
5495
591
            LLVM::Value::getConstVector32(LLContext, {0u, 1u, 2u, 3u}));
5496
      } else {
5497
        return Builder.createShuffleVector(
5498
            Demoted, ZeroV,
5499
            LLVM::Value::getConstVector32(LLContext, {3u, 2u, 1u, 0u}));
5500
      }
5501
591
    });
5502
591
  }
5503
625
  void compileVectorPromote() noexcept {
5504
625
    compileVectorOp(Context.Floatx4Ty, [this](auto V) noexcept {
5505
625
      auto UndefV = LLVM::Value::getUndef(V.getType());
5506
625
      auto Low = Builder.createShuffleVector(
5507
625
          V, UndefV, LLVM::Value::getConstVector32(LLContext, {0u, 1u}));
5508
625
      return Builder.createFPExt(
5509
625
          Low, LLVM::Type::getVectorType(Context.DoubleTy, 2));
5510
625
    });
5511
625
  }
5512
5513
22
  // Compile a lane-wise multiply-add: pops C, RHS and LHS (in that order)
  // from the value stack, computes LHS * RHS + C with a separate multiply and
  // add, and pushes the result viewed as Int64x2.
  void compileVectorVectorMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplier = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplicand = Builder.createBitCast(stackPop(), VectorTy);

    auto Product = Builder.createFMul(Multiplicand, Multiplier);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5521
5522
32
  // Compile a lane-wise negated multiply-add: pops C, RHS and LHS (in that
  // order) from the value stack, computes (-LHS) * RHS + C, and pushes the
  // result viewed as Int64x2.
  void compileVectorVectorNMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplier = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplicand = Builder.createBitCast(stackPop(), VectorTy);

    auto Negated = Builder.createFNeg(Multiplicand);
    auto Product = Builder.createFMul(Negated, Multiplier);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5530
5531
24
  void compileVectorRelaxedIntegerDotProduct() noexcept {
5532
24
    auto OriTy = Context.Int8x16Ty;
5533
24
    auto ExtTy = Context.Int16x8Ty;
5534
24
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5535
24
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5536
24
#if defined(__x86_64__)
5537
24
    if (Context.SupportSSSE3) {
5538
24
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5539
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5540
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5541
      // side to match the WebAssembly spec
5542
24
      return stackPush(Builder.createBitCast(
5543
24
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5544
24
                                  {RHS, LHS}),
5545
24
          Context.Int64x2Ty));
5546
24
    }
5547
0
#endif
5548
0
    auto Width = LLVM::Value::getConstInt(
5549
0
        ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5550
0
    Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5551
0
    auto EA = Builder.createBitCast(LHS, ExtTy);
5552
0
    auto EB = Builder.createBitCast(RHS, ExtTy);
5553
5554
0
    LLVM::Value AL, AR, BL, BR;
5555
0
    AL = Builder.createAShr(EA, Width);
5556
0
    AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5557
0
    BL = Builder.createAShr(EB, Width);
5558
0
    BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5559
5560
0
    return stackPush(Builder.createBitCast(
5561
0
        Builder.createAdd(Builder.createMul(AL, BL), Builder.createMul(AR, BR)),
5562
0
        Context.Int64x2Ty));
5563
24
  }
5564
5565
16
  void compileVectorRelaxedIntegerDotProductAdd() noexcept {
5566
16
    auto OriTy = Context.Int8x16Ty;
5567
16
    auto ExtTy = Context.Int16x8Ty;
5568
16
    auto FinTy = Context.Int32x4Ty;
5569
16
    auto VC = Builder.createBitCast(stackPop(), FinTy);
5570
16
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5571
16
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5572
16
    LLVM::Value IM;
5573
16
#if defined(__x86_64__)
5574
16
    if (Context.SupportSSSE3) {
5575
16
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5576
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5577
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5578
      // side to match the WebAssembly spec
5579
16
      IM = Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5580
16
                                   {RHS, LHS});
5581
16
    } else
5582
0
#endif
5583
0
    {
5584
0
      auto Width = LLVM::Value::getConstInt(
5585
0
          ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5586
0
      Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5587
0
      auto EA = Builder.createBitCast(LHS, ExtTy);
5588
0
      auto EB = Builder.createBitCast(RHS, ExtTy);
5589
5590
0
      LLVM::Value AL, AR, BL, BR;
5591
0
      AL = Builder.createAShr(EA, Width);
5592
0
      AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5593
0
      BL = Builder.createAShr(EB, Width);
5594
0
      BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5595
0
      IM = Builder.createAdd(Builder.createMul(AL, BL),
5596
0
                             Builder.createMul(AR, BR));
5597
0
    }
5598
5599
16
    auto Width = LLVM::Value::getConstInt(
5600
16
        FinTy.getElementType(), ExtTy.getElementType().getIntegerBitWidth());
5601
16
    Width = Builder.createVectorSplat(FinTy.getVectorSize(), Width);
5602
16
    auto IME = Builder.createBitCast(IM, FinTy);
5603
16
    auto L = Builder.createAShr(IME, Width);
5604
16
    auto R = Builder.createAShr(Builder.createShl(IME, Width), Width);
5605
5606
16
    return stackPush(Builder.createBitCast(
5607
16
        Builder.createAdd(Builder.createAdd(L, R), VC), Context.Int64x2Ty));
5608
16
  }
5609
5610
  void
5611
  enterBlock(LLVM::BasicBlock JumpBlock, LLVM::BasicBlock NextBlock,
5612
             LLVM::BasicBlock ElseBlock, std::vector<LLVM::Value> Args,
5613
             std::pair<std::vector<ValType>, std::vector<ValType>> Type,
5614
             std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5615
23.1k
                 ReturnPHI = {}) noexcept {
5616
23.1k
    assuming(Type.first.size() == Args.size());
5617
23.1k
    for (auto &Value : Args) {
5618
4.49k
      stackPush(Value);
5619
4.49k
    }
5620
23.1k
    const auto Unreachable = isUnreachable();
5621
23.1k
    ControlStack.emplace_back(Stack.size() - Args.size(), Unreachable,
5622
23.1k
                              JumpBlock, NextBlock, ElseBlock, std::move(Args),
5623
23.1k
                              std::move(Type), std::move(ReturnPHI));
5624
23.1k
  }
5625
5626
23.1k
  // Close the innermost control frame and return it.
  //
  // For a reachable frame, the block's result values are popped (preserving
  // their order) and recorded in the frame's ReturnPHI list together with the
  // current insertion block, and a branch to the continuation block is
  // emitted. For an unreachable frame, an `unreachable` terminator is emitted
  // instead. The builder is then positioned at the continuation block
  // (NextBlock if set, else JumpBlock) and the value stack is truncated back
  // to the frame's base height.
  Control leaveBlock() noexcept {
    Control Frame = std::move(ControlStack.back());
    ControlStack.pop_back();

    auto Continuation = Frame.NextBlock ? Frame.NextBlock : Frame.JumpBlock;
    if (Frame.Unreachable) {
      Builder.createUnreachable();
    } else {
      const auto &ResultTypes = Frame.Type.second;
      if (!ResultTypes.empty()) {
        // Fill back-to-front so Results ends up in push order.
        std::vector<LLVM::Value> Results(ResultTypes.size());
        for (auto It = Results.rbegin(); It != Results.rend(); ++It) {
          *It = stackPop();
        }
        Frame.ReturnPHI.emplace_back(std::move(Results),
                                     Builder.getInsertBlock());
      }
      Builder.createBr(Continuation);
    }
    Builder.positionAtEnd(Continuation);
    Stack.erase(Stack.begin() + static_cast<int64_t>(Frame.StackSize),
                Stack.end());
    return Frame;
  }
5650
5651
5.99k
  void checkStop() noexcept {
5652
5.99k
    if (!Interruptible) {
5653
5.99k
      return;
5654
5.99k
    }
5655
0
    auto NotStopBB = LLVM::BasicBlock::create(LLContext, F.Fn, "NotStop");
5656
0
    auto StopToken = Builder.createAtomicRMW(
5657
0
        LLVMAtomicRMWBinOpXchg, Context.getStopToken(Builder, ExecCtx),
5658
0
        LLContext.getInt32(0), LLVMAtomicOrderingMonotonic);
5659
#if LLVM_VERSION_MAJOR >= 13
5660
    StopToken.setAlignment(32);
5661
#endif
5662
0
    auto NotStop = Builder.createLikely(
5663
0
        Builder.createICmpEQ(StopToken, LLContext.getInt32(0)));
5664
0
    Builder.createCondBr(NotStop, NotStopBB,
5665
0
                         getTrapBB(ErrCode::Value::Interrupted));
5666
5667
0
    Builder.positionAtEnd(NotStopBB);
5668
0
  }
5669
5670
6.30k
  void setUnreachable() noexcept {
5671
6.30k
    if (ControlStack.empty()) {
5672
0
      IsUnreachable = true;
5673
6.30k
    } else {
5674
6.30k
      ControlStack.back().Unreachable = true;
5675
6.30k
    }
5676
6.30k
  }
5677
5678
1.62M
  bool isUnreachable() const noexcept {
5679
1.62M
    if (ControlStack.empty()) {
5680
11.4k
      return IsUnreachable;
5681
1.61M
    } else {
5682
1.61M
      return ControlStack.back().Unreachable;
5683
1.61M
    }
5684
1.62M
  }
5685
5686
  void
5687
  buildPHI(Span<const ValType> RetType,
5688
           Span<const std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5689
20.2k
               Incomings) noexcept {
5690
20.2k
    if (isVoidReturn(RetType)) {
5691
6.32k
      return;
5692
6.32k
    }
5693
13.9k
    std::vector<LLVM::Value> Nodes;
5694
13.9k
    if (Incomings.size() == 0) {
5695
2.93k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5696
2.93k
      Nodes.reserve(Types.size());
5697
3.30k
      for (LLVM::Type Type : Types) {
5698
3.30k
        Nodes.push_back(LLVM::Value::getUndef(Type));
5699
3.30k
      }
5700
11.0k
    } else if (Incomings.size() == 1) {
5701
9.77k
      Nodes = std::move(std::get<0>(Incomings.front()));
5702
9.77k
    } else {
5703
1.25k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5704
1.25k
      Nodes.reserve(Types.size());
5705
2.60k
      for (size_t I = 0; I < Types.size(); ++I) {
5706
1.35k
        auto PHIRet = Builder.createPHI(Types[I]);
5707
3.43k
        for (auto &[Value, BB] : Incomings) {
5708
3.43k
          assuming(Value.size() == Types.size());
5709
3.43k
          PHIRet.addIncoming(Value[I], BB);
5710
3.43k
        }
5711
1.35k
        Nodes.push_back(PHIRet);
5712
1.35k
      }
5713
1.25k
    }
5714
14.8k
    for (auto &Val : Nodes) {
5715
14.8k
      stackPush(Val);
5716
14.8k
    }
5717
13.9k
  }
5718
5719
38.3k
  void setLableJumpPHI(unsigned int Index) noexcept {
5720
38.3k
    assuming(Index < ControlStack.size());
5721
38.3k
    auto &Entry = *(ControlStack.rbegin() + Index);
5722
38.3k
    if (Entry.NextBlock) { // is loop
5723
2.63k
      std::vector<LLVM::Value> Args(Entry.Type.first.size());
5724
5.70k
      for (size_t I = 0; I < Args.size(); ++I) {
5725
3.07k
        const size_t J = Args.size() - 1 - I;
5726
3.07k
        Args[J] = stackPop();
5727
3.07k
      }
5728
5.70k
      for (size_t I = 0; I < Args.size(); ++I) {
5729
3.07k
        Entry.Args[I].addIncoming(Args[I], Builder.getInsertBlock());
5730
3.07k
        stackPush(Args[I]);
5731
3.07k
      }
5732
35.6k
    } else if (!Entry.Type.second.empty()) { // has return value
5733
2.02k
      std::vector<LLVM::Value> Rets(Entry.Type.second.size());
5734
4.17k
      for (size_t I = 0; I < Rets.size(); ++I) {
5735
2.15k
        const size_t J = Rets.size() - 1 - I;
5736
2.15k
        Rets[J] = stackPop();
5737
2.15k
      }
5738
4.17k
      for (size_t I = 0; I < Rets.size(); ++I) {
5739
2.15k
        stackPush(Rets[I]);
5740
2.15k
      }
5741
2.02k
      Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5742
2.02k
    }
5743
38.3k
  }
5744
5745
38.3k
  // Return the branch target (JumpBlock) of the control frame `Index` levels
  // up from the innermost one (0 = innermost).
  //
  // `Index` must be smaller than the control stack depth; this is the same
  // precondition setLableJumpPHI states for its label index.
  LLVM::BasicBlock getLabel(unsigned int Index) const noexcept {
    assuming(Index < ControlStack.size());
    return (ControlStack.rbegin() + Index)->JumpBlock;
  }
5748
5749
964k
  // Push `Value` onto the top of the value stack.
  void stackPush(LLVM::Value Value) noexcept {
    Stack.emplace_back(Value);
  }
5750
367k
  // Remove and return the top of the value stack.
  //
  // Preconditions: outside any control frame the stack must be non-empty;
  // inside a frame the stack must be higher than that frame's base height,
  // i.e. a frame never pops values that belong to its enclosing frame.
  LLVM::Value stackPop() noexcept {
    assuming(!ControlStack.empty() || !Stack.empty());
    assuming(ControlStack.empty() ||
             Stack.size() > ControlStack.back().StackSize);
    auto Top = Stack.back();
    Stack.pop_back();
    return Top;
  }
5758
5759
22.6k
  // Byte-swap `Value` on big-endian hosts; return it untouched on every
  // other host.
  //
  // On big-endian hosts:
  //   - integers wider than 8 bits and single-lane vectors are swapped
  //     directly with the `Bswap` intrinsic;
  //   - other vectors are bitcast to an integer (Int128 when the lane width
  //     is 128 bits, Int64 otherwise), swapped, and bitcast back;
  //   - float and double are bitcast to Int32 / Int64, swapped, and bitcast
  //     back;
  //   - anything else is returned unchanged.
  LLVM::Value switchEndian(LLVM::Value Value) {
    if constexpr (Endian::native != Endian::big) {
      return Value;
    } else {
      auto ValueTy = Value.getType();

      // Swaps the bytes of Value through an integer view of type IntTy.
      auto SwapThrough = [&](LLVM::Type IntTy) {
        auto Bits = Builder.createBitCast(Value, IntTy);
        Bits = Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Bits);
        return Builder.createBitCast(Bits, ValueTy);
      };

      if ((ValueTy.isIntegerTy() && ValueTy.getIntegerBitWidth() > 8) ||
          (ValueTy.isVectorTy() && ValueTy.getVectorSize() == 1)) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
      }
      if (ValueTy.isVectorTy()) {
        LLVM::Type WholeIntTy =
            ValueTy.getElementType().getIntegerBitWidth() == 128
                ? Context.Int128Ty
                : Context.Int64Ty;
        return SwapThrough(WholeIntTy);
      }
      if (ValueTy.isFloatTy() || ValueTy.isDoubleTy()) {
        LLVM::Type SameWidthIntTy =
            ValueTy.isFloatTy() ? Context.Int32Ty : Context.Int64Ty;
        return SwapThrough(SameWidthIntTy);
      }
      return Value;
    }
  }
5784
5785
  // Shared compile context: supplies the cached LLVM types (e.g. Int64x2Ty),
  // the CPU feature flags (e.g. SupportSSSE3) and helpers such as
  // getStopToken() used by the methods above.
  LLVM::Compiler::CompileContext &Context;
5786
  // LLVM context handle used to create constants and types.
  LLVM::Context LLContext;
5787
  // (type, value) pairs; presumably one entry per WASM local - TODO confirm
  // against the code that fills it.
  std::vector<std::pair<LLVM::Type, LLVM::Value>> Local;
5788
  // Value stack manipulated through stackPush() / stackPop().
  std::vector<LLVM::Value> Stack;
5789
  // NOTE(review): looks like storage for an instruction counter; not used in
  // the visible part of the class.
  LLVM::Value LocalInstrCount = nullptr;
5790
  // NOTE(review): looks like storage for a gas counter; not used in the
  // visible part of the class.
  LLVM::Value LocalGas = nullptr;
5791
  // Basic blocks keyed by error code; presumably the cache behind
  // getTrapBB() - TODO confirm.
  std::unordered_map<ErrCode::Value, LLVM::BasicBlock> TrapBB;
5792
  // Unreachable flag used by setUnreachable() / isUnreachable() while the
  // control stack is empty.
  bool IsUnreachable = false;
5793
  // When false, checkStop() emits no interruption check.
  bool Interruptible = false;
5794
  // One frame of the control stack, created by enterBlock() and consumed by
  // leaveBlock().
  //
  // Fields:
  //   StackSize   - height of the value stack below this frame's arguments;
  //                 leaveBlock() truncates the stack back to it and
  //                 stackPop() never pops below it.
  //   Unreachable - whether the current position inside the frame is
  //                 unreachable; initialised from the enclosing state.
  //   JumpBlock   - block returned by getLabel() as the branch target.
  //   NextBlock   - when set, setLableJumpPHI() treats the frame as a loop
  //                 and leaveBlock() continues here instead of at JumpBlock.
  //   ElseBlock   - stored as given to enterBlock(); its use is outside the
  //                 visible part of the file.
  //   Args        - the frame's argument values; for loops they receive
  //                 incoming values in setLableJumpPHI().
  //   Type        - (parameter types, result types) of the block.
  //   ReturnPHI   - (result values, source block) pairs collected for the
  //                 block's results.
  struct Control {
5795
    size_t StackSize;
5796
    bool Unreachable;
5797
    LLVM::BasicBlock JumpBlock;
5798
    LLVM::BasicBlock NextBlock;
5799
    LLVM::BasicBlock ElseBlock;
5800
    std::vector<LLVM::Value> Args;
5801
    std::pair<std::vector<ValType>, std::vector<ValType>> Type;
5802
    std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5803
        ReturnPHI;
5804
    // Member-wise constructor; the container arguments are taken by value
    // and moved into place.
    Control(size_t S, bool U, LLVM::BasicBlock J, LLVM::BasicBlock N,
5805
            LLVM::BasicBlock E, std::vector<LLVM::Value> A,
5806
            std::pair<std::vector<ValType>, std::vector<ValType>> T,
5807
            std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5808
                R) noexcept
5809
23.1k
        : StackSize(S), Unreachable(U), JumpBlock(J), NextBlock(N),
5810
23.1k
          ElseBlock(E), Args(std::move(A)), Type(std::move(T)),
5811
23.1k
          ReturnPHI(std::move(R)) {}
5812
    // Copy and move operations are all explicitly defaulted.
    Control(const Control &) = default;
5813
28.8k
    Control(Control &&) = default;
5814
    Control &operator=(const Control &) = default;
5815
1.17k
    Control &operator=(Control &&) = default;
5816
  };
5817
  // Stack of open control frames; pushed by enterBlock() and popped by
  // leaveBlock(). The innermost frame is at the back.
  std::vector<Control> ControlStack;
5818
  // Callee handle whose `Fn` member is the parent function for newly created
  // basic blocks (see checkStop()).
  LLVM::FunctionCallee F;
5819
  // Execution-context value passed to Context.getStopToken().
  LLVM::Value ExecCtx;
5820
  // IR builder through which every instruction in this class is emitted.
  LLVM::Builder Builder;
5821
};
5822
5823
// Extract every element of the struct-typed value `Struct` with one
// extractvalue instruction per element, returned in element order.
std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
                                      LLVM::Value Struct) noexcept {
  const auto FieldCount = Struct.getType().getStructNumElements();
  std::vector<LLVM::Value> Fields;
  Fields.reserve(FieldCount);
  unsigned Index = 0;
  while (Index < FieldCount) {
    Fields.push_back(Builder.createExtractValue(Struct, Index));
    ++Index;
  }
  return Fields;
}
5833
5834
} // namespace
5835
5836
namespace WasmEdge {
5837
namespace LLVM {
5838
5839
2.32k
/// Checks the configured proposals against what the AOT/JIT compiler supports.
///
/// Exception Handling and Memory64 only produce a warning; Custom Annotation
/// Syntax is rejected.
///
/// \returns success, or `ErrCode::Value::InvalidAOTConfigure` when the
/// Annotations proposal is enabled.
Expect<void> Compiler::checkConfigure() noexcept {
  // Note: Although the exception handling and memory64 proposals are not
  // implemented in AOT yet, they must not be rejected here, because the default
  // configuration has become WASM 3.0, which contains these proposals.
  if (Conf.hasProposal(Proposal::ExceptionHandling)) {
    spdlog::warn("Proposal Exception Handling is not yet supported in WasmEdge "
                 "AOT/JIT. The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }
  if (Conf.hasProposal(Proposal::Memory64)) {
    spdlog::warn("Proposal Memory64 is not yet supported in WasmEdge AOT/JIT. "
                 "The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }

  // Everything except the annotations proposal is acceptable at this point.
  if (!Conf.hasProposal(Proposal::Annotations)) {
    return {};
  }

  spdlog::error(ErrCode::Value::InvalidAOTConfigure);
  spdlog::error("    Proposal Custom Annotation Syntax is not yet supported "
                "in WasmEdge AOT/JIT.");
  return Unexpect(ErrCode::Value::InvalidAOTConfigure);
}
5861
5862
2.32k
/// Compiles a validated module into an `LLVM::Data`.
///
/// Steps, in order: lower every section into the LLVM module, verify the
/// module, create the target machine, run the optimization passes, and make
/// sure the `intrinsics` global exists with a null initializer.
///
/// \param Module the module to compile; must already be validated.
/// \returns the populated data object, `ErrCode::Value::NotValidated` for an
/// unvalidated module, `ErrCode::Value::IllegalPath` when the target lookup
/// fails, or whatever error the function/code section compilation reports.
Expect<Data> Compiler::compile(const AST::Module &Module) noexcept {
  // Refuse modules that have not been validated.
  if (unlikely(!Module.getIsValidated())) {
    spdlog::error(ErrCode::Value::NotValidated);
    return Unexpect(ErrCode::Value::NotValidated);
  }

  // Held until this function returns.
  std::unique_lock Lock(Mutex);
  spdlog::info("compile start"sv);

  LLVM::Core::init();

  LLVM::Data Output;
  auto LLContext = Output.extract().getLLContext();
  auto &LLModule = Output.extract().LLModule;
  LLModule.setTarget(LLVM::getDefaultTargetTriple().unwrap());
  LLModule.addFlag(LLVMModuleFlagBehaviorError, "PIC Level"sv, 2);

  CompileContext NewContext(LLContext, LLModule,
                            Conf.getCompilerConfigure().isGenericBinary());
  // Points the `Context` member at `NewContext` while this call is running and
  // resets it to nullptr on every exit path.
  struct ContextGuard {
    ContextGuard(CompileContext *&Ptr, CompileContext &Target) : Slot(Ptr) {
      Slot = &Target;
    }
    ~ContextGuard() { Slot = nullptr; }
    CompileContext *&Slot;
  };
  ContextGuard Guard(Context, NewContext);

  // Lower the sections. Only the function/code sections can report an error.
  compile(Module.getTypeSection());
  compile(Module.getImportSection());
  compile(Module.getGlobalSection());
  // MemorySec together with DataSec.
  compile(Module.getMemorySection(), Module.getDataSection());
  // TableSec together with ElemSec.
  compile(Module.getTableSection(), Module.getElementSection());
  // FunctionSec together with CodeSec.
  EXPECTED_TRY(compile(Module.getFunctionSection(), Module.getCodeSection()));
  compile(Module.getExportSection());
  // StartSection is not required to compile.

  spdlog::info("verify start"sv);
  LLModule.verify(LLVMPrintMessageAction);

  spdlog::info("optimize start"sv);
  const auto Level = Conf.getCompilerConfigure().getOptimizationLevel();
  auto &TM = Output.extract().TM;
  {
    auto Triple = LLModule.getTarget();
    auto [TheTarget, ErrorMessage] = LLVM::Target::getFromTriple(Triple);
    if (ErrorMessage) {
      spdlog::error("getFromTriple failed:{}"sv, ErrorMessage.string_view());
      return Unexpect(ErrCode::Value::IllegalPath);
    }

    // Pick the CPU model the code generator should target.
    std::string CPUName;
#if defined(__riscv) && __riscv_xlen == 64
    CPUName = "generic-rv64"s;
#else
    if (Conf.getCompilerConfigure().isGenericBinary()) {
      CPUName = "generic"s;
    } else {
      CPUName = LLVM::getHostCPUName().string_view();
    }
#endif

    TM = LLVM::TargetMachine::create(TheTarget, Triple, CPUName.c_str(),
                                     LLVM::getHostCPUFeatures().unwrap(),
                                     toLLVMCodeGenLevel(Level), LLVMRelocPIC,
                                     LLVMCodeModelDefault);

#if LLVM_VERSION_MAJOR >= 13
    // A failure of the pass pipeline is logged, but compilation carries on.
    auto PBO = LLVM::PassBuilderOptions::create();
    if (auto Error = PBO.runPasses(LLModule, toLLVMLevel(Level), TM)) {
      spdlog::error("{}"sv, Error.message().string_view());
    }
#else
    auto FP = LLVM::PassManager::createForModule(LLModule);
    auto MP = LLVM::PassManager::create();

    TM.addAnalysisPasses(MP);
    TM.addAnalysisPasses(FP);
    {
      auto PMB = LLVM::PassManagerBuilder::create();
      auto [OptLevel, SizeLevel] = toLLVMLevel(Level);
      PMB.setOptLevel(OptLevel);
      PMB.setSizeLevel(SizeLevel);
      PMB.populateFunctionPassManager(FP);
      PMB.populateModulePassManager(MP);
    }
    // At O0 and O1 the tail-call elimination pass is added explicitly.
    if (Level == CompilerConfigure::OptimizationLevel::O0 ||
        Level == CompilerConfigure::OptimizationLevel::O1) {
      FP.addTailCallEliminationPass();
    }

    FP.initializeFunctionPassManager();
    auto Fn = LLModule.getFirstFunction();
    while (Fn) {
      FP.runFunctionPassManager(Fn);
      Fn = Fn.getNextFunction();
    }
    FP.finalizeFunctionPassManager();
    MP.runPassManager(LLModule);
#endif
  }

  // Set initializer for constant value: reuse the `intrinsics` global when the
  // module already has one, otherwise add it.
  if (auto ExistingTable = LLModule.getNamedGlobal("intrinsics")) {
    ExistingTable.setInitializer(
        LLVM::Value::getConstNull(ExistingTable.getType()));
    ExistingTable.setGlobalConstant(false);
  } else {
    auto TableTy = LLVM::Type::getArrayType(
        LLContext.getInt8Ty().getPointerTo(),
        static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax));
    LLModule.addGlobal(TableTy.getPointerTo(), false, LLVMExternalLinkage,
                       LLVM::Value::getConstNull(TableTy), "intrinsics");
  }

  spdlog::info("optimize done"sv);
  return Expect<Data>{std::move(Output)};
}
5997
5998
2.32k
/// Emits one exported wrapper symbol `t<N>` per entry of the type section and
/// records the entry in `Context->CompositeTypes` / `Context->FunctionWrappers`.
///
/// Every wrapper has the signature `void(ExecCtxPtr, i8*, i8*, i8*)`:
///  - a function type seen for the first time gets a wrapper that loads the
///    arguments from the third parameter, calls the function pointer passed as
///    the second parameter, and stores the results through the fourth one;
///  - a function type equal to an earlier entry gets an alias to that entry's
///    wrapper;
///  - a non-function type gets a wrapper that returns immediately.
void Compiler::compile(const AST::TypeSection &TypeSec) noexcept {
  auto WrapperTy = LLVM::Type::getFunctionType(
      Context->VoidTy,
      {Context->ExecCtxPtrTy, Context->Int8PtrTy, Context->Int8PtrTy,
       Context->Int8PtrTy},
      false);
  auto SubTypes = TypeSec.getContent();
  const auto Size = SubTypes.size();
  if (Size == 0) {
    return;
  }
  Context->CompositeTypes.reserve(Size);
  Context->FunctionWrappers.reserve(Size);

  // Visibility and attributes common to every wrapper function.
  const auto ApplyWrapperAttrs = [this](auto &Fn) {
    Fn.setVisibility(LLVMProtectedVisibility);
    Fn.setDSOLocal(true);
    Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
    Fn.addFnAttr(Context->NoStackArgProbe);
    Fn.addFnAttr(Context->StrictFP);
    Fn.addFnAttr(Context->UWTable);
    Fn.addParamAttr(0, Context->ReadOnly);
    Fn.addParamAttr(0, Context->NoAlias);
    Fn.addParamAttr(1, Context->NoAlias);
    Fn.addParamAttr(2, Context->NoAlias);
    Fn.addParamAttr(3, Context->NoAlias);
  };

  // Iterate and compile types.
  for (size_t I = 0; I < Size; ++I) {
    const auto &CompType = SubTypes[I].getCompositeType();
    const auto Name = fmt::format("t{}"sv, Context->CompositeTypes.size());
    const bool IsFunc = CompType.isFunc();

    if (IsFunc) {
      // Look for an earlier, equal function type. `I` doubles as the
      // "not found" marker.
      const auto &NewFuncType = CompType.getFuncType();
      size_t Twin = I;
      for (size_t J = 0; J < I; ++J) {
        const auto *Earlier = Context->CompositeTypes[J];
        if (Earlier && Earlier->isFunc() &&
            Earlier->getFuncType() == NewFuncType) {
          Twin = J;
          break;
        }
      }

      if (Twin != I) {
        // Not unique: share the earlier wrapper through an alias.
        Context->CompositeTypes.push_back(Context->CompositeTypes[Twin]);
        auto Shared = Context->FunctionWrappers[Twin];
        Context->FunctionWrappers.push_back(Shared);
        auto Alias =
            Context->LLModule.addAlias(WrapperTy, Shared, Name.c_str());
        Alias.setLinkage(LLVMExternalLinkage);
        Alias.setVisibility(LLVMProtectedVisibility);
        Alias.setDSOLocal(true);
        Alias.setDLLStorageClass(LLVMDLLExportStorageClass);
        continue;
      }
    }

    // Create the wrapper. Non function types get an empty one.
    auto F = Context->LLModule.addFunction(WrapperTy, LLVMExternalLinkage,
                                           Name.c_str());
    {
      ApplyWrapperAttrs(F);

      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, F, "entry"));

      if (IsFunc) {
        auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy,
                              CompType.getFuncType());
        auto RTy = FTy.getReturnType();
        std::vector<LLVM::Type> FPTy(FTy.getNumParams());
        FTy.getParamTypes(FPTy);

        // The first LLVM parameter is the execution context, not a wasm value.
        const size_t ArgCount = FPTy.size() - 1;
        auto ExecCtxPtr = F.getFirstParam();
        auto FuncPtrParam = ExecCtxPtr.getNextParam();
        auto RawFunc = LLVM::FunctionCallee{
            FTy, Builder.createBitCast(FuncPtrParam, FTy.getPointerTo())};
        auto RawArgs = FuncPtrParam.getNextParam();
        auto RawRets = RawArgs.getNextParam();

        // Argument K lives at byte offset K * kValSize in the argument buffer.
        std::vector<LLVM::Value> Args;
        Args.reserve(FTy.getNumParams());
        Args.push_back(ExecCtxPtr);
        for (size_t K = 0; K < ArgCount; ++K) {
          Args.push_back(Builder.createValuePtrLoad(
              FPTy[K + 1], RawArgs, Context->Int8Ty, K * kValSize));
        }

        auto Ret = Builder.createCall(RawFunc, Args);
        if (RTy.isStructTy()) {
          // Multiple results: spread them over the result buffer.
          auto Rets = unpackStruct(Builder, Ret);
          Builder.createArrayPtrStore(Rets, RawRets, Context->Int8Ty, kValSize);
        } else if (!RTy.isVoidTy()) {
          // Exactly one result.
          Builder.createValuePtrStore(Ret, RawRets, Context->Int8Ty);
        }
      }
      Builder.createRetVoid();
    }

    // Copy wrapper and type to the context.
    Context->FunctionWrappers.push_back(F);
    Context->CompositeTypes.push_back(&CompType);
  }
}
6125
6126
2.32k
/// Registers every import description with the compile context.
///
/// - Function imports get an internal thunk `f<ID>` that writes its arguments
///   into a stack array, calls the `kCall` intrinsic with the function index
///   and the argument/result arrays, and returns the loaded results. The thunk
///   is appended to `Context->Functions` with a null code pointer.
/// - Global imports append their LLVM type to `Context->Globals`.
/// - Table, memory, and tag imports need no work here.
void Compiler::compile(const AST::ImportSection &ImportSec) noexcept {
  // Iterate and compile import descriptions.
  for (const auto &ImpDesc : ImportSec.getContent()) {
    switch (ImpDesc.getExternalType()) {
    case ExternalType::Function: {
      const auto FuncID = static_cast<uint32_t>(Context->Functions.size());
      // Get the function type index in module.
      uint32_t TypeIdx = ImpDesc.getExternalFuncTypeIdx();
      assuming(TypeIdx < Context->CompositeTypes.size());
      assuming(Context->CompositeTypes[TypeIdx]->isFunc());
      const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
      auto FTy =
          toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
      auto RTy = FTy.getReturnType();

      const auto Symbol = fmt::format("f{}"sv, FuncID);
      LLVM::FunctionCallee Thunk{
          FTy, Context->LLModule.addFunction(FTy, LLVMInternalLinkage,
                                             Symbol.c_str())};
      Thunk.Fn.setDSOLocal(true);
      Thunk.Fn.addFnAttr(Context->NoStackArgProbe);
      Thunk.Fn.addFnAttr(Context->StrictFP);
      Thunk.Fn.addFnAttr(Context->UWTable);
      Thunk.Fn.addParamAttr(0, Context->ReadOnly);
      Thunk.Fn.addParamAttr(0, Context->NoAlias);

      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, Thunk.Fn, "entry"));

      const auto ArgSize = FuncType.getParamTypes().size();
      const auto RetSize =
          RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();

      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);

      // Spill the wasm arguments; the first parameter (the execution context)
      // is skipped by advancing before each store.
      auto Param = Thunk.Fn.getFirstParam();
      for (unsigned Slot = 0; Slot < ArgSize; ++Slot) {
        Param = Param.getNextParam();
        Builder.createValuePtrStore(Param, Args, Context->Int8Ty,
                                    Slot * kValSize);
      }

      auto CallIntrinsic = Context->getIntrinsic(
          Builder, Executable::Intrinsics::kCall,
          LLVM::Type::getFunctionType(
              Context->VoidTy,
              {Context->Int32Ty, Context->Int8PtrTy, Context->Int8PtrTy},
              false));
      Builder.createCall(CallIntrinsic,
                         {Context->LLContext.getInt32(FuncID), Args, Rets});

      // Hand the results back in the shape the LLVM return type expects.
      switch (RetSize) {
      case 0:
        Builder.createRetVoid();
        break;
      case 1:
        Builder.createRet(
            Builder.createValuePtrLoad(RTy, Rets, Context->Int8Ty));
        break;
      default:
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
            RetSize, RTy, Rets, Context->Int8Ty, kValSize));
        break;
      }

      Context->Functions.emplace_back(TypeIdx, Thunk, nullptr);
      break;
    }
    case ExternalType::Global: {
      // Get global type. External type checked in validation.
      const auto &GlobType = ImpDesc.getExternalGlobalType();
      Context->Globals.push_back(
          toLLVMType(Context->LLContext, GlobType.getValType()));
      break;
    }
    case ExternalType::Table:
    case ExternalType::Memory:
      // Nothing to do.
      break;
    case ExternalType::Tag:
      // TODO: EXCEPTION - implement the AOT.
      break;
    default:
      assumingUnreachable();
    }
  }
}
6224
6225
2.31k
// Intentionally a no-op: the body is empty, so nothing is emitted for the
// export section.
void Compiler::compile(const AST::ExportSection &) noexcept {}
6226
6227
2.32k
/// Appends the LLVM type of every global in the section to `Context->Globals`,
/// in section order.
void Compiler::compile(const AST::GlobalSection &GlobalSec) noexcept {
  for (const auto &Segment : GlobalSec.getContent()) {
    const auto &WasmType = Segment.getGlobalType().getValType();
    Context->Globals.push_back(toLLVMType(Context->LLContext, WasmType));
  }
}
6234
6235
void Compiler::compile(const AST::MemorySection &,
6236
2.32k
                       const AST::DataSection &) noexcept {}
6237
6238
void Compiler::compile(const AST::TableSection &,
6239
2.32k
                       const AST::ElementSection &) noexcept {}
6240
6241
/// Declares and then compiles every function defined in the module.
///
/// The work is split in two passes so that a function body can refer to any
/// other function of the module:
///  1. declare an exported LLVM function `f<ID>` for each code segment and
///     append it to `Context->Functions`;
///  2. compile the body of every entry of `Context->Functions` that has a code
///     segment (entries with a null code pointer are skipped).
///
/// \param FuncSec type index of each defined function.
/// \param CodeSec code segment of each defined function; must have the same
/// number of entries as `FuncSec`.
/// \returns success, or the first error reported while compiling a body.
Expect<void> Compiler::compile(const AST::FunctionSection &FuncSec,
                               const AST::CodeSection &CodeSec) noexcept {
  const auto &TypeIdxs = FuncSec.getContent();
  const auto &CodeSegs = CodeSec.getContent();
  assuming(TypeIdxs.size() == CodeSegs.size());

  // Pass 1: declarations.
  for (size_t I = 0; I < CodeSegs.size(); ++I) {
    const auto &TypeIdx = TypeIdxs[I];
    const auto &Code = CodeSegs[I];
    assuming(TypeIdx < Context->CompositeTypes.size());
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
    const auto FuncID = Context->Functions.size();
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
    LLVM::FunctionCallee F = {FTy, Context->LLModule.addFunction(
                                       FTy, LLVMExternalLinkage,
                                       fmt::format("f{}"sv, FuncID).c_str())};
    F.Fn.setVisibility(LLVMProtectedVisibility);
    F.Fn.setDSOLocal(true);
    F.Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
    F.Fn.addFnAttr(Context->NoStackArgProbe);
    F.Fn.addFnAttr(Context->StrictFP);
    F.Fn.addFnAttr(Context->UWTable);
    F.Fn.addParamAttr(0, Context->ReadOnly);
    F.Fn.addParamAttr(0, Context->NoAlias);

    Context->Functions.emplace_back(TypeIdx, F, &Code);
  }

  // These settings are the same for every function; look them up once.
  const bool Interruptible = Conf.getCompilerConfigure().isInterruptible();
  const bool InstructionCounting =
      Conf.getStatisticsConfigure().isInstructionCounting();
  const bool CostMeasuring = Conf.getStatisticsConfigure().isCostMeasuring();

  // Pass 2: bodies.
  for (auto [T, F, Code] : Context->Functions) {
    if (!Code) {
      continue;
    }

    // The local declarations are (count, type) runs. Expand each run with a
    // single fill-insert instead of one push_back per local; a function may
    // declare a very large number of locals.
    std::vector<ValType> Locals;
    for (const auto &Local : Code->getLocals()) {
      Locals.insert(Locals.end(), Local.first, Local.second);
    }

    FunctionCompiler FC(*Context, F, Locals, Interruptible,
                        InstructionCounting, CostMeasuring);
    auto Type = Context->resolveBlockType(T);
    EXPECTED_TRY(FC.compile(*Code, std::move(Type)));
    F.Fn.eliminateUnreachableBlocks();
  }
  return {};
}
6291
6292
} // namespace LLVM
6293
} // namespace WasmEdge