Coverage Report

Created: 2025-11-11 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/WasmEdge/lib/llvm/compiler.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: 2019-2024 Second State INC
3
4
#include "llvm/compiler.h"
5
6
#include "aot/version.h"
7
#include "common/defines.h"
8
#include "common/filesystem.h"
9
#include "common/spdlog.h"
10
#include "data.h"
11
#include "llvm.h"
12
#include "system/allocator.h"
13
14
#include <algorithm>
15
#include <array>
16
#include <cinttypes>
17
#include <cstdint>
18
#include <cstdlib>
19
#include <limits>
20
#include <memory>
21
#include <numeric>
22
#include <string>
23
#include <string_view>
24
#include <system_error>
25
26
namespace LLVM = WasmEdge::LLVM;
27
using namespace std::literals;
28
29
namespace {
30
31
static bool
32
isVoidReturn(WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
33
static LLVM::Type toLLVMType(LLVM::Context LLContext,
34
                             const WasmEdge::ValType &ValType) noexcept;
35
static std::vector<LLVM::Type>
36
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
37
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
38
static LLVM::Type
39
toLLVMRetsType(LLVM::Context LLContext,
40
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
41
static LLVM::Type
42
toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
43
           const WasmEdge::AST::FunctionType &FuncType) noexcept;
44
static LLVM::Value
45
toLLVMConstantZero(LLVM::Context LLContext,
46
                   const WasmEdge::ValType &ValType) noexcept;
47
static std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
48
                                             LLVM::Value Struct) noexcept;
49
class FunctionCompiler;
50
51
// XXX: Misalignment handler is not implemented yet, so unaligned access is forced
52
// force unaligned load/store
53
static inline constexpr const bool kForceUnalignment = true;
54
55
// force checking div/rem on zero
56
static inline constexpr const bool kForceDivCheck = true;
57
58
// Size of a ValVariant
59
static inline constexpr const uint32_t kValSize = sizeof(WasmEdge::ValVariant);
60
61
// Translate Compiler::OptimizationLevel to llvm::PassBuilder version
62
#if LLVM_VERSION_MAJOR >= 13
63
static inline const char *
64
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
65
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
66
  switch (Level) {
67
  case OL::O0:
68
    return "default<O0>,function(tailcallelim)";
69
  case OL::O1:
70
    return "default<O1>,function(tailcallelim)";
71
  case OL::O2:
72
    return "default<O2>";
73
  case OL::O3:
74
    return "default<O3>";
75
  case OL::Os:
76
    return "default<Os>";
77
  case OL::Oz:
78
    return "default<Oz>";
79
  default:
80
    assumingUnreachable();
81
  }
82
}
83
#else
84
static inline std::pair<unsigned int, unsigned int>
85
2.21k
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
86
2.21k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
87
2.21k
  switch (Level) {
88
0
  case OL::O0:
89
0
    return {0, 0};
90
0
  case OL::O1:
91
0
    return {1, 0};
92
0
  case OL::O2:
93
0
    return {2, 0};
94
2.21k
  case OL::O3:
95
2.21k
    return {3, 0};
96
0
  case OL::Os:
97
0
    return {2, 1};
98
0
  case OL::Oz:
99
0
    return {2, 2};
100
0
  default:
101
0
    assumingUnreachable();
102
2.21k
  }
103
2.21k
}
104
#endif
105
106
/// Maps an optimization level to the LLVM-C code generation level.
///
/// O2 and both size-oriented levels (Os, Oz) map to the default level.
static inline LLVMCodeGenOptLevel toLLVMCodeGenLevel(
    WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
  using Opt = WasmEdge::CompilerConfigure::OptimizationLevel;
  switch (Level) {
  case Opt::O0:
    return LLVMCodeGenLevelNone;
  case Opt::O1:
    return LLVMCodeGenLevelLess;
  case Opt::O3:
    return LLVMCodeGenLevelAggressive;
  case Opt::O2:
  case Opt::Os:
  case Opt::Oz:
    return LLVMCodeGenLevelDefault;
  default:
    assumingUnreachable();
  }
}
126
} // namespace
127
128
struct LLVM::Compiler::CompileContext {
129
  LLVM::Context LLContext;
130
  LLVM::Module &LLModule;
131
  LLVM::Attribute Cold;
132
  LLVM::Attribute NoAlias;
133
  LLVM::Attribute NoInline;
134
  LLVM::Attribute NoReturn;
135
  LLVM::Attribute ReadOnly;
136
  LLVM::Attribute StrictFP;
137
  LLVM::Attribute UWTable;
138
  LLVM::Attribute NoStackArgProbe;
139
  LLVM::Type VoidTy;
140
  LLVM::Type Int8Ty;
141
  LLVM::Type Int16Ty;
142
  LLVM::Type Int32Ty;
143
  LLVM::Type Int64Ty;
144
  LLVM::Type Int128Ty;
145
  LLVM::Type FloatTy;
146
  LLVM::Type DoubleTy;
147
  LLVM::Type Int8x16Ty;
148
  LLVM::Type Int16x8Ty;
149
  LLVM::Type Int32x4Ty;
150
  LLVM::Type Floatx4Ty;
151
  LLVM::Type Int64x2Ty;
152
  LLVM::Type Doublex2Ty;
153
  LLVM::Type Int128x1Ty;
154
  LLVM::Type Int8PtrTy;
155
  LLVM::Type Int32PtrTy;
156
  LLVM::Type Int64PtrTy;
157
  LLVM::Type Int128PtrTy;
158
  LLVM::Type Int8PtrPtrTy;
159
  LLVM::Type ExecCtxTy;
160
  LLVM::Type ExecCtxPtrTy;
161
  LLVM::Type IntrinsicsTableTy;
162
  LLVM::Type IntrinsicsTablePtrTy;
163
  LLVM::Message SubtargetFeatures;
164
165
#if defined(__x86_64__)
166
#if defined(__XOP__)
167
  bool SupportXOP = true;
168
#else
169
  bool SupportXOP = false;
170
#endif
171
172
#if defined(__SSE4_1__)
173
  bool SupportSSE4_1 = true;
174
#else
175
  bool SupportSSE4_1 = false;
176
#endif
177
178
#if defined(__SSSE3__)
179
  bool SupportSSSE3 = true;
180
#else
181
  bool SupportSSSE3 = false;
182
#endif
183
184
#if defined(__SSE2__)
185
  bool SupportSSE2 = true;
186
#else
187
  bool SupportSSE2 = false;
188
#endif
189
#endif
190
191
#if defined(__aarch64__)
192
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__ARM_NEON_FP)
193
  bool SupportNEON = true;
194
#else
195
  bool SupportNEON = false;
196
#endif
197
#endif
198
199
  std::vector<const AST::CompositeType *> CompositeTypes;
200
  std::vector<LLVM::Value> FunctionWrappers;
201
  std::vector<std::tuple<uint32_t, LLVM::FunctionCallee,
202
                         const WasmEdge::AST::CodeSegment *>>
203
      Functions;
204
  std::vector<LLVM::Type> Globals;
205
  LLVM::Value IntrinsicsTable;
206
  LLVM::FunctionCallee Trap;
207
  /// Builds the per-module compilation context.
  ///
  /// Creates the attribute and type handles used by code generation, declares
  /// the external `intrinsics` global and the private `trap` function in the
  /// module, emits the `version` global, and, unless a generic binary is
  /// requested, scans the host CPU feature string to enable the SIMD support
  /// flags.
  ///
  /// \param C LLVM context used to create attributes and types.
  /// \param M module that receives the globals and the trap function.
  /// \param IsGenericBinary when true, host CPU features are not queried.
  CompileContext(LLVM::Context C, LLVM::Module &M,
                 bool IsGenericBinary) noexcept
      : LLContext(C), LLModule(M),
        Cold(LLVM::Attribute::createEnum(C, LLVM::Core::Cold, 0)),
        NoAlias(LLVM::Attribute::createEnum(C, LLVM::Core::NoAlias, 0)),
        NoInline(LLVM::Attribute::createEnum(C, LLVM::Core::NoInline, 0)),
        NoReturn(LLVM::Attribute::createEnum(C, LLVM::Core::NoReturn, 0)),
        ReadOnly(LLVM::Attribute::createEnum(C, LLVM::Core::ReadOnly, 0)),
        StrictFP(LLVM::Attribute::createEnum(C, LLVM::Core::StrictFP, 0)),
        UWTable(LLVM::Attribute::createEnum(C, LLVM::Core::UWTable,
                                            LLVM::Core::UWTableDefault)),
        NoStackArgProbe(
            LLVM::Attribute::createString(C, "no-stack-arg-probe"sv, {})),
        VoidTy(LLContext.getVoidTy()), Int8Ty(LLContext.getInt8Ty()),
        Int16Ty(LLContext.getInt16Ty()), Int32Ty(LLContext.getInt32Ty()),
        Int64Ty(LLContext.getInt64Ty()), Int128Ty(LLContext.getInt128Ty()),
        FloatTy(LLContext.getFloatTy()), DoubleTy(LLContext.getDoubleTy()),
        Int8x16Ty(LLVM::Type::getVectorType(Int8Ty, 16)),
        Int16x8Ty(LLVM::Type::getVectorType(Int16Ty, 8)),
        Int32x4Ty(LLVM::Type::getVectorType(Int32Ty, 4)),
        Floatx4Ty(LLVM::Type::getVectorType(FloatTy, 4)),
        Int64x2Ty(LLVM::Type::getVectorType(Int64Ty, 2)),
        Doublex2Ty(LLVM::Type::getVectorType(DoubleTy, 2)),
        Int128x1Ty(LLVM::Type::getVectorType(Int128Ty, 1)),
        Int8PtrTy(Int8Ty.getPointerTo()), Int32PtrTy(Int32Ty.getPointerTo()),
        Int64PtrTy(Int64Ty.getPointerTo()),
        Int128PtrTy(Int128Ty.getPointerTo()),
        Int8PtrPtrTy(Int8PtrTy.getPointerTo()),
        ExecCtxTy(LLVM::Type::getStructType(
            "ExecCtx",
            std::initializer_list<LLVM::Type>{
                // Memory
                Int8PtrTy.getPointerTo(),
                // Globals
                Int128PtrTy.getPointerTo(),
                // InstrCount
                Int64PtrTy,
                // CostTable
                LLVM::Type::getArrayType(Int64Ty, UINT16_MAX + 1)
                    .getPointerTo(),
                // Gas
                Int64PtrTy,
                // GasLimit
                Int64Ty,
                // StopToken
                Int32PtrTy,
            })),
        ExecCtxPtrTy(ExecCtxTy.getPointerTo()),
        IntrinsicsTableTy(LLVM::Type::getArrayType(
            Int8PtrTy,
            static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax))),
        IntrinsicsTablePtrTy(IntrinsicsTableTy.getPointerTo()),
        IntrinsicsTable(LLModule.addGlobal(IntrinsicsTablePtrTy, true,
                                           LLVMExternalLinkage, LLVM::Value(),
                                           "intrinsics")) {
    // Declare the module-private trap function: void trap(i32).
    Trap.Ty = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
    Trap.Fn = LLModule.addFunction(Trap.Ty, LLVMPrivateLinkage, "trap");
    Trap.Fn.setDSOLocal(true);
    Trap.Fn.addFnAttr(NoStackArgProbe);
    Trap.Fn.addFnAttr(StrictFP);
    Trap.Fn.addFnAttr(UWTable);
    Trap.Fn.addFnAttr(NoReturn);
    Trap.Fn.addFnAttr(Cold);
    Trap.Fn.addFnAttr(NoInline);

    // Emit the AOT binary version as an external constant global.
    LLModule.addGlobal(Int32Ty, true, LLVMExternalLinkage,
                       LLVM::Value::getConstInt(Int32Ty, AOT::kBinaryVersion),
                       "version");

    if (!IsGenericBinary) {
      // The host feature list is comma separated; only entries prefixed with
      // '+' are examined below.
      SubtargetFeatures = LLVM::getHostCPUFeatures();
      auto Features = SubtargetFeatures.string_view();
      while (!Features.empty()) {
        std::string_view Feature;
        if (auto Pos = Features.find(','); Pos != std::string_view::npos) {
          Feature = Features.substr(0, Pos);
          Features = Features.substr(Pos + 1);
        } else {
          Feature = std::exchange(Features, std::string_view());
        }
        // An empty token (leading or consecutive commas) must be skipped
        // before indexing: operator[] on an empty string_view is undefined.
        if (Feature.empty() || Feature[0] != '+') {
          continue;
        }
        Feature = Feature.substr(1);

#if defined(__x86_64__)
        if (!SupportXOP && Feature == "xop"sv) {
          SupportXOP = true;
        }
        if (!SupportSSE4_1 && Feature == "sse4.1"sv) {
          SupportSSE4_1 = true;
        }
        if (!SupportSSSE3 && Feature == "ssse3"sv) {
          SupportSSSE3 = true;
        }
        if (!SupportSSE2 && Feature == "sse2"sv) {
          SupportSSE2 = true;
        }
#elif defined(__aarch64__)
        if (!SupportNEON && Feature == "neon"sv) {
          SupportNEON = true;
        }
#endif
      }
    }

    {
      // create trap: the body forwards its first parameter to the kTrap
      // intrinsic and is terminated with `unreachable`.
      LLVM::Builder Builder(LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(LLContext, Trap.Fn, "entry"));
      auto FnTy = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
      auto CallTrap = Builder.createCall(
          getIntrinsic(Builder, Executable::Intrinsics::kTrap, FnTy),
          {Trap.Fn.getFirstParam()});
      CallTrap.addCallSiteAttribute(NoReturn);
      Builder.createUnreachable();
    }
  }
326
  /// Emits IR that loads the data pointer of memory `Index` out of field 0 of
  /// the execution context value and returns it as an i8 pointer.
  ///
  /// When WASMEDGE_ALLOCATOR_IS_STABLE is set the table entry is loaded
  /// directly; otherwise the entry is loaded as a pointer-to-pointer and
  /// dereferenced once more.
  LLVM::Value getMemory(LLVM::Builder &Builder, LLVM::Value ExecCtx,
                        uint32_t Index) noexcept {
    auto MemTable = Builder.createExtractValue(ExecCtx, 0);
    auto Slot = LLContext.getInt64(Index);
#if WASMEDGE_ALLOCATOR_IS_STABLE
    auto SlotAddr = Builder.createInBoundsGEP1(Int8PtrTy, MemTable, Slot);
    auto Base = Builder.createLoad(Int8PtrTy, SlotAddr);
    Base.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                     LLVM::Metadata(LLContext, {}));
#else
    auto SlotAddr = Builder.createInBoundsGEP1(Int8PtrPtrTy, MemTable, Slot);
    auto Indirect = Builder.createLoad(Int8PtrPtrTy, SlotAddr);
    Indirect.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                         LLVM::Metadata(LLContext, {}));
    auto IndirectAddr =
        Builder.createInBoundsGEP1(Int8PtrTy, Indirect, LLContext.getInt64(0));
    auto Base = Builder.createLoad(Int8PtrTy, IndirectAddr);
#endif
    return Builder.createBitCast(Base, Int8PtrTy);
  }
347
  std::pair<LLVM::Type, LLVM::Value> getGlobal(LLVM::Builder &Builder,
348
                                               LLVM::Value ExecCtx,
349
486
                                               uint32_t Index) noexcept {
350
486
    auto Ty = Globals[Index];
351
486
    auto Array = Builder.createExtractValue(ExecCtx, 1);
352
486
    auto VPtr = Builder.createLoad(
353
486
        Int128PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
354
486
                                                LLContext.getInt64(Index)));
355
486
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
356
486
                     LLVM::Metadata(LLContext, {}));
357
486
    auto Ptr = Builder.createBitCast(VPtr, Ty.getPointerTo());
358
486
    return {Ty, Ptr};
359
486
  }
360
  /// Extracts the instruction-counter field (index 2, "InstrCount" in the
  /// ExecCtx struct layout) from the execution context value.
  LLVM::Value getInstrCount(LLVM::Builder &Builder,
                            LLVM::Value ExecCtx) noexcept {
    auto InstrCountField = Builder.createExtractValue(ExecCtx, 2);
    return InstrCountField;
  }
364
  /// Extracts the cost-table field (index 3, "CostTable" in the ExecCtx
  /// struct layout) from the execution context value.
  LLVM::Value getCostTable(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    auto CostTableField = Builder.createExtractValue(ExecCtx, 3);
    return CostTableField;
  }
368
0
  /// Extracts the gas field (index 4, "Gas" in the ExecCtx struct layout)
  /// from the execution context value.
  LLVM::Value getGas(LLVM::Builder &Builder, LLVM::Value ExecCtx) noexcept {
    auto GasField = Builder.createExtractValue(ExecCtx, 4);
    return GasField;
  }
371
  /// Extracts the gas-limit field (index 5, "GasLimit" in the ExecCtx struct
  /// layout) from the execution context value.
  LLVM::Value getGasLimit(LLVM::Builder &Builder,
                          LLVM::Value ExecCtx) noexcept {
    auto GasLimitField = Builder.createExtractValue(ExecCtx, 5);
    return GasLimitField;
  }
375
  /// Extracts the stop-token field (index 6, "StopToken" in the ExecCtx
  /// struct layout) from the execution context value.
  LLVM::Value getStopToken(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    auto StopTokenField = Builder.createExtractValue(ExecCtx, 6);
    return StopTokenField;
  }
379
  /// Emits IR that fetches the entry `Index` of the intrinsics table and
  /// returns it as a callee of function type `Ty`.
  ///
  /// The table pointer is loaded from the `intrinsics` global, the selected
  /// slot is addressed with a two-index GEP, and the slot is reinterpreted as
  /// a pointer to a `Ty` function pointer before the final load.
  LLVM::FunctionCallee getIntrinsic(LLVM::Builder &Builder,
                                    Executable::Intrinsics Index,
                                    LLVM::Type Ty) noexcept {
    auto FnPtrTy = Ty.getPointerTo();
    auto FnPtrPtrTy = FnPtrTy.getPointerTo();
    auto Table = Builder.createLoad(IntrinsicsTablePtrTy, IntrinsicsTable);
    Table.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                      LLVM::Metadata(LLContext, {}));
    const auto SlotIndex = static_cast<uint32_t>(Index);
    auto Slot = Builder.createInBoundsGEP2(IntrinsicsTableTy, Table,
                                           LLContext.getInt64(0),
                                           LLContext.getInt64(SlotIndex));
    auto TypedSlot = Builder.createBitCast(Slot, FnPtrPtrTy);
    return {Ty, Builder.createLoad(FnPtrTy, TypedSlot)};
  }
394
  std::pair<std::vector<ValType>, std::vector<ValType>>
395
18.4k
  resolveBlockType(const BlockType &BType) const noexcept {
396
18.4k
    using VecT = std::vector<ValType>;
397
18.4k
    using RetT = std::pair<VecT, VecT>;
398
18.4k
    if (BType.isEmpty()) {
399
2.19k
      return RetT{};
400
2.19k
    }
401
16.2k
    if (BType.isValType()) {
402
2.52k
      return RetT{{}, {BType.getValType()}};
403
13.7k
    } else {
404
      // Type index case. t2* = type[index].returns
405
13.7k
      const uint32_t TypeIdx = BType.getTypeIndex();
406
13.7k
      const auto &FType = CompositeTypes[TypeIdx]->getFuncType();
407
13.7k
      return RetT{
408
13.7k
          VecT(FType.getParamTypes().begin(), FType.getParamTypes().end()),
409
13.7k
          VecT(FType.getReturnTypes().begin(), FType.getReturnTypes().end())};
410
13.7k
    }
411
16.2k
  }
412
};
413
414
namespace {
415
416
using namespace WasmEdge;
417
418
35.0k
/// Reports whether a result-type list is empty, i.e. nothing is returned.
static bool isVoidReturn(Span<const ValType> ValTypes) noexcept {
  return ValTypes.size() == 0;
}
421
422
/// Maps a WebAssembly value type to the LLVM type used to hold it.
///
/// I32, I64, F32 and F64 map to the matching scalar types; V128 and both
/// reference type codes map to a vector of two 64-bit integers.
static LLVM::Type toLLVMType(LLVM::Context LLContext,
                             const ValType &ValType) noexcept {
  switch (ValType.getCode()) {
  case TypeCode::F32:
    return LLContext.getFloatTy();
  case TypeCode::F64:
    return LLContext.getDoubleTy();
  case TypeCode::I32:
    return LLContext.getInt32Ty();
  case TypeCode::I64:
    return LLContext.getInt64Ty();
  case TypeCode::V128:
  case TypeCode::Ref:
  case TypeCode::RefNull:
    return LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2);
  default:
    assumingUnreachable();
  }
}
441
442
static std::vector<LLVM::Type>
443
toLLVMTypeVector(LLVM::Context LLContext,
444
20.5k
                 Span<const ValType> ValTypes) noexcept {
445
20.5k
  std::vector<LLVM::Type> Result;
446
20.5k
  Result.reserve(ValTypes.size());
447
20.5k
  for (const auto &Type : ValTypes) {
448
19.5k
    Result.push_back(toLLVMType(LLContext, Type));
449
19.5k
  }
450
20.5k
  return Result;
451
20.5k
}
452
453
static std::vector<LLVM::Type>
454
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
455
16.6k
               Span<const ValType> ValTypes) noexcept {
456
16.6k
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
457
16.6k
  Result.insert(Result.begin(), ExecCtxPtrTy);
458
16.6k
  return Result;
459
16.6k
}
460
461
/// Computes the LLVM return type for a list of result value types.
///
/// No results give `void`, one result gives its own LLVM type, and several
/// results are packed into a struct type with one field per result.
static LLVM::Type toLLVMRetsType(LLVM::Context LLContext,
                                 Span<const ValType> ValTypes) noexcept {
  if (isVoidReturn(ValTypes)) {
    return LLContext.getVoidTy();
  }
  if (ValTypes.size() == 1) {
    return toLLVMType(LLContext, ValTypes.front());
  }
  auto Fields = toLLVMTypeVector(LLContext, ValTypes);
  return LLVM::Type::getStructType(Fields);
}
476
477
/// Builds the LLVM function type for a WebAssembly function type: parameters
/// are the execution context pointer followed by the converted parameter
/// types, and the return type is derived from the result types.
static LLVM::Type toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
                             const AST::FunctionType &FuncType) noexcept {
  auto ParamTys =
      toLLVMArgsType(LLContext, ExecCtxPtrTy, FuncType.getParamTypes());
  auto ResultTy = toLLVMRetsType(LLContext, FuncType.getReturnTypes());
  return LLVM::Type::getFunctionType(ResultTy, ParamTys);
}
484
485
/// Produces the initial constant for a value of the given type.
///
/// Numeric and V128 types get an all-zero constant of their LLVM type.
/// Reference types get a 16-byte constant vector whose leading bytes are the
/// raw data of the value type and whose remaining bytes are zero.
static LLVM::Value toLLVMConstantZero(LLVM::Context LLContext,
                                      const ValType &ValType) noexcept {
  switch (ValType.getCode()) {
  case TypeCode::F32:
    return LLVM::Value::getConstNull(LLContext.getFloatTy());
  case TypeCode::F64:
    return LLVM::Value::getConstNull(LLContext.getDoubleTy());
  case TypeCode::I32:
    return LLVM::Value::getConstNull(LLContext.getInt32Ty());
  case TypeCode::I64:
    return LLVM::Value::getConstNull(LLContext.getInt64Ty());
  case TypeCode::V128:
    return LLVM::Value::getConstNull(
        LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2));
  case TypeCode::Ref:
  case TypeCode::RefNull: {
    // NOTE(review): assumes the raw data is at most 16 bytes long -- confirm.
    std::array<uint8_t, 16> Bytes{};
    const auto RawType = ValType.getRawData();
    std::copy(RawType.begin(), RawType.end(), Bytes.begin());
    return LLVM::Value::getConstVector8(LLContext, Bytes);
  }
  default:
    assumingUnreachable();
  }
}
510
511
class FunctionCompiler {
512
  struct Control;
513
514
public:
515
  /// Prepares code generation for one function.
  ///
  /// When `F.Fn` is set, an "entry" block is created, the execution context is
  /// loaded from the first parameter, optional local counters for instruction
  /// counting and gas are allocated and zeroed, every remaining parameter is
  /// spilled to a stack slot, and every declared local gets a stack slot
  /// initialised with the constant from toLLVMConstantZero. When `F.Fn` is
  /// not set, nothing is emitted.
  FunctionCompiler(LLVM::Compiler::CompileContext &Context,
                   LLVM::FunctionCallee F, Span<const ValType> Locals,
                   bool Interruptible, bool InstructionCounting,
                   bool GasMeasuring) noexcept
      : Context(Context), LLContext(Context.LLContext),
        Interruptible(Interruptible), F(F), Builder(LLContext) {
    if (!F.Fn) {
      return;
    }

    Builder.positionAtEnd(LLVM::BasicBlock::create(LLContext, F.Fn, "entry"));
    ExecCtx = Builder.createLoad(Context.ExecCtxTy, F.Fn.getFirstParam());

    if (InstructionCounting) {
      LocalInstrCount = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
    }

    if (GasMeasuring) {
      LocalGas = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalGas);
    }

    // Parameters after the first one become addressable locals.
    LLVM::Value Param = F.Fn.getFirstParam().getNextParam();
    while (Param) {
      LLVM::Type ParamTy = Param.getType();
      LLVM::Value ParamSlot = Builder.createAlloca(ParamTy);
      Builder.createStore(Param, ParamSlot);
      Local.emplace_back(ParamTy, ParamSlot);
      Param = Param.getNextParam();
    }

    // Declared locals follow the parameters.
    for (const auto &LocalType : Locals) {
      LLVM::Type SlotTy = toLLVMType(LLContext, LocalType);
      LLVM::Value Slot = Builder.createAlloca(SlotTy);
      Builder.createStore(toLLVMConstantZero(LLContext, LocalType), Slot);
      Local.emplace_back(SlotTy, Slot);
    }
  }
551
552
31.8k
  /// Returns the basic block that handles traps with code `Error`, creating
  /// and caching a new "trap" block the first time a code is requested.
  LLVM::BasicBlock getTrapBB(ErrCode::Value Error) noexcept {
    const auto Found = TrapBB.find(Error);
    if (Found == TrapBB.end()) {
      auto Fresh = LLVM::BasicBlock::create(LLContext, F.Fn, "trap");
      TrapBB.emplace(Error, Fresh);
      return Fresh;
    }
    return Found->second;
  }
560
561
  /// Compiles a whole function body.
  ///
  /// Opens the outermost block targeting a "ret" block, compiles the
  /// instruction sequence, emits the return, and finally fills every trap
  /// block that was requested with a call to the trap function.
  ///
  /// \param Code code segment whose expression is compiled.
  /// \param Type (parameter types, result types) of the function; the
  /// parameter list is cleared before the outermost block is entered.
  /// \returns success, or the error reported while compiling instructions.
  Expect<void>
  compile(const AST::CodeSegment &Code,
          std::pair<std::vector<ValType>, std::vector<ValType>> Type) noexcept {
    auto ReturnBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "ret");
    Type.first.clear();
    enterBlock(ReturnBlock, {}, {}, {}, std::move(Type));
    EXPECTED_TRY(compile(Code.getExpr().getInstrs()));
    assuming(ControlStack.empty());
    compileReturn();

    for (auto &TrapEntry : TrapBB) {
      Builder.positionAtEnd(TrapEntry.second);
      updateInstrCount();
      updateGasAtTrap();
      auto TrapCall = Builder.createCall(
          Context.Trap,
          {LLContext.getInt32(static_cast<uint32_t>(TrapEntry.first))});
      TrapCall.addCallSiteAttribute(Context.NoReturn);
      Builder.createUnreachable();
    }
    return {};
  }
582
583
10.7k
  Expect<void> compile(AST::InstrView Instrs) noexcept {
584
1.55M
    auto Dispatch = [this](const AST::Instruction &Instr) -> Expect<void> {
585
1.55M
      switch (Instr.getOpCode()) {
586
      // Control instructions (for blocks)
587
3.38k
      case OpCode::Block: {
588
3.38k
        auto Block = LLVM::BasicBlock::create(LLContext, F.Fn, "block");
589
3.38k
        auto EndBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "block.end");
590
3.38k
        Builder.createBr(Block);
591
592
3.38k
        Builder.positionAtEnd(Block);
593
3.38k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
594
3.38k
        const auto Arity = Type.first.size();
595
3.38k
        std::vector<LLVM::Value> Args(Arity);
596
3.38k
        if (isUnreachable()) {
597
898
          for (size_t I = 0; I < Arity; ++I) {
598
303
            auto Ty = toLLVMType(LLContext, Type.first[I]);
599
303
            Args[I] = LLVM::Value::getUndef(Ty);
600
303
          }
601
2.79k
        } else {
602
3.25k
          for (size_t I = 0; I < Arity; ++I) {
603
467
            const size_t J = Arity - 1 - I;
604
467
            Args[J] = stackPop();
605
467
          }
606
2.79k
        }
607
3.38k
        enterBlock(EndBlock, {}, {}, std::move(Args), std::move(Type));
608
3.38k
        checkStop();
609
3.38k
        updateGas();
610
3.38k
        return {};
611
0
      }
612
1.60k
      case OpCode::Loop: {
613
1.60k
        auto Curr = Builder.getInsertBlock();
614
1.60k
        auto Loop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop");
615
1.60k
        auto EndLoop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop.end");
616
1.60k
        Builder.createBr(Loop);
617
618
1.60k
        Builder.positionAtEnd(Loop);
619
1.60k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
620
1.60k
        const auto Arity = Type.first.size();
621
1.60k
        std::vector<LLVM::Value> Args(Arity);
622
1.60k
        if (isUnreachable()) {
623
912
          for (size_t I = 0; I < Arity; ++I) {
624
438
            auto Ty = toLLVMType(LLContext, Type.first[I]);
625
438
            auto Value = LLVM::Value::getUndef(Ty);
626
438
            auto PHINode = Builder.createPHI(Ty);
627
438
            PHINode.addIncoming(Value, Curr);
628
438
            Args[I] = PHINode;
629
438
          }
630
1.13k
        } else {
631
1.60k
          for (size_t I = 0; I < Arity; ++I) {
632
469
            const size_t J = Arity - 1 - I;
633
469
            auto Value = stackPop();
634
469
            auto PHINode = Builder.createPHI(Value.getType());
635
469
            PHINode.addIncoming(Value, Curr);
636
469
            Args[J] = PHINode;
637
469
          }
638
1.13k
        }
639
1.60k
        enterBlock(Loop, EndLoop, {}, std::move(Args), std::move(Type));
640
1.60k
        checkStop();
641
1.60k
        updateGas();
642
1.60k
        return {};
643
0
      }
644
2.69k
      case OpCode::If: {
645
2.69k
        auto Then = LLVM::BasicBlock::create(LLContext, F.Fn, "then");
646
2.69k
        auto Else = LLVM::BasicBlock::create(LLContext, F.Fn, "else");
647
2.69k
        auto EndIf = LLVM::BasicBlock::create(LLContext, F.Fn, "if.end");
648
2.69k
        LLVM::Value Cond;
649
2.69k
        if (isUnreachable()) {
650
528
          Cond = LLVM::Value::getUndef(LLContext.getInt1Ty());
651
2.17k
        } else {
652
2.17k
          Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
653
2.17k
        }
654
2.69k
        Builder.createCondBr(Cond, Then, Else);
655
656
2.69k
        Builder.positionAtEnd(Then);
657
2.69k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
658
2.69k
        const auto Arity = Type.first.size();
659
2.69k
        std::vector<LLVM::Value> Args(Arity);
660
2.69k
        if (isUnreachable()) {
661
1.00k
          for (size_t I = 0; I < Arity; ++I) {
662
479
            auto Ty = toLLVMType(LLContext, Type.first[I]);
663
479
            Args[I] = LLVM::Value::getUndef(Ty);
664
479
          }
665
2.17k
        } else {
666
3.10k
          for (size_t I = 0; I < Arity; ++I) {
667
934
            const size_t J = Arity - 1 - I;
668
934
            Args[J] = stackPop();
669
934
          }
670
2.17k
        }
671
2.69k
        enterBlock(EndIf, {}, Else, std::move(Args), std::move(Type));
672
2.69k
        return {};
673
0
      }
674
18.4k
      case OpCode::End: {
675
18.4k
        auto Entry = leaveBlock();
676
18.4k
        if (Entry.ElseBlock) {
677
1.00k
          auto Block = Builder.getInsertBlock();
678
1.00k
          Builder.positionAtEnd(Entry.ElseBlock);
679
1.00k
          enterBlock(Block, {}, {}, std::move(Entry.Args),
680
1.00k
                     std::move(Entry.Type), std::move(Entry.ReturnPHI));
681
1.00k
          Entry = leaveBlock();
682
1.00k
        }
683
18.4k
        buildPHI(Entry.Type.second, Entry.ReturnPHI);
684
18.4k
        return {};
685
0
      }
686
1.69k
      case OpCode::Else: {
687
1.69k
        auto Entry = leaveBlock();
688
1.69k
        Builder.positionAtEnd(Entry.ElseBlock);
689
1.69k
        enterBlock(Entry.JumpBlock, {}, {}, std::move(Entry.Args),
690
1.69k
                   std::move(Entry.Type), std::move(Entry.ReturnPHI));
691
1.69k
        return {};
692
0
      }
693
1.52M
      default:
694
1.52M
        break;
695
1.55M
      }
696
697
1.52M
      if (isUnreachable()) {
698
461k
        return {};
699
461k
      }
700
701
1.06M
      switch (Instr.getOpCode()) {
702
      // Control instructions
703
3.22k
      case OpCode::Unreachable:
704
3.22k
        Builder.createBr(getTrapBB(ErrCode::Value::Unreachable));
705
3.22k
        setUnreachable();
706
3.22k
        Builder.positionAtEnd(
707
3.22k
            LLVM::BasicBlock::create(LLContext, F.Fn, "unreachable.end"));
708
3.22k
        break;
709
42.2k
      case OpCode::Nop:
710
42.2k
        break;
711
1
      case OpCode::Throw:
712
2
      case OpCode::Throw_ref:
713
        // TODO: EXCEPTION - implement the AOT.
714
2
        return Unexpect(ErrCode::Value::AOTNotImpl);
715
737
      case OpCode::Br: {
716
737
        const auto Label = Instr.getJump().TargetIndex;
717
737
        setLableJumpPHI(Label);
718
737
        Builder.createBr(getLabel(Label));
719
737
        setUnreachable();
720
737
        Builder.positionAtEnd(
721
737
            LLVM::BasicBlock::create(LLContext, F.Fn, "br.end"));
722
737
        break;
723
1
      }
724
350
      case OpCode::Br_if: {
725
350
        const auto Label = Instr.getJump().TargetIndex;
726
350
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
727
350
        setLableJumpPHI(Label);
728
350
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_if.end");
729
350
        Builder.createCondBr(Cond, getLabel(Label), Next);
730
350
        Builder.positionAtEnd(Next);
731
350
        break;
732
1
      }
733
953
      case OpCode::Br_table: {
734
953
        auto LabelTable = Instr.getLabelList();
735
953
        assuming(LabelTable.size() <= std::numeric_limits<uint32_t>::max());
736
953
        const auto LabelTableSize =
737
953
            static_cast<uint32_t>(LabelTable.size() - 1);
738
953
        auto Value = stackPop();
739
953
        setLableJumpPHI(LabelTable[LabelTableSize].TargetIndex);
740
953
        auto Switch = Builder.createSwitch(
741
953
            Value, getLabel(LabelTable[LabelTableSize].TargetIndex),
742
953
            LabelTableSize);
743
36.4k
        for (uint32_t I = 0; I < LabelTableSize; ++I) {
744
35.5k
          setLableJumpPHI(LabelTable[I].TargetIndex);
745
35.5k
          Switch.addCase(LLContext.getInt32(I),
746
35.5k
                         getLabel(LabelTable[I].TargetIndex));
747
35.5k
        }
748
953
        setUnreachable();
749
953
        Builder.positionAtEnd(
750
953
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_table.end"));
751
953
        break;
752
953
      }
753
3
      case OpCode::Br_on_null: {
754
3
        const auto Label = Instr.getJump().TargetIndex;
755
3
        auto Value = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
756
3
        auto Cond = Builder.createICmpEQ(
757
3
            Builder.createExtractElement(Value, LLContext.getInt64(1)),
758
3
            LLContext.getInt64(0));
759
3
        setLableJumpPHI(Label);
760
3
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_null.end");
761
3
        Builder.createCondBr(Cond, getLabel(Label), Next);
762
3
        Builder.positionAtEnd(Next);
763
3
        stackPush(Value);
764
3
        break;
765
953
      }
766
1
      case OpCode::Br_on_non_null: {
767
1
        const auto Label = Instr.getJump().TargetIndex;
768
1
        auto Cond = Builder.createICmpNE(
769
1
            Builder.createExtractElement(
770
1
                Builder.createBitCast(Stack.back(), Context.Int64x2Ty),
771
1
                LLContext.getInt64(1)),
772
1
            LLContext.getInt64(0));
773
1
        setLableJumpPHI(Label);
774
1
        auto Next =
775
1
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_non_null.end");
776
1
        Builder.createCondBr(Cond, getLabel(Label), Next);
777
1
        Builder.positionAtEnd(Next);
778
1
        stackPop();
779
1
        break;
780
953
      }
781
0
      case OpCode::Br_on_cast:
782
0
      case OpCode::Br_on_cast_fail: {
783
0
        auto Ref = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
784
0
        const auto Label = Instr.getBrCast().Jump.TargetIndex;
785
0
        std::array<uint8_t, 16> Buf = {0};
786
0
        std::copy_n(Instr.getBrCast().RType2.getRawData().cbegin(), 8,
787
0
                    Buf.begin());
788
0
        auto VType = Builder.createExtractElement(
789
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
790
0
                                  Context.Int64x2Ty),
791
0
            LLContext.getInt64(0));
792
0
        auto IsRefTest = Builder.createCall(
793
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
794
0
                                 LLVM::Type::getFunctionType(
795
0
                                     Context.Int32Ty,
796
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
797
0
                                     false)),
798
0
            {Ref, VType});
799
0
        auto Cond =
800
0
            (Instr.getOpCode() == OpCode::Br_on_cast)
801
0
                ? Builder.createICmpNE(IsRefTest, LLContext.getInt32(0))
802
0
                : Builder.createICmpEQ(IsRefTest, LLContext.getInt32(0));
803
0
        setLableJumpPHI(Label);
804
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_cast.end");
805
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
806
0
        Builder.positionAtEnd(Next);
807
0
        break;
808
0
      }
809
730
      case OpCode::Return:
810
730
        compileReturn();
811
730
        setUnreachable();
812
730
        Builder.positionAtEnd(
813
730
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret.end"));
814
730
        break;
815
3.48k
      case OpCode::Call:
816
3.48k
        updateInstrCount();
817
3.48k
        updateGas();
818
3.48k
        compileCallOp(Instr.getTargetIndex());
819
3.48k
        break;
820
1.17k
      case OpCode::Call_indirect:
821
1.17k
        updateInstrCount();
822
1.17k
        updateGas();
823
1.17k
        compileIndirectCallOp(Instr.getSourceIndex(), Instr.getTargetIndex());
824
1.17k
        break;
825
63
      case OpCode::Return_call:
826
63
        updateInstrCount();
827
63
        updateGas();
828
63
        compileReturnCallOp(Instr.getTargetIndex());
829
63
        setUnreachable();
830
63
        Builder.positionAtEnd(
831
63
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call.end"));
832
63
        break;
833
102
      case OpCode::Return_call_indirect:
834
102
        updateInstrCount();
835
102
        updateGas();
836
102
        compileReturnIndirectCallOp(Instr.getSourceIndex(),
837
102
                                    Instr.getTargetIndex());
838
102
        setUnreachable();
839
102
        Builder.positionAtEnd(
840
102
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_indir.end"));
841
102
        break;
842
7
      case OpCode::Call_ref:
843
7
        updateInstrCount();
844
7
        updateGas();
845
7
        compileCallRefOp(Instr.getTargetIndex());
846
7
        break;
847
2
      case OpCode::Return_call_ref:
848
2
        updateInstrCount();
849
2
        updateGas();
850
2
        compileReturnCallRefOp(Instr.getTargetIndex());
851
2
        setUnreachable();
852
2
        Builder.positionAtEnd(
853
2
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_ref.end"));
854
2
        break;
855
2
      case OpCode::Try_table:
856
        // TODO: EXCEPTION - implement AOT compilation of try_table.
857
2
        return Unexpect(ErrCode::Value::AOTNotImpl);
858
859
      // Reference Instructions
860
5.25k
      case OpCode::Ref__null: {
861
5.25k
        std::array<uint8_t, 16> Buf = {0};
862
        // For null references, the declared heap type is narrowed down to the
        // matching null heap type (e.g. FuncRef -> NullFuncRef).
863
5.25k
        ValType VType;
864
5.25k
        if (Instr.getValType().isAbsHeapType()) {
865
5.21k
          switch (Instr.getValType().getHeapTypeCode()) {
866
10
          case TypeCode::NullFuncRef:
867
2.31k
          case TypeCode::FuncRef:
868
2.31k
            VType = TypeCode::NullFuncRef;
869
2.31k
            break;
870
10
          case TypeCode::NullExternRef:
871
2.71k
          case TypeCode::ExternRef:
872
2.71k
            VType = TypeCode::NullExternRef;
873
2.71k
            break;
874
10
          case TypeCode::NullExnRef:
875
22
          case TypeCode::ExnRef:
876
22
            VType = TypeCode::NullExnRef;
877
22
            break;
878
10
          case TypeCode::NullRef:
879
28
          case TypeCode::AnyRef:
880
68
          case TypeCode::EqRef:
881
139
          case TypeCode::I31Ref:
882
152
          case TypeCode::StructRef:
883
163
          case TypeCode::ArrayRef:
884
163
            VType = TypeCode::NullRef;
885
163
            break;
886
0
          default:
887
0
            assumingUnreachable();
888
5.21k
          }
889
5.21k
        } else {
890
32
          assuming(Instr.getValType().getTypeIndex() <
891
32
                   Context.CompositeTypes.size());
892
32
          const auto *CompType =
893
32
              Context.CompositeTypes[Instr.getValType().getTypeIndex()];
894
32
          assuming(CompType != nullptr);
895
32
          if (CompType->isFunc()) {
896
29
            VType = TypeCode::NullFuncRef;
897
29
          } else {
898
3
            VType = TypeCode::NullRef;
899
3
          }
900
32
        }
901
5.25k
        std::copy_n(VType.getRawData().cbegin(), 8, Buf.begin());
902
5.25k
        stackPush(Builder.createBitCast(
903
5.25k
            LLVM::Value::getConstVector8(LLContext, Buf), Context.Int64x2Ty));
904
5.25k
        break;
905
5.25k
      }
906
2.68k
      case OpCode::Ref__is_null:
907
2.68k
        stackPush(Builder.createZExt(
908
2.68k
            Builder.createICmpEQ(
909
2.68k
                Builder.createExtractElement(
910
2.68k
                    Builder.createBitCast(stackPop(), Context.Int64x2Ty),
911
2.68k
                    LLContext.getInt64(1)),
912
2.68k
                LLContext.getInt64(0)),
913
2.68k
            Context.Int32Ty));
914
2.68k
        break;
915
28
      case OpCode::Ref__func:
916
28
        stackPush(Builder.createCall(
917
28
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefFunc,
918
28
                                 LLVM::Type::getFunctionType(Context.Int64x2Ty,
919
28
                                                             {Context.Int32Ty},
920
28
                                                             false)),
921
28
            {LLContext.getInt32(Instr.getTargetIndex())}));
922
28
        break;
923
0
      case OpCode::Ref__eq: {
924
0
        LLVM::Value RHS = stackPop();
925
0
        LLVM::Value LHS = stackPop();
926
0
        stackPush(Builder.createZExt(
927
0
            Builder.createICmpEQ(
928
0
                Builder.createExtractElement(LHS, LLContext.getInt64(1)),
929
0
                Builder.createExtractElement(RHS, LLContext.getInt64(1))),
930
0
            Context.Int32Ty));
931
0
        break;
932
5.25k
      }
933
316
      case OpCode::Ref__as_non_null: {
934
316
        auto Next =
935
316
            LLVM::BasicBlock::create(LLContext, F.Fn, "ref_as_non_null.ok");
936
316
        Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
937
316
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
938
316
            Builder.createExtractElement(Stack.back(), LLContext.getInt64(1)),
939
316
            LLContext.getInt64(0)));
940
316
        Builder.createCondBr(IsNotNull, Next,
941
316
                             getTrapBB(ErrCode::Value::CastNullToNonNull));
942
316
        Builder.positionAtEnd(Next);
943
316
        break;
944
5.25k
      }
945
946
      // Reference Instructions (GC proposal)
947
0
      case OpCode::Struct__new:
948
0
      case OpCode::Struct__new_default: {
949
0
        LLVM::Value Args = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
950
0
        assuming(Instr.getTargetIndex() < Context.CompositeTypes.size());
951
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
952
0
        assuming(CompType != nullptr && !CompType->isFunc());
953
0
        auto ArgSize = CompType->getFieldTypes().size();
954
0
        if (Instr.getOpCode() == OpCode::Struct__new) {
955
0
          std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
956
0
          for (size_t I = 0; I < ArgSize; ++I) {
957
0
            ArgsVec[ArgSize - I - 1] = stackPop();
958
0
          }
959
0
          Args = Builder.createArray(ArgSize, kValSize);
960
0
          Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
961
0
        } else {
962
0
          ArgSize = 0;
963
0
        }
964
0
        stackPush(Builder.createCall(
965
0
            Context.getIntrinsic(
966
0
                Builder, Executable::Intrinsics::kStructNew,
967
0
                LLVM::Type::getFunctionType(
968
0
                    Context.Int64x2Ty,
969
0
                    {Context.Int32Ty, Context.Int8PtrTy, Context.Int32Ty},
970
0
                    false)),
971
0
            {LLContext.getInt32(Instr.getTargetIndex()), Args,
972
0
             LLContext.getInt32(static_cast<uint32_t>(ArgSize))}));
973
0
        break;
974
0
      }
975
0
      case OpCode::Struct__get:
976
0
      case OpCode::Struct__get_u:
977
0
      case OpCode::Struct__get_s: {
978
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
979
0
                 Context.CompositeTypes.size());
980
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
981
0
        assuming(CompType != nullptr && !CompType->isFunc());
982
0
        assuming(static_cast<size_t>(Instr.getSourceIndex()) <
983
0
                 CompType->getFieldTypes().size());
984
0
        const auto &StorageType =
985
0
            CompType->getFieldTypes()[Instr.getSourceIndex()].getStorageType();
986
0
        auto Ref = stackPop();
987
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Struct__get_s)
988
0
                            ? LLContext.getInt8(1)
989
0
                            : LLContext.getInt8(0);
990
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
991
0
        Builder.createCall(
992
0
            Context.getIntrinsic(
993
0
                Builder, Executable::Intrinsics::kStructGet,
994
0
                LLVM::Type::getFunctionType(Context.VoidTy,
995
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
996
0
                                             Context.Int32Ty, Context.Int8Ty,
997
0
                                             Context.Int8PtrTy},
998
0
                                            false)),
999
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1000
0
             LLContext.getInt32(Instr.getSourceIndex()), IsSigned, Ret});
1001
1002
0
        switch (StorageType.getCode()) {
1003
0
        case TypeCode::I8:
1004
0
        case TypeCode::I16:
1005
0
        case TypeCode::I32: {
1006
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1007
0
                                               Context.Int64x2Ty));
1008
0
          break;
1009
0
        }
1010
0
        case TypeCode::I64: {
1011
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1012
0
                                               Context.Int64x2Ty));
1013
0
          break;
1014
0
        }
1015
0
        case TypeCode::F32: {
1016
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1017
0
                                               Context.Int64x2Ty));
1018
0
          break;
1019
0
        }
1020
0
        case TypeCode::F64: {
1021
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1022
0
                                               Context.Int64x2Ty));
1023
0
          break;
1024
0
        }
1025
0
        case TypeCode::V128:
1026
0
        case TypeCode::Ref:
1027
0
        case TypeCode::RefNull: {
1028
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1029
0
                                               Context.Int64x2Ty));
1030
0
          break;
1031
0
        }
1032
0
        default:
1033
0
          assumingUnreachable();
1034
0
        }
1035
0
        break;
1036
0
      }
1037
0
      case OpCode::Struct__set: {
1038
0
        auto Val = stackPop();
1039
0
        auto Ref = stackPop();
1040
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1041
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1042
0
        Builder.createCall(
1043
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kStructSet,
1044
0
                                 LLVM::Type::getFunctionType(
1045
0
                                     Context.VoidTy,
1046
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1047
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1048
0
                                     false)),
1049
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1050
0
             LLContext.getInt32(Instr.getSourceIndex()), Arg});
1051
0
        break;
1052
0
      }
1053
1
      case OpCode::Array__new: {
1054
1
        auto Length = stackPop();
1055
1
        auto Val = stackPop();
1056
1
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1057
1
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1058
1
        stackPush(Builder.createCall(
1059
1
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1060
1
                                 LLVM::Type::getFunctionType(
1061
1
                                     Context.Int64x2Ty,
1062
1
                                     {Context.Int32Ty, Context.Int32Ty,
1063
1
                                      Context.Int8PtrTy, Context.Int32Ty},
1064
1
                                     false)),
1065
1
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1066
1
             LLContext.getInt32(1)}));
1067
1
        break;
1068
0
      }
1069
11
      case OpCode::Array__new_default: {
1070
11
        auto Length = stackPop();
1071
11
        LLVM::Value Arg = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1072
11
        stackPush(Builder.createCall(
1073
11
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1074
11
                                 LLVM::Type::getFunctionType(
1075
11
                                     Context.Int64x2Ty,
1076
11
                                     {Context.Int32Ty, Context.Int32Ty,
1077
11
                                      Context.Int8PtrTy, Context.Int32Ty},
1078
11
                                     false)),
1079
11
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1080
11
             LLContext.getInt32(0)}));
1081
11
        break;
1082
0
      }
1083
6
      case OpCode::Array__new_fixed: {
1084
6
        const auto ArgSize = Instr.getSourceIndex();
1085
6
        std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1086
21
        for (size_t I = 0; I < ArgSize; ++I) {
1087
15
          ArgsVec[ArgSize - I - 1] = stackPop();
1088
15
        }
1089
6
        LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
1090
6
        Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1091
6
        stackPush(Builder.createCall(
1092
6
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1093
6
                                 LLVM::Type::getFunctionType(
1094
6
                                     Context.Int64x2Ty,
1095
6
                                     {Context.Int32Ty, Context.Int32Ty,
1096
6
                                      Context.Int8PtrTy, Context.Int32Ty},
1097
6
                                     false)),
1098
6
            {LLContext.getInt32(Instr.getTargetIndex()),
1099
6
             LLContext.getInt32(ArgSize), Args, LLContext.getInt32(ArgSize)}));
1100
6
        break;
1101
0
      }
1102
0
      case OpCode::Array__new_data:
1103
0
      case OpCode::Array__new_elem: {
1104
0
        auto Length = stackPop();
1105
0
        auto Start = stackPop();
1106
0
        stackPush(Builder.createCall(
1107
0
            Context.getIntrinsic(
1108
0
                Builder,
1109
0
                ((Instr.getOpCode() == OpCode::Array__new_data)
1110
0
                     ? Executable::Intrinsics::kArrayNewData
1111
0
                     : Executable::Intrinsics::kArrayNewElem),
1112
0
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1113
0
                                            {Context.Int32Ty, Context.Int32Ty,
1114
0
                                             Context.Int32Ty, Context.Int32Ty},
1115
0
                                            false)),
1116
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1117
0
             LLContext.getInt32(Instr.getSourceIndex()), Start, Length}));
1118
0
        break;
1119
0
      }
1120
0
      case OpCode::Array__get:
1121
0
      case OpCode::Array__get_u:
1122
0
      case OpCode::Array__get_s: {
1123
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1124
0
                 Context.CompositeTypes.size());
1125
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1126
0
        assuming(CompType != nullptr && !CompType->isFunc());
1127
0
        assuming(static_cast<size_t>(1) == CompType->getFieldTypes().size());
1128
0
        const auto &StorageType = CompType->getFieldTypes()[0].getStorageType();
1129
0
        auto Idx = stackPop();
1130
0
        auto Ref = stackPop();
1131
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Array__get_s)
1132
0
                            ? LLContext.getInt8(1)
1133
0
                            : LLContext.getInt8(0);
1134
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1135
0
        Builder.createCall(
1136
0
            Context.getIntrinsic(
1137
0
                Builder, Executable::Intrinsics::kArrayGet,
1138
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1139
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1140
0
                                             Context.Int32Ty, Context.Int8Ty,
1141
0
                                             Context.Int8PtrTy},
1142
0
                                            false)),
1143
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, IsSigned,
1144
0
             Ret});
1145
1146
0
        switch (StorageType.getCode()) {
1147
0
        case TypeCode::I8:
1148
0
        case TypeCode::I16:
1149
0
        case TypeCode::I32: {
1150
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1151
0
                                               Context.Int64x2Ty));
1152
0
          break;
1153
0
        }
1154
0
        case TypeCode::I64: {
1155
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1156
0
                                               Context.Int64x2Ty));
1157
0
          break;
1158
0
        }
1159
0
        case TypeCode::F32: {
1160
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1161
0
                                               Context.Int64x2Ty));
1162
0
          break;
1163
0
        }
1164
0
        case TypeCode::F64: {
1165
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1166
0
                                               Context.Int64x2Ty));
1167
0
          break;
1168
0
        }
1169
0
        case TypeCode::V128:
1170
0
        case TypeCode::Ref:
1171
0
        case TypeCode::RefNull: {
1172
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1173
0
                                               Context.Int64x2Ty));
1174
0
          break;
1175
0
        }
1176
0
        default:
1177
0
          assumingUnreachable();
1178
0
        }
1179
0
        break;
1180
0
      }
1181
0
      case OpCode::Array__set: {
1182
0
        auto Val = stackPop();
1183
0
        auto Idx = stackPop();
1184
0
        auto Ref = stackPop();
1185
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1186
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1187
0
        Builder.createCall(
1188
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArraySet,
1189
0
                                 LLVM::Type::getFunctionType(
1190
0
                                     Context.VoidTy,
1191
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1192
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1193
0
                                     false)),
1194
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, Arg});
1195
0
        break;
1196
0
      }
1197
0
      case OpCode::Array__len: {
1198
0
        auto Ref = stackPop();
1199
0
        stackPush(Builder.createCall(
1200
0
            Context.getIntrinsic(
1201
0
                Builder, Executable::Intrinsics::kArrayLen,
1202
0
                LLVM::Type::getFunctionType(Context.Int32Ty,
1203
0
                                            {Context.Int64x2Ty}, false)),
1204
0
            {Ref}));
1205
0
        break;
1206
0
      }
1207
0
      case OpCode::Array__fill: {
1208
0
        auto Cnt = stackPop();
1209
0
        auto Val = stackPop();
1210
0
        auto Off = stackPop();
1211
0
        auto Ref = stackPop();
1212
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1213
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1214
0
        Builder.createCall(
1215
0
            Context.getIntrinsic(
1216
0
                Builder, Executable::Intrinsics::kArrayFill,
1217
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1218
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1219
0
                                             Context.Int32Ty, Context.Int32Ty,
1220
0
                                             Context.Int8PtrTy},
1221
0
                                            false)),
1222
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Off, Cnt, Arg});
1223
0
        break;
1224
0
      }
1225
0
      case OpCode::Array__copy: {
1226
0
        auto Cnt = stackPop();
1227
0
        auto SrcOff = stackPop();
1228
0
        auto SrcRef = stackPop();
1229
0
        auto DstOff = stackPop();
1230
0
        auto DstRef = stackPop();
1231
0
        Builder.createCall(
1232
0
            Context.getIntrinsic(
1233
0
                Builder, Executable::Intrinsics::kArrayCopy,
1234
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1235
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1236
0
                                             Context.Int32Ty, Context.Int64x2Ty,
1237
0
                                             Context.Int32Ty, Context.Int32Ty,
1238
0
                                             Context.Int32Ty},
1239
0
                                            false)),
1240
0
            {DstRef, LLContext.getInt32(Instr.getTargetIndex()), DstOff, SrcRef,
1241
0
             LLContext.getInt32(Instr.getSourceIndex()), SrcOff, Cnt});
1242
0
        break;
1243
0
      }
1244
0
      case OpCode::Array__init_data:
1245
0
      case OpCode::Array__init_elem: {
1246
0
        auto Cnt = stackPop();
1247
0
        auto SrcOff = stackPop();
1248
0
        auto DstOff = stackPop();
1249
0
        auto Ref = stackPop();
1250
0
        Builder.createCall(
1251
0
            Context.getIntrinsic(
1252
0
                Builder,
1253
0
                ((Instr.getOpCode() == OpCode::Array__init_data)
1254
0
                     ? Executable::Intrinsics::kArrayInitData
1255
0
                     : Executable::Intrinsics::kArrayInitElem),
1256
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1257
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1258
0
                                             Context.Int32Ty, Context.Int32Ty,
1259
0
                                             Context.Int32Ty, Context.Int32Ty},
1260
0
                                            false)),
1261
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1262
0
             LLContext.getInt32(Instr.getSourceIndex()), DstOff, SrcOff, Cnt});
1263
0
        break;
1264
0
      }
1265
2
      case OpCode::Ref__test:
1266
3
      case OpCode::Ref__test_null: {
1267
3
        auto Ref = stackPop();
1268
3
        std::array<uint8_t, 16> Buf = {0};
1269
3
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1270
3
        auto VType = Builder.createExtractElement(
1271
3
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1272
3
                                  Context.Int64x2Ty),
1273
3
            LLContext.getInt64(0));
1274
3
        stackPush(Builder.createCall(
1275
3
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
1276
3
                                 LLVM::Type::getFunctionType(
1277
3
                                     Context.Int32Ty,
1278
3
                                     {Context.Int64x2Ty, Context.Int64Ty},
1279
3
                                     false)),
1280
3
            {Ref, VType}));
1281
3
        break;
1282
2
      }
1283
1
      case OpCode::Ref__cast:
1284
2
      case OpCode::Ref__cast_null: {
1285
2
        auto Ref = stackPop();
1286
2
        std::array<uint8_t, 16> Buf = {0};
1287
2
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1288
2
        auto VType = Builder.createExtractElement(
1289
2
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1290
2
                                  Context.Int64x2Ty),
1291
2
            LLContext.getInt64(0));
1292
2
        stackPush(Builder.createCall(
1293
2
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefCast,
1294
2
                                 LLVM::Type::getFunctionType(
1295
2
                                     Context.Int64x2Ty,
1296
2
                                     {Context.Int64x2Ty, Context.Int64Ty},
1297
2
                                     false)),
1298
2
            {Ref, VType}));
1299
2
        break;
1300
1
      }
1301
0
      case OpCode::Any__convert_extern: {
1302
0
        std::array<uint8_t, 16> RawRef = {0};
1303
0
        auto Ref = stackPop();
1304
0
        auto PtrVal = Builder.createExtractElement(Ref, LLContext.getInt64(1));
1305
0
        auto IsNullBB =
1306
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.null");
1307
0
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1308
0
                                                  "any_conv_extern.not_null");
1309
0
        auto IsExtrefBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1310
0
                                                   "any_conv_extern.is_extref");
1311
0
        auto EndBB =
1312
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.end");
1313
0
        auto CondIsNull = Builder.createICmpEQ(PtrVal, LLContext.getInt64(0));
1314
0
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1315
1316
0
        Builder.positionAtEnd(IsNullBB);
1317
0
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullRef);
1318
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1319
0
        auto Ret1 = Builder.createBitCast(
1320
0
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1321
0
        Builder.createBr(EndBB);
1322
1323
0
        Builder.positionAtEnd(NotNullBB);
1324
0
        auto Ret2 = Builder.createBitCast(
1325
0
            Builder.createInsertElement(
1326
0
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1327
0
                LLContext.getInt8(0), LLContext.getInt64(1)),
1328
0
            Context.Int64x2Ty);
1329
0
        auto HType = Builder.createExtractElement(
1330
0
            Builder.createBitCast(Ret2, Context.Int8x16Ty),
1331
0
            LLContext.getInt64(3));
1332
0
        auto CondIsExtref = Builder.createOr(
1333
0
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1334
0
                                            TypeCode::ExternRef))),
1335
0
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1336
0
                                            TypeCode::NullExternRef))));
1337
0
        Builder.createCondBr(CondIsExtref, IsExtrefBB, EndBB);
1338
1339
0
        Builder.positionAtEnd(IsExtrefBB);
1340
0
        VT = ValType(TypeCode::Ref, TypeCode::AnyRef);
1341
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1342
0
        auto Ret3 = Builder.createInsertElement(
1343
0
            Builder.createBitCast(
1344
0
                LLVM::Value::getConstVector8(LLContext, RawRef),
1345
0
                Context.Int64x2Ty),
1346
0
            PtrVal, LLContext.getInt64(1));
1347
0
        Builder.createBr(EndBB);
1348
1349
0
        Builder.positionAtEnd(EndBB);
1350
0
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1351
0
        Ret.addIncoming(Ret1, IsNullBB);
1352
0
        Ret.addIncoming(Ret2, NotNullBB);
1353
0
        Ret.addIncoming(Ret3, IsExtrefBB);
1354
0
        stackPush(Ret);
1355
0
        break;
1356
1
      }
1357
0
      case OpCode::Extern__convert_any: {
1358
0
        std::array<uint8_t, 16> RawRef = {0};
1359
0
        auto Ref = stackPop();
1360
0
        auto IsNullBB =
1361
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.null");
1362
0
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1363
0
                                                  "extern_conv_any.not_null");
1364
0
        auto EndBB =
1365
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.end");
1366
0
        auto CondIsNull = Builder.createICmpEQ(
1367
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1368
0
            LLContext.getInt64(0));
1369
0
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1370
1371
0
        Builder.positionAtEnd(IsNullBB);
1372
0
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullExternRef);
1373
0
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1374
0
        auto Ret1 = Builder.createBitCast(
1375
0
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1376
0
        Builder.createBr(EndBB);
1377
1378
0
        Builder.positionAtEnd(NotNullBB);
1379
0
        auto Ret2 = Builder.createBitCast(
1380
0
            Builder.createInsertElement(
1381
0
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1382
0
                LLContext.getInt8(1), LLContext.getInt64(1)),
1383
0
            Context.Int64x2Ty);
1384
0
        Builder.createBr(EndBB);
1385
1386
0
        Builder.positionAtEnd(EndBB);
1387
0
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1388
0
        Ret.addIncoming(Ret1, IsNullBB);
1389
0
        Ret.addIncoming(Ret2, NotNullBB);
1390
0
        stackPush(Ret);
1391
0
        break;
1392
1
      }
1393
3
      case OpCode::Ref__i31: {
1394
3
        std::array<uint8_t, 16> RawRef = {0};
1395
3
        auto VT = ValType(TypeCode::Ref, TypeCode::I31Ref);
1396
3
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1397
3
        auto Ref = Builder.createBitCast(
1398
3
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1399
3
        auto Val = Builder.createZExt(
1400
3
            Builder.createOr(
1401
3
                Builder.createAnd(stackPop(), LLContext.getInt32(0x7FFFFFFFU)),
1402
3
                LLContext.getInt32(0x80000000U)),
1403
3
            Context.Int64Ty);
1404
3
        stackPush(Builder.createInsertElement(Ref, Val, LLContext.getInt64(1)));
1405
3
        break;
1406
1
      }
1407
0
      case OpCode::I31__get_s: {
1408
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1409
0
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1410
0
        auto Val = Builder.createTrunc(
1411
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1412
0
            Context.Int32Ty);
1413
0
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1414
0
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1415
0
            LLContext.getInt32(0)));
1416
0
        Builder.createCondBr(IsNotNull, Next,
1417
0
                             getTrapBB(ErrCode::Value::AccessNullI31));
1418
0
        Builder.positionAtEnd(Next);
1419
0
        Val = Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU));
1420
0
        stackPush(Builder.createOr(
1421
0
            Val, Builder.createShl(
1422
0
                     Builder.createAnd(Val, LLContext.getInt32(0x40000000U)),
1423
0
                     LLContext.getInt32(1))));
1424
0
        break;
1425
1
      }
1426
0
      case OpCode::I31__get_u: {
1427
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1428
0
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1429
0
        auto Val = Builder.createTrunc(
1430
0
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1431
0
            Context.Int32Ty);
1432
0
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1433
0
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1434
0
            LLContext.getInt32(0)));
1435
0
        Builder.createCondBr(IsNotNull, Next,
1436
0
                             getTrapBB(ErrCode::Value::AccessNullI31));
1437
0
        Builder.positionAtEnd(Next);
1438
0
        stackPush(Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU)));
1439
0
        break;
1440
1
      }
1441
1442
      // Parametric Instructions
1443
3.45k
      case OpCode::Drop:
1444
3.45k
        stackPop();
1445
3.45k
        break;
1446
712
      case OpCode::Select:
1447
1.16k
      case OpCode::Select_t: {
1448
1.16k
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
1449
1.16k
        auto False = stackPop();
1450
1.16k
        auto True = stackPop();
1451
1.16k
        stackPush(Builder.createSelect(Cond, True, False));
1452
1.16k
        break;
1453
712
      }
1454
1455
      // Variable Instructions
1456
11.3k
      case OpCode::Local__get: {
1457
11.3k
        const auto &L = Local[Instr.getTargetIndex()];
1458
11.3k
        stackPush(Builder.createLoad(L.first, L.second));
1459
11.3k
        break;
1460
712
      }
1461
4.19k
      case OpCode::Local__set:
1462
4.19k
        Builder.createStore(stackPop(), Local[Instr.getTargetIndex()].second);
1463
4.19k
        break;
1464
773
      case OpCode::Local__tee:
1465
773
        Builder.createStore(Stack.back(), Local[Instr.getTargetIndex()].second);
1466
773
        break;
1467
416
      case OpCode::Global__get: {
1468
416
        const auto G =
1469
416
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex());
1470
416
        stackPush(Builder.createLoad(G.first, G.second));
1471
416
        break;
1472
712
      }
1473
70
      case OpCode::Global__set:
1474
70
        Builder.createStore(
1475
70
            stackPop(),
1476
70
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex()).second);
1477
70
        break;
1478
1479
      // Table Instructions
1480
31
      case OpCode::Table__get: {
1481
31
        auto Idx = stackPop();
1482
31
        stackPush(Builder.createCall(
1483
31
            Context.getIntrinsic(
1484
31
                Builder, Executable::Intrinsics::kTableGet,
1485
31
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1486
31
                                            {Context.Int32Ty, Context.Int32Ty},
1487
31
                                            false)),
1488
31
            {LLContext.getInt32(Instr.getTargetIndex()), Idx}));
1489
31
        break;
1490
712
      }
1491
23
      case OpCode::Table__set: {
1492
23
        auto Ref = stackPop();
1493
23
        auto Idx = stackPop();
1494
23
        Builder.createCall(
1495
23
            Context.getIntrinsic(
1496
23
                Builder, Executable::Intrinsics::kTableSet,
1497
23
                LLVM::Type::getFunctionType(
1498
23
                    Context.Int64Ty,
1499
23
                    {Context.Int32Ty, Context.Int32Ty, Context.Int64x2Ty},
1500
23
                    false)),
1501
23
            {LLContext.getInt32(Instr.getTargetIndex()), Idx, Ref});
1502
23
        break;
1503
712
      }
1504
24
      case OpCode::Table__init: {
1505
24
        auto Len = stackPop();
1506
24
        auto Src = stackPop();
1507
24
        auto Dst = stackPop();
1508
24
        Builder.createCall(
1509
24
            Context.getIntrinsic(
1510
24
                Builder, Executable::Intrinsics::kTableInit,
1511
24
                LLVM::Type::getFunctionType(Context.VoidTy,
1512
24
                                            {Context.Int32Ty, Context.Int32Ty,
1513
24
                                             Context.Int32Ty, Context.Int32Ty,
1514
24
                                             Context.Int32Ty},
1515
24
                                            false)),
1516
24
            {LLContext.getInt32(Instr.getTargetIndex()),
1517
24
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1518
24
        break;
1519
712
      }
1520
33
      case OpCode::Elem__drop: {
1521
33
        Builder.createCall(
1522
33
            Context.getIntrinsic(Builder, Executable::Intrinsics::kElemDrop,
1523
33
                                 LLVM::Type::getFunctionType(
1524
33
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1525
33
            {LLContext.getInt32(Instr.getTargetIndex())});
1526
33
        break;
1527
712
      }
1528
17
      case OpCode::Table__copy: {
1529
17
        auto Len = stackPop();
1530
17
        auto Src = stackPop();
1531
17
        auto Dst = stackPop();
1532
17
        Builder.createCall(
1533
17
            Context.getIntrinsic(
1534
17
                Builder, Executable::Intrinsics::kTableCopy,
1535
17
                LLVM::Type::getFunctionType(Context.VoidTy,
1536
17
                                            {Context.Int32Ty, Context.Int32Ty,
1537
17
                                             Context.Int32Ty, Context.Int32Ty,
1538
17
                                             Context.Int32Ty},
1539
17
                                            false)),
1540
17
            {LLContext.getInt32(Instr.getTargetIndex()),
1541
17
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1542
17
        break;
1543
712
      }
1544
12
      case OpCode::Table__grow: {
1545
12
        auto NewSize = stackPop();
1546
12
        auto Val = stackPop();
1547
12
        stackPush(Builder.createCall(
1548
12
            Context.getIntrinsic(
1549
12
                Builder, Executable::Intrinsics::kTableGrow,
1550
12
                LLVM::Type::getFunctionType(
1551
12
                    Context.Int32Ty,
1552
12
                    {Context.Int32Ty, Context.Int64x2Ty, Context.Int32Ty},
1553
12
                    false)),
1554
12
            {LLContext.getInt32(Instr.getTargetIndex()), Val, NewSize}));
1555
12
        break;
1556
712
      }
1557
13
      case OpCode::Table__size: {
1558
13
        stackPush(Builder.createCall(
1559
13
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableSize,
1560
13
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1561
13
                                                             {Context.Int32Ty},
1562
13
                                                             false)),
1563
13
            {LLContext.getInt32(Instr.getTargetIndex())}));
1564
13
        break;
1565
712
      }
1566
5
      case OpCode::Table__fill: {
1567
5
        auto Len = stackPop();
1568
5
        auto Val = stackPop();
1569
5
        auto Off = stackPop();
1570
5
        Builder.createCall(
1571
5
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableFill,
1572
5
                                 LLVM::Type::getFunctionType(
1573
5
                                     Context.Int32Ty,
1574
5
                                     {Context.Int32Ty, Context.Int32Ty,
1575
5
                                      Context.Int64x2Ty, Context.Int32Ty},
1576
5
                                     false)),
1577
5
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1578
5
        break;
1579
712
      }
1580
1581
      // Memory Instructions
1582
1.37k
      case OpCode::I32__load:
1583
1.37k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1584
1.37k
                      Instr.getMemoryAlign(), Context.Int32Ty);
1585
1.37k
        break;
1586
3.55k
      case OpCode::I64__load:
1587
3.55k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1588
3.55k
                      Instr.getMemoryAlign(), Context.Int64Ty);
1589
3.55k
        break;
1590
109
      case OpCode::F32__load:
1591
109
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1592
109
                      Instr.getMemoryAlign(), Context.FloatTy);
1593
109
        break;
1594
232
      case OpCode::F64__load:
1595
232
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1596
232
                      Instr.getMemoryAlign(), Context.DoubleTy);
1597
232
        break;
1598
661
      case OpCode::I32__load8_s:
1599
661
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1600
661
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1601
661
                      true);
1602
661
        break;
1603
206
      case OpCode::I32__load8_u:
1604
206
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1605
206
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1606
206
                      false);
1607
206
        break;
1608
340
      case OpCode::I32__load16_s:
1609
340
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1610
340
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1611
340
                      true);
1612
340
        break;
1613
1.61k
      case OpCode::I32__load16_u:
1614
1.61k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1615
1.61k
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1616
1.61k
                      false);
1617
1.61k
        break;
1618
715
      case OpCode::I64__load8_s:
1619
715
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1620
715
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1621
715
                      true);
1622
715
        break;
1623
440
      case OpCode::I64__load8_u:
1624
440
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1625
440
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1626
440
                      false);
1627
440
        break;
1628
404
      case OpCode::I64__load16_s:
1629
404
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1630
404
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1631
404
                      true);
1632
404
        break;
1633
629
      case OpCode::I64__load16_u:
1634
629
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1635
629
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1636
629
                      false);
1637
629
        break;
1638
434
      case OpCode::I64__load32_s:
1639
434
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1640
434
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1641
434
                      true);
1642
434
        break;
1643
536
      case OpCode::I64__load32_u:
1644
536
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1645
536
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1646
536
                      false);
1647
536
        break;
1648
449
      case OpCode::I32__store:
1649
449
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1650
449
                       Instr.getMemoryAlign(), Context.Int32Ty);
1651
449
        break;
1652
1.49k
      case OpCode::I64__store:
1653
1.49k
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1654
1.49k
                       Instr.getMemoryAlign(), Context.Int64Ty);
1655
1.49k
        break;
1656
68
      case OpCode::F32__store:
1657
68
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1658
68
                       Instr.getMemoryAlign(), Context.FloatTy);
1659
68
        break;
1660
49
      case OpCode::F64__store:
1661
49
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1662
49
                       Instr.getMemoryAlign(), Context.DoubleTy);
1663
49
        break;
1664
352
      case OpCode::I32__store8:
1665
372
      case OpCode::I64__store8:
1666
372
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1667
372
                       Instr.getMemoryAlign(), Context.Int8Ty, true);
1668
372
        break;
1669
219
      case OpCode::I32__store16:
1670
267
      case OpCode::I64__store16:
1671
267
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1672
267
                       Instr.getMemoryAlign(), Context.Int16Ty, true);
1673
267
        break;
1674
36
      case OpCode::I64__store32:
1675
36
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1676
36
                       Instr.getMemoryAlign(), Context.Int32Ty, true);
1677
36
        break;
1678
792
      case OpCode::Memory__size:
1679
792
        stackPush(Builder.createCall(
1680
792
            Context.getIntrinsic(Builder, Executable::Intrinsics::kMemSize,
1681
792
                                 LLVM::Type::getFunctionType(Context.Int32Ty,
1682
792
                                                             {Context.Int32Ty},
1683
792
                                                             false)),
1684
792
            {LLContext.getInt32(Instr.getTargetIndex())}));
1685
792
        break;
1686
627
      case OpCode::Memory__grow: {
1687
627
        auto Diff = stackPop();
1688
627
        stackPush(Builder.createCall(
1689
627
            Context.getIntrinsic(
1690
627
                Builder, Executable::Intrinsics::kMemGrow,
1691
627
                LLVM::Type::getFunctionType(Context.Int32Ty,
1692
627
                                            {Context.Int32Ty, Context.Int32Ty},
1693
627
                                            false)),
1694
627
            {LLContext.getInt32(Instr.getTargetIndex()), Diff}));
1695
627
        break;
1696
219
      }
1697
25
      case OpCode::Memory__init: {
1698
25
        auto Len = stackPop();
1699
25
        auto Src = stackPop();
1700
25
        auto Dst = stackPop();
1701
25
        Builder.createCall(
1702
25
            Context.getIntrinsic(
1703
25
                Builder, Executable::Intrinsics::kMemInit,
1704
25
                LLVM::Type::getFunctionType(Context.VoidTy,
1705
25
                                            {Context.Int32Ty, Context.Int32Ty,
1706
25
                                             Context.Int32Ty, Context.Int32Ty,
1707
25
                                             Context.Int32Ty},
1708
25
                                            false)),
1709
25
            {LLContext.getInt32(Instr.getTargetIndex()),
1710
25
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1711
25
        break;
1712
219
      }
1713
22
      case OpCode::Data__drop: {
1714
22
        Builder.createCall(
1715
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kDataDrop,
1716
22
                                 LLVM::Type::getFunctionType(
1717
22
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1718
22
            {LLContext.getInt32(Instr.getTargetIndex())});
1719
22
        break;
1720
219
      }
1721
254
      case OpCode::Memory__copy: {
1722
254
        auto Len = stackPop();
1723
254
        auto Src = stackPop();
1724
254
        auto Dst = stackPop();
1725
254
        Builder.createCall(
1726
254
            Context.getIntrinsic(
1727
254
                Builder, Executable::Intrinsics::kMemCopy,
1728
254
                LLVM::Type::getFunctionType(Context.VoidTy,
1729
254
                                            {Context.Int32Ty, Context.Int32Ty,
1730
254
                                             Context.Int32Ty, Context.Int32Ty,
1731
254
                                             Context.Int32Ty},
1732
254
                                            false)),
1733
254
            {LLContext.getInt32(Instr.getTargetIndex()),
1734
254
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1735
254
        break;
1736
219
      }
1737
571
      case OpCode::Memory__fill: {
1738
571
        auto Len = stackPop();
1739
571
        auto Val = Builder.createTrunc(stackPop(), Context.Int8Ty);
1740
571
        auto Off = stackPop();
1741
571
        Builder.createCall(
1742
571
            Context.getIntrinsic(
1743
571
                Builder, Executable::Intrinsics::kMemFill,
1744
571
                LLVM::Type::getFunctionType(Context.VoidTy,
1745
571
                                            {Context.Int32Ty, Context.Int32Ty,
1746
571
                                             Context.Int8Ty, Context.Int32Ty},
1747
571
                                            false)),
1748
571
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1749
571
        break;
1750
219
      }
1751
1752
      // Const Numeric Instructions
1753
575k
      case OpCode::I32__const:
1754
575k
        stackPush(LLContext.getInt32(Instr.getNum().get<uint32_t>()));
1755
575k
        break;
1756
90.9k
      case OpCode::I64__const:
1757
90.9k
        stackPush(LLContext.getInt64(Instr.getNum().get<uint64_t>()));
1758
90.9k
        break;
1759
14.9k
      case OpCode::F32__const:
1760
14.9k
        stackPush(LLContext.getFloat(Instr.getNum().get<float>()));
1761
14.9k
        break;
1762
7.14k
      case OpCode::F64__const:
1763
7.14k
        stackPush(LLContext.getDouble(Instr.getNum().get<double>()));
1764
7.14k
        break;
1765
1766
      // Unary Numeric Instructions
1767
7.51k
      case OpCode::I32__eqz:
1768
7.51k
        stackPush(Builder.createZExt(
1769
7.51k
            Builder.createICmpEQ(stackPop(), LLContext.getInt32(0)),
1770
7.51k
            Context.Int32Ty));
1771
7.51k
        break;
1772
1.26k
      case OpCode::I64__eqz:
1773
1.26k
        stackPush(Builder.createZExt(
1774
1.26k
            Builder.createICmpEQ(stackPop(), LLContext.getInt64(0)),
1775
1.26k
            Context.Int32Ty));
1776
1.26k
        break;
1777
2.19k
      case OpCode::I32__clz:
1778
2.19k
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1779
2.19k
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int32Ty},
1780
2.19k
                                          {stackPop(), LLContext.getFalse()}));
1781
2.19k
        break;
1782
309
      case OpCode::I64__clz:
1783
309
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1784
309
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int64Ty},
1785
309
                                          {stackPop(), LLContext.getFalse()}));
1786
309
        break;
1787
1.74k
      case OpCode::I32__ctz:
1788
1.74k
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1789
1.74k
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int32Ty},
1790
1.74k
                                          {stackPop(), LLContext.getFalse()}));
1791
1.74k
        break;
1792
417
      case OpCode::I64__ctz:
1793
417
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1794
417
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int64Ty},
1795
417
                                          {stackPop(), LLContext.getFalse()}));
1796
417
        break;
1797
16.6k
      case OpCode::I32__popcnt:
1798
18.5k
      case OpCode::I64__popcnt:
1799
18.5k
        assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
1800
18.5k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, stackPop()));
1801
18.5k
        break;
1802
825
      case OpCode::F32__abs:
1803
1.88k
      case OpCode::F64__abs:
1804
1.88k
        assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
1805
1.88k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Fabs, stackPop()));
1806
1.88k
        break;
1807
1.06k
      case OpCode::F32__neg:
1808
1.87k
      case OpCode::F64__neg:
1809
1.87k
        stackPush(Builder.createFNeg(stackPop()));
1810
1.87k
        break;
1811
1.98k
      case OpCode::F32__ceil:
1812
4.90k
      case OpCode::F64__ceil:
1813
4.90k
        assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
1814
4.90k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ceil, stackPop()));
1815
4.90k
        break;
1816
890
      case OpCode::F32__floor:
1817
1.27k
      case OpCode::F64__floor:
1818
1.27k
        assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
1819
1.27k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Floor, stackPop()));
1820
1.27k
        break;
1821
509
      case OpCode::F32__trunc:
1822
817
      case OpCode::F64__trunc:
1823
817
        assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
1824
817
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Trunc, stackPop()));
1825
817
        break;
1826
848
      case OpCode::F32__nearest:
1827
1.24k
      case OpCode::F64__nearest: {
1828
1.24k
        const bool IsFloat = Instr.getOpCode() == OpCode::F32__nearest;
1829
1.24k
        LLVM::Value Value = stackPop();
1830
1831
1.24k
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
1832
1.24k
        assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
1833
1.24k
        if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
1834
1.24k
          stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Value));
1835
1.24k
          break;
1836
1.24k
        }
1837
0
#endif
1838
1839
        // The VectorSize is only used when SSE4_1 or NEON is supported.
1840
0
        [[maybe_unused]] const uint32_t VectorSize = IsFloat ? 4 : 2;
1841
0
#if defined(__x86_64__)
1842
0
        if (Context.SupportSSE4_1) {
1843
0
          auto Zero = LLContext.getInt64(0);
1844
0
          auto VectorTy =
1845
0
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1846
0
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1847
0
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1848
0
          auto ID = IsFloat ? LLVM::Core::X86SSE41RoundSs
1849
0
                            : LLVM::Core::X86SSE41RoundSd;
1850
0
          assuming(ID != LLVM::Core::NotIntrinsic);
1851
0
          Ret = Builder.createIntrinsic(ID, {},
1852
0
                                        {Ret, Ret, LLContext.getInt32(8)});
1853
0
          Ret = Builder.createExtractElement(Ret, Zero);
1854
0
          stackPush(Ret);
1855
0
          break;
1856
0
        }
1857
0
#endif
1858
1859
#if defined(__aarch64__)
1860
        if (Context.SupportNEON &&
1861
            LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
1862
          auto Zero = LLContext.getInt64(0);
1863
          auto VectorTy =
1864
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1865
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1866
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1867
          Ret =
1868
              Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, Ret);
1869
          Ret = Builder.createExtractElement(Ret, Zero);
1870
          stackPush(Ret);
1871
          break;
1872
        }
1873
#endif
1874
1875
        // Fallback case.
1876
        // If the SSE4.1 is not supported on the x86_64 platform or
1877
        // the NEON is not supported on the aarch64 platform,
1878
        // then fall back to this.
1879
0
        assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
1880
0
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Value));
1881
0
        break;
1882
0
      }
1883
402
      case OpCode::F32__sqrt:
1884
3.26k
      case OpCode::F64__sqrt:
1885
3.26k
        assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
1886
3.26k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, stackPop()));
1887
3.26k
        break;
1888
321
      case OpCode::I32__wrap_i64:
1889
321
        stackPush(Builder.createTrunc(stackPop(), Context.Int32Ty));
1890
321
        break;
1891
1.41k
      case OpCode::I32__trunc_f32_s:
1892
1.41k
        compileSignedTrunc(Context.Int32Ty);
1893
1.41k
        break;
1894
249
      case OpCode::I32__trunc_f64_s:
1895
249
        compileSignedTrunc(Context.Int32Ty);
1896
249
        break;
1897
181
      case OpCode::I32__trunc_f32_u:
1898
181
        compileUnsignedTrunc(Context.Int32Ty);
1899
181
        break;
1900
1.34k
      case OpCode::I32__trunc_f64_u:
1901
1.34k
        compileUnsignedTrunc(Context.Int32Ty);
1902
1.34k
        break;
1903
2.21k
      case OpCode::I64__extend_i32_s:
1904
2.21k
        stackPush(Builder.createSExt(stackPop(), Context.Int64Ty));
1905
2.21k
        break;
1906
339
      case OpCode::I64__extend_i32_u:
1907
339
        stackPush(Builder.createZExt(stackPop(), Context.Int64Ty));
1908
339
        break;
1909
58
      case OpCode::I64__trunc_f32_s:
1910
58
        compileSignedTrunc(Context.Int64Ty);
1911
58
        break;
1912
411
      case OpCode::I64__trunc_f64_s:
1913
411
        compileSignedTrunc(Context.Int64Ty);
1914
411
        break;
1915
965
      case OpCode::I64__trunc_f32_u:
1916
965
        compileUnsignedTrunc(Context.Int64Ty);
1917
965
        break;
1918
1.29k
      case OpCode::I64__trunc_f64_u:
1919
1.29k
        compileUnsignedTrunc(Context.Int64Ty);
1920
1.29k
        break;
1921
1.67k
      case OpCode::F32__convert_i32_s:
1922
2.06k
      case OpCode::F32__convert_i64_s:
1923
2.06k
        stackPush(Builder.createSIToFP(stackPop(), Context.FloatTy));
1924
2.06k
        break;
1925
687
      case OpCode::F32__convert_i32_u:
1926
1.80k
      case OpCode::F32__convert_i64_u:
1927
1.80k
        stackPush(Builder.createUIToFP(stackPop(), Context.FloatTy));
1928
1.80k
        break;
1929
1.60k
      case OpCode::F64__convert_i32_s:
1930
6.12k
      case OpCode::F64__convert_i64_s:
1931
6.12k
        stackPush(Builder.createSIToFP(stackPop(), Context.DoubleTy));
1932
6.12k
        break;
1933
1.33k
      case OpCode::F64__convert_i32_u:
1934
1.51k
      case OpCode::F64__convert_i64_u:
1935
1.51k
        stackPush(Builder.createUIToFP(stackPop(), Context.DoubleTy));
1936
1.51k
        break;
1937
209
      case OpCode::F32__demote_f64:
1938
209
        stackPush(Builder.createFPTrunc(stackPop(), Context.FloatTy));
1939
209
        break;
1940
88
      case OpCode::F64__promote_f32:
1941
88
        stackPush(Builder.createFPExt(stackPop(), Context.DoubleTy));
1942
88
        break;
1943
643
      case OpCode::I32__reinterpret_f32:
1944
643
        stackPush(Builder.createBitCast(stackPop(), Context.Int32Ty));
1945
643
        break;
1946
651
      case OpCode::I64__reinterpret_f64:
1947
651
        stackPush(Builder.createBitCast(stackPop(), Context.Int64Ty));
1948
651
        break;
1949
4.28k
      case OpCode::F32__reinterpret_i32:
1950
4.28k
        stackPush(Builder.createBitCast(stackPop(), Context.FloatTy));
1951
4.28k
        break;
1952
1.13k
      case OpCode::F64__reinterpret_i64:
1953
1.13k
        stackPush(Builder.createBitCast(stackPop(), Context.DoubleTy));
1954
1.13k
        break;
1955
2.43k
      case OpCode::I32__extend8_s:
1956
2.43k
        stackPush(Builder.createSExt(
1957
2.43k
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int32Ty));
1958
2.43k
        break;
1959
3.09k
      case OpCode::I32__extend16_s:
1960
3.09k
        stackPush(Builder.createSExt(
1961
3.09k
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int32Ty));
1962
3.09k
        break;
1963
380
      case OpCode::I64__extend8_s:
1964
380
        stackPush(Builder.createSExt(
1965
380
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int64Ty));
1966
380
        break;
1967
619
      case OpCode::I64__extend16_s:
1968
619
        stackPush(Builder.createSExt(
1969
619
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int64Ty));
1970
619
        break;
1971
751
      case OpCode::I64__extend32_s:
1972
751
        stackPush(Builder.createSExt(
1973
751
            Builder.createTrunc(stackPop(), Context.Int32Ty), Context.Int64Ty));
1974
751
        break;
1975
1976
      // Binary Numeric Instructions
1977
1.19k
      case OpCode::I32__eq:
1978
1.46k
      case OpCode::I64__eq: {
1979
1.46k
        LLVM::Value RHS = stackPop();
1980
1.46k
        LLVM::Value LHS = stackPop();
1981
1.46k
        stackPush(Builder.createZExt(Builder.createICmpEQ(LHS, RHS),
1982
1.46k
                                     Context.Int32Ty));
1983
1.46k
        break;
1984
1.19k
      }
1985
708
      case OpCode::I32__ne:
1986
731
      case OpCode::I64__ne: {
1987
731
        LLVM::Value RHS = stackPop();
1988
731
        LLVM::Value LHS = stackPop();
1989
731
        stackPush(Builder.createZExt(Builder.createICmpNE(LHS, RHS),
1990
731
                                     Context.Int32Ty));
1991
731
        break;
1992
708
      }
1993
4.31k
      case OpCode::I32__lt_s:
1994
4.93k
      case OpCode::I64__lt_s: {
1995
4.93k
        LLVM::Value RHS = stackPop();
1996
4.93k
        LLVM::Value LHS = stackPop();
1997
4.93k
        stackPush(Builder.createZExt(Builder.createICmpSLT(LHS, RHS),
1998
4.93k
                                     Context.Int32Ty));
1999
4.93k
        break;
2000
4.31k
      }
2001
6.12k
      case OpCode::I32__lt_u:
2002
6.51k
      case OpCode::I64__lt_u: {
2003
6.51k
        LLVM::Value RHS = stackPop();
2004
6.51k
        LLVM::Value LHS = stackPop();
2005
6.51k
        stackPush(Builder.createZExt(Builder.createICmpULT(LHS, RHS),
2006
6.51k
                                     Context.Int32Ty));
2007
6.51k
        break;
2008
6.12k
      }
2009
1.17k
      case OpCode::I32__gt_s:
2010
1.61k
      case OpCode::I64__gt_s: {
2011
1.61k
        LLVM::Value RHS = stackPop();
2012
1.61k
        LLVM::Value LHS = stackPop();
2013
1.61k
        stackPush(Builder.createZExt(Builder.createICmpSGT(LHS, RHS),
2014
1.61k
                                     Context.Int32Ty));
2015
1.61k
        break;
2016
1.17k
      }
2017
7.18k
      case OpCode::I32__gt_u:
2018
7.37k
      case OpCode::I64__gt_u: {
2019
7.37k
        LLVM::Value RHS = stackPop();
2020
7.37k
        LLVM::Value LHS = stackPop();
2021
7.37k
        stackPush(Builder.createZExt(Builder.createICmpUGT(LHS, RHS),
2022
7.37k
                                     Context.Int32Ty));
2023
7.37k
        break;
2024
7.18k
      }
2025
2.19k
      case OpCode::I32__le_s:
2026
3.13k
      case OpCode::I64__le_s: {
2027
3.13k
        LLVM::Value RHS = stackPop();
2028
3.13k
        LLVM::Value LHS = stackPop();
2029
3.13k
        stackPush(Builder.createZExt(Builder.createICmpSLE(LHS, RHS),
2030
3.13k
                                     Context.Int32Ty));
2031
3.13k
        break;
2032
2.19k
      }
2033
466
      case OpCode::I32__le_u:
2034
2.16k
      case OpCode::I64__le_u: {
2035
2.16k
        LLVM::Value RHS = stackPop();
2036
2.16k
        LLVM::Value LHS = stackPop();
2037
2.16k
        stackPush(Builder.createZExt(Builder.createICmpULE(LHS, RHS),
2038
2.16k
                                     Context.Int32Ty));
2039
2.16k
        break;
2040
466
      }
2041
1.12k
      case OpCode::I32__ge_s:
2042
1.15k
      case OpCode::I64__ge_s: {
2043
1.15k
        LLVM::Value RHS = stackPop();
2044
1.15k
        LLVM::Value LHS = stackPop();
2045
1.15k
        stackPush(Builder.createZExt(Builder.createICmpSGE(LHS, RHS),
2046
1.15k
                                     Context.Int32Ty));
2047
1.15k
        break;
2048
1.12k
      }
2049
2.70k
      case OpCode::I32__ge_u:
2050
3.35k
      case OpCode::I64__ge_u: {
2051
3.35k
        LLVM::Value RHS = stackPop();
2052
3.35k
        LLVM::Value LHS = stackPop();
2053
3.35k
        stackPush(Builder.createZExt(Builder.createICmpUGE(LHS, RHS),
2054
3.35k
                                     Context.Int32Ty));
2055
3.35k
        break;
2056
2.70k
      }
2057
160
      case OpCode::F32__eq:
2058
238
      case OpCode::F64__eq: {
2059
238
        LLVM::Value RHS = stackPop();
2060
238
        LLVM::Value LHS = stackPop();
2061
238
        stackPush(Builder.createZExt(Builder.createFCmpOEQ(LHS, RHS),
2062
238
                                     Context.Int32Ty));
2063
238
        break;
2064
160
      }
2065
88
      case OpCode::F32__ne:
2066
116
      case OpCode::F64__ne: {
2067
116
        LLVM::Value RHS = stackPop();
2068
116
        LLVM::Value LHS = stackPop();
2069
116
        stackPush(Builder.createZExt(Builder.createFCmpUNE(LHS, RHS),
2070
116
                                     Context.Int32Ty));
2071
116
        break;
2072
88
      }
2073
186
      case OpCode::F32__lt:
2074
312
      case OpCode::F64__lt: {
2075
312
        LLVM::Value RHS = stackPop();
2076
312
        LLVM::Value LHS = stackPop();
2077
312
        stackPush(Builder.createZExt(Builder.createFCmpOLT(LHS, RHS),
2078
312
                                     Context.Int32Ty));
2079
312
        break;
2080
186
      }
2081
149
      case OpCode::F32__gt:
2082
208
      case OpCode::F64__gt: {
2083
208
        LLVM::Value RHS = stackPop();
2084
208
        LLVM::Value LHS = stackPop();
2085
208
        stackPush(Builder.createZExt(Builder.createFCmpOGT(LHS, RHS),
2086
208
                                     Context.Int32Ty));
2087
208
        break;
2088
149
      }
2089
77
      case OpCode::F32__le:
2090
181
      case OpCode::F64__le: {
2091
181
        LLVM::Value RHS = stackPop();
2092
181
        LLVM::Value LHS = stackPop();
2093
181
        stackPush(Builder.createZExt(Builder.createFCmpOLE(LHS, RHS),
2094
181
                                     Context.Int32Ty));
2095
181
        break;
2096
77
      }
2097
232
      case OpCode::F32__ge:
2098
261
      case OpCode::F64__ge: {
2099
261
        LLVM::Value RHS = stackPop();
2100
261
        LLVM::Value LHS = stackPop();
2101
261
        stackPush(Builder.createZExt(Builder.createFCmpOGE(LHS, RHS),
2102
261
                                     Context.Int32Ty));
2103
261
        break;
2104
232
      }
2105
724
      case OpCode::I32__add:
2106
1.19k
      case OpCode::I64__add: {
2107
1.19k
        LLVM::Value RHS = stackPop();
2108
1.19k
        LLVM::Value LHS = stackPop();
2109
1.19k
        stackPush(Builder.createAdd(LHS, RHS));
2110
1.19k
        break;
2111
724
      }
2112
1.80k
      case OpCode::I32__sub:
2113
2.22k
      case OpCode::I64__sub: {
2114
2.22k
        LLVM::Value RHS = stackPop();
2115
2.22k
        LLVM::Value LHS = stackPop();
2116
2117
2.22k
        stackPush(Builder.createSub(LHS, RHS));
2118
2.22k
        break;
2119
1.80k
      }
2120
604
      case OpCode::I32__mul:
2121
1.20k
      case OpCode::I64__mul: {
2122
1.20k
        LLVM::Value RHS = stackPop();
2123
1.20k
        LLVM::Value LHS = stackPop();
2124
1.20k
        stackPush(Builder.createMul(LHS, RHS));
2125
1.20k
        break;
2126
604
      }
2127
1.25k
      case OpCode::I32__div_s:
2128
1.72k
      case OpCode::I64__div_s: {
2129
1.72k
        LLVM::Value RHS = stackPop();
2130
1.72k
        LLVM::Value LHS = stackPop();
2131
1.72k
        if constexpr (kForceDivCheck) {
2132
1.72k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_s;
2133
1.72k
          LLVM::Value IntZero =
2134
1.72k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2135
1.72k
          LLVM::Value IntMinusOne =
2136
1.72k
              Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2137
1.72k
                   : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2138
1.72k
          LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2139
1.25k
                                          std::numeric_limits<int32_t>::min()))
2140
1.72k
                                    : LLContext.getInt64(static_cast<uint64_t>(
2141
472
                                          std::numeric_limits<int64_t>::min()));
2142
2143
1.72k
          auto NoZeroBB =
2144
1.72k
              LLVM::BasicBlock::create(LLContext, F.Fn, "div.nozero");
2145
1.72k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2146
2147
1.72k
          auto IsNotZero =
2148
1.72k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2149
1.72k
          Builder.createCondBr(IsNotZero, NoZeroBB,
2150
1.72k
                               getTrapBB(ErrCode::Value::DivideByZero));
2151
2152
1.72k
          Builder.positionAtEnd(NoZeroBB);
2153
1.72k
          auto NotOverflow = Builder.createLikely(
2154
1.72k
              Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2155
1.72k
                               Builder.createICmpNE(RHS, IntMinusOne)));
2156
1.72k
          Builder.createCondBr(NotOverflow, OkBB,
2157
1.72k
                               getTrapBB(ErrCode::Value::IntegerOverflow));
2158
2159
1.72k
          Builder.positionAtEnd(OkBB);
2160
1.72k
        }
2161
1.72k
        stackPush(Builder.createSDiv(LHS, RHS));
2162
1.72k
        break;
2163
1.25k
      }
2164
3.23k
      case OpCode::I32__div_u:
2165
3.55k
      case OpCode::I64__div_u: {
2166
3.55k
        LLVM::Value RHS = stackPop();
2167
3.55k
        LLVM::Value LHS = stackPop();
2168
3.55k
        if constexpr (kForceDivCheck) {
2169
3.55k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_u;
2170
3.55k
          LLVM::Value IntZero =
2171
3.55k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2172
3.55k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2173
2174
3.55k
          auto IsNotZero =
2175
3.55k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2176
3.55k
          Builder.createCondBr(IsNotZero, OkBB,
2177
3.55k
                               getTrapBB(ErrCode::Value::DivideByZero));
2178
3.55k
          Builder.positionAtEnd(OkBB);
2179
3.55k
        }
2180
3.55k
        stackPush(Builder.createUDiv(LHS, RHS));
2181
3.55k
        break;
2182
3.23k
      }
2183
1.13k
      case OpCode::I32__rem_s:
2184
1.59k
      case OpCode::I64__rem_s: {
2185
1.59k
        LLVM::Value RHS = stackPop();
2186
1.59k
        LLVM::Value LHS = stackPop();
2187
        // handle INT_MIN % -1 (INT32_MIN or INT64_MIN, by operand width): skip the srem and yield 0
2188
1.59k
        const bool Is32 = Instr.getOpCode() == OpCode::I32__rem_s;
2189
1.59k
        LLVM::Value IntMinusOne =
2190
1.59k
            Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2191
1.59k
                 : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2192
1.59k
        LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2193
1.13k
                                        std::numeric_limits<int32_t>::min()))
2194
1.59k
                                  : LLContext.getInt64(static_cast<uint64_t>(
2195
457
                                        std::numeric_limits<int64_t>::min()));
2196
1.59k
        LLVM::Value IntZero =
2197
1.59k
            Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2198
2199
1.59k
        auto NoOverflowBB =
2200
1.59k
            LLVM::BasicBlock::create(LLContext, F.Fn, "no.overflow");
2201
1.59k
        auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "end.overflow");
2202
2203
1.59k
        if constexpr (kForceDivCheck) {
2204
1.59k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2205
2206
1.59k
          auto IsNotZero =
2207
1.59k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2208
1.59k
          Builder.createCondBr(IsNotZero, OkBB,
2209
1.59k
                               getTrapBB(ErrCode::Value::DivideByZero));
2210
1.59k
          Builder.positionAtEnd(OkBB);
2211
1.59k
        }
2212
2213
1.59k
        auto CurrBB = Builder.getInsertBlock();
2214
2215
1.59k
        auto NotOverflow = Builder.createLikely(
2216
1.59k
            Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2217
1.59k
                             Builder.createICmpNE(RHS, IntMinusOne)));
2218
1.59k
        Builder.createCondBr(NotOverflow, NoOverflowBB, EndBB);
2219
2220
1.59k
        Builder.positionAtEnd(NoOverflowBB);
2221
1.59k
        auto Ret1 = Builder.createSRem(LHS, RHS);
2222
1.59k
        Builder.createBr(EndBB);
2223
2224
1.59k
        Builder.positionAtEnd(EndBB);
2225
1.59k
        auto Ret = Builder.createPHI(Ret1.getType());
2226
1.59k
        Ret.addIncoming(Ret1, NoOverflowBB);
2227
1.59k
        Ret.addIncoming(IntZero, CurrBB);
2228
2229
1.59k
        stackPush(Ret);
2230
1.59k
        break;
2231
1.13k
      }
2232
1.34k
      case OpCode::I32__rem_u:
2233
1.91k
      case OpCode::I64__rem_u: {
2234
1.91k
        LLVM::Value RHS = stackPop();
2235
1.91k
        LLVM::Value LHS = stackPop();
2236
1.91k
        if constexpr (kForceDivCheck) {
2237
1.91k
          LLVM::Value IntZero = Instr.getOpCode() == OpCode::I32__rem_u
2238
1.91k
                                    ? LLContext.getInt32(0)
2239
1.91k
                                    : LLContext.getInt64(0);
2240
1.91k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2241
2242
1.91k
          auto IsNotZero =
2243
1.91k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2244
1.91k
          Builder.createCondBr(IsNotZero, OkBB,
2245
1.91k
                               getTrapBB(ErrCode::Value::DivideByZero));
2246
1.91k
          Builder.positionAtEnd(OkBB);
2247
1.91k
        }
2248
1.91k
        stackPush(Builder.createURem(LHS, RHS));
2249
1.91k
        break;
2250
1.34k
      }
2251
665
      case OpCode::I32__and:
2252
2.00k
      case OpCode::I64__and: {
2253
2.00k
        LLVM::Value RHS = stackPop();
2254
2.00k
        LLVM::Value LHS = stackPop();
2255
2.00k
        stackPush(Builder.createAnd(LHS, RHS));
2256
2.00k
        break;
2257
665
      }
2258
1.32k
      case OpCode::I32__or:
2259
1.65k
      case OpCode::I64__or: {
2260
1.65k
        LLVM::Value RHS = stackPop();
2261
1.65k
        LLVM::Value LHS = stackPop();
2262
1.65k
        stackPush(Builder.createOr(LHS, RHS));
2263
1.65k
        break;
2264
1.32k
      }
2265
1.59k
      case OpCode::I32__xor:
2266
2.23k
      case OpCode::I64__xor: {
2267
2.23k
        LLVM::Value RHS = stackPop();
2268
2.23k
        LLVM::Value LHS = stackPop();
2269
2.23k
        stackPush(Builder.createXor(LHS, RHS));
2270
2.23k
        break;
2271
1.59k
      }
2272
1.87k
      case OpCode::I32__shl:
2273
2.28k
      case OpCode::I64__shl: {
2274
2.28k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shl
2275
2.28k
                               ? LLContext.getInt32(31)
2276
2.28k
                               : LLContext.getInt64(63);
2277
2.28k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2278
2.28k
        LLVM::Value LHS = stackPop();
2279
2.28k
        stackPush(Builder.createShl(LHS, RHS));
2280
2.28k
        break;
2281
1.87k
      }
2282
1.80k
      case OpCode::I32__shr_s:
2283
2.20k
      case OpCode::I64__shr_s: {
2284
2.20k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_s
2285
2.20k
                               ? LLContext.getInt32(31)
2286
2.20k
                               : LLContext.getInt64(63);
2287
2.20k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2288
2.20k
        LLVM::Value LHS = stackPop();
2289
2.20k
        stackPush(Builder.createAShr(LHS, RHS));
2290
2.20k
        break;
2291
1.80k
      }
2292
4.49k
      case OpCode::I32__shr_u:
2293
4.78k
      case OpCode::I64__shr_u: {
2294
4.78k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_u
2295
4.78k
                               ? LLContext.getInt32(31)
2296
4.78k
                               : LLContext.getInt64(63);
2297
4.78k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2298
4.78k
        LLVM::Value LHS = stackPop();
2299
4.78k
        stackPush(Builder.createLShr(LHS, RHS));
2300
4.78k
        break;
2301
4.49k
      }
2302
2.80k
      case OpCode::I32__rotl: {
2303
2.80k
        LLVM::Value RHS = stackPop();
2304
2.80k
        LLVM::Value LHS = stackPop();
2305
2.80k
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2306
2.80k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int32Ty},
2307
2.80k
                                          {LHS, LHS, RHS}));
2308
2.80k
        break;
2309
2.80k
      }
2310
802
      case OpCode::I32__rotr: {
2311
802
        LLVM::Value RHS = stackPop();
2312
802
        LLVM::Value LHS = stackPop();
2313
802
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2314
802
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int32Ty},
2315
802
                                          {LHS, LHS, RHS}));
2316
802
        break;
2317
802
      }
2318
904
      case OpCode::I64__rotl: {
2319
904
        LLVM::Value RHS = stackPop();
2320
904
        LLVM::Value LHS = stackPop();
2321
904
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2322
904
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int64Ty},
2323
904
                                          {LHS, LHS, RHS}));
2324
904
        break;
2325
904
      }
2326
1.35k
      case OpCode::I64__rotr: {
2327
1.35k
        LLVM::Value RHS = stackPop();
2328
1.35k
        LLVM::Value LHS = stackPop();
2329
1.35k
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2330
1.35k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int64Ty},
2331
1.35k
                                          {LHS, LHS, RHS}));
2332
1.35k
        break;
2333
1.35k
      }
2334
283
      case OpCode::F32__add:
2335
591
      case OpCode::F64__add: {
2336
591
        LLVM::Value RHS = stackPop();
2337
591
        LLVM::Value LHS = stackPop();
2338
591
        stackPush(Builder.createFAdd(LHS, RHS));
2339
591
        break;
2340
283
      }
2341
144
      case OpCode::F32__sub:
2342
445
      case OpCode::F64__sub: {
2343
445
        LLVM::Value RHS = stackPop();
2344
445
        LLVM::Value LHS = stackPop();
2345
445
        stackPush(Builder.createFSub(LHS, RHS));
2346
445
        break;
2347
144
      }
2348
555
      case OpCode::F32__mul:
2349
700
      case OpCode::F64__mul: {
2350
700
        LLVM::Value RHS = stackPop();
2351
700
        LLVM::Value LHS = stackPop();
2352
700
        stackPush(Builder.createFMul(LHS, RHS));
2353
700
        break;
2354
555
      }
2355
232
      case OpCode::F32__div:
2356
577
      case OpCode::F64__div: {
2357
577
        LLVM::Value RHS = stackPop();
2358
577
        LLVM::Value LHS = stackPop();
2359
577
        stackPush(Builder.createFDiv(LHS, RHS));
2360
577
        break;
2361
232
      }
2362
307
      case OpCode::F32__min:
2363
654
      case OpCode::F64__min: {
2364
654
        LLVM::Value RHS = stackPop();
2365
654
        LLVM::Value LHS = stackPop();
2366
654
        auto FpTy = Instr.getOpCode() == OpCode::F32__min ? Context.FloatTy
2367
654
                                                          : Context.DoubleTy;
2368
654
        auto IntTy = Instr.getOpCode() == OpCode::F32__min ? Context.Int32Ty
2369
654
                                                           : Context.Int64Ty;
2370
2371
654
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2372
654
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2373
2374
654
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2375
654
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2376
654
        auto OrInt = Builder.createOr(LHSInt, RHSInt);
2377
654
        auto OrFp = Builder.createBitCast(OrInt, FpTy);
2378
2379
654
        auto AddFp = Builder.createFAdd(LHS, RHS);
2380
2381
654
        assuming(LLVM::Core::MinNum != LLVM::Core::NotIntrinsic);
2382
654
        auto MinFp = Builder.createIntrinsic(LLVM::Core::MinNum,
2383
654
                                             {LHS.getType()}, {LHS, RHS});
2384
2385
654
        auto Ret = Builder.createSelect(
2386
654
            UEQ, Builder.createSelect(UNO, AddFp, OrFp), MinFp);
2387
654
        stackPush(Ret);
2388
654
        break;
2389
654
      }
2390
334
      case OpCode::F32__max:
2391
977
      case OpCode::F64__max: {
2392
977
        LLVM::Value RHS = stackPop();
2393
977
        LLVM::Value LHS = stackPop();
2394
977
        auto FpTy = Instr.getOpCode() == OpCode::F32__max ? Context.FloatTy
2395
977
                                                          : Context.DoubleTy;
2396
977
        auto IntTy = Instr.getOpCode() == OpCode::F32__max ? Context.Int32Ty
2397
977
                                                           : Context.Int64Ty;
2398
2399
977
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2400
977
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2401
2402
977
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2403
977
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2404
977
        auto AndInt = Builder.createAnd(LHSInt, RHSInt);
2405
977
        auto AndFp = Builder.createBitCast(AndInt, FpTy);
2406
2407
977
        auto AddFp = Builder.createFAdd(LHS, RHS);
2408
2409
977
        assuming(LLVM::Core::MaxNum != LLVM::Core::NotIntrinsic);
2410
977
        auto MaxFp = Builder.createIntrinsic(LLVM::Core::MaxNum,
2411
977
                                             {LHS.getType()}, {LHS, RHS});
2412
2413
977
        auto Ret = Builder.createSelect(
2414
977
            UEQ, Builder.createSelect(UNO, AddFp, AndFp), MaxFp);
2415
977
        stackPush(Ret);
2416
977
        break;
2417
977
      }
2418
452
      case OpCode::F32__copysign:
2419
871
      case OpCode::F64__copysign: {
2420
871
        LLVM::Value RHS = stackPop();
2421
871
        LLVM::Value LHS = stackPop();
2422
871
        assuming(LLVM::Core::CopySign != LLVM::Core::NotIntrinsic);
2423
871
        stackPush(Builder.createIntrinsic(LLVM::Core::CopySign, {LHS.getType()},
2424
871
                                          {LHS, RHS}));
2425
871
        break;
2426
871
      }
2427
2428
      // Saturating Truncation Numeric Instructions
2429
194
      case OpCode::I32__trunc_sat_f32_s:
2430
194
        compileSignedTruncSat(Context.Int32Ty);
2431
194
        break;
2432
88
      case OpCode::I32__trunc_sat_f32_u:
2433
88
        compileUnsignedTruncSat(Context.Int32Ty);
2434
88
        break;
2435
265
      case OpCode::I32__trunc_sat_f64_s:
2436
265
        compileSignedTruncSat(Context.Int32Ty);
2437
265
        break;
2438
187
      case OpCode::I32__trunc_sat_f64_u:
2439
187
        compileUnsignedTruncSat(Context.Int32Ty);
2440
187
        break;
2441
438
      case OpCode::I64__trunc_sat_f32_s:
2442
438
        compileSignedTruncSat(Context.Int64Ty);
2443
438
        break;
2444
348
      case OpCode::I64__trunc_sat_f32_u:
2445
348
        compileUnsignedTruncSat(Context.Int64Ty);
2446
348
        break;
2447
194
      case OpCode::I64__trunc_sat_f64_s:
2448
194
        compileSignedTruncSat(Context.Int64Ty);
2449
194
        break;
2450
256
      case OpCode::I64__trunc_sat_f64_u:
2451
256
        compileUnsignedTruncSat(Context.Int64Ty);
2452
256
        break;
2453
2454
      // SIMD Memory Instructions
2455
4.84k
      case OpCode::V128__load:
2456
4.84k
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2457
4.84k
                            Instr.getMemoryAlign(), Context.Int128x1Ty);
2458
4.84k
        break;
2459
219
      case OpCode::V128__load8x8_s:
2460
219
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2461
219
                            Instr.getMemoryAlign(),
2462
219
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2463
219
                            Context.Int16x8Ty, true);
2464
219
        break;
2465
42
      case OpCode::V128__load8x8_u:
2466
42
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2467
42
                            Instr.getMemoryAlign(),
2468
42
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2469
42
                            Context.Int16x8Ty, false);
2470
42
        break;
2471
365
      case OpCode::V128__load16x4_s:
2472
365
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2473
365
                            Instr.getMemoryAlign(),
2474
365
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2475
365
                            Context.Int32x4Ty, true);
2476
365
        break;
2477
545
      case OpCode::V128__load16x4_u:
2478
545
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2479
545
                            Instr.getMemoryAlign(),
2480
545
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2481
545
                            Context.Int32x4Ty, false);
2482
545
        break;
2483
174
      case OpCode::V128__load32x2_s:
2484
174
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2485
174
                            Instr.getMemoryAlign(),
2486
174
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2487
174
                            Context.Int64x2Ty, true);
2488
174
        break;
2489
174
      case OpCode::V128__load32x2_u:
2490
174
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2491
174
                            Instr.getMemoryAlign(),
2492
174
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2493
174
                            Context.Int64x2Ty, false);
2494
174
        break;
2495
69
      case OpCode::V128__load8_splat:
2496
69
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2497
69
                           Instr.getMemoryAlign(), Context.Int8Ty,
2498
69
                           Context.Int8x16Ty);
2499
69
        break;
2500
187
      case OpCode::V128__load16_splat:
2501
187
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2502
187
                           Instr.getMemoryAlign(), Context.Int16Ty,
2503
187
                           Context.Int16x8Ty);
2504
187
        break;
2505
200
      case OpCode::V128__load32_splat:
2506
200
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2507
200
                           Instr.getMemoryAlign(), Context.Int32Ty,
2508
200
                           Context.Int32x4Ty);
2509
200
        break;
2510
148
      case OpCode::V128__load64_splat:
2511
148
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2512
148
                           Instr.getMemoryAlign(), Context.Int64Ty,
2513
148
                           Context.Int64x2Ty);
2514
148
        break;
2515
81
      case OpCode::V128__load32_zero:
2516
81
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2517
81
                            Instr.getMemoryAlign(), Context.Int32Ty,
2518
81
                            Context.Int128Ty, false);
2519
81
        break;
2520
143
      case OpCode::V128__load64_zero:
2521
143
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2522
143
                            Instr.getMemoryAlign(), Context.Int64Ty,
2523
143
                            Context.Int128Ty, false);
2524
143
        break;
2525
282
      case OpCode::V128__store:
2526
282
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2527
282
                       Instr.getMemoryAlign(), Context.Int128x1Ty, false, true);
2528
282
        break;
2529
156
      case OpCode::V128__load8_lane:
2530
156
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2531
156
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2532
156
                          Context.Int8Ty, Context.Int8x16Ty);
2533
156
        break;
2534
132
      case OpCode::V128__load16_lane:
2535
132
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2536
132
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2537
132
                          Context.Int16Ty, Context.Int16x8Ty);
2538
132
        break;
2539
124
      case OpCode::V128__load32_lane:
2540
124
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2541
124
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2542
124
                          Context.Int32Ty, Context.Int32x4Ty);
2543
124
        break;
2544
22
      case OpCode::V128__load64_lane:
2545
22
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2546
22
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2547
22
                          Context.Int64Ty, Context.Int64x2Ty);
2548
22
        break;
2549
142
      case OpCode::V128__store8_lane:
2550
142
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2551
142
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2552
142
                           Context.Int8Ty, Context.Int8x16Ty);
2553
142
        break;
2554
103
      case OpCode::V128__store16_lane:
2555
103
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2556
103
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2557
103
                           Context.Int16Ty, Context.Int16x8Ty);
2558
103
        break;
2559
99
      case OpCode::V128__store32_lane:
2560
99
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2561
99
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2562
99
                           Context.Int32Ty, Context.Int32x4Ty);
2563
99
        break;
2564
42
      case OpCode::V128__store64_lane:
2565
42
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2566
42
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2567
42
                           Context.Int64Ty, Context.Int64x2Ty);
2568
42
        break;
2569
2570
      // SIMD Const Instructions
2571
346
      case OpCode::V128__const: {
2572
346
        const auto Value = Instr.getNum().get<uint64x2_t>();
2573
346
        auto Vector =
2574
346
            LLVM::Value::getConstVector64(LLContext, {Value[0], Value[1]});
2575
346
        stackPush(Builder.createBitCast(Vector, Context.Int64x2Ty));
2576
346
        break;
2577
871
      }
2578
2579
      // SIMD Shuffle Instructions
2580
16
      case OpCode::I8x16__shuffle: {
2581
16
        auto V2 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2582
16
        auto V1 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2583
16
        const auto V3 = Instr.getNum().get<uint128_t>();
2584
16
        std::array<uint8_t, 16> Mask;
2585
272
        for (size_t I = 0; I < 16; ++I) {
2586
256
          auto Num = static_cast<uint8_t>(V3 >> (I * 8));
2587
256
          if constexpr (Endian::native == Endian::little) {
2588
256
            Mask[I] = Num;
2589
          } else {
2590
            Mask[15 - I] = Num < 16 ? 15 - Num : 47 - Num;
2591
          }
2592
256
        }
2593
16
        stackPush(Builder.createBitCast(
2594
16
            Builder.createShuffleVector(
2595
16
                V1, V2, LLVM::Value::getConstVector8(LLContext, Mask)),
2596
16
            Context.Int64x2Ty));
2597
16
        break;
2598
871
      }
2599
2600
      // SIMD Lane Instructions
2601
68
      case OpCode::I8x16__extract_lane_s:
2602
68
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2603
68
                             Context.Int32Ty, true);
2604
68
        break;
2605
28
      case OpCode::I8x16__extract_lane_u:
2606
28
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2607
28
                             Context.Int32Ty, false);
2608
28
        break;
2609
181
      case OpCode::I8x16__replace_lane:
2610
181
        compileReplaceLaneOp(Context.Int8x16Ty, Instr.getMemoryLane());
2611
181
        break;
2612
492
      case OpCode::I16x8__extract_lane_s:
2613
492
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2614
492
                             Context.Int32Ty, true);
2615
492
        break;
2616
455
      case OpCode::I16x8__extract_lane_u:
2617
455
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2618
455
                             Context.Int32Ty, false);
2619
455
        break;
2620
675
      case OpCode::I16x8__replace_lane:
2621
675
        compileReplaceLaneOp(Context.Int16x8Ty, Instr.getMemoryLane());
2622
675
        break;
2623
67
      case OpCode::I32x4__extract_lane:
2624
67
        compileExtractLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2625
67
        break;
2626
362
      case OpCode::I32x4__replace_lane:
2627
362
        compileReplaceLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2628
362
        break;
2629
131
      case OpCode::I64x2__extract_lane:
2630
131
        compileExtractLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2631
131
        break;
2632
14
      case OpCode::I64x2__replace_lane:
2633
14
        compileReplaceLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2634
14
        break;
2635
65
      case OpCode::F32x4__extract_lane:
2636
65
        compileExtractLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2637
65
        break;
2638
24
      case OpCode::F32x4__replace_lane:
2639
24
        compileReplaceLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2640
24
        break;
2641
72
      case OpCode::F64x2__extract_lane:
2642
72
        compileExtractLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2643
72
        break;
2644
8
      case OpCode::F64x2__replace_lane:
2645
8
        compileReplaceLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2646
8
        break;
2647
2648
      // SIMD Numeric Instructions
2649
65
      case OpCode::I8x16__swizzle:
2650
65
        compileVectorSwizzle();
2651
65
        break;
2652
38.2k
      case OpCode::I8x16__splat:
2653
38.2k
        compileSplatOp(Context.Int8x16Ty);
2654
38.2k
        break;
2655
8.99k
      case OpCode::I16x8__splat:
2656
8.99k
        compileSplatOp(Context.Int16x8Ty);
2657
8.99k
        break;
2658
1.48k
      case OpCode::I32x4__splat:
2659
1.48k
        compileSplatOp(Context.Int32x4Ty);
2660
1.48k
        break;
2661
710
      case OpCode::I64x2__splat:
2662
710
        compileSplatOp(Context.Int64x2Ty);
2663
710
        break;
2664
354
      case OpCode::F32x4__splat:
2665
354
        compileSplatOp(Context.Floatx4Ty);
2666
354
        break;
2667
149
      case OpCode::F64x2__splat:
2668
149
        compileSplatOp(Context.Doublex2Ty);
2669
149
        break;
2670
94
      case OpCode::I8x16__eq:
2671
94
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntEQ);
2672
94
        break;
2673
408
      case OpCode::I8x16__ne:
2674
408
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntNE);
2675
408
        break;
2676
47
      case OpCode::I8x16__lt_s:
2677
47
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLT);
2678
47
        break;
2679
91
      case OpCode::I8x16__lt_u:
2680
91
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULT);
2681
91
        break;
2682
147
      case OpCode::I8x16__gt_s:
2683
147
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGT);
2684
147
        break;
2685
214
      case OpCode::I8x16__gt_u:
2686
214
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGT);
2687
214
        break;
2688
109
      case OpCode::I8x16__le_s:
2689
109
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLE);
2690
109
        break;
2691
126
      case OpCode::I8x16__le_u:
2692
126
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULE);
2693
126
        break;
2694
653
      case OpCode::I8x16__ge_s:
2695
653
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGE);
2696
653
        break;
2697
114
      case OpCode::I8x16__ge_u:
2698
114
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGE);
2699
114
        break;
2700
77
      case OpCode::I16x8__eq:
2701
77
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntEQ);
2702
77
        break;
2703
183
      case OpCode::I16x8__ne:
2704
183
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntNE);
2705
183
        break;
2706
52
      case OpCode::I16x8__lt_s:
2707
52
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLT);
2708
52
        break;
2709
221
      case OpCode::I16x8__lt_u:
2710
221
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULT);
2711
221
        break;
2712
275
      case OpCode::I16x8__gt_s:
2713
275
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGT);
2714
275
        break;
2715
137
      case OpCode::I16x8__gt_u:
2716
137
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGT);
2717
137
        break;
2718
107
      case OpCode::I16x8__le_s:
2719
107
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLE);
2720
107
        break;
2721
94
      case OpCode::I16x8__le_u:
2722
94
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULE);
2723
94
        break;
2724
153
      case OpCode::I16x8__ge_s:
2725
153
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGE);
2726
153
        break;
2727
66
      case OpCode::I16x8__ge_u:
2728
66
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGE);
2729
66
        break;
2730
61
      case OpCode::I32x4__eq:
2731
61
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntEQ);
2732
61
        break;
2733
128
      case OpCode::I32x4__ne:
2734
128
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntNE);
2735
128
        break;
2736
40
      case OpCode::I32x4__lt_s:
2737
40
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLT);
2738
40
        break;
2739
137
      case OpCode::I32x4__lt_u:
2740
137
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULT);
2741
137
        break;
2742
108
      case OpCode::I32x4__gt_s:
2743
108
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGT);
2744
108
        break;
2745
214
      case OpCode::I32x4__gt_u:
2746
214
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGT);
2747
214
        break;
2748
268
      case OpCode::I32x4__le_s:
2749
268
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLE);
2750
268
        break;
2751
256
      case OpCode::I32x4__le_u:
2752
256
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULE);
2753
256
        break;
2754
61
      case OpCode::I32x4__ge_s:
2755
61
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGE);
2756
61
        break;
2757
99
      case OpCode::I32x4__ge_u:
2758
99
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGE);
2759
99
        break;
2760
124
      case OpCode::I64x2__eq:
2761
124
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntEQ);
2762
124
        break;
2763
71
      case OpCode::I64x2__ne:
2764
71
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntNE);
2765
71
        break;
2766
49
      case OpCode::I64x2__lt_s:
2767
49
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLT);
2768
49
        break;
2769
134
      case OpCode::I64x2__gt_s:
2770
134
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGT);
2771
134
        break;
2772
32
      case OpCode::I64x2__le_s:
2773
32
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLE);
2774
32
        break;
2775
42
      case OpCode::I64x2__ge_s:
2776
42
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGE);
2777
42
        break;
2778
1.31k
      case OpCode::F32x4__eq:
2779
1.31k
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOEQ,
2780
1.31k
                               Context.Int32x4Ty);
2781
1.31k
        break;
2782
37
      case OpCode::F32x4__ne:
2783
37
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealUNE,
2784
37
                               Context.Int32x4Ty);
2785
37
        break;
2786
699
      case OpCode::F32x4__lt:
2787
699
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLT,
2788
699
                               Context.Int32x4Ty);
2789
699
        break;
2790
73
      case OpCode::F32x4__gt:
2791
73
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGT,
2792
73
                               Context.Int32x4Ty);
2793
73
        break;
2794
353
      case OpCode::F32x4__le:
2795
353
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLE,
2796
353
                               Context.Int32x4Ty);
2797
353
        break;
2798
64
      case OpCode::F32x4__ge:
2799
64
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGE,
2800
64
                               Context.Int32x4Ty);
2801
64
        break;
2802
58
      case OpCode::F64x2__eq:
2803
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOEQ,
2804
58
                               Context.Int64x2Ty);
2805
58
        break;
2806
99
      case OpCode::F64x2__ne:
2807
99
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealUNE,
2808
99
                               Context.Int64x2Ty);
2809
99
        break;
2810
121
      case OpCode::F64x2__lt:
2811
121
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLT,
2812
121
                               Context.Int64x2Ty);
2813
121
        break;
2814
57
      case OpCode::F64x2__gt:
2815
57
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGT,
2816
57
                               Context.Int64x2Ty);
2817
57
        break;
2818
168
      case OpCode::F64x2__le:
2819
168
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLE,
2820
168
                               Context.Int64x2Ty);
2821
168
        break;
2822
88
      case OpCode::F64x2__ge:
2823
88
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGE,
2824
88
                               Context.Int64x2Ty);
2825
88
        break;
2826
138
      case OpCode::V128__not:
2827
138
        Stack.back() = Builder.createNot(Stack.back());
2828
138
        break;
2829
74
      case OpCode::V128__and: {
2830
74
        auto RHS = stackPop();
2831
74
        auto LHS = stackPop();
2832
74
        stackPush(Builder.createAnd(LHS, RHS));
2833
74
        break;
2834
871
      }
2835
92
      case OpCode::V128__andnot: {
2836
92
        auto RHS = stackPop();
2837
92
        auto LHS = stackPop();
2838
92
        stackPush(Builder.createAnd(LHS, Builder.createNot(RHS)));
2839
92
        break;
2840
871
      }
2841
122
      case OpCode::V128__or: {
2842
122
        auto RHS = stackPop();
2843
122
        auto LHS = stackPop();
2844
122
        stackPush(Builder.createOr(LHS, RHS));
2845
122
        break;
2846
871
      }
2847
66
      case OpCode::V128__xor: {
2848
66
        auto RHS = stackPop();
2849
66
        auto LHS = stackPop();
2850
66
        stackPush(Builder.createXor(LHS, RHS));
2851
66
        break;
2852
871
      }
2853
125
      case OpCode::V128__bitselect: {
2854
125
        auto C = stackPop();
2855
125
        auto V2 = stackPop();
2856
125
        auto V1 = stackPop();
2857
125
        stackPush(Builder.createXor(
2858
125
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
2859
125
        break;
2860
871
      }
2861
106
      case OpCode::V128__any_true:
2862
106
        compileVectorAnyTrue();
2863
106
        break;
2864
856
      case OpCode::I8x16__abs:
2865
856
        compileVectorAbs(Context.Int8x16Ty);
2866
856
        break;
2867
1.54k
      case OpCode::I8x16__neg:
2868
1.54k
        compileVectorNeg(Context.Int8x16Ty);
2869
1.54k
        break;
2870
105
      case OpCode::I8x16__popcnt:
2871
105
        compileVectorPopcnt();
2872
105
        break;
2873
301
      case OpCode::I8x16__all_true:
2874
301
        compileVectorAllTrue(Context.Int8x16Ty);
2875
301
        break;
2876
769
      case OpCode::I8x16__bitmask:
2877
769
        compileVectorBitMask(Context.Int8x16Ty);
2878
769
        break;
2879
82
      case OpCode::I8x16__narrow_i16x8_s:
2880
82
        compileVectorNarrow(Context.Int16x8Ty, true);
2881
82
        break;
2882
194
      case OpCode::I8x16__narrow_i16x8_u:
2883
194
        compileVectorNarrow(Context.Int16x8Ty, false);
2884
194
        break;
2885
151
      case OpCode::I8x16__shl:
2886
151
        compileVectorShl(Context.Int8x16Ty);
2887
151
        break;
2888
1.04k
      case OpCode::I8x16__shr_s:
2889
1.04k
        compileVectorAShr(Context.Int8x16Ty);
2890
1.04k
        break;
2891
59
      case OpCode::I8x16__shr_u:
2892
59
        compileVectorLShr(Context.Int8x16Ty);
2893
59
        break;
2894
51
      case OpCode::I8x16__add:
2895
51
        compileVectorVectorAdd(Context.Int8x16Ty);
2896
51
        break;
2897
904
      case OpCode::I8x16__add_sat_s:
2898
904
        compileVectorVectorAddSat(Context.Int8x16Ty, true);
2899
904
        break;
2900
72
      case OpCode::I8x16__add_sat_u:
2901
72
        compileVectorVectorAddSat(Context.Int8x16Ty, false);
2902
72
        break;
2903
74
      case OpCode::I8x16__sub:
2904
74
        compileVectorVectorSub(Context.Int8x16Ty);
2905
74
        break;
2906
197
      case OpCode::I8x16__sub_sat_s:
2907
197
        compileVectorVectorSubSat(Context.Int8x16Ty, true);
2908
197
        break;
2909
73
      case OpCode::I8x16__sub_sat_u:
2910
73
        compileVectorVectorSubSat(Context.Int8x16Ty, false);
2911
73
        break;
2912
54
      case OpCode::I8x16__min_s:
2913
54
        compileVectorVectorSMin(Context.Int8x16Ty);
2914
54
        break;
2915
110
      case OpCode::I8x16__min_u:
2916
110
        compileVectorVectorUMin(Context.Int8x16Ty);
2917
110
        break;
2918
270
      case OpCode::I8x16__max_s:
2919
270
        compileVectorVectorSMax(Context.Int8x16Ty);
2920
270
        break;
2921
79
      case OpCode::I8x16__max_u:
2922
79
        compileVectorVectorUMax(Context.Int8x16Ty);
2923
79
        break;
2924
109
      case OpCode::I8x16__avgr_u:
2925
109
        compileVectorVectorUAvgr(Context.Int8x16Ty);
2926
109
        break;
2927
332
      case OpCode::I16x8__abs:
2928
332
        compileVectorAbs(Context.Int16x8Ty);
2929
332
        break;
2930
210
      case OpCode::I16x8__neg:
2931
210
        compileVectorNeg(Context.Int16x8Ty);
2932
210
        break;
2933
142
      case OpCode::I16x8__all_true:
2934
142
        compileVectorAllTrue(Context.Int16x8Ty);
2935
142
        break;
2936
164
      case OpCode::I16x8__bitmask:
2937
164
        compileVectorBitMask(Context.Int16x8Ty);
2938
164
        break;
2939
46
      case OpCode::I16x8__narrow_i32x4_s:
2940
46
        compileVectorNarrow(Context.Int32x4Ty, true);
2941
46
        break;
2942
338
      case OpCode::I16x8__narrow_i32x4_u:
2943
338
        compileVectorNarrow(Context.Int32x4Ty, false);
2944
338
        break;
2945
972
      case OpCode::I16x8__extend_low_i8x16_s:
2946
972
        compileVectorExtend(Context.Int8x16Ty, true, true);
2947
972
        break;
2948
95
      case OpCode::I16x8__extend_high_i8x16_s:
2949
95
        compileVectorExtend(Context.Int8x16Ty, true, false);
2950
95
        break;
2951
395
      case OpCode::I16x8__extend_low_i8x16_u:
2952
395
        compileVectorExtend(Context.Int8x16Ty, false, true);
2953
395
        break;
2954
12
      case OpCode::I16x8__extend_high_i8x16_u:
2955
12
        compileVectorExtend(Context.Int8x16Ty, false, false);
2956
12
        break;
2957
113
      case OpCode::I16x8__shl:
2958
113
        compileVectorShl(Context.Int16x8Ty);
2959
113
        break;
2960
483
      case OpCode::I16x8__shr_s:
2961
483
        compileVectorAShr(Context.Int16x8Ty);
2962
483
        break;
2963
135
      case OpCode::I16x8__shr_u:
2964
135
        compileVectorLShr(Context.Int16x8Ty);
2965
135
        break;
2966
148
      case OpCode::I16x8__add:
2967
148
        compileVectorVectorAdd(Context.Int16x8Ty);
2968
148
        break;
2969
22
      case OpCode::I16x8__add_sat_s:
2970
22
        compileVectorVectorAddSat(Context.Int16x8Ty, true);
2971
22
        break;
2972
475
      case OpCode::I16x8__add_sat_u:
2973
475
        compileVectorVectorAddSat(Context.Int16x8Ty, false);
2974
475
        break;
2975
309
      case OpCode::I16x8__sub:
2976
309
        compileVectorVectorSub(Context.Int16x8Ty);
2977
309
        break;
2978
31
      case OpCode::I16x8__sub_sat_s:
2979
31
        compileVectorVectorSubSat(Context.Int16x8Ty, true);
2980
31
        break;
2981
100
      case OpCode::I16x8__sub_sat_u:
2982
100
        compileVectorVectorSubSat(Context.Int16x8Ty, false);
2983
100
        break;
2984
113
      case OpCode::I16x8__mul:
2985
113
        compileVectorVectorMul(Context.Int16x8Ty);
2986
113
        break;
2987
161
      case OpCode::I16x8__min_s:
2988
161
        compileVectorVectorSMin(Context.Int16x8Ty);
2989
161
        break;
2990
123
      case OpCode::I16x8__min_u:
2991
123
        compileVectorVectorUMin(Context.Int16x8Ty);
2992
123
        break;
2993
79
      case OpCode::I16x8__max_s:
2994
79
        compileVectorVectorSMax(Context.Int16x8Ty);
2995
79
        break;
2996
826
      case OpCode::I16x8__max_u:
2997
826
        compileVectorVectorUMax(Context.Int16x8Ty);
2998
826
        break;
2999
170
      case OpCode::I16x8__avgr_u:
3000
170
        compileVectorVectorUAvgr(Context.Int16x8Ty);
3001
170
        break;
3002
66
      case OpCode::I16x8__extmul_low_i8x16_s:
3003
66
        compileVectorExtMul(Context.Int8x16Ty, true, true);
3004
66
        break;
3005
194
      case OpCode::I16x8__extmul_high_i8x16_s:
3006
194
        compileVectorExtMul(Context.Int8x16Ty, true, false);
3007
194
        break;
3008
108
      case OpCode::I16x8__extmul_low_i8x16_u:
3009
108
        compileVectorExtMul(Context.Int8x16Ty, false, true);
3010
108
        break;
3011
432
      case OpCode::I16x8__extmul_high_i8x16_u:
3012
432
        compileVectorExtMul(Context.Int8x16Ty, false, false);
3013
432
        break;
3014
152
      case OpCode::I16x8__q15mulr_sat_s:
3015
152
        compileVectorVectorQ15MulSat();
3016
152
        break;
3017
308
      case OpCode::I16x8__extadd_pairwise_i8x16_s:
3018
308
        compileVectorExtAddPairwise(Context.Int8x16Ty, true);
3019
308
        break;
3020
327
      case OpCode::I16x8__extadd_pairwise_i8x16_u:
3021
327
        compileVectorExtAddPairwise(Context.Int8x16Ty, false);
3022
327
        break;
3023
57
      case OpCode::I32x4__abs:
3024
57
        compileVectorAbs(Context.Int32x4Ty);
3025
57
        break;
3026
195
      case OpCode::I32x4__neg:
3027
195
        compileVectorNeg(Context.Int32x4Ty);
3028
195
        break;
3029
170
      case OpCode::I32x4__all_true:
3030
170
        compileVectorAllTrue(Context.Int32x4Ty);
3031
170
        break;
3032
96
      case OpCode::I32x4__bitmask:
3033
96
        compileVectorBitMask(Context.Int32x4Ty);
3034
96
        break;
3035
109
      case OpCode::I32x4__extend_low_i16x8_s:
3036
109
        compileVectorExtend(Context.Int16x8Ty, true, true);
3037
109
        break;
3038
513
      case OpCode::I32x4__extend_high_i16x8_s:
3039
513
        compileVectorExtend(Context.Int16x8Ty, true, false);
3040
513
        break;
3041
1.88k
      case OpCode::I32x4__extend_low_i16x8_u:
3042
1.88k
        compileVectorExtend(Context.Int16x8Ty, false, true);
3043
1.88k
        break;
3044
141
      case OpCode::I32x4__extend_high_i16x8_u:
3045
141
        compileVectorExtend(Context.Int16x8Ty, false, false);
3046
141
        break;
3047
1.47k
      case OpCode::I32x4__shl:
3048
1.47k
        compileVectorShl(Context.Int32x4Ty);
3049
1.47k
        break;
3050
391
      case OpCode::I32x4__shr_s:
3051
391
        compileVectorAShr(Context.Int32x4Ty);
3052
391
        break;
3053
505
      case OpCode::I32x4__shr_u:
3054
505
        compileVectorLShr(Context.Int32x4Ty);
3055
505
        break;
3056
177
      case OpCode::I32x4__add:
3057
177
        compileVectorVectorAdd(Context.Int32x4Ty);
3058
177
        break;
3059
144
      case OpCode::I32x4__sub:
3060
144
        compileVectorVectorSub(Context.Int32x4Ty);
3061
144
        break;
3062
229
      case OpCode::I32x4__mul:
3063
229
        compileVectorVectorMul(Context.Int32x4Ty);
3064
229
        break;
3065
82
      case OpCode::I32x4__min_s:
3066
82
        compileVectorVectorSMin(Context.Int32x4Ty);
3067
82
        break;
3068
70
      case OpCode::I32x4__min_u:
3069
70
        compileVectorVectorUMin(Context.Int32x4Ty);
3070
70
        break;
3071
60
      case OpCode::I32x4__max_s:
3072
60
        compileVectorVectorSMax(Context.Int32x4Ty);
3073
60
        break;
3074
82
      case OpCode::I32x4__max_u:
3075
82
        compileVectorVectorUMax(Context.Int32x4Ty);
3076
82
        break;
3077
122
      case OpCode::I32x4__extmul_low_i16x8_s:
3078
122
        compileVectorExtMul(Context.Int16x8Ty, true, true);
3079
122
        break;
3080
66
      case OpCode::I32x4__extmul_high_i16x8_s:
3081
66
        compileVectorExtMul(Context.Int16x8Ty, true, false);
3082
66
        break;
3083
228
      case OpCode::I32x4__extmul_low_i16x8_u:
3084
228
        compileVectorExtMul(Context.Int16x8Ty, false, true);
3085
228
        break;
3086
133
      case OpCode::I32x4__extmul_high_i16x8_u:
3087
133
        compileVectorExtMul(Context.Int16x8Ty, false, false);
3088
133
        break;
3089
1.08k
      case OpCode::I32x4__extadd_pairwise_i16x8_s:
3090
1.08k
        compileVectorExtAddPairwise(Context.Int16x8Ty, true);
3091
1.08k
        break;
3092
497
      case OpCode::I32x4__extadd_pairwise_i16x8_u:
3093
497
        compileVectorExtAddPairwise(Context.Int16x8Ty, false);
3094
497
        break;
3095
100
      case OpCode::I32x4__dot_i16x8_s: {
3096
100
        auto ExtendTy = Context.Int16x8Ty.getExtendedElementVectorType();
3097
100
        auto Undef = LLVM::Value::getUndef(ExtendTy);
3098
100
        auto LHS = Builder.createSExt(
3099
100
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3100
100
        auto RHS = Builder.createSExt(
3101
100
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3102
100
        auto M = Builder.createMul(LHS, RHS);
3103
100
        auto L = Builder.createShuffleVector(
3104
100
            M, Undef,
3105
100
            LLVM::Value::getConstVector32(LLContext, {0U, 2U, 4U, 6U}));
3106
100
        auto R = Builder.createShuffleVector(
3107
100
            M, Undef,
3108
100
            LLVM::Value::getConstVector32(LLContext, {1U, 3U, 5U, 7U}));
3109
100
        auto V = Builder.createAdd(L, R);
3110
100
        stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
3111
100
        break;
3112
871
      }
3113
879
      case OpCode::I64x2__abs:
3114
879
        compileVectorAbs(Context.Int64x2Ty);
3115
879
        break;
3116
541
      case OpCode::I64x2__neg:
3117
541
        compileVectorNeg(Context.Int64x2Ty);
3118
541
        break;
3119
350
      case OpCode::I64x2__all_true:
3120
350
        compileVectorAllTrue(Context.Int64x2Ty);
3121
350
        break;
3122
241
      case OpCode::I64x2__bitmask:
3123
241
        compileVectorBitMask(Context.Int64x2Ty);
3124
241
        break;
3125
268
      case OpCode::I64x2__extend_low_i32x4_s:
3126
268
        compileVectorExtend(Context.Int32x4Ty, true, true);
3127
268
        break;
3128
682
      case OpCode::I64x2__extend_high_i32x4_s:
3129
682
        compileVectorExtend(Context.Int32x4Ty, true, false);
3130
682
        break;
3131
221
      case OpCode::I64x2__extend_low_i32x4_u:
3132
221
        compileVectorExtend(Context.Int32x4Ty, false, true);
3133
221
        break;
3134
588
      case OpCode::I64x2__extend_high_i32x4_u:
3135
588
        compileVectorExtend(Context.Int32x4Ty, false, false);
3136
588
        break;
3137
105
      case OpCode::I64x2__shl:
3138
105
        compileVectorShl(Context.Int64x2Ty);
3139
105
        break;
3140
273
      case OpCode::I64x2__shr_s:
3141
273
        compileVectorAShr(Context.Int64x2Ty);
3142
273
        break;
3143
78
      case OpCode::I64x2__shr_u:
3144
78
        compileVectorLShr(Context.Int64x2Ty);
3145
78
        break;
3146
38
      case OpCode::I64x2__add:
3147
38
        compileVectorVectorAdd(Context.Int64x2Ty);
3148
38
        break;
3149
261
      case OpCode::I64x2__sub:
3150
261
        compileVectorVectorSub(Context.Int64x2Ty);
3151
261
        break;
3152
87
      case OpCode::I64x2__mul:
3153
87
        compileVectorVectorMul(Context.Int64x2Ty);
3154
87
        break;
3155
36
      case OpCode::I64x2__extmul_low_i32x4_s:
3156
36
        compileVectorExtMul(Context.Int32x4Ty, true, true);
3157
36
        break;
3158
514
      case OpCode::I64x2__extmul_high_i32x4_s:
3159
514
        compileVectorExtMul(Context.Int32x4Ty, true, false);
3160
514
        break;
3161
31
      case OpCode::I64x2__extmul_low_i32x4_u:
3162
31
        compileVectorExtMul(Context.Int32x4Ty, false, true);
3163
31
        break;
3164
127
      case OpCode::I64x2__extmul_high_i32x4_u:
3165
127
        compileVectorExtMul(Context.Int32x4Ty, false, false);
3166
127
        break;
3167
104
      case OpCode::F32x4__abs:
3168
104
        compileVectorFAbs(Context.Floatx4Ty);
3169
104
        break;
3170
153
      case OpCode::F32x4__neg:
3171
153
        compileVectorFNeg(Context.Floatx4Ty);
3172
153
        break;
3173
215
      case OpCode::F32x4__sqrt:
3174
215
        compileVectorFSqrt(Context.Floatx4Ty);
3175
215
        break;
3176
132
      case OpCode::F32x4__add:
3177
132
        compileVectorVectorFAdd(Context.Floatx4Ty);
3178
132
        break;
3179
252
      case OpCode::F32x4__sub:
3180
252
        compileVectorVectorFSub(Context.Floatx4Ty);
3181
252
        break;
3182
38
      case OpCode::F32x4__mul:
3183
38
        compileVectorVectorFMul(Context.Floatx4Ty);
3184
38
        break;
3185
167
      case OpCode::F32x4__div:
3186
167
        compileVectorVectorFDiv(Context.Floatx4Ty);
3187
167
        break;
3188
123
      case OpCode::F32x4__min:
3189
123
        compileVectorVectorFMin(Context.Floatx4Ty);
3190
123
        break;
3191
37
      case OpCode::F32x4__max:
3192
37
        compileVectorVectorFMax(Context.Floatx4Ty);
3193
37
        break;
3194
52
      case OpCode::F32x4__pmin:
3195
52
        compileVectorVectorFPMin(Context.Floatx4Ty);
3196
52
        break;
3197
228
      case OpCode::F32x4__pmax:
3198
228
        compileVectorVectorFPMax(Context.Floatx4Ty);
3199
228
        break;
3200
784
      case OpCode::F32x4__ceil:
3201
784
        compileVectorFCeil(Context.Floatx4Ty);
3202
784
        break;
3203
1.72k
      case OpCode::F32x4__floor:
3204
1.72k
        compileVectorFFloor(Context.Floatx4Ty);
3205
1.72k
        break;
3206
1.59k
      case OpCode::F32x4__trunc:
3207
1.59k
        compileVectorFTrunc(Context.Floatx4Ty);
3208
1.59k
        break;
3209
217
      case OpCode::F32x4__nearest:
3210
217
        compileVectorFNearest(Context.Floatx4Ty);
3211
217
        break;
3212
437
      case OpCode::F64x2__abs:
3213
437
        compileVectorFAbs(Context.Doublex2Ty);
3214
437
        break;
3215
630
      case OpCode::F64x2__neg:
3216
630
        compileVectorFNeg(Context.Doublex2Ty);
3217
630
        break;
3218
104
      case OpCode::F64x2__sqrt:
3219
104
        compileVectorFSqrt(Context.Doublex2Ty);
3220
104
        break;
3221
48
      case OpCode::F64x2__add:
3222
48
        compileVectorVectorFAdd(Context.Doublex2Ty);
3223
48
        break;
3224
208
      case OpCode::F64x2__sub:
3225
208
        compileVectorVectorFSub(Context.Doublex2Ty);
3226
208
        break;
3227
210
      case OpCode::F64x2__mul:
3228
210
        compileVectorVectorFMul(Context.Doublex2Ty);
3229
210
        break;
3230
37
      case OpCode::F64x2__div:
3231
37
        compileVectorVectorFDiv(Context.Doublex2Ty);
3232
37
        break;
3233
166
      case OpCode::F64x2__min:
3234
166
        compileVectorVectorFMin(Context.Doublex2Ty);
3235
166
        break;
3236
154
      case OpCode::F64x2__max:
3237
154
        compileVectorVectorFMax(Context.Doublex2Ty);
3238
154
        break;
3239
335
      case OpCode::F64x2__pmin:
3240
335
        compileVectorVectorFPMin(Context.Doublex2Ty);
3241
335
        break;
3242
106
      case OpCode::F64x2__pmax:
3243
106
        compileVectorVectorFPMax(Context.Doublex2Ty);
3244
106
        break;
3245
563
      case OpCode::F64x2__ceil:
3246
563
        compileVectorFCeil(Context.Doublex2Ty);
3247
563
        break;
3248
652
      case OpCode::F64x2__floor:
3249
652
        compileVectorFFloor(Context.Doublex2Ty);
3250
652
        break;
3251
115
      case OpCode::F64x2__trunc:
3252
115
        compileVectorFTrunc(Context.Doublex2Ty);
3253
115
        break;
3254
160
      case OpCode::F64x2__nearest:
3255
160
        compileVectorFNearest(Context.Doublex2Ty);
3256
160
        break;
3257
166
      case OpCode::I32x4__trunc_sat_f32x4_s:
3258
166
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3259
166
        break;
3260
3.68k
      case OpCode::I32x4__trunc_sat_f32x4_u:
3261
3.68k
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3262
3.68k
        break;
3263
333
      case OpCode::F32x4__convert_i32x4_s:
3264
333
        compileVectorConvertS(Context.Int32x4Ty, Context.Floatx4Ty, false);
3265
333
        break;
3266
693
      case OpCode::F32x4__convert_i32x4_u:
3267
693
        compileVectorConvertU(Context.Int32x4Ty, Context.Floatx4Ty, false);
3268
693
        break;
3269
741
      case OpCode::I32x4__trunc_sat_f64x2_s_zero:
3270
741
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3271
741
        break;
3272
2.10k
      case OpCode::I32x4__trunc_sat_f64x2_u_zero:
3273
2.10k
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3274
2.10k
        break;
3275
333
      case OpCode::F64x2__convert_low_i32x4_s:
3276
333
        compileVectorConvertS(Context.Int32x4Ty, Context.Doublex2Ty, true);
3277
333
        break;
3278
1.20k
      case OpCode::F64x2__convert_low_i32x4_u:
3279
1.20k
        compileVectorConvertU(Context.Int32x4Ty, Context.Doublex2Ty, true);
3280
1.20k
        break;
3281
595
      case OpCode::F32x4__demote_f64x2_zero:
3282
595
        compileVectorDemote();
3283
595
        break;
3284
625
      case OpCode::F64x2__promote_low_f32x4:
3285
625
        compileVectorPromote();
3286
625
        break;
3287
3288
      // Relaxed SIMD Instructions
3289
34
      case OpCode::I8x16__relaxed_swizzle:
3290
34
        compileVectorSwizzle();
3291
34
        break;
3292
6
      case OpCode::I32x4__relaxed_trunc_f32x4_s:
3293
6
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3294
6
        break;
3295
3
      case OpCode::I32x4__relaxed_trunc_f32x4_u:
3296
3
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3297
3
        break;
3298
7
      case OpCode::I32x4__relaxed_trunc_f64x2_s_zero:
3299
7
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3300
7
        break;
3301
15
      case OpCode::I32x4__relaxed_trunc_f64x2_u_zero:
3302
15
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3303
15
        break;
3304
4
      case OpCode::F32x4__relaxed_madd:
3305
4
        compileVectorVectorMAdd(Context.Floatx4Ty);
3306
4
        break;
3307
7
      case OpCode::F32x4__relaxed_nmadd:
3308
7
        compileVectorVectorNMAdd(Context.Floatx4Ty);
3309
7
        break;
3310
4
      case OpCode::F64x2__relaxed_madd:
3311
4
        compileVectorVectorMAdd(Context.Doublex2Ty);
3312
4
        break;
3313
13
      case OpCode::F64x2__relaxed_nmadd:
3314
13
        compileVectorVectorNMAdd(Context.Doublex2Ty);
3315
13
        break;
3316
1
      case OpCode::I8x16__relaxed_laneselect:
3317
11
      case OpCode::I16x8__relaxed_laneselect:
3318
14
      case OpCode::I32x4__relaxed_laneselect:
3319
15
      case OpCode::I64x2__relaxed_laneselect: {
3320
15
        auto C = stackPop();
3321
15
        auto V2 = stackPop();
3322
15
        auto V1 = stackPop();
3323
15
        stackPush(Builder.createXor(
3324
15
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
3325
15
        break;
3326
14
      }
3327
4
      case OpCode::F32x4__relaxed_min:
3328
4
        compileVectorVectorFMin(Context.Floatx4Ty);
3329
4
        break;
3330
2
      case OpCode::F32x4__relaxed_max:
3331
2
        compileVectorVectorFMax(Context.Floatx4Ty);
3332
2
        break;
3333
1
      case OpCode::F64x2__relaxed_min:
3334
1
        compileVectorVectorFMin(Context.Doublex2Ty);
3335
1
        break;
3336
4
      case OpCode::F64x2__relaxed_max:
3337
4
        compileVectorVectorFMax(Context.Doublex2Ty);
3338
4
        break;
3339
10
      case OpCode::I16x8__relaxed_q15mulr_s:
3340
10
        compileVectorVectorQ15MulSat();
3341
10
        break;
3342
6
      case OpCode::I16x8__relaxed_dot_i8x16_i7x16_s:
3343
6
        compileVectorRelaxedIntegerDotProduct();
3344
6
        break;
3345
8
      case OpCode::I32x4__relaxed_dot_i8x16_i7x16_add_s:
3346
8
        compileVectorRelaxedIntegerDotProductAdd();
3347
8
        break;
3348
3349
      // Atomic Instructions
3350
193
      case OpCode::Atomic__fence:
3351
193
        compileMemoryFence();
3352
193
        break;
3353
33
      case OpCode::Memory__atomic__notify:
3354
33
        compileAtomicNotify(Instr.getTargetIndex(), Instr.getMemoryOffset());
3355
33
        break;
3356
5
      case OpCode::Memory__atomic__wait32:
3357
5
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3358
5
                          Context.Int32Ty, 32);
3359
5
        break;
3360
2
      case OpCode::Memory__atomic__wait64:
3361
2
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3362
2
                          Context.Int64Ty, 64);
3363
2
        break;
3364
0
      case OpCode::I32__atomic__load:
3365
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3366
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3367
0
                          Context.Int32Ty, true);
3368
0
        break;
3369
0
      case OpCode::I64__atomic__load:
3370
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3371
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3372
0
                          Context.Int64Ty, true);
3373
0
        break;
3374
0
      case OpCode::I32__atomic__load8_u:
3375
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3376
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3377
0
                          Context.Int8Ty);
3378
0
        break;
3379
0
      case OpCode::I32__atomic__load16_u:
3380
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3381
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3382
0
                          Context.Int16Ty);
3383
0
        break;
3384
0
      case OpCode::I64__atomic__load8_u:
3385
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3386
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3387
0
                          Context.Int8Ty);
3388
0
        break;
3389
0
      case OpCode::I64__atomic__load16_u:
3390
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3391
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3392
0
                          Context.Int16Ty);
3393
0
        break;
3394
0
      case OpCode::I64__atomic__load32_u:
3395
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3396
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3397
0
                          Context.Int32Ty);
3398
0
        break;
3399
0
      case OpCode::I32__atomic__store:
3400
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3401
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3402
0
                           Context.Int32Ty, true);
3403
0
        break;
3404
0
      case OpCode::I64__atomic__store:
3405
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3406
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3407
0
                           Context.Int64Ty, true);
3408
0
        break;
3409
0
      case OpCode::I32__atomic__store8:
3410
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3411
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3412
0
                           Context.Int8Ty, true);
3413
0
        break;
3414
0
      case OpCode::I32__atomic__store16:
3415
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3416
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3417
0
                           Context.Int16Ty, true);
3418
0
        break;
3419
0
      case OpCode::I64__atomic__store8:
3420
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3421
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3422
0
                           Context.Int8Ty, true);
3423
0
        break;
3424
0
      case OpCode::I64__atomic__store16:
3425
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3426
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3427
0
                           Context.Int16Ty, true);
3428
0
        break;
3429
0
      case OpCode::I64__atomic__store32:
3430
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3431
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3432
0
                           Context.Int32Ty, true);
3433
0
        break;
3434
0
      case OpCode::I32__atomic__rmw__add:
3435
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3436
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3437
0
                           Context.Int32Ty, Context.Int32Ty, true);
3438
0
        break;
3439
0
      case OpCode::I64__atomic__rmw__add:
3440
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3441
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3442
0
                           Context.Int64Ty, Context.Int64Ty, true);
3443
0
        break;
3444
0
      case OpCode::I32__atomic__rmw8__add_u:
3445
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3446
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3447
0
                           Context.Int32Ty, Context.Int8Ty);
3448
0
        break;
3449
0
      case OpCode::I32__atomic__rmw16__add_u:
3450
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3451
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3452
0
                           Context.Int32Ty, Context.Int16Ty);
3453
0
        break;
3454
0
      case OpCode::I64__atomic__rmw8__add_u:
3455
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3456
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3457
0
                           Context.Int64Ty, Context.Int8Ty);
3458
0
        break;
3459
0
      case OpCode::I64__atomic__rmw16__add_u:
3460
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3461
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3462
0
                           Context.Int64Ty, Context.Int16Ty);
3463
0
        break;
3464
0
      case OpCode::I64__atomic__rmw32__add_u:
3465
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3466
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3467
0
                           Context.Int64Ty, Context.Int32Ty);
3468
0
        break;
3469
0
      case OpCode::I32__atomic__rmw__sub:
3470
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3471
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3472
0
                           Context.Int32Ty, Context.Int32Ty, true);
3473
0
        break;
3474
0
      case OpCode::I64__atomic__rmw__sub:
3475
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3476
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3477
0
                           Context.Int64Ty, Context.Int64Ty, true);
3478
0
        break;
3479
0
      case OpCode::I32__atomic__rmw8__sub_u:
3480
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3481
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3482
0
                           Context.Int32Ty, Context.Int8Ty);
3483
0
        break;
3484
0
      case OpCode::I32__atomic__rmw16__sub_u:
3485
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3486
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3487
0
                           Context.Int32Ty, Context.Int16Ty);
3488
0
        break;
3489
0
      case OpCode::I64__atomic__rmw8__sub_u:
3490
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3491
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3492
0
                           Context.Int64Ty, Context.Int8Ty);
3493
0
        break;
3494
0
      case OpCode::I64__atomic__rmw16__sub_u:
3495
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3496
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3497
0
                           Context.Int64Ty, Context.Int16Ty);
3498
0
        break;
3499
0
      case OpCode::I64__atomic__rmw32__sub_u:
3500
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3501
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3502
0
                           Context.Int64Ty, Context.Int32Ty);
3503
0
        break;
3504
0
      case OpCode::I32__atomic__rmw__and:
3505
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3506
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3507
0
                           Context.Int32Ty, Context.Int32Ty, true);
3508
0
        break;
3509
0
      case OpCode::I64__atomic__rmw__and:
3510
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3511
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3512
0
                           Context.Int64Ty, Context.Int64Ty, true);
3513
0
        break;
3514
0
      case OpCode::I32__atomic__rmw8__and_u:
3515
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3516
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3517
0
                           Context.Int32Ty, Context.Int8Ty);
3518
0
        break;
3519
0
      case OpCode::I32__atomic__rmw16__and_u:
3520
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3521
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3522
0
                           Context.Int32Ty, Context.Int16Ty);
3523
0
        break;
3524
0
      case OpCode::I64__atomic__rmw8__and_u:
3525
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3526
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3527
0
                           Context.Int64Ty, Context.Int8Ty);
3528
0
        break;
3529
0
      case OpCode::I64__atomic__rmw16__and_u:
3530
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3531
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3532
0
                           Context.Int64Ty, Context.Int16Ty);
3533
0
        break;
3534
0
      case OpCode::I64__atomic__rmw32__and_u:
3535
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3536
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3537
0
                           Context.Int64Ty, Context.Int32Ty);
3538
0
        break;
3539
0
      case OpCode::I32__atomic__rmw__or:
3540
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3541
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3542
0
                           Context.Int32Ty, Context.Int32Ty, true);
3543
0
        break;
3544
0
      case OpCode::I64__atomic__rmw__or:
3545
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3546
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3547
0
                           Context.Int64Ty, Context.Int64Ty, true);
3548
0
        break;
3549
0
      case OpCode::I32__atomic__rmw8__or_u:
3550
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3551
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3552
0
                           Context.Int32Ty, Context.Int8Ty);
3553
0
        break;
3554
0
      case OpCode::I32__atomic__rmw16__or_u:
3555
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3556
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3557
0
                           Context.Int32Ty, Context.Int16Ty);
3558
0
        break;
3559
0
      case OpCode::I64__atomic__rmw8__or_u:
3560
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3561
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3562
0
                           Context.Int64Ty, Context.Int8Ty);
3563
0
        break;
3564
0
      case OpCode::I64__atomic__rmw16__or_u:
3565
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3566
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3567
0
                           Context.Int64Ty, Context.Int16Ty);
3568
0
        break;
3569
0
      case OpCode::I64__atomic__rmw32__or_u:
3570
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3571
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3572
0
                           Context.Int64Ty, Context.Int32Ty);
3573
0
        break;
3574
0
      case OpCode::I32__atomic__rmw__xor:
3575
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3576
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3577
0
                           Context.Int32Ty, Context.Int32Ty, true);
3578
0
        break;
3579
0
      case OpCode::I64__atomic__rmw__xor:
3580
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3581
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3582
0
                           Context.Int64Ty, Context.Int64Ty, true);
3583
0
        break;
3584
0
      case OpCode::I32__atomic__rmw8__xor_u:
3585
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3586
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3587
0
                           Context.Int32Ty, Context.Int8Ty);
3588
0
        break;
3589
0
      case OpCode::I32__atomic__rmw16__xor_u:
3590
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3591
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3592
0
                           Context.Int32Ty, Context.Int16Ty);
3593
0
        break;
3594
0
      case OpCode::I64__atomic__rmw8__xor_u:
3595
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3596
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3597
0
                           Context.Int64Ty, Context.Int8Ty);
3598
0
        break;
3599
0
      case OpCode::I64__atomic__rmw16__xor_u:
3600
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3601
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3602
0
                           Context.Int64Ty, Context.Int16Ty);
3603
0
        break;
3604
0
      case OpCode::I64__atomic__rmw32__xor_u:
3605
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3606
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3607
0
                           Context.Int64Ty, Context.Int32Ty);
3608
0
        break;
3609
0
      case OpCode::I32__atomic__rmw__xchg:
3610
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3611
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3612
0
                           Context.Int32Ty, Context.Int32Ty, true);
3613
0
        break;
3614
0
      case OpCode::I64__atomic__rmw__xchg:
3615
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3616
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3617
0
                           Context.Int64Ty, Context.Int64Ty, true);
3618
0
        break;
3619
0
      case OpCode::I32__atomic__rmw8__xchg_u:
3620
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3621
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3622
0
                           Context.Int32Ty, Context.Int8Ty);
3623
0
        break;
3624
0
      case OpCode::I32__atomic__rmw16__xchg_u:
3625
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3626
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3627
0
                           Context.Int32Ty, Context.Int16Ty);
3628
0
        break;
3629
0
      case OpCode::I64__atomic__rmw8__xchg_u:
3630
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3631
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3632
0
                           Context.Int64Ty, Context.Int8Ty);
3633
0
        break;
3634
0
      case OpCode::I64__atomic__rmw16__xchg_u:
3635
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3636
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3637
0
                           Context.Int64Ty, Context.Int16Ty);
3638
0
        break;
3639
0
      case OpCode::I64__atomic__rmw32__xchg_u:
3640
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3641
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3642
0
                           Context.Int64Ty, Context.Int32Ty);
3643
0
        break;
3644
0
      case OpCode::I32__atomic__rmw__cmpxchg:
3645
0
        compileAtomicCompareExchange(
3646
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3647
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3648
0
        break;
3649
0
      case OpCode::I64__atomic__rmw__cmpxchg:
3650
0
        compileAtomicCompareExchange(
3651
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3652
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3653
0
        break;
3654
0
      case OpCode::I32__atomic__rmw8__cmpxchg_u:
3655
0
        compileAtomicCompareExchange(
3656
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3657
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3658
0
        break;
3659
0
      case OpCode::I32__atomic__rmw16__cmpxchg_u:
3660
0
        compileAtomicCompareExchange(
3661
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3662
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3663
0
        break;
3664
0
      case OpCode::I64__atomic__rmw8__cmpxchg_u:
3665
0
        compileAtomicCompareExchange(
3666
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3667
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3668
0
        break;
3669
0
      case OpCode::I64__atomic__rmw16__cmpxchg_u:
3670
0
        compileAtomicCompareExchange(
3671
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3672
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3673
0
        break;
3674
0
      case OpCode::I64__atomic__rmw32__cmpxchg_u:
3675
0
        compileAtomicCompareExchange(
3676
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3677
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3678
0
        break;
3679
3680
0
      default:
3681
0
        assumingUnreachable();
3682
1.06M
      }
3683
1.06M
      return {};
3684
1.06M
    };
3685
3686
1.55M
    for (const auto &Instr : Instrs) {
3687
      // Update instruction count
3688
1.55M
      if (LocalInstrCount) {
3689
0
        Builder.createStore(
3690
0
            Builder.createAdd(
3691
0
                Builder.createLoad(Context.Int64Ty, LocalInstrCount),
3692
0
                LLContext.getInt64(1)),
3693
0
            LocalInstrCount);
3694
0
      }
3695
1.55M
      if (LocalGas) {
3696
0
        auto NewGas = Builder.createAdd(
3697
0
            Builder.createLoad(Context.Int64Ty, LocalGas),
3698
0
            Builder.createLoad(
3699
0
                Context.Int64Ty,
3700
0
                Builder.createConstInBoundsGEP2_64(
3701
0
                    LLVM::Type::getArrayType(Context.Int64Ty, UINT16_MAX + 1),
3702
0
                    Context.getCostTable(Builder, ExecCtx), 0,
3703
0
                    uint16_t(Instr.getOpCode()))));
3704
0
        Builder.createStore(NewGas, LocalGas);
3705
0
      }
3706
3707
      // Make the instruction node according to Code.
3708
1.55M
      EXPECTED_TRY(Dispatch(Instr));
3709
1.55M
    }
3710
10.7k
    return {};
3711
10.7k
  }
3712
2.13k
  /// Lower a trapping float-to-signed-integer truncation.
  ///
  /// Pops one floating-point value from the operand stack and emits, in order:
  /// a NaN check (trap block InvalidConvToInt), a truncation through the Trunc
  /// intrinsic, a lower-bound check and an upper-bound check (both trap block
  /// IntegerOverflow). The `fptosi` result of type `IntType` is pushed back.
  /// Only 32- and 64-bit destination widths are handled.
  void compileSignedTrunc(LLVM::Type IntType) noexcept {
    auto AfterNanBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.norm");
    auto AfterMinBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmin");
    auto AfterMaxBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmax");
    auto Input = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t LowerBound = 0;
    int64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<int32_t>::min();
      UpperBound = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<int64_t>::min();
      UpperBound = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto SrcTy = Input.getType();
    assuming(SrcTy.isFloatTy() || SrcTy.isDoubleTy());
    const auto MantissaBits = SrcTy.getFPMantissaWidth();
    // When the integer width fits in the mantissa width the upper bound is
    // compared with `<=`; otherwise a strict `<` is used (presumably because
    // the bound is then not exactly representable in the FP type).
    const bool UpperIsExact = IntBits <= MantissaBits;
    const auto LowerFp = LLVM::Value::getConstReal(SrcTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(SrcTy, UpperBound);

    // An ordered self-compare is false only for NaN.
    auto OrderedCmp = Builder.createFCmpORD(Input, Input);
    auto NotNan = Builder.createLikely(OrderedCmp);
    Builder.createCondBr(NotNan, AfterNanBB,
                         getTrapBB(ErrCode::Value::InvalidConvToInt));

    Builder.positionAtEnd(AfterNanBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Input);
    auto LowerCmp = Builder.createFCmpOGE(Truncated, LowerFp);
    auto AboveMin = Builder.createLikely(LowerCmp);
    Builder.createCondBr(AboveMin, AfterMinBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(AfterMinBB);
    const auto UpperPred = UpperIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto UpperCmp = Builder.createFCmp(UpperPred, Truncated, UpperFp);
    auto BelowMax = Builder.createLikely(UpperCmp);
    Builder.createCondBr(BelowMax, AfterMaxBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(AfterMaxBB);
    auto Converted = Builder.createFPToSI(Truncated, IntType);
    stackPush(Converted);
  }
3760
1.09k
  /// Lower a saturating float-to-signed-integer truncation.
  ///
  /// Pops one floating-point value and pushes a PHI of type `IntType` that
  /// selects: 0 when the input is NaN, the signed minimum when the truncated
  /// value is below the lower bound, the signed maximum when it fails the
  /// upper-bound compare, and the `fptosi` result otherwise. No trap block is
  /// used. Only 32- and 64-bit destination widths are handled.
  void compileSignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto AfterNanBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.norm");
    auto AfterMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmin");
    auto AfterMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.end");
    auto Input = stackPop();

    // Signed range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    int64_t LowerBound = 0;
    int64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<int32_t>::min();
      UpperBound = std::numeric_limits<int32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<int64_t>::min();
      UpperBound = std::numeric_limits<int64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto SrcTy = Input.getType();
    assuming(SrcTy.isFloatTy() || SrcTy.isDoubleTy());
    const auto MantissaBits = SrcTy.getFPMantissaWidth();
    // `<=` against the upper bound only when the integer width fits in the
    // mantissa width; otherwise a strict `<` is emitted.
    const bool UpperIsExact = IntBits <= MantissaBits;
    // Bit patterns of the saturation results, handed to getConstInt below.
    const uint64_t SatMin = static_cast<uint64_t>(LowerBound);
    const uint64_t SatMax = static_cast<uint64_t>(UpperBound);
    const auto LowerFp = LLVM::Value::getConstReal(SrcTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(SrcTy, UpperBound);

    // An ordered self-compare is false only for NaN.
    auto OrderedCmp = Builder.createFCmpORD(Input, Input);
    auto NotNan = Builder.createLikely(OrderedCmp);
    Builder.createCondBr(NotNan, AfterNanBB, JoinBB);

    Builder.positionAtEnd(AfterNanBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Input);
    auto LowerCmp = Builder.createFCmpOGE(Truncated, LowerFp);
    auto AboveMin = Builder.createLikely(LowerCmp);
    Builder.createCondBr(AboveMin, AfterMinBB, JoinBB);

    Builder.positionAtEnd(AfterMinBB);
    const auto UpperPred = UpperIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto UpperCmp = Builder.createFCmp(UpperPred, Truncated, UpperFp);
    auto BelowMax = Builder.createLikely(UpperCmp);
    Builder.createCondBr(BelowMax, AfterMaxBB, JoinBB);

    Builder.positionAtEnd(AfterMaxBB);
    auto Converted = Builder.createFPToSI(Truncated, IntType);
    Builder.createBr(JoinBB);

    // Merge the four possible outcomes; each incoming edge names the block
    // that branched to the join block.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, 0, true), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, SatMin, true),
                       AfterNanBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, SatMax, true),
                       AfterMinBB);
    Result.addIncoming(Converted, AfterMaxBB);

    stackPush(Result);
  }
3820
3.79k
  /// Lower a trapping float-to-unsigned-integer truncation.
  ///
  /// Pops one floating-point value from the operand stack and emits, in order:
  /// a NaN check (trap block InvalidConvToInt), a truncation through the Trunc
  /// intrinsic, a lower-bound check and an upper-bound check (both trap block
  /// IntegerOverflow). The `fptoui` result of type `IntType` is pushed back.
  /// Only 32- and 64-bit destination widths are handled.
  void compileUnsignedTrunc(LLVM::Type IntType) noexcept {
    auto AfterNanBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.norm");
    auto AfterMinBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmin");
    auto AfterMaxBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmax");
    auto Input = stackPop();

    // Unsigned range of the destination integer type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto SrcTy = Input.getType();
    assuming(SrcTy.isFloatTy() || SrcTy.isDoubleTy());
    const auto MantissaBits = SrcTy.getFPMantissaWidth();
    // `<=` against the upper bound only when the integer width fits in the
    // mantissa width; otherwise a strict `<` is emitted.
    const bool UpperIsExact = IntBits <= MantissaBits;
    const auto LowerFp = LLVM::Value::getConstReal(SrcTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(SrcTy, UpperBound);

    // An ordered self-compare is false only for NaN.
    auto OrderedCmp = Builder.createFCmpORD(Input, Input);
    auto NotNan = Builder.createLikely(OrderedCmp);
    Builder.createCondBr(NotNan, AfterNanBB,
                         getTrapBB(ErrCode::Value::InvalidConvToInt));

    Builder.positionAtEnd(AfterNanBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Input);
    auto LowerCmp = Builder.createFCmpOGE(Truncated, LowerFp);
    auto AboveMin = Builder.createLikely(LowerCmp);
    Builder.createCondBr(AboveMin, AfterMinBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(AfterMinBB);
    const auto UpperPred = UpperIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto UpperCmp = Builder.createFCmp(UpperPred, Truncated, UpperFp);
    auto BelowMax = Builder.createLikely(UpperCmp);
    Builder.createCondBr(BelowMax, AfterMaxBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(AfterMaxBB);
    auto Converted = Builder.createFPToUI(Truncated, IntType);
    stackPush(Converted);
  }
3868
879
  /// Lower a saturating float-to-unsigned-integer truncation.
  ///
  /// Pops one floating-point value and pushes a PHI of type `IntType` that
  /// selects: the unsigned minimum when the lower-bound compare on the
  /// truncated value fails, the unsigned maximum when the upper-bound compare
  /// fails, and the `fptoui` result otherwise. There is no separate NaN
  /// branch here: the first compare is an ordered one, so it is the edge taken
  /// to the join block from the entry block that covers that case.
  /// Only 32- and 64-bit destination widths are handled.
  void compileUnsignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto AfterMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.norm");
    auto AfterMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.end");
    auto Input = stackPop();

    // Unsigned range of the destination integer type; these values double as
    // the saturation results.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto SrcTy = Input.getType();
    assuming(SrcTy.isFloatTy() || SrcTy.isDoubleTy());
    const auto MantissaBits = SrcTy.getFPMantissaWidth();
    // `<=` against the upper bound only when the integer width fits in the
    // mantissa width; otherwise a strict `<` is emitted.
    const bool UpperIsExact = IntBits <= MantissaBits;
    const auto LowerFp = LLVM::Value::getConstReal(SrcTy, LowerBound);
    const auto UpperFp = LLVM::Value::getConstReal(SrcTy, UpperBound);

    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Input);
    auto LowerCmp = Builder.createFCmpOGE(Truncated, LowerFp);
    auto AboveMin = Builder.createLikely(LowerCmp);
    Builder.createCondBr(AboveMin, AfterMinBB, JoinBB);

    Builder.positionAtEnd(AfterMinBB);
    const auto UpperPred = UpperIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto UpperCmp = Builder.createFCmp(UpperPred, Truncated, UpperFp);
    auto BelowMax = Builder.createLikely(UpperCmp);
    Builder.createCondBr(BelowMax, AfterMaxBB, JoinBB);

    Builder.positionAtEnd(AfterMaxBB);
    auto Converted = Builder.createFPToUI(Truncated, IntType);
    Builder.createBr(JoinBB);

    // Merge the three possible outcomes; each incoming edge names the block
    // that branched to the join block.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, LowerBound), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, UpperBound),
                       AfterMinBB);
    Result.addIncoming(Converted, AfterMaxBB);

    stackPush(Result);
  }
3920
3921
  /// Emit an alignment check for an atomic access.
  ///
  /// Masks `Offset` with (byte width of `IntType`) - 1 and branches to the
  /// UnalignedAtomicAccess trap block when the masked value is non-zero. On
  /// return the builder is positioned at the start of the "aligned" block.
  /// `Offset` is combined with 64-bit constants, so it is expected to be an
  /// i64 value.
  void compileAtomicCheckOffsetAlignment(LLVM::Value Offset,
                                         LLVM::Type IntType) noexcept {
    // Access size in bytes (bit width divided by eight).
    const auto AccessBytes = IntType.getIntegerBitWidth() >> 3;
    auto LowBitsMask = LLContext.getInt64(AccessBytes - 1);
    auto LowBits = Builder.createAnd(Offset, LowBitsMask);
    auto AlignedBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "address_align_ok");
    auto LowBitsAreZero = Builder.createICmpEQ(LowBits, LLContext.getInt64(0));
    auto IsAligned = Builder.createLikely(LowBitsAreZero);
    Builder.createCondBr(IsAligned, AlignedBB,
                         getTrapBB(ErrCode::Value::UnalignedAtomicAccess));

    Builder.positionAtEnd(AlignedBB);
  }
3934
3935
193
  void compileMemoryFence() noexcept {
3936
193
    Builder.createFence(LLVMAtomicOrderingSequentiallyConsistent);
3937
193
  }
3938
  void compileAtomicNotify(unsigned MemoryIndex,
3939
33
                           unsigned MemoryOffset) noexcept {
3940
33
    auto Count = stackPop();
3941
33
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3942
33
    if (MemoryOffset != 0) {
3943
26
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3944
26
    }
3945
33
    compileAtomicCheckOffsetAlignment(Addr, Context.Int32Ty);
3946
33
    auto Offset = stackPop();
3947
3948
33
    stackPush(Builder.createCall(
3949
33
        Context.getIntrinsic(
3950
33
            Builder, Executable::Intrinsics::kMemAtomicNotify,
3951
33
            LLVM::Type::getFunctionType(
3952
33
                Context.Int32Ty,
3953
33
                {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
3954
33
        {LLContext.getInt32(MemoryIndex), Offset, Count}));
3955
33
  }
3956
  void compileAtomicWait(unsigned MemoryIndex, unsigned MemoryOffset,
3957
7
                         LLVM::Type TargetType, uint32_t BitWidth) noexcept {
3958
7
    auto Timeout = stackPop();
3959
7
    auto ExpectedValue = Builder.createZExtOrTrunc(stackPop(), Context.Int64Ty);
3960
7
    auto Addr = Builder.createZExt(Stack.back(), Context.Int64Ty);
3961
7
    if (MemoryOffset != 0) {
3962
3
      Addr = Builder.createAdd(Addr, LLContext.getInt64(MemoryOffset));
3963
3
    }
3964
7
    compileAtomicCheckOffsetAlignment(Addr, TargetType);
3965
7
    auto Offset = stackPop();
3966
3967
7
    stackPush(Builder.createCall(
3968
7
        Context.getIntrinsic(
3969
7
            Builder, Executable::Intrinsics::kMemAtomicWait,
3970
7
            LLVM::Type::getFunctionType(Context.Int32Ty,
3971
7
                                        {Context.Int32Ty, Context.Int32Ty,
3972
7
                                         Context.Int64Ty, Context.Int64Ty,
3973
7
                                         Context.Int32Ty},
3974
7
                                        false)),
3975
7
        {LLContext.getInt32(MemoryIndex), Offset, ExpectedValue, Timeout,
3976
7
         LLContext.getInt32(BitWidth)}));
3977
7
  }
3978
  void compileAtomicLoad(unsigned MemoryIndex, unsigned MemoryOffset,
3979
                         unsigned Alignment, LLVM::Type IntType,
3980
0
                         LLVM::Type TargetType, bool Signed = false) noexcept {
3981
3982
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
3983
0
    if (MemoryOffset != 0) {
3984
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
3985
0
    }
3986
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
3987
0
    auto VPtr = Builder.createInBoundsGEP1(
3988
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
3989
0
        Offset);
3990
3991
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
3992
0
    auto Load = switchEndian(Builder.createLoad(TargetType, Ptr, true));
3993
0
    Load.setAlignment(1 << Alignment);
3994
0
    Load.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
3995
3996
0
    if (Signed) {
3997
0
      Stack.back() = Builder.createSExt(Load, IntType);
3998
0
    } else {
3999
0
      Stack.back() = Builder.createZExt(Load, IntType);
4000
0
    }
4001
0
  }
4002
  void compileAtomicStore(unsigned MemoryIndex, unsigned MemoryOffset,
4003
                          unsigned Alignment, LLVM::Type, LLVM::Type TargetType,
4004
0
                          bool Signed = false) noexcept {
4005
0
    auto V = stackPop();
4006
4007
0
    if (Signed) {
4008
0
      V = Builder.createSExtOrTrunc(V, TargetType);
4009
0
    } else {
4010
0
      V = Builder.createZExtOrTrunc(V, TargetType);
4011
0
    }
4012
0
    V = switchEndian(V);
4013
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4014
0
    if (MemoryOffset != 0) {
4015
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4016
0
    }
4017
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4018
0
    auto VPtr = Builder.createInBoundsGEP1(
4019
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4020
0
        Offset);
4021
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4022
0
    auto Store = Builder.createStore(V, Ptr, true);
4023
0
    Store.setAlignment(1 << Alignment);
4024
0
    Store.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
4025
0
  }
4026
4027
  void compileAtomicRMWOp(unsigned MemoryIndex, unsigned MemoryOffset,
4028
                          [[maybe_unused]] unsigned Alignment,
4029
                          LLVMAtomicRMWBinOp BinOp, LLVM::Type IntType,
4030
0
                          LLVM::Type TargetType, bool Signed = false) noexcept {
4031
0
    auto Value = Builder.createSExtOrTrunc(stackPop(), TargetType);
4032
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4033
0
    if (MemoryOffset != 0) {
4034
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4035
0
    }
4036
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4037
0
    auto VPtr = Builder.createInBoundsGEP1(
4038
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4039
0
        Offset);
4040
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4041
4042
0
    LLVM::Value Ret;
4043
    if constexpr (Endian::native == Endian::big) {
4044
      if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd ||
4045
          BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4046
        auto AtomicBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw");
4047
        auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw.ok");
4048
        Builder.createBr(AtomicBB);
4049
        Builder.positionAtEnd(AtomicBB);
4050
4051
        auto Load = Builder.createLoad(TargetType, Ptr, true);
4052
        Load.setOrdering(LLVMAtomicOrderingMonotonic);
4053
        Load.setAlignment(1 << Alignment);
4054
4055
        LLVM::Value New;
4056
        if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd)
4057
          New = Builder.createAdd(switchEndian(Load), Value);
4058
        else if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4059
          New = Builder.createSub(switchEndian(Load), Value);
4060
        } else {
4061
          assumingUnreachable();
4062
        }
4063
        New = switchEndian(New);
4064
4065
        auto Exchange = Builder.createAtomicCmpXchg(
4066
            Ptr, Load, New, LLVMAtomicOrderingSequentiallyConsistent,
4067
            LLVMAtomicOrderingSequentiallyConsistent);
4068
4069
        Ret = Builder.createExtractValue(Exchange, 0);
4070
        auto Success = Builder.createExtractValue(Exchange, 1);
4071
        Builder.createCondBr(Success, OkBB, AtomicBB);
4072
        Builder.positionAtEnd(OkBB);
4073
      } else {
4074
        Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4075
                                      LLVMAtomicOrderingSequentiallyConsistent);
4076
      }
4077
0
    } else {
4078
0
      Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4079
0
                                    LLVMAtomicOrderingSequentiallyConsistent);
4080
0
    }
4081
0
    Ret = switchEndian(Ret);
4082
#if LLVM_VERSION_MAJOR >= 13
4083
    Ret.setAlignment(1 << Alignment);
4084
#endif
4085
0
    if (Signed) {
4086
0
      Stack.back() = Builder.createSExt(Ret, IntType);
4087
0
    } else {
4088
0
      Stack.back() = Builder.createZExt(Ret, IntType);
4089
0
    }
4090
0
  }
4091
  void compileAtomicCompareExchange(unsigned MemoryIndex, unsigned MemoryOffset,
4092
                                    [[maybe_unused]] unsigned Alignment,
4093
                                    LLVM::Type IntType, LLVM::Type TargetType,
4094
0
                                    bool Signed = false) noexcept {
4095
4096
0
    auto Replacement = Builder.createSExtOrTrunc(stackPop(), TargetType);
4097
0
    auto Expected = Builder.createSExtOrTrunc(stackPop(), TargetType);
4098
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4099
0
    if (MemoryOffset != 0) {
4100
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4101
0
    }
4102
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4103
0
    auto VPtr = Builder.createInBoundsGEP1(
4104
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4105
0
        Offset);
4106
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4107
4108
0
    auto Ret = Builder.createAtomicCmpXchg(
4109
0
        Ptr, switchEndian(Expected), switchEndian(Replacement),
4110
0
        LLVMAtomicOrderingSequentiallyConsistent,
4111
0
        LLVMAtomicOrderingSequentiallyConsistent);
4112
#if LLVM_VERSION_MAJOR >= 13
4113
    Ret.setAlignment(1 << Alignment);
4114
#endif
4115
0
    auto OldVal = Builder.createExtractValue(Ret, 0);
4116
0
    OldVal = switchEndian(OldVal);
4117
0
    if (Signed) {
4118
0
      Stack.back() = Builder.createSExt(OldVal, IntType);
4119
0
    } else {
4120
0
      Stack.back() = Builder.createZExt(OldVal, IntType);
4121
0
    }
4122
0
  }
4123
4124
11.5k
  void compileReturn() noexcept {
4125
11.5k
    updateInstrCount();
4126
11.5k
    updateGas();
4127
11.5k
    auto Ty = F.Ty.getReturnType();
4128
11.5k
    if (Ty.isVoidTy()) {
4129
2.01k
      Builder.createRetVoid();
4130
9.48k
    } else if (Ty.isStructTy()) {
4131
342
      const auto Count = Ty.getStructNumElements();
4132
342
      std::vector<LLVM::Value> Ret(Count);
4133
1.27k
      for (unsigned I = 0; I < Count; ++I) {
4134
935
        const unsigned J = Count - 1 - I;
4135
935
        Ret[J] = stackPop();
4136
935
      }
4137
342
      Builder.createAggregateRet(Ret);
4138
9.14k
    } else {
4139
9.14k
      Builder.createRet(stackPop());
4140
9.14k
    }
4141
11.5k
  }
4142
4143
19.4k
  void updateInstrCount() noexcept {
4144
19.4k
    if (LocalInstrCount) {
4145
0
      auto Store [[maybe_unused]] = Builder.createAtomicRMW(
4146
0
          LLVMAtomicRMWBinOpAdd, Context.getInstrCount(Builder, ExecCtx),
4147
0
          Builder.createLoad(Context.Int64Ty, LocalInstrCount),
4148
0
          LLVMAtomicOrderingMonotonic);
4149
#if LLVM_VERSION_MAJOR >= 13
4150
      Store.setAlignment(8);
4151
#endif
4152
0
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
4153
0
    }
4154
19.4k
  }
4155
4156
21.3k
  void updateGas() noexcept {
4157
21.3k
    if (LocalGas) {
4158
0
      auto CurrBB = Builder.getInsertBlock();
4159
0
      auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_check");
4160
0
      auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_ok");
4161
0
      auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_end");
4162
4163
0
      auto Cost = Builder.createLoad(Context.Int64Ty, LocalGas);
4164
0
      Cost.setAlignment(64);
4165
0
      auto GasPtr = Context.getGas(Builder, ExecCtx);
4166
0
      auto GasLimit = Context.getGasLimit(Builder, ExecCtx);
4167
0
      auto Gas = Builder.createLoad(Context.Int64Ty, GasPtr);
4168
0
      Gas.setAlignment(64);
4169
0
      Gas.setOrdering(LLVMAtomicOrderingMonotonic);
4170
0
      Builder.createBr(CheckBB);
4171
0
      Builder.positionAtEnd(CheckBB);
4172
4173
0
      auto PHIOldGas = Builder.createPHI(Context.Int64Ty);
4174
0
      auto NewGas = Builder.createAdd(PHIOldGas, Cost);
4175
0
      auto IsGasRemain =
4176
0
          Builder.createLikely(Builder.createICmpULE(NewGas, GasLimit));
4177
0
      Builder.createCondBr(IsGasRemain, OkBB,
4178
0
                           getTrapBB(ErrCode::Value::CostLimitExceeded));
4179
0
      Builder.positionAtEnd(OkBB);
4180
4181
0
      auto RGasAndSucceed = Builder.createAtomicCmpXchg(
4182
0
          GasPtr, PHIOldGas, NewGas, LLVMAtomicOrderingMonotonic,
4183
0
          LLVMAtomicOrderingMonotonic);
4184
#if LLVM_VERSION_MAJOR >= 13
4185
      RGasAndSucceed.setAlignment(8);
4186
#endif
4187
0
      RGasAndSucceed.setWeak(true);
4188
0
      auto RGas = Builder.createExtractValue(RGasAndSucceed, 0);
4189
0
      auto Succeed = Builder.createExtractValue(RGasAndSucceed, 1);
4190
0
      Builder.createCondBr(Builder.createLikely(Succeed), EndBB, CheckBB);
4191
0
      Builder.positionAtEnd(EndBB);
4192
4193
0
      Builder.createStore(LLContext.getInt64(0), LocalGas);
4194
4195
0
      PHIOldGas.addIncoming(Gas, CurrBB);
4196
0
      PHIOldGas.addIncoming(RGas, OkBB);
4197
0
    }
4198
21.3k
  }
4199
4200
3.11k
  void updateGasAtTrap() noexcept {
4201
3.11k
    if (LocalGas) {
4202
0
      auto Update [[maybe_unused]] = Builder.createAtomicRMW(
4203
0
          LLVMAtomicRMWBinOpAdd, Context.getGas(Builder, ExecCtx),
4204
0
          Builder.createLoad(Context.Int64Ty, LocalGas),
4205
0
          LLVMAtomicOrderingMonotonic);
4206
#if LLVM_VERSION_MAJOR >= 13
4207
      Update.setAlignment(8);
4208
#endif
4209
0
    }
4210
3.11k
  }
4211
4212
private:
4213
3.48k
  void compileCallOp(const unsigned int FuncIndex) noexcept {
4214
3.48k
    const auto &FuncType =
4215
3.48k
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4216
3.48k
            ->getFuncType();
4217
3.48k
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4218
3.48k
    const auto &ParamTypes = FuncType.getParamTypes();
4219
4220
3.48k
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4221
3.48k
    Args[0] = F.Fn.getFirstParam();
4222
4.29k
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4223
817
      const size_t J = ParamTypes.size() - 1 - I;
4224
817
      Args[J + 1] = stackPop();
4225
817
    }
4226
4227
3.48k
    auto Ret = Builder.createCall(Function, Args);
4228
3.48k
    auto Ty = Ret.getType();
4229
3.48k
    if (Ty.isVoidTy()) {
4230
      // nothing to do
4231
1.80k
    } else if (Ty.isStructTy()) {
4232
156
      for (auto Val : unpackStruct(Builder, Ret)) {
4233
156
        stackPush(Val);
4234
156
      }
4235
1.61k
    } else {
4236
1.61k
      stackPush(Ret);
4237
1.61k
    }
4238
3.48k
  }
4239
4240
  void compileIndirectCallOp(const uint32_t TableIndex,
4241
1.17k
                             const uint32_t FuncTypeIndex) noexcept {
4242
1.17k
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4243
1.17k
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4244
1.17k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4245
4246
1.17k
    LLVM::Value FuncIndex = stackPop();
4247
1.17k
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4248
1.17k
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4249
1.17k
    auto RTy = FTy.getReturnType();
4250
4251
1.17k
    const size_t ArgSize = FuncType.getParamTypes().size();
4252
1.17k
    const size_t RetSize =
4253
1.17k
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4254
1.17k
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4255
1.17k
    ArgsVec[0] = F.Fn.getFirstParam();
4256
2.03k
    for (size_t I = 0; I < ArgSize; ++I) {
4257
862
      const size_t J = ArgSize - I;
4258
862
      ArgsVec[J] = stackPop();
4259
862
    }
4260
4261
1.17k
    std::vector<LLVM::Value> FPtrRetsVec;
4262
1.17k
    FPtrRetsVec.reserve(RetSize);
4263
1.17k
    {
4264
1.17k
      auto FPtr = Builder.createCall(
4265
1.17k
          Context.getIntrinsic(
4266
1.17k
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4267
1.17k
              LLVM::Type::getFunctionType(
4268
1.17k
                  FTy.getPointerTo(),
4269
1.17k
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4270
1.17k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4271
1.17k
           FuncIndex});
4272
1.17k
      Builder.createCondBr(
4273
1.17k
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4274
1.17k
          NotNullBB, IsNullBB);
4275
1.17k
      Builder.positionAtEnd(NotNullBB);
4276
4277
1.17k
      auto FPtrRet =
4278
1.17k
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4279
1.17k
      if (RetSize == 0) {
4280
        // nothing to do
4281
800
      } else if (RetSize == 1) {
4282
780
        FPtrRetsVec.push_back(FPtrRet);
4283
780
      } else {
4284
40
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4285
40
          FPtrRetsVec.push_back(Val);
4286
40
        }
4287
20
      }
4288
1.17k
    }
4289
4290
1.17k
    Builder.createBr(EndBB);
4291
1.17k
    Builder.positionAtEnd(IsNullBB);
4292
4293
1.17k
    std::vector<LLVM::Value> RetsVec;
4294
1.17k
    {
4295
1.17k
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4296
1.17k
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4297
1.17k
      Builder.createArrayPtrStore(
4298
1.17k
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4299
1.17k
          kValSize);
4300
4301
1.17k
      Builder.createCall(
4302
1.17k
          Context.getIntrinsic(
4303
1.17k
              Builder, Executable::Intrinsics::kCallIndirect,
4304
1.17k
              LLVM::Type::getFunctionType(Context.VoidTy,
4305
1.17k
                                          {Context.Int32Ty, Context.Int32Ty,
4306
1.17k
                                           Context.Int32Ty, Context.Int8PtrTy,
4307
1.17k
                                           Context.Int8PtrTy},
4308
1.17k
                                          false)),
4309
1.17k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4310
1.17k
           FuncIndex, Args, Rets});
4311
4312
1.17k
      if (RetSize == 0) {
4313
        // nothing to do
4314
800
      } else if (RetSize == 1) {
4315
780
        RetsVec.push_back(
4316
780
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4317
780
      } else {
4318
20
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4319
20
                                             kValSize);
4320
20
      }
4321
1.17k
      Builder.createBr(EndBB);
4322
1.17k
      Builder.positionAtEnd(EndBB);
4323
1.17k
    }
4324
4325
1.99k
    for (unsigned I = 0; I < RetSize; ++I) {
4326
820
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4327
820
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4328
820
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4329
820
      stackPush(PHIRet);
4330
820
    }
4331
1.17k
  }
4332
4333
63
  void compileReturnCallOp(const unsigned int FuncIndex) noexcept {
4334
63
    const auto &FuncType =
4335
63
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4336
63
            ->getFuncType();
4337
63
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4338
63
    const auto &ParamTypes = FuncType.getParamTypes();
4339
4340
63
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4341
63
    Args[0] = F.Fn.getFirstParam();
4342
127
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4343
64
      const size_t J = ParamTypes.size() - 1 - I;
4344
64
      Args[J + 1] = stackPop();
4345
64
    }
4346
4347
63
    auto Ret = Builder.createCall(Function, Args);
4348
63
    auto Ty = Ret.getType();
4349
63
    if (Ty.isVoidTy()) {
4350
1
      Builder.createRetVoid();
4351
62
    } else {
4352
62
      Builder.createRet(Ret);
4353
62
    }
4354
63
  }
4355
4356
  void compileReturnIndirectCallOp(const uint32_t TableIndex,
4357
102
                                   const uint32_t FuncTypeIndex) noexcept {
4358
102
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4359
102
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4360
4361
102
    LLVM::Value FuncIndex = stackPop();
4362
102
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4363
102
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4364
102
    auto RTy = FTy.getReturnType();
4365
4366
102
    const size_t ArgSize = FuncType.getParamTypes().size();
4367
102
    const size_t RetSize =
4368
102
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4369
102
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4370
102
    ArgsVec[0] = F.Fn.getFirstParam();
4371
191
    for (size_t I = 0; I < ArgSize; ++I) {
4372
89
      const size_t J = ArgSize - I;
4373
89
      ArgsVec[J] = stackPop();
4374
89
    }
4375
4376
102
    {
4377
102
      auto FPtr = Builder.createCall(
4378
102
          Context.getIntrinsic(
4379
102
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4380
102
              LLVM::Type::getFunctionType(
4381
102
                  FTy.getPointerTo(),
4382
102
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4383
102
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4384
102
           FuncIndex});
4385
102
      Builder.createCondBr(
4386
102
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4387
102
          NotNullBB, IsNullBB);
4388
102
      Builder.positionAtEnd(NotNullBB);
4389
4390
102
      auto FPtrRet =
4391
102
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4392
102
      if (RetSize == 0) {
4393
17
        Builder.createRetVoid();
4394
85
      } else {
4395
85
        Builder.createRet(FPtrRet);
4396
85
      }
4397
102
    }
4398
4399
102
    Builder.positionAtEnd(IsNullBB);
4400
4401
102
    {
4402
102
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4403
102
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4404
102
      Builder.createArrayPtrStore(
4405
102
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4406
102
          kValSize);
4407
4408
102
      Builder.createCall(
4409
102
          Context.getIntrinsic(
4410
102
              Builder, Executable::Intrinsics::kCallIndirect,
4411
102
              LLVM::Type::getFunctionType(Context.VoidTy,
4412
102
                                          {Context.Int32Ty, Context.Int32Ty,
4413
102
                                           Context.Int32Ty, Context.Int8PtrTy,
4414
102
                                           Context.Int8PtrTy},
4415
102
                                          false)),
4416
102
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4417
102
           FuncIndex, Args, Rets});
4418
4419
102
      if (RetSize == 0) {
4420
17
        Builder.createRetVoid();
4421
85
      } else if (RetSize == 1) {
4422
81
        Builder.createRet(
4423
81
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4424
81
      } else {
4425
4
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4426
4
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4427
4
      }
4428
102
    }
4429
102
  }
4430
4431
7
  void compileCallRefOp(const unsigned int TypeIndex) noexcept {
4432
7
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4433
7
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4434
7
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4435
4436
7
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4437
7
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4438
7
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4439
7
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4440
7
        LLContext.getInt64(0)));
4441
7
    Builder.createCondBr(IsRefNotNull, OkBB,
4442
7
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4443
7
    Builder.positionAtEnd(OkBB);
4444
4445
7
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4446
7
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4447
7
    auto RTy = FTy.getReturnType();
4448
4449
7
    const size_t ArgSize = FuncType.getParamTypes().size();
4450
7
    const size_t RetSize =
4451
7
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4452
7
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4453
7
    ArgsVec[0] = F.Fn.getFirstParam();
4454
7
    for (size_t I = 0; I < ArgSize; ++I) {
4455
0
      const size_t J = ArgSize - I;
4456
0
      ArgsVec[J] = stackPop();
4457
0
    }
4458
4459
7
    std::vector<LLVM::Value> FPtrRetsVec;
4460
7
    FPtrRetsVec.reserve(RetSize);
4461
7
    {
4462
7
      auto FPtr = Builder.createCall(
4463
7
          Context.getIntrinsic(
4464
7
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4465
7
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4466
7
                                          {Context.Int64x2Ty}, false)),
4467
7
          {Ref});
4468
7
      Builder.createCondBr(
4469
7
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4470
7
          NotNullBB, IsNullBB);
4471
7
      Builder.positionAtEnd(NotNullBB);
4472
4473
7
      auto FPtrRet =
4474
7
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4475
7
      if (RetSize == 0) {
4476
        // nothing to do
4477
6
      } else if (RetSize == 1) {
4478
6
        FPtrRetsVec.push_back(FPtrRet);
4479
6
      } else {
4480
0
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4481
0
          FPtrRetsVec.push_back(Val);
4482
0
        }
4483
0
      }
4484
7
    }
4485
4486
7
    Builder.createBr(EndBB);
4487
7
    Builder.positionAtEnd(IsNullBB);
4488
4489
7
    std::vector<LLVM::Value> RetsVec;
4490
7
    {
4491
7
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4492
7
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4493
7
      Builder.createArrayPtrStore(
4494
7
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4495
7
          kValSize);
4496
4497
7
      Builder.createCall(
4498
7
          Context.getIntrinsic(
4499
7
              Builder, Executable::Intrinsics::kCallRef,
4500
7
              LLVM::Type::getFunctionType(
4501
7
                  Context.VoidTy,
4502
7
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4503
7
                  false)),
4504
7
          {Ref, Args, Rets});
4505
4506
7
      if (RetSize == 0) {
4507
        // nothing to do
4508
6
      } else if (RetSize == 1) {
4509
6
        RetsVec.push_back(
4510
6
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4511
6
      } else {
4512
0
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4513
0
                                             kValSize);
4514
0
      }
4515
7
      Builder.createBr(EndBB);
4516
7
      Builder.positionAtEnd(EndBB);
4517
7
    }
4518
4519
13
    for (unsigned I = 0; I < RetSize; ++I) {
4520
6
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4521
6
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4522
6
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4523
6
      stackPush(PHIRet);
4524
6
    }
4525
7
  }
4526
4527
2
  void compileReturnCallRefOp(const unsigned int TypeIndex) noexcept {
4528
2
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4529
2
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4530
4531
2
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4532
2
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4533
2
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4534
2
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4535
2
        LLContext.getInt64(0)));
4536
2
    Builder.createCondBr(IsRefNotNull, OkBB,
4537
2
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4538
2
    Builder.positionAtEnd(OkBB);
4539
4540
2
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4541
2
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4542
2
    auto RTy = FTy.getReturnType();
4543
4544
2
    const size_t ArgSize = FuncType.getParamTypes().size();
4545
2
    const size_t RetSize =
4546
2
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4547
2
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4548
2
    ArgsVec[0] = F.Fn.getFirstParam();
4549
2
    for (size_t I = 0; I < ArgSize; ++I) {
4550
0
      const size_t J = ArgSize - I;
4551
0
      ArgsVec[J] = stackPop();
4552
0
    }
4553
4554
2
    {
4555
2
      auto FPtr = Builder.createCall(
4556
2
          Context.getIntrinsic(
4557
2
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4558
2
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4559
2
                                          {Context.Int64x2Ty}, false)),
4560
2
          {Ref});
4561
2
      Builder.createCondBr(
4562
2
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4563
2
          NotNullBB, IsNullBB);
4564
2
      Builder.positionAtEnd(NotNullBB);
4565
4566
2
      auto FPtrRet =
4567
2
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4568
2
      if (RetSize == 0) {
4569
1
        Builder.createRetVoid();
4570
1
      } else {
4571
1
        Builder.createRet(FPtrRet);
4572
1
      }
4573
2
    }
4574
4575
2
    Builder.positionAtEnd(IsNullBB);
4576
4577
2
    {
4578
2
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4579
2
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4580
2
      Builder.createArrayPtrStore(
4581
2
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4582
2
          kValSize);
4583
4584
2
      Builder.createCall(
4585
2
          Context.getIntrinsic(
4586
2
              Builder, Executable::Intrinsics::kCallRef,
4587
2
              LLVM::Type::getFunctionType(
4588
2
                  Context.VoidTy,
4589
2
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4590
2
                  false)),
4591
2
          {Ref, Args, Rets});
4592
4593
2
      if (RetSize == 0) {
4594
1
        Builder.createRetVoid();
4595
1
      } else if (RetSize == 1) {
4596
1
        Builder.createRet(
4597
1
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4598
1
      } else {
4599
0
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4600
0
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4601
0
      }
4602
2
    }
4603
2
  }
4604
4605
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4606
18.8k
                     LLVM::Type LoadTy) noexcept {
4607
18.8k
    if constexpr (kForceUnalignment) {
4608
18.8k
      Alignment = 0;
4609
18.8k
    }
4610
18.8k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4611
18.8k
    if (Offset != 0) {
4612
12.3k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4613
12.3k
    }
4614
4615
18.8k
    auto VPtr = Builder.createInBoundsGEP1(
4616
18.8k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4617
18.8k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4618
18.8k
    auto LoadInst = Builder.createLoad(LoadTy, Ptr, true);
4619
18.8k
    LoadInst.setAlignment(1 << Alignment);
4620
18.8k
    stackPush(switchEndian(LoadInst));
4621
18.8k
  }
4622
  void compileLoadOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4623
                     LLVM::Type LoadTy, LLVM::Type ExtendTy,
4624
7.72k
                     bool Signed) noexcept {
4625
7.72k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4626
7.72k
    if (Signed) {
4627
3.31k
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4628
4.40k
    } else {
4629
4.40k
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4630
4.40k
    }
4631
7.72k
  }
4632
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4633
4.84k
                           unsigned Alignment, LLVM::Type LoadTy) noexcept {
4634
4.84k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4635
4.84k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4636
4.84k
  }
4637
  void compileVectorLoadOp(unsigned MemoryIndex, unsigned Offset,
4638
                           unsigned Alignment, LLVM::Type LoadTy,
4639
1.74k
                           LLVM::Type ExtendTy, bool Signed) noexcept {
4640
1.74k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy, ExtendTy, Signed);
4641
1.74k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4642
1.74k
  }
4643
  void compileSplatLoadOp(unsigned MemoryIndex, unsigned Offset,
4644
                          unsigned Alignment, LLVM::Type LoadTy,
4645
604
                          LLVM::Type VectorTy) noexcept {
4646
604
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4647
604
    compileSplatOp(VectorTy);
4648
604
  }
4649
  void compileLoadLaneOp(unsigned MemoryIndex, unsigned Offset,
4650
                         unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4651
434
                         LLVM::Type VectorTy) noexcept {
4652
434
    auto Vector = stackPop();
4653
434
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4654
    if constexpr (Endian::native == Endian::big) {
4655
      Index = VectorTy.getVectorSize() - 1 - Index;
4656
    }
4657
434
    auto Value = Stack.back();
4658
434
    Stack.back() = Builder.createBitCast(
4659
434
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4660
434
                                    Value, LLContext.getInt64(Index)),
4661
434
        Context.Int64x2Ty);
4662
434
  }
4663
  void compileStoreOp(unsigned MemoryIndex, unsigned Offset, unsigned Alignment,
4664
                      LLVM::Type LoadTy, bool Trunc = false,
4665
3.40k
                      bool BitCast = false) noexcept {
4666
3.40k
    if constexpr (kForceUnalignment) {
4667
3.40k
      Alignment = 0;
4668
3.40k
    }
4669
3.40k
    auto V = stackPop();
4670
3.40k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4671
3.40k
    if (Offset != 0) {
4672
2.52k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4673
2.52k
    }
4674
4675
3.40k
    if (Trunc) {
4676
675
      V = Builder.createTrunc(V, LoadTy);
4677
675
    }
4678
3.40k
    if (BitCast) {
4679
282
      V = Builder.createBitCast(V, LoadTy);
4680
282
    }
4681
3.40k
    V = switchEndian(V);
4682
3.40k
    auto VPtr = Builder.createInBoundsGEP1(
4683
3.40k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4684
3.40k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4685
3.40k
    auto StoreInst = Builder.createStore(V, Ptr, true);
4686
3.40k
    StoreInst.setAlignment(1 << Alignment);
4687
3.40k
  }
4688
  void compileStoreLaneOp(unsigned MemoryIndex, unsigned Offset,
4689
                          unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4690
386
                          LLVM::Type VectorTy) noexcept {
4691
386
    auto Vector = Stack.back();
4692
    if constexpr (Endian::native == Endian::big) {
4693
      Index = VectorTy.getVectorSize() - Index - 1;
4694
    }
4695
386
    Stack.back() = Builder.createExtractElement(
4696
386
        Builder.createBitCast(Vector, VectorTy), LLContext.getInt64(Index));
4697
386
    compileStoreOp(MemoryIndex, Offset, Alignment, LoadTy);
4698
386
  }
4699
50.5k
  void compileSplatOp(LLVM::Type VectorTy) noexcept {
4700
50.5k
    auto Undef = LLVM::Value::getUndef(VectorTy);
4701
50.5k
    auto Zeros = LLVM::Value::getConstNull(
4702
50.5k
        LLVM::Type::getVectorType(Context.Int32Ty, VectorTy.getVectorSize()));
4703
50.5k
    auto Value = Builder.createTrunc(Stack.back(), VectorTy.getElementType());
4704
50.5k
    auto Vector =
4705
50.5k
        Builder.createInsertElement(Undef, Value, LLContext.getInt64(0));
4706
50.5k
    Vector = Builder.createShuffleVector(Vector, Undef, Zeros);
4707
4708
50.5k
    Stack.back() = Builder.createBitCast(Vector, Context.Int64x2Ty);
4709
50.5k
  }
4710
1.37k
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
4711
1.37k
    auto Vector = Builder.createBitCast(Stack.back(), VectorTy);
4712
    if constexpr (Endian::native == Endian::big) {
4713
      Index = VectorTy.getVectorSize() - Index - 1;
4714
    }
4715
1.37k
    Stack.back() =
4716
1.37k
        Builder.createExtractElement(Vector, LLContext.getInt64(Index));
4717
1.37k
  }
4718
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index,
4719
1.04k
                            LLVM::Type ExtendTy, bool Signed) noexcept {
4720
1.04k
    compileExtractLaneOp(VectorTy, Index);
4721
1.04k
    if (Signed) {
4722
560
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4723
560
    } else {
4724
483
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4725
483
    }
4726
1.04k
  }
4727
1.26k
  void compileReplaceLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
4728
1.26k
    auto Value = Builder.createTrunc(stackPop(), VectorTy.getElementType());
4729
1.26k
    auto Vector = Stack.back();
4730
    if constexpr (Endian::native == Endian::big) {
4731
      Index = VectorTy.getVectorSize() - Index - 1;
4732
    }
4733
1.26k
    Stack.back() = Builder.createBitCast(
4734
1.26k
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4735
1.26k
                                    Value, LLContext.getInt64(Index)),
4736
1.26k
        Context.Int64x2Ty);
4737
1.26k
  }
4738
  void compileVectorCompareOp(LLVM::Type VectorTy,
4739
5.19k
                              LLVMIntPredicate Predicate) noexcept {
4740
5.19k
    auto RHS = stackPop();
4741
5.19k
    auto LHS = stackPop();
4742
5.19k
    auto Result = Builder.createSExt(
4743
5.19k
        Builder.createICmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4744
5.19k
                           Builder.createBitCast(RHS, VectorTy)),
4745
5.19k
        VectorTy);
4746
5.19k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4747
5.19k
  }
4748
  void compileVectorCompareOp(LLVM::Type VectorTy, LLVMRealPredicate Predicate,
4749
3.13k
                              LLVM::Type ResultTy) noexcept {
4750
3.13k
    auto RHS = stackPop();
4751
3.13k
    auto LHS = stackPop();
4752
3.13k
    auto Result = Builder.createSExt(
4753
3.13k
        Builder.createFCmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4754
3.13k
                           Builder.createBitCast(RHS, VectorTy)),
4755
3.13k
        ResultTy);
4756
3.13k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4757
3.13k
  }
4758
  template <typename Func>
4759
24.8k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
24.8k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
24.8k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
24.8k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
2.12k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
2.12k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
2.12k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
2.12k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
2.48k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
2.48k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
2.48k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
2.48k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
105
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
105
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
105
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
105
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
2.21k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
2.21k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
2.21k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
2.21k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
541
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
541
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
541
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
541
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
783
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
783
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
783
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
783
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
319
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
319
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
319
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
319
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
1.34k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
1.34k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
1.34k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
1.34k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
2.37k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
2.37k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
2.37k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
2.37k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
1.70k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
1.70k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
1.70k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
1.70k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
377
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
377
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
377
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
377
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
920
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
920
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
920
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
920
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
5.80k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
5.80k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
5.80k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
5.80k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
666
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
666
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
666
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
666
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
1.89k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
1.89k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
1.89k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
1.89k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
595
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
595
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
595
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
595
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4759
625
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4760
625
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4761
625
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4762
625
  }
4763
2.12k
  void compileVectorAbs(LLVM::Type VectorTy) noexcept {
4764
2.12k
    compileVectorOp(VectorTy, [this, VectorTy](auto V) noexcept {
4765
2.12k
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4766
2.12k
      auto C = Builder.createICmpSLT(V, Zero);
4767
2.12k
      return Builder.createSelect(C, Builder.createNeg(V), V);
4768
2.12k
    });
4769
2.12k
  }
4770
2.48k
  void compileVectorNeg(LLVM::Type VectorTy) noexcept {
4771
2.48k
    compileVectorOp(VectorTy,
4772
2.48k
                    [this](auto V) noexcept { return Builder.createNeg(V); });
4773
2.48k
  }
4774
105
  void compileVectorPopcnt() noexcept {
4775
105
    compileVectorOp(Context.Int8x16Ty, [this](auto V) noexcept {
4776
105
      assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
4777
105
      return Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, V);
4778
105
    });
4779
105
  }
4780
  template <typename Func>
4781
2.33k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4782
2.33k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4783
2.33k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4784
2.33k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}&&)
Line
Count
Source
4781
106
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4782
106
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4783
106
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4784
106
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4781
963
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4782
963
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4783
963
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4784
963
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4781
1.27k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4782
1.27k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4783
1.27k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4784
1.27k
  }
4785
106
  void compileVectorAnyTrue() noexcept {
4786
106
    compileVectorReduceIOp(Context.Int128x1Ty, [this](auto V) noexcept {
4787
106
      auto Zero = LLVM::Value::getConstNull(Context.Int128x1Ty);
4788
106
      return Builder.createBitCast(Builder.createICmpNE(V, Zero),
4789
106
                                   LLContext.getInt1Ty());
4790
106
    });
4791
106
  }
4792
963
  void compileVectorAllTrue(LLVM::Type VectorTy) noexcept {
4793
963
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4794
963
      const auto Size = VectorTy.getVectorSize();
4795
963
      auto IntType = LLContext.getIntNTy(Size);
4796
963
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4797
963
      auto Cmp = Builder.createBitCast(Builder.createICmpEQ(V, Zero), IntType);
4798
963
      auto CmpZero = LLVM::Value::getConstInt(IntType, 0);
4799
963
      return Builder.createICmpEQ(Cmp, CmpZero);
4800
963
    });
4801
963
  }
4802
1.27k
  void compileVectorBitMask(LLVM::Type VectorTy) noexcept {
4803
1.27k
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4804
1.27k
      const auto Size = VectorTy.getVectorSize();
4805
1.27k
      auto IntType = LLContext.getIntNTy(Size);
4806
1.27k
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4807
1.27k
      return Builder.createBitCast(Builder.createICmpSLT(V, Zero), IntType);
4808
1.27k
    });
4809
1.27k
  }
4810
  template <typename Func>
4811
4.82k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4812
4.82k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4813
4.82k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4814
4.82k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4815
4.82k
    auto RHS = Builder.createVectorSplat(
4816
4.82k
        VectorTy.getVectorSize(),
4817
4.82k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4818
4.82k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4819
4.82k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4820
4.82k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4821
4.82k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4811
1.84k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4812
1.84k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4813
1.84k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4814
1.84k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4815
1.84k
    auto RHS = Builder.createVectorSplat(
4816
1.84k
        VectorTy.getVectorSize(),
4817
1.84k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4818
1.84k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4819
1.84k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4820
1.84k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4821
1.84k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4811
2.19k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4812
2.19k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4813
2.19k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4814
2.19k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4815
2.19k
    auto RHS = Builder.createVectorSplat(
4816
2.19k
        VectorTy.getVectorSize(),
4817
2.19k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4818
2.19k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4819
2.19k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4820
2.19k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4821
2.19k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4811
777
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4812
777
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4813
777
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4814
777
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4815
777
    auto RHS = Builder.createVectorSplat(
4816
777
        VectorTy.getVectorSize(),
4817
777
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
4818
777
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
4819
777
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4820
777
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4821
777
  }
4822
1.84k
  void compileVectorShl(LLVM::Type VectorTy) noexcept {
4823
1.84k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4824
1.84k
      return Builder.createShl(LHS, RHS);
4825
1.84k
    });
4826
1.84k
  }
4827
777
  void compileVectorLShr(LLVM::Type VectorTy) noexcept {
4828
777
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4829
777
      return Builder.createLShr(LHS, RHS);
4830
777
    });
4831
777
  }
4832
2.19k
  void compileVectorAShr(LLVM::Type VectorTy) noexcept {
4833
2.19k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
4834
2.19k
      return Builder.createAShr(LHS, RHS);
4835
2.19k
    });
4836
2.19k
  }
4837
  template <typename Func>
4838
8.24k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
8.24k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
8.24k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
8.24k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
8.24k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
414
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
414
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
414
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
414
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
414
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
1.47k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
1.47k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
1.47k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
1.47k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
1.47k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
788
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
788
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
788
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
788
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
788
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
401
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
401
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
401
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
401
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
401
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
297
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
297
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
297
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
297
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
297
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
303
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
303
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
303
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
303
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
303
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
409
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
409
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
409
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
409
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
409
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
987
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
987
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
987
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
987
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
987
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
279
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
279
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
279
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
279
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
279
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
429
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
429
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
429
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
429
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
429
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
162
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
162
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
162
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
162
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
162
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
180
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
180
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
180
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
180
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
180
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
460
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
460
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
460
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
460
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
460
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
248
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
248
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
248
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
248
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
248
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
204
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
204
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
204
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
204
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
204
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
294
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
294
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
294
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
294
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
294
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
197
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
197
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
197
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
197
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
197
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
387
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
387
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
387
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
387
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
387
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4838
334
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4839
334
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
4840
334
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
4841
334
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
4842
334
  }
4843
414
  // Lane-wise integer addition of two v128 operands viewed as VectorTy.
  // compileVectorVectorOp pops the operands (RHS first), bitcasts them to
  // VectorTy, and pushes the result back as Int64x2Ty.
  void compileVectorVectorAdd(LLVM::Type VectorTy) noexcept {
    const auto EmitAdd = [this](auto Left, auto Right) noexcept {
      return Builder.createAdd(Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitAdd);
  }
4848
1.47k
  // Lane-wise saturating addition of two v128 operands viewed as VectorTy.
  // `Signed` selects between the SAddSat and UAddSat intrinsics; the chosen
  // intrinsic is overloaded on VectorTy.
  void compileVectorVectorAddSat(LLVM::Type VectorTy, bool Signed) noexcept {
    const auto Intrinsic = Signed ? LLVM::Core::SAddSat : LLVM::Core::UAddSat;
    assuming(Intrinsic != LLVM::Core::NotIntrinsic);
    const auto EmitSatAdd = [this, VectorTy, Intrinsic](auto Left,
                                                        auto Right) noexcept {
      return Builder.createIntrinsic(Intrinsic, {VectorTy}, {Left, Right});
    };
    compileVectorVectorOp(VectorTy, EmitSatAdd);
  }
4856
788
  // Lane-wise integer subtraction (LHS - RHS) of two v128 operands viewed as
  // VectorTy. Stack handling is delegated to compileVectorVectorOp.
  void compileVectorVectorSub(LLVM::Type VectorTy) noexcept {
    const auto EmitSub = [this](auto Left, auto Right) noexcept {
      return Builder.createSub(Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitSub);
  }
4861
401
  // Lane-wise saturating subtraction (LHS - RHS) of two v128 operands viewed
  // as VectorTy. `Signed` selects between the SSubSat and USubSat intrinsics;
  // the chosen intrinsic is overloaded on VectorTy.
  void compileVectorVectorSubSat(LLVM::Type VectorTy, bool Signed) noexcept {
    const auto Intrinsic = Signed ? LLVM::Core::SSubSat : LLVM::Core::USubSat;
    assuming(Intrinsic != LLVM::Core::NotIntrinsic);
    const auto EmitSatSub = [this, VectorTy, Intrinsic](auto Left,
                                                        auto Right) noexcept {
      return Builder.createIntrinsic(Intrinsic, {VectorTy}, {Left, Right});
    };
    compileVectorVectorOp(VectorTy, EmitSatSub);
  }
4869
429
  // Lane-wise integer multiplication of two v128 operands viewed as VectorTy.
  // Stack handling is delegated to compileVectorVectorOp.
  void compileVectorVectorMul(LLVM::Type VectorTy) noexcept {
    const auto EmitMul = [this](auto Left, auto Right) noexcept {
      return Builder.createMul(Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitMul);
  }
4874
99
  void compileVectorSwizzle() noexcept {
4875
99
    auto Index = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4876
99
    auto Vector = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
4877
4878
99
#if defined(__x86_64__)
4879
99
    if (Context.SupportSSSE3) {
4880
99
      auto Magic = Builder.createVectorSplat(16, LLContext.getInt8(112));
4881
99
      auto Added = Builder.createAdd(Index, Magic);
4882
99
      auto NewIndex = Builder.createSelect(
4883
99
          Builder.createICmpUGT(Index, Added),
4884
99
          LLVM::Value::getConstAllOnes(Context.Int8x16Ty), Added);
4885
99
      assuming(LLVM::Core::X86SSSE3PShufB128 != LLVM::Core::NotIntrinsic);
4886
99
      stackPush(Builder.createBitCast(
4887
99
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PShufB128, {},
4888
99
                                  {Vector, NewIndex}),
4889
99
          Context.Int64x2Ty));
4890
99
      return;
4891
99
    }
4892
0
#endif
4893
4894
#if defined(__aarch64__)
4895
    if (Context.SupportNEON) {
4896
      assuming(LLVM::Core::AArch64NeonTbl1 != LLVM::Core::NotIntrinsic);
4897
      stackPush(Builder.createBitCast(
4898
          Builder.createIntrinsic(LLVM::Core::AArch64NeonTbl1,
4899
                                  {Context.Int8x16Ty}, {Vector, Index}),
4900
          Context.Int64x2Ty));
4901
      return;
4902
    }
4903
#endif
4904
4905
0
    auto Mask = Builder.createVectorSplat(16, LLContext.getInt8(15));
4906
0
    auto Zero = Builder.createVectorSplat(16, LLContext.getInt8(0));
4907
4908
#if defined(__s390x__)
4909
    assuming(LLVM::Core::S390VPerm != LLVM::Core::NotIntrinsic);
4910
    auto Exceed = Builder.createICmpULE(Index, Mask);
4911
    Index = Builder.createSub(Mask, Index);
4912
    auto Result = Builder.createIntrinsic(LLVM::Core::S390VPerm, {},
4913
                                          {Vector, Zero, Index});
4914
    Result = Builder.createSelect(Exceed, Result, Zero);
4915
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4916
    return;
4917
#endif
4918
4919
    // Fallback case.
4920
    // If the SSSE3 is not supported on the x86_64 platform or
4921
    // the NEON is not supported on the aarch64 platform,
4922
    // then fallback to this.
4923
0
    auto IsOver = Builder.createICmpUGT(Index, Mask);
4924
0
    auto InboundIndex = Builder.createAnd(Index, Mask);
4925
0
    auto Array = Builder.createArray(16, 1);
4926
0
    for (size_t I = 0; I < 16; ++I) {
4927
0
      Builder.createStore(
4928
0
          Builder.createExtractElement(Vector, LLContext.getInt64(I)),
4929
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array,
4930
0
                                     LLContext.getInt64(I)));
4931
0
    }
4932
0
    LLVM::Value Ret = LLVM::Value::getUndef(Context.Int8x16Ty);
4933
0
    for (size_t I = 0; I < 16; ++I) {
4934
0
      auto Idx =
4935
0
          Builder.createExtractElement(InboundIndex, LLContext.getInt64(I));
4936
0
      auto Value = Builder.createLoad(
4937
0
          Context.Int8Ty,
4938
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array, Idx));
4939
0
      Ret = Builder.createInsertElement(Ret, Value, LLContext.getInt64(I));
4940
0
    }
4941
0
    Ret = Builder.createSelect(IsOver, Zero, Ret);
4942
0
    stackPush(Builder.createBitCast(Ret, Context.Int64x2Ty));
4943
0
  }
4944
4945
162
  void compileVectorVectorQ15MulSat() noexcept {
4946
162
    compileVectorVectorOp(
4947
162
        Context.Int16x8Ty, [this](auto LHS, auto RHS) noexcept -> LLVM::Value {
4948
162
#if defined(__x86_64__)
4949
162
          if (Context.SupportSSSE3) {
4950
162
            assuming(LLVM::Core::X86SSSE3PMulHrSw128 !=
4951
162
                     LLVM::Core::NotIntrinsic);
4952
162
            auto Result = Builder.createIntrinsic(
4953
162
                LLVM::Core::X86SSSE3PMulHrSw128, {}, {LHS, RHS});
4954
162
            auto IntMaxV = Builder.createVectorSplat(
4955
162
                8, LLContext.getInt16(UINT16_C(0x8000)));
4956
162
            auto NotOver = Builder.createSExt(
4957
162
                Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4958
162
            return Builder.createXor(Result, NotOver);
4959
162
          }
4960
0
#endif
4961
4962
#if defined(__aarch64__)
4963
          if (Context.SupportNEON) {
4964
            assuming(LLVM::Core::AArch64NeonSQRDMulH !=
4965
                     LLVM::Core::NotIntrinsic);
4966
            return Builder.createBinaryIntrinsic(
4967
                LLVM::Core::AArch64NeonSQRDMulH, LHS, RHS);
4968
          }
4969
#endif
4970
4971
          // Fallback case.
4972
          // If the SSSE3 is not supported on the x86_64 platform or
4973
          // the NEON is not supported on the aarch64 platform,
4974
          // then fallback to this.
4975
0
          auto ExtTy = Context.Int16x8Ty.getExtendedElementVectorType();
4976
0
          auto Offset = Builder.createVectorSplat(
4977
0
              8, LLContext.getInt32(UINT32_C(0x4000)));
4978
0
          auto Shift =
4979
0
              Builder.createVectorSplat(8, LLContext.getInt32(UINT32_C(15)));
4980
0
          auto ExtLHS = Builder.createSExt(LHS, ExtTy);
4981
0
          auto ExtRHS = Builder.createSExt(RHS, ExtTy);
4982
0
          auto Result = Builder.createTrunc(
4983
0
              Builder.createAShr(
4984
0
                  Builder.createAdd(Builder.createMul(ExtLHS, ExtRHS), Offset),
4985
0
                  Shift),
4986
0
              Context.Int16x8Ty);
4987
0
          auto IntMaxV = Builder.createVectorSplat(
4988
0
              8, LLContext.getInt16(UINT16_C(0x8000)));
4989
0
          auto NotOver = Builder.createSExt(
4990
0
              Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
4991
0
          return Builder.createXor(Result, NotOver);
4992
162
        });
4993
162
  }
4994
297
  // Lane-wise signed minimum: each lane keeps LHS when LHS <= RHS (signed
  // compare) and RHS otherwise.
  void compileVectorVectorSMin(LLVM::Type VectorTy) noexcept {
    const auto EmitMin = [this](auto Left, auto Right) noexcept {
      auto KeepLeft = Builder.createICmpSLE(Left, Right);
      return Builder.createSelect(KeepLeft, Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitMin);
  }
5000
303
  // Lane-wise unsigned minimum: each lane keeps LHS when LHS <= RHS (unsigned
  // compare) and RHS otherwise.
  void compileVectorVectorUMin(LLVM::Type VectorTy) noexcept {
    const auto EmitMin = [this](auto Left, auto Right) noexcept {
      auto KeepLeft = Builder.createICmpULE(Left, Right);
      return Builder.createSelect(KeepLeft, Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitMin);
  }
5006
409
  // Lane-wise signed maximum: each lane keeps LHS when LHS >= RHS (signed
  // compare) and RHS otherwise.
  void compileVectorVectorSMax(LLVM::Type VectorTy) noexcept {
    const auto EmitMax = [this](auto Left, auto Right) noexcept {
      auto KeepLeft = Builder.createICmpSGE(Left, Right);
      return Builder.createSelect(KeepLeft, Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitMax);
  }
5012
987
  // Lane-wise unsigned maximum: each lane keeps LHS when LHS >= RHS (unsigned
  // compare) and RHS otherwise.
  void compileVectorVectorUMax(LLVM::Type VectorTy) noexcept {
    const auto EmitMax = [this](auto Left, auto Right) noexcept {
      auto KeepLeft = Builder.createICmpUGE(Left, Right);
      return Builder.createSelect(KeepLeft, Left, Right);
    };
    compileVectorVectorOp(VectorTy, EmitMax);
  }
5018
279
  // Lane-wise unsigned rounding average, (LHS + RHS + 1) >> 1, of two v128
  // operands viewed as VectorTy. The x86_64 SSE2 path only handles 8- and
  // 16-bit lanes (PAVGB / PAVGW); aarch64 with NEON uses URHADD; everything
  // else widens the lanes, adds, shifts and truncates.
  void compileVectorVectorUAvgr(LLVM::Type VectorTy) noexcept {
    auto WideTy = VectorTy.getExtendedElementVectorType();
    const auto EmitAvgr = [this, VectorTy,
                           WideTy](auto LHS, auto RHS) noexcept -> LLVM::Value {
#if defined(__x86_64__)
      if (Context.SupportSSE2) {
        // Pick the PAVG flavour that matches the lane width.
        const auto Intrinsic = [VectorTy]() noexcept {
          switch (VectorTy.getElementType().getIntegerBitWidth()) {
          case 8:
            return LLVM::Core::X86SSE2PAvgB;
          case 16:
            return LLVM::Core::X86SSE2PAvgW;
          default:
            assumingUnreachable();
          }
        }();
        assuming(Intrinsic != LLVM::Core::NotIntrinsic);
        return Builder.createIntrinsic(Intrinsic, {}, {LHS, RHS});
      }
#endif

#if defined(__aarch64__)
      if (Context.SupportNEON) {
        assuming(LLVM::Core::AArch64NeonURHAdd != LLVM::Core::NotIntrinsic);
        return Builder.createBinaryIntrinsic(LLVM::Core::AArch64NeonURHAdd, LHS,
                                             RHS);
      }
#endif

      // Generic fallback, used when SSE2 is unavailable on x86_64, NEON is
      // unavailable on aarch64, or on any other target. The sum is formed in
      // the widened lane type, then shifted and truncated back.
      auto WideLHS = Builder.createZExt(LHS, WideTy);
      auto WideRHS = Builder.createZExt(RHS, WideTy);
      // A splat of i1 true, zero-extended, gives a vector of ones that serves
      // both as the rounding term and as the shift amount.
      auto TrueLanes =
          Builder.createVectorSplat(WideTy.getVectorSize(), LLContext.getTrue());
      auto Ones = Builder.createZExt(TrueLanes, WideTy);
      auto Sum = Builder.createAdd(WideLHS, WideRHS);
      auto Rounded = Builder.createAdd(Sum, Ones);
      auto Halved = Builder.createLShr(Rounded, Ones);
      return Builder.createTrunc(Halved, VectorTy);
    };
    compileVectorVectorOp(VectorTy, EmitAvgr);
  }
5064
660
  // Saturating narrow: pops two v128 operands viewed as FromTy (16- or 32-bit
  // integer lanes), clamps every lane with signed compares to the range of the
  // half-width signed or unsigned type (chosen by `Signed`), truncates, and
  // pushes the concatenation of both halves.
  void compileVectorNarrow(LLVM::Type FromTy, bool Signed) noexcept {
    // Clamp bounds as scalar constants of the source lane type.
    auto [LowerBound, UpperBound] =
        [&]() noexcept -> std::tuple<LLVM::Value, LLVM::Value> {
      switch (FromTy.getElementType().getIntegerBitWidth()) {
      case 16: {
        // i16 lanes narrow to the int8_t / uint8_t range.
        const auto Lo =
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::min()
                                        : std::numeric_limits<uint8_t>::min());
        const auto Hi =
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::max()
                                        : std::numeric_limits<uint8_t>::max());
        return {LLContext.getInt16(static_cast<uint16_t>(Lo)),
                LLContext.getInt16(static_cast<uint16_t>(Hi))};
      }
      case 32: {
        // i32 lanes narrow to the int16_t / uint16_t range.
        const auto Lo =
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::min()
                                        : std::numeric_limits<uint16_t>::min());
        const auto Hi =
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::max()
                                        : std::numeric_limits<uint16_t>::max());
        return {LLContext.getInt32(static_cast<uint32_t>(Lo)),
                LLContext.getInt32(static_cast<uint32_t>(Hi))};
      }
      default:
        assumingUnreachable();
      }
    }();
    const auto Count = FromTy.getVectorSize();
    auto LowerLanes = Builder.createVectorSplat(Count, LowerBound);
    auto UpperLanes = Builder.createVectorSplat(Count, UpperBound);

    auto NarrowTy = FromTy.getTruncatedElementVectorType();

    // Clamp one operand into [LowerLanes, UpperLanes] and drop the upper half
    // of every lane.
    const auto ClampAndTruncate = [&](auto Raw) noexcept {
      auto Lanes = Builder.createBitCast(Raw, FromTy);
      auto TooSmall = Builder.createICmpSLT(Lanes, LowerLanes);
      Lanes = Builder.createSelect(TooSmall, LowerLanes, Lanes);
      auto TooLarge = Builder.createICmpSGT(Lanes, UpperLanes);
      Lanes = Builder.createSelect(TooLarge, UpperLanes, Lanes);
      return Builder.createTrunc(Lanes, NarrowTy);
    };

    // The top of the stack is the second operand.
    auto Second = ClampAndTruncate(stackPop());
    auto First = ClampAndTruncate(stackPop());

    // Identity mask over 2 * Count lanes: the shuffle simply concatenates its
    // two inputs. The inputs are swapped on big-endian hosts.
    std::vector<uint32_t> Concat(Count * 2);
    std::iota(Concat.begin(), Concat.end(), 0);
    const bool IsLittle = Endian::native == Endian::little;
    auto Packed = Builder.createShuffleVector(
        IsLittle ? First : Second, IsLittle ? Second : First,
        LLVM::Value::getConstVector32(LLContext, Concat));
    stackPush(Builder.createBitCast(Packed, Context.Int64x2Ty));
  }
5117
5.88k
  // Widening conversion of one half of the vector on top of the stack.
  // The operand is viewed as FromTy, every lane is sign- or zero-extended to
  // twice its width (per `Signed`), and the half selected by `Low` is kept.
  // The stack slot is rewritten in place rather than popped and pushed.
  void compileVectorExtend(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
    auto WideTy = FromTy.getExtendedElementVectorType();
    const auto Count = FromTy.getVectorSize();
    // On big-endian hosts the requested half is swapped.
    if constexpr (Endian::native == Endian::big) {
      Low = !Low;
    }
    // Consecutive lane indices covering the chosen half.
    const auto FirstLane = Low ? 0 : Count / 2;
    std::vector<uint32_t> HalfMask(Count / 2);
    std::iota(HalfMask.begin(), HalfMask.end(), FirstLane);

    auto Source = Builder.createBitCast(Stack.back(), FromTy);
    auto Widened = Signed ? Builder.createSExt(Source, WideTy)
                          : Builder.createZExt(Source, WideTy);
    auto Half = Builder.createShuffleVector(
        Widened, LLVM::Value::getUndef(WideTy),
        LLVM::Value::getConstVector32(LLContext, HalfMask));
    Stack.back() = Builder.createBitCast(Half, Context.Int64x2Ty);
  }
5136
2.05k
  // Extended multiplication: pops two v128 operands, views each as FromTy,
  // sign- or zero-extends every lane to twice its width (per `Signed`), keeps
  // the half selected by `Low`, multiplies the two halves lane-wise and pushes
  // the product as Int64x2Ty.
  void compileVectorExtMul(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
    auto ExtTy = FromTy.getExtendedElementVectorType();
    const auto Count = FromTy.getVectorSize();
    // Swap the requested half on big-endian hosts, exactly as
    // compileVectorExtend does; without this the low/high variants would pick
    // the opposite half there. This is a no-op on little-endian hosts.
    // NOTE(review): assumes callers pass `Low` uncorrected, the same way they
    // do for compileVectorExtend - confirm against the opcode dispatch.
    if constexpr (Endian::native == Endian::big) {
      Low = !Low;
    }
    // Consecutive lane indices covering the chosen half.
    std::vector<uint32_t> Mask(Count / 2);
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
    // Widen all lanes of one operand, then keep only the selected half.
    auto Extend = [this, FromTy, Signed, ExtTy, &Mask](LLVM::Value R) noexcept {
      R = Builder.createBitCast(R, FromTy);
      if (Signed) {
        R = Builder.createSExt(R, ExtTy);
      } else {
        R = Builder.createZExt(R, ExtTy);
      }
      return Builder.createShuffleVector(
          R, LLVM::Value::getUndef(ExtTy),
          LLVM::Value::getConstVector32(LLContext, Mask));
    };
    // The top of the stack is the right-hand operand.
    auto RHS = Extend(stackPop());
    auto LHS = Extend(stackPop());
    stackPush(
        Builder.createBitCast(Builder.createMul(RHS, LHS), Context.Int64x2Ty));
  }
5157
2.21k
  // Pairwise extended addition: adjacent lanes of the operand (viewed as
  // VectorTy) are sign- or zero-extended (per `Signed`) and summed, giving a
  // vector with half as many lanes of twice the width. The operand handling
  // is delegated to compileVectorOp; the lambda only builds the result value.
  void compileVectorExtAddPairwise(LLVM::Type VectorTy, bool Signed) noexcept {
    compileVectorOp(
        VectorTy, [this, VectorTy, Signed](auto Src) noexcept -> LLVM::Value {
          // Result type: double-width lanes, half as many of them.
          auto PairTy = VectorTy.getExtendedElementVectorType()
                            .getHalfElementsVectorType();
#if defined(__x86_64__)
          const auto Lanes = VectorTy.getVectorSize();
          if (Context.SupportXOP) {
            // XOP provides a horizontal-add intrinsic for each lane count
            // and signedness.
            const auto Intrinsic = [Lanes, Signed]() noexcept {
              switch (Lanes) {
              case 8:
                return Signed ? LLVM::Core::X86XOpVPHAddWD
                              : LLVM::Core::X86XOpVPHAddUWD;
              case 16:
                return Signed ? LLVM::Core::X86XOpVPHAddBW
                              : LLVM::Core::X86XOpVPHAddUBW;
              default:
                assumingUnreachable();
              }
            }();
            assuming(Intrinsic != LLVM::Core::NotIntrinsic);
            return Builder.createUnaryIntrinsic(Intrinsic, Src);
          }
          if (Context.SupportSSSE3 && Lanes == 16) {
            // Multiply-add against a vector of ones. The operand order
            // decides which side PMADDUBSW reads as unsigned, so the data
            // goes second for the signed variant and first for the unsigned.
            assuming(LLVM::Core::X86SSSE3PMAddUbSw128 !=
                     LLVM::Core::NotIntrinsic);
            auto Ones = Builder.createVectorSplat(16, LLContext.getInt8(1));
            if (Signed) {
              return Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128,
                                             {}, {Ones, Src});
            }
            return Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
                                           {Src, Ones});
          }
          if (Context.SupportSSE2 && Lanes == 8) {
            assuming(LLVM::Core::X86SSE2PMAddWd != LLVM::Core::NotIntrinsic);
            if (Signed) {
              return Builder.createIntrinsic(
                  LLVM::Core::X86SSE2PMAddWd, {},
                  {Src, Builder.createVectorSplat(8, LLContext.getInt16(1))});
            }
            // Unsigned: flip the top bit of every 16-bit lane (a bias of
            // 0x8000 per lane), run the multiply-add, then add 0x10000 to
            // each 32-bit sum to undo the bias of the two lanes in the pair.
            auto Biased = Builder.createXor(
                Src, Builder.createVectorSplat(8, LLContext.getInt16(0x8000)));
            auto BiasedSum = Builder.createIntrinsic(
                LLVM::Core::X86SSE2PMAddWd, {},
                {Biased, Builder.createVectorSplat(8, LLContext.getInt16(1))});
            return Builder.createAdd(
                BiasedSum,
                Builder.createVectorSplat(4, LLContext.getInt32(0x10000)));
          }
#endif

#if defined(__aarch64__)
          if (Context.SupportNEON) {
            const auto Intrinsic = Signed ? LLVM::Core::AArch64NeonSAddLP
                                          : LLVM::Core::AArch64NeonUAddLP;
            assuming(Intrinsic != LLVM::Core::NotIntrinsic);
            return Builder.createIntrinsic(Intrinsic, {PairTy, VectorTy},
                                           {Src});
          }
#endif

          // Generic fallback, used when none of XOP, SSSE3 or SSE2 applies on
          // x86_64, NEON is unavailable on aarch64, or on any other target.
          // Each wide lane holds one pair: its upper half is extracted with a
          // right shift, its lower half with a left shift followed by a right
          // shift, and the two are added.
          auto WidthScalar = LLVM::Value::getConstInt(
              PairTy.getElementType(),
              VectorTy.getElementType().getIntegerBitWidth());
          auto Width =
              Builder.createVectorSplat(PairTy.getVectorSize(), WidthScalar);
          auto Pairs = Builder.createBitCast(Src, PairTy);
          auto Upper = Signed ? Builder.createAShr(Pairs, Width)
                              : Builder.createLShr(Pairs, Width);
          auto Shifted = Builder.createShl(Pairs, Width);
          auto Lower = Signed ? Builder.createAShr(Shifted, Width)
                              : Builder.createLShr(Shifted, Width);
          return Builder.createAdd(Upper, Lower);
        });
  }
5240
541
  // Lane-wise floating-point absolute value via the Fabs intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFAbs(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto Intrinsic = LLVM::Core::Fabs;
      assuming(Intrinsic != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(Intrinsic, Operand);
    });
  }
5246
783
  // Lane-wise floating-point negation. Operand handling is delegated to
  // compileVectorOp.
  void compileVectorFNeg(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      return Builder.createFNeg(Operand);
    });
  }
5250
319
  // Lane-wise floating-point square root via the Sqrt intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFSqrt(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto Intrinsic = LLVM::Core::Sqrt;
      assuming(Intrinsic != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(Intrinsic, Operand);
    });
  }
5256
1.34k
  // Lane-wise floating-point ceiling via the Ceil intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFCeil(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto Intrinsic = LLVM::Core::Ceil;
      assuming(Intrinsic != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(Intrinsic, Operand);
    });
  }
5262
2.37k
  // Lane-wise floating-point floor via the Floor intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFFloor(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto Intrinsic = LLVM::Core::Floor;
      assuming(Intrinsic != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(Intrinsic, Operand);
    });
  }
5268
1.70k
  // Lane-wise floating-point truncation via the Trunc intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFTrunc(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [this](auto Operand) noexcept {
      const auto Intrinsic = LLVM::Core::Trunc;
      assuming(Intrinsic != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(Intrinsic, Operand);
    });
  }
5274
377
  // Lane-wise floating-point "nearest" rounding.
  // Strategy, in order of preference:
  //   1. the Roundeven intrinsic (LLVM 12 or newer, not on s390x),
  //   2. ROUNDPS / ROUNDPD with immediate 8 on x86_64 with SSE4.1,
  //   3. FRINTN on aarch64 with NEON,
  //   4. the Nearbyint intrinsic.
  // Operand handling is delegated to compileVectorOp.
  void compileVectorFNearest(LLVM::Type VectorTy) noexcept {
    compileVectorOp(VectorTy, [&](auto V) noexcept {
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
      // Runtime availability check only. An `assuming` on this same condition
      // used to precede it, which made the fallbacks below unreachable (or
      // aborted) in exactly the situation they exist for.
      if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, V);
      }
#endif

#if defined(__x86_64__)
      if (Context.SupportSSE4_1) {
        // Pick the packed-single or packed-double flavour from the lane type.
        const bool IsFloat = VectorTy.getElementType().isFloatTy();
        auto ID =
            IsFloat ? LLVM::Core::X86SSE41RoundPs : LLVM::Core::X86SSE41RoundPd;
        assuming(ID != LLVM::Core::NotIntrinsic);
        return Builder.createIntrinsic(ID, {}, {V, LLContext.getInt32(8)});
      }
#endif

#if defined(__aarch64__)
      if (Context.SupportNEON &&
          LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
        return Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, V);
      }
#endif

      // Fallback case.
      // If the SSE4.1 is not supported on the x86_64 platform or
      // the NEON is not supported on the aarch64 platform,
      // then fallback to this.
      assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
      return Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, V);
    });
  }
5308
180
  /// Lowers a lane-wise floating-point addition. compileVectorVectorOp
  /// handles the operands; the callback only emits the fadd.
  void compileVectorVectorFAdd(LLVM::Type VectorTy) noexcept {
    auto EmitAdd = [this](auto A, auto B) noexcept {
      return Builder.createFAdd(A, B);
    };
    compileVectorVectorOp(VectorTy, EmitAdd);
  }
5313
460
  /// Lowers a lane-wise floating-point subtraction. compileVectorVectorOp
  /// handles the operands; the callback only emits the fsub.
  void compileVectorVectorFSub(LLVM::Type VectorTy) noexcept {
    auto EmitSub = [this](auto A, auto B) noexcept {
      return Builder.createFSub(A, B);
    };
    compileVectorVectorOp(VectorTy, EmitSub);
  }
5318
248
  /// Lowers a lane-wise floating-point multiplication. compileVectorVectorOp
  /// handles the operands; the callback only emits the fmul.
  void compileVectorVectorFMul(LLVM::Type VectorTy) noexcept {
    auto EmitMul = [this](auto A, auto B) noexcept {
      return Builder.createFMul(A, B);
    };
    compileVectorVectorOp(VectorTy, EmitMul);
  }
5323
204
  /// Lowers a lane-wise floating-point division. compileVectorVectorOp
  /// handles the operands; the callback only emits the fdiv.
  void compileVectorVectorFDiv(LLVM::Type VectorTy) noexcept {
    auto EmitDiv = [this](auto A, auto B) noexcept {
      return Builder.createFDiv(A, B);
    };
    compileVectorVectorOp(VectorTy, EmitDiv);
  }
5328
294
  /// Lowers a lane-wise floating-point minimum.
  ///
  /// The result is assembled from a chain of selects, where later selects
  /// take priority over earlier ones:
  ///   - base value: bitwise OR of both operands (used when neither ordered
  ///     comparison holds, e.g. for equal lanes),
  ///   - the smaller operand when the lanes compare ordered-unequal,
  ///   - the NaN operand when either side is NaN (left side wins).
  void compileVectorVectorFMin(LLVM::Type VectorTy) noexcept {
    auto EmitMin = [this](auto A, auto B) noexcept {
      auto AIsNaN = Builder.createFCmpUNO(A, A);
      auto BIsNaN = Builder.createFCmpUNO(B, B);
      auto ALess = Builder.createFCmpOLT(A, B);
      auto AGreater = Builder.createFCmpOGT(A, B);

      // Combine the raw bit patterns of both operands with OR.
      auto ABits = Builder.createBitCast(A, Context.Int64x2Ty);
      auto BBits = Builder.createBitCast(B, Context.Int64x2Ty);
      auto Merged =
          Builder.createBitCast(Builder.createOr(ABits, BBits), A.getType());

      auto Result = Builder.createSelect(AGreater, B, Merged);
      Result = Builder.createSelect(ALess, A, Result);
      Result = Builder.createSelect(BIsNaN, B, Result);
      Result = Builder.createSelect(AIsNaN, A, Result);
      return Result;
    };
    compileVectorVectorOp(VectorTy, EmitMin);
  }
5345
197
  /// Lowers a lane-wise floating-point maximum.
  ///
  /// The result is assembled from a chain of selects, where later selects
  /// take priority over earlier ones:
  ///   - base value: bitwise AND of both operands (used when neither ordered
  ///     comparison holds, e.g. for equal lanes),
  ///   - the larger operand when the lanes compare ordered-unequal,
  ///   - the NaN operand when either side is NaN (left side wins).
  void compileVectorVectorFMax(LLVM::Type VectorTy) noexcept {
    auto EmitMax = [this](auto A, auto B) noexcept {
      auto AIsNaN = Builder.createFCmpUNO(A, A);
      auto BIsNaN = Builder.createFCmpUNO(B, B);
      auto ALess = Builder.createFCmpOLT(A, B);
      auto AGreater = Builder.createFCmpOGT(A, B);

      // Combine the raw bit patterns of both operands with AND.
      auto ABits = Builder.createBitCast(A, Context.Int64x2Ty);
      auto BBits = Builder.createBitCast(B, Context.Int64x2Ty);
      auto Merged =
          Builder.createBitCast(Builder.createAnd(ABits, BBits), A.getType());

      auto Result = Builder.createSelect(ALess, B, Merged);
      Result = Builder.createSelect(AGreater, A, Result);
      Result = Builder.createSelect(BIsNaN, B, Result);
      Result = Builder.createSelect(AIsNaN, A, Result);
      return Result;
    };
    compileVectorVectorOp(VectorTy, EmitMax);
  }
5362
387
  /// Lowers the lane-wise pseudo-minimum: each lane yields the right operand
  /// when it compares ordered-less-than the left one, otherwise the left
  /// operand.
  void compileVectorVectorFPMin(LLVM::Type VectorTy) noexcept {
    auto EmitPMin = [this](auto A, auto B) noexcept {
      auto BIsSmaller = Builder.createFCmpOLT(B, A);
      return Builder.createSelect(BIsSmaller, B, A);
    };
    compileVectorVectorOp(VectorTy, EmitPMin);
  }
5368
334
  /// Lowers the lane-wise pseudo-maximum: each lane yields the right operand
  /// when it compares ordered-greater-than the left one, otherwise the left
  /// operand.
  void compileVectorVectorFPMax(LLVM::Type VectorTy) noexcept {
    auto EmitPMax = [this](auto A, auto B) noexcept {
      auto BIsLarger = Builder.createFCmpOGT(B, A);
      return Builder.createSelect(BIsLarger, B, A);
    };
    compileVectorVectorOp(VectorTy, EmitPMax);
  }
5374
920
  /// Lowers a saturating float-to-signed-i32 vector truncation.
  ///
  /// Per lane: NaN becomes 0, values below INT32_MIN clamp to INT32_MIN and
  /// values not below INT32_MAX (as converted to the lane's float type) clamp
  /// to INT32_MAX. When `PadZero` is set the integer result is widened to
  /// twice the lane count, with the extra lanes filled with zeros.
  void compileVectorTruncSatS32(LLVM::Type VectorTy, bool PadZero) noexcept {
    auto EmitTruncSat = [this, VectorTy, PadZero](auto Lanes) noexcept {
      const auto LaneCount = VectorTy.getVectorSize();
      auto LaneFPTy = VectorTy.getElementType();

      // Integer saturation bounds, as scalars and as splatted vectors.
      auto MinInt = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::min()));
      auto MaxInt = LLContext.getInt32(
          static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
      auto MinIntV = Builder.createVectorSplat(LaneCount, MinInt);
      auto MaxIntV = Builder.createVectorSplat(LaneCount, MaxInt);
      auto ZeroIntV = LLVM::Value::getConstNull(MinIntV.getType());

      // The same bounds expressed in the source floating-point type.
      auto MinFP = Builder.createSIToFP(MinInt, LaneFPTy);
      auto MaxFP = Builder.createSIToFP(MaxInt, LaneFPTy);
      auto MinFPV = Builder.createVectorSplat(LaneCount, MinFP);
      auto MaxFPV = Builder.createVectorSplat(LaneCount, MaxFP);

      // Range predicates are computed on the original floating-point lanes.
      auto IsOrdered = Builder.createFCmpORD(Lanes, Lanes);
      auto NotBelowMin = Builder.createFCmpUGE(Lanes, MinFPV);
      auto BelowMax = Builder.createFCmpULT(Lanes, MaxFPV);

      auto Result = Builder.createFPToSI(
          Lanes, LLVM::Type::getVectorType(LLContext.getInt32Ty(), LaneCount));
      Result = Builder.createSelect(IsOrdered, Result, ZeroIntV);
      Result = Builder.createSelect(NotBelowMin, Result, MinIntV);
      Result = Builder.createSelect(BelowMax, Result, MaxIntV);

      if (PadZero) {
        // Identity mask over 2 * LaneCount lanes: the second shuffle operand
        // provides the extra lanes.
        std::vector<uint32_t> Indices(LaneCount * 2);
        std::iota(Indices.begin(), Indices.end(), 0);
        auto MaskV = LLVM::Value::getConstVector32(LLContext, Indices);
        if constexpr (Endian::native == Endian::little) {
          Result = Builder.createShuffleVector(Result, ZeroIntV, MaskV);
        } else {
          Result = Builder.createShuffleVector(ZeroIntV, Result, MaskV);
        }
      }
      return Result;
    };
    compileVectorOp(VectorTy, EmitTruncSat);
  }
5412
5.80k
  /// Lowers a saturating float-to-unsigned-i32 vector truncation.
  ///
  /// Per lane: NaN and values below 0 become 0, and values not below
  /// UINT32_MAX (as converted to the lane's float type) clamp to UINT32_MAX.
  /// When `PadZero` is set the integer result is widened to twice the lane
  /// count, with the extra lanes filled with zeros.
  void compileVectorTruncSatU32(LLVM::Type VectorTy, bool PadZero) noexcept {
    auto EmitTruncSat = [this, VectorTy, PadZero](auto Lanes) noexcept {
      const auto LaneCount = VectorTy.getVectorSize();
      auto LaneFPTy = VectorTy.getElementType();

      // Integer saturation bounds, as scalars and as splatted vectors.
      auto MinInt = LLContext.getInt32(std::numeric_limits<uint32_t>::min());
      auto MaxInt = LLContext.getInt32(std::numeric_limits<uint32_t>::max());
      auto MinIntV = Builder.createVectorSplat(LaneCount, MinInt);
      auto MaxIntV = Builder.createVectorSplat(LaneCount, MaxInt);

      // The same bounds expressed in the source floating-point type.
      auto MinFP = Builder.createUIToFP(MinInt, LaneFPTy);
      auto MaxFP = Builder.createUIToFP(MaxInt, LaneFPTy);
      auto MinFPV = Builder.createVectorSplat(LaneCount, MinFP);
      auto MaxFPV = Builder.createVectorSplat(LaneCount, MaxFP);

      // The ordered lower-bound compare is false for NaN, so NaN lanes take
      // the minimum (zero).
      auto NotBelowMin = Builder.createFCmpOGE(Lanes, MinFPV);
      auto BelowMax = Builder.createFCmpULT(Lanes, MaxFPV);

      auto Result = Builder.createFPToUI(
          Lanes, LLVM::Type::getVectorType(LLContext.getInt32Ty(), LaneCount));
      Result = Builder.createSelect(NotBelowMin, Result, MinIntV);
      Result = Builder.createSelect(BelowMax, Result, MaxIntV);

      if (PadZero) {
        auto ZeroIntV = LLVM::Value::getConstNull(MinIntV.getType());
        // Identity mask over 2 * LaneCount lanes: the second shuffle operand
        // provides the extra lanes.
        std::vector<uint32_t> Indices(LaneCount * 2);
        std::iota(Indices.begin(), Indices.end(), 0);
        auto MaskV = LLVM::Value::getConstVector32(LLContext, Indices);
        if constexpr (Endian::native == Endian::little) {
          Result = Builder.createShuffleVector(Result, ZeroIntV, MaskV);
        } else {
          Result = Builder.createShuffleVector(ZeroIntV, Result, MaskV);
        }
      }
      return Result;
    };
    compileVectorOp(VectorTy, EmitTruncSat);
  }
5446
  void compileVectorConvertS(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5447
666
                             bool Low) noexcept {
5448
666
    compileVectorOp(VectorTy,
5449
666
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5450
666
                      if (Low) {
5451
333
                        const auto Size = VectorTy.getVectorSize() / 2;
5452
333
                        std::vector<uint32_t> Mask(Size);
5453
333
                        if constexpr (Endian::native == Endian::little) {
5454
333
                          std::iota(Mask.begin(), Mask.end(), 0);
5455
                        } else {
5456
                          std::iota(Mask.begin(), Mask.end(), Size);
5457
                        }
5458
333
                        V = Builder.createShuffleVector(
5459
333
                            V, LLVM::Value::getUndef(VectorTy),
5460
333
                            LLVM::Value::getConstVector32(LLContext, Mask));
5461
333
                      }
5462
666
                      return Builder.createSIToFP(V, FPVectorTy);
5463
666
                    });
5464
666
  }
5465
  void compileVectorConvertU(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5466
1.89k
                             bool Low) noexcept {
5467
1.89k
    compileVectorOp(VectorTy,
5468
1.89k
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5469
1.89k
                      if (Low) {
5470
1.20k
                        const auto Size = VectorTy.getVectorSize() / 2;
5471
1.20k
                        std::vector<uint32_t> Mask(Size);
5472
1.20k
                        if constexpr (Endian::native == Endian::little) {
5473
1.20k
                          std::iota(Mask.begin(), Mask.end(), 0);
5474
                        } else {
5475
                          std::iota(Mask.begin(), Mask.end(), Size);
5476
                        }
5477
1.20k
                        V = Builder.createShuffleVector(
5478
1.20k
                            V, LLVM::Value::getUndef(VectorTy),
5479
1.20k
                            LLVM::Value::getConstVector32(LLContext, Mask));
5480
1.20k
                      }
5481
1.89k
                      return Builder.createUIToFP(V, FPVectorTy);
5482
1.89k
                    });
5483
1.89k
  }
5484
595
  void compileVectorDemote() noexcept {
5485
595
    compileVectorOp(Context.Doublex2Ty, [this](auto V) noexcept {
5486
595
      auto Demoted = Builder.createFPTrunc(
5487
595
          V, LLVM::Type::getVectorType(Context.FloatTy, 2));
5488
595
      auto ZeroV = LLVM::Value::getConstNull(Demoted.getType());
5489
595
      if constexpr (Endian::native == Endian::little) {
5490
595
        return Builder.createShuffleVector(
5491
595
            Demoted, ZeroV,
5492
595
            LLVM::Value::getConstVector32(LLContext, {0u, 1u, 2u, 3u}));
5493
      } else {
5494
        return Builder.createShuffleVector(
5495
            Demoted, ZeroV,
5496
            LLVM::Value::getConstVector32(LLContext, {3u, 2u, 1u, 0u}));
5497
      }
5498
595
    });
5499
595
  }
5500
625
  void compileVectorPromote() noexcept {
5501
625
    compileVectorOp(Context.Floatx4Ty, [this](auto V) noexcept {
5502
625
      auto UndefV = LLVM::Value::getUndef(V.getType());
5503
625
      auto Low = Builder.createShuffleVector(
5504
625
          V, UndefV, LLVM::Value::getConstVector32(LLContext, {0u, 1u}));
5505
625
      return Builder.createFPExt(
5506
625
          Low, LLVM::Type::getVectorType(Context.DoubleTy, 2));
5507
625
    });
5508
625
  }
5509
5510
8
  /// Lowers a lane-wise multiply-add, `LHS * RHS + C`, as a separate fmul
  /// followed by an fadd. The three operands are popped from the value stack
  /// (C first, then RHS, then LHS) and the result is pushed back as a
  /// 2 x i64 vector.
  void compileVectorVectorMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplier = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplicand = Builder.createBitCast(stackPop(), VectorTy);

    auto Product = Builder.createFMul(Multiplicand, Multiplier);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5518
5519
20
  /// Lowers a lane-wise negated multiply-add, `-(LHS) * RHS + C`, as an fneg,
  /// an fmul and an fadd. The three operands are popped from the value stack
  /// (C first, then RHS, then LHS) and the result is pushed back as a
  /// 2 x i64 vector.
  void compileVectorVectorNMAdd(LLVM::Type VectorTy) noexcept {
    auto Addend = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplier = Builder.createBitCast(stackPop(), VectorTy);
    auto Multiplicand = Builder.createBitCast(stackPop(), VectorTy);

    auto Negated = Builder.createFNeg(Multiplicand);
    auto Product = Builder.createFMul(Negated, Multiplier);
    auto Sum = Builder.createFAdd(Product, Addend);
    stackPush(Builder.createBitCast(Sum, Context.Int64x2Ty));
  }
5527
5528
6
  void compileVectorRelaxedIntegerDotProduct() noexcept {
5529
6
    auto OriTy = Context.Int8x16Ty;
5530
6
    auto ExtTy = Context.Int16x8Ty;
5531
6
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5532
6
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5533
6
#if defined(__x86_64__)
5534
6
    if (Context.SupportSSSE3) {
5535
6
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5536
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5537
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5538
      // side to match the WebAssembly spec
5539
6
      return stackPush(Builder.createBitCast(
5540
6
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5541
6
                                  {RHS, LHS}),
5542
6
          Context.Int64x2Ty));
5543
6
    }
5544
0
#endif
5545
0
    auto Width = LLVM::Value::getConstInt(
5546
0
        ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5547
0
    Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5548
0
    auto EA = Builder.createBitCast(LHS, ExtTy);
5549
0
    auto EB = Builder.createBitCast(RHS, ExtTy);
5550
5551
0
    LLVM::Value AL, AR, BL, BR;
5552
0
    AL = Builder.createAShr(EA, Width);
5553
0
    AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5554
0
    BL = Builder.createAShr(EB, Width);
5555
0
    BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5556
5557
0
    return stackPush(Builder.createBitCast(
5558
0
        Builder.createAdd(Builder.createMul(AL, BL), Builder.createMul(AR, BR)),
5559
0
        Context.Int64x2Ty));
5560
6
  }
5561
5562
8
  void compileVectorRelaxedIntegerDotProductAdd() noexcept {
5563
8
    auto OriTy = Context.Int8x16Ty;
5564
8
    auto ExtTy = Context.Int16x8Ty;
5565
8
    auto FinTy = Context.Int32x4Ty;
5566
8
    auto VC = Builder.createBitCast(stackPop(), FinTy);
5567
8
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5568
8
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5569
8
    LLVM::Value IM;
5570
8
#if defined(__x86_64__)
5571
8
    if (Context.SupportSSSE3) {
5572
8
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5573
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5574
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5575
      // side to match the WebAssembly spec
5576
8
      IM = Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5577
8
                                   {RHS, LHS});
5578
8
    } else
5579
0
#endif
5580
0
    {
5581
0
      auto Width = LLVM::Value::getConstInt(
5582
0
          ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5583
0
      Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5584
0
      auto EA = Builder.createBitCast(LHS, ExtTy);
5585
0
      auto EB = Builder.createBitCast(RHS, ExtTy);
5586
5587
0
      LLVM::Value AL, AR, BL, BR;
5588
0
      AL = Builder.createAShr(EA, Width);
5589
0
      AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5590
0
      BL = Builder.createAShr(EB, Width);
5591
0
      BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5592
0
      IM = Builder.createAdd(Builder.createMul(AL, BL),
5593
0
                             Builder.createMul(AR, BR));
5594
0
    }
5595
5596
8
    auto Width = LLVM::Value::getConstInt(
5597
8
        FinTy.getElementType(), ExtTy.getElementType().getIntegerBitWidth());
5598
8
    Width = Builder.createVectorSplat(FinTy.getVectorSize(), Width);
5599
8
    auto IME = Builder.createBitCast(IM, FinTy);
5600
8
    auto L = Builder.createAShr(IME, Width);
5601
8
    auto R = Builder.createAShr(Builder.createShl(IME, Width), Width);
5602
5603
8
    return stackPush(Builder.createBitCast(
5604
8
        Builder.createAdd(Builder.createAdd(L, R), VC), Context.Int64x2Ty));
5605
8
  }
5606
5607
  void
5608
  enterBlock(LLVM::BasicBlock JumpBlock, LLVM::BasicBlock NextBlock,
5609
             LLVM::BasicBlock ElseBlock, std::vector<LLVM::Value> Args,
5610
             std::pair<std::vector<ValType>, std::vector<ValType>> Type,
5611
             std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5612
21.1k
                 ReturnPHI = {}) noexcept {
5613
21.1k
    assuming(Type.first.size() == Args.size());
5614
21.1k
    for (auto &Value : Args) {
5615
4.50k
      stackPush(Value);
5616
4.50k
    }
5617
21.1k
    const auto Unreachable = isUnreachable();
5618
21.1k
    ControlStack.emplace_back(Stack.size() - Args.size(), Unreachable,
5619
21.1k
                              JumpBlock, NextBlock, ElseBlock, std::move(Args),
5620
21.1k
                              std::move(Type), std::move(ReturnPHI));
5621
21.1k
  }
5622
5623
21.1k
  /// Closes the innermost control frame and returns it.
  ///
  /// For a reachable frame the result values are popped and recorded as one
  /// more return PHI input, then a branch to the continuation block is
  /// emitted. For an unreachable frame an `unreachable` terminator is
  /// emitted instead. Afterwards the builder is positioned at the
  /// continuation block and the value stack is cut back to the frame's base
  /// height.
  Control leaveBlock() noexcept {
    Control Frame = std::move(ControlStack.back());
    ControlStack.pop_back();

    // Continue at NextBlock when it is set, otherwise at JumpBlock.
    auto Continuation = Frame.NextBlock ? Frame.NextBlock : Frame.JumpBlock;
    if (Frame.Unreachable) {
      Builder.createUnreachable();
    } else {
      if (const auto &ResultTypes = Frame.Type.second; !ResultTypes.empty()) {
        // The last result is on top of the stack, so fill from the back.
        std::vector<LLVM::Value> Rets(ResultTypes.size());
        for (size_t Remaining = Rets.size(); Remaining > 0; --Remaining) {
          Rets[Remaining - 1] = stackPop();
        }
        Frame.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
      }
      Builder.createBr(Continuation);
    }
    Builder.positionAtEnd(Continuation);
    Stack.erase(Stack.begin() + static_cast<int64_t>(Frame.StackSize),
                Stack.end());
    return Frame;
  }
5647
5648
4.99k
  void checkStop() noexcept {
5649
4.99k
    if (!Interruptible) {
5650
4.99k
      return;
5651
4.99k
    }
5652
0
    auto NotStopBB = LLVM::BasicBlock::create(LLContext, F.Fn, "NotStop");
5653
0
    auto StopToken = Builder.createAtomicRMW(
5654
0
        LLVMAtomicRMWBinOpXchg, Context.getStopToken(Builder, ExecCtx),
5655
0
        LLContext.getInt32(0), LLVMAtomicOrderingMonotonic);
5656
#if LLVM_VERSION_MAJOR >= 13
5657
    StopToken.setAlignment(32);
5658
#endif
5659
0
    auto NotStop = Builder.createLikely(
5660
0
        Builder.createICmpEQ(StopToken, LLContext.getInt32(0)));
5661
0
    Builder.createCondBr(NotStop, NotStopBB,
5662
0
                         getTrapBB(ErrCode::Value::Interrupted));
5663
5664
0
    Builder.positionAtEnd(NotStopBB);
5665
0
  }
5666
5667
5.81k
  void setUnreachable() noexcept {
5668
5.81k
    if (ControlStack.empty()) {
5669
0
      IsUnreachable = true;
5670
5.81k
    } else {
5671
5.81k
      ControlStack.back().Unreachable = true;
5672
5.81k
    }
5673
5.81k
  }
5674
5675
1.55M
  bool isUnreachable() const noexcept {
5676
1.55M
    if (ControlStack.empty()) {
5677
10.7k
      return IsUnreachable;
5678
1.54M
    } else {
5679
1.54M
      return ControlStack.back().Unreachable;
5680
1.54M
    }
5681
1.55M
  }
5682
5683
  void
5684
  buildPHI(Span<const ValType> RetType,
5685
           Span<const std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5686
18.4k
               Incomings) noexcept {
5687
18.4k
    if (isVoidReturn(RetType)) {
5688
5.65k
      return;
5689
5.65k
    }
5690
12.8k
    std::vector<LLVM::Value> Nodes;
5691
12.8k
    if (Incomings.size() == 0) {
5692
2.76k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5693
2.76k
      Nodes.reserve(Types.size());
5694
3.10k
      for (LLVM::Type Type : Types) {
5695
3.10k
        Nodes.push_back(LLVM::Value::getUndef(Type));
5696
3.10k
      }
5697
10.0k
    } else if (Incomings.size() == 1) {
5698
8.90k
      Nodes = std::move(std::get<0>(Incomings.front()));
5699
8.90k
    } else {
5700
1.14k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5701
1.14k
      Nodes.reserve(Types.size());
5702
2.39k
      for (size_t I = 0; I < Types.size(); ++I) {
5703
1.24k
        auto PHIRet = Builder.createPHI(Types[I]);
5704
3.28k
        for (auto &[Value, BB] : Incomings) {
5705
3.28k
          assuming(Value.size() == Types.size());
5706
3.28k
          PHIRet.addIncoming(Value[I], BB);
5707
3.28k
        }
5708
1.24k
        Nodes.push_back(PHIRet);
5709
1.24k
      }
5710
1.14k
    }
5711
13.5k
    for (auto &Val : Nodes) {
5712
13.5k
      stackPush(Val);
5713
13.5k
    }
5714
12.8k
  }
5715
5716
37.5k
  void setLableJumpPHI(unsigned int Index) noexcept {
5717
37.5k
    assuming(Index < ControlStack.size());
5718
37.5k
    auto &Entry = *(ControlStack.rbegin() + Index);
5719
37.5k
    if (Entry.NextBlock) { // is loop
5720
2.14k
      std::vector<LLVM::Value> Args(Entry.Type.first.size());
5721
3.93k
      for (size_t I = 0; I < Args.size(); ++I) {
5722
1.79k
        const size_t J = Args.size() - 1 - I;
5723
1.79k
        Args[J] = stackPop();
5724
1.79k
      }
5725
3.93k
      for (size_t I = 0; I < Args.size(); ++I) {
5726
1.79k
        Entry.Args[I].addIncoming(Args[I], Builder.getInsertBlock());
5727
1.79k
        stackPush(Args[I]);
5728
1.79k
      }
5729
35.4k
    } else if (!Entry.Type.second.empty()) { // has return value
5730
2.06k
      std::vector<LLVM::Value> Rets(Entry.Type.second.size());
5731
4.26k
      for (size_t I = 0; I < Rets.size(); ++I) {
5732
2.20k
        const size_t J = Rets.size() - 1 - I;
5733
2.20k
        Rets[J] = stackPop();
5734
2.20k
      }
5735
4.26k
      for (size_t I = 0; I < Rets.size(); ++I) {
5736
2.20k
        stackPush(Rets[I]);
5737
2.20k
      }
5738
2.06k
      Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5739
2.06k
    }
5740
37.5k
  }
5741
5742
37.5k
  /// Returns the jump target block of the control frame `Index` levels up
  /// from the innermost one (0 is the innermost frame).
  ///
  /// `Index` must be smaller than the number of open control frames; the
  /// precondition is stated explicitly, matching setLableJumpPHI, because an
  /// out-of-range index would read past the end of the control stack.
  LLVM::BasicBlock getLabel(unsigned int Index) const noexcept {
    assuming(Index < ControlStack.size());
    return (ControlStack.rbegin() + Index)->JumpBlock;
  }
5745
5746
936k
  /// Pushes `Value` onto the compile-time value stack.
  void stackPush(LLVM::Value Value) noexcept {
    Stack.push_back(Value);
  }
5747
357k
  /// Pops and returns the top of the compile-time value stack.
  ///
  /// Precondition: without an open control frame the stack must be
  /// non-empty; with one, the stack must be higher than that frame's base
  /// height, so a pop never reaches below the current frame.
  LLVM::Value stackPop() noexcept {
    if (ControlStack.empty()) {
      assuming(!Stack.empty());
    } else {
      assuming(Stack.size() > ControlStack.back().StackSize);
    }
    LLVM::Value Top = Stack.back();
    Stack.pop_back();
    return Top;
  }
5755
5756
22.2k
  /// Byte-swaps `Value` on big-endian hosts and returns it unchanged on
  /// little-endian hosts.
  ///
  /// On big-endian hosts:
  /// - integers wider than 8 bits and single-element vectors are swapped
  ///   directly with the bswap intrinsic;
  /// - other vectors are bitcast to i128 (when the element width is 128) or
  ///   i64, swapped, and cast back;
  /// - float/double values are swapped through i32/i64 respectively;
  /// - anything else is returned as is.
  LLVM::Value switchEndian(LLVM::Value Value) {
    if constexpr (Endian::native == Endian::big) {
      auto Ty = Value.getType();
      // Swap through an integer carrier type and cast back afterwards.
      auto SwapVia = [this, Ty](LLVM::Value In, LLVM::Type CarrierTy) {
        auto Carrier = Builder.createBitCast(In, CarrierTy);
        auto Swapped = Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Carrier);
        return Builder.createBitCast(Swapped, Ty);
      };

      const bool IsWideInt = Ty.isIntegerTy() && Ty.getIntegerBitWidth() > 8;
      if (IsWideInt || (Ty.isVectorTy() && Ty.getVectorSize() == 1)) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
      }
      if (Ty.isVectorTy()) {
        LLVM::Type CarrierTy = Ty.getElementType().getIntegerBitWidth() == 128
                                   ? Context.Int128Ty
                                   : Context.Int64Ty;
        return SwapVia(Value, CarrierTy);
      }
      if (Ty.isFloatTy() || Ty.isDoubleTy()) {
        LLVM::Type CarrierTy = Ty.isFloatTy() ? Context.Int32Ty : Context.Int64Ty;
        return SwapVia(Value, CarrierTy);
      }
    }
    return Value;
  }
5781
5782
  // Compile context referenced by the methods above for LLVM vector/scalar
  // types (e.g. Int64x2Ty), CPU feature flags (e.g. SupportSSE4_1) and the
  // stop-token lookup.
  LLVM::Compiler::CompileContext &Context;
5783
  // LLVM context handle used to create integer constants and types.
  LLVM::Context LLContext;
5784
  // NOTE(review): presumably one (type, value) pair per local variable; its
  // use is not visible in this part of the file - confirm.
  std::vector<std::pair<LLVM::Type, LLVM::Value>> Local;
5785
  // Compile-time value stack manipulated through stackPush()/stackPop().
  std::vector<LLVM::Value> Stack;
5786
  // NOTE(review): looks like the instruction-counter storage; its use is not
  // visible in this part of the file - confirm.
  LLVM::Value LocalInstrCount = nullptr;
5787
  // NOTE(review): looks like the gas-counter storage; its use is not visible
  // in this part of the file - confirm.
  LLVM::Value LocalGas = nullptr;
5788
  // Basic blocks keyed by error code. NOTE(review): presumably the cache
  // behind getTrapBB() - confirm.
  std::unordered_map<ErrCode::Value, LLVM::BasicBlock> TrapBB;
5789
  // Reachability flag used while no control frame is open (see
  // setUnreachable()/isUnreachable()).
  bool IsUnreachable = false;
5790
  // When false, checkStop() emits no interruption check.
  bool Interruptible = false;
5791
  struct Control {
5792
    size_t StackSize;
5793
    bool Unreachable;
5794
    LLVM::BasicBlock JumpBlock;
5795
    LLVM::BasicBlock NextBlock;
5796
    LLVM::BasicBlock ElseBlock;
5797
    std::vector<LLVM::Value> Args;
5798
    std::pair<std::vector<ValType>, std::vector<ValType>> Type;
5799
    std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5800
        ReturnPHI;
5801
    Control(size_t S, bool U, LLVM::BasicBlock J, LLVM::BasicBlock N,
5802
            LLVM::BasicBlock E, std::vector<LLVM::Value> A,
5803
            std::pair<std::vector<ValType>, std::vector<ValType>> T,
5804
            std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5805
                R) noexcept
5806
21.1k
        : StackSize(S), Unreachable(U), JumpBlock(J), NextBlock(N),
5807
21.1k
          ElseBlock(E), Args(std::move(A)), Type(std::move(T)),
5808
21.1k
          ReturnPHI(std::move(R)) {}
5809
    Control(const Control &) = default;
5810
26.0k
    Control(Control &&) = default;
5811
    Control &operator=(const Control &) = default;
5812
1.00k
    Control &operator=(Control &&) = default;
5813
  };
5814
  // Stack of open control frames; enterBlock() pushes and leaveBlock() pops.
  std::vector<Control> ControlStack;
5815
  // Function being compiled; F.Fn is the parent of newly created basic
  // blocks (see checkStop()).
  LLVM::FunctionCallee F;
5816
  // Execution-context value handed to Context.getStopToken() in checkStop().
  LLVM::Value ExecCtx;
5817
  // IR builder through which the methods above emit instructions.
  LLVM::Builder Builder;
5818
};
5819
5820
/// Extracts every element of the struct-typed value `Struct`, in index
/// order, by emitting one extractvalue per element through `Builder`.
std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
                                      LLVM::Value Struct) noexcept {
  const auto FieldCount = Struct.getType().getStructNumElements();
  std::vector<LLVM::Value> Fields;
  Fields.reserve(FieldCount);
  unsigned Idx = 0;
  while (Idx < FieldCount) {
    Fields.push_back(Builder.createExtractValue(Struct, Idx));
    ++Idx;
  }
  return Fields;
}
5830
5831
} // namespace
5832
5833
namespace WasmEdge {
5834
namespace LLVM {
5835
5836
2.21k
/// Checks the configured proposals against what the AOT/JIT compiler
/// supports. Exception Handling and Memory64 only produce a warning; the
/// Annotations proposal is rejected with `InvalidAOTConfigure`.
Expect<void> Compiler::checkConfigure() noexcept {
  const auto Enabled = [this](Proposal P) noexcept {
    return Conf.hasProposal(P);
  };

  // Note: Although the exception handling and memory64 proposals are not
  // implemented in AOT yet, we should not trap here because the default
  // configuration is now WASM 3.0, which contains these proposals.
  if (Enabled(Proposal::ExceptionHandling)) {
    spdlog::warn("Proposal Exception Handling is not yet supported in WasmEdge "
                 "AOT/JIT. The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }
  if (Enabled(Proposal::Memory64)) {
    spdlog::warn("Proposal Memory64 is not yet supported in WasmEdge AOT/JIT. "
                 "The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }

  // Custom annotations are the only proposal that is rejected outright.
  if (!Enabled(Proposal::Annotations)) {
    return {};
  }
  spdlog::error(ErrCode::Value::InvalidAOTConfigure);
  spdlog::error("    Proposal Custom Annotation Syntax is not yet supported "
                "in WasmEdge AOT/JIT.");
  return Unexpect(ErrCode::Value::InvalidAOTConfigure);
}
5858
5859
2.21k
/// Compiles a validated WASM module to LLVM IR and optimizes it.
///
/// Steps: reject unvalidated modules, take the compiler lock, set up a fresh
/// LLVM module (target triple, PIC level), compile each section, run the
/// verifier, create the target machine, run the optimization pipeline and
/// finally make sure the "intrinsics" global has an initializer.
///
/// Returns the populated `Data` on success. Fails with `NotValidated` for an
/// unvalidated module, with `IllegalPath` when the target cannot be resolved
/// from the triple, or with whatever error function compilation reports.
Expect<Data> Compiler::compile(const AST::Module &Module) noexcept {
  // Only validated modules may be compiled.
  if (unlikely(!Module.getIsValidated())) {
    spdlog::error(ErrCode::Value::NotValidated);
    return Unexpect(ErrCode::Value::NotValidated);
  }

  std::unique_lock Lock(Mutex);
  spdlog::info("compile start"sv);

  LLVM::Core::init();

  LLVM::Data D;
  auto LLContext = D.extract().getLLContext();
  auto &LLModule = D.extract().LLModule;
  LLModule.setTarget(LLVM::getDefaultTargetTriple().unwrap());
  LLModule.addFlag(LLVMModuleFlagBehaviorError, "PIC Level"sv, 2);

  const auto &CompilerConf = Conf.getCompilerConfigure();

  // Publish the per-compilation context through the `Context` member for the
  // duration of this call; the guard resets it on every exit path.
  CompileContext NewContext(LLContext, LLModule,
                            CompilerConf.isGenericBinary());
  struct ContextGuard {
    ContextGuard(CompileContext *&Target, CompileContext &Active)
        : Slot(Target) {
      Slot = &Active;
    }
    ~ContextGuard() { Slot = nullptr; }
    CompileContext *&Slot;
  };
  ContextGuard Guard(Context, NewContext);

  // Function types.
  compile(Module.getTypeSection());
  // Imports.
  compile(Module.getImportSection());
  // Globals.
  compile(Module.getGlobalSection());
  // Memories (memory section together with the data section).
  compile(Module.getMemorySection(), Module.getDataSection());
  // Tables (table section together with the element section).
  compile(Module.getTableSection(), Module.getElementSection());
  // Functions (function section together with the code section).
  EXPECTED_TRY(compile(Module.getFunctionSection(), Module.getCodeSection()));
  // Exports.
  compile(Module.getExportSection());
  // The start section does not need to be compiled.

  spdlog::info("verify start"sv);
  LLModule.verify(LLVMPrintMessageAction);

  spdlog::info("optimize start"sv);
  auto &TM = D.extract().TM;
  {
    auto Triple = LLModule.getTarget();
    auto [TheTarget, ErrorMessage] = LLVM::Target::getFromTriple(Triple);
    if (ErrorMessage) {
      spdlog::error("getFromTriple failed:{}"sv, ErrorMessage.string_view());
      return Unexpect(ErrCode::Value::IllegalPath);
    }

    {
      // Select the CPU model to generate code for.
      std::string CPUName;
#if defined(__riscv) && __riscv_xlen == 64
      CPUName = "generic-rv64"s;
#else
      if (CompilerConf.isGenericBinary()) {
        CPUName = "generic"s;
      } else {
        CPUName = LLVM::getHostCPUName().string_view();
      }
#endif

      TM = LLVM::TargetMachine::create(
          TheTarget, Triple, CPUName.c_str(),
          LLVM::getHostCPUFeatures().unwrap(),
          toLLVMCodeGenLevel(CompilerConf.getOptimizationLevel()),
          LLVMRelocPIC, LLVMCodeModelDefault);
    }

#if LLVM_VERSION_MAJOR >= 13
    // New pass manager: run the whole pipeline in one call.
    auto PBO = LLVM::PassBuilderOptions::create();
    if (auto Error = PBO.runPasses(
            LLModule, toLLVMLevel(CompilerConf.getOptimizationLevel()), TM)) {
      spdlog::error("{}"sv, Error.message().string_view());
    }
#else
    // Legacy pass managers: one for functions, one for the module.
    auto FP = LLVM::PassManager::createForModule(LLModule);
    auto MP = LLVM::PassManager::create();

    TM.addAnalysisPasses(MP);
    TM.addAnalysisPasses(FP);
    {
      auto PMB = LLVM::PassManagerBuilder::create();
      auto [OptLevel, SizeLevel] =
          toLLVMLevel(CompilerConf.getOptimizationLevel());
      PMB.setOptLevel(OptLevel);
      PMB.setSizeLevel(SizeLevel);
      PMB.populateFunctionPassManager(FP);
      PMB.populateModulePassManager(MP);
    }

    // At O0 and O1 tail-call elimination is added explicitly.
    const auto Level = CompilerConf.getOptimizationLevel();
    if (Level == CompilerConfigure::OptimizationLevel::O0 ||
        Level == CompilerConfigure::OptimizationLevel::O1) {
      FP.addTailCallEliminationPass();
    }

    FP.initializeFunctionPassManager();
    for (auto Func = LLModule.getFirstFunction(); Func;
         Func = Func.getNextFunction()) {
      FP.runFunctionPassManager(Func);
    }
    FP.finalizeFunctionPassManager();
    MP.runPassManager(LLModule);
#endif
  }

  // Make sure the "intrinsics" global exists and carries an initializer.
  auto IntrinsicsTable = LLModule.getNamedGlobal("intrinsics");
  if (!IntrinsicsTable) {
    auto IntrinsicsTableTy = LLVM::Type::getArrayType(
        LLContext.getInt8Ty().getPointerTo(),
        static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax));
    LLModule.addGlobal(
        IntrinsicsTableTy.getPointerTo(), false, LLVMExternalLinkage,
        LLVM::Value::getConstNull(IntrinsicsTableTy), "intrinsics");
  } else {
    IntrinsicsTable.setInitializer(
        LLVM::Value::getConstNull(IntrinsicsTable.getType()));
    IntrinsicsTable.setGlobalConstant(false);
  }

  spdlog::info("optimize done"sv);
  return Expect<Data>{std::move(D)};
}
5994
5995
2.21k
// Compile the type section.
//
// For every sub-type a symbol named "t<N>" is added to the LLVM module, where
// N is the current size of Context->CompositeTypes, and one entry is appended
// to both Context->CompositeTypes and Context->FunctionWrappers:
//   - function type not seen before: a wrapper function with the uniform
//     signature void(ExecCtxPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy) that
//     forwards to a typed callee;
//   - function type equal to an earlier one: an alias of the earlier wrapper;
//   - non-function composite type: a wrapper that only returns.
// Returns immediately, leaving the context untouched, when the section is
// empty.
void Compiler::compile(const AST::TypeSection &TypeSec) noexcept {
5996
2.21k
  // Uniform wrapper signature shared by every "t<N>" symbol.
  auto WrapperTy =
5997
2.21k
      LLVM::Type::getFunctionType(Context->VoidTy,
5998
2.21k
                                  {Context->ExecCtxPtrTy, Context->Int8PtrTy,
5999
2.21k
                                   Context->Int8PtrTy, Context->Int8PtrTy},
6000
2.21k
                                  false);
6001
2.21k
  auto SubTypes = TypeSec.getContent();
6002
2.21k
  const auto Size = SubTypes.size();
6003
2.21k
  if (Size == 0) {
6004
134
    return;
6005
134
  }
6006
2.08k
  Context->CompositeTypes.reserve(Size);
6007
2.08k
  Context->FunctionWrappers.reserve(Size);
6008
6009
  // Iterate and compile types.
6010
6.52k
  for (size_t I = 0; I < Size; ++I) {
6011
4.44k
    const auto &CompType = SubTypes[I].getCompositeType();
6012
4.44k
    // Every path through this loop body appends exactly one entry to
    // CompositeTypes, so its size is the index of the type being compiled
    // (assuming the vector was empty on entry -- the J < I scan below relies
    // on the same assumption).
    const auto Name = fmt::format("t{}"sv, Context->CompositeTypes.size());
6013
4.44k
    if (CompType.isFunc()) {
6014
      // Check function type is unique
6015
4.42k
      {
6016
4.42k
        bool Unique = true;
6017
17.4k
        // Linear scan over the types recorded so far; stops at the first
        // function type that compares equal.
        for (size_t J = 0; J < I; ++J) {
6018
13.2k
          if (Context->CompositeTypes[J] &&
6019
13.2k
              Context->CompositeTypes[J]->isFunc()) {
6020
13.1k
            const auto &OldFuncType = Context->CompositeTypes[J]->getFuncType();
6021
13.1k
            if (OldFuncType == CompType.getFuncType()) {
6022
153
              Unique = false;
6023
153
              // Duplicate: reuse the earlier type entry and wrapper, and
              // publish the new name as an alias of that wrapper.
              Context->CompositeTypes.push_back(Context->CompositeTypes[J]);
6024
153
              auto F = Context->FunctionWrappers[J];
6025
153
              Context->FunctionWrappers.push_back(F);
6026
153
              auto A = Context->LLModule.addAlias(WrapperTy, F, Name.c_str());
6027
153
              A.setLinkage(LLVMExternalLinkage);
6028
153
              A.setVisibility(LLVMProtectedVisibility);
6029
153
              A.setDSOLocal(true);
6030
153
              A.setDLLStorageClass(LLVMDLLExportStorageClass);
6031
153
              break;
6032
153
            }
6033
13.1k
          }
6034
13.2k
        }
6035
4.42k
        // The alias and both vector entries were already added above.
        if (!Unique) {
6036
153
          continue;
6037
153
        }
6038
4.42k
      }
6039
6040
      // Create Wrapper
6041
4.26k
      auto F = Context->LLModule.addFunction(WrapperTy, LLVMExternalLinkage,
6042
4.26k
                                             Name.c_str());
6043
4.26k
      {
6044
4.26k
        F.setVisibility(LLVMProtectedVisibility);
6045
4.26k
        F.setDSOLocal(true);
6046
4.26k
        F.setDLLStorageClass(LLVMDLLExportStorageClass);
6047
4.26k
        F.addFnAttr(Context->NoStackArgProbe);
6048
4.26k
        F.addFnAttr(Context->StrictFP);
6049
4.26k
        F.addFnAttr(Context->UWTable);
6050
4.26k
        F.addParamAttr(0, Context->ReadOnly);
6051
4.26k
        F.addParamAttr(0, Context->NoAlias);
6052
4.26k
        F.addParamAttr(1, Context->NoAlias);
6053
4.26k
        F.addParamAttr(2, Context->NoAlias);
6054
4.26k
        F.addParamAttr(3, Context->NoAlias);
6055
6056
4.26k
        LLVM::Builder Builder(Context->LLContext);
6057
4.26k
        Builder.positionAtEnd(
6058
4.26k
            LLVM::BasicBlock::create(Context->LLContext, F, "entry"));
6059
6060
4.26k
        // Typed LLVM signature of the callee the wrapper forwards to.
        auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy,
6061
4.26k
                              CompType.getFuncType());
6062
4.26k
        auto RTy = FTy.getReturnType();
6063
4.26k
        std::vector<LLVM::Type> FPTy(FTy.getNumParams());
6064
4.26k
        FTy.getParamTypes(FPTy);
6065
6066
4.26k
        // Parameter 0 of the typed signature is filled with ExecCtxPtr below,
        // so the remaining FPTy.size() - 1 parameters are loaded from RawArgs.
        // NOTE(review): relies on FTy always having at least one parameter;
        // presumably toLLVMType always prepends ExecCtxPtrTy -- confirm.
        const size_t ArgCount = FPTy.size() - 1;
6067
4.26k
        auto ExecCtxPtr = F.getFirstParam();
6068
4.26k
        // Wrapper parameter 1: callee pointer, cast to the typed pointer.
        auto RawFunc = LLVM::FunctionCallee{
6069
4.26k
            FTy, Builder.createBitCast(ExecCtxPtr.getNextParam(),
6070
4.26k
                                       FTy.getPointerTo())};
6071
4.26k
        // Wrapper parameter 2: argument buffer; parameter 3: result buffer.
        auto RawArgs = ExecCtxPtr.getNextParam().getNextParam();
6072
4.26k
        auto RawRets = RawArgs.getNextParam();
6073
6074
4.26k
        std::vector<LLVM::Value> Args;
6075
4.26k
        Args.reserve(FTy.getNumParams());
6076
4.26k
        Args.push_back(ExecCtxPtr);
6077
8.87k
        // Argument J is loaded with type FPTy[J + 1] using offset
        // J * kValSize into RawArgs.
        for (size_t J = 0; J < ArgCount; ++J) {
6078
4.60k
          Args.push_back(Builder.createValuePtrLoad(
6079
4.60k
              FPTy[J + 1], RawArgs, Context->Int8Ty, J * kValSize));
6080
4.60k
        }
6081
6082
4.26k
        auto Ret = Builder.createCall(RawFunc, Args);
6083
4.26k
        // Write the result(s) to RawRets: nothing for void, the unpacked
        // members for a struct return, otherwise the single value.
        if (RTy.isVoidTy()) {
6084
          // nothing to do
6085
2.86k
        } else if (RTy.isStructTy()) {
6086
317
          auto Rets = unpackStruct(Builder, Ret);
6087
317
          Builder.createArrayPtrStore(Rets, RawRets, Context->Int8Ty, kValSize);
6088
2.54k
        } else {
6089
2.54k
          Builder.createValuePtrStore(Ret, RawRets, Context->Int8Ty);
6090
2.54k
        }
6091
4.26k
        Builder.createRetVoid();
6092
4.26k
      }
6093
      // Copy wrapper, param and return lists to module instance.
6094
4.26k
      Context->FunctionWrappers.push_back(F);
6095
4.26k
    } else {
6096
      // Non function type case. Create empty wrapper.
6097
26
      auto F = Context->LLModule.addFunction(WrapperTy, LLVMExternalLinkage,
6098
26
                                             Name.c_str());
6099
26
      {
6100
26
        F.setVisibility(LLVMProtectedVisibility);
6101
26
        F.setDSOLocal(true);
6102
26
        F.setDLLStorageClass(LLVMDLLExportStorageClass);
6103
26
        F.addFnAttr(Context->NoStackArgProbe);
6104
26
        F.addFnAttr(Context->StrictFP);
6105
26
        F.addFnAttr(Context->UWTable);
6106
26
        F.addParamAttr(0, Context->ReadOnly);
6107
26
        F.addParamAttr(0, Context->NoAlias);
6108
26
        F.addParamAttr(1, Context->NoAlias);
6109
26
        F.addParamAttr(2, Context->NoAlias);
6110
26
        F.addParamAttr(3, Context->NoAlias);
6111
6112
26
        LLVM::Builder Builder(Context->LLContext);
6113
26
        Builder.positionAtEnd(
6114
26
            LLVM::BasicBlock::create(Context->LLContext, F, "entry"));
6115
26
        Builder.createRetVoid();
6116
26
      }
6117
26
      Context->FunctionWrappers.push_back(F);
6118
26
    }
6119
4.29k
    // NOTE(review): this keeps the address of an element reached through
    // SubTypes. It stays valid after this function returns only if
    // getContent() yields a non-owning view/reference into TypeSec (and
    // TypeSec outlives the context) -- confirm.
    Context->CompositeTypes.push_back(&CompType);
6120
4.29k
  }
6121
2.08k
}
6122
6123
2.21k
// Compile the import section.
//
// Function imports: an internal-linkage LLVM function "f<FuncID>" is emitted
// that stores its incoming arguments into an argument array, calls the kCall
// intrinsic with (FuncID, Args, Rets), and returns whatever it loads back
// from the return array. The function is appended to Context->Functions with
// a null code pointer.
// Global imports: only the LLVM type of the global is appended to
// Context->Globals.
// Table, memory and tag imports: nothing is emitted here.
void Compiler::compile(const AST::ImportSection &ImportSec) noexcept {
6124
  // Iterate and compile import descriptions.
6125
2.21k
  for (const auto &ImpDesc : ImportSec.getContent()) {
6126
    // Get data from import description.
6127
426
    const auto &ExtType = ImpDesc.getExternalType();
6128
6129
    // Add the imports into module instance.
6130
426
    switch (ExtType) {
6131
290
    case ExternalType::Function: // Function type index
6132
290
    {
6133
290
      // Position of this function in Context->Functions; used for the symbol
      // name and as the first argument of the kCall intrinsic below.
      const auto FuncID = static_cast<uint32_t>(Context->Functions.size());
6134
      // Get the function type index in module.
6135
290
      uint32_t TypeIdx = ImpDesc.getExternalFuncTypeIdx();
6136
290
      assuming(TypeIdx < Context->CompositeTypes.size());
6137
290
      assuming(Context->CompositeTypes[TypeIdx]->isFunc());
6138
290
      const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
6139
290
      auto FTy =
6140
290
          toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
6141
290
      auto RTy = FTy.getReturnType();
6142
290
      auto F = LLVM::FunctionCallee{
6143
290
          FTy,
6144
290
          Context->LLModule.addFunction(FTy, LLVMInternalLinkage,
6145
290
                                        fmt::format("f{}"sv, FuncID).c_str())};
6146
290
      F.Fn.setDSOLocal(true);
6147
290
      F.Fn.addFnAttr(Context->NoStackArgProbe);
6148
290
      F.Fn.addFnAttr(Context->StrictFP);
6149
290
      F.Fn.addFnAttr(Context->UWTable);
6150
290
      F.Fn.addParamAttr(0, Context->ReadOnly);
6151
290
      F.Fn.addParamAttr(0, Context->NoAlias);
6152
6153
290
      LLVM::Builder Builder(Context->LLContext);
6154
290
      Builder.positionAtEnd(
6155
290
          LLVM::BasicBlock::create(Context->LLContext, F.Fn, "entry"));
6156
6157
290
      const auto ArgSize = FuncType.getParamTypes().size();
6158
290
      // A void LLVM return type means there is nothing to read back.
      const auto RetSize =
6159
290
          RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
6160
6161
290
      // Argument and result arrays handed to the kCall intrinsic; slot I is
      // addressed with offset I * kValSize in the stores below.
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
6162
290
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
6163
6164
290
      // Arg starts at parameter 0 and is advanced before every store, so
      // parameter 0 itself is never stored (presumably it is the execution
      // context pointer added by toLLVMType -- confirm).
      auto Arg = F.Fn.getFirstParam();
6165
438
      for (unsigned I = 0; I < ArgSize; ++I) {
6166
148
        Arg = Arg.getNextParam();
6167
148
        Builder.createValuePtrStore(Arg, Args, Context->Int8Ty, I * kValSize);
6168
148
      }
6169
6170
290
      // Call the kCall intrinsic, typed void(Int32, Int8Ptr, Int8Ptr).
      Builder.createCall(
6171
290
          Context->getIntrinsic(
6172
290
              Builder, Executable::Intrinsics::kCall,
6173
290
              LLVM::Type::getFunctionType(
6174
290
                  Context->VoidTy,
6175
290
                  {Context->Int32Ty, Context->Int8PtrTy, Context->Int8PtrTy},
6176
290
                  false)),
6177
290
          {Context->LLContext.getInt32(FuncID), Args, Rets});
6178
6179
290
      // Return nothing, the single loaded value, or an aggregate of all
      // loaded values, depending on the number of results.
      if (RetSize == 0) {
6180
171
        Builder.createRetVoid();
6181
171
      } else if (RetSize == 1) {
6182
91
        Builder.createRet(
6183
91
            Builder.createValuePtrLoad(RTy, Rets, Context->Int8Ty));
6184
91
      } else {
6185
28
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
6186
28
            RetSize, RTy, Rets, Context->Int8Ty, kValSize));
6187
28
      }
6188
6189
290
      // Imports have no code segment; the null pointer makes the
      // function/code-section overload skip this entry.
      Context->Functions.emplace_back(TypeIdx, F, nullptr);
6190
290
      break;
6191
290
    }
6192
51
    case ExternalType::Table: // Table type
6193
51
    {
6194
      // Nothing to do.
6195
51
      break;
6196
290
    }
6197
37
    case ExternalType::Memory: // Memory type
6198
37
    {
6199
      // Nothing to do.
6200
37
      break;
6201
290
    }
6202
43
    case ExternalType::Global: // Global type
6203
43
    {
6204
      // Get global type. External type checked in validation.
6205
43
      const auto &GlobType = ImpDesc.getExternalGlobalType();
6206
43
      const auto &ValType = GlobType.getValType();
6207
43
      auto Type = toLLVMType(Context->LLContext, ValType);
6208
43
      Context->Globals.push_back(Type);
6209
43
      break;
6210
290
    }
6211
5
    case ExternalType::Tag: // Tag type
6212
5
    {
6213
      // TODO: EXCEPTION - implement the AOT.
6214
5
      break;
6215
290
    }
6216
0
    default:
6217
0
      assumingUnreachable();
6218
426
    }
6219
426
  }
6220
2.21k
}
6221
6222
2.21k
// Export section overload: the body is empty, so no code is generated here.
void Compiler::compile(const AST::ExportSection &) noexcept {}
6223
6224
2.21k
// Compile the global section: append the LLVM type of every global defined
// in the section to Context->Globals, in section order. Only the value type
// is read from each segment here. (Types of imported globals are appended to
// the same vector by the import-section overload.)
void Compiler::compile(const AST::GlobalSection &GlobalSec) noexcept {
6225
2.21k
  for (const auto &GlobalSeg : GlobalSec.getContent()) {
6226
159
    const auto &ValType = GlobalSeg.getGlobalType().getValType();
6227
159
    auto Type = toLLVMType(Context->LLContext, ValType);
6228
159
    Context->Globals.push_back(Type);
6229
159
  }
6230
2.21k
}
6231
6232
// Memory/data section overload: the body is empty, so no code is generated
// here.
void Compiler::compile(const AST::MemorySection &,
6233
2.21k
                       const AST::DataSection &) noexcept {}
6234
6235
// Table/element section overload: the body is empty, so no code is generated
// here.
void Compiler::compile(const AST::TableSection &,
6236
2.21k
                       const AST::ElementSection &) noexcept {}
6237
6238
// Compile the function and code sections in two passes.
//
// Pass 1 adds an externally linked LLVM function "f<FuncID>" for every code
// segment and appends (TypeIdx, function, code segment) to
// Context->Functions, so every function is declared before any body is
// compiled.
// Pass 2 walks Context->Functions, skips entries without a code segment,
// expands the locals declarations and compiles each body with a
// FunctionCompiler.
//
// Returns an empty Expect on success. A failing FC.compile() is handled by
// EXPECTED_TRY (presumably an early return of that error -- the macro is
// defined elsewhere).
Expect<void> Compiler::compile(const AST::FunctionSection &FuncSec,
6239
2.21k
                               const AST::CodeSection &CodeSec) noexcept {
6240
2.21k
  const auto &TypeIdxs = FuncSec.getContent();
6241
2.21k
  const auto &CodeSegs = CodeSec.getContent();
6242
2.21k
  // Entry I of the function section gives the type of code segment I.
  assuming(TypeIdxs.size() == CodeSegs.size());
6243
6244
12.9k
  for (size_t I = 0; I < CodeSegs.size(); ++I) {
6245
10.7k
    const auto &TypeIdx = TypeIdxs[I];
6246
10.7k
    const auto &Code = CodeSegs[I];
6247
10.7k
    assuming(TypeIdx < Context->CompositeTypes.size());
6248
10.7k
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
6249
10.7k
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
6250
10.7k
    // Continues the numbering of entries already in Context->Functions
    // (the import-section overload appends imported functions there).
    const auto FuncID = Context->Functions.size();
6251
10.7k
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
6252
10.7k
    LLVM::FunctionCallee F = {FTy, Context->LLModule.addFunction(
6253
10.7k
                                       FTy, LLVMExternalLinkage,
6254
10.7k
                                       fmt::format("f{}"sv, FuncID).c_str())};
6255
10.7k
    F.Fn.setVisibility(LLVMProtectedVisibility);
6256
10.7k
    F.Fn.setDSOLocal(true);
6257
10.7k
    F.Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
6258
10.7k
    F.Fn.addFnAttr(Context->NoStackArgProbe);
6259
10.7k
    F.Fn.addFnAttr(Context->StrictFP);
6260
10.7k
    F.Fn.addFnAttr(Context->UWTable);
6261
10.7k
    F.Fn.addParamAttr(0, Context->ReadOnly);
6262
10.7k
    F.Fn.addParamAttr(0, Context->NoAlias);
6263
6264
10.7k
    Context->Functions.emplace_back(TypeIdx, F, &Code);
6265
10.7k
  }
6266
6267
11.0k
  // Each entry is copied into the structured binding (no reference is taken).
  for (auto [T, F, Code] : Context->Functions) {
6268
11.0k
    // Entries added with a null code pointer (see the import-section
    // overload) have no body to compile.
    if (!Code) {
6269
290
      continue;
6270
290
    }
6271
6272
10.7k
    // Each locals entry is a (count, type) pair; expand it into one ValType
    // per local.
    std::vector<ValType> Locals;
6273
10.7k
    for (const auto &Local : Code->getLocals()) {
6274
2.30M
      for (unsigned I = 0; I < Local.first; ++I) {
6275
2.30M
        Locals.push_back(Local.second);
6276
2.30M
      }
6277
1.66k
    }
6278
10.7k
    FunctionCompiler FC(*Context, F, Locals,
6279
10.7k
                        Conf.getCompilerConfigure().isInterruptible(),
6280
10.7k
                        Conf.getStatisticsConfigure().isInstructionCounting(),
6281
10.7k
                        Conf.getStatisticsConfigure().isCostMeasuring());
6282
10.7k
    auto Type = Context->resolveBlockType(T);
6283
10.7k
    EXPECTED_TRY(FC.compile(*Code, std::move(Type)));
6284
10.7k
    // Runs only after the body compiled (assuming EXPECTED_TRY returns early
    // on failure).
    F.Fn.eliminateUnreachableBlocks();
6285
10.7k
  }
6286
2.21k
  return {};
6287
2.21k
}
6288
6289
} // namespace LLVM
6290
} // namespace WasmEdge