Coverage Report

Created: 2026-06-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/WasmEdge/lib/llvm/compiler.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// SPDX-FileCopyrightText: Copyright The WasmEdge Authors
3
4
#include "llvm/compiler.h"
5
6
#include "aot/version.h"
7
#include "common/defines.h"
8
#include "common/filesystem.h"
9
#include "common/spdlog.h"
10
#include "data.h"
11
#include "llvm.h"
12
#include "spdlog/spdlog.h"
13
#include "system/allocator.h"
14
15
#include <algorithm>
16
#include <array>
17
#include <cinttypes>
18
#include <cstdint>
19
#include <cstdlib>
20
#include <functional>
21
#include <limits>
22
#include <memory>
23
#include <numeric>
24
#include <string>
25
#include <string_view>
26
#include <system_error>
27
28
namespace LLVM = WasmEdge::LLVM;
29
using namespace std::literals;
30
31
namespace {
32
33
struct RAIICleanup {
34
  RAIICleanup(LLVM::Compiler::CompileContext *&ContextRef,
35
              LLVM::Compiler::CompileContext *NewContext)
36
2.30k
      : Context(ContextRef) {
37
2.30k
    Context = NewContext;
38
2.30k
  }
39
2.30k
  ~RAIICleanup() { Context = nullptr; }
40
  LLVM::Compiler::CompileContext *&Context;
41
};
42
43
static bool
44
isVoidReturn(WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
45
static LLVM::Type toLLVMType(LLVM::Context LLContext,
46
                             const WasmEdge::ValType &ValType) noexcept;
47
static std::vector<LLVM::Type>
48
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
49
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
50
static LLVM::Type
51
toLLVMRetsType(LLVM::Context LLContext,
52
               WasmEdge::Span<const WasmEdge::ValType> ValTypes) noexcept;
53
static LLVM::Type
54
toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
55
           const WasmEdge::AST::FunctionType &FuncType) noexcept;
56
static LLVM::Value
57
toLLVMConstantZero(LLVM::Context LLContext, const WasmEdge::ValType &ValType,
58
                   WasmEdge::Span<const WasmEdge::AST::CompositeType *const>
59
                       CompositeTypes) noexcept;
60
static std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
61
                                             LLVM::Value Struct) noexcept;
62
class FunctionCompiler;
63
64
// XXX: Misalignment handler not implemented yet, forcing unalignment
65
// force unalignment load/store
66
static inline constexpr const bool kForceUnalignment = true;
67
68
// force checking div/rem on zero
69
static inline constexpr const bool kForceDivCheck = true;
70
71
// Size of a ValVariant
72
static inline constexpr const uint32_t kValSize = sizeof(WasmEdge::ValVariant);
73
74
// Translate Compiler::OptimizationLevel to llvm::PassBuilder version
75
#if LLVM_VERSION_MAJOR >= 13
76
static inline const char *
77
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
78
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
79
  switch (Level) {
80
  case OL::O0:
81
    return "default<O0>,function(tailcallelim)";
82
  case OL::O1:
83
    return "default<O1>,function(tailcallelim)";
84
  case OL::O2:
85
    return "default<O2>";
86
  case OL::O3:
87
    return "default<O3>";
88
  case OL::Os:
89
    return "default<Os>";
90
  case OL::Oz:
91
    return "default<Oz>";
92
  default:
93
    assumingUnreachable();
94
  }
95
}
96
#else
97
static inline std::pair<unsigned int, unsigned int>
98
2.29k
toLLVMLevel(WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
99
2.29k
  using OL = WasmEdge::CompilerConfigure::OptimizationLevel;
100
2.29k
  switch (Level) {
101
0
  case OL::O0:
102
0
    return {0, 0};
103
0
  case OL::O1:
104
0
    return {1, 0};
105
0
  case OL::O2:
106
0
    return {2, 0};
107
2.29k
  case OL::O3:
108
2.29k
    return {3, 0};
109
0
  case OL::Os:
110
0
    return {2, 1};
111
0
  case OL::Oz:
112
0
    return {2, 2};
113
0
  default:
114
0
    assumingUnreachable();
115
2.29k
  }
116
2.29k
}
117
#endif
118
119
/// Map a WasmEdge optimization level to the LLVM-C code generation level.
/// O2 and both size-oriented levels (Os, Oz) share the default level.
static inline LLVMCodeGenOptLevel toLLVMCodeGenLevel(
    WasmEdge::CompilerConfigure::OptimizationLevel Level) noexcept {
  using OptLevel = WasmEdge::CompilerConfigure::OptimizationLevel;
  switch (Level) {
  case OptLevel::O0:
    return LLVMCodeGenLevelNone;
  case OptLevel::O1:
    return LLVMCodeGenLevelLess;
  case OptLevel::O3:
    return LLVMCodeGenLevelAggressive;
  case OptLevel::O2:
  case OptLevel::Os:
  case OptLevel::Oz:
    return LLVMCodeGenLevelDefault;
  default:
    // Any other enumerator value is treated as impossible.
    assumingUnreachable();
  }
}
139
} // namespace
140
141
struct LLVM::Compiler::CompileContext {
142
  LLVM::Context LLContext;
143
  std::reference_wrapper<LLVM::Module> LLModule;
144
  LLVM::Attribute Cold;
145
  LLVM::Attribute NoAlias;
146
  LLVM::Attribute NoInline;
147
  LLVM::Attribute NoReturn;
148
  LLVM::Attribute ReadOnly;
149
  LLVM::Attribute StrictFP;
150
  LLVM::Attribute UWTable;
151
  LLVM::Attribute NoStackArgProbe;
152
  LLVM::Type VoidTy;
153
  LLVM::Type Int8Ty;
154
  LLVM::Type Int16Ty;
155
  LLVM::Type Int32Ty;
156
  LLVM::Type Int64Ty;
157
  LLVM::Type Int128Ty;
158
  LLVM::Type FloatTy;
159
  LLVM::Type DoubleTy;
160
  LLVM::Type Int8x16Ty;
161
  LLVM::Type Int16x8Ty;
162
  LLVM::Type Int32x4Ty;
163
  LLVM::Type Floatx4Ty;
164
  LLVM::Type Int64x2Ty;
165
  LLVM::Type Doublex2Ty;
166
  LLVM::Type Int128x1Ty;
167
  LLVM::Type Int8PtrTy;
168
  LLVM::Type Int32PtrTy;
169
  LLVM::Type Int64PtrTy;
170
  LLVM::Type Int128PtrTy;
171
  LLVM::Type Int8PtrPtrTy;
172
  LLVM::Type ExecCtxTy;
173
  LLVM::Type ExecCtxPtrTy;
174
  LLVM::Type IntrinsicsTableTy;
175
  LLVM::Type IntrinsicsTablePtrTy;
176
  LLVM::Message SubtargetFeatures;
177
178
#if defined(__x86_64__)
179
#if defined(__XOP__)
180
  bool SupportXOP = true;
181
#else
182
  bool SupportXOP = false;
183
#endif
184
185
#if defined(__SSE4_1__)
186
  bool SupportSSE4_1 = true;
187
#else
188
  bool SupportSSE4_1 = false;
189
#endif
190
191
#if defined(__SSSE3__)
192
  bool SupportSSSE3 = true;
193
#else
194
  bool SupportSSSE3 = false;
195
#endif
196
197
#if defined(__SSE2__)
198
  bool SupportSSE2 = true;
199
#else
200
  bool SupportSSE2 = false;
201
#endif
202
#endif
203
204
#if defined(__aarch64__)
205
#if defined(__ARM_NEON__) || defined(__ARM_NEON) || defined(__ARM_NEON_FP)
206
  bool SupportNEON = true;
207
#else
208
  bool SupportNEON = false;
209
#endif
210
#endif
211
212
  std::vector<const AST::CompositeType *> CompositeTypes;
213
  std::vector<LLVM::Value> FunctionWrappers;
214
  std::vector<std::tuple<uint32_t, LLVM::FunctionCallee,
215
                         const WasmEdge::AST::CodeSegment *>>
216
      Functions;
217
  std::vector<LLVM::Value> LazyJITCacheVars;
218
  uint32_t ImportCount = 0;
219
  std::vector<LLVM::Type> MemoryAddrTypes;
220
  std::vector<LLVM::Type> TableAddrTypes;
221
  std::vector<LLVM::Type> Globals;
222
  LLVM::Value IntrinsicsTable;
223
  LLVM::FunctionCallee Trap;
224
  /// Build the per-module compile context.
  ///
  /// Creates the attributes and LLVM types cached in this struct, declares
  /// the external "intrinsics" global and the private "trap" function in
  /// module M, optionally detects host CPU features, and finally emits the
  /// body of the trap function.
  ///
  /// \param C LLVM context used to create all attributes and types.
  /// \param M module that receives the globals and functions.
  /// \param IsGenericBinary when true, host CPU feature detection is skipped
  ///        and only the compile-time feature defaults remain in effect.
  CompileContext(LLVM::Context C, LLVM::Module &M,
                 bool IsGenericBinary) noexcept
      : LLContext(C), LLModule(M),
        Cold(LLVM::Attribute::createEnum(C, LLVM::Core::Cold, 0)),
        NoAlias(LLVM::Attribute::createEnum(C, LLVM::Core::NoAlias, 0)),
        NoInline(LLVM::Attribute::createEnum(C, LLVM::Core::NoInline, 0)),
        NoReturn(LLVM::Attribute::createEnum(C, LLVM::Core::NoReturn, 0)),
        ReadOnly(LLVM::Attribute::createEnum(C, LLVM::Core::ReadOnly, 0)),
        StrictFP(LLVM::Attribute::createEnum(C, LLVM::Core::StrictFP, 0)),
        UWTable(LLVM::Attribute::createEnum(C, LLVM::Core::UWTable,
                                            LLVM::Core::UWTableDefault)),
        NoStackArgProbe(
            LLVM::Attribute::createString(C, "no-stack-arg-probe"sv, {})),
        VoidTy(LLContext.getVoidTy()), Int8Ty(LLContext.getInt8Ty()),
        Int16Ty(LLContext.getInt16Ty()), Int32Ty(LLContext.getInt32Ty()),
        Int64Ty(LLContext.getInt64Ty()), Int128Ty(LLContext.getInt128Ty()),
        FloatTy(LLContext.getFloatTy()), DoubleTy(LLContext.getDoubleTy()),
        Int8x16Ty(LLVM::Type::getVectorType(Int8Ty, 16)),
        Int16x8Ty(LLVM::Type::getVectorType(Int16Ty, 8)),
        Int32x4Ty(LLVM::Type::getVectorType(Int32Ty, 4)),
        Floatx4Ty(LLVM::Type::getVectorType(FloatTy, 4)),
        Int64x2Ty(LLVM::Type::getVectorType(Int64Ty, 2)),
        Doublex2Ty(LLVM::Type::getVectorType(DoubleTy, 2)),
        Int128x1Ty(LLVM::Type::getVectorType(Int128Ty, 1)),
        Int8PtrTy(Int8Ty.getPointerTo()), Int32PtrTy(Int32Ty.getPointerTo()),
        Int64PtrTy(Int64Ty.getPointerTo()),
        Int128PtrTy(Int128Ty.getPointerTo()),
        Int8PtrPtrTy(Int8PtrTy.getPointerTo()),
        ExecCtxTy(LLVM::Type::getStructType(
            "ExecCtx",
            std::initializer_list<LLVM::Type>{
                // Memory
                Int8PtrTy.getPointerTo(),
                // Globals
                Int128PtrTy.getPointerTo(),
                // InstrCount
                Int64PtrTy,
                // CostTable
                LLVM::Type::getArrayType(Int64Ty, UINT16_MAX + 1)
                    .getPointerTo(),
                // Gas
                Int64PtrTy,
                // GasLimit
                Int64Ty,
                // StopToken
                Int32PtrTy,
            })),
        ExecCtxPtrTy(ExecCtxTy.getPointerTo()),
        IntrinsicsTableTy(LLVM::Type::getArrayType(
            Int8Ty.getPointerTo(),
            static_cast<uint32_t>(Executable::Intrinsics::kIntrinsicMax))),
        IntrinsicsTablePtrTy(IntrinsicsTableTy.getPointerTo()),
        IntrinsicsTable(LLModule.get().addGlobal(IntrinsicsTablePtrTy, true,
                                                 LLVMExternalLinkage,
                                                 LLVM::Value(), "intrinsics")) {
    // Declare the module-private trap function: void trap(i32).
    Trap.Ty = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
    Trap.Fn = LLModule.get().addFunction(Trap.Ty, LLVMPrivateLinkage, "trap");
    Trap.Fn.setDSOLocal(true);
    Trap.Fn.addFnAttr(NoStackArgProbe);
    Trap.Fn.addFnAttr(StrictFP);
    Trap.Fn.addFnAttr(UWTable);
    Trap.Fn.addFnAttr(NoReturn);
    Trap.Fn.addFnAttr(Cold);
    Trap.Fn.addFnAttr(NoInline);

    if (!IsGenericBinary) {
      // Walk the comma-separated host feature list and enable every feature
      // flag whose entry is prefixed with '+'.
      SubtargetFeatures = LLVM::getHostCPUFeatures();
      auto Features = SubtargetFeatures.string_view();
      while (!Features.empty()) {
        std::string_view Feature;
        if (auto Pos = Features.find(','); Pos != std::string_view::npos) {
          Feature = Features.substr(0, Pos);
          Features = Features.substr(Pos + 1);
        } else {
          Feature = std::exchange(Features, std::string_view());
        }
        // An empty entry (e.g. from ",," or a leading comma) must be skipped
        // before indexing; reading Feature[0] on an empty view is undefined.
        if (Feature.empty() || Feature[0] != '+') {
          continue;
        }
        Feature = Feature.substr(1);

#if defined(__x86_64__)
        if (!SupportXOP && Feature == "xop"sv) {
          SupportXOP = true;
        }
        if (!SupportSSE4_1 && Feature == "sse4.1"sv) {
          SupportSSE4_1 = true;
        }
        if (!SupportSSSE3 && Feature == "ssse3"sv) {
          SupportSSSE3 = true;
        }
        if (!SupportSSE2 && Feature == "sse2"sv) {
          SupportSSE2 = true;
        }
#elif defined(__aarch64__)
        if (!SupportNEON && Feature == "neon"sv) {
          SupportNEON = true;
        }
#endif
      }
    }

    compileTrap();
  }
328
  /// Emit IR that yields the base pointer of memory number Index.
  ///
  /// Reads field 0 of the loaded ExecCtx value (the "Memory" slot of the
  /// ExecCtx struct type) and indexes into it. When the allocator is not
  /// stable, the indexed entry is itself a pointer to the base pointer, so
  /// one extra load is emitted.
  ///
  /// \param Builder IR builder positioned where the code should be emitted.
  /// \param ExecCtx loaded ExecCtx struct value.
  /// \param Index memory index.
  /// \returns the base pointer, bit-cast to Int8PtrTy.
  LLVM::Value getMemory(LLVM::Builder &Builder, LLVM::Value ExecCtx,
                        uint32_t Index) noexcept {
    auto MemArray = Builder.createExtractValue(ExecCtx, 0);
#if WASMEDGE_ALLOCATOR_IS_STABLE
    auto Slot = Builder.createInBoundsGEP1(Int8PtrTy, MemArray,
                                           LLContext.getInt64(Index));
    auto BasePtr = Builder.createLoad(Int8PtrTy, Slot);
    BasePtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                        LLVM::Metadata(LLContext, {}));
#else
    auto Slot = Builder.createInBoundsGEP1(Int8PtrPtrTy, MemArray,
                                           LLContext.getInt64(Index));
    auto Indirect = Builder.createLoad(Int8PtrPtrTy, Slot);
    Indirect.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                         LLVM::Metadata(LLContext, {}));
    auto InnerSlot =
        Builder.createInBoundsGEP1(Int8PtrTy, Indirect, LLContext.getInt64(0));
    auto BasePtr = Builder.createLoad(Int8PtrTy, InnerSlot);
#endif
    return Builder.createBitCast(BasePtr, Int8PtrTy);
  }
349
  std::pair<LLVM::Type, LLVM::Value> getGlobal(LLVM::Builder &Builder,
350
                                               LLVM::Value ExecCtx,
351
468
                                               uint32_t Index) noexcept {
352
468
    auto Ty = Globals[Index];
353
468
    auto Array = Builder.createExtractValue(ExecCtx, 1);
354
468
    auto VPtr = Builder.createLoad(
355
468
        Int128PtrTy, Builder.createInBoundsGEP1(Int8PtrTy, Array,
356
468
                                                LLContext.getInt64(Index)));
357
468
    VPtr.setMetadata(LLContext, LLVM::Core::InvariantGroup,
358
468
                     LLVM::Metadata(LLContext, {}));
359
468
    auto Ptr = Builder.createBitCast(VPtr, Ty.getPointerTo());
360
468
    return {Ty, Ptr};
361
468
  }
362
  /// Extract field 2 of the loaded ExecCtx value, the "InstrCount" slot of the
  /// ExecCtx struct type (an Int64PtrTy member).
  LLVM::Value getInstrCount(LLVM::Builder &Builder,
                            LLVM::Value ExecCtx) noexcept {
    auto InstrCountField = Builder.createExtractValue(ExecCtx, 2);
    return InstrCountField;
  }
366
  /// Extract field 3 of the loaded ExecCtx value, the "CostTable" slot of the
  /// ExecCtx struct type (a pointer to an array of Int64Ty).
  LLVM::Value getCostTable(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    auto CostTableField = Builder.createExtractValue(ExecCtx, 3);
    return CostTableField;
  }
370
0
  /// Extract field 4 of the loaded ExecCtx value, the "Gas" slot of the
  /// ExecCtx struct type (an Int64PtrTy member).
  LLVM::Value getGas(LLVM::Builder &Builder, LLVM::Value ExecCtx) noexcept {
    auto GasField = Builder.createExtractValue(ExecCtx, 4);
    return GasField;
  }
373
  /// Extract field 5 of the loaded ExecCtx value, the "GasLimit" slot of the
  /// ExecCtx struct type (an Int64Ty member, stored by value).
  LLVM::Value getGasLimit(LLVM::Builder &Builder,
                          LLVM::Value ExecCtx) noexcept {
    auto GasLimitField = Builder.createExtractValue(ExecCtx, 5);
    return GasLimitField;
  }
377
  /// Extract field 6 of the loaded ExecCtx value, the "StopToken" slot of the
  /// ExecCtx struct type (an Int32PtrTy member).
  LLVM::Value getStopToken(LLVM::Builder &Builder,
                           LLVM::Value ExecCtx) noexcept {
    auto StopTokenField = Builder.createExtractValue(ExecCtx, 6);
    return StopTokenField;
  }
381
  /// Emit IR that loads the function pointer of an intrinsic from the
  /// module-level "intrinsics" table.
  ///
  /// \param Builder IR builder positioned where the loads should be emitted.
  /// \param Index which intrinsic to fetch; used as the table index.
  /// \param Ty function type the loaded pointer is treated as.
  /// \returns a callee made of Ty and the loaded function pointer.
  LLVM::FunctionCallee getIntrinsic(LLVM::Builder &Builder,
                                    Executable::Intrinsics Index,
                                    LLVM::Type Ty) noexcept {
    const auto Value = static_cast<uint32_t>(Index);
    auto FnPtrTy = Ty.getPointerTo();
    auto FnPtrPtrTy = FnPtrTy.getPointerTo();

    // Load the table pointer out of the global first.
    auto Table = Builder.createLoad(IntrinsicsTablePtrTy, IntrinsicsTable);
    Table.setMetadata(LLContext, LLVM::Core::InvariantGroup,
                      LLVM::Metadata(LLContext, {}));

    // Address of table entry [0][Value], reinterpreted as pointer-to-FnPtrTy.
    auto Entry = Builder.createInBoundsGEP2(
        IntrinsicsTableTy, Table, LLContext.getInt64(0),
        LLContext.getInt64(Value));
    auto TypedEntry = Builder.createBitCast(Entry, FnPtrPtrTy);
    auto Callee = Builder.createLoad(FnPtrTy, TypedEntry);
    return {Ty, Callee};
  }
396
2.30k
  void compileTrap() noexcept {
397
2.30k
    LLVM::Builder Builder(LLContext);
398
2.30k
    Builder.positionAtEnd(
399
2.30k
        LLVM::BasicBlock::create(LLContext, Trap.Fn, "entry"));
400
2.30k
    auto FnTy = LLVM::Type::getFunctionType(VoidTy, {Int32Ty});
401
2.30k
    auto CallTrap = Builder.createCall(
402
2.30k
        getIntrinsic(Builder, Executable::Intrinsics::kTrap, FnTy),
403
2.30k
        {Trap.Fn.getFirstParam()});
404
2.30k
    CallTrap.addCallSiteAttribute(NoReturn);
405
2.30k
    Builder.createUnreachable();
406
2.30k
  }
407
2.30k
  void addVersionGlobal() noexcept {
408
2.30k
    LLModule.get().addGlobal(
409
2.30k
        Int32Ty, true, LLVMExternalLinkage,
410
2.30k
        LLVM::Value::getConstInt(Int32Ty, AOT::kBinaryVersion), "version");
411
2.30k
  }
412
2.29k
  void finalizeIntrinsicsTable() noexcept {
413
2.29k
    if (auto Table = LLModule.get().getNamedGlobal("intrinsics")) {
414
1.32k
      Table.setInitializer(LLVM::Value::getConstNull(Table.getType()));
415
1.32k
      Table.setGlobalConstant(false);
416
1.32k
    } else {
417
972
      LLModule.get().addGlobal(IntrinsicsTablePtrTy, false, LLVMExternalLinkage,
418
972
                               LLVM::Value::getConstNull(IntrinsicsTablePtrTy),
419
972
                               "intrinsics");
420
972
    }
421
2.29k
  }
422
  std::pair<std::vector<ValType>, std::vector<ValType>>
423
19.5k
  resolveBlockType(const BlockType &BType) const noexcept {
424
19.5k
    using VecT = std::vector<ValType>;
425
19.5k
    using RetT = std::pair<VecT, VecT>;
426
19.5k
    if (BType.isEmpty()) {
427
2.35k
      return RetT{};
428
2.35k
    }
429
17.2k
    if (BType.isValType()) {
430
2.68k
      return RetT{{}, {BType.getValType()}};
431
14.5k
    } else {
432
      // Type index case. t2* = type[index].returns
433
14.5k
      const uint32_t TypeIdx = BType.getTypeIndex();
434
14.5k
      const auto &FType = CompositeTypes[TypeIdx]->getFuncType();
435
14.5k
      return RetT{
436
14.5k
          VecT(FType.getParamTypes().begin(), FType.getParamTypes().end()),
437
14.5k
          VecT(FType.getReturnTypes().begin(), FType.getReturnTypes().end())};
438
14.5k
    }
439
17.2k
  }
440
};
441
442
namespace {
443
444
using namespace WasmEdge;
445
446
37.2k
/// A result list maps to a void return exactly when it has no entries.
static bool isVoidReturn(Span<const ValType> ValTypes) noexcept {
  const bool HasNoResults = ValTypes.empty();
  return HasNoResults;
}
449
450
/// Map a WebAssembly value type to the LLVM type used to hold it.
/// Reference types (Ref, RefNull) share the V128 representation: a vector of
/// two 64-bit integers.
static LLVM::Type toLLVMType(LLVM::Context LLContext,
                             const ValType &ValType) noexcept {
  const auto Code = ValType.getCode();
  switch (Code) {
  // Scalar integers.
  case TypeCode::I32:
    return LLContext.getInt32Ty();
  case TypeCode::I64:
    return LLContext.getInt64Ty();
  // Scalar floats.
  case TypeCode::F32:
    return LLContext.getFloatTy();
  case TypeCode::F64:
    return LLContext.getDoubleTy();
  // 128-bit values and references.
  case TypeCode::V128:
  case TypeCode::Ref:
  case TypeCode::RefNull:
    return LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2);
  default:
    assumingUnreachable();
  }
}
469
470
/// Map an address type to the LLVM integer type of matching width:
/// I32 -> 32-bit integer, I64 -> 64-bit integer.
static LLVM::Type toLLVMType(LLVM::Context LLContext,
                             const AddressType AddrType) noexcept {
  if (AddrType == AddressType::I32) {
    return LLContext.getInt32Ty();
  }
  if (AddrType == AddressType::I64) {
    return LLContext.getInt64Ty();
  }
  // Any other address type is treated as impossible.
  assumingUnreachable();
}
481
482
static std::vector<LLVM::Type>
483
toLLVMTypeVector(LLVM::Context LLContext,
484
21.7k
                 Span<const ValType> ValTypes) noexcept {
485
21.7k
  std::vector<LLVM::Type> Result;
486
21.7k
  Result.reserve(ValTypes.size());
487
21.7k
  for (const auto &Type : ValTypes) {
488
20.6k
    Result.push_back(toLLVMType(LLContext, Type));
489
20.6k
  }
490
21.7k
  return Result;
491
21.7k
}
492
493
static std::vector<LLVM::Type>
494
toLLVMArgsType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
495
17.6k
               Span<const ValType> ValTypes) noexcept {
496
17.6k
  auto Result = toLLVMTypeVector(LLContext, ValTypes);
497
17.6k
  Result.insert(Result.begin(), ExecCtxPtrTy);
498
17.6k
  return Result;
499
17.6k
}
500
501
/// Compute the LLVM return type for a list of WebAssembly result types:
/// void for no results, the bare type for exactly one result, and a struct
/// with one field per result otherwise.
static LLVM::Type toLLVMRetsType(LLVM::Context LLContext,
                                 Span<const ValType> ValTypes) noexcept {
  switch (ValTypes.size()) {
  case 0:
    return LLContext.getVoidTy();
  case 1:
    return toLLVMType(LLContext, ValTypes.front());
  default: {
    // Multiple results are packed into a struct, in order.
    std::vector<LLVM::Type> Fields = toLLVMTypeVector(LLContext, ValTypes);
    return LLVM::Type::getStructType(Fields);
  }
  }
}
516
517
/// Build the LLVM function type for a WebAssembly function type. The
/// argument list starts with the execution context pointer; the return type
/// follows the rules of toLLVMRetsType.
static LLVM::Type toLLVMType(LLVM::Context LLContext, LLVM::Type ExecCtxPtrTy,
                             const AST::FunctionType &FuncType) noexcept {
  auto ResultTy = toLLVMRetsType(LLContext, FuncType.getReturnTypes());
  auto ParamTys =
      toLLVMArgsType(LLContext, ExecCtxPtrTy, FuncType.getParamTypes());
  return LLVM::Type::getFunctionType(ResultTy, ParamTys);
}
524
525
/// Build the constant used to zero-initialise a value of the given type.
///
/// Numeric and V128 types get an all-zero constant of their LLVM type.
/// Reference types get a 16-byte constant vector whose leading bytes carry a
/// raw value-type encoding and whose remaining bytes are zero.
///
/// \param LLContext LLVM context used to create the constant.
/// \param ValType WebAssembly type of the value being initialised.
/// \param CompositeTypes type section entries, consulted when ValType refers
///        to a concrete type index.
static LLVM::Value toLLVMConstantZero(
    LLVM::Context LLContext, const ValType &ValType,
    Span<const AST::CompositeType *const> CompositeTypes) noexcept {
  switch (ValType.getCode()) {
  case TypeCode::I32:
    return LLVM::Value::getConstNull(LLContext.getInt32Ty());
  case TypeCode::I64:
    return LLVM::Value::getConstNull(LLContext.getInt64Ty());
  case TypeCode::F32:
    return LLVM::Value::getConstNull(LLContext.getFloatTy());
  case TypeCode::F64:
    return LLVM::Value::getConstNull(LLContext.getDoubleTy());
  case TypeCode::V128:
    return LLVM::Value::getConstNull(
        LLVM::Type::getVectorType(LLContext.getInt64Ty(), 2));
  case TypeCode::Ref:
  case TypeCode::RefNull: {
    // Value-initialised, so every byte not overwritten below stays zero.
    std::array<uint8_t, 16> Bytes{};
    if (!ValType.isAbsHeapType()) {
      // For non-abstract heap types (concrete type indices), convert to the
      // abstract heap type so that ref.cast/ref.test won't dereference a null
      // pointer when checking the type.
      assuming(ValType.getTypeIndex() < CompositeTypes.size());
      const auto *Composite = CompositeTypes[ValType.getTypeIndex()];
      assuming(Composite != nullptr);
      WasmEdge::ValType NullTy =
          Composite->isFunc() ? TypeCode::NullFuncRef : TypeCode::NullRef;
      std::copy_n(NullTy.getRawData().cbegin(), 8, Bytes.begin());
    } else {
      // Abstract heap types are already fine for null refs.
      const auto Raw = ValType.getRawData();
      std::copy(Raw.begin(), Raw.end(), Bytes.begin());
    }
    return LLVM::Value::getConstVector8(LLContext, Bytes);
  }
  default:
    assumingUnreachable();
  }
}
564
565
class FunctionCompiler {
566
  struct Control;
567
568
public:
569
  /// Set up code generation for one function.
  ///
  /// When F has a function value, this creates the "entry" block, loads the
  /// ExecCtx struct from the first parameter, optionally allocates zeroed
  /// 64-bit counters for instruction counting and gas measuring, spills every
  /// remaining parameter into its own stack slot, and allocates one
  /// zero-initialised stack slot per declared local. Parameters and locals
  /// are appended to Local in that order.
  ///
  /// \param Context shared per-module compile context.
  /// \param F callee (type and function value) being compiled.
  /// \param Locals value types of the declared locals.
  /// \param Interruptible stored for later use by this compiler.
  /// \param InstructionCounting whether to allocate the local counter.
  /// \param GasMeasuring whether to allocate the local gas accumulator.
  /// \param IsLazyJIT stored for later use by this compiler.
  FunctionCompiler(LLVM::Compiler::CompileContext &Context,
                   LLVM::FunctionCallee F, Span<const ValType> Locals,
                   bool Interruptible, bool InstructionCounting,
                   bool GasMeasuring, bool IsLazyJIT) noexcept
      : Context(Context), LLContext(Context.LLContext),
        Interruptible(Interruptible), IsLazyJIT(IsLazyJIT), F(F),
        Builder(LLContext) {
    // Nothing to emit without a function value.
    if (!F.Fn) {
      return;
    }

    Builder.positionAtEnd(LLVM::BasicBlock::create(LLContext, F.Fn, "entry"));
    ExecCtx = Builder.createLoad(Context.ExecCtxTy, F.Fn.getFirstParam());

    if (InstructionCounting) {
      LocalInstrCount = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
    }

    if (GasMeasuring) {
      LocalGas = Builder.createAlloca(Context.Int64Ty);
      Builder.createStore(LLContext.getInt64(0), LocalGas);
    }

    // Skip the leading execution-context parameter; spill the rest.
    LLVM::Value Param = F.Fn.getFirstParam().getNextParam();
    while (Param) {
      LLVM::Type ParamTy = Param.getType();
      LLVM::Value Slot = Builder.createAlloca(ParamTy);
      Builder.createStore(Param, Slot);
      Local.emplace_back(ParamTy, Slot);
      Param = Param.getNextParam();
    }

    // Declared locals start out as the zero constant of their type.
    for (const auto &LocalType : Locals) {
      LLVM::Type SlotTy = toLLVMType(LLContext, LocalType);
      LLVM::Value Slot = Builder.createAlloca(SlotTy);
      Builder.createStore(
          toLLVMConstantZero(LLContext, LocalType, Context.CompositeTypes),
          Slot);
      Local.emplace_back(SlotTy, Slot);
    }
  }
608
609
32.9k
  /// Return the basic block that handles traps with the given error code,
  /// creating and caching a new "trap" block on first request.
  LLVM::BasicBlock getTrapBB(ErrCode::Value Error) noexcept {
    const auto Found = TrapBB.find(Error);
    if (Found == TrapBB.end()) {
      auto Fresh = LLVM::BasicBlock::create(LLContext, F.Fn, "trap");
      TrapBB.emplace(Error, Fresh);
      return Fresh;
    }
    return Found->second;
  }
617
618
  /// Compile the body of one function.
  ///
  /// Opens the outermost block (jumping to a fresh "ret" block, with the
  /// parameter list of Type cleared), compiles the instruction sequence,
  /// emits the return, and then fills every trap block requested so far with
  /// a noreturn call to the module trap function carrying the error code.
  ///
  /// \param Code code segment whose expression is compiled.
  /// \param Type {parameter types, result types} of the function.
  /// \returns an error if compiling the instructions fails, else success.
  Expect<void>
  compile(const AST::CodeSegment &Code,
          std::pair<std::vector<ValType>, std::vector<ValType>> Type) noexcept {
    auto ReturnBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "ret");
    Type.first.clear();
    enterBlock(ReturnBlock, {}, {}, {}, std::move(Type));
    EXPECTED_TRY(compile(Code.getExpr().getInstrs()));
    assuming(ControlStack.empty());
    compileReturn();

    for (auto &[TrapCode, TrapBlock] : TrapBB) {
      Builder.positionAtEnd(TrapBlock);
      updateInstrCount();
      updateGasAtTrap();
      auto CodeArg = LLContext.getInt32(static_cast<uint32_t>(TrapCode));
      auto TrapCall = Builder.createCall(Context.Trap, {CodeArg});
      TrapCall.addCallSiteAttribute(Context.NoReturn);
      Builder.createUnreachable();
    }
    return {};
  }
639
640
11.2k
  Expect<void> compile(AST::InstrView Instrs) noexcept {
641
1.65M
    auto Dispatch = [this](const AST::Instruction &Instr) -> Expect<void> {
642
1.65M
      switch (Instr.getOpCode()) {
643
      // Control instructions (for blocks)
644
3.48k
      case OpCode::Block: {
645
3.48k
        auto Block = LLVM::BasicBlock::create(LLContext, F.Fn, "block");
646
3.48k
        auto EndBlock = LLVM::BasicBlock::create(LLContext, F.Fn, "block.end");
647
3.48k
        Builder.createBr(Block);
648
649
3.48k
        Builder.positionAtEnd(Block);
650
3.48k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
651
3.48k
        const auto Arity = Type.first.size();
652
3.48k
        std::vector<LLVM::Value> Args(Arity);
653
3.48k
        if (isUnreachable()) {
654
1.04k
          for (size_t I = 0; I < Arity; ++I) {
655
287
            auto Ty = toLLVMType(LLContext, Type.first[I]);
656
287
            Args[I] = LLVM::Value::getUndef(Ty);
657
287
          }
658
2.72k
        } else {
659
3.10k
          for (size_t I = 0; I < Arity; ++I) {
660
377
            const size_t J = Arity - 1 - I;
661
377
            Args[J] = stackPop();
662
377
          }
663
2.72k
        }
664
3.48k
        enterBlock(EndBlock, {}, {}, std::move(Args), std::move(Type));
665
3.48k
        checkStop();
666
3.48k
        updateGas();
667
3.48k
        return {};
668
0
      }
669
1.94k
      case OpCode::Loop: {
670
1.94k
        auto Curr = Builder.getInsertBlock();
671
1.94k
        auto Loop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop");
672
1.94k
        auto EndLoop = LLVM::BasicBlock::create(LLContext, F.Fn, "loop.end");
673
1.94k
        Builder.createBr(Loop);
674
675
1.94k
        Builder.positionAtEnd(Loop);
676
1.94k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
677
1.94k
        const auto Arity = Type.first.size();
678
1.94k
        std::vector<LLVM::Value> Args(Arity);
679
1.94k
        if (isUnreachable()) {
680
1.03k
          for (size_t I = 0; I < Arity; ++I) {
681
407
            auto Ty = toLLVMType(LLContext, Type.first[I]);
682
407
            auto Value = LLVM::Value::getUndef(Ty);
683
407
            auto PHINode = Builder.createPHI(Ty);
684
407
            PHINode.addIncoming(Value, Curr);
685
407
            Args[I] = PHINode;
686
407
          }
687
1.31k
        } else {
688
1.98k
          for (size_t I = 0; I < Arity; ++I) {
689
665
            const size_t J = Arity - 1 - I;
690
665
            auto Value = stackPop();
691
665
            auto PHINode = Builder.createPHI(Value.getType());
692
665
            PHINode.addIncoming(Value, Curr);
693
665
            Args[J] = PHINode;
694
665
          }
695
1.31k
        }
696
1.94k
        enterBlock(Loop, EndLoop, {}, std::move(Args), std::move(Type));
697
1.94k
        checkStop();
698
1.94k
        updateGas();
699
1.94k
        return {};
700
0
      }
701
2.97k
      case OpCode::If: {
702
2.97k
        auto Then = LLVM::BasicBlock::create(LLContext, F.Fn, "then");
703
2.97k
        auto Else = LLVM::BasicBlock::create(LLContext, F.Fn, "else");
704
2.97k
        auto EndIf = LLVM::BasicBlock::create(LLContext, F.Fn, "if.end");
705
2.97k
        LLVM::Value Cond;
706
2.97k
        if (isUnreachable()) {
707
640
          Cond = LLVM::Value::getUndef(LLContext.getInt1Ty());
708
2.33k
        } else {
709
2.33k
          Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
710
2.33k
        }
711
2.97k
        Builder.createCondBr(Cond, Then, Else);
712
713
2.97k
        Builder.positionAtEnd(Then);
714
2.97k
        auto Type = Context.resolveBlockType(Instr.getBlockType());
715
2.97k
        const auto Arity = Type.first.size();
716
2.97k
        std::vector<LLVM::Value> Args(Arity);
717
2.97k
        if (isUnreachable()) {
718
1.09k
          for (size_t I = 0; I < Arity; ++I) {
719
453
            auto Ty = toLLVMType(LLContext, Type.first[I]);
720
453
            Args[I] = LLVM::Value::getUndef(Ty);
721
453
          }
722
2.33k
        } else {
723
3.13k
          for (size_t I = 0; I < Arity; ++I) {
724
805
            const size_t J = Arity - 1 - I;
725
805
            Args[J] = stackPop();
726
805
          }
727
2.33k
        }
728
2.97k
        enterBlock(EndIf, {}, Else, std::move(Args), std::move(Type));
729
2.97k
        return {};
730
0
      }
731
7
      case OpCode::Try_table:
732
        // TODO: EXCEPTION - implement the AOT.
733
7
        return Unexpect(ErrCode::Value::AOTNotImpl);
734
19.5k
      case OpCode::End: {
735
19.5k
        auto Entry = leaveBlock();
736
19.5k
        if (Entry.ElseBlock) {
737
1.28k
          auto Block = Builder.getInsertBlock();
738
1.28k
          Builder.positionAtEnd(Entry.ElseBlock);
739
1.28k
          enterBlock(Block, {}, {}, std::move(Entry.Args),
740
1.28k
                     std::move(Entry.Type), std::move(Entry.ReturnPHI));
741
1.28k
          Entry = leaveBlock();
742
1.28k
        }
743
19.5k
        buildPHI(Entry.Type.second, Entry.ReturnPHI);
744
19.5k
        return {};
745
0
      }
746
1.67k
      case OpCode::Else: {
747
1.67k
        auto Entry = leaveBlock();
748
1.67k
        Builder.positionAtEnd(Entry.ElseBlock);
749
1.67k
        enterBlock(Entry.JumpBlock, {}, {}, std::move(Entry.Args),
750
1.67k
                   std::move(Entry.Type), std::move(Entry.ReturnPHI));
751
1.67k
        return {};
752
0
      }
753
1.62M
      default:
754
1.62M
        break;
755
1.65M
      }
756
757
1.62M
      if (isUnreachable()) {
758
506k
        return {};
759
506k
      }
760
761
1.11M
      switch (Instr.getOpCode()) {
762
      // Control instructions
763
3.50k
      case OpCode::Unreachable:
764
3.50k
        Builder.createBr(getTrapBB(ErrCode::Value::Unreachable));
765
3.50k
        setUnreachable();
766
3.50k
        Builder.positionAtEnd(
767
3.50k
            LLVM::BasicBlock::create(LLContext, F.Fn, "unreachable.end"));
768
3.50k
        break;
769
50.3k
      case OpCode::Nop:
770
50.3k
        break;
771
1
      case OpCode::Throw:
772
2
      case OpCode::Throw_ref:
773
        // TODO: EXCEPTION - implement the AOT.
774
2
        return Unexpect(ErrCode::Value::AOTNotImpl);
775
746
      case OpCode::Br: {
776
746
        const auto Label = Instr.getJump().TargetIndex;
777
746
        setLableJumpPHI(Label);
778
746
        Builder.createBr(getLabel(Label));
779
746
        setUnreachable();
780
746
        Builder.positionAtEnd(
781
746
            LLVM::BasicBlock::create(LLContext, F.Fn, "br.end"));
782
746
        break;
783
1
      }
784
354
      case OpCode::Br_if: {
785
354
        const auto Label = Instr.getJump().TargetIndex;
786
354
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
787
354
        setLableJumpPHI(Label);
788
354
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_if.end");
789
354
        Builder.createCondBr(Cond, getLabel(Label), Next);
790
354
        Builder.positionAtEnd(Next);
791
354
        break;
792
1
      }
793
954
      case OpCode::Br_table: {
794
954
        auto LabelTable = Instr.getLabelList();
795
954
        assuming(LabelTable.size() <= std::numeric_limits<uint32_t>::max());
796
954
        const auto LabelTableSize =
797
954
            static_cast<uint32_t>(LabelTable.size() - 1);
798
954
        auto Value = stackPop();
799
954
        setLableJumpPHI(LabelTable[LabelTableSize].TargetIndex);
800
954
        auto Switch = Builder.createSwitch(
801
954
            Value, getLabel(LabelTable[LabelTableSize].TargetIndex),
802
954
            LabelTableSize);
803
20.2k
        for (uint32_t I = 0; I < LabelTableSize; ++I) {
804
19.3k
          setLableJumpPHI(LabelTable[I].TargetIndex);
805
19.3k
          Switch.addCase(LLContext.getInt32(I),
806
19.3k
                         getLabel(LabelTable[I].TargetIndex));
807
19.3k
        }
808
954
        setUnreachable();
809
954
        Builder.positionAtEnd(
810
954
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_table.end"));
811
954
        break;
812
954
      }
813
25
      case OpCode::Br_on_null: {
814
25
        const auto Label = Instr.getJump().TargetIndex;
815
25
        auto Value = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
816
25
        auto Cond = Builder.createICmpEQ(
817
25
            Builder.createExtractElement(Value, LLContext.getInt64(1)),
818
25
            LLContext.getInt64(0));
819
25
        setLableJumpPHI(Label);
820
25
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_null.end");
821
25
        Builder.createCondBr(Cond, getLabel(Label), Next);
822
25
        Builder.positionAtEnd(Next);
823
25
        stackPush(Value);
824
25
        break;
825
954
      }
826
10
      case OpCode::Br_on_non_null: {
827
10
        const auto Label = Instr.getJump().TargetIndex;
828
10
        auto Cond = Builder.createICmpNE(
829
10
            Builder.createExtractElement(
830
10
                Builder.createBitCast(Stack.back(), Context.Int64x2Ty),
831
10
                LLContext.getInt64(1)),
832
10
            LLContext.getInt64(0));
833
10
        setLableJumpPHI(Label);
834
10
        auto Next =
835
10
            LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_non_null.end");
836
10
        Builder.createCondBr(Cond, getLabel(Label), Next);
837
10
        Builder.positionAtEnd(Next);
838
10
        stackPop();
839
10
        break;
840
954
      }
841
0
      case OpCode::Br_on_cast:
842
0
      case OpCode::Br_on_cast_fail: {
843
0
        auto Ref = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
844
0
        const auto Label = Instr.getBrCast().Jump.TargetIndex;
845
0
        std::array<uint8_t, 16> Buf = {0};
846
0
        std::copy_n(Instr.getBrCast().RType2.getRawData().cbegin(), 8,
847
0
                    Buf.begin());
848
0
        auto VType = Builder.createExtractElement(
849
0
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
850
0
                                  Context.Int64x2Ty),
851
0
            LLContext.getInt64(0));
852
0
        auto IsRefTest = Builder.createCall(
853
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
854
0
                                 LLVM::Type::getFunctionType(
855
0
                                     Context.Int32Ty,
856
0
                                     {Context.Int64x2Ty, Context.Int64Ty},
857
0
                                     false)),
858
0
            {Ref, VType});
859
0
        auto Cond =
860
0
            (Instr.getOpCode() == OpCode::Br_on_cast)
861
0
                ? Builder.createICmpNE(IsRefTest, LLContext.getInt32(0))
862
0
                : Builder.createICmpEQ(IsRefTest, LLContext.getInt32(0));
863
0
        setLableJumpPHI(Label);
864
0
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "br_on_cast.end");
865
0
        Builder.createCondBr(Cond, getLabel(Label), Next);
866
0
        Builder.positionAtEnd(Next);
867
0
        break;
868
0
      }
869
692
      case OpCode::Return:
870
692
        compileReturn();
871
692
        setUnreachable();
872
692
        Builder.positionAtEnd(
873
692
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret.end"));
874
692
        break;
875
3.56k
      case OpCode::Call:
876
3.56k
        updateInstrCount();
877
3.56k
        updateGas();
878
3.56k
        compileCallOp(Instr.getTargetIndex());
879
3.56k
        break;
880
1.15k
      case OpCode::Call_indirect:
881
1.15k
        updateInstrCount();
882
1.15k
        updateGas();
883
1.15k
        compileIndirectCallOp(Instr.getSourceIndex(), Instr.getTargetIndex());
884
1.15k
        break;
885
63
      case OpCode::Return_call:
886
63
        updateInstrCount();
887
63
        updateGas();
888
63
        compileReturnCallOp(Instr.getTargetIndex());
889
63
        setUnreachable();
890
63
        Builder.positionAtEnd(
891
63
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call.end"));
892
63
        break;
893
163
      case OpCode::Return_call_indirect:
894
163
        updateInstrCount();
895
163
        updateGas();
896
163
        compileReturnIndirectCallOp(Instr.getSourceIndex(),
897
163
                                    Instr.getTargetIndex());
898
163
        setUnreachable();
899
163
        Builder.positionAtEnd(
900
163
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_indir.end"));
901
163
        break;
902
212
      case OpCode::Call_ref:
903
212
        updateInstrCount();
904
212
        updateGas();
905
212
        compileCallRefOp(Instr.getTargetIndex());
906
212
        break;
907
32
      case OpCode::Return_call_ref:
908
32
        updateInstrCount();
909
32
        updateGas();
910
32
        compileReturnCallRefOp(Instr.getTargetIndex());
911
32
        setUnreachable();
912
32
        Builder.positionAtEnd(
913
32
            LLVM::BasicBlock::create(LLContext, F.Fn, "ret_call_ref.end"));
914
32
        break;
915
0
      case OpCode::Try_table:
916
        // TODO: EXCEPTION - implement the AOT.
917
0
        return Unexpect(ErrCode::Value::AOTNotImpl);
918
919
      // Reference Instructions
920
7.25k
      case OpCode::Ref__null: {
921
7.25k
        std::array<uint8_t, 16> Buf = {0};
922
        // For null references, dynamic type downscaling is needed.
923
7.25k
        ValType VType;
924
7.25k
        if (Instr.getValType().isAbsHeapType()) {
925
6.97k
          switch (Instr.getValType().getHeapTypeCode()) {
926
25
          case TypeCode::NullFuncRef:
927
2.74k
          case TypeCode::FuncRef:
928
2.74k
            VType = TypeCode::NullFuncRef;
929
2.74k
            break;
930
55
          case TypeCode::NullExternRef:
931
3.03k
          case TypeCode::ExternRef:
932
3.03k
            VType = TypeCode::NullExternRef;
933
3.03k
            break;
934
45
          case TypeCode::NullExnRef:
935
144
          case TypeCode::ExnRef:
936
144
            VType = TypeCode::NullExnRef;
937
144
            break;
938
400
          case TypeCode::NullRef:
939
441
          case TypeCode::AnyRef:
940
928
          case TypeCode::EqRef:
941
1.00k
          case TypeCode::I31Ref:
942
1.02k
          case TypeCode::StructRef:
943
1.05k
          case TypeCode::ArrayRef:
944
1.05k
            VType = TypeCode::NullRef;
945
1.05k
            break;
946
0
          default:
947
0
            assumingUnreachable();
948
6.97k
          }
949
6.97k
        } else {
950
278
          assuming(Instr.getValType().getTypeIndex() <
951
278
                   Context.CompositeTypes.size());
952
278
          const auto *CompType =
953
278
              Context.CompositeTypes[Instr.getValType().getTypeIndex()];
954
278
          assuming(CompType != nullptr);
955
278
          if (CompType->isFunc()) {
956
262
            VType = TypeCode::NullFuncRef;
957
262
          } else {
958
16
            VType = TypeCode::NullRef;
959
16
          }
960
278
        }
961
7.25k
        std::copy_n(VType.getRawData().cbegin(), 8, Buf.begin());
962
7.25k
        stackPush(Builder.createBitCast(
963
7.25k
            LLVM::Value::getConstVector8(LLContext, Buf), Context.Int64x2Ty));
964
7.25k
        break;
965
7.25k
      }
966
3.20k
      case OpCode::Ref__is_null:
967
3.20k
        stackPush(Builder.createZExt(
968
3.20k
            Builder.createICmpEQ(
969
3.20k
                Builder.createExtractElement(
970
3.20k
                    Builder.createBitCast(stackPop(), Context.Int64x2Ty),
971
3.20k
                    LLContext.getInt64(1)),
972
3.20k
                LLContext.getInt64(0)),
973
3.20k
            Context.Int32Ty));
974
3.20k
        break;
975
27
      case OpCode::Ref__func:
976
27
        stackPush(Builder.createCall(
977
27
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefFunc,
978
27
                                 LLVM::Type::getFunctionType(Context.Int64x2Ty,
979
27
                                                             {Context.Int32Ty},
980
27
                                                             false)),
981
27
            {LLContext.getInt32(Instr.getTargetIndex())}));
982
27
        break;
983
12
      case OpCode::Ref__eq: {
984
12
        LLVM::Value RHS = stackPop();
985
12
        LLVM::Value LHS = stackPop();
986
12
        stackPush(Builder.createZExt(
987
12
            Builder.createICmpEQ(
988
12
                Builder.createExtractElement(LHS, LLContext.getInt64(1)),
989
12
                Builder.createExtractElement(RHS, LLContext.getInt64(1))),
990
12
            Context.Int32Ty));
991
12
        break;
992
7.25k
      }
993
377
      case OpCode::Ref__as_non_null: {
994
377
        auto Next =
995
377
            LLVM::BasicBlock::create(LLContext, F.Fn, "ref_as_non_null.ok");
996
377
        Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
997
377
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
998
377
            Builder.createExtractElement(Stack.back(), LLContext.getInt64(1)),
999
377
            LLContext.getInt64(0)));
1000
377
        Builder.createCondBr(IsNotNull, Next,
1001
377
                             getTrapBB(ErrCode::Value::CastNullToNonNull));
1002
377
        Builder.positionAtEnd(Next);
1003
377
        break;
1004
7.25k
      }
1005
1006
      // Reference Instructions (GC proposal)
1007
28
      case OpCode::Struct__new:
1008
62
      case OpCode::Struct__new_default: {
1009
62
        LLVM::Value Args = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1010
62
        assuming(Instr.getTargetIndex() < Context.CompositeTypes.size());
1011
62
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1012
62
        assuming(CompType != nullptr && !CompType->isFunc());
1013
62
        auto ArgSize = CompType->getFieldTypes().size();
1014
62
        if (Instr.getOpCode() == OpCode::Struct__new) {
1015
28
          std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1016
29
          for (size_t I = 0; I < ArgSize; ++I) {
1017
1
            ArgsVec[ArgSize - I - 1] = stackPop();
1018
1
          }
1019
28
          Args = Builder.createArray(ArgSize, kValSize);
1020
28
          Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1021
34
        } else {
1022
34
          ArgSize = 0;
1023
34
        }
1024
62
        stackPush(Builder.createCall(
1025
62
            Context.getIntrinsic(
1026
62
                Builder, Executable::Intrinsics::kStructNew,
1027
62
                LLVM::Type::getFunctionType(
1028
62
                    Context.Int64x2Ty,
1029
62
                    {Context.Int32Ty, Context.Int8PtrTy, Context.Int32Ty},
1030
62
                    false)),
1031
62
            {LLContext.getInt32(Instr.getTargetIndex()), Args,
1032
62
             LLContext.getInt32(static_cast<uint32_t>(ArgSize))}));
1033
62
        break;
1034
62
      }
1035
0
      case OpCode::Struct__get:
1036
0
      case OpCode::Struct__get_u:
1037
0
      case OpCode::Struct__get_s: {
1038
0
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1039
0
                 Context.CompositeTypes.size());
1040
0
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1041
0
        assuming(CompType != nullptr && !CompType->isFunc());
1042
0
        assuming(static_cast<size_t>(Instr.getSourceIndex()) <
1043
0
                 CompType->getFieldTypes().size());
1044
0
        const auto &StorageType =
1045
0
            CompType->getFieldTypes()[Instr.getSourceIndex()].getStorageType();
1046
0
        auto Ref = stackPop();
1047
0
        auto IsSigned = (Instr.getOpCode() == OpCode::Struct__get_s)
1048
0
                            ? LLContext.getInt8(1)
1049
0
                            : LLContext.getInt8(0);
1050
0
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1051
0
        Builder.createCall(
1052
0
            Context.getIntrinsic(
1053
0
                Builder, Executable::Intrinsics::kStructGet,
1054
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1055
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1056
0
                                             Context.Int32Ty, Context.Int8Ty,
1057
0
                                             Context.Int8PtrTy},
1058
0
                                            false)),
1059
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1060
0
             LLContext.getInt32(Instr.getSourceIndex()), IsSigned, Ret});
1061
1062
0
        switch (StorageType.getCode()) {
1063
0
        case TypeCode::I8:
1064
0
        case TypeCode::I16:
1065
0
        case TypeCode::I32: {
1066
0
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1067
0
                                               Context.Int64x2Ty));
1068
0
          break;
1069
0
        }
1070
0
        case TypeCode::I64: {
1071
0
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1072
0
                                               Context.Int64x2Ty));
1073
0
          break;
1074
0
        }
1075
0
        case TypeCode::F32: {
1076
0
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1077
0
                                               Context.Int64x2Ty));
1078
0
          break;
1079
0
        }
1080
0
        case TypeCode::F64: {
1081
0
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1082
0
                                               Context.Int64x2Ty));
1083
0
          break;
1084
0
        }
1085
0
        case TypeCode::V128:
1086
0
        case TypeCode::Ref:
1087
0
        case TypeCode::RefNull: {
1088
0
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1089
0
                                               Context.Int64x2Ty));
1090
0
          break;
1091
0
        }
1092
0
        default:
1093
0
          assumingUnreachable();
1094
0
        }
1095
0
        break;
1096
0
      }
1097
0
      case OpCode::Struct__set: {
1098
0
        auto Val = stackPop();
1099
0
        auto Ref = stackPop();
1100
0
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1101
0
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1102
0
        Builder.createCall(
1103
0
            Context.getIntrinsic(Builder, Executable::Intrinsics::kStructSet,
1104
0
                                 LLVM::Type::getFunctionType(
1105
0
                                     Context.VoidTy,
1106
0
                                     {Context.Int64x2Ty, Context.Int32Ty,
1107
0
                                      Context.Int32Ty, Context.Int8PtrTy},
1108
0
                                     false)),
1109
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1110
0
             LLContext.getInt32(Instr.getSourceIndex()), Arg});
1111
0
        break;
1112
0
      }
1113
154
      case OpCode::Array__new: {
1114
154
        auto Length = stackPop();
1115
154
        auto Val = stackPop();
1116
154
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1117
154
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1118
154
        stackPush(Builder.createCall(
1119
154
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1120
154
                                 LLVM::Type::getFunctionType(
1121
154
                                     Context.Int64x2Ty,
1122
154
                                     {Context.Int32Ty, Context.Int32Ty,
1123
154
                                      Context.Int8PtrTy, Context.Int32Ty},
1124
154
                                     false)),
1125
154
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1126
154
             LLContext.getInt32(1)}));
1127
154
        break;
1128
0
      }
1129
23
      case OpCode::Array__new_default: {
1130
23
        auto Length = stackPop();
1131
23
        LLVM::Value Arg = LLVM::Value::getConstPointerNull(Context.Int8PtrTy);
1132
23
        stackPush(Builder.createCall(
1133
23
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1134
23
                                 LLVM::Type::getFunctionType(
1135
23
                                     Context.Int64x2Ty,
1136
23
                                     {Context.Int32Ty, Context.Int32Ty,
1137
23
                                      Context.Int8PtrTy, Context.Int32Ty},
1138
23
                                     false)),
1139
23
            {LLContext.getInt32(Instr.getTargetIndex()), Length, Arg,
1140
23
             LLContext.getInt32(0)}));
1141
23
        break;
1142
0
      }
1143
35
      case OpCode::Array__new_fixed: {
1144
35
        const auto ArgSize = Instr.getSourceIndex();
1145
35
        std::vector<LLVM::Value> ArgsVec(ArgSize, nullptr);
1146
165
        for (size_t I = 0; I < ArgSize; ++I) {
1147
130
          ArgsVec[ArgSize - I - 1] = stackPop();
1148
130
        }
1149
35
        LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
1150
35
        Builder.createArrayPtrStore(ArgsVec, Args, Context.Int8Ty, kValSize);
1151
35
        stackPush(Builder.createCall(
1152
35
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArrayNew,
1153
35
                                 LLVM::Type::getFunctionType(
1154
35
                                     Context.Int64x2Ty,
1155
35
                                     {Context.Int32Ty, Context.Int32Ty,
1156
35
                                      Context.Int8PtrTy, Context.Int32Ty},
1157
35
                                     false)),
1158
35
            {LLContext.getInt32(Instr.getTargetIndex()),
1159
35
             LLContext.getInt32(ArgSize), Args, LLContext.getInt32(ArgSize)}));
1160
35
        break;
1161
0
      }
1162
0
      case OpCode::Array__new_data:
1163
0
      case OpCode::Array__new_elem: {
1164
0
        auto Length = stackPop();
1165
0
        auto Start = stackPop();
1166
0
        stackPush(Builder.createCall(
1167
0
            Context.getIntrinsic(
1168
0
                Builder,
1169
0
                ((Instr.getOpCode() == OpCode::Array__new_data)
1170
0
                     ? Executable::Intrinsics::kArrayNewData
1171
0
                     : Executable::Intrinsics::kArrayNewElem),
1172
0
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1173
0
                                            {Context.Int32Ty, Context.Int32Ty,
1174
0
                                             Context.Int32Ty, Context.Int32Ty},
1175
0
                                            false)),
1176
0
            {LLContext.getInt32(Instr.getTargetIndex()),
1177
0
             LLContext.getInt32(Instr.getSourceIndex()), Start, Length}));
1178
0
        break;
1179
0
      }
1180
131
      case OpCode::Array__get:
1181
165
      case OpCode::Array__get_u:
1182
218
      case OpCode::Array__get_s: {
1183
218
        assuming(static_cast<size_t>(Instr.getTargetIndex()) <
1184
218
                 Context.CompositeTypes.size());
1185
218
        const auto *CompType = Context.CompositeTypes[Instr.getTargetIndex()];
1186
218
        assuming(CompType != nullptr && !CompType->isFunc());
1187
218
        assuming(static_cast<size_t>(1) == CompType->getFieldTypes().size());
1188
218
        const auto &StorageType = CompType->getFieldTypes()[0].getStorageType();
1189
218
        auto Idx = stackPop();
1190
218
        auto Ref = stackPop();
1191
218
        auto IsSigned = (Instr.getOpCode() == OpCode::Array__get_s)
1192
218
                            ? LLContext.getInt8(1)
1193
218
                            : LLContext.getInt8(0);
1194
218
        LLVM::Value Ret = Builder.createAlloca(Context.Int64x2Ty);
1195
218
        Builder.createCall(
1196
218
            Context.getIntrinsic(
1197
218
                Builder, Executable::Intrinsics::kArrayGet,
1198
218
                LLVM::Type::getFunctionType(Context.VoidTy,
1199
218
                                            {Context.Int64x2Ty, Context.Int32Ty,
1200
218
                                             Context.Int32Ty, Context.Int8Ty,
1201
218
                                             Context.Int8PtrTy},
1202
218
                                            false)),
1203
218
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, IsSigned,
1204
218
             Ret});
1205
1206
218
        switch (StorageType.getCode()) {
1207
38
        case TypeCode::I8:
1208
87
        case TypeCode::I16:
1209
110
        case TypeCode::I32: {
1210
110
          stackPush(Builder.createValuePtrLoad(Context.Int32Ty, Ret,
1211
110
                                               Context.Int64x2Ty));
1212
110
          break;
1213
87
        }
1214
21
        case TypeCode::I64: {
1215
21
          stackPush(Builder.createValuePtrLoad(Context.Int64Ty, Ret,
1216
21
                                               Context.Int64x2Ty));
1217
21
          break;
1218
87
        }
1219
20
        case TypeCode::F32: {
1220
20
          stackPush(Builder.createValuePtrLoad(Context.FloatTy, Ret,
1221
20
                                               Context.Int64x2Ty));
1222
20
          break;
1223
87
        }
1224
26
        case TypeCode::F64: {
1225
26
          stackPush(Builder.createValuePtrLoad(Context.DoubleTy, Ret,
1226
26
                                               Context.Int64x2Ty));
1227
26
          break;
1228
87
        }
1229
18
        case TypeCode::V128:
1230
18
        case TypeCode::Ref:
1231
41
        case TypeCode::RefNull: {
1232
41
          stackPush(Builder.createValuePtrLoad(Context.Int64x2Ty, Ret,
1233
41
                                               Context.Int64x2Ty));
1234
41
          break;
1235
18
        }
1236
0
        default:
1237
0
          assumingUnreachable();
1238
218
        }
1239
218
        break;
1240
218
      }
1241
218
      case OpCode::Array__set: {
1242
41
        auto Val = stackPop();
1243
41
        auto Idx = stackPop();
1244
41
        auto Ref = stackPop();
1245
41
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1246
41
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1247
41
        Builder.createCall(
1248
41
            Context.getIntrinsic(Builder, Executable::Intrinsics::kArraySet,
1249
41
                                 LLVM::Type::getFunctionType(
1250
41
                                     Context.VoidTy,
1251
41
                                     {Context.Int64x2Ty, Context.Int32Ty,
1252
41
                                      Context.Int32Ty, Context.Int8PtrTy},
1253
41
                                     false)),
1254
41
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Idx, Arg});
1255
41
        break;
1256
218
      }
1257
66
      case OpCode::Array__len: {
1258
66
        auto Ref = stackPop();
1259
66
        stackPush(Builder.createCall(
1260
66
            Context.getIntrinsic(
1261
66
                Builder, Executable::Intrinsics::kArrayLen,
1262
66
                LLVM::Type::getFunctionType(Context.Int32Ty,
1263
66
                                            {Context.Int64x2Ty}, false)),
1264
66
            {Ref}));
1265
66
        break;
1266
218
      }
1267
10
      case OpCode::Array__fill: {
1268
10
        auto Cnt = stackPop();
1269
10
        auto Val = stackPop();
1270
10
        auto Off = stackPop();
1271
10
        auto Ref = stackPop();
1272
10
        LLVM::Value Arg = Builder.createAlloca(Context.Int64x2Ty);
1273
10
        Builder.createValuePtrStore(Val, Arg, Context.Int64x2Ty);
1274
10
        Builder.createCall(
1275
10
            Context.getIntrinsic(
1276
10
                Builder, Executable::Intrinsics::kArrayFill,
1277
10
                LLVM::Type::getFunctionType(Context.VoidTy,
1278
10
                                            {Context.Int64x2Ty, Context.Int32Ty,
1279
10
                                             Context.Int32Ty, Context.Int32Ty,
1280
10
                                             Context.Int8PtrTy},
1281
10
                                            false)),
1282
10
            {Ref, LLContext.getInt32(Instr.getTargetIndex()), Off, Cnt, Arg});
1283
10
        break;
1284
218
      }
1285
10
      case OpCode::Array__copy: {
1286
10
        auto Cnt = stackPop();
1287
10
        auto SrcOff = stackPop();
1288
10
        auto SrcRef = stackPop();
1289
10
        auto DstOff = stackPop();
1290
10
        auto DstRef = stackPop();
1291
10
        Builder.createCall(
1292
10
            Context.getIntrinsic(
1293
10
                Builder, Executable::Intrinsics::kArrayCopy,
1294
10
                LLVM::Type::getFunctionType(Context.VoidTy,
1295
10
                                            {Context.Int64x2Ty, Context.Int32Ty,
1296
10
                                             Context.Int32Ty, Context.Int64x2Ty,
1297
10
                                             Context.Int32Ty, Context.Int32Ty,
1298
10
                                             Context.Int32Ty},
1299
10
                                            false)),
1300
10
            {DstRef, LLContext.getInt32(Instr.getTargetIndex()), DstOff, SrcRef,
1301
10
             LLContext.getInt32(Instr.getSourceIndex()), SrcOff, Cnt});
1302
10
        break;
1303
218
      }
1304
0
      case OpCode::Array__init_data:
1305
0
      case OpCode::Array__init_elem: {
1306
0
        auto Cnt = stackPop();
1307
0
        auto SrcOff = stackPop();
1308
0
        auto DstOff = stackPop();
1309
0
        auto Ref = stackPop();
1310
0
        Builder.createCall(
1311
0
            Context.getIntrinsic(
1312
0
                Builder,
1313
0
                ((Instr.getOpCode() == OpCode::Array__init_data)
1314
0
                     ? Executable::Intrinsics::kArrayInitData
1315
0
                     : Executable::Intrinsics::kArrayInitElem),
1316
0
                LLVM::Type::getFunctionType(Context.VoidTy,
1317
0
                                            {Context.Int64x2Ty, Context.Int32Ty,
1318
0
                                             Context.Int32Ty, Context.Int32Ty,
1319
0
                                             Context.Int32Ty, Context.Int32Ty},
1320
0
                                            false)),
1321
0
            {Ref, LLContext.getInt32(Instr.getTargetIndex()),
1322
0
             LLContext.getInt32(Instr.getSourceIndex()), DstOff, SrcOff, Cnt});
1323
0
        break;
1324
0
      }
1325
11
      case OpCode::Ref__test:
1326
39
      case OpCode::Ref__test_null: {
1327
39
        auto Ref = stackPop();
1328
39
        std::array<uint8_t, 16> Buf = {0};
1329
39
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1330
39
        auto VType = Builder.createExtractElement(
1331
39
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1332
39
                                  Context.Int64x2Ty),
1333
39
            LLContext.getInt64(0));
1334
39
        stackPush(Builder.createCall(
1335
39
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefTest,
1336
39
                                 LLVM::Type::getFunctionType(
1337
39
                                     Context.Int32Ty,
1338
39
                                     {Context.Int64x2Ty, Context.Int64Ty},
1339
39
                                     false)),
1340
39
            {Ref, VType}));
1341
39
        break;
1342
11
      }
1343
39
      case OpCode::Ref__cast:
1344
54
      case OpCode::Ref__cast_null: {
1345
54
        auto Ref = stackPop();
1346
54
        std::array<uint8_t, 16> Buf = {0};
1347
54
        std::copy_n(Instr.getValType().getRawData().cbegin(), 8, Buf.begin());
1348
54
        auto VType = Builder.createExtractElement(
1349
54
            Builder.createBitCast(LLVM::Value::getConstVector8(LLContext, Buf),
1350
54
                                  Context.Int64x2Ty),
1351
54
            LLContext.getInt64(0));
1352
54
        stackPush(Builder.createCall(
1353
54
            Context.getIntrinsic(Builder, Executable::Intrinsics::kRefCast,
1354
54
                                 LLVM::Type::getFunctionType(
1355
54
                                     Context.Int64x2Ty,
1356
54
                                     {Context.Int64x2Ty, Context.Int64Ty},
1357
54
                                     false)),
1358
54
            {Ref, VType}));
1359
54
        break;
1360
39
      }
1361
11
      case OpCode::Any__convert_extern: {
1362
11
        std::array<uint8_t, 16> RawRef = {0};
1363
11
        auto Ref = stackPop();
1364
11
        auto PtrVal = Builder.createExtractElement(Ref, LLContext.getInt64(1));
1365
11
        auto IsNullBB =
1366
11
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.null");
1367
11
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1368
11
                                                  "any_conv_extern.not_null");
1369
11
        auto IsExtrefBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1370
11
                                                   "any_conv_extern.is_extref");
1371
11
        auto EndBB =
1372
11
            LLVM::BasicBlock::create(LLContext, F.Fn, "any_conv_extern.end");
1373
11
        auto CondIsNull = Builder.createICmpEQ(PtrVal, LLContext.getInt64(0));
1374
11
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1375
1376
11
        Builder.positionAtEnd(IsNullBB);
1377
11
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullRef);
1378
11
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1379
11
        auto Ret1 = Builder.createBitCast(
1380
11
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1381
11
        Builder.createBr(EndBB);
1382
1383
11
        Builder.positionAtEnd(NotNullBB);
1384
11
        auto Ret2 = Builder.createBitCast(
1385
11
            Builder.createInsertElement(
1386
11
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1387
11
                LLContext.getInt8(0), LLContext.getInt64(1)),
1388
11
            Context.Int64x2Ty);
1389
11
        auto HType = Builder.createExtractElement(
1390
11
            Builder.createBitCast(Ret2, Context.Int8x16Ty),
1391
11
            LLContext.getInt64(3));
1392
11
        auto CondIsExtref = Builder.createOr(
1393
11
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1394
11
                                            TypeCode::ExternRef))),
1395
11
            Builder.createICmpEQ(HType, LLContext.getInt8(static_cast<uint8_t>(
1396
11
                                            TypeCode::NullExternRef))));
1397
11
        Builder.createCondBr(CondIsExtref, IsExtrefBB, EndBB);
1398
1399
11
        Builder.positionAtEnd(IsExtrefBB);
1400
11
        VT = ValType(TypeCode::Ref, TypeCode::AnyRef);
1401
11
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1402
11
        auto Ret3 = Builder.createInsertElement(
1403
11
            Builder.createBitCast(
1404
11
                LLVM::Value::getConstVector8(LLContext, RawRef),
1405
11
                Context.Int64x2Ty),
1406
11
            PtrVal, LLContext.getInt64(1));
1407
11
        Builder.createBr(EndBB);
1408
1409
11
        Builder.positionAtEnd(EndBB);
1410
11
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1411
11
        Ret.addIncoming(Ret1, IsNullBB);
1412
11
        Ret.addIncoming(Ret2, NotNullBB);
1413
11
        Ret.addIncoming(Ret3, IsExtrefBB);
1414
11
        stackPush(Ret);
1415
11
        break;
1416
39
      }
1417
41
      case OpCode::Extern__convert_any: {
1418
41
        std::array<uint8_t, 16> RawRef = {0};
1419
41
        auto Ref = stackPop();
1420
41
        auto IsNullBB =
1421
41
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.null");
1422
41
        auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn,
1423
41
                                                  "extern_conv_any.not_null");
1424
41
        auto EndBB =
1425
41
            LLVM::BasicBlock::create(LLContext, F.Fn, "extern_conv_any.end");
1426
41
        auto CondIsNull = Builder.createICmpEQ(
1427
41
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1428
41
            LLContext.getInt64(0));
1429
41
        Builder.createCondBr(CondIsNull, IsNullBB, NotNullBB);
1430
1431
41
        Builder.positionAtEnd(IsNullBB);
1432
41
        auto VT = ValType(TypeCode::RefNull, TypeCode::NullExternRef);
1433
41
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1434
41
        auto Ret1 = Builder.createBitCast(
1435
41
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1436
41
        Builder.createBr(EndBB);
1437
1438
41
        Builder.positionAtEnd(NotNullBB);
1439
41
        auto Ret2 = Builder.createBitCast(
1440
41
            Builder.createInsertElement(
1441
41
                Builder.createBitCast(Ref, Context.Int8x16Ty),
1442
41
                LLContext.getInt8(1), LLContext.getInt64(1)),
1443
41
            Context.Int64x2Ty);
1444
41
        Builder.createBr(EndBB);
1445
1446
41
        Builder.positionAtEnd(EndBB);
1447
41
        auto Ret = Builder.createPHI(Context.Int64x2Ty);
1448
41
        Ret.addIncoming(Ret1, IsNullBB);
1449
41
        Ret.addIncoming(Ret2, NotNullBB);
1450
41
        stackPush(Ret);
1451
41
        break;
1452
39
      }
1453
91
      case OpCode::Ref__i31: {
1454
91
        std::array<uint8_t, 16> RawRef = {0};
1455
91
        auto VT = ValType(TypeCode::Ref, TypeCode::I31Ref);
1456
91
        std::copy_n(VT.getRawData().cbegin(), 8, RawRef.begin());
1457
91
        auto Ref = Builder.createBitCast(
1458
91
            LLVM::Value::getConstVector8(LLContext, RawRef), Context.Int64x2Ty);
1459
91
        auto Val = Builder.createZExt(
1460
91
            Builder.createOr(
1461
91
                Builder.createAnd(stackPop(), LLContext.getInt32(0x7FFFFFFFU)),
1462
91
                LLContext.getInt32(0x80000000U)),
1463
91
            Context.Int64Ty);
1464
91
        stackPush(Builder.createInsertElement(Ref, Val, LLContext.getInt64(1)));
1465
91
        break;
1466
39
      }
1467
41
      case OpCode::I31__get_s: {
1468
41
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1469
41
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1470
41
        auto Val = Builder.createTrunc(
1471
41
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1472
41
            Context.Int32Ty);
1473
41
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1474
41
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1475
41
            LLContext.getInt32(0)));
1476
41
        Builder.createCondBr(IsNotNull, Next,
1477
41
                             getTrapBB(ErrCode::Value::AccessNullI31));
1478
41
        Builder.positionAtEnd(Next);
1479
41
        Val = Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU));
1480
41
        stackPush(Builder.createOr(
1481
41
            Val, Builder.createShl(
1482
41
                     Builder.createAnd(Val, LLContext.getInt32(0x40000000U)),
1483
41
                     LLContext.getInt32(1))));
1484
41
        break;
1485
39
      }
1486
19
      case OpCode::I31__get_u: {
1487
19
        auto Next = LLVM::BasicBlock::create(LLContext, F.Fn, "i31.get.ok");
1488
19
        auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
1489
19
        auto Val = Builder.createTrunc(
1490
19
            Builder.createExtractElement(Ref, LLContext.getInt64(1)),
1491
19
            Context.Int32Ty);
1492
19
        auto IsNotNull = Builder.createLikely(Builder.createICmpNE(
1493
19
            Builder.createAnd(Val, LLContext.getInt32(0x80000000U)),
1494
19
            LLContext.getInt32(0)));
1495
19
        Builder.createCondBr(IsNotNull, Next,
1496
19
                             getTrapBB(ErrCode::Value::AccessNullI31));
1497
19
        Builder.positionAtEnd(Next);
1498
19
        stackPush(Builder.createAnd(Val, LLContext.getInt32(0x7FFFFFFFU)));
1499
19
        break;
1500
39
      }
1501
1502
      // Parametric Instructions
1503
3.42k
      case OpCode::Drop:
1504
3.42k
        stackPop();
1505
3.42k
        break;
1506
691
      case OpCode::Select:
1507
1.11k
      case OpCode::Select_t: {
1508
1.11k
        auto Cond = Builder.createICmpNE(stackPop(), LLContext.getInt32(0));
1509
1.11k
        auto False = stackPop();
1510
1.11k
        auto True = stackPop();
1511
1.11k
        stackPush(Builder.createSelect(Cond, True, False));
1512
1.11k
        break;
1513
691
      }
1514
1515
      // Variable Instructions
1516
11.4k
      case OpCode::Local__get: {
1517
11.4k
        const auto &L = Local[Instr.getTargetIndex()];
1518
11.4k
        stackPush(Builder.createLoad(L.first, L.second));
1519
11.4k
        break;
1520
691
      }
1521
3.95k
      case OpCode::Local__set:
1522
3.95k
        Builder.createStore(stackPop(), Local[Instr.getTargetIndex()].second);
1523
3.95k
        break;
1524
796
      case OpCode::Local__tee:
1525
796
        Builder.createStore(Stack.back(), Local[Instr.getTargetIndex()].second);
1526
796
        break;
1527
373
      case OpCode::Global__get: {
1528
373
        const auto G =
1529
373
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex());
1530
373
        stackPush(Builder.createLoad(G.first, G.second));
1531
373
        break;
1532
691
      }
1533
95
      case OpCode::Global__set:
1534
95
        Builder.createStore(
1535
95
            stackPop(),
1536
95
            Context.getGlobal(Builder, ExecCtx, Instr.getTargetIndex()).second);
1537
95
        break;
1538
1539
      // Table Instructions
1540
40
      case OpCode::Table__get: {
1541
40
        auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
1542
40
        stackPush(Builder.createCall(
1543
40
            Context.getIntrinsic(
1544
40
                Builder, Executable::Intrinsics::kTableGet,
1545
40
                LLVM::Type::getFunctionType(Context.Int64x2Ty,
1546
40
                                            {Context.Int32Ty, Context.Int64Ty},
1547
40
                                            false)),
1548
40
            {LLContext.getInt32(Instr.getTargetIndex()), Off}));
1549
40
        break;
1550
691
      }
1551
32
      case OpCode::Table__set: {
1552
32
        auto Ref = stackPop();
1553
32
        auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
1554
32
        Builder.createCall(
1555
32
            Context.getIntrinsic(
1556
32
                Builder, Executable::Intrinsics::kTableSet,
1557
32
                LLVM::Type::getFunctionType(
1558
32
                    Context.Int64Ty,
1559
32
                    {Context.Int32Ty, Context.Int64Ty, Context.Int64x2Ty},
1560
32
                    false)),
1561
32
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Ref});
1562
32
        break;
1563
691
      }
1564
24
      case OpCode::Table__init: {
1565
24
        auto Len = stackPop();
1566
24
        auto Src = stackPop();
1567
24
        auto Dst = Builder.createZExt(stackPop(), Context.Int64Ty);
1568
24
        Builder.createCall(
1569
24
            Context.getIntrinsic(
1570
24
                Builder, Executable::Intrinsics::kTableInit,
1571
24
                LLVM::Type::getFunctionType(Context.VoidTy,
1572
24
                                            {Context.Int32Ty, Context.Int32Ty,
1573
24
                                             Context.Int64Ty, Context.Int32Ty,
1574
24
                                             Context.Int32Ty},
1575
24
                                            false)),
1576
24
            {LLContext.getInt32(Instr.getTargetIndex()),
1577
24
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1578
24
        break;
1579
691
      }
1580
33
      case OpCode::Elem__drop: {
1581
33
        Builder.createCall(
1582
33
            Context.getIntrinsic(Builder, Executable::Intrinsics::kElemDrop,
1583
33
                                 LLVM::Type::getFunctionType(
1584
33
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1585
33
            {LLContext.getInt32(Instr.getTargetIndex())});
1586
33
        break;
1587
691
      }
1588
16
      case OpCode::Table__copy: {
1589
16
        auto Len = Builder.createZExt(stackPop(), Context.Int64Ty);
1590
16
        auto Src = Builder.createZExt(stackPop(), Context.Int64Ty);
1591
16
        auto Dst = Builder.createZExt(stackPop(), Context.Int64Ty);
1592
16
        Builder.createCall(
1593
16
            Context.getIntrinsic(
1594
16
                Builder, Executable::Intrinsics::kTableCopy,
1595
16
                LLVM::Type::getFunctionType(Context.VoidTy,
1596
16
                                            {Context.Int32Ty, Context.Int32Ty,
1597
16
                                             Context.Int64Ty, Context.Int64Ty,
1598
16
                                             Context.Int64Ty},
1599
16
                                            false)),
1600
16
            {LLContext.getInt32(Instr.getTargetIndex()),
1601
16
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1602
16
        break;
1603
691
      }
1604
15
      case OpCode::Table__grow: {
1605
15
        auto NewSize = Builder.createZExt(stackPop(), Context.Int64Ty);
1606
15
        auto Val = stackPop();
1607
15
        stackPush(Builder.createTrunc(
1608
15
            Builder.createCall(
1609
15
                Context.getIntrinsic(
1610
15
                    Builder, Executable::Intrinsics::kTableGrow,
1611
15
                    LLVM::Type::getFunctionType(
1612
15
                        Context.Int64Ty,
1613
15
                        {Context.Int32Ty, Context.Int64x2Ty, Context.Int64Ty},
1614
15
                        false)),
1615
15
                {LLContext.getInt32(Instr.getTargetIndex()), Val, NewSize}),
1616
15
            Context.TableAddrTypes[Instr.getTargetIndex()]));
1617
15
        break;
1618
691
      }
1619
16
      case OpCode::Table__size: {
1620
16
        stackPush(Builder.createTrunc(
1621
16
            Builder.createCall(
1622
16
                Context.getIntrinsic(
1623
16
                    Builder, Executable::Intrinsics::kTableSize,
1624
16
                    LLVM::Type::getFunctionType(Context.Int64Ty,
1625
16
                                                {Context.Int32Ty}, false)),
1626
16
                {LLContext.getInt32(Instr.getTargetIndex())}),
1627
16
            Context.TableAddrTypes[Instr.getTargetIndex()]));
1628
16
        break;
1629
691
      }
1630
3
      case OpCode::Table__fill: {
1631
3
        auto Len = Builder.createZExt(stackPop(), Context.Int64Ty);
1632
3
        auto Val = stackPop();
1633
3
        auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
1634
3
        Builder.createCall(
1635
3
            Context.getIntrinsic(Builder, Executable::Intrinsics::kTableFill,
1636
3
                                 LLVM::Type::getFunctionType(
1637
3
                                     Context.Int32Ty,
1638
3
                                     {Context.Int32Ty, Context.Int64Ty,
1639
3
                                      Context.Int64x2Ty, Context.Int64Ty},
1640
3
                                     false)),
1641
3
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1642
3
        break;
1643
691
      }
1644
1645
      // Memory Instructions
1646
1.29k
      case OpCode::I32__load:
1647
1.29k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1648
1.29k
                      Instr.getMemoryAlign(), Context.Int32Ty);
1649
1.29k
        break;
1650
3.31k
      case OpCode::I64__load:
1651
3.31k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1652
3.31k
                      Instr.getMemoryAlign(), Context.Int64Ty);
1653
3.31k
        break;
1654
111
      case OpCode::F32__load:
1655
111
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1656
111
                      Instr.getMemoryAlign(), Context.FloatTy);
1657
111
        break;
1658
231
      case OpCode::F64__load:
1659
231
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1660
231
                      Instr.getMemoryAlign(), Context.DoubleTy);
1661
231
        break;
1662
691
      case OpCode::I32__load8_s:
1663
691
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1664
691
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1665
691
                      true);
1666
691
        break;
1667
436
      case OpCode::I32__load8_u:
1668
436
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1669
436
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int32Ty,
1670
436
                      false);
1671
436
        break;
1672
520
      case OpCode::I32__load16_s:
1673
520
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1674
520
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1675
520
                      true);
1676
520
        break;
1677
1.58k
      case OpCode::I32__load16_u:
1678
1.58k
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1679
1.58k
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int32Ty,
1680
1.58k
                      false);
1681
1.58k
        break;
1682
798
      case OpCode::I64__load8_s:
1683
798
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1684
798
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1685
798
                      true);
1686
798
        break;
1687
458
      case OpCode::I64__load8_u:
1688
458
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1689
458
                      Instr.getMemoryAlign(), Context.Int8Ty, Context.Int64Ty,
1690
458
                      false);
1691
458
        break;
1692
417
      case OpCode::I64__load16_s:
1693
417
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1694
417
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1695
417
                      true);
1696
417
        break;
1697
678
      case OpCode::I64__load16_u:
1698
678
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1699
678
                      Instr.getMemoryAlign(), Context.Int16Ty, Context.Int64Ty,
1700
678
                      false);
1701
678
        break;
1702
429
      case OpCode::I64__load32_s:
1703
429
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1704
429
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1705
429
                      true);
1706
429
        break;
1707
503
      case OpCode::I64__load32_u:
1708
503
        compileLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1709
503
                      Instr.getMemoryAlign(), Context.Int32Ty, Context.Int64Ty,
1710
503
                      false);
1711
503
        break;
1712
421
      case OpCode::I32__store:
1713
421
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1714
421
                       Instr.getMemoryAlign(), Context.Int32Ty);
1715
421
        break;
1716
1.54k
      case OpCode::I64__store:
1717
1.54k
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1718
1.54k
                       Instr.getMemoryAlign(), Context.Int64Ty);
1719
1.54k
        break;
1720
94
      case OpCode::F32__store:
1721
94
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1722
94
                       Instr.getMemoryAlign(), Context.FloatTy);
1723
94
        break;
1724
54
      case OpCode::F64__store:
1725
54
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1726
54
                       Instr.getMemoryAlign(), Context.DoubleTy);
1727
54
        break;
1728
327
      case OpCode::I32__store8:
1729
347
      case OpCode::I64__store8:
1730
347
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1731
347
                       Instr.getMemoryAlign(), Context.Int8Ty, true);
1732
347
        break;
1733
239
      case OpCode::I32__store16:
1734
352
      case OpCode::I64__store16:
1735
352
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1736
352
                       Instr.getMemoryAlign(), Context.Int16Ty, true);
1737
352
        break;
1738
35
      case OpCode::I64__store32:
1739
35
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
1740
35
                       Instr.getMemoryAlign(), Context.Int32Ty, true);
1741
35
        break;
1742
936
      case OpCode::Memory__size:
1743
936
        stackPush(Builder.createTrunc(
1744
936
            Builder.createCall(
1745
936
                Context.getIntrinsic(
1746
936
                    Builder, Executable::Intrinsics::kMemSize,
1747
936
                    LLVM::Type::getFunctionType(Context.Int64Ty,
1748
936
                                                {Context.Int32Ty}, false)),
1749
936
                {LLContext.getInt32(Instr.getTargetIndex())}),
1750
936
            Context.MemoryAddrTypes[Instr.getTargetIndex()]));
1751
936
        break;
1752
878
      case OpCode::Memory__grow: {
1753
878
        auto NewPageSize = Builder.createZExt(stackPop(), Context.Int64Ty);
1754
878
        stackPush(Builder.createTrunc(
1755
878
            Builder.createCall(
1756
878
                Context.getIntrinsic(Builder, Executable::Intrinsics::kMemGrow,
1757
878
                                     LLVM::Type::getFunctionType(
1758
878
                                         Context.Int64Ty,
1759
878
                                         {Context.Int32Ty, Context.Int64Ty},
1760
878
                                         false)),
1761
878
                {LLContext.getInt32(Instr.getTargetIndex()), NewPageSize}),
1762
878
            Context.MemoryAddrTypes[Instr.getTargetIndex()]));
1763
878
        break;
1764
239
      }
1765
22
      case OpCode::Memory__init: {
1766
22
        auto Len = stackPop();
1767
22
        auto Src = stackPop();
1768
22
        auto Dst = Builder.createZExt(stackPop(), Context.Int64Ty);
1769
22
        Builder.createCall(
1770
22
            Context.getIntrinsic(
1771
22
                Builder, Executable::Intrinsics::kMemInit,
1772
22
                LLVM::Type::getFunctionType(Context.VoidTy,
1773
22
                                            {Context.Int32Ty, Context.Int32Ty,
1774
22
                                             Context.Int64Ty, Context.Int32Ty,
1775
22
                                             Context.Int32Ty},
1776
22
                                            false)),
1777
22
            {LLContext.getInt32(Instr.getTargetIndex()),
1778
22
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1779
22
        break;
1780
239
      }
1781
22
      case OpCode::Data__drop: {
1782
22
        Builder.createCall(
1783
22
            Context.getIntrinsic(Builder, Executable::Intrinsics::kDataDrop,
1784
22
                                 LLVM::Type::getFunctionType(
1785
22
                                     Context.VoidTy, {Context.Int32Ty}, false)),
1786
22
            {LLContext.getInt32(Instr.getTargetIndex())});
1787
22
        break;
1788
239
      }
1789
249
      case OpCode::Memory__copy: {
1790
249
        auto Len = Builder.createZExt(stackPop(), Context.Int64Ty);
1791
249
        auto Src = Builder.createZExt(stackPop(), Context.Int64Ty);
1792
249
        auto Dst = Builder.createZExt(stackPop(), Context.Int64Ty);
1793
249
        Builder.createCall(
1794
249
            Context.getIntrinsic(
1795
249
                Builder, Executable::Intrinsics::kMemCopy,
1796
249
                LLVM::Type::getFunctionType(Context.VoidTy,
1797
249
                                            {Context.Int32Ty, Context.Int32Ty,
1798
249
                                             Context.Int64Ty, Context.Int64Ty,
1799
249
                                             Context.Int64Ty},
1800
249
                                            false)),
1801
249
            {LLContext.getInt32(Instr.getTargetIndex()),
1802
249
             LLContext.getInt32(Instr.getSourceIndex()), Dst, Src, Len});
1803
249
        break;
1804
239
      }
1805
633
      case OpCode::Memory__fill: {
1806
633
        auto Len = Builder.createZExt(stackPop(), Context.Int64Ty);
1807
633
        auto Val = Builder.createTrunc(stackPop(), Context.Int8Ty);
1808
633
        auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
1809
633
        Builder.createCall(
1810
633
            Context.getIntrinsic(
1811
633
                Builder, Executable::Intrinsics::kMemFill,
1812
633
                LLVM::Type::getFunctionType(Context.VoidTy,
1813
633
                                            {Context.Int32Ty, Context.Int64Ty,
1814
633
                                             Context.Int8Ty, Context.Int64Ty},
1815
633
                                            false)),
1816
633
            {LLContext.getInt32(Instr.getTargetIndex()), Off, Val, Len});
1817
633
        break;
1818
239
      }
1819
1820
      // Const Numeric Instructions
1821
617k
      case OpCode::I32__const:
1822
617k
        stackPush(LLContext.getInt32(Instr.getNum().get<uint32_t>()));
1823
617k
        break;
1824
74.5k
      case OpCode::I64__const:
1825
74.5k
        stackPush(LLContext.getInt64(Instr.getNum().get<uint64_t>()));
1826
74.5k
        break;
1827
16.8k
      case OpCode::F32__const:
1828
16.8k
        stackPush(LLContext.getFloat(Instr.getNum().get<float>()));
1829
16.8k
        break;
1830
7.31k
      case OpCode::F64__const:
1831
7.31k
        stackPush(LLContext.getDouble(Instr.getNum().get<double>()));
1832
7.31k
        break;
1833
1834
      // Unary Numeric Instructions
1835
7.74k
      case OpCode::I32__eqz:
1836
7.74k
        stackPush(Builder.createZExt(
1837
7.74k
            Builder.createICmpEQ(stackPop(), LLContext.getInt32(0)),
1838
7.74k
            Context.Int32Ty));
1839
7.74k
        break;
1840
1.24k
      case OpCode::I64__eqz:
1841
1.24k
        stackPush(Builder.createZExt(
1842
1.24k
            Builder.createICmpEQ(stackPop(), LLContext.getInt64(0)),
1843
1.24k
            Context.Int32Ty));
1844
1.24k
        break;
1845
3.15k
      case OpCode::I32__clz:
1846
3.15k
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1847
3.15k
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int32Ty},
1848
3.15k
                                          {stackPop(), LLContext.getFalse()}));
1849
3.15k
        break;
1850
374
      case OpCode::I64__clz:
1851
374
        assuming(LLVM::Core::Ctlz != LLVM::Core::NotIntrinsic);
1852
374
        stackPush(Builder.createIntrinsic(LLVM::Core::Ctlz, {Context.Int64Ty},
1853
374
                                          {stackPop(), LLContext.getFalse()}));
1854
374
        break;
1855
1.70k
      case OpCode::I32__ctz:
1856
1.70k
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1857
1.70k
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int32Ty},
1858
1.70k
                                          {stackPop(), LLContext.getFalse()}));
1859
1.70k
        break;
1860
574
      case OpCode::I64__ctz:
1861
574
        assuming(LLVM::Core::Cttz != LLVM::Core::NotIntrinsic);
1862
574
        stackPush(Builder.createIntrinsic(LLVM::Core::Cttz, {Context.Int64Ty},
1863
574
                                          {stackPop(), LLContext.getFalse()}));
1864
574
        break;
1865
18.7k
      case OpCode::I32__popcnt:
1866
20.7k
      case OpCode::I64__popcnt:
1867
20.7k
        assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
1868
20.7k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, stackPop()));
1869
20.7k
        break;
1870
826
      case OpCode::F32__abs:
1871
1.61k
      case OpCode::F64__abs:
1872
1.61k
        assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
1873
1.61k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Fabs, stackPop()));
1874
1.61k
        break;
1875
860
      case OpCode::F32__neg:
1876
1.47k
      case OpCode::F64__neg:
1877
1.47k
        stackPush(Builder.createFNeg(stackPop()));
1878
1.47k
        break;
1879
1.49k
      case OpCode::F32__ceil:
1880
3.90k
      case OpCode::F64__ceil:
1881
3.90k
        assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
1882
3.90k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Ceil, stackPop()));
1883
3.90k
        break;
1884
690
      case OpCode::F32__floor:
1885
1.07k
      case OpCode::F64__floor:
1886
1.07k
        assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
1887
1.07k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Floor, stackPop()));
1888
1.07k
        break;
1889
554
      case OpCode::F32__trunc:
1890
850
      case OpCode::F64__trunc:
1891
850
        assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
1892
850
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Trunc, stackPop()));
1893
850
        break;
1894
661
      case OpCode::F32__nearest:
1895
1.06k
      case OpCode::F64__nearest: {
1896
1.06k
        const bool IsFloat = Instr.getOpCode() == OpCode::F32__nearest;
1897
1.06k
        LLVM::Value Value = stackPop();
1898
1899
1.06k
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
1900
1.06k
        assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
1901
1.06k
        if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
1902
1.06k
          stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, Value));
1903
1.06k
          break;
1904
1.06k
        }
1905
0
#endif
1906
1907
        // VectorSize is only used by the SSE4.1 (x86_64) and NEON (aarch64)
        // paths below.
1908
0
        [[maybe_unused]] const uint32_t VectorSize = IsFloat ? 4 : 2;
1909
0
#if defined(__x86_64__)
1910
0
        if (Context.SupportSSE4_1) {
1911
0
          auto Zero = LLContext.getInt64(0);
1912
0
          auto VectorTy =
1913
0
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1914
0
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1915
0
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1916
0
          auto ID = IsFloat ? LLVM::Core::X86SSE41RoundSs
1917
0
                            : LLVM::Core::X86SSE41RoundSd;
1918
0
          assuming(ID != LLVM::Core::NotIntrinsic);
1919
0
          Ret = Builder.createIntrinsic(ID, {},
1920
0
                                        {Ret, Ret, LLContext.getInt32(8)});
1921
0
          Ret = Builder.createExtractElement(Ret, Zero);
1922
0
          stackPush(Ret);
1923
0
          break;
1924
0
        }
1925
0
#endif
1926
1927
#if defined(__aarch64__)
1928
        if (Context.SupportNEON &&
1929
            LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
1930
          auto Zero = LLContext.getInt64(0);
1931
          auto VectorTy =
1932
              LLVM::Type::getVectorType(Value.getType(), VectorSize);
1933
          LLVM::Value Ret = LLVM::Value::getUndef(VectorTy);
1934
          Ret = Builder.createInsertElement(Ret, Value, Zero);
1935
          Ret =
1936
              Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, Ret);
1937
          Ret = Builder.createExtractElement(Ret, Zero);
1938
          stackPush(Ret);
1939
          break;
1940
        }
1941
#endif
1942
1943
        // Fallback case.
1944
        // If the SSE4.1 is not supported on the x86_64 platform or
1945
        // the NEON is not supported on the aarch64 platform,
1946
        // then fallback to this.
1947
0
        assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
1948
0
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, Value));
1949
0
        break;
1950
0
      }
1951
398
      case OpCode::F32__sqrt:
1952
1.60k
      case OpCode::F64__sqrt:
1953
1.60k
        assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
1954
1.60k
        stackPush(Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, stackPop()));
1955
1.60k
        break;
1956
333
      case OpCode::I32__wrap_i64:
1957
333
        stackPush(Builder.createTrunc(stackPop(), Context.Int32Ty));
1958
333
        break;
1959
1.36k
      case OpCode::I32__trunc_f32_s:
1960
1.36k
        compileSignedTrunc(Context.Int32Ty);
1961
1.36k
        break;
1962
241
      case OpCode::I32__trunc_f64_s:
1963
241
        compileSignedTrunc(Context.Int32Ty);
1964
241
        break;
1965
189
      case OpCode::I32__trunc_f32_u:
1966
189
        compileUnsignedTrunc(Context.Int32Ty);
1967
189
        break;
1968
1.23k
      case OpCode::I32__trunc_f64_u:
1969
1.23k
        compileUnsignedTrunc(Context.Int32Ty);
1970
1.23k
        break;
1971
2.27k
      case OpCode::I64__extend_i32_s:
1972
2.27k
        stackPush(Builder.createSExt(stackPop(), Context.Int64Ty));
1973
2.27k
        break;
1974
435
      case OpCode::I64__extend_i32_u:
1975
435
        stackPush(Builder.createZExt(stackPop(), Context.Int64Ty));
1976
435
        break;
1977
50
      case OpCode::I64__trunc_f32_s:
1978
50
        compileSignedTrunc(Context.Int64Ty);
1979
50
        break;
1980
391
      case OpCode::I64__trunc_f64_s:
1981
391
        compileSignedTrunc(Context.Int64Ty);
1982
391
        break;
1983
1.04k
      case OpCode::I64__trunc_f32_u:
1984
1.04k
        compileUnsignedTrunc(Context.Int64Ty);
1985
1.04k
        break;
1986
1.34k
      case OpCode::I64__trunc_f64_u:
1987
1.34k
        compileUnsignedTrunc(Context.Int64Ty);
1988
1.34k
        break;
1989
1.83k
      case OpCode::F32__convert_i32_s:
1990
2.21k
      case OpCode::F32__convert_i64_s:
1991
2.21k
        stackPush(Builder.createSIToFP(stackPop(), Context.FloatTy));
1992
2.21k
        break;
1993
653
      case OpCode::F32__convert_i32_u:
1994
1.81k
      case OpCode::F32__convert_i64_u:
1995
1.81k
        stackPush(Builder.createUIToFP(stackPop(), Context.FloatTy));
1996
1.81k
        break;
1997
1.41k
      case OpCode::F64__convert_i32_s:
1998
5.55k
      case OpCode::F64__convert_i64_s:
1999
5.55k
        stackPush(Builder.createSIToFP(stackPop(), Context.DoubleTy));
2000
5.55k
        break;
2001
2.09k
      case OpCode::F64__convert_i32_u:
2002
2.26k
      case OpCode::F64__convert_i64_u:
2003
2.26k
        stackPush(Builder.createUIToFP(stackPop(), Context.DoubleTy));
2004
2.26k
        break;
2005
244
      case OpCode::F32__demote_f64:
2006
244
        stackPush(Builder.createFPTrunc(stackPop(), Context.FloatTy));
2007
244
        break;
2008
85
      case OpCode::F64__promote_f32:
2009
85
        stackPush(Builder.createFPExt(stackPop(), Context.DoubleTy));
2010
85
        break;
2011
1.27k
      case OpCode::I32__reinterpret_f32:
2012
1.27k
        stackPush(Builder.createBitCast(stackPop(), Context.Int32Ty));
2013
1.27k
        break;
2014
657
      case OpCode::I64__reinterpret_f64:
2015
657
        stackPush(Builder.createBitCast(stackPop(), Context.Int64Ty));
2016
657
        break;
2017
4.40k
      case OpCode::F32__reinterpret_i32:
2018
4.40k
        stackPush(Builder.createBitCast(stackPop(), Context.FloatTy));
2019
4.40k
        break;
2020
1.12k
      case OpCode::F64__reinterpret_i64:
2021
1.12k
        stackPush(Builder.createBitCast(stackPop(), Context.DoubleTy));
2022
1.12k
        break;
2023
4.08k
      case OpCode::I32__extend8_s:
2024
4.08k
        stackPush(Builder.createSExt(
2025
4.08k
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int32Ty));
2026
4.08k
        break;
2027
3.08k
      case OpCode::I32__extend16_s:
2028
3.08k
        stackPush(Builder.createSExt(
2029
3.08k
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int32Ty));
2030
3.08k
        break;
2031
497
      case OpCode::I64__extend8_s:
2032
497
        stackPush(Builder.createSExt(
2033
497
            Builder.createTrunc(stackPop(), Context.Int8Ty), Context.Int64Ty));
2034
497
        break;
2035
634
      case OpCode::I64__extend16_s:
2036
634
        stackPush(Builder.createSExt(
2037
634
            Builder.createTrunc(stackPop(), Context.Int16Ty), Context.Int64Ty));
2038
634
        break;
2039
625
      case OpCode::I64__extend32_s:
2040
625
        stackPush(Builder.createSExt(
2041
625
            Builder.createTrunc(stackPop(), Context.Int32Ty), Context.Int64Ty));
2042
625
        break;
2043
2044
      // Binary Numeric Instructions
2045
1.15k
      case OpCode::I32__eq:
2046
1.39k
      case OpCode::I64__eq: {
2047
1.39k
        LLVM::Value RHS = stackPop();
2048
1.39k
        LLVM::Value LHS = stackPop();
2049
1.39k
        stackPush(Builder.createZExt(Builder.createICmpEQ(LHS, RHS),
2050
1.39k
                                     Context.Int32Ty));
2051
1.39k
        break;
2052
1.15k
      }
2053
878
      case OpCode::I32__ne:
2054
897
      case OpCode::I64__ne: {
2055
897
        LLVM::Value RHS = stackPop();
2056
897
        LLVM::Value LHS = stackPop();
2057
897
        stackPush(Builder.createZExt(Builder.createICmpNE(LHS, RHS),
2058
897
                                     Context.Int32Ty));
2059
897
        break;
2060
878
      }
2061
2.80k
      case OpCode::I32__lt_s:
2062
3.41k
      case OpCode::I64__lt_s: {
2063
3.41k
        LLVM::Value RHS = stackPop();
2064
3.41k
        LLVM::Value LHS = stackPop();
2065
3.41k
        stackPush(Builder.createZExt(Builder.createICmpSLT(LHS, RHS),
2066
3.41k
                                     Context.Int32Ty));
2067
3.41k
        break;
2068
2.80k
      }
2069
6.65k
      case OpCode::I32__lt_u:
2070
7.01k
      case OpCode::I64__lt_u: {
2071
7.01k
        LLVM::Value RHS = stackPop();
2072
7.01k
        LLVM::Value LHS = stackPop();
2073
7.01k
        stackPush(Builder.createZExt(Builder.createICmpULT(LHS, RHS),
2074
7.01k
                                     Context.Int32Ty));
2075
7.01k
        break;
2076
6.65k
      }
2077
1.06k
      case OpCode::I32__gt_s:
2078
1.54k
      case OpCode::I64__gt_s: {
2079
1.54k
        LLVM::Value RHS = stackPop();
2080
1.54k
        LLVM::Value LHS = stackPop();
2081
1.54k
        stackPush(Builder.createZExt(Builder.createICmpSGT(LHS, RHS),
2082
1.54k
                                     Context.Int32Ty));
2083
1.54k
        break;
2084
1.06k
      }
2085
7.26k
      case OpCode::I32__gt_u:
2086
7.48k
      case OpCode::I64__gt_u: {
2087
7.48k
        LLVM::Value RHS = stackPop();
2088
7.48k
        LLVM::Value LHS = stackPop();
2089
7.48k
        stackPush(Builder.createZExt(Builder.createICmpUGT(LHS, RHS),
2090
7.48k
                                     Context.Int32Ty));
2091
7.48k
        break;
2092
7.26k
      }
2093
2.15k
      case OpCode::I32__le_s:
2094
2.99k
      case OpCode::I64__le_s: {
2095
2.99k
        LLVM::Value RHS = stackPop();
2096
2.99k
        LLVM::Value LHS = stackPop();
2097
2.99k
        stackPush(Builder.createZExt(Builder.createICmpSLE(LHS, RHS),
2098
2.99k
                                     Context.Int32Ty));
2099
2.99k
        break;
2100
2.15k
      }
2101
463
      case OpCode::I32__le_u:
2102
1.81k
      case OpCode::I64__le_u: {
2103
1.81k
        LLVM::Value RHS = stackPop();
2104
1.81k
        LLVM::Value LHS = stackPop();
2105
1.81k
        stackPush(Builder.createZExt(Builder.createICmpULE(LHS, RHS),
2106
1.81k
                                     Context.Int32Ty));
2107
1.81k
        break;
2108
463
      }
2109
1.13k
      case OpCode::I32__ge_s:
2110
1.17k
      case OpCode::I64__ge_s: {
2111
1.17k
        LLVM::Value RHS = stackPop();
2112
1.17k
        LLVM::Value LHS = stackPop();
2113
1.17k
        stackPush(Builder.createZExt(Builder.createICmpSGE(LHS, RHS),
2114
1.17k
                                     Context.Int32Ty));
2115
1.17k
        break;
2116
1.13k
      }
2117
1.64k
      case OpCode::I32__ge_u:
2118
2.31k
      case OpCode::I64__ge_u: {
2119
2.31k
        LLVM::Value RHS = stackPop();
2120
2.31k
        LLVM::Value LHS = stackPop();
2121
2.31k
        stackPush(Builder.createZExt(Builder.createICmpUGE(LHS, RHS),
2122
2.31k
                                     Context.Int32Ty));
2123
2.31k
        break;
2124
1.64k
      }
2125
170
      case OpCode::F32__eq:
2126
227
      case OpCode::F64__eq: {
2127
227
        LLVM::Value RHS = stackPop();
2128
227
        LLVM::Value LHS = stackPop();
2129
227
        stackPush(Builder.createZExt(Builder.createFCmpOEQ(LHS, RHS),
2130
227
                                     Context.Int32Ty));
2131
227
        break;
2132
170
      }
2133
81
      case OpCode::F32__ne:
2134
107
      case OpCode::F64__ne: {
2135
107
        LLVM::Value RHS = stackPop();
2136
107
        LLVM::Value LHS = stackPop();
2137
107
        stackPush(Builder.createZExt(Builder.createFCmpUNE(LHS, RHS),
2138
107
                                     Context.Int32Ty));
2139
107
        break;
2140
81
      }
2141
174
      case OpCode::F32__lt:
2142
294
      case OpCode::F64__lt: {
2143
294
        LLVM::Value RHS = stackPop();
2144
294
        LLVM::Value LHS = stackPop();
2145
294
        stackPush(Builder.createZExt(Builder.createFCmpOLT(LHS, RHS),
2146
294
                                     Context.Int32Ty));
2147
294
        break;
2148
174
      }
2149
130
      case OpCode::F32__gt:
2150
182
      case OpCode::F64__gt: {
2151
182
        LLVM::Value RHS = stackPop();
2152
182
        LLVM::Value LHS = stackPop();
2153
182
        stackPush(Builder.createZExt(Builder.createFCmpOGT(LHS, RHS),
2154
182
                                     Context.Int32Ty));
2155
182
        break;
2156
130
      }
2157
84
      case OpCode::F32__le:
2158
193
      case OpCode::F64__le: {
2159
193
        LLVM::Value RHS = stackPop();
2160
193
        LLVM::Value LHS = stackPop();
2161
193
        stackPush(Builder.createZExt(Builder.createFCmpOLE(LHS, RHS),
2162
193
                                     Context.Int32Ty));
2163
193
        break;
2164
84
      }
2165
213
      case OpCode::F32__ge:
2166
240
      case OpCode::F64__ge: {
2167
240
        LLVM::Value RHS = stackPop();
2168
240
        LLVM::Value LHS = stackPop();
2169
240
        stackPush(Builder.createZExt(Builder.createFCmpOGE(LHS, RHS),
2170
240
                                     Context.Int32Ty));
2171
240
        break;
2172
213
      }
2173
731
      case OpCode::I32__add:
2174
1.20k
      case OpCode::I64__add: {
2175
1.20k
        LLVM::Value RHS = stackPop();
2176
1.20k
        LLVM::Value LHS = stackPop();
2177
1.20k
        stackPush(Builder.createAdd(LHS, RHS));
2178
1.20k
        break;
2179
731
      }
2180
2.07k
      case OpCode::I32__sub:
2181
2.53k
      case OpCode::I64__sub: {
2182
2.53k
        LLVM::Value RHS = stackPop();
2183
2.53k
        LLVM::Value LHS = stackPop();
2184
2185
2.53k
        stackPush(Builder.createSub(LHS, RHS));
2186
2.53k
        break;
2187
2.07k
      }
2188
611
      case OpCode::I32__mul:
2189
1.07k
      case OpCode::I64__mul: {
2190
1.07k
        LLVM::Value RHS = stackPop();
2191
1.07k
        LLVM::Value LHS = stackPop();
2192
1.07k
        stackPush(Builder.createMul(LHS, RHS));
2193
1.07k
        break;
2194
611
      }
2195
1.22k
      case OpCode::I32__div_s:
2196
1.57k
      case OpCode::I64__div_s: {
2197
1.57k
        LLVM::Value RHS = stackPop();
2198
1.57k
        LLVM::Value LHS = stackPop();
2199
1.57k
        if constexpr (kForceDivCheck) {
2200
1.57k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_s;
2201
1.57k
          LLVM::Value IntZero =
2202
1.57k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2203
1.57k
          LLVM::Value IntMinusOne =
2204
1.57k
              Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2205
1.57k
                   : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2206
1.57k
          LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2207
1.22k
                                          std::numeric_limits<int32_t>::min()))
2208
1.57k
                                    : LLContext.getInt64(static_cast<uint64_t>(
2209
344
                                          std::numeric_limits<int64_t>::min()));
2210
2211
1.57k
          auto NoZeroBB =
2212
1.57k
              LLVM::BasicBlock::create(LLContext, F.Fn, "div.nozero");
2213
1.57k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2214
2215
1.57k
          auto IsNotZero =
2216
1.57k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2217
1.57k
          Builder.createCondBr(IsNotZero, NoZeroBB,
2218
1.57k
                               getTrapBB(ErrCode::Value::DivideByZero));
2219
2220
1.57k
          Builder.positionAtEnd(NoZeroBB);
2221
1.57k
          auto NotOverflow = Builder.createLikely(
2222
1.57k
              Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2223
1.57k
                               Builder.createICmpNE(RHS, IntMinusOne)));
2224
1.57k
          Builder.createCondBr(NotOverflow, OkBB,
2225
1.57k
                               getTrapBB(ErrCode::Value::IntegerOverflow));
2226
2227
1.57k
          Builder.positionAtEnd(OkBB);
2228
1.57k
        }
2229
1.57k
        stackPush(Builder.createSDiv(LHS, RHS));
2230
1.57k
        break;
2231
1.22k
      }
2232
3.21k
      case OpCode::I32__div_u:
2233
3.53k
      case OpCode::I64__div_u: {
2234
3.53k
        LLVM::Value RHS = stackPop();
2235
3.53k
        LLVM::Value LHS = stackPop();
2236
3.53k
        if constexpr (kForceDivCheck) {
2237
3.53k
          const bool Is32 = Instr.getOpCode() == OpCode::I32__div_u;
2238
3.53k
          LLVM::Value IntZero =
2239
3.53k
              Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2240
3.53k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "div.ok");
2241
2242
3.53k
          auto IsNotZero =
2243
3.53k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2244
3.53k
          Builder.createCondBr(IsNotZero, OkBB,
2245
3.53k
                               getTrapBB(ErrCode::Value::DivideByZero));
2246
3.53k
          Builder.positionAtEnd(OkBB);
2247
3.53k
        }
2248
3.53k
        stackPush(Builder.createUDiv(LHS, RHS));
2249
3.53k
        break;
2250
3.21k
      }
2251
1.22k
      case OpCode::I32__rem_s:
2252
1.66k
      case OpCode::I64__rem_s: {
2253
1.66k
        LLVM::Value RHS = stackPop();
2254
1.66k
        LLVM::Value LHS = stackPop();
2255
        // handle INT32_MIN % -1
2256
1.66k
        const bool Is32 = Instr.getOpCode() == OpCode::I32__rem_s;
2257
1.66k
        LLVM::Value IntMinusOne =
2258
1.66k
            Is32 ? LLContext.getInt32(static_cast<uint32_t>(INT32_C(-1)))
2259
1.66k
                 : LLContext.getInt64(static_cast<uint64_t>(INT64_C(-1)));
2260
1.66k
        LLVM::Value IntMin = Is32 ? LLContext.getInt32(static_cast<uint32_t>(
2261
1.22k
                                        std::numeric_limits<int32_t>::min()))
2262
1.66k
                                  : LLContext.getInt64(static_cast<uint64_t>(
2263
446
                                        std::numeric_limits<int64_t>::min()));
2264
1.66k
        LLVM::Value IntZero =
2265
1.66k
            Is32 ? LLContext.getInt32(0) : LLContext.getInt64(0);
2266
2267
1.66k
        auto NoOverflowBB =
2268
1.66k
            LLVM::BasicBlock::create(LLContext, F.Fn, "no.overflow");
2269
1.66k
        auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "end.overflow");
2270
2271
1.66k
        if constexpr (kForceDivCheck) {
2272
1.66k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2273
2274
1.66k
          auto IsNotZero =
2275
1.66k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2276
1.66k
          Builder.createCondBr(IsNotZero, OkBB,
2277
1.66k
                               getTrapBB(ErrCode::Value::DivideByZero));
2278
1.66k
          Builder.positionAtEnd(OkBB);
2279
1.66k
        }
2280
2281
1.66k
        auto CurrBB = Builder.getInsertBlock();
2282
2283
1.66k
        auto NotOverflow = Builder.createLikely(
2284
1.66k
            Builder.createOr(Builder.createICmpNE(LHS, IntMin),
2285
1.66k
                             Builder.createICmpNE(RHS, IntMinusOne)));
2286
1.66k
        Builder.createCondBr(NotOverflow, NoOverflowBB, EndBB);
2287
2288
1.66k
        Builder.positionAtEnd(NoOverflowBB);
2289
1.66k
        auto Ret1 = Builder.createSRem(LHS, RHS);
2290
1.66k
        Builder.createBr(EndBB);
2291
2292
1.66k
        Builder.positionAtEnd(EndBB);
2293
1.66k
        auto Ret = Builder.createPHI(Ret1.getType());
2294
1.66k
        Ret.addIncoming(Ret1, NoOverflowBB);
2295
1.66k
        Ret.addIncoming(IntZero, CurrBB);
2296
2297
1.66k
        stackPush(Ret);
2298
1.66k
        break;
2299
1.22k
      }
2300
2.05k
      case OpCode::I32__rem_u:
2301
2.78k
      case OpCode::I64__rem_u: {
2302
2.78k
        LLVM::Value RHS = stackPop();
2303
2.78k
        LLVM::Value LHS = stackPop();
2304
2.78k
        if constexpr (kForceDivCheck) {
2305
2.78k
          LLVM::Value IntZero = Instr.getOpCode() == OpCode::I32__rem_u
2306
2.78k
                                    ? LLContext.getInt32(0)
2307
2.78k
                                    : LLContext.getInt64(0);
2308
2.78k
          auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rem.ok");
2309
2310
2.78k
          auto IsNotZero =
2311
2.78k
              Builder.createLikely(Builder.createICmpNE(RHS, IntZero));
2312
2.78k
          Builder.createCondBr(IsNotZero, OkBB,
2313
2.78k
                               getTrapBB(ErrCode::Value::DivideByZero));
2314
2.78k
          Builder.positionAtEnd(OkBB);
2315
2.78k
        }
2316
2.78k
        stackPush(Builder.createURem(LHS, RHS));
2317
2.78k
        break;
2318
2.05k
      }
2319
632
      case OpCode::I32__and:
2320
1.97k
      case OpCode::I64__and: {
2321
1.97k
        LLVM::Value RHS = stackPop();
2322
1.97k
        LLVM::Value LHS = stackPop();
2323
1.97k
        stackPush(Builder.createAnd(LHS, RHS));
2324
1.97k
        break;
2325
632
      }
2326
1.32k
      case OpCode::I32__or:
2327
1.68k
      case OpCode::I64__or: {
2328
1.68k
        LLVM::Value RHS = stackPop();
2329
1.68k
        LLVM::Value LHS = stackPop();
2330
1.68k
        stackPush(Builder.createOr(LHS, RHS));
2331
1.68k
        break;
2332
1.32k
      }
2333
1.53k
      case OpCode::I32__xor:
2334
2.16k
      case OpCode::I64__xor: {
2335
2.16k
        LLVM::Value RHS = stackPop();
2336
2.16k
        LLVM::Value LHS = stackPop();
2337
2.16k
        stackPush(Builder.createXor(LHS, RHS));
2338
2.16k
        break;
2339
1.53k
      }
2340
1.88k
      case OpCode::I32__shl:
2341
2.23k
      case OpCode::I64__shl: {
2342
2.23k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shl
2343
2.23k
                               ? LLContext.getInt32(31)
2344
2.23k
                               : LLContext.getInt64(63);
2345
2.23k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2346
2.23k
        LLVM::Value LHS = stackPop();
2347
2.23k
        stackPush(Builder.createShl(LHS, RHS));
2348
2.23k
        break;
2349
1.88k
      }
2350
2.46k
      case OpCode::I32__shr_s:
2351
2.86k
      case OpCode::I64__shr_s: {
2352
2.86k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_s
2353
2.86k
                               ? LLContext.getInt32(31)
2354
2.86k
                               : LLContext.getInt64(63);
2355
2.86k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2356
2.86k
        LLVM::Value LHS = stackPop();
2357
2.86k
        stackPush(Builder.createAShr(LHS, RHS));
2358
2.86k
        break;
2359
2.46k
      }
2360
4.21k
      case OpCode::I32__shr_u:
2361
4.50k
      case OpCode::I64__shr_u: {
2362
4.50k
        LLVM::Value Mask = Instr.getOpCode() == OpCode::I32__shr_u
2363
4.50k
                               ? LLContext.getInt32(31)
2364
4.50k
                               : LLContext.getInt64(63);
2365
4.50k
        LLVM::Value RHS = Builder.createAnd(stackPop(), Mask);
2366
4.50k
        LLVM::Value LHS = stackPop();
2367
4.50k
        stackPush(Builder.createLShr(LHS, RHS));
2368
4.50k
        break;
2369
4.21k
      }
2370
2.57k
      case OpCode::I32__rotl: {
2371
2.57k
        LLVM::Value RHS = stackPop();
2372
2.57k
        LLVM::Value LHS = stackPop();
2373
2.57k
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2374
2.57k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int32Ty},
2375
2.57k
                                          {LHS, LHS, RHS}));
2376
2.57k
        break;
2377
2.57k
      }
2378
909
      case OpCode::I32__rotr: {
2379
909
        LLVM::Value RHS = stackPop();
2380
909
        LLVM::Value LHS = stackPop();
2381
909
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2382
909
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int32Ty},
2383
909
                                          {LHS, LHS, RHS}));
2384
909
        break;
2385
909
      }
2386
911
      case OpCode::I64__rotl: {
2387
911
        LLVM::Value RHS = stackPop();
2388
911
        LLVM::Value LHS = stackPop();
2389
911
        assuming(LLVM::Core::FShl != LLVM::Core::NotIntrinsic);
2390
911
        stackPush(Builder.createIntrinsic(LLVM::Core::FShl, {Context.Int64Ty},
2391
911
                                          {LHS, LHS, RHS}));
2392
911
        break;
2393
911
      }
2394
1.31k
      case OpCode::I64__rotr: {
2395
1.31k
        LLVM::Value RHS = stackPop();
2396
1.31k
        LLVM::Value LHS = stackPop();
2397
1.31k
        assuming(LLVM::Core::FShr != LLVM::Core::NotIntrinsic);
2398
1.31k
        stackPush(Builder.createIntrinsic(LLVM::Core::FShr, {Context.Int64Ty},
2399
1.31k
                                          {LHS, LHS, RHS}));
2400
1.31k
        break;
2401
1.31k
      }
2402
285
      case OpCode::F32__add:
2403
591
      case OpCode::F64__add: {
2404
591
        LLVM::Value RHS = stackPop();
2405
591
        LLVM::Value LHS = stackPop();
2406
591
        stackPush(Builder.createFAdd(LHS, RHS));
2407
591
        break;
2408
285
      }
2409
144
      case OpCode::F32__sub:
2410
497
      case OpCode::F64__sub: {
2411
497
        LLVM::Value RHS = stackPop();
2412
497
        LLVM::Value LHS = stackPop();
2413
497
        stackPush(Builder.createFSub(LHS, RHS));
2414
497
        break;
2415
144
      }
2416
441
      case OpCode::F32__mul:
2417
578
      case OpCode::F64__mul: {
2418
578
        LLVM::Value RHS = stackPop();
2419
578
        LLVM::Value LHS = stackPop();
2420
578
        stackPush(Builder.createFMul(LHS, RHS));
2421
578
        break;
2422
441
      }
2423
188
      case OpCode::F32__div:
2424
472
      case OpCode::F64__div: {
2425
472
        LLVM::Value RHS = stackPop();
2426
472
        LLVM::Value LHS = stackPop();
2427
472
        stackPush(Builder.createFDiv(LHS, RHS));
2428
472
        break;
2429
188
      }
2430
285
      case OpCode::F32__min:
2431
613
      case OpCode::F64__min: {
2432
613
        LLVM::Value RHS = stackPop();
2433
613
        LLVM::Value LHS = stackPop();
2434
613
        auto FpTy = Instr.getOpCode() == OpCode::F32__min ? Context.FloatTy
2435
613
                                                          : Context.DoubleTy;
2436
613
        auto IntTy = Instr.getOpCode() == OpCode::F32__min ? Context.Int32Ty
2437
613
                                                           : Context.Int64Ty;
2438
2439
613
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2440
613
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2441
2442
613
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2443
613
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2444
613
        auto OrInt = Builder.createOr(LHSInt, RHSInt);
2445
613
        auto OrFp = Builder.createBitCast(OrInt, FpTy);
2446
2447
613
        auto AddFp = Builder.createFAdd(LHS, RHS);
2448
2449
613
        assuming(LLVM::Core::MinNum != LLVM::Core::NotIntrinsic);
2450
613
        auto MinFp = Builder.createIntrinsic(LLVM::Core::MinNum,
2451
613
                                             {LHS.getType()}, {LHS, RHS});
2452
2453
613
        auto Ret = Builder.createSelect(
2454
613
            UEQ, Builder.createSelect(UNO, AddFp, OrFp), MinFp);
2455
613
        stackPush(Ret);
2456
613
        break;
2457
613
      }
2458
339
      case OpCode::F32__max:
2459
807
      case OpCode::F64__max: {
2460
807
        LLVM::Value RHS = stackPop();
2461
807
        LLVM::Value LHS = stackPop();
2462
807
        auto FpTy = Instr.getOpCode() == OpCode::F32__max ? Context.FloatTy
2463
807
                                                          : Context.DoubleTy;
2464
807
        auto IntTy = Instr.getOpCode() == OpCode::F32__max ? Context.Int32Ty
2465
807
                                                           : Context.Int64Ty;
2466
2467
807
        auto UEQ = Builder.createFCmpUEQ(LHS, RHS);
2468
807
        auto UNO = Builder.createFCmpUNO(LHS, RHS);
2469
2470
807
        auto LHSInt = Builder.createBitCast(LHS, IntTy);
2471
807
        auto RHSInt = Builder.createBitCast(RHS, IntTy);
2472
807
        auto AndInt = Builder.createAnd(LHSInt, RHSInt);
2473
807
        auto AndFp = Builder.createBitCast(AndInt, FpTy);
2474
2475
807
        auto AddFp = Builder.createFAdd(LHS, RHS);
2476
2477
807
        assuming(LLVM::Core::MaxNum != LLVM::Core::NotIntrinsic);
2478
807
        auto MaxFp = Builder.createIntrinsic(LLVM::Core::MaxNum,
2479
807
                                             {LHS.getType()}, {LHS, RHS});
2480
2481
807
        auto Ret = Builder.createSelect(
2482
807
            UEQ, Builder.createSelect(UNO, AddFp, AndFp), MaxFp);
2483
807
        stackPush(Ret);
2484
807
        break;
2485
807
      }
2486
463
      case OpCode::F32__copysign:
2487
876
      case OpCode::F64__copysign: {
2488
876
        LLVM::Value RHS = stackPop();
2489
876
        LLVM::Value LHS = stackPop();
2490
876
        assuming(LLVM::Core::CopySign != LLVM::Core::NotIntrinsic);
2491
876
        stackPush(Builder.createIntrinsic(LLVM::Core::CopySign, {LHS.getType()},
2492
876
                                          {LHS, RHS}));
2493
876
        break;
2494
876
      }
2495
2496
      // Saturating Truncation Numeric Instructions
2497
198
      case OpCode::I32__trunc_sat_f32_s:
2498
198
        compileSignedTruncSat(Context.Int32Ty);
2499
198
        break;
2500
95
      case OpCode::I32__trunc_sat_f32_u:
2501
95
        compileUnsignedTruncSat(Context.Int32Ty);
2502
95
        break;
2503
538
      case OpCode::I32__trunc_sat_f64_s:
2504
538
        compileSignedTruncSat(Context.Int32Ty);
2505
538
        break;
2506
455
      case OpCode::I32__trunc_sat_f64_u:
2507
455
        compileUnsignedTruncSat(Context.Int32Ty);
2508
455
        break;
2509
388
      case OpCode::I64__trunc_sat_f32_s:
2510
388
        compileSignedTruncSat(Context.Int64Ty);
2511
388
        break;
2512
363
      case OpCode::I64__trunc_sat_f32_u:
2513
363
        compileUnsignedTruncSat(Context.Int64Ty);
2514
363
        break;
2515
192
      case OpCode::I64__trunc_sat_f64_s:
2516
192
        compileSignedTruncSat(Context.Int64Ty);
2517
192
        break;
2518
385
      case OpCode::I64__trunc_sat_f64_u:
2519
385
        compileUnsignedTruncSat(Context.Int64Ty);
2520
385
        break;
2521
2522
      // SIMD Memory Instructions
2523
5.04k
      case OpCode::V128__load:
2524
5.04k
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2525
5.04k
                            Instr.getMemoryAlign(), Context.Int128x1Ty);
2526
5.04k
        break;
2527
224
      case OpCode::V128__load8x8_s:
2528
224
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2529
224
                            Instr.getMemoryAlign(),
2530
224
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2531
224
                            Context.Int16x8Ty, true);
2532
224
        break;
2533
49
      case OpCode::V128__load8x8_u:
2534
49
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2535
49
                            Instr.getMemoryAlign(),
2536
49
                            LLVM::Type::getVectorType(Context.Int8Ty, 8),
2537
49
                            Context.Int16x8Ty, false);
2538
49
        break;
2539
313
      case OpCode::V128__load16x4_s:
2540
313
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2541
313
                            Instr.getMemoryAlign(),
2542
313
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2543
313
                            Context.Int32x4Ty, true);
2544
313
        break;
2545
535
      case OpCode::V128__load16x4_u:
2546
535
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2547
535
                            Instr.getMemoryAlign(),
2548
535
                            LLVM::Type::getVectorType(Context.Int16Ty, 4),
2549
535
                            Context.Int32x4Ty, false);
2550
535
        break;
2551
156
      case OpCode::V128__load32x2_s:
2552
156
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2553
156
                            Instr.getMemoryAlign(),
2554
156
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2555
156
                            Context.Int64x2Ty, true);
2556
156
        break;
2557
185
      case OpCode::V128__load32x2_u:
2558
185
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2559
185
                            Instr.getMemoryAlign(),
2560
185
                            LLVM::Type::getVectorType(Context.Int32Ty, 2),
2561
185
                            Context.Int64x2Ty, false);
2562
185
        break;
2563
75
      case OpCode::V128__load8_splat:
2564
75
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2565
75
                           Instr.getMemoryAlign(), Context.Int8Ty,
2566
75
                           Context.Int8x16Ty);
2567
75
        break;
2568
176
      case OpCode::V128__load16_splat:
2569
176
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2570
176
                           Instr.getMemoryAlign(), Context.Int16Ty,
2571
176
                           Context.Int16x8Ty);
2572
176
        break;
2573
235
      case OpCode::V128__load32_splat:
2574
235
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2575
235
                           Instr.getMemoryAlign(), Context.Int32Ty,
2576
235
                           Context.Int32x4Ty);
2577
235
        break;
2578
159
      case OpCode::V128__load64_splat:
2579
159
        compileSplatLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2580
159
                           Instr.getMemoryAlign(), Context.Int64Ty,
2581
159
                           Context.Int64x2Ty);
2582
159
        break;
2583
81
      case OpCode::V128__load32_zero:
2584
81
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2585
81
                            Instr.getMemoryAlign(), Context.Int32Ty,
2586
81
                            Context.Int128Ty, false);
2587
81
        break;
2588
140
      case OpCode::V128__load64_zero:
2589
140
        compileVectorLoadOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2590
140
                            Instr.getMemoryAlign(), Context.Int64Ty,
2591
140
                            Context.Int128Ty, false);
2592
140
        break;
2593
235
      case OpCode::V128__store:
2594
235
        compileStoreOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2595
235
                       Instr.getMemoryAlign(), Context.Int128x1Ty, false, true);
2596
235
        break;
2597
187
      case OpCode::V128__load8_lane:
2598
187
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2599
187
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2600
187
                          Context.Int8Ty, Context.Int8x16Ty);
2601
187
        break;
2602
138
      case OpCode::V128__load16_lane:
2603
138
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2604
138
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2605
138
                          Context.Int16Ty, Context.Int16x8Ty);
2606
138
        break;
2607
138
      case OpCode::V128__load32_lane:
2608
138
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2609
138
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2610
138
                          Context.Int32Ty, Context.Int32x4Ty);
2611
138
        break;
2612
22
      case OpCode::V128__load64_lane:
2613
22
        compileLoadLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2614
22
                          Instr.getMemoryAlign(), Instr.getMemoryLane(),
2615
22
                          Context.Int64Ty, Context.Int64x2Ty);
2616
22
        break;
2617
169
      case OpCode::V128__store8_lane:
2618
169
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2619
169
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2620
169
                           Context.Int8Ty, Context.Int8x16Ty);
2621
169
        break;
2622
80
      case OpCode::V128__store16_lane:
2623
80
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2624
80
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2625
80
                           Context.Int16Ty, Context.Int16x8Ty);
2626
80
        break;
2627
91
      case OpCode::V128__store32_lane:
2628
91
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2629
91
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2630
91
                           Context.Int32Ty, Context.Int32x4Ty);
2631
91
        break;
2632
35
      case OpCode::V128__store64_lane:
2633
35
        compileStoreLaneOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
2634
35
                           Instr.getMemoryAlign(), Instr.getMemoryLane(),
2635
35
                           Context.Int64Ty, Context.Int64x2Ty);
2636
35
        break;
2637
2638
      // SIMD Const Instructions
2639
357
      case OpCode::V128__const: {
2640
357
        const auto Value = Instr.getNum().get<uint64x2_t>();
2641
357
        auto Vector =
2642
357
            LLVM::Value::getConstVector64(LLContext, {Value[0], Value[1]});
2643
357
        stackPush(Builder.createBitCast(Vector, Context.Int64x2Ty));
2644
357
        break;
2645
876
      }
2646
2647
      // SIMD Shuffle Instructions
2648
15
      case OpCode::I8x16__shuffle: {
2649
15
        auto V2 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2650
15
        auto V1 = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
2651
15
        const auto V3 = Instr.getNum().get<uint128_t>();
2652
15
        std::array<uint8_t, 16> Mask;
2653
255
        for (size_t I = 0; I < 16; ++I) {
2654
240
          auto Num = static_cast<uint8_t>(V3 >> (I * 8));
2655
240
          if constexpr (Endian::native == Endian::little) {
2656
240
            Mask[I] = Num;
2657
          } else {
2658
            Mask[15 - I] = Num < 16 ? 15 - Num : 47 - Num;
2659
          }
2660
240
        }
2661
15
        stackPush(Builder.createBitCast(
2662
15
            Builder.createShuffleVector(
2663
15
                V1, V2, LLVM::Value::getConstVector8(LLContext, Mask)),
2664
15
            Context.Int64x2Ty));
2665
15
        break;
2666
876
      }
2667
2668
      // SIMD Lane Instructions
2669
69
      case OpCode::I8x16__extract_lane_s:
2670
69
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2671
69
                             Context.Int32Ty, true);
2672
69
        break;
2673
28
      case OpCode::I8x16__extract_lane_u:
2674
28
        compileExtractLaneOp(Context.Int8x16Ty, Instr.getMemoryLane(),
2675
28
                             Context.Int32Ty, false);
2676
28
        break;
2677
212
      case OpCode::I8x16__replace_lane:
2678
212
        compileReplaceLaneOp(Context.Int8x16Ty, Instr.getMemoryLane());
2679
212
        break;
2680
447
      case OpCode::I16x8__extract_lane_s:
2681
447
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2682
447
                             Context.Int32Ty, true);
2683
447
        break;
2684
393
      case OpCode::I16x8__extract_lane_u:
2685
393
        compileExtractLaneOp(Context.Int16x8Ty, Instr.getMemoryLane(),
2686
393
                             Context.Int32Ty, false);
2687
393
        break;
2688
476
      case OpCode::I16x8__replace_lane:
2689
476
        compileReplaceLaneOp(Context.Int16x8Ty, Instr.getMemoryLane());
2690
476
        break;
2691
63
      case OpCode::I32x4__extract_lane:
2692
63
        compileExtractLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2693
63
        break;
2694
278
      case OpCode::I32x4__replace_lane:
2695
278
        compileReplaceLaneOp(Context.Int32x4Ty, Instr.getMemoryLane());
2696
278
        break;
2697
128
      case OpCode::I64x2__extract_lane:
2698
128
        compileExtractLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2699
128
        break;
2700
14
      case OpCode::I64x2__replace_lane:
2701
14
        compileReplaceLaneOp(Context.Int64x2Ty, Instr.getMemoryLane());
2702
14
        break;
2703
57
      case OpCode::F32x4__extract_lane:
2704
57
        compileExtractLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2705
57
        break;
2706
23
      case OpCode::F32x4__replace_lane:
2707
23
        compileReplaceLaneOp(Context.Floatx4Ty, Instr.getMemoryLane());
2708
23
        break;
2709
68
      case OpCode::F64x2__extract_lane:
2710
68
        compileExtractLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2711
68
        break;
2712
7
      case OpCode::F64x2__replace_lane:
2713
7
        compileReplaceLaneOp(Context.Doublex2Ty, Instr.getMemoryLane());
2714
7
        break;
2715
2716
      // SIMD Numeric Instructions
2717
112
      case OpCode::I8x16__swizzle:
2718
112
        compileVectorSwizzle();
2719
112
        break;
2720
42.4k
      case OpCode::I8x16__splat:
2721
42.4k
        compileSplatOp(Context.Int8x16Ty);
2722
42.4k
        break;
2723
9.59k
      case OpCode::I16x8__splat:
2724
9.59k
        compileSplatOp(Context.Int16x8Ty);
2725
9.59k
        break;
2726
1.37k
      case OpCode::I32x4__splat:
2727
1.37k
        compileSplatOp(Context.Int32x4Ty);
2728
1.37k
        break;
2729
831
      case OpCode::I64x2__splat:
2730
831
        compileSplatOp(Context.Int64x2Ty);
2731
831
        break;
2732
340
      case OpCode::F32x4__splat:
2733
340
        compileSplatOp(Context.Floatx4Ty);
2734
340
        break;
2735
76
      case OpCode::F64x2__splat:
2736
76
        compileSplatOp(Context.Doublex2Ty);
2737
76
        break;
2738
99
      case OpCode::I8x16__eq:
2739
99
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntEQ);
2740
99
        break;
2741
399
      case OpCode::I8x16__ne:
2742
399
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntNE);
2743
399
        break;
2744
63
      case OpCode::I8x16__lt_s:
2745
63
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLT);
2746
63
        break;
2747
65
      case OpCode::I8x16__lt_u:
2748
65
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULT);
2749
65
        break;
2750
250
      case OpCode::I8x16__gt_s:
2751
250
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGT);
2752
250
        break;
2753
251
      case OpCode::I8x16__gt_u:
2754
251
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGT);
2755
251
        break;
2756
110
      case OpCode::I8x16__le_s:
2757
110
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSLE);
2758
110
        break;
2759
157
      case OpCode::I8x16__le_u:
2760
157
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntULE);
2761
157
        break;
2762
1.30k
      case OpCode::I8x16__ge_s:
2763
1.30k
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntSGE);
2764
1.30k
        break;
2765
94
      case OpCode::I8x16__ge_u:
2766
94
        compileVectorCompareOp(Context.Int8x16Ty, LLVMIntUGE);
2767
94
        break;
2768
180
      case OpCode::I16x8__eq:
2769
180
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntEQ);
2770
180
        break;
2771
175
      case OpCode::I16x8__ne:
2772
175
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntNE);
2773
175
        break;
2774
50
      case OpCode::I16x8__lt_s:
2775
50
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLT);
2776
50
        break;
2777
254
      case OpCode::I16x8__lt_u:
2778
254
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULT);
2779
254
        break;
2780
277
      case OpCode::I16x8__gt_s:
2781
277
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGT);
2782
277
        break;
2783
139
      case OpCode::I16x8__gt_u:
2784
139
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGT);
2785
139
        break;
2786
90
      case OpCode::I16x8__le_s:
2787
90
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSLE);
2788
90
        break;
2789
87
      case OpCode::I16x8__le_u:
2790
87
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntULE);
2791
87
        break;
2792
141
      case OpCode::I16x8__ge_s:
2793
141
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntSGE);
2794
141
        break;
2795
67
      case OpCode::I16x8__ge_u:
2796
67
        compileVectorCompareOp(Context.Int16x8Ty, LLVMIntUGE);
2797
67
        break;
2798
74
      case OpCode::I32x4__eq:
2799
74
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntEQ);
2800
74
        break;
2801
121
      case OpCode::I32x4__ne:
2802
121
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntNE);
2803
121
        break;
2804
57
      case OpCode::I32x4__lt_s:
2805
57
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLT);
2806
57
        break;
2807
138
      case OpCode::I32x4__lt_u:
2808
138
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULT);
2809
138
        break;
2810
201
      case OpCode::I32x4__gt_s:
2811
201
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGT);
2812
201
        break;
2813
221
      case OpCode::I32x4__gt_u:
2814
221
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGT);
2815
221
        break;
2816
293
      case OpCode::I32x4__le_s:
2817
293
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSLE);
2818
293
        break;
2819
265
      case OpCode::I32x4__le_u:
2820
265
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntULE);
2821
265
        break;
2822
74
      case OpCode::I32x4__ge_s:
2823
74
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntSGE);
2824
74
        break;
2825
157
      case OpCode::I32x4__ge_u:
2826
157
        compileVectorCompareOp(Context.Int32x4Ty, LLVMIntUGE);
2827
157
        break;
2828
122
      case OpCode::I64x2__eq:
2829
122
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntEQ);
2830
122
        break;
2831
49
      case OpCode::I64x2__ne:
2832
49
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntNE);
2833
49
        break;
2834
47
      case OpCode::I64x2__lt_s:
2835
47
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLT);
2836
47
        break;
2837
163
      case OpCode::I64x2__gt_s:
2838
163
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGT);
2839
163
        break;
2840
49
      case OpCode::I64x2__le_s:
2841
49
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSLE);
2842
49
        break;
2843
73
      case OpCode::I64x2__ge_s:
2844
73
        compileVectorCompareOp(Context.Int64x2Ty, LLVMIntSGE);
2845
73
        break;
2846
1.45k
      case OpCode::F32x4__eq:
2847
1.45k
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOEQ,
2848
1.45k
                               Context.Int32x4Ty);
2849
1.45k
        break;
2850
53
      case OpCode::F32x4__ne:
2851
53
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealUNE,
2852
53
                               Context.Int32x4Ty);
2853
53
        break;
2854
699
      case OpCode::F32x4__lt:
2855
699
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLT,
2856
699
                               Context.Int32x4Ty);
2857
699
        break;
2858
82
      case OpCode::F32x4__gt:
2859
82
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGT,
2860
82
                               Context.Int32x4Ty);
2861
82
        break;
2862
369
      case OpCode::F32x4__le:
2863
369
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOLE,
2864
369
                               Context.Int32x4Ty);
2865
369
        break;
2866
77
      case OpCode::F32x4__ge:
2867
77
        compileVectorCompareOp(Context.Floatx4Ty, LLVMRealOGE,
2868
77
                               Context.Int32x4Ty);
2869
77
        break;
2870
58
      case OpCode::F64x2__eq:
2871
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOEQ,
2872
58
                               Context.Int64x2Ty);
2873
58
        break;
2874
102
      case OpCode::F64x2__ne:
2875
102
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealUNE,
2876
102
                               Context.Int64x2Ty);
2877
102
        break;
2878
130
      case OpCode::F64x2__lt:
2879
130
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLT,
2880
130
                               Context.Int64x2Ty);
2881
130
        break;
2882
58
      case OpCode::F64x2__gt:
2883
58
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGT,
2884
58
                               Context.Int64x2Ty);
2885
58
        break;
2886
185
      case OpCode::F64x2__le:
2887
185
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOLE,
2888
185
                               Context.Int64x2Ty);
2889
185
        break;
2890
83
      case OpCode::F64x2__ge:
2891
83
        compileVectorCompareOp(Context.Doublex2Ty, LLVMRealOGE,
2892
83
                               Context.Int64x2Ty);
2893
83
        break;
2894
390
      case OpCode::V128__not:
2895
390
        Stack.back() = Builder.createNot(Stack.back());
2896
390
        break;
2897
115
      case OpCode::V128__and: {
2898
115
        auto RHS = stackPop();
2899
115
        auto LHS = stackPop();
2900
115
        stackPush(Builder.createAnd(LHS, RHS));
2901
115
        break;
2902
876
      }
2903
96
      case OpCode::V128__andnot: {
2904
96
        auto RHS = stackPop();
2905
96
        auto LHS = stackPop();
2906
96
        stackPush(Builder.createAnd(LHS, Builder.createNot(RHS)));
2907
96
        break;
2908
876
      }
2909
123
      case OpCode::V128__or: {
2910
123
        auto RHS = stackPop();
2911
123
        auto LHS = stackPop();
2912
123
        stackPush(Builder.createOr(LHS, RHS));
2913
123
        break;
2914
876
      }
2915
60
      case OpCode::V128__xor: {
2916
60
        auto RHS = stackPop();
2917
60
        auto LHS = stackPop();
2918
60
        stackPush(Builder.createXor(LHS, RHS));
2919
60
        break;
2920
876
      }
2921
151
      case OpCode::V128__bitselect: {
2922
151
        auto C = stackPop();
2923
151
        auto V2 = stackPop();
2924
151
        auto V1 = stackPop();
2925
151
        stackPush(Builder.createXor(
2926
151
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
2927
151
        break;
2928
876
      }
2929
139
      case OpCode::V128__any_true:
2930
139
        compileVectorAnyTrue();
2931
139
        break;
2932
1.38k
      case OpCode::I8x16__abs:
2933
1.38k
        compileVectorAbs(Context.Int8x16Ty);
2934
1.38k
        break;
2935
2.34k
      case OpCode::I8x16__neg:
2936
2.34k
        compileVectorNeg(Context.Int8x16Ty);
2937
2.34k
        break;
2938
125
      case OpCode::I8x16__popcnt:
2939
125
        compileVectorPopcnt();
2940
125
        break;
2941
295
      case OpCode::I8x16__all_true:
2942
295
        compileVectorAllTrue(Context.Int8x16Ty);
2943
295
        break;
2944
750
      case OpCode::I8x16__bitmask:
2945
750
        compileVectorBitMask(Context.Int8x16Ty);
2946
750
        break;
2947
93
      case OpCode::I8x16__narrow_i16x8_s:
2948
93
        compileVectorNarrow(Context.Int16x8Ty, true);
2949
93
        break;
2950
184
      case OpCode::I8x16__narrow_i16x8_u:
2951
184
        compileVectorNarrow(Context.Int16x8Ty, false);
2952
184
        break;
2953
369
      case OpCode::I8x16__shl:
2954
369
        compileVectorShl(Context.Int8x16Ty);
2955
369
        break;
2956
1.12k
      case OpCode::I8x16__shr_s:
2957
1.12k
        compileVectorAShr(Context.Int8x16Ty);
2958
1.12k
        break;
2959
95
      case OpCode::I8x16__shr_u:
2960
95
        compileVectorLShr(Context.Int8x16Ty);
2961
95
        break;
2962
43
      case OpCode::I8x16__add:
2963
43
        compileVectorVectorAdd(Context.Int8x16Ty);
2964
43
        break;
2965
647
      case OpCode::I8x16__add_sat_s:
2966
647
        compileVectorVectorAddSat(Context.Int8x16Ty, true);
2967
647
        break;
2968
69
      case OpCode::I8x16__add_sat_u:
2969
69
        compileVectorVectorAddSat(Context.Int8x16Ty, false);
2970
69
        break;
2971
67
      case OpCode::I8x16__sub:
2972
67
        compileVectorVectorSub(Context.Int8x16Ty);
2973
67
        break;
2974
201
      case OpCode::I8x16__sub_sat_s:
2975
201
        compileVectorVectorSubSat(Context.Int8x16Ty, true);
2976
201
        break;
2977
74
      case OpCode::I8x16__sub_sat_u:
2978
74
        compileVectorVectorSubSat(Context.Int8x16Ty, false);
2979
74
        break;
2980
70
      case OpCode::I8x16__min_s:
2981
70
        compileVectorVectorSMin(Context.Int8x16Ty);
2982
70
        break;
2983
98
      case OpCode::I8x16__min_u:
2984
98
        compileVectorVectorUMin(Context.Int8x16Ty);
2985
98
        break;
2986
313
      case OpCode::I8x16__max_s:
2987
313
        compileVectorVectorSMax(Context.Int8x16Ty);
2988
313
        break;
2989
90
      case OpCode::I8x16__max_u:
2990
90
        compileVectorVectorUMax(Context.Int8x16Ty);
2991
90
        break;
2992
138
      case OpCode::I8x16__avgr_u:
2993
138
        compileVectorVectorUAvgr(Context.Int8x16Ty);
2994
138
        break;
2995
263
      case OpCode::I16x8__abs:
2996
263
        compileVectorAbs(Context.Int16x8Ty);
2997
263
        break;
2998
214
      case OpCode::I16x8__neg:
2999
214
        compileVectorNeg(Context.Int16x8Ty);
3000
214
        break;
3001
146
      case OpCode::I16x8__all_true:
3002
146
        compileVectorAllTrue(Context.Int16x8Ty);
3003
146
        break;
3004
130
      case OpCode::I16x8__bitmask:
3005
130
        compileVectorBitMask(Context.Int16x8Ty);
3006
130
        break;
3007
47
      case OpCode::I16x8__narrow_i32x4_s:
3008
47
        compileVectorNarrow(Context.Int32x4Ty, true);
3009
47
        break;
3010
414
      case OpCode::I16x8__narrow_i32x4_u:
3011
414
        compileVectorNarrow(Context.Int32x4Ty, false);
3012
414
        break;
3013
1.05k
      case OpCode::I16x8__extend_low_i8x16_s:
3014
1.05k
        compileVectorExtend(Context.Int8x16Ty, true, true);
3015
1.05k
        break;
3016
107
      case OpCode::I16x8__extend_high_i8x16_s:
3017
107
        compileVectorExtend(Context.Int8x16Ty, true, false);
3018
107
        break;
3019
449
      case OpCode::I16x8__extend_low_i8x16_u:
3020
449
        compileVectorExtend(Context.Int8x16Ty, false, true);
3021
449
        break;
3022
12
      case OpCode::I16x8__extend_high_i8x16_u:
3023
12
        compileVectorExtend(Context.Int8x16Ty, false, false);
3024
12
        break;
3025
112
      case OpCode::I16x8__shl:
3026
112
        compileVectorShl(Context.Int16x8Ty);
3027
112
        break;
3028
389
      case OpCode::I16x8__shr_s:
3029
389
        compileVectorAShr(Context.Int16x8Ty);
3030
389
        break;
3031
168
      case OpCode::I16x8__shr_u:
3032
168
        compileVectorLShr(Context.Int16x8Ty);
3033
168
        break;
3034
143
      case OpCode::I16x8__add:
3035
143
        compileVectorVectorAdd(Context.Int16x8Ty);
3036
143
        break;
3037
16
      case OpCode::I16x8__add_sat_s:
3038
16
        compileVectorVectorAddSat(Context.Int16x8Ty, true);
3039
16
        break;
3040
521
      case OpCode::I16x8__add_sat_u:
3041
521
        compileVectorVectorAddSat(Context.Int16x8Ty, false);
3042
521
        break;
3043
363
      case OpCode::I16x8__sub:
3044
363
        compileVectorVectorSub(Context.Int16x8Ty);
3045
363
        break;
3046
31
      case OpCode::I16x8__sub_sat_s:
3047
31
        compileVectorVectorSubSat(Context.Int16x8Ty, true);
3048
31
        break;
3049
90
      case OpCode::I16x8__sub_sat_u:
3050
90
        compileVectorVectorSubSat(Context.Int16x8Ty, false);
3051
90
        break;
3052
127
      case OpCode::I16x8__mul:
3053
127
        compileVectorVectorMul(Context.Int16x8Ty);
3054
127
        break;
3055
156
      case OpCode::I16x8__min_s:
3056
156
        compileVectorVectorSMin(Context.Int16x8Ty);
3057
156
        break;
3058
142
      case OpCode::I16x8__min_u:
3059
142
        compileVectorVectorUMin(Context.Int16x8Ty);
3060
142
        break;
3061
87
      case OpCode::I16x8__max_s:
3062
87
        compileVectorVectorSMax(Context.Int16x8Ty);
3063
87
        break;
3064
688
      case OpCode::I16x8__max_u:
3065
688
        compileVectorVectorUMax(Context.Int16x8Ty);
3066
688
        break;
3067
150
      case OpCode::I16x8__avgr_u:
3068
150
        compileVectorVectorUAvgr(Context.Int16x8Ty);
3069
150
        break;
3070
64
      case OpCode::I16x8__extmul_low_i8x16_s:
3071
64
        compileVectorExtMul(Context.Int8x16Ty, true, true);
3072
64
        break;
3073
199
      case OpCode::I16x8__extmul_high_i8x16_s:
3074
199
        compileVectorExtMul(Context.Int8x16Ty, true, false);
3075
199
        break;
3076
124
      case OpCode::I16x8__extmul_low_i8x16_u:
3077
124
        compileVectorExtMul(Context.Int8x16Ty, false, true);
3078
124
        break;
3079
522
      case OpCode::I16x8__extmul_high_i8x16_u:
3080
522
        compileVectorExtMul(Context.Int8x16Ty, false, false);
3081
522
        break;
3082
144
      case OpCode::I16x8__q15mulr_sat_s:
3083
144
        compileVectorVectorQ15MulSat();
3084
144
        break;
3085
386
      case OpCode::I16x8__extadd_pairwise_i8x16_s:
3086
386
        compileVectorExtAddPairwise(Context.Int8x16Ty, true);
3087
386
        break;
3088
361
      case OpCode::I16x8__extadd_pairwise_i8x16_u:
3089
361
        compileVectorExtAddPairwise(Context.Int8x16Ty, false);
3090
361
        break;
3091
58
      case OpCode::I32x4__abs:
3092
58
        compileVectorAbs(Context.Int32x4Ty);
3093
58
        break;
3094
222
      case OpCode::I32x4__neg:
3095
222
        compileVectorNeg(Context.Int32x4Ty);
3096
222
        break;
3097
184
      case OpCode::I32x4__all_true:
3098
184
        compileVectorAllTrue(Context.Int32x4Ty);
3099
184
        break;
3100
90
      case OpCode::I32x4__bitmask:
3101
90
        compileVectorBitMask(Context.Int32x4Ty);
3102
90
        break;
3103
106
      case OpCode::I32x4__extend_low_i16x8_s:
3104
106
        compileVectorExtend(Context.Int16x8Ty, true, true);
3105
106
        break;
3106
602
      case OpCode::I32x4__extend_high_i16x8_s:
3107
602
        compileVectorExtend(Context.Int16x8Ty, true, false);
3108
602
        break;
3109
2.24k
      case OpCode::I32x4__extend_low_i16x8_u:
3110
2.24k
        compileVectorExtend(Context.Int16x8Ty, false, true);
3111
2.24k
        break;
3112
171
      case OpCode::I32x4__extend_high_i16x8_u:
3113
171
        compileVectorExtend(Context.Int16x8Ty, false, false);
3114
171
        break;
3115
1.20k
      case OpCode::I32x4__shl:
3116
1.20k
        compileVectorShl(Context.Int32x4Ty);
3117
1.20k
        break;
3118
299
      case OpCode::I32x4__shr_s:
3119
299
        compileVectorAShr(Context.Int32x4Ty);
3120
299
        break;
3121
667
      case OpCode::I32x4__shr_u:
3122
667
        compileVectorLShr(Context.Int32x4Ty);
3123
667
        break;
3124
217
      case OpCode::I32x4__add:
3125
217
        compileVectorVectorAdd(Context.Int32x4Ty);
3126
217
        break;
3127
145
      case OpCode::I32x4__sub:
3128
145
        compileVectorVectorSub(Context.Int32x4Ty);
3129
145
        break;
3130
315
      case OpCode::I32x4__mul:
3131
315
        compileVectorVectorMul(Context.Int32x4Ty);
3132
315
        break;
3133
83
      case OpCode::I32x4__min_s:
3134
83
        compileVectorVectorSMin(Context.Int32x4Ty);
3135
83
        break;
3136
152
      case OpCode::I32x4__min_u:
3137
152
        compileVectorVectorUMin(Context.Int32x4Ty);
3138
152
        break;
3139
94
      case OpCode::I32x4__max_s:
3140
94
        compileVectorVectorSMax(Context.Int32x4Ty);
3141
94
        break;
3142
77
      case OpCode::I32x4__max_u:
3143
77
        compileVectorVectorUMax(Context.Int32x4Ty);
3144
77
        break;
3145
97
      case OpCode::I32x4__extmul_low_i16x8_s:
3146
97
        compileVectorExtMul(Context.Int16x8Ty, true, true);
3147
97
        break;
3148
82
      case OpCode::I32x4__extmul_high_i16x8_s:
3149
82
        compileVectorExtMul(Context.Int16x8Ty, true, false);
3150
82
        break;
3151
255
      case OpCode::I32x4__extmul_low_i16x8_u:
3152
255
        compileVectorExtMul(Context.Int16x8Ty, false, true);
3153
255
        break;
3154
162
      case OpCode::I32x4__extmul_high_i16x8_u:
3155
162
        compileVectorExtMul(Context.Int16x8Ty, false, false);
3156
162
        break;
3157
1.32k
      case OpCode::I32x4__extadd_pairwise_i16x8_s:
3158
1.32k
        compileVectorExtAddPairwise(Context.Int16x8Ty, true);
3159
1.32k
        break;
3160
1.01k
      case OpCode::I32x4__extadd_pairwise_i16x8_u:
3161
1.01k
        compileVectorExtAddPairwise(Context.Int16x8Ty, false);
3162
1.01k
        break;
3163
115
      case OpCode::I32x4__dot_i16x8_s: {
3164
115
        auto ExtendTy = Context.Int16x8Ty.getExtendedElementVectorType();
3165
115
        auto Undef = LLVM::Value::getUndef(ExtendTy);
3166
115
        auto LHS = Builder.createSExt(
3167
115
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3168
115
        auto RHS = Builder.createSExt(
3169
115
            Builder.createBitCast(stackPop(), Context.Int16x8Ty), ExtendTy);
3170
115
        auto M = Builder.createMul(LHS, RHS);
3171
115
        auto L = Builder.createShuffleVector(
3172
115
            M, Undef,
3173
115
            LLVM::Value::getConstVector32(LLContext, {0U, 2U, 4U, 6U}));
3174
115
        auto R = Builder.createShuffleVector(
3175
115
            M, Undef,
3176
115
            LLVM::Value::getConstVector32(LLContext, {1U, 3U, 5U, 7U}));
3177
115
        auto V = Builder.createAdd(L, R);
3178
115
        stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
3179
115
        break;
3180
876
      }
3181
924
      case OpCode::I64x2__abs:
3182
924
        compileVectorAbs(Context.Int64x2Ty);
3183
924
        break;
3184
537
      case OpCode::I64x2__neg:
3185
537
        compileVectorNeg(Context.Int64x2Ty);
3186
537
        break;
3187
312
      case OpCode::I64x2__all_true:
3188
312
        compileVectorAllTrue(Context.Int64x2Ty);
3189
312
        break;
3190
262
      case OpCode::I64x2__bitmask:
3191
262
        compileVectorBitMask(Context.Int64x2Ty);
3192
262
        break;
3193
384
      case OpCode::I64x2__extend_low_i32x4_s:
3194
384
        compileVectorExtend(Context.Int32x4Ty, true, true);
3195
384
        break;
3196
759
      case OpCode::I64x2__extend_high_i32x4_s:
3197
759
        compileVectorExtend(Context.Int32x4Ty, true, false);
3198
759
        break;
3199
239
      case OpCode::I64x2__extend_low_i32x4_u:
3200
239
        compileVectorExtend(Context.Int32x4Ty, false, true);
3201
239
        break;
3202
620
      case OpCode::I64x2__extend_high_i32x4_u:
3203
620
        compileVectorExtend(Context.Int32x4Ty, false, false);
3204
620
        break;
3205
117
      case OpCode::I64x2__shl:
3206
117
        compileVectorShl(Context.Int64x2Ty);
3207
117
        break;
3208
322
      case OpCode::I64x2__shr_s:
3209
322
        compileVectorAShr(Context.Int64x2Ty);
3210
322
        break;
3211
71
      case OpCode::I64x2__shr_u:
3212
71
        compileVectorLShr(Context.Int64x2Ty);
3213
71
        break;
3214
55
      case OpCode::I64x2__add:
3215
55
        compileVectorVectorAdd(Context.Int64x2Ty);
3216
55
        break;
3217
239
      case OpCode::I64x2__sub:
3218
239
        compileVectorVectorSub(Context.Int64x2Ty);
3219
239
        break;
3220
81
      case OpCode::I64x2__mul:
3221
81
        compileVectorVectorMul(Context.Int64x2Ty);
3222
81
        break;
3223
37
      case OpCode::I64x2__extmul_low_i32x4_s:
3224
37
        compileVectorExtMul(Context.Int32x4Ty, true, true);
3225
37
        break;
3226
621
      case OpCode::I64x2__extmul_high_i32x4_s:
3227
621
        compileVectorExtMul(Context.Int32x4Ty, true, false);
3228
621
        break;
3229
30
      case OpCode::I64x2__extmul_low_i32x4_u:
3230
30
        compileVectorExtMul(Context.Int32x4Ty, false, true);
3231
30
        break;
3232
177
      case OpCode::I64x2__extmul_high_i32x4_u:
3233
177
        compileVectorExtMul(Context.Int32x4Ty, false, false);
3234
177
        break;
3235
140
      case OpCode::F32x4__abs:
3236
140
        compileVectorFAbs(Context.Floatx4Ty);
3237
140
        break;
3238
170
      case OpCode::F32x4__neg:
3239
170
        compileVectorFNeg(Context.Floatx4Ty);
3240
170
        break;
3241
169
      case OpCode::F32x4__sqrt:
3242
169
        compileVectorFSqrt(Context.Floatx4Ty);
3243
169
        break;
3244
128
      case OpCode::F32x4__add:
3245
128
        compileVectorVectorFAdd(Context.Floatx4Ty);
3246
128
        break;
3247
264
      case OpCode::F32x4__sub:
3248
264
        compileVectorVectorFSub(Context.Floatx4Ty);
3249
264
        break;
3250
38
      case OpCode::F32x4__mul:
3251
38
        compileVectorVectorFMul(Context.Floatx4Ty);
3252
38
        break;
3253
162
      case OpCode::F32x4__div:
3254
162
        compileVectorVectorFDiv(Context.Floatx4Ty);
3255
162
        break;
3256
146
      case OpCode::F32x4__min:
3257
146
        compileVectorVectorFMin(Context.Floatx4Ty);
3258
146
        break;
3259
37
      case OpCode::F32x4__max:
3260
37
        compileVectorVectorFMax(Context.Floatx4Ty);
3261
37
        break;
3262
50
      case OpCode::F32x4__pmin:
3263
50
        compileVectorVectorFPMin(Context.Floatx4Ty);
3264
50
        break;
3265
219
      case OpCode::F32x4__pmax:
3266
219
        compileVectorVectorFPMax(Context.Floatx4Ty);
3267
219
        break;
3268
1.06k
      case OpCode::F32x4__ceil:
3269
1.06k
        compileVectorFCeil(Context.Floatx4Ty);
3270
1.06k
        break;
3271
2.10k
      case OpCode::F32x4__floor:
3272
2.10k
        compileVectorFFloor(Context.Floatx4Ty);
3273
2.10k
        break;
3274
2.06k
      case OpCode::F32x4__trunc:
3275
2.06k
        compileVectorFTrunc(Context.Floatx4Ty);
3276
2.06k
        break;
3277
281
      case OpCode::F32x4__nearest:
3278
281
        compileVectorFNearest(Context.Floatx4Ty);
3279
281
        break;
3280
521
      case OpCode::F64x2__abs:
3281
521
        compileVectorFAbs(Context.Doublex2Ty);
3282
521
        break;
3283
624
      case OpCode::F64x2__neg:
3284
624
        compileVectorFNeg(Context.Doublex2Ty);
3285
624
        break;
3286
105
      case OpCode::F64x2__sqrt:
3287
105
        compileVectorFSqrt(Context.Doublex2Ty);
3288
105
        break;
3289
48
      case OpCode::F64x2__add:
3290
48
        compileVectorVectorFAdd(Context.Doublex2Ty);
3291
48
        break;
3292
213
      case OpCode::F64x2__sub:
3293
213
        compileVectorVectorFSub(Context.Doublex2Ty);
3294
213
        break;
3295
231
      case OpCode::F64x2__mul:
3296
231
        compileVectorVectorFMul(Context.Doublex2Ty);
3297
231
        break;
3298
37
      case OpCode::F64x2__div:
3299
37
        compileVectorVectorFDiv(Context.Doublex2Ty);
3300
37
        break;
3301
172
      case OpCode::F64x2__min:
3302
172
        compileVectorVectorFMin(Context.Doublex2Ty);
3303
172
        break;
3304
160
      case OpCode::F64x2__max:
3305
160
        compileVectorVectorFMax(Context.Doublex2Ty);
3306
160
        break;
3307
358
      case OpCode::F64x2__pmin:
3308
358
        compileVectorVectorFPMin(Context.Doublex2Ty);
3309
358
        break;
3310
104
      case OpCode::F64x2__pmax:
3311
104
        compileVectorVectorFPMax(Context.Doublex2Ty);
3312
104
        break;
3313
675
      case OpCode::F64x2__ceil:
3314
675
        compileVectorFCeil(Context.Doublex2Ty);
3315
675
        break;
3316
799
      case OpCode::F64x2__floor:
3317
799
        compileVectorFFloor(Context.Doublex2Ty);
3318
799
        break;
3319
126
      case OpCode::F64x2__trunc:
3320
126
        compileVectorFTrunc(Context.Doublex2Ty);
3321
126
        break;
3322
158
      case OpCode::F64x2__nearest:
3323
158
        compileVectorFNearest(Context.Doublex2Ty);
3324
158
        break;
3325
159
      case OpCode::I32x4__trunc_sat_f32x4_s:
3326
159
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3327
159
        break;
3328
4.48k
      case OpCode::I32x4__trunc_sat_f32x4_u:
3329
4.48k
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3330
4.48k
        break;
3331
354
      case OpCode::F32x4__convert_i32x4_s:
3332
354
        compileVectorConvertS(Context.Int32x4Ty, Context.Floatx4Ty, false);
3333
354
        break;
3334
825
      case OpCode::F32x4__convert_i32x4_u:
3335
825
        compileVectorConvertU(Context.Int32x4Ty, Context.Floatx4Ty, false);
3336
825
        break;
3337
885
      case OpCode::I32x4__trunc_sat_f64x2_s_zero:
3338
885
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3339
885
        break;
3340
2.52k
      case OpCode::I32x4__trunc_sat_f64x2_u_zero:
3341
2.52k
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3342
2.52k
        break;
3343
388
      case OpCode::F64x2__convert_low_i32x4_s:
3344
388
        compileVectorConvertS(Context.Int32x4Ty, Context.Doublex2Ty, true);
3345
388
        break;
3346
1.35k
      case OpCode::F64x2__convert_low_i32x4_u:
3347
1.35k
        compileVectorConvertU(Context.Int32x4Ty, Context.Doublex2Ty, true);
3348
1.35k
        break;
3349
746
      case OpCode::F32x4__demote_f64x2_zero:
3350
746
        compileVectorDemote();
3351
746
        break;
3352
802
      case OpCode::F64x2__promote_low_f32x4:
3353
802
        compileVectorPromote();
3354
802
        break;
3355
3356
      // Relaxed SIMD Instructions
3357
19
      case OpCode::I8x16__relaxed_swizzle:
3358
19
        compileVectorSwizzle();
3359
19
        break;
3360
13
      case OpCode::I32x4__relaxed_trunc_f32x4_s:
3361
13
        compileVectorTruncSatS32(Context.Floatx4Ty, false);
3362
13
        break;
3363
13
      case OpCode::I32x4__relaxed_trunc_f32x4_u:
3364
13
        compileVectorTruncSatU32(Context.Floatx4Ty, false);
3365
13
        break;
3366
10
      case OpCode::I32x4__relaxed_trunc_f64x2_s_zero:
3367
10
        compileVectorTruncSatS32(Context.Doublex2Ty, true);
3368
10
        break;
3369
20
      case OpCode::I32x4__relaxed_trunc_f64x2_u_zero:
3370
20
        compileVectorTruncSatU32(Context.Doublex2Ty, true);
3371
20
        break;
3372
13
      case OpCode::F32x4__relaxed_madd:
3373
13
        compileVectorVectorMAdd(Context.Floatx4Ty);
3374
13
        break;
3375
64
      case OpCode::F32x4__relaxed_nmadd:
3376
64
        compileVectorVectorNMAdd(Context.Floatx4Ty);
3377
64
        break;
3378
14
      case OpCode::F64x2__relaxed_madd:
3379
14
        compileVectorVectorMAdd(Context.Doublex2Ty);
3380
14
        break;
3381
26
      case OpCode::F64x2__relaxed_nmadd:
3382
26
        compileVectorVectorNMAdd(Context.Doublex2Ty);
3383
26
        break;
3384
12
      case OpCode::I8x16__relaxed_laneselect:
3385
24
      case OpCode::I16x8__relaxed_laneselect:
3386
34
      case OpCode::I32x4__relaxed_laneselect:
3387
38
      case OpCode::I64x2__relaxed_laneselect: {
3388
38
        auto C = stackPop();
3389
38
        auto V2 = stackPop();
3390
38
        auto V1 = stackPop();
3391
38
        stackPush(Builder.createXor(
3392
38
            Builder.createAnd(Builder.createXor(V1, V2), C), V2));
3393
38
        break;
3394
34
      }
3395
20
      case OpCode::F32x4__relaxed_min:
3396
20
        compileVectorVectorFMin(Context.Floatx4Ty);
3397
20
        break;
3398
12
      case OpCode::F32x4__relaxed_max:
3399
12
        compileVectorVectorFMax(Context.Floatx4Ty);
3400
12
        break;
3401
22
      case OpCode::F64x2__relaxed_min:
3402
22
        compileVectorVectorFMin(Context.Doublex2Ty);
3403
22
        break;
3404
12
      case OpCode::F64x2__relaxed_max:
3405
12
        compileVectorVectorFMax(Context.Doublex2Ty);
3406
12
        break;
3407
24
      case OpCode::I16x8__relaxed_q15mulr_s:
3408
24
        compileVectorVectorQ15MulSat();
3409
24
        break;
3410
14
      case OpCode::I16x8__relaxed_dot_i8x16_i7x16_s:
3411
14
        compileVectorRelaxedIntegerDotProduct();
3412
14
        break;
3413
12
      case OpCode::I32x4__relaxed_dot_i8x16_i7x16_add_s:
3414
12
        compileVectorRelaxedIntegerDotProductAdd();
3415
12
        break;
3416
3417
      // Atomic Instructions
3418
192
      case OpCode::Atomic__fence:
3419
192
        compileMemoryFence();
3420
192
        break;
3421
44
      case OpCode::Memory__atomic__notify:
3422
44
        compileAtomicNotify(Instr.getTargetIndex(), Instr.getMemoryOffset());
3423
44
        break;
3424
7
      case OpCode::Memory__atomic__wait32:
3425
7
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3426
7
                          Context.Int32Ty, 32);
3427
7
        break;
3428
4
      case OpCode::Memory__atomic__wait64:
3429
4
        compileAtomicWait(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3430
4
                          Context.Int64Ty, 64);
3431
4
        break;
3432
0
      case OpCode::I32__atomic__load:
3433
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3434
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3435
0
                          Context.Int32Ty, true);
3436
0
        break;
3437
0
      case OpCode::I64__atomic__load:
3438
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3439
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3440
0
                          Context.Int64Ty, true);
3441
0
        break;
3442
0
      case OpCode::I32__atomic__load8_u:
3443
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3444
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3445
0
                          Context.Int8Ty);
3446
0
        break;
3447
0
      case OpCode::I32__atomic__load16_u:
3448
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3449
0
                          Instr.getMemoryAlign(), Context.Int32Ty,
3450
0
                          Context.Int16Ty);
3451
0
        break;
3452
0
      case OpCode::I64__atomic__load8_u:
3453
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3454
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3455
0
                          Context.Int8Ty);
3456
0
        break;
3457
0
      case OpCode::I64__atomic__load16_u:
3458
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3459
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3460
0
                          Context.Int16Ty);
3461
0
        break;
3462
0
      case OpCode::I64__atomic__load32_u:
3463
0
        compileAtomicLoad(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3464
0
                          Instr.getMemoryAlign(), Context.Int64Ty,
3465
0
                          Context.Int32Ty);
3466
0
        break;
3467
0
      case OpCode::I32__atomic__store:
3468
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3469
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3470
0
                           Context.Int32Ty, true);
3471
0
        break;
3472
0
      case OpCode::I64__atomic__store:
3473
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3474
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3475
0
                           Context.Int64Ty, true);
3476
0
        break;
3477
0
      case OpCode::I32__atomic__store8:
3478
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3479
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3480
0
                           Context.Int8Ty, true);
3481
0
        break;
3482
0
      case OpCode::I32__atomic__store16:
3483
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3484
0
                           Instr.getMemoryAlign(), Context.Int32Ty,
3485
0
                           Context.Int16Ty, true);
3486
0
        break;
3487
0
      case OpCode::I64__atomic__store8:
3488
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3489
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3490
0
                           Context.Int8Ty, true);
3491
0
        break;
3492
0
      case OpCode::I64__atomic__store16:
3493
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3494
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3495
0
                           Context.Int16Ty, true);
3496
0
        break;
3497
0
      case OpCode::I64__atomic__store32:
3498
0
        compileAtomicStore(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3499
0
                           Instr.getMemoryAlign(), Context.Int64Ty,
3500
0
                           Context.Int32Ty, true);
3501
0
        break;
3502
0
      case OpCode::I32__atomic__rmw__add:
3503
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3504
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3505
0
                           Context.Int32Ty, Context.Int32Ty, true);
3506
0
        break;
3507
0
      case OpCode::I64__atomic__rmw__add:
3508
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3509
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3510
0
                           Context.Int64Ty, Context.Int64Ty, true);
3511
0
        break;
3512
0
      case OpCode::I32__atomic__rmw8__add_u:
3513
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3514
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3515
0
                           Context.Int32Ty, Context.Int8Ty);
3516
0
        break;
3517
0
      case OpCode::I32__atomic__rmw16__add_u:
3518
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3519
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3520
0
                           Context.Int32Ty, Context.Int16Ty);
3521
0
        break;
3522
0
      case OpCode::I64__atomic__rmw8__add_u:
3523
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3524
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3525
0
                           Context.Int64Ty, Context.Int8Ty);
3526
0
        break;
3527
0
      case OpCode::I64__atomic__rmw16__add_u:
3528
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3529
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3530
0
                           Context.Int64Ty, Context.Int16Ty);
3531
0
        break;
3532
0
      case OpCode::I64__atomic__rmw32__add_u:
3533
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3534
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAdd,
3535
0
                           Context.Int64Ty, Context.Int32Ty);
3536
0
        break;
3537
0
      case OpCode::I32__atomic__rmw__sub:
3538
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3539
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3540
0
                           Context.Int32Ty, Context.Int32Ty, true);
3541
0
        break;
3542
0
      case OpCode::I64__atomic__rmw__sub:
3543
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3544
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3545
0
                           Context.Int64Ty, Context.Int64Ty, true);
3546
0
        break;
3547
0
      case OpCode::I32__atomic__rmw8__sub_u:
3548
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3549
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3550
0
                           Context.Int32Ty, Context.Int8Ty);
3551
0
        break;
3552
0
      case OpCode::I32__atomic__rmw16__sub_u:
3553
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3554
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3555
0
                           Context.Int32Ty, Context.Int16Ty);
3556
0
        break;
3557
0
      case OpCode::I64__atomic__rmw8__sub_u:
3558
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3559
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3560
0
                           Context.Int64Ty, Context.Int8Ty);
3561
0
        break;
3562
0
      case OpCode::I64__atomic__rmw16__sub_u:
3563
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3564
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3565
0
                           Context.Int64Ty, Context.Int16Ty);
3566
0
        break;
3567
0
      case OpCode::I64__atomic__rmw32__sub_u:
3568
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3569
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpSub,
3570
0
                           Context.Int64Ty, Context.Int32Ty);
3571
0
        break;
3572
0
      case OpCode::I32__atomic__rmw__and:
3573
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3574
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3575
0
                           Context.Int32Ty, Context.Int32Ty, true);
3576
0
        break;
3577
0
      case OpCode::I64__atomic__rmw__and:
3578
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3579
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3580
0
                           Context.Int64Ty, Context.Int64Ty, true);
3581
0
        break;
3582
0
      case OpCode::I32__atomic__rmw8__and_u:
3583
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3584
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3585
0
                           Context.Int32Ty, Context.Int8Ty);
3586
0
        break;
3587
0
      case OpCode::I32__atomic__rmw16__and_u:
3588
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3589
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3590
0
                           Context.Int32Ty, Context.Int16Ty);
3591
0
        break;
3592
0
      case OpCode::I64__atomic__rmw8__and_u:
3593
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3594
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3595
0
                           Context.Int64Ty, Context.Int8Ty);
3596
0
        break;
3597
0
      case OpCode::I64__atomic__rmw16__and_u:
3598
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3599
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3600
0
                           Context.Int64Ty, Context.Int16Ty);
3601
0
        break;
3602
0
      case OpCode::I64__atomic__rmw32__and_u:
3603
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3604
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpAnd,
3605
0
                           Context.Int64Ty, Context.Int32Ty);
3606
0
        break;
3607
0
      case OpCode::I32__atomic__rmw__or:
3608
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3609
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3610
0
                           Context.Int32Ty, Context.Int32Ty, true);
3611
0
        break;
3612
0
      case OpCode::I64__atomic__rmw__or:
3613
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3614
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3615
0
                           Context.Int64Ty, Context.Int64Ty, true);
3616
0
        break;
3617
0
      case OpCode::I32__atomic__rmw8__or_u:
3618
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3619
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3620
0
                           Context.Int32Ty, Context.Int8Ty);
3621
0
        break;
3622
0
      case OpCode::I32__atomic__rmw16__or_u:
3623
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3624
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3625
0
                           Context.Int32Ty, Context.Int16Ty);
3626
0
        break;
3627
0
      case OpCode::I64__atomic__rmw8__or_u:
3628
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3629
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3630
0
                           Context.Int64Ty, Context.Int8Ty);
3631
0
        break;
3632
0
      case OpCode::I64__atomic__rmw16__or_u:
3633
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3634
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3635
0
                           Context.Int64Ty, Context.Int16Ty);
3636
0
        break;
3637
0
      case OpCode::I64__atomic__rmw32__or_u:
3638
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3639
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpOr,
3640
0
                           Context.Int64Ty, Context.Int32Ty);
3641
0
        break;
3642
0
      case OpCode::I32__atomic__rmw__xor:
3643
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3644
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3645
0
                           Context.Int32Ty, Context.Int32Ty, true);
3646
0
        break;
3647
0
      case OpCode::I64__atomic__rmw__xor:
3648
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3649
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3650
0
                           Context.Int64Ty, Context.Int64Ty, true);
3651
0
        break;
3652
0
      case OpCode::I32__atomic__rmw8__xor_u:
3653
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3654
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3655
0
                           Context.Int32Ty, Context.Int8Ty);
3656
0
        break;
3657
0
      case OpCode::I32__atomic__rmw16__xor_u:
3658
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3659
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3660
0
                           Context.Int32Ty, Context.Int16Ty);
3661
0
        break;
3662
0
      case OpCode::I64__atomic__rmw8__xor_u:
3663
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3664
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3665
0
                           Context.Int64Ty, Context.Int8Ty);
3666
0
        break;
3667
0
      case OpCode::I64__atomic__rmw16__xor_u:
3668
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3669
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3670
0
                           Context.Int64Ty, Context.Int16Ty);
3671
0
        break;
3672
0
      case OpCode::I64__atomic__rmw32__xor_u:
3673
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3674
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXor,
3675
0
                           Context.Int64Ty, Context.Int32Ty);
3676
0
        break;
3677
0
      case OpCode::I32__atomic__rmw__xchg:
3678
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3679
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3680
0
                           Context.Int32Ty, Context.Int32Ty, true);
3681
0
        break;
3682
0
      case OpCode::I64__atomic__rmw__xchg:
3683
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3684
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3685
0
                           Context.Int64Ty, Context.Int64Ty, true);
3686
0
        break;
3687
0
      case OpCode::I32__atomic__rmw8__xchg_u:
3688
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3689
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3690
0
                           Context.Int32Ty, Context.Int8Ty);
3691
0
        break;
3692
0
      case OpCode::I32__atomic__rmw16__xchg_u:
3693
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3694
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3695
0
                           Context.Int32Ty, Context.Int16Ty);
3696
0
        break;
3697
0
      case OpCode::I64__atomic__rmw8__xchg_u:
3698
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3699
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3700
0
                           Context.Int64Ty, Context.Int8Ty);
3701
0
        break;
3702
0
      case OpCode::I64__atomic__rmw16__xchg_u:
3703
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3704
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3705
0
                           Context.Int64Ty, Context.Int16Ty);
3706
0
        break;
3707
0
      case OpCode::I64__atomic__rmw32__xchg_u:
3708
0
        compileAtomicRMWOp(Instr.getTargetIndex(), Instr.getMemoryOffset(),
3709
0
                           Instr.getMemoryAlign(), LLVMAtomicRMWBinOpXchg,
3710
0
                           Context.Int64Ty, Context.Int32Ty);
3711
0
        break;
3712
0
      case OpCode::I32__atomic__rmw__cmpxchg:
3713
0
        compileAtomicCompareExchange(
3714
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3715
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int32Ty, true);
3716
0
        break;
3717
0
      case OpCode::I64__atomic__rmw__cmpxchg:
3718
0
        compileAtomicCompareExchange(
3719
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3720
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int64Ty, true);
3721
0
        break;
3722
0
      case OpCode::I32__atomic__rmw8__cmpxchg_u:
3723
0
        compileAtomicCompareExchange(
3724
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3725
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int8Ty);
3726
0
        break;
3727
0
      case OpCode::I32__atomic__rmw16__cmpxchg_u:
3728
0
        compileAtomicCompareExchange(
3729
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3730
0
            Instr.getMemoryAlign(), Context.Int32Ty, Context.Int16Ty);
3731
0
        break;
3732
0
      case OpCode::I64__atomic__rmw8__cmpxchg_u:
3733
0
        compileAtomicCompareExchange(
3734
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3735
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int8Ty);
3736
0
        break;
3737
0
      case OpCode::I64__atomic__rmw16__cmpxchg_u:
3738
0
        compileAtomicCompareExchange(
3739
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3740
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int16Ty);
3741
0
        break;
3742
0
      case OpCode::I64__atomic__rmw32__cmpxchg_u:
3743
0
        compileAtomicCompareExchange(
3744
0
            Instr.getTargetIndex(), Instr.getMemoryOffset(),
3745
0
            Instr.getMemoryAlign(), Context.Int64Ty, Context.Int32Ty);
3746
0
        break;
3747
3748
0
      default:
3749
0
        assumingUnreachable();
3750
1.11M
      }
3751
1.11M
      return {};
3752
1.11M
    };
3753
3754
1.65M
    for (const auto &Instr : Instrs) {
3755
      // Update instruction count
3756
1.65M
      if (LocalInstrCount) {
3757
0
        Builder.createStore(
3758
0
            Builder.createAdd(
3759
0
                Builder.createLoad(Context.Int64Ty, LocalInstrCount),
3760
0
                LLContext.getInt64(1)),
3761
0
            LocalInstrCount);
3762
0
      }
3763
1.65M
      if (LocalGas) {
3764
0
        auto NewGas = Builder.createAdd(
3765
0
            Builder.createLoad(Context.Int64Ty, LocalGas),
3766
0
            Builder.createLoad(
3767
0
                Context.Int64Ty,
3768
0
                Builder.createConstInBoundsGEP2_64(
3769
0
                    LLVM::Type::getArrayType(Context.Int64Ty, UINT16_MAX + 1),
3770
0
                    Context.getCostTable(Builder, ExecCtx), 0,
3771
0
                    uint16_t(Instr.getOpCode()))));
3772
0
        Builder.createStore(NewGas, LocalGas);
3773
0
      }
3774
3775
      // Compile this instruction through the Dispatch lambda defined above.
3776
1.65M
      EXPECTED_TRY(Dispatch(Instr));
3777
1.65M
    }
3778
11.1k
    return {};
3779
11.2k
  }
3780
2.04k
  // Emits a trapping floating-point to signed-integer truncation.
  //
  // Pops one value from the operand stack (asserted to be float or double),
  // and pushes the result of converting its truncated value to IntType with
  // FPToSI. IntType must be a 32-bit or 64-bit integer type; any other width
  // reaches assumingUnreachable().
  //
  // Control flow emitted:
  //   - input fails the ordered self-compare (NaN) -> InvalidConvToInt trap
  //   - truncated value below Min                  -> IntegerOverflow trap
  //   - truncated value fails the Max comparison   -> IntegerOverflow trap
  //   - otherwise                                  -> conversion is pushed
  void compileSignedTrunc(LLVM::Type IntType) noexcept {
3781
2.04k
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.norm");
3782
2.04k
    auto NotMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmin");
3783
2.04k
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "strunc.notmax");
3784
2.04k
    auto Value = stackPop();
3785
2.04k
    // Compute the range limits as constants of the operand's FP type.
    // Precise records whether the integer bit width fits within the FP
    // mantissa width; it selects the comparison used against MaxFp below
    // (presumably because Max is then exactly representable in FPType).
    const auto [Precise, MinFp, MaxFp] =
3786
2.04k
        [IntType, Value]() -> std::tuple<bool, LLVM::Value, LLVM::Value> {
3787
2.04k
      const auto BitWidth = IntType.getIntegerBitWidth();
3788
2.04k
      // Signed minimum and maximum for the target width.
      const auto [Min, Max] = [BitWidth]() -> std::tuple<int64_t, int64_t> {
3789
2.04k
        switch (BitWidth) {
3790
1.60k
        case 32:
3791
1.60k
          return {std::numeric_limits<int32_t>::min(),
3792
1.60k
                  std::numeric_limits<int32_t>::max()};
3793
441
        case 64:
3794
441
          return {std::numeric_limits<int64_t>::min(),
3795
441
                  std::numeric_limits<int64_t>::max()};
3796
0
        default:
3797
0
          assumingUnreachable();
3798
2.04k
        }
3799
2.04k
      }();
3800
2.04k
      auto FPType = Value.getType();
3801
2.04k
      assuming(FPType.isFloatTy() || FPType.isDoubleTy());
3802
2.04k
      const auto FPWidth = FPType.getFPMantissaWidth();
3803
2.04k
      return {BitWidth <= FPWidth, LLVM::Value::getConstReal(FPType, Min),
3804
2.04k
              LLVM::Value::getConstReal(FPType, Max)};
3805
2.04k
    }();
3806
3807
2.04k
    // An ordered compare of the value with itself is false only for NaN.
    auto IsNotNan = Builder.createLikely(Builder.createFCmpORD(Value, Value));
3808
2.04k
    Builder.createCondBr(IsNotNan, NormBB,
3809
2.04k
                         getTrapBB(ErrCode::Value::InvalidConvToInt));
3810
3811
2.04k
    Builder.positionAtEnd(NormBB);
3812
2.04k
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
3813
2.04k
    // Range checks are performed on the truncated value, not the raw input.
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
3814
2.04k
    auto IsNotUnderflow =
3815
2.04k
        Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
3816
2.04k
    Builder.createCondBr(IsNotUnderflow, NotMinBB,
3817
2.04k
                         getTrapBB(ErrCode::Value::IntegerOverflow));
3818
3819
2.04k
    Builder.positionAtEnd(NotMinBB);
3820
2.04k
    // Upper bound: inclusive (OLE) when Precise, otherwise strict (OLT).
    auto IsNotOverflow = Builder.createLikely(
3821
2.04k
        Builder.createFCmp(Precise ? LLVMRealOLE : LLVMRealOLT, Trunc, MaxFp));
3822
2.04k
    Builder.createCondBr(IsNotOverflow, NotMaxBB,
3823
2.04k
                         getTrapBB(ErrCode::Value::IntegerOverflow));
3824
3825
2.04k
    Builder.positionAtEnd(NotMaxBB);
3826
2.04k
    // All checks passed: convert and push the integer result.
    stackPush(Builder.createFPToSI(Trunc, IntType));
3827
2.04k
  }
3828
1.31k
  // Emits a saturating floating-point to signed-integer truncation.
  //
  // Pops one value from the operand stack (asserted to be float or double)
  // and pushes an IntType result selected by a PHI node in the "ssat.end"
  // block. IntType must be a 32-bit or 64-bit integer type; any other width
  // reaches assumingUnreachable(). No trap block is used here: every failed
  // check branches straight to the end block with a substitute value.
  //
  // Result by path:
  //   - input fails the ordered self-compare (NaN) -> 0
  //   - truncated value below Min                  -> MinInt
  //   - truncated value fails the Max comparison   -> MaxInt
  //   - otherwise                                  -> FPToSI of the truncation
  void compileSignedTruncSat(LLVM::Type IntType) noexcept {
3829
1.31k
    // Remember the block the NaN check is emitted in; it is a PHI predecessor.
    auto CurrBB = Builder.getInsertBlock();
3830
1.31k
    auto NormBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.norm");
3831
1.31k
    auto NotMinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmin");
3832
1.31k
    auto NotMaxBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.notmax");
3833
1.31k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ssat.end");
3834
1.31k
    auto Value = stackPop();
3835
1.31k
    // Compute the range limits twice: as raw 64-bit patterns (MinInt/MaxInt)
    // for the saturated integer constants, and as constants of the operand's
    // FP type (MinFp/MaxFp) for the comparisons. Precise records whether the
    // integer bit width fits within the FP mantissa width and selects the
    // comparison used against MaxFp below.
    const auto [Precise, MinInt, MaxInt, MinFp, MaxFp] = [IntType, Value]()
3836
1.31k
        -> std::tuple<bool, uint64_t, uint64_t, LLVM::Value, LLVM::Value> {
3837
1.31k
      const auto BitWidth = IntType.getIntegerBitWidth();
3838
1.31k
      // Signed minimum and maximum for the target width.
      const auto [Min, Max] = [BitWidth]() -> std::tuple<int64_t, int64_t> {
3839
1.31k
        switch (BitWidth) {
3840
736
        case 32:
3841
736
          return {std::numeric_limits<int32_t>::min(),
3842
736
                  std::numeric_limits<int32_t>::max()};
3843
580
        case 64:
3844
580
          return {std::numeric_limits<int64_t>::min(),
3845
580
                  std::numeric_limits<int64_t>::max()};
3846
0
        default:
3847
0
          assumingUnreachable();
3848
1.31k
        }
3849
1.31k
      }();
3850
1.31k
      auto FPType = Value.getType();
3851
1.31k
      assuming(FPType.isFloatTy() || FPType.isDoubleTy());
3852
1.31k
      const auto FPWidth = FPType.getFPMantissaWidth();
3853
1.31k
      return {BitWidth <= FPWidth, static_cast<uint64_t>(Min),
3854
1.31k
              static_cast<uint64_t>(Max),
3855
1.31k
              LLVM::Value::getConstReal(FPType, Min),
3856
1.31k
              LLVM::Value::getConstReal(FPType, Max)};
3857
1.31k
    }();
3858
3859
1.31k
    // An ordered compare of the value with itself is false only for NaN.
    auto IsNotNan = Builder.createLikely(Builder.createFCmpORD(Value, Value));
3860
1.31k
    Builder.createCondBr(IsNotNan, NormBB, EndBB);
3861
3862
1.31k
    Builder.positionAtEnd(NormBB);
3863
1.31k
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
3864
1.31k
    // Range checks are performed on the truncated value, not the raw input.
    auto Trunc = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Value);
3865
1.31k
    auto IsNotUnderflow =
3866
1.31k
        Builder.createLikely(Builder.createFCmpOGE(Trunc, MinFp));
3867
1.31k
    Builder.createCondBr(IsNotUnderflow, NotMinBB, EndBB);
3868
3869
1.31k
    Builder.positionAtEnd(NotMinBB);
3870
1.31k
    // Upper bound: inclusive (OLE) when Precise, otherwise strict (OLT).
    auto IsNotOverflow = Builder.createLikely(
3871
1.31k
        Builder.createFCmp(Precise ? LLVMRealOLE : LLVMRealOLT, Trunc, MaxFp));
3872
1.31k
    Builder.createCondBr(IsNotOverflow, NotMaxBB, EndBB);
3873
3874
1.31k
    Builder.positionAtEnd(NotMaxBB);
3875
1.31k
    // In-range path: perform the actual conversion.
    auto IntValue = Builder.createFPToSI(Trunc, IntType);
3876
1.31k
    Builder.createBr(EndBB);
3877
3878
1.31k
    Builder.positionAtEnd(EndBB);
3879
1.31k
    // Merge the four paths; the predecessor block identifies which check
    // sent control here and therefore which value to produce.
    auto PHIRet = Builder.createPHI(IntType);
3880
1.31k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, 0, true), CurrBB);
3881
1.31k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, MinInt, true), NormBB);
3882
1.31k
    PHIRet.addIncoming(LLVM::Value::getConstInt(IntType, MaxInt, true),
3883
1.31k
                       NotMinBB);
3884
1.31k
    PHIRet.addIncoming(IntValue, NotMaxBB);
3885
3886
1.31k
    stackPush(PHIRet);
3887
1.31k
  }
3888
3.81k
  /// Emit a trapping float -> unsigned integer truncation.
  ///
  /// Pops one floating-point operand and pushes the converted integer of type
  /// \p IntType. The generated code branches to the `InvalidConvToInt` trap
  /// block when the operand is NaN and to the `IntegerOverflow` trap block
  /// when the truncated operand lies outside the unsigned range of
  /// \p IntType.
  ///
  /// \param IntType destination integer type; only 32 and 64 bit widths are
  ///        handled.
  void compileUnsignedTrunc(LLVM::Type IntType) noexcept {
    auto LowerCheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.norm");
    auto UpperCheckBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmin");
    auto ConvertBB = LLVM::BasicBlock::create(LLContext, F.Fn, "utrunc.notmax");
    auto Operand = stackPop();

    // Integer range of the destination type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    const auto MantissaBits = FloatTy.getFPMantissaWidth();
    // When the mantissa is at least as wide as the integer, the upper bound
    // is compared with `<=`; otherwise a strict `<` is used (presumably
    // because the bound is then not exactly representable).
    const bool BoundIsExact = IntBits <= MantissaBits;
    auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    // An ordered self-comparison is false only for NaN.
    auto SelfOrdered = Builder.createFCmpORD(Operand, Operand);
    auto IsNotNan = Builder.createLikely(SelfOrdered);
    Builder.createCondBr(IsNotNan, LowerCheckBB,
                         getTrapBB(ErrCode::Value::InvalidConvToInt));

    Builder.positionAtEnd(LowerCheckBB);
    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveLower = Builder.createFCmpOGE(Truncated, LowerFp);
    auto IsNotUnderflow = Builder.createLikely(AboveLower);
    Builder.createCondBr(IsNotUnderflow, UpperCheckBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    Builder.positionAtEnd(UpperCheckBB);
    const auto UpperPredicate = BoundIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowUpper = Builder.createFCmp(UpperPredicate, Truncated, UpperFp);
    auto IsNotOverflow = Builder.createLikely(BelowUpper);
    Builder.createCondBr(IsNotOverflow, ConvertBB,
                         getTrapBB(ErrCode::Value::IntegerOverflow));

    // Code generation continues in the block holding the conversion.
    Builder.positionAtEnd(ConvertBB);
    stackPush(Builder.createFPToUI(Truncated, IntType));
  }
3936
1.29k
  /// Emit a saturating float -> unsigned integer truncation.
  ///
  /// Pops one floating-point operand and pushes a PHI of type \p IntType that
  /// selects:
  ///  - the minimum of the range when the ordered `>=` lower-bound test fails
  ///    (an ordered comparison is also false for NaN),
  ///  - the maximum of the range when the upper-bound test fails,
  ///  - the converted value otherwise.
  ///
  /// \param IntType destination integer type; only 32 and 64 bit widths are
  ///        handled.
  void compileUnsignedTruncSat(LLVM::Type IntType) noexcept {
    auto EntryBB = Builder.getInsertBlock();
    auto UpperCheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.norm");
    auto ConvertBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.notmax");
    auto JoinBB = LLVM::BasicBlock::create(LLContext, F.Fn, "usat.end");
    auto Operand = stackPop();

    // Integer range of the destination type.
    const auto IntBits = IntType.getIntegerBitWidth();
    uint64_t LowerBound = 0;
    uint64_t UpperBound = 0;
    switch (IntBits) {
    case 32:
      LowerBound = std::numeric_limits<uint32_t>::min();
      UpperBound = std::numeric_limits<uint32_t>::max();
      break;
    case 64:
      LowerBound = std::numeric_limits<uint64_t>::min();
      UpperBound = std::numeric_limits<uint64_t>::max();
      break;
    default:
      assumingUnreachable();
    }

    auto FloatTy = Operand.getType();
    assuming(FloatTy.isFloatTy() || FloatTy.isDoubleTy());
    const auto MantissaBits = FloatTy.getFPMantissaWidth();
    // `<=` against the upper bound when the mantissa is at least as wide as
    // the integer, strict `<` otherwise.
    const bool BoundIsExact = IntBits <= MantissaBits;
    auto LowerFp = LLVM::Value::getConstReal(FloatTy, LowerBound);
    auto UpperFp = LLVM::Value::getConstReal(FloatTy, UpperBound);

    assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
    auto Truncated = Builder.createUnaryIntrinsic(LLVM::Core::Trunc, Operand);
    auto AboveLower = Builder.createFCmpOGE(Truncated, LowerFp);
    auto IsNotUnderflow = Builder.createLikely(AboveLower);
    Builder.createCondBr(IsNotUnderflow, UpperCheckBB, JoinBB);

    Builder.positionAtEnd(UpperCheckBB);
    const auto UpperPredicate = BoundIsExact ? LLVMRealOLE : LLVMRealOLT;
    auto BelowUpper = Builder.createFCmp(UpperPredicate, Truncated, UpperFp);
    auto IsNotOverflow = Builder.createLikely(BelowUpper);
    Builder.createCondBr(IsNotOverflow, ConvertBB, JoinBB);

    Builder.positionAtEnd(ConvertBB);
    auto Converted = Builder.createFPToUI(Truncated, IntType);
    Builder.createBr(JoinBB);

    // Merge the three possible outcomes.
    Builder.positionAtEnd(JoinBB);
    auto Result = Builder.createPHI(IntType);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, LowerBound), EntryBB);
    Result.addIncoming(LLVM::Value::getConstInt(IntType, UpperBound),
                       UpperCheckBB);
    Result.addIncoming(Converted, ConvertBB);

    stackPush(Result);
  }
3988
3989
  /// Emit a natural-alignment check for an atomic memory access.
  ///
  /// Masks \p Offset with (byte width of \p IntType) - 1 and branches to the
  /// `UnalignedAtomicAccess` trap block when the masked value is non-zero.
  /// On return the builder is positioned in the "aligned" continuation block.
  ///
  /// \param Offset  64-bit effective offset to test (it is AND-ed and compared
  ///        with 64-bit constants).
  /// \param IntType integer type whose bit width defines the access size.
  void compileAtomicCheckOffsetAlignment(LLVM::Value Offset,
                                         LLVM::Type IntType) noexcept {
    const auto AccessBytes = IntType.getIntegerBitWidth() / 8;
    auto LowBitsMask = LLContext.getInt64(AccessBytes - 1);
    auto LowBits = Builder.createAnd(Offset, LowBitsMask);
    auto AlignedBB =
        LLVM::BasicBlock::create(LLContext, F.Fn, "address_align_ok");
    auto LowBitsAreZero = Builder.createICmpEQ(LowBits, LLContext.getInt64(0));
    auto IsAddressAligned = Builder.createLikely(LowBitsAreZero);
    Builder.createCondBr(IsAddressAligned, AlignedBB,
                         getTrapBB(ErrCode::Value::UnalignedAtomicAccess));

    Builder.positionAtEnd(AlignedBB);
  }
4002
4003
192
  void compileMemoryFence() noexcept {
4004
192
    Builder.createFence(LLVMAtomicOrderingSequentiallyConsistent);
4005
192
  }
4006
  void compileAtomicNotify(unsigned MemoryIndex,
4007
44
                           uint64_t MemoryOffset) noexcept {
4008
44
    auto Count = Builder.createZExt(stackPop(), Context.Int64Ty);
4009
44
    auto Offset = Builder.createZExt(stackPop(), Context.Int64Ty);
4010
44
    if (MemoryOffset != 0) {
4011
35
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4012
35
    }
4013
44
    compileAtomicCheckOffsetAlignment(Offset, Context.Int32Ty);
4014
44
    stackPush(Builder.createTrunc(
4015
44
        Builder.createCall(
4016
44
            Context.getIntrinsic(
4017
44
                Builder, Executable::Intrinsics::kMemAtomicNotify,
4018
44
                LLVM::Type::getFunctionType(
4019
44
                    Context.Int64Ty,
4020
44
                    {Context.Int32Ty, Context.Int64Ty, Context.Int64Ty},
4021
44
                    false)),
4022
44
            {LLContext.getInt32(MemoryIndex), Offset, Count}),
4023
44
        Context.MemoryAddrTypes[MemoryIndex]));
4024
44
  }
4025
  void compileAtomicWait(unsigned MemoryIndex, uint64_t MemoryOffset,
4026
11
                         LLVM::Type TargetType, uint32_t BitWidth) noexcept {
4027
11
    auto Timeout = stackPop();
4028
11
    auto ExpectedValue = Builder.createZExtOrTrunc(stackPop(), Context.Int64Ty);
4029
11
    auto Offset = Builder.createZExt(stackPop(), Context.Int64Ty);
4030
11
    if (MemoryOffset != 0) {
4031
7
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4032
7
    }
4033
11
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4034
11
    stackPush(Builder.createTrunc(
4035
11
        Builder.createCall(
4036
11
            Context.getIntrinsic(
4037
11
                Builder, Executable::Intrinsics::kMemAtomicWait,
4038
11
                LLVM::Type::getFunctionType(Context.Int64Ty,
4039
11
                                            {Context.Int32Ty, Context.Int64Ty,
4040
11
                                             Context.Int64Ty, Context.Int64Ty,
4041
11
                                             Context.Int32Ty},
4042
11
                                            false)),
4043
11
            {LLContext.getInt32(MemoryIndex), Offset, ExpectedValue, Timeout,
4044
11
             LLContext.getInt32(BitWidth)}),
4045
11
        Context.MemoryAddrTypes[MemoryIndex]));
4046
11
  }
4047
  void compileAtomicLoad(unsigned MemoryIndex, uint64_t MemoryOffset,
4048
                         unsigned Alignment, LLVM::Type IntType,
4049
0
                         LLVM::Type TargetType, bool Signed = false) noexcept {
4050
4051
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4052
0
    if (MemoryOffset != 0) {
4053
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4054
0
    }
4055
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4056
0
    auto VPtr = Builder.createInBoundsGEP1(
4057
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4058
0
        Offset);
4059
4060
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4061
0
    auto Load = switchEndian(Builder.createLoad(TargetType, Ptr, true));
4062
0
    Load.setAlignment(1 << Alignment);
4063
0
    Load.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
4064
4065
0
    if (Signed) {
4066
0
      Stack.back() = Builder.createSExt(Load, IntType);
4067
0
    } else {
4068
0
      Stack.back() = Builder.createZExt(Load, IntType);
4069
0
    }
4070
0
  }
4071
  void compileAtomicStore(unsigned MemoryIndex, uint64_t MemoryOffset,
4072
                          unsigned Alignment, LLVM::Type, LLVM::Type TargetType,
4073
0
                          bool Signed = false) noexcept {
4074
0
    auto V = stackPop();
4075
4076
0
    if (Signed) {
4077
0
      V = Builder.createSExtOrTrunc(V, TargetType);
4078
0
    } else {
4079
0
      V = Builder.createZExtOrTrunc(V, TargetType);
4080
0
    }
4081
0
    V = switchEndian(V);
4082
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4083
0
    if (MemoryOffset != 0) {
4084
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4085
0
    }
4086
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4087
0
    auto VPtr = Builder.createInBoundsGEP1(
4088
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4089
0
        Offset);
4090
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4091
0
    auto Store = Builder.createStore(V, Ptr, true);
4092
0
    Store.setAlignment(1 << Alignment);
4093
0
    Store.setOrdering(LLVMAtomicOrderingSequentiallyConsistent);
4094
0
  }
4095
4096
  void compileAtomicRMWOp(unsigned MemoryIndex, uint64_t MemoryOffset,
4097
                          [[maybe_unused]] unsigned Alignment,
4098
                          LLVMAtomicRMWBinOp BinOp, LLVM::Type IntType,
4099
0
                          LLVM::Type TargetType, bool Signed = false) noexcept {
4100
0
    auto Value = Builder.createSExtOrTrunc(stackPop(), TargetType);
4101
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4102
0
    if (MemoryOffset != 0) {
4103
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4104
0
    }
4105
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4106
0
    auto VPtr = Builder.createInBoundsGEP1(
4107
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4108
0
        Offset);
4109
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4110
4111
0
    LLVM::Value Ret;
4112
    if constexpr (Endian::native == Endian::big) {
4113
      if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd ||
4114
          BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4115
        auto AtomicBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw");
4116
        auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "atomic.rmw.ok");
4117
        Builder.createBr(AtomicBB);
4118
        Builder.positionAtEnd(AtomicBB);
4119
4120
        auto Load = Builder.createLoad(TargetType, Ptr, true);
4121
        Load.setOrdering(LLVMAtomicOrderingMonotonic);
4122
        Load.setAlignment(1 << Alignment);
4123
4124
        LLVM::Value New;
4125
        if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpAdd)
4126
          New = Builder.createAdd(switchEndian(Load), Value);
4127
        else if (BinOp == LLVMAtomicRMWBinOp::LLVMAtomicRMWBinOpSub) {
4128
          New = Builder.createSub(switchEndian(Load), Value);
4129
        } else {
4130
          assumingUnreachable();
4131
        }
4132
        New = switchEndian(New);
4133
4134
        auto Exchange = Builder.createAtomicCmpXchg(
4135
            Ptr, Load, New, LLVMAtomicOrderingSequentiallyConsistent,
4136
            LLVMAtomicOrderingSequentiallyConsistent);
4137
4138
        Ret = Builder.createExtractValue(Exchange, 0);
4139
        auto Success = Builder.createExtractValue(Exchange, 1);
4140
        Builder.createCondBr(Success, OkBB, AtomicBB);
4141
        Builder.positionAtEnd(OkBB);
4142
      } else {
4143
        Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4144
                                      LLVMAtomicOrderingSequentiallyConsistent);
4145
      }
4146
0
    } else {
4147
0
      Ret = Builder.createAtomicRMW(BinOp, Ptr, switchEndian(Value),
4148
0
                                    LLVMAtomicOrderingSequentiallyConsistent);
4149
0
    }
4150
0
    Ret = switchEndian(Ret);
4151
#if LLVM_VERSION_MAJOR >= 13
4152
    Ret.setAlignment(1 << Alignment);
4153
#endif
4154
0
    if (Signed) {
4155
0
      Stack.back() = Builder.createSExt(Ret, IntType);
4156
0
    } else {
4157
0
      Stack.back() = Builder.createZExt(Ret, IntType);
4158
0
    }
4159
0
  }
4160
  void compileAtomicCompareExchange(unsigned MemoryIndex, uint64_t MemoryOffset,
4161
                                    [[maybe_unused]] unsigned Alignment,
4162
                                    LLVM::Type IntType, LLVM::Type TargetType,
4163
0
                                    bool Signed = false) noexcept {
4164
4165
0
    auto Replacement = Builder.createSExtOrTrunc(stackPop(), TargetType);
4166
0
    auto Expected = Builder.createSExtOrTrunc(stackPop(), TargetType);
4167
0
    auto Offset = Builder.createZExt(Stack.back(), Context.Int64Ty);
4168
0
    if (MemoryOffset != 0) {
4169
0
      Offset = Builder.createAdd(Offset, LLContext.getInt64(MemoryOffset));
4170
0
    }
4171
0
    compileAtomicCheckOffsetAlignment(Offset, TargetType);
4172
0
    auto VPtr = Builder.createInBoundsGEP1(
4173
0
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex),
4174
0
        Offset);
4175
0
    auto Ptr = Builder.createBitCast(VPtr, TargetType.getPointerTo());
4176
4177
0
    auto Ret = Builder.createAtomicCmpXchg(
4178
0
        Ptr, switchEndian(Expected), switchEndian(Replacement),
4179
0
        LLVMAtomicOrderingSequentiallyConsistent,
4180
0
        LLVMAtomicOrderingSequentiallyConsistent);
4181
#if LLVM_VERSION_MAJOR >= 13
4182
    Ret.setAlignment(1 << Alignment);
4183
#endif
4184
0
    auto OldVal = Builder.createExtractValue(Ret, 0);
4185
0
    OldVal = switchEndian(OldVal);
4186
0
    if (Signed) {
4187
0
      Stack.back() = Builder.createSExt(OldVal, IntType);
4188
0
    } else {
4189
0
      Stack.back() = Builder.createZExt(OldVal, IntType);
4190
0
    }
4191
0
  }
4192
4193
11.8k
  void compileReturn() noexcept {
4194
11.8k
    updateInstrCount();
4195
11.8k
    updateGas();
4196
11.8k
    auto Ty = F.Ty.getReturnType();
4197
11.8k
    if (Ty.isVoidTy()) {
4198
2.28k
      Builder.createRetVoid();
4199
9.59k
    } else if (Ty.isStructTy()) {
4200
394
      const auto Count = Ty.getStructNumElements();
4201
394
      std::vector<LLVM::Value> Ret(Count);
4202
1.45k
      for (unsigned I = 0; I < Count; ++I) {
4203
1.06k
        const unsigned J = Count - 1 - I;
4204
1.06k
        Ret[J] = stackPop();
4205
1.06k
      }
4206
394
      Builder.createAggregateRet(Ret);
4207
9.20k
    } else {
4208
9.20k
      Builder.createRet(stackPop());
4209
9.20k
    }
4210
11.8k
  }
4211
4212
20.4k
  void updateInstrCount() noexcept {
4213
20.4k
    if (LocalInstrCount) {
4214
0
      auto Store [[maybe_unused]] = Builder.createAtomicRMW(
4215
0
          LLVMAtomicRMWBinOpAdd, Context.getInstrCount(Builder, ExecCtx),
4216
0
          Builder.createLoad(Context.Int64Ty, LocalInstrCount),
4217
0
          LLVMAtomicOrderingMonotonic);
4218
#if LLVM_VERSION_MAJOR >= 13
4219
      Store.setAlignment(8);
4220
#endif
4221
0
      Builder.createStore(LLContext.getInt64(0), LocalInstrCount);
4222
0
    }
4223
20.4k
  }
4224
4225
22.4k
  void updateGas() noexcept {
4226
22.4k
    if (LocalGas) {
4227
0
      auto CurrBB = Builder.getInsertBlock();
4228
0
      auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_check");
4229
0
      auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_ok");
4230
0
      auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "gas_end");
4231
4232
0
      auto Cost = Builder.createLoad(Context.Int64Ty, LocalGas);
4233
0
      Cost.setAlignment(64);
4234
0
      auto GasPtr = Context.getGas(Builder, ExecCtx);
4235
0
      auto GasLimit = Context.getGasLimit(Builder, ExecCtx);
4236
0
      auto Gas = Builder.createLoad(Context.Int64Ty, GasPtr);
4237
0
      Gas.setAlignment(64);
4238
0
      Gas.setOrdering(LLVMAtomicOrderingMonotonic);
4239
0
      Builder.createBr(CheckBB);
4240
0
      Builder.positionAtEnd(CheckBB);
4241
4242
0
      auto PHIOldGas = Builder.createPHI(Context.Int64Ty);
4243
0
      auto NewGas = Builder.createAdd(PHIOldGas, Cost);
4244
0
      auto IsGasRemain =
4245
0
          Builder.createLikely(Builder.createICmpULE(NewGas, GasLimit));
4246
0
      Builder.createCondBr(IsGasRemain, OkBB,
4247
0
                           getTrapBB(ErrCode::Value::CostLimitExceeded));
4248
0
      Builder.positionAtEnd(OkBB);
4249
4250
0
      auto RGasAndSucceed = Builder.createAtomicCmpXchg(
4251
0
          GasPtr, PHIOldGas, NewGas, LLVMAtomicOrderingMonotonic,
4252
0
          LLVMAtomicOrderingMonotonic);
4253
#if LLVM_VERSION_MAJOR >= 13
4254
      RGasAndSucceed.setAlignment(8);
4255
#endif
4256
0
      RGasAndSucceed.setWeak(true);
4257
0
      auto RGas = Builder.createExtractValue(RGasAndSucceed, 0);
4258
0
      auto Succeed = Builder.createExtractValue(RGasAndSucceed, 1);
4259
0
      Builder.createCondBr(Builder.createLikely(Succeed), EndBB, CheckBB);
4260
0
      Builder.positionAtEnd(EndBB);
4261
4262
0
      Builder.createStore(LLContext.getInt64(0), LocalGas);
4263
4264
0
      PHIOldGas.addIncoming(Gas, CurrBB);
4265
0
      PHIOldGas.addIncoming(RGas, OkBB);
4266
0
    }
4267
22.4k
  }
4268
4269
3.33k
  void updateGasAtTrap() noexcept {
4270
3.33k
    if (LocalGas) {
4271
0
      auto Update [[maybe_unused]] = Builder.createAtomicRMW(
4272
0
          LLVMAtomicRMWBinOpAdd, Context.getGas(Builder, ExecCtx),
4273
0
          Builder.createLoad(Context.Int64Ty, LocalGas),
4274
0
          LLVMAtomicOrderingMonotonic);
4275
#if LLVM_VERSION_MAJOR >= 13
4276
      Update.setAlignment(8);
4277
#endif
4278
0
    }
4279
3.33k
  }
4280
4281
private:
4282
3.56k
  void compileCallOp(const unsigned int FuncIndex) noexcept {
4283
3.56k
    const auto &FuncType =
4284
3.56k
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4285
3.56k
            ->getFuncType();
4286
3.56k
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4287
3.56k
    const auto &ParamTypes = FuncType.getParamTypes();
4288
4289
3.56k
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4290
3.56k
    Args[0] = F.Fn.getFirstParam();
4291
4.39k
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4292
828
      const size_t J = ParamTypes.size() - 1 - I;
4293
828
      Args[J + 1] = stackPop();
4294
828
    }
4295
4296
3.56k
    LLVM::Value Ret;
4297
3.56k
    if (IsLazyJIT) {
4298
0
      bool IsImport = std::get<2>(Context.Functions[FuncIndex]) == nullptr;
4299
0
      if (IsImport) {
4300
0
        Ret = Builder.createCall(Function, Args);
4301
0
      } else {
4302
0
        auto FTy = toLLVMType(LLContext, Context.ExecCtxPtrTy, FuncType);
4303
4304
0
        if (Context.LazyJITCacheVars.size() <= FuncIndex) {
4305
0
          Context.LazyJITCacheVars.resize(Context.Functions.size());
4306
0
        }
4307
0
        auto &CacheVar = Context.LazyJITCacheVars[FuncIndex];
4308
0
        if (!CacheVar) {
4309
0
          CacheVar = Context.LLModule.get().addGlobal(
4310
0
              FTy.getPointerTo(), false, LLVMPrivateLinkage,
4311
0
              LLVM::Value::getConstNull(FTy.getPointerTo()), "");
4312
0
        }
4313
4314
0
        auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ic.check");
4315
0
        auto ResolveBB =
4316
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "ic.resolve");
4317
0
        auto CallBB = LLVM::BasicBlock::create(LLContext, F.Fn, "ic.call");
4318
4319
0
        Builder.createBr(CheckBB);
4320
0
        Builder.positionAtEnd(CheckBB);
4321
4322
0
        auto CachedPtr =
4323
0
            Builder.createLoad(FTy.getPointerTo(), CacheVar, false);
4324
0
        CachedPtr.setAlignment(8);
4325
0
        CachedPtr.setOrdering(LLVMAtomicOrderingAcquire);
4326
0
        auto IsNull = Builder.createIsNull(CachedPtr);
4327
0
        auto IsNotNull = Builder.createLikely(Builder.createNot(IsNull));
4328
0
        Builder.createCondBr(IsNotNull, CallBB, ResolveBB);
4329
4330
0
        Builder.positionAtEnd(ResolveBB);
4331
0
        auto FPtr = Builder.createCall(
4332
0
            Context.getIntrinsic(
4333
0
                Builder, Executable::Intrinsics::kFuncGetFuncSymbol,
4334
0
                LLVM::Type::getFunctionType(FTy.getPointerTo(),
4335
0
                                            {Context.Int32Ty}, false)),
4336
0
            {LLContext.getInt32(FuncIndex)});
4337
0
        auto Store = Builder.createStore(FPtr, CacheVar);
4338
0
        Store.setAlignment(8);
4339
0
        Store.setOrdering(LLVMAtomicOrderingRelease);
4340
0
        Builder.createBr(CallBB);
4341
4342
0
        Builder.positionAtEnd(CallBB);
4343
0
        auto FinalPtr = Builder.createPHI(FTy.getPointerTo());
4344
0
        FinalPtr.addIncoming(CachedPtr, CheckBB);
4345
0
        FinalPtr.addIncoming(FPtr, ResolveBB);
4346
4347
0
        Ret = Builder.createCall(LLVM::FunctionCallee(FTy, FinalPtr), Args);
4348
0
      }
4349
3.56k
    } else {
4350
3.56k
      Ret = Builder.createCall(Function, Args);
4351
3.56k
    }
4352
4353
3.56k
    auto Ty = Ret.getType();
4354
3.56k
    if (Ty.isVoidTy()) {
4355
      // nothing to do
4356
1.90k
    } else if (Ty.isStructTy()) {
4357
188
      for (auto Val : unpackStruct(Builder, Ret)) {
4358
188
        stackPush(Val);
4359
188
      }
4360
1.57k
    } else {
4361
1.57k
      stackPush(Ret);
4362
1.57k
    }
4363
3.56k
  }
4364
4365
  void compileIndirectCallOp(const uint32_t TableIndex,
4366
1.15k
                             const uint32_t FuncTypeIndex) noexcept {
4367
1.15k
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4368
1.15k
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4369
1.15k
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4370
4371
1.15k
    LLVM::Value FuncIndex = stackPop();
4372
1.15k
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4373
1.15k
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4374
1.15k
    auto RTy = FTy.getReturnType();
4375
4376
1.15k
    const size_t ArgSize = FuncType.getParamTypes().size();
4377
1.15k
    const size_t RetSize =
4378
1.15k
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4379
1.15k
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4380
1.15k
    ArgsVec[0] = F.Fn.getFirstParam();
4381
2.05k
    for (size_t I = 0; I < ArgSize; ++I) {
4382
901
      const size_t J = ArgSize - I;
4383
901
      ArgsVec[J] = stackPop();
4384
901
    }
4385
4386
1.15k
    std::vector<LLVM::Value> FPtrRetsVec;
4387
1.15k
    FPtrRetsVec.reserve(RetSize);
4388
1.15k
    {
4389
1.15k
      auto FPtr = Builder.createCall(
4390
1.15k
          Context.getIntrinsic(
4391
1.15k
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4392
1.15k
              LLVM::Type::getFunctionType(
4393
1.15k
                  FTy.getPointerTo(),
4394
1.15k
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4395
1.15k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4396
1.15k
           FuncIndex});
4397
1.15k
      Builder.createCondBr(
4398
1.15k
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4399
1.15k
          NotNullBB, IsNullBB);
4400
1.15k
      Builder.positionAtEnd(NotNullBB);
4401
4402
1.15k
      auto FPtrRet =
4403
1.15k
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4404
1.15k
      if (RetSize == 0) {
4405
        // nothing to do
4406
916
      } else if (RetSize == 1) {
4407
886
        FPtrRetsVec.push_back(FPtrRet);
4408
886
      } else {
4409
60
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4410
60
          FPtrRetsVec.push_back(Val);
4411
60
        }
4412
30
      }
4413
1.15k
    }
4414
4415
1.15k
    Builder.createBr(EndBB);
4416
1.15k
    Builder.positionAtEnd(IsNullBB);
4417
4418
1.15k
    std::vector<LLVM::Value> RetsVec;
4419
1.15k
    {
4420
1.15k
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4421
1.15k
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4422
1.15k
      Builder.createArrayPtrStore(
4423
1.15k
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4424
1.15k
          kValSize);
4425
4426
1.15k
      Builder.createCall(
4427
1.15k
          Context.getIntrinsic(
4428
1.15k
              Builder, Executable::Intrinsics::kCallIndirect,
4429
1.15k
              LLVM::Type::getFunctionType(Context.VoidTy,
4430
1.15k
                                          {Context.Int32Ty, Context.Int32Ty,
4431
1.15k
                                           Context.Int32Ty, Context.Int8PtrTy,
4432
1.15k
                                           Context.Int8PtrTy},
4433
1.15k
                                          false)),
4434
1.15k
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4435
1.15k
           FuncIndex, Args, Rets});
4436
4437
1.15k
      if (RetSize == 0) {
4438
        // nothing to do
4439
916
      } else if (RetSize == 1) {
4440
886
        RetsVec.push_back(
4441
886
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4442
886
      } else {
4443
30
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4444
30
                                             kValSize);
4445
30
      }
4446
1.15k
      Builder.createBr(EndBB);
4447
1.15k
      Builder.positionAtEnd(EndBB);
4448
1.15k
    }
4449
4450
2.10k
    for (unsigned I = 0; I < RetSize; ++I) {
4451
946
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4452
946
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4453
946
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4454
946
      stackPush(PHIRet);
4455
946
    }
4456
1.15k
  }
4457
4458
63
  void compileReturnCallOp(const unsigned int FuncIndex) noexcept {
4459
63
    const auto &FuncType =
4460
63
        Context.CompositeTypes[std::get<0>(Context.Functions[FuncIndex])]
4461
63
            ->getFuncType();
4462
63
    const auto &Function = std::get<1>(Context.Functions[FuncIndex]);
4463
63
    const auto &ParamTypes = FuncType.getParamTypes();
4464
4465
63
    std::vector<LLVM::Value> Args(ParamTypes.size() + 1);
4466
63
    Args[0] = F.Fn.getFirstParam();
4467
110
    for (size_t I = 0; I < ParamTypes.size(); ++I) {
4468
47
      const size_t J = ParamTypes.size() - 1 - I;
4469
47
      Args[J + 1] = stackPop();
4470
47
    }
4471
4472
63
    LLVM::Value Ret;
4473
63
    if (IsLazyJIT) {
4474
0
      bool IsImport = std::get<2>(Context.Functions[FuncIndex]) == nullptr;
4475
0
      if (IsImport) {
4476
0
        Ret = Builder.createCall(Function, Args);
4477
0
      } else {
4478
0
        auto FTy = toLLVMType(LLContext, Context.ExecCtxPtrTy, FuncType);
4479
4480
0
        if (Context.LazyJITCacheVars.size() <= FuncIndex) {
4481
0
          Context.LazyJITCacheVars.resize(Context.Functions.size());
4482
0
        }
4483
0
        auto &CacheVar = Context.LazyJITCacheVars[FuncIndex];
4484
0
        if (!CacheVar) {
4485
0
          CacheVar = Context.LLModule.get().addGlobal(
4486
0
              FTy.getPointerTo(), false, LLVMPrivateLinkage,
4487
0
              LLVM::Value::getConstNull(FTy.getPointerTo()), "");
4488
0
        }
4489
4490
0
        auto CheckBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rc.check");
4491
0
        auto ResolveBB =
4492
0
            LLVM::BasicBlock::create(LLContext, F.Fn, "rc.resolve");
4493
0
        auto CallBB = LLVM::BasicBlock::create(LLContext, F.Fn, "rc.call");
4494
4495
0
        Builder.createBr(CheckBB);
4496
0
        Builder.positionAtEnd(CheckBB);
4497
4498
0
        auto CachedPtr =
4499
0
            Builder.createLoad(FTy.getPointerTo(), CacheVar, false);
4500
0
        CachedPtr.setAlignment(8);
4501
0
        CachedPtr.setOrdering(LLVMAtomicOrderingAcquire);
4502
0
        auto IsNull = Builder.createIsNull(CachedPtr);
4503
0
        auto IsNotNull = Builder.createLikely(Builder.createNot(IsNull));
4504
0
        Builder.createCondBr(IsNotNull, CallBB, ResolveBB);
4505
4506
0
        Builder.positionAtEnd(ResolveBB);
4507
0
        auto FPtr = Builder.createCall(
4508
0
            Context.getIntrinsic(
4509
0
                Builder, Executable::Intrinsics::kFuncGetFuncSymbol,
4510
0
                LLVM::Type::getFunctionType(FTy.getPointerTo(),
4511
0
                                            {Context.Int32Ty}, false)),
4512
0
            {LLContext.getInt32(FuncIndex)});
4513
0
        auto Store = Builder.createStore(FPtr, CacheVar);
4514
0
        Store.setAlignment(8);
4515
0
        Store.setOrdering(LLVMAtomicOrderingRelease);
4516
0
        Builder.createBr(CallBB);
4517
4518
0
        Builder.positionAtEnd(CallBB);
4519
0
        auto FinalPtr = Builder.createPHI(FTy.getPointerTo());
4520
0
        FinalPtr.addIncoming(CachedPtr, CheckBB);
4521
0
        FinalPtr.addIncoming(FPtr, ResolveBB);
4522
4523
0
        Ret = Builder.createCall(LLVM::FunctionCallee(FTy, FinalPtr), Args);
4524
0
      }
4525
63
    } else {
4526
63
      Ret = Builder.createCall(Function, Args);
4527
63
    }
4528
4529
63
    Ret.setMustTailCall();
4530
63
    auto Ty = Ret.getType();
4531
63
    if (Ty.isVoidTy()) {
4532
18
      Builder.createRetVoid();
4533
45
    } else {
4534
45
      Builder.createRet(Ret);
4535
45
    }
4536
63
  }
4537
4538
  void compileReturnIndirectCallOp(const uint32_t TableIndex,
4539
163
                                   const uint32_t FuncTypeIndex) noexcept {
4540
163
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.not_null");
4541
163
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.is_null");
4542
4543
163
    LLVM::Value FuncIndex = stackPop();
4544
163
    const auto &FuncType = Context.CompositeTypes[FuncTypeIndex]->getFuncType();
4545
163
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4546
163
    auto RTy = FTy.getReturnType();
4547
4548
163
    const size_t ArgSize = FuncType.getParamTypes().size();
4549
163
    const size_t RetSize =
4550
163
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4551
163
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4552
163
    ArgsVec[0] = F.Fn.getFirstParam();
4553
328
    for (size_t I = 0; I < ArgSize; ++I) {
4554
165
      const size_t J = ArgSize - I;
4555
165
      ArgsVec[J] = stackPop();
4556
165
    }
4557
4558
163
    {
4559
163
      auto FPtr = Builder.createCall(
4560
163
          Context.getIntrinsic(
4561
163
              Builder, Executable::Intrinsics::kTableGetFuncSymbol,
4562
163
              LLVM::Type::getFunctionType(
4563
163
                  FTy.getPointerTo(),
4564
163
                  {Context.Int32Ty, Context.Int32Ty, Context.Int32Ty}, false)),
4565
163
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4566
163
           FuncIndex});
4567
163
      Builder.createCondBr(
4568
163
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4569
163
          NotNullBB, IsNullBB);
4570
163
      Builder.positionAtEnd(NotNullBB);
4571
4572
163
      auto FPtrRet =
4573
163
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4574
163
      FPtrRet.setMustTailCall();
4575
163
      if (RetSize == 0) {
4576
40
        Builder.createRetVoid();
4577
123
      } else {
4578
123
        Builder.createRet(FPtrRet);
4579
123
      }
4580
163
    }
4581
4582
163
    Builder.positionAtEnd(IsNullBB);
4583
4584
163
    {
4585
163
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4586
163
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4587
163
      Builder.createArrayPtrStore(
4588
163
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4589
163
          kValSize);
4590
4591
163
      Builder.createCall(
4592
163
          Context.getIntrinsic(
4593
163
              Builder, Executable::Intrinsics::kCallIndirect,
4594
163
              LLVM::Type::getFunctionType(Context.VoidTy,
4595
163
                                          {Context.Int32Ty, Context.Int32Ty,
4596
163
                                           Context.Int32Ty, Context.Int8PtrTy,
4597
163
                                           Context.Int8PtrTy},
4598
163
                                          false)),
4599
163
          {LLContext.getInt32(TableIndex), LLContext.getInt32(FuncTypeIndex),
4600
163
           FuncIndex, Args, Rets});
4601
4602
163
      if (RetSize == 0) {
4603
40
        Builder.createRetVoid();
4604
123
      } else if (RetSize == 1) {
4605
112
        Builder.createRet(
4606
112
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4607
112
      } else {
4608
11
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4609
11
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4610
11
      }
4611
163
    }
4612
163
  }
4613
4614
212
  void compileCallRefOp(const unsigned int TypeIndex) noexcept {
4615
212
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4616
212
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4617
212
    auto EndBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_i.end");
4618
4619
212
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4620
212
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4621
212
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4622
212
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4623
212
        LLContext.getInt64(0)));
4624
212
    Builder.createCondBr(IsRefNotNull, OkBB,
4625
212
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4626
212
    Builder.positionAtEnd(OkBB);
4627
4628
212
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4629
212
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4630
212
    auto RTy = FTy.getReturnType();
4631
4632
212
    const size_t ArgSize = FuncType.getParamTypes().size();
4633
212
    const size_t RetSize =
4634
212
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4635
212
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4636
212
    ArgsVec[0] = F.Fn.getFirstParam();
4637
330
    for (size_t I = 0; I < ArgSize; ++I) {
4638
118
      const size_t J = ArgSize - I;
4639
118
      ArgsVec[J] = stackPop();
4640
118
    }
4641
4642
212
    std::vector<LLVM::Value> FPtrRetsVec;
4643
212
    FPtrRetsVec.reserve(RetSize);
4644
212
    {
4645
212
      auto FPtr = Builder.createCall(
4646
212
          Context.getIntrinsic(
4647
212
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4648
212
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4649
212
                                          {Context.Int64x2Ty}, false)),
4650
212
          {Ref});
4651
212
      Builder.createCondBr(
4652
212
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4653
212
          NotNullBB, IsNullBB);
4654
212
      Builder.positionAtEnd(NotNullBB);
4655
4656
212
      auto FPtrRet =
4657
212
          Builder.createCall(LLVM::FunctionCallee{FTy, FPtr}, ArgsVec);
4658
212
      if (RetSize == 0) {
4659
        // nothing to do
4660
138
      } else if (RetSize == 1) {
4661
58
        FPtrRetsVec.push_back(FPtrRet);
4662
58
      } else {
4663
32
        for (auto Val : unpackStruct(Builder, FPtrRet)) {
4664
32
          FPtrRetsVec.push_back(Val);
4665
32
        }
4666
16
      }
4667
212
    }
4668
4669
212
    Builder.createBr(EndBB);
4670
212
    Builder.positionAtEnd(IsNullBB);
4671
4672
212
    std::vector<LLVM::Value> RetsVec;
4673
212
    {
4674
212
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4675
212
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4676
212
      Builder.createArrayPtrStore(
4677
212
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4678
212
          kValSize);
4679
4680
212
      Builder.createCall(
4681
212
          Context.getIntrinsic(
4682
212
              Builder, Executable::Intrinsics::kCallRef,
4683
212
              LLVM::Type::getFunctionType(
4684
212
                  Context.VoidTy,
4685
212
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4686
212
                  false)),
4687
212
          {Ref, Args, Rets});
4688
4689
212
      if (RetSize == 0) {
4690
        // nothing to do
4691
138
      } else if (RetSize == 1) {
4692
58
        RetsVec.push_back(
4693
58
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4694
58
      } else {
4695
16
        RetsVec = Builder.createArrayPtrLoad(RetSize, RTy, Rets, Context.Int8Ty,
4696
16
                                             kValSize);
4697
16
      }
4698
212
      Builder.createBr(EndBB);
4699
212
      Builder.positionAtEnd(EndBB);
4700
212
    }
4701
4702
302
    for (unsigned I = 0; I < RetSize; ++I) {
4703
90
      auto PHIRet = Builder.createPHI(FPtrRetsVec[I].getType());
4704
90
      PHIRet.addIncoming(FPtrRetsVec[I], NotNullBB);
4705
90
      PHIRet.addIncoming(RetsVec[I], IsNullBB);
4706
90
      stackPush(PHIRet);
4707
90
    }
4708
212
  }
4709
4710
32
  void compileReturnCallRefOp(const unsigned int TypeIndex) noexcept {
4711
32
    auto NotNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.not_null");
4712
32
    auto IsNullBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.is_null");
4713
4714
32
    auto Ref = Builder.createBitCast(stackPop(), Context.Int64x2Ty);
4715
32
    auto OkBB = LLVM::BasicBlock::create(LLContext, F.Fn, "c_r.ref_not_null");
4716
32
    auto IsRefNotNull = Builder.createLikely(Builder.createICmpNE(
4717
32
        Builder.createExtractElement(Ref, LLContext.getInt64(1)),
4718
32
        LLContext.getInt64(0)));
4719
32
    Builder.createCondBr(IsRefNotNull, OkBB,
4720
32
                         getTrapBB(ErrCode::Value::AccessNullFunc));
4721
32
    Builder.positionAtEnd(OkBB);
4722
4723
32
    const auto &FuncType = Context.CompositeTypes[TypeIndex]->getFuncType();
4724
32
    auto FTy = toLLVMType(Context.LLContext, Context.ExecCtxPtrTy, FuncType);
4725
32
    auto RTy = FTy.getReturnType();
4726
4727
32
    const size_t ArgSize = FuncType.getParamTypes().size();
4728
32
    const size_t RetSize =
4729
32
        RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();
4730
32
    std::vector<LLVM::Value> ArgsVec(ArgSize + 1, nullptr);
4731
32
    ArgsVec[0] = F.Fn.getFirstParam();
4732
42
    for (size_t I = 0; I < ArgSize; ++I) {
4733
10
      const size_t J = ArgSize - I;
4734
10
      ArgsVec[J] = stackPop();
4735
10
    }
4736
4737
32
    {
4738
32
      auto FPtr = Builder.createCall(
4739
32
          Context.getIntrinsic(
4740
32
              Builder, Executable::Intrinsics::kRefGetFuncSymbol,
4741
32
              LLVM::Type::getFunctionType(FTy.getPointerTo(),
4742
32
                                          {Context.Int64x2Ty}, false)),
4743
32
          {Ref});
4744
32
      Builder.createCondBr(
4745
32
          Builder.createLikely(Builder.createNot(Builder.createIsNull(FPtr))),
4746
32
          NotNullBB, IsNullBB);
4747
32
      Builder.positionAtEnd(NotNullBB);
4748
4749
32
      auto FPtrRet =
4750
32
          Builder.createCall(LLVM::FunctionCallee(FTy, FPtr), ArgsVec);
4751
32
      FPtrRet.setMustTailCall();
4752
32
      if (RetSize == 0) {
4753
25
        Builder.createRetVoid();
4754
25
      } else {
4755
7
        Builder.createRet(FPtrRet);
4756
7
      }
4757
32
    }
4758
4759
32
    Builder.positionAtEnd(IsNullBB);
4760
4761
32
    {
4762
32
      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
4763
32
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);
4764
32
      Builder.createArrayPtrStore(
4765
32
          Span<LLVM::Value>(ArgsVec.begin() + 1, ArgSize), Args, Context.Int8Ty,
4766
32
          kValSize);
4767
4768
32
      Builder.createCall(
4769
32
          Context.getIntrinsic(
4770
32
              Builder, Executable::Intrinsics::kCallRef,
4771
32
              LLVM::Type::getFunctionType(
4772
32
                  Context.VoidTy,
4773
32
                  {Context.Int64x2Ty, Context.Int8PtrTy, Context.Int8PtrTy},
4774
32
                  false)),
4775
32
          {Ref, Args, Rets});
4776
4777
32
      if (RetSize == 0) {
4778
25
        Builder.createRetVoid();
4779
25
      } else if (RetSize == 1) {
4780
5
        Builder.createRet(
4781
5
            Builder.createValuePtrLoad(RTy, Rets, Context.Int8Ty));
4782
5
      } else {
4783
2
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
4784
2
            RetSize, RTy, Rets, Context.Int8Ty, kValSize));
4785
2
      }
4786
32
    }
4787
32
  }
4788
4789
  void compileLoadOp(unsigned MemoryIndex, uint64_t Offset, unsigned Alignment,
4790
19.3k
                     LLVM::Type LoadTy) noexcept {
4791
19.3k
    if constexpr (kForceUnalignment) {
4792
19.3k
      Alignment = 0;
4793
19.3k
    }
4794
19.3k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4795
19.3k
    if (Offset != 0) {
4796
12.5k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4797
12.5k
    }
4798
4799
19.3k
    auto VPtr = Builder.createInBoundsGEP1(
4800
19.3k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4801
19.3k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4802
19.3k
    auto LoadInst = Builder.createLoad(LoadTy, Ptr, true);
4803
19.3k
    LoadInst.setAlignment(1 << Alignment);
4804
19.3k
    stackPush(switchEndian(LoadInst));
4805
19.3k
  }
4806
  void compileLoadOp(unsigned MemoryIndex, uint64_t Offset, unsigned Alignment,
4807
                     LLVM::Type LoadTy, LLVM::Type ExtendTy,
4808
8.19k
                     bool Signed) noexcept {
4809
8.19k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4810
8.19k
    if (Signed) {
4811
3.54k
      Stack.back() = Builder.createSExt(Stack.back(), ExtendTy);
4812
4.64k
    } else {
4813
4.64k
      Stack.back() = Builder.createZExt(Stack.back(), ExtendTy);
4814
4.64k
    }
4815
8.19k
  }
4816
  void compileVectorLoadOp(unsigned MemoryIndex, uint64_t Offset,
4817
5.04k
                           unsigned Alignment, LLVM::Type LoadTy) noexcept {
4818
5.04k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4819
5.04k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4820
5.04k
  }
4821
  void compileVectorLoadOp(unsigned MemoryIndex, uint64_t Offset,
4822
                           unsigned Alignment, LLVM::Type LoadTy,
4823
1.68k
                           LLVM::Type ExtendTy, bool Signed) noexcept {
4824
1.68k
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy, ExtendTy, Signed);
4825
1.68k
    Stack.back() = Builder.createBitCast(Stack.back(), Context.Int64x2Ty);
4826
1.68k
  }
4827
  void compileSplatLoadOp(unsigned MemoryIndex, uint64_t Offset,
4828
                          unsigned Alignment, LLVM::Type LoadTy,
4829
645
                          LLVM::Type VectorTy) noexcept {
4830
645
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4831
645
    compileSplatOp(VectorTy);
4832
645
  }
4833
  void compileLoadLaneOp(unsigned MemoryIndex, uint64_t Offset,
4834
                         unsigned Alignment, unsigned Index, LLVM::Type LoadTy,
4835
485
                         LLVM::Type VectorTy) noexcept {
4836
485
    auto Vector = stackPop();
4837
485
    compileLoadOp(MemoryIndex, Offset, Alignment, LoadTy);
4838
    if constexpr (Endian::native == Endian::big) {
4839
      Index = VectorTy.getVectorSize() - 1 - Index;
4840
    }
4841
485
    auto Value = Stack.back();
4842
485
    Stack.back() = Builder.createBitCast(
4843
485
        Builder.createInsertElement(Builder.createBitCast(Vector, VectorTy),
4844
485
                                    Value, LLContext.getInt64(Index)),
4845
485
        Context.Int64x2Ty);
4846
485
  }
4847
  void compileStoreOp(uint32_t MemoryIndex, uint64_t Offset, uint32_t Alignment,
4848
                      LLVM::Type LoadTy, bool Trunc = false,
4849
3.46k
                      bool BitCast = false) noexcept {
4850
3.46k
    if constexpr (kForceUnalignment) {
4851
3.46k
      Alignment = 0;
4852
3.46k
    }
4853
3.46k
    auto V = stackPop();
4854
3.46k
    auto Off = Builder.createZExt(stackPop(), Context.Int64Ty);
4855
3.46k
    if (Offset != 0) {
4856
2.59k
      Off = Builder.createAdd(Off, LLContext.getInt64(Offset));
4857
2.59k
    }
4858
4859
3.46k
    if (Trunc) {
4860
734
      V = Builder.createTrunc(V, LoadTy);
4861
734
    }
4862
3.46k
    if (BitCast) {
4863
235
      V = Builder.createBitCast(V, LoadTy);
4864
235
    }
4865
3.46k
    V = switchEndian(V);
4866
3.46k
    auto VPtr = Builder.createInBoundsGEP1(
4867
3.46k
        Context.Int8Ty, Context.getMemory(Builder, ExecCtx, MemoryIndex), Off);
4868
3.46k
    auto Ptr = Builder.createBitCast(VPtr, LoadTy.getPointerTo());
4869
3.46k
    auto StoreInst = Builder.createStore(V, Ptr, true);
4870
3.46k
    StoreInst.setAlignment(1 << Alignment);
4871
3.46k
  }
4872
  void compileStoreLaneOp(uint32_t MemoryIndex, uint64_t Offset,
4873
                          uint32_t Alignment, uint8_t Index, LLVM::Type LoadTy,
4874
375
                          LLVM::Type VectorTy) noexcept {
4875
375
    auto Vector = Stack.back();
4876
    if constexpr (Endian::native == Endian::big) {
4877
      Index = static_cast<uint8_t>(VectorTy.getVectorSize() - Index - 1);
4878
    }
4879
375
    Stack.back() = Builder.createExtractElement(
4880
375
        Builder.createBitCast(Vector, VectorTy), LLContext.getInt64(Index));
4881
375
    compileStoreOp(MemoryIndex, Offset, Alignment, LoadTy);
4882
375
  }
4883
55.3k
  // Splat: replaces the scalar on top of the stack with a VectorTy vector
  // holding that scalar in every lane, stored as Int64x2Ty. The scalar is
  // truncated to the element type, inserted into lane 0 of an undef vector,
  // and broadcast with a shuffle whose mask is all zeros.
  void compileSplatOp(LLVM::Type VectorTy) noexcept {
    const auto LaneCount = VectorTy.getVectorSize();
    auto Placeholder = LLVM::Value::getUndef(VectorTy);
    // All-zero i32 mask: every output lane selects input lane 0.
    auto BroadcastMask = LLVM::Value::getConstNull(
        LLVM::Type::getVectorType(Context.Int32Ty, LaneCount));

    auto &Top = Stack.back();
    auto Scalar = Builder.createTrunc(Top, VectorTy.getElementType());
    auto Seeded =
        Builder.createInsertElement(Placeholder, Scalar, LLContext.getInt64(0));
    auto Splat =
        Builder.createShuffleVector(Seeded, Placeholder, BroadcastMask);
    Top = Builder.createBitCast(Splat, Context.Int64x2Ty);
  }
4894
1.25k
  // Extract-lane: replaces the vector on top of the stack with the element
  // at lane Index of that vector viewed as VectorTy. On big-endian hosts
  // the lane index is mirrored.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto &Top = Stack.back();
    auto Typed = Builder.createBitCast(Top, VectorTy);
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    Top = Builder.createExtractElement(Typed, LLContext.getInt64(Index));
  }
4902
  // Extending extract-lane: extracts lane Index as above, then widens the
  // scalar on top of the stack to ExtendTy, sign-extending when Signed is
  // true and zero-extending otherwise.
  void compileExtractLaneOp(LLVM::Type VectorTy, unsigned Index,
                            LLVM::Type ExtendTy, bool Signed) noexcept {
    compileExtractLaneOp(VectorTy, Index);
    auto &Top = Stack.back();
    Top = Signed ? Builder.createSExt(Top, ExtendTy)
                 : Builder.createZExt(Top, ExtendTy);
  }
4911
1.01k
  // Replace-lane: pops the replacement scalar (truncated to the element
  // type of VectorTy) and writes it into lane Index of the vector on top of
  // the stack. The updated vector is left on the stack as Int64x2Ty. On
  // big-endian hosts the lane index is mirrored.
  void compileReplaceLaneOp(LLVM::Type VectorTy, unsigned Index) noexcept {
    auto Replacement =
        Builder.createTrunc(stackPop(), VectorTy.getElementType());
    auto &Top = Stack.back();
    auto Original = Top;
    if constexpr (Endian::native == Endian::big) {
      Index = VectorTy.getVectorSize() - Index - 1;
    }
    auto Typed = Builder.createBitCast(Original, VectorTy);
    auto Updated = Builder.createInsertElement(Typed, Replacement,
                                               LLContext.getInt64(Index));
    Top = Builder.createBitCast(Updated, Context.Int64x2Ty);
  }
4922
  void compileVectorCompareOp(LLVM::Type VectorTy,
4923
6.35k
                              LLVMIntPredicate Predicate) noexcept {
4924
6.35k
    auto RHS = stackPop();
4925
6.35k
    auto LHS = stackPop();
4926
6.35k
    auto Result = Builder.createSExt(
4927
6.35k
        Builder.createICmp(Predicate, Builder.createBitCast(LHS, VectorTy),
4928
6.35k
                           Builder.createBitCast(RHS, VectorTy)),
4929
6.35k
        VectorTy);
4930
6.35k
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
4931
6.35k
  }
4932
  // Lane-wise floating-point comparison. Pops the right then the left
  // operand, compares them as VectorTy with Predicate, sign-extends the
  // per-lane result to ResultTy, and pushes it as Int64x2Ty.
  void compileVectorCompareOp(LLVM::Type VectorTy, LLVMRealPredicate Predicate,
                              LLVM::Type ResultTy) noexcept {
    auto Right = stackPop();
    auto Left = stackPop();
    // The operand bitcasts are kept as direct call arguments.
    auto LaneFlags =
        Builder.createFCmp(Predicate, Builder.createBitCast(Left, VectorTy),
                           Builder.createBitCast(Right, VectorTy));
    auto LaneMask = Builder.createSExt(LaneFlags, ResultTy);
    stackPush(Builder.createBitCast(LaneMask, Context.Int64x2Ty));
  }
4942
  template <typename Func>
4943
30.7k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
30.7k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
30.7k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
30.7k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
2.62k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
2.62k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
2.62k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
2.62k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
3.32k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
3.32k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
3.32k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
3.32k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPopcnt()::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
125
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
125
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
125
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
125
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorExtAddPairwise(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
3.08k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
3.08k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
3.08k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
3.08k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFAbs(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
661
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
661
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
661
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
661
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNeg(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
794
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
794
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
794
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
794
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFSqrt(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
274
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
274
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
274
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
274
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFCeil(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
1.74k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
1.74k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
1.74k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
1.74k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFFloor(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
2.90k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
2.90k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
2.90k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
2.90k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFTrunc(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
2.19k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
2.19k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
2.19k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
2.19k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorFNearest(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
439
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
439
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
439
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
439
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatS32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
1.06k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
1.06k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
1.06k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
1.06k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorTruncSatU32(WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
7.05k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
7.05k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
7.05k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
7.05k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertS(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
742
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
742
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
742
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
742
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorConvertU(WasmEdge::LLVM::Type, WasmEdge::LLVM::Type, bool)::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
2.18k
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
2.18k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
2.18k
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
2.18k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorDemote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
746
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
746
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
746
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
746
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorPromote()::{lambda(auto:1)#1}&&)
Line
Count
Source
4943
802
  void compileVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4944
802
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4945
802
    Stack.back() = Builder.createBitCast(Op(V), Context.Int64x2Ty);
4946
802
  }
4947
2.62k
  // Lane-wise integer absolute value of the vector on top of the stack,
  // emitted through the LLVM::Core::Abs intrinsic with its second argument
  // set to false.
  void compileVectorAbs(LLVM::Type VectorTy) noexcept {
    auto EmitAbs = [this](auto V) noexcept {
      return Builder.createIntrinsic(LLVM::Core::Abs, {V.getType()},
                                     {V, LLContext.getFalse()});
    };
    compileVectorOp(VectorTy, EmitAbs);
  }
4953
3.32k
  // Lane-wise integer negation of the vector on top of the stack, viewed
  // as VectorTy.
  void compileVectorNeg(LLVM::Type VectorTy) noexcept {
    auto EmitNeg = [this](auto V) noexcept { return Builder.createNeg(V); };
    compileVectorOp(VectorTy, EmitNeg);
  }
4957
125
  void compileVectorPopcnt() noexcept {
4958
125
    compileVectorOp(Context.Int8x16Ty, [this](auto V) noexcept {
4959
125
      assuming(LLVM::Core::Ctpop != LLVM::Core::NotIntrinsic);
4960
125
      return Builder.createUnaryIntrinsic(LLVM::Core::Ctpop, V);
4961
125
    });
4962
125
  }
4963
  template <typename Func>
4964
2.30k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4965
2.30k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4966
2.30k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4967
2.30k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAnyTrue()::{lambda(auto:1)#1}&&)
Line
Count
Source
4964
139
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4965
139
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4966
139
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4967
139
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAllTrue(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4964
937
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4965
937
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4966
937
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4967
937
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorReduceIOp<(anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorBitMask(WasmEdge::LLVM::Type)::{lambda(auto:1)#1}&&)
Line
Count
Source
4964
1.23k
  void compileVectorReduceIOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4965
1.23k
    auto V = Builder.createBitCast(Stack.back(), VectorTy);
4966
1.23k
    Stack.back() = Builder.createZExt(Op(V), Context.Int32Ty);
4967
1.23k
  }
4968
139
  void compileVectorAnyTrue() noexcept {
4969
139
    compileVectorReduceIOp(Context.Int128x1Ty, [this](auto V) noexcept {
4970
139
      auto Zero = LLVM::Value::getConstNull(Context.Int128x1Ty);
4971
139
      return Builder.createBitCast(Builder.createICmpNE(V, Zero),
4972
139
                                   LLContext.getInt1Ty());
4973
139
    });
4974
139
  }
4975
937
  void compileVectorAllTrue(LLVM::Type VectorTy) noexcept {
4976
937
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4977
937
      const auto Size = VectorTy.getVectorSize();
4978
937
      auto IntType = LLContext.getIntNTy(Size);
4979
937
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4980
937
      auto Cmp = Builder.createBitCast(Builder.createICmpEQ(V, Zero), IntType);
4981
937
      auto CmpZero = LLVM::Value::getConstInt(IntType, 0);
4982
937
      return Builder.createICmpEQ(Cmp, CmpZero);
4983
937
    });
4984
937
  }
4985
1.23k
  void compileVectorBitMask(LLVM::Type VectorTy) noexcept {
4986
1.23k
    compileVectorReduceIOp(VectorTy, [this, VectorTy](auto V) noexcept {
4987
1.23k
      const auto Size = VectorTy.getVectorSize();
4988
1.23k
      auto IntType = LLContext.getIntNTy(Size);
4989
1.23k
      auto Zero = LLVM::Value::getConstNull(VectorTy);
4990
1.23k
      return Builder.createBitCast(Builder.createICmpSLT(V, Zero), IntType);
4991
1.23k
    });
4992
1.23k
  }
4993
  template <typename Func>
4994
4.94k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4995
4.94k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4996
4.94k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4997
4.94k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4998
4.94k
    auto RHS = Builder.createVectorSplat(
4999
4.94k
        VectorTy.getVectorSize(),
5000
4.94k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
5001
4.94k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
5002
4.94k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5003
4.94k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5004
4.94k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorShl(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4994
1.80k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4995
1.80k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4996
1.80k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4997
1.80k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4998
1.80k
    auto RHS = Builder.createVectorSplat(
4999
1.80k
        VectorTy.getVectorSize(),
5000
1.80k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
5001
1.80k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
5002
1.80k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5003
1.80k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5004
1.80k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorAShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4994
2.13k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4995
2.13k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4996
2.13k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4997
2.13k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4998
2.13k
    auto RHS = Builder.createVectorSplat(
4999
2.13k
        VectorTy.getVectorSize(),
5000
2.13k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
5001
2.13k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
5002
2.13k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5003
2.13k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5004
2.13k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorShiftOp<(anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorLShr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
4994
1.00k
  void compileVectorShiftOp(LLVM::Type VectorTy, Func &&Op) noexcept {
4995
1.00k
    const bool Trunc = VectorTy.getElementType().getIntegerBitWidth() < 32;
4996
1.00k
    const uint32_t Mask = VectorTy.getElementType().getIntegerBitWidth() - 1;
4997
1.00k
    auto N = Builder.createAnd(stackPop(), LLContext.getInt32(Mask));
4998
1.00k
    auto RHS = Builder.createVectorSplat(
4999
1.00k
        VectorTy.getVectorSize(),
5000
1.00k
        Trunc ? Builder.createTrunc(N, VectorTy.getElementType())
5001
1.00k
              : Builder.createZExtOrTrunc(N, VectorTy.getElementType()));
5002
1.00k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5003
1.00k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5004
1.00k
  }
5005
1.80k
  void compileVectorShl(LLVM::Type VectorTy) noexcept {
5006
1.80k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5007
1.80k
      return Builder.createShl(LHS, RHS);
5008
1.80k
    });
5009
1.80k
  }
5010
1.00k
  void compileVectorLShr(LLVM::Type VectorTy) noexcept {
5011
1.00k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5012
1.00k
      return Builder.createLShr(LHS, RHS);
5013
1.00k
    });
5014
1.00k
  }
5015
2.13k
  void compileVectorAShr(LLVM::Type VectorTy) noexcept {
5016
2.13k
    compileVectorShiftOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5017
2.13k
      return Builder.createAShr(LHS, RHS);
5018
2.13k
    });
5019
2.13k
  }
5020
  template <typename Func>
5021
8.38k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
8.38k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
8.38k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
8.38k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
8.38k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
458
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
458
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
458
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
458
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
458
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorAddSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
1.25k
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
1.25k
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
1.25k
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
1.25k
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
1.25k
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
814
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
814
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
814
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
814
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
814
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSubSat(WasmEdge::LLVM::Type, bool)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
396
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
396
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
396
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
396
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
396
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
309
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
309
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
309
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
309
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
309
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
392
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
392
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
392
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
392
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
392
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorSMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
494
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
494
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
494
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
494
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
494
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
855
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
855
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
855
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
855
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
855
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorUAvgr(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
288
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
288
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
288
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
288
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
288
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
523
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
523
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
523
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
523
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
523
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorQ15MulSat()::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
168
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
168
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
168
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
168
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
168
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFAdd(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
176
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
176
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
176
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
176
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
176
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFSub(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
477
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
477
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
477
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
477
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
477
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMul(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
269
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
269
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
269
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
269
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
269
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFDiv(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
199
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
199
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
199
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
199
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
199
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
360
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
360
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
360
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
360
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
360
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
221
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
221
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
221
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
221
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
221
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMin(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
408
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
408
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
408
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
408
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
408
  }
compiler.cpp:void (anonymous namespace)::FunctionCompiler::compileVectorVectorOp<(anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}>(WasmEdge::LLVM::Type, (anonymous namespace)::FunctionCompiler::compileVectorVectorFPMax(WasmEdge::LLVM::Type)::{lambda(auto:1, auto:2)#1}&&)
Line
Count
Source
5021
323
  void compileVectorVectorOp(LLVM::Type VectorTy, Func &&Op) noexcept {
5022
323
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5023
323
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5024
323
    stackPush(Builder.createBitCast(Op(LHS, RHS), Context.Int64x2Ty));
5025
323
  }
5026
458
  void compileVectorVectorAdd(LLVM::Type VectorTy) noexcept {
5027
458
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5028
458
      return Builder.createAdd(LHS, RHS);
5029
458
    });
5030
458
  }
5031
1.25k
  void compileVectorVectorAddSat(LLVM::Type VectorTy, bool Signed) noexcept {
5032
1.25k
    auto ID = Signed ? LLVM::Core::SAddSat : LLVM::Core::UAddSat;
5033
1.25k
    assuming(ID != LLVM::Core::NotIntrinsic);
5034
1.25k
    compileVectorVectorOp(
5035
1.25k
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
5036
1.25k
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
5037
1.25k
        });
5038
1.25k
  }
5039
814
  void compileVectorVectorSub(LLVM::Type VectorTy) noexcept {
5040
814
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5041
814
      return Builder.createSub(LHS, RHS);
5042
814
    });
5043
814
  }
5044
396
  void compileVectorVectorSubSat(LLVM::Type VectorTy, bool Signed) noexcept {
5045
396
    auto ID = Signed ? LLVM::Core::SSubSat : LLVM::Core::USubSat;
5046
396
    assuming(ID != LLVM::Core::NotIntrinsic);
5047
396
    compileVectorVectorOp(
5048
396
        VectorTy, [this, VectorTy, ID](auto LHS, auto RHS) noexcept {
5049
396
          return Builder.createIntrinsic(ID, {VectorTy}, {LHS, RHS});
5050
396
        });
5051
396
  }
5052
523
  void compileVectorVectorMul(LLVM::Type VectorTy) noexcept {
5053
523
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5054
523
      return Builder.createMul(LHS, RHS);
5055
523
    });
5056
523
  }
5057
131
  void compileVectorSwizzle() noexcept {
5058
131
    auto Index = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
5059
131
    auto Vector = Builder.createBitCast(stackPop(), Context.Int8x16Ty);
5060
5061
131
#if defined(__x86_64__)
5062
131
    if (Context.SupportSSSE3) {
5063
131
      auto Magic = Builder.createVectorSplat(16, LLContext.getInt8(112));
5064
131
      auto Added = Builder.createAdd(Index, Magic);
5065
131
      auto NewIndex = Builder.createSelect(
5066
131
          Builder.createICmpUGT(Index, Added),
5067
131
          LLVM::Value::getConstAllOnes(Context.Int8x16Ty), Added);
5068
131
      assuming(LLVM::Core::X86SSSE3PShufB128 != LLVM::Core::NotIntrinsic);
5069
131
      stackPush(Builder.createBitCast(
5070
131
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PShufB128, {},
5071
131
                                  {Vector, NewIndex}),
5072
131
          Context.Int64x2Ty));
5073
131
      return;
5074
131
    }
5075
0
#endif
5076
5077
#if defined(__aarch64__)
5078
    if (Context.SupportNEON) {
5079
      assuming(LLVM::Core::AArch64NeonTbl1 != LLVM::Core::NotIntrinsic);
5080
      stackPush(Builder.createBitCast(
5081
          Builder.createIntrinsic(LLVM::Core::AArch64NeonTbl1,
5082
                                  {Context.Int8x16Ty}, {Vector, Index}),
5083
          Context.Int64x2Ty));
5084
      return;
5085
    }
5086
#endif
5087
5088
0
    auto Mask = Builder.createVectorSplat(16, LLContext.getInt8(15));
5089
0
    auto Zero = Builder.createVectorSplat(16, LLContext.getInt8(0));
5090
5091
#if defined(__s390x__)
5092
    assuming(LLVM::Core::S390VPerm != LLVM::Core::NotIntrinsic);
5093
    auto Exceed = Builder.createICmpULE(Index, Mask);
5094
    Index = Builder.createSub(Mask, Index);
5095
    auto Result = Builder.createIntrinsic(LLVM::Core::S390VPerm, {},
5096
                                          {Vector, Zero, Index});
5097
    Result = Builder.createSelect(Exceed, Result, Zero);
5098
    stackPush(Builder.createBitCast(Result, Context.Int64x2Ty));
5099
    return;
5100
#endif
5101
5102
    // Fallback case.
5103
    // If the SSSE3 is not supported on the x86_64 platform or
5104
    // the NEON is not supported on the aarch64 platform,
5105
    // then fallback to this.
5106
0
    auto IsOver = Builder.createICmpUGT(Index, Mask);
5107
0
    auto InboundIndex = Builder.createAnd(Index, Mask);
5108
0
    auto Array = Builder.createArray(16, 1);
5109
0
    for (size_t I = 0; I < 16; ++I) {
5110
0
      Builder.createStore(
5111
0
          Builder.createExtractElement(Vector, LLContext.getInt64(I)),
5112
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array,
5113
0
                                     LLContext.getInt64(I)));
5114
0
    }
5115
0
    LLVM::Value Ret = LLVM::Value::getUndef(Context.Int8x16Ty);
5116
0
    for (size_t I = 0; I < 16; ++I) {
5117
0
      auto Idx =
5118
0
          Builder.createExtractElement(InboundIndex, LLContext.getInt64(I));
5119
0
      auto Value = Builder.createLoad(
5120
0
          Context.Int8Ty,
5121
0
          Builder.createInBoundsGEP1(Context.Int8Ty, Array, Idx));
5122
0
      Ret = Builder.createInsertElement(Ret, Value, LLContext.getInt64(I));
5123
0
    }
5124
0
    Ret = Builder.createSelect(IsOver, Zero, Ret);
5125
0
    stackPush(Builder.createBitCast(Ret, Context.Int64x2Ty));
5126
0
  }
5127
5128
168
  void compileVectorVectorQ15MulSat() noexcept {
5129
168
    compileVectorVectorOp(
5130
168
        Context.Int16x8Ty, [this](auto LHS, auto RHS) noexcept -> LLVM::Value {
5131
168
#if defined(__x86_64__)
5132
168
          if (Context.SupportSSSE3) {
5133
168
            assuming(LLVM::Core::X86SSSE3PMulHrSw128 !=
5134
168
                     LLVM::Core::NotIntrinsic);
5135
168
            auto Result = Builder.createIntrinsic(
5136
168
                LLVM::Core::X86SSSE3PMulHrSw128, {}, {LHS, RHS});
5137
168
            auto IntMaxV = Builder.createVectorSplat(
5138
168
                8, LLContext.getInt16(UINT16_C(0x8000)));
5139
168
            auto NotOver = Builder.createSExt(
5140
168
                Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
5141
168
            return Builder.createXor(Result, NotOver);
5142
168
          }
5143
0
#endif
5144
5145
#if defined(__aarch64__)
5146
          if (Context.SupportNEON) {
5147
            assuming(LLVM::Core::AArch64NeonSQRDMulH !=
5148
                     LLVM::Core::NotIntrinsic);
5149
            return Builder.createBinaryIntrinsic(
5150
                LLVM::Core::AArch64NeonSQRDMulH, LHS, RHS);
5151
          }
5152
#endif
5153
5154
          // Fallback case.
5155
          // If the SSSE3 is not supported on the x86_64 platform or
5156
          // the NEON is not supported on the aarch64 platform,
5157
          // then fallback to this.
5158
0
          auto ExtTy = Context.Int16x8Ty.getExtendedElementVectorType();
5159
0
          auto Offset = Builder.createVectorSplat(
5160
0
              8, LLContext.getInt32(UINT32_C(0x4000)));
5161
0
          auto Shift =
5162
0
              Builder.createVectorSplat(8, LLContext.getInt32(UINT32_C(15)));
5163
0
          auto ExtLHS = Builder.createSExt(LHS, ExtTy);
5164
0
          auto ExtRHS = Builder.createSExt(RHS, ExtTy);
5165
0
          auto Result = Builder.createTrunc(
5166
0
              Builder.createAShr(
5167
0
                  Builder.createAdd(Builder.createMul(ExtLHS, ExtRHS), Offset),
5168
0
                  Shift),
5169
0
              Context.Int16x8Ty);
5170
0
          auto IntMaxV = Builder.createVectorSplat(
5171
0
              8, LLContext.getInt16(UINT16_C(0x8000)));
5172
0
          auto NotOver = Builder.createSExt(
5173
0
              Builder.createICmpEQ(Result, IntMaxV), Context.Int16x8Ty);
5174
0
          return Builder.createXor(Result, NotOver);
5175
168
        });
5176
168
  }
5177
309
  void compileVectorVectorSMin(LLVM::Type VectorTy) noexcept {
5178
309
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5179
309
      return Builder.createIntrinsic(LLVM::Core::SMin, {LHS.getType()},
5180
309
                                     {LHS, RHS});
5181
309
    });
5182
309
  }
5183
392
  void compileVectorVectorUMin(LLVM::Type VectorTy) noexcept {
5184
392
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5185
392
      return Builder.createIntrinsic(LLVM::Core::UMin, {LHS.getType()},
5186
392
                                     {LHS, RHS});
5187
392
    });
5188
392
  }
5189
494
  void compileVectorVectorSMax(LLVM::Type VectorTy) noexcept {
5190
494
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5191
494
      return Builder.createIntrinsic(LLVM::Core::SMax, {LHS.getType()},
5192
494
                                     {LHS, RHS});
5193
494
    });
5194
494
  }
5195
855
  void compileVectorVectorUMax(LLVM::Type VectorTy) noexcept {
5196
855
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5197
855
      return Builder.createIntrinsic(LLVM::Core::UMax, {LHS.getType()},
5198
855
                                     {LHS, RHS});
5199
855
    });
5200
855
  }
5201
288
  void compileVectorVectorUAvgr(LLVM::Type VectorTy) noexcept {
5202
288
    auto ExtendTy = VectorTy.getExtendedElementVectorType();
5203
288
    compileVectorVectorOp(
5204
288
        VectorTy,
5205
288
        [this, VectorTy, ExtendTy](auto LHS, auto RHS) noexcept -> LLVM::Value {
5206
288
#if defined(__x86_64__)
5207
288
          if (Context.SupportSSE2) {
5208
288
            const auto ID = [VectorTy]() noexcept {
5209
288
              switch (VectorTy.getElementType().getIntegerBitWidth()) {
5210
138
              case 8:
5211
138
                return LLVM::Core::X86SSE2PAvgB;
5212
150
              case 16:
5213
150
                return LLVM::Core::X86SSE2PAvgW;
5214
0
              default:
5215
0
                assumingUnreachable();
5216
288
              }
5217
288
            }();
5218
288
            assuming(ID != LLVM::Core::NotIntrinsic);
5219
288
            return Builder.createIntrinsic(ID, {}, {LHS, RHS});
5220
288
          }
5221
0
#endif
5222
5223
#if defined(__aarch64__)
5224
          if (Context.SupportNEON) {
5225
            assuming(LLVM::Core::AArch64NeonURHAdd != LLVM::Core::NotIntrinsic);
5226
            return Builder.createBinaryIntrinsic(LLVM::Core::AArch64NeonURHAdd,
5227
                                                 LHS, RHS);
5228
          }
5229
#endif
5230
5231
          // Fallback case.
5232
          // If the SSE2 is not supported on the x86_64 platform or
5233
          // the NEON is not supported on the aarch64 platform,
5234
          // then fallback to this.
5235
0
          auto EL = Builder.createZExt(LHS, ExtendTy);
5236
0
          auto ER = Builder.createZExt(RHS, ExtendTy);
5237
0
          auto One = Builder.createZExt(
5238
0
              Builder.createVectorSplat(ExtendTy.getVectorSize(),
5239
0
                                        LLContext.getTrue()),
5240
0
              ExtendTy);
5241
0
          return Builder.createTrunc(
5242
0
              Builder.createLShr(
5243
0
                  Builder.createAdd(Builder.createAdd(EL, ER), One), One),
5244
0
              VectorTy);
5245
288
        });
5246
288
  }
5247
738
  void compileVectorNarrow(LLVM::Type FromTy, bool Signed) noexcept {
5248
738
    auto [MinInt,
5249
738
          MaxInt] = [&]() noexcept -> std::tuple<LLVM::Value, LLVM::Value> {
5250
738
      switch (FromTy.getElementType().getIntegerBitWidth()) {
5251
277
      case 16: {
5252
277
        const auto Min =
5253
277
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::min()
5254
277
                                        : std::numeric_limits<uint8_t>::min());
5255
277
        const auto Max =
5256
277
            static_cast<int16_t>(Signed ? std::numeric_limits<int8_t>::max()
5257
277
                                        : std::numeric_limits<uint8_t>::max());
5258
277
        return {LLContext.getInt16(static_cast<uint16_t>(Min)),
5259
277
                LLContext.getInt16(static_cast<uint16_t>(Max))};
5260
0
      }
5261
461
      case 32: {
5262
461
        const auto Min =
5263
461
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::min()
5264
461
                                        : std::numeric_limits<uint16_t>::min());
5265
461
        const auto Max =
5266
461
            static_cast<int32_t>(Signed ? std::numeric_limits<int16_t>::max()
5267
461
                                        : std::numeric_limits<uint16_t>::max());
5268
461
        return {LLContext.getInt32(static_cast<uint32_t>(Min)),
5269
461
                LLContext.getInt32(static_cast<uint32_t>(Max))};
5270
0
      }
5271
0
      default:
5272
0
        assumingUnreachable();
5273
738
      }
5274
738
    }();
5275
738
    const auto Count = FromTy.getVectorSize();
5276
738
    auto VMin = Builder.createVectorSplat(Count, MinInt);
5277
738
    auto VMax = Builder.createVectorSplat(Count, MaxInt);
5278
5279
738
    auto TruncTy = FromTy.getTruncatedElementVectorType();
5280
5281
738
    auto F2 = Builder.createBitCast(stackPop(), FromTy);
5282
738
    F2 = Builder.createSelect(Builder.createICmpSLT(F2, VMin), VMin, F2);
5283
738
    F2 = Builder.createSelect(Builder.createICmpSGT(F2, VMax), VMax, F2);
5284
738
    F2 = Builder.createTrunc(F2, TruncTy);
5285
5286
738
    auto F1 = Builder.createBitCast(stackPop(), FromTy);
5287
738
    F1 = Builder.createSelect(Builder.createICmpSLT(F1, VMin), VMin, F1);
5288
738
    F1 = Builder.createSelect(Builder.createICmpSGT(F1, VMax), VMax, F1);
5289
738
    F1 = Builder.createTrunc(F1, TruncTy);
5290
5291
738
    std::vector<uint32_t> Mask(Count * 2);
5292
738
    std::iota(Mask.begin(), Mask.end(), 0);
5293
738
    auto V = Endian::native == Endian::little
5294
738
                 ? Builder.createShuffleVector(
5295
738
                       F1, F2, LLVM::Value::getConstVector32(LLContext, Mask))
5296
738
                 : Builder.createShuffleVector(
5297
0
                       F2, F1, LLVM::Value::getConstVector32(LLContext, Mask));
5298
738
    stackPush(Builder.createBitCast(V, Context.Int64x2Ty));
5299
738
  }
5300
6.74k
  void compileVectorExtend(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5301
6.74k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5302
6.74k
    const auto Count = FromTy.getVectorSize();
5303
6.74k
    std::vector<uint32_t> Mask(Count / 2);
5304
    if constexpr (Endian::native == Endian::big) {
5305
      Low = !Low;
5306
    }
5307
6.74k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5308
6.74k
    auto R = Builder.createBitCast(Stack.back(), FromTy);
5309
6.74k
    if (Signed) {
5310
3.00k
      R = Builder.createSExt(R, ExtTy);
5311
3.74k
    } else {
5312
3.74k
      R = Builder.createZExt(R, ExtTy);
5313
3.74k
    }
5314
6.74k
    R = Builder.createShuffleVector(
5315
6.74k
        R, LLVM::Value::getUndef(ExtTy),
5316
6.74k
        LLVM::Value::getConstVector32(LLContext, Mask));
5317
6.74k
    Stack.back() = Builder.createBitCast(R, Context.Int64x2Ty);
5318
6.74k
  }
5319
2.37k
  void compileVectorExtMul(LLVM::Type FromTy, bool Signed, bool Low) noexcept {
5320
2.37k
    auto ExtTy = FromTy.getExtendedElementVectorType();
5321
2.37k
    const auto Count = FromTy.getVectorSize();
5322
2.37k
    std::vector<uint32_t> Mask(Count / 2);
5323
2.37k
    std::iota(Mask.begin(), Mask.end(), Low ? 0 : Count / 2);
5324
4.74k
    auto Extend = [this, FromTy, Signed, ExtTy, &Mask](LLVM::Value R) noexcept {
5325
4.74k
      R = Builder.createBitCast(R, FromTy);
5326
4.74k
      if (Signed) {
5327
2.20k
        R = Builder.createSExt(R, ExtTy);
5328
2.54k
      } else {
5329
2.54k
        R = Builder.createZExt(R, ExtTy);
5330
2.54k
      }
5331
4.74k
      return Builder.createShuffleVector(
5332
4.74k
          R, LLVM::Value::getUndef(ExtTy),
5333
4.74k
          LLVM::Value::getConstVector32(LLContext, Mask));
5334
4.74k
    };
5335
2.37k
    auto RHS = Extend(stackPop());
5336
2.37k
    auto LHS = Extend(stackPop());
5337
2.37k
    stackPush(
5338
2.37k
        Builder.createBitCast(Builder.createMul(RHS, LHS), Context.Int64x2Ty));
5339
2.37k
  }
5340
3.08k
  void compileVectorExtAddPairwise(LLVM::Type VectorTy, bool Signed) noexcept {
5341
3.08k
    compileVectorOp(
5342
3.08k
        VectorTy, [this, VectorTy, Signed](auto V) noexcept -> LLVM::Value {
5343
3.08k
          auto ExtTy = VectorTy.getExtendedElementVectorType()
5344
3.08k
                           .getHalfElementsVectorType();
5345
3.08k
#if defined(__x86_64__)
5346
3.08k
          const auto Count = VectorTy.getVectorSize();
5347
3.08k
          if (Context.SupportXOP) {
5348
0
            const auto ID = [Count, Signed]() noexcept {
5349
0
              switch (Count) {
5350
0
              case 8:
5351
0
                return Signed ? LLVM::Core::X86XOpVPHAddWD
5352
0
                              : LLVM::Core::X86XOpVPHAddUWD;
5353
0
              case 16:
5354
0
                return Signed ? LLVM::Core::X86XOpVPHAddBW
5355
0
                              : LLVM::Core::X86XOpVPHAddUBW;
5356
0
              default:
5357
0
                assumingUnreachable();
5358
0
              }
5359
0
            }();
5360
0
            assuming(ID != LLVM::Core::NotIntrinsic);
5361
0
            return Builder.createUnaryIntrinsic(ID, V);
5362
0
          }
5363
3.08k
          if (Context.SupportSSSE3 && Count == 16) {
5364
747
            assuming(LLVM::Core::X86SSSE3PMAddUbSw128 !=
5365
747
                     LLVM::Core::NotIntrinsic);
5366
747
            if (Signed) {
5367
386
              return Builder.createIntrinsic(
5368
386
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5369
386
                  {Builder.createVectorSplat(16, LLContext.getInt8(1)), V});
5370
386
            } else {
5371
361
              return Builder.createIntrinsic(
5372
361
                  LLVM::Core::X86SSSE3PMAddUbSw128, {},
5373
361
                  {V, Builder.createVectorSplat(16, LLContext.getInt8(1))});
5374
361
            }
5375
747
          }
5376
2.33k
          if (Context.SupportSSE2 && Count == 8) {
5377
2.33k
            assuming(LLVM::Core::X86SSE2PMAddWd != LLVM::Core::NotIntrinsic);
5378
2.33k
            if (Signed) {
5379
1.32k
              return Builder.createIntrinsic(
5380
1.32k
                  LLVM::Core::X86SSE2PMAddWd, {},
5381
1.32k
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5382
1.32k
            } else {
5383
1.01k
              V = Builder.createXor(
5384
1.01k
                  V, Builder.createVectorSplat(8, LLContext.getInt16(0x8000)));
5385
1.01k
              V = Builder.createIntrinsic(
5386
1.01k
                  LLVM::Core::X86SSE2PMAddWd, {},
5387
1.01k
                  {V, Builder.createVectorSplat(8, LLContext.getInt16(1))});
5388
1.01k
              return Builder.createAdd(
5389
1.01k
                  V, Builder.createVectorSplat(4, LLContext.getInt32(0x10000)));
5390
1.01k
            }
5391
2.33k
          }
5392
0
#endif
5393
5394
#if defined(__aarch64__)
5395
          if (Context.SupportNEON) {
5396
            const auto ID = Signed ? LLVM::Core::AArch64NeonSAddLP
5397
                                   : LLVM::Core::AArch64NeonUAddLP;
5398
            assuming(ID != LLVM::Core::NotIntrinsic);
5399
            return Builder.createIntrinsic(ID, {ExtTy, VectorTy}, {V});
5400
          }
5401
#endif
5402
5403
          // Fallback case.
5404
          // If the XOP, SSSE3, or SSE2 is not supported on the x86_64 platform
5405
          // or the NEON is not supported on the aarch64 platform,
5406
          // then fallback to this.
5407
0
          auto Width = LLVM::Value::getConstInt(
5408
0
              ExtTy.getElementType(),
5409
0
              VectorTy.getElementType().getIntegerBitWidth());
5410
0
          Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5411
0
          auto EV = Builder.createBitCast(V, ExtTy);
5412
0
          LLVM::Value L, R;
5413
0
          if (Signed) {
5414
0
            L = Builder.createAShr(EV, Width);
5415
0
            R = Builder.createAShr(Builder.createShl(EV, Width), Width);
5416
0
          } else {
5417
0
            L = Builder.createLShr(EV, Width);
5418
0
            R = Builder.createLShr(Builder.createShl(EV, Width), Width);
5419
0
          }
5420
0
          return Builder.createAdd(L, R);
5421
2.33k
        });
5422
3.08k
  }
5423
661
  void compileVectorFAbs(LLVM::Type VectorTy) noexcept {
5424
661
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5425
661
      assuming(LLVM::Core::Fabs != LLVM::Core::NotIntrinsic);
5426
661
      return Builder.createUnaryIntrinsic(LLVM::Core::Fabs, V);
5427
661
    });
5428
661
  }
5429
794
  void compileVectorFNeg(LLVM::Type VectorTy) noexcept {
5430
794
    compileVectorOp(VectorTy,
5431
794
                    [this](auto V) noexcept { return Builder.createFNeg(V); });
5432
794
  }
5433
274
  void compileVectorFSqrt(LLVM::Type VectorTy) noexcept {
5434
274
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5435
274
      assuming(LLVM::Core::Sqrt != LLVM::Core::NotIntrinsic);
5436
274
      return Builder.createUnaryIntrinsic(LLVM::Core::Sqrt, V);
5437
274
    });
5438
274
  }
5439
1.74k
  void compileVectorFCeil(LLVM::Type VectorTy) noexcept {
5440
1.74k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5441
1.74k
      assuming(LLVM::Core::Ceil != LLVM::Core::NotIntrinsic);
5442
1.74k
      return Builder.createUnaryIntrinsic(LLVM::Core::Ceil, V);
5443
1.74k
    });
5444
1.74k
  }
5445
2.90k
  void compileVectorFFloor(LLVM::Type VectorTy) noexcept {
5446
2.90k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5447
2.90k
      assuming(LLVM::Core::Floor != LLVM::Core::NotIntrinsic);
5448
2.90k
      return Builder.createUnaryIntrinsic(LLVM::Core::Floor, V);
5449
2.90k
    });
5450
2.90k
  }
5451
2.19k
  void compileVectorFTrunc(LLVM::Type VectorTy) noexcept {
5452
2.19k
    compileVectorOp(VectorTy, [this](auto V) noexcept {
5453
2.19k
      assuming(LLVM::Core::Trunc != LLVM::Core::NotIntrinsic);
5454
2.19k
      return Builder.createUnaryIntrinsic(LLVM::Core::Trunc, V);
5455
2.19k
    });
5456
2.19k
  }
5457
439
  void compileVectorFNearest(LLVM::Type VectorTy) noexcept {
5458
439
    compileVectorOp(VectorTy, [&](auto V) noexcept {
5459
439
#if LLVM_VERSION_MAJOR >= 12 && !defined(__s390x__)
5460
439
      assuming(LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic);
5461
439
      if (LLVM::Core::Roundeven != LLVM::Core::NotIntrinsic) {
5462
439
        return Builder.createUnaryIntrinsic(LLVM::Core::Roundeven, V);
5463
439
      }
5464
0
#endif
5465
5466
0
#if defined(__x86_64__)
5467
0
      if (Context.SupportSSE4_1) {
5468
0
        const bool IsFloat = VectorTy.getElementType().isFloatTy();
5469
0
        auto ID =
5470
0
            IsFloat ? LLVM::Core::X86SSE41RoundPs : LLVM::Core::X86SSE41RoundPd;
5471
0
        assuming(ID != LLVM::Core::NotIntrinsic);
5472
0
        return Builder.createIntrinsic(ID, {}, {V, LLContext.getInt32(8)});
5473
0
      }
5474
0
#endif
5475
5476
#if defined(__aarch64__)
5477
      if (Context.SupportNEON &&
5478
          LLVM::Core::AArch64NeonFRIntN != LLVM::Core::NotIntrinsic) {
5479
        return Builder.createUnaryIntrinsic(LLVM::Core::AArch64NeonFRIntN, V);
5480
      }
5481
#endif
5482
5483
      // Fallback case.
5484
      // If the SSE4.1 is not supported on the x86_64 platform or
5485
      // the NEON is not supported on the aarch64 platform,
5486
      // then fallback to this.
5487
0
      assuming(LLVM::Core::Nearbyint != LLVM::Core::NotIntrinsic);
5488
0
      return Builder.createUnaryIntrinsic(LLVM::Core::Nearbyint, V);
5489
0
    });
5490
439
  }
5491
176
  void compileVectorVectorFAdd(LLVM::Type VectorTy) noexcept {
5492
176
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5493
176
      return Builder.createFAdd(LHS, RHS);
5494
176
    });
5495
176
  }
5496
477
  void compileVectorVectorFSub(LLVM::Type VectorTy) noexcept {
5497
477
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5498
477
      return Builder.createFSub(LHS, RHS);
5499
477
    });
5500
477
  }
5501
269
  void compileVectorVectorFMul(LLVM::Type VectorTy) noexcept {
5502
269
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5503
269
      return Builder.createFMul(LHS, RHS);
5504
269
    });
5505
269
  }
5506
199
  void compileVectorVectorFDiv(LLVM::Type VectorTy) noexcept {
5507
199
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5508
199
      return Builder.createFDiv(LHS, RHS);
5509
199
    });
5510
199
  }
5511
360
  void compileVectorVectorFMin(LLVM::Type VectorTy) noexcept {
5512
360
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5513
360
      auto LNaN = Builder.createFCmpUNO(LHS, LHS);
5514
360
      auto RNaN = Builder.createFCmpUNO(RHS, RHS);
5515
360
      auto OLT = Builder.createFCmpOLT(LHS, RHS);
5516
360
      auto OGT = Builder.createFCmpOGT(LHS, RHS);
5517
360
      auto Ret = Builder.createBitCast(
5518
360
          Builder.createOr(Builder.createBitCast(LHS, Context.Int64x2Ty),
5519
360
                           Builder.createBitCast(RHS, Context.Int64x2Ty)),
5520
360
          LHS.getType());
5521
360
      Ret = Builder.createSelect(OGT, RHS, Ret);
5522
360
      Ret = Builder.createSelect(OLT, LHS, Ret);
5523
360
      Ret = Builder.createSelect(LNaN, LHS, Ret);
5524
360
      Ret = Builder.createSelect(RNaN, RHS, Ret);
5525
360
      return Ret;
5526
360
    });
5527
360
  }
5528
221
  void compileVectorVectorFMax(LLVM::Type VectorTy) noexcept {
5529
221
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5530
221
      auto LNaN = Builder.createFCmpUNO(LHS, LHS);
5531
221
      auto RNaN = Builder.createFCmpUNO(RHS, RHS);
5532
221
      auto OLT = Builder.createFCmpOLT(LHS, RHS);
5533
221
      auto OGT = Builder.createFCmpOGT(LHS, RHS);
5534
221
      auto Ret = Builder.createBitCast(
5535
221
          Builder.createAnd(Builder.createBitCast(LHS, Context.Int64x2Ty),
5536
221
                            Builder.createBitCast(RHS, Context.Int64x2Ty)),
5537
221
          LHS.getType());
5538
221
      Ret = Builder.createSelect(OLT, RHS, Ret);
5539
221
      Ret = Builder.createSelect(OGT, LHS, Ret);
5540
221
      Ret = Builder.createSelect(LNaN, LHS, Ret);
5541
221
      Ret = Builder.createSelect(RNaN, RHS, Ret);
5542
221
      return Ret;
5543
221
    });
5544
221
  }
5545
408
  void compileVectorVectorFPMin(LLVM::Type VectorTy) noexcept {
5546
408
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5547
408
      auto Cmp = Builder.createFCmpOLT(RHS, LHS);
5548
408
      return Builder.createSelect(Cmp, RHS, LHS);
5549
408
    });
5550
408
  }
5551
323
  void compileVectorVectorFPMax(LLVM::Type VectorTy) noexcept {
5552
323
    compileVectorVectorOp(VectorTy, [this](auto LHS, auto RHS) noexcept {
5553
323
      auto Cmp = Builder.createFCmpOGT(RHS, LHS);
5554
323
      return Builder.createSelect(Cmp, RHS, LHS);
5555
323
    });
5556
323
  }
5557
1.06k
  void compileVectorTruncSatS32(LLVM::Type VectorTy, bool PadZero) noexcept {
5558
1.06k
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto V) noexcept {
5559
1.06k
      const auto Size = VectorTy.getVectorSize();
5560
1.06k
      auto FPTy = VectorTy.getElementType();
5561
1.06k
      auto IntMin = LLContext.getInt32(
5562
1.06k
          static_cast<uint32_t>(std::numeric_limits<int32_t>::min()));
5563
1.06k
      auto IntMax = LLContext.getInt32(
5564
1.06k
          static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
5565
1.06k
      auto IntMinV = Builder.createVectorSplat(Size, IntMin);
5566
1.06k
      auto IntMaxV = Builder.createVectorSplat(Size, IntMax);
5567
1.06k
      auto IntZeroV = LLVM::Value::getConstNull(IntMinV.getType());
5568
1.06k
      auto FPMin = Builder.createSIToFP(IntMin, FPTy);
5569
1.06k
      auto FPMax = Builder.createSIToFP(IntMax, FPTy);
5570
1.06k
      auto FPMinV = Builder.createVectorSplat(Size, FPMin);
5571
1.06k
      auto FPMaxV = Builder.createVectorSplat(Size, FPMax);
5572
5573
1.06k
      auto Normal = Builder.createFCmpORD(V, V);
5574
1.06k
      auto NotUnder = Builder.createFCmpUGE(V, FPMinV);
5575
1.06k
      auto NotOver = Builder.createFCmpULT(V, FPMaxV);
5576
1.06k
      V = Builder.createFPToSI(
5577
1.06k
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Size));
5578
1.06k
      V = Builder.createSelect(Normal, V, IntZeroV);
5579
1.06k
      V = Builder.createSelect(NotUnder, V, IntMinV);
5580
1.06k
      V = Builder.createSelect(NotOver, V, IntMaxV);
5581
1.06k
      if (PadZero) {
5582
895
        std::vector<uint32_t> Mask(Size * 2);
5583
895
        std::iota(Mask.begin(), Mask.end(), 0);
5584
895
        if constexpr (Endian::native == Endian::little) {
5585
895
          V = Builder.createShuffleVector(
5586
895
              V, IntZeroV, LLVM::Value::getConstVector32(LLContext, Mask));
5587
        } else {
5588
          V = Builder.createShuffleVector(
5589
              IntZeroV, V, LLVM::Value::getConstVector32(LLContext, Mask));
5590
        }
5591
895
      }
5592
1.06k
      return V;
5593
1.06k
    });
5594
1.06k
  }
5595
7.05k
  void compileVectorTruncSatU32(LLVM::Type VectorTy, bool PadZero) noexcept {
5596
7.05k
    compileVectorOp(VectorTy, [this, VectorTy, PadZero](auto V) noexcept {
5597
7.05k
      const auto Size = VectorTy.getVectorSize();
5598
7.05k
      auto FPTy = VectorTy.getElementType();
5599
7.05k
      auto IntMin = LLContext.getInt32(std::numeric_limits<uint32_t>::min());
5600
7.05k
      auto IntMax = LLContext.getInt32(std::numeric_limits<uint32_t>::max());
5601
7.05k
      auto IntMinV = Builder.createVectorSplat(Size, IntMin);
5602
7.05k
      auto IntMaxV = Builder.createVectorSplat(Size, IntMax);
5603
7.05k
      auto FPMin = Builder.createUIToFP(IntMin, FPTy);
5604
7.05k
      auto FPMax = Builder.createUIToFP(IntMax, FPTy);
5605
7.05k
      auto FPMinV = Builder.createVectorSplat(Size, FPMin);
5606
7.05k
      auto FPMaxV = Builder.createVectorSplat(Size, FPMax);
5607
5608
7.05k
      auto NotUnder = Builder.createFCmpOGE(V, FPMinV);
5609
7.05k
      auto NotOver = Builder.createFCmpULT(V, FPMaxV);
5610
7.05k
      V = Builder.createFPToUI(
5611
7.05k
          V, LLVM::Type::getVectorType(LLContext.getInt32Ty(), Size));
5612
7.05k
      V = Builder.createSelect(NotUnder, V, IntMinV);
5613
7.05k
      V = Builder.createSelect(NotOver, V, IntMaxV);
5614
7.05k
      if (PadZero) {
5615
2.54k
        auto IntZeroV = LLVM::Value::getConstNull(IntMinV.getType());
5616
2.54k
        std::vector<uint32_t> Mask(Size * 2);
5617
2.54k
        std::iota(Mask.begin(), Mask.end(), 0);
5618
2.54k
        if constexpr (Endian::native == Endian::little) {
5619
2.54k
          V = Builder.createShuffleVector(
5620
2.54k
              V, IntZeroV, LLVM::Value::getConstVector32(LLContext, Mask));
5621
        } else {
5622
          V = Builder.createShuffleVector(
5623
              IntZeroV, V, LLVM::Value::getConstVector32(LLContext, Mask));
5624
        }
5625
2.54k
      }
5626
7.05k
      return V;
5627
7.05k
    });
5628
7.05k
  }
5629
  void compileVectorConvertS(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5630
742
                             bool Low) noexcept {
5631
742
    compileVectorOp(VectorTy,
5632
742
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5633
742
                      if (Low) {
5634
388
                        const auto Size = VectorTy.getVectorSize() / 2;
5635
388
                        std::vector<uint32_t> Mask(Size);
5636
388
                        if constexpr (Endian::native == Endian::little) {
5637
388
                          std::iota(Mask.begin(), Mask.end(), 0);
5638
                        } else {
5639
                          std::iota(Mask.begin(), Mask.end(), Size);
5640
                        }
5641
388
                        V = Builder.createShuffleVector(
5642
388
                            V, LLVM::Value::getUndef(VectorTy),
5643
388
                            LLVM::Value::getConstVector32(LLContext, Mask));
5644
388
                      }
5645
742
                      return Builder.createSIToFP(V, FPVectorTy);
5646
742
                    });
5647
742
  }
5648
  void compileVectorConvertU(LLVM::Type VectorTy, LLVM::Type FPVectorTy,
5649
2.18k
                             bool Low) noexcept {
5650
2.18k
    compileVectorOp(VectorTy,
5651
2.18k
                    [this, VectorTy, FPVectorTy, Low](auto V) noexcept {
5652
2.18k
                      if (Low) {
5653
1.35k
                        const auto Size = VectorTy.getVectorSize() / 2;
5654
1.35k
                        std::vector<uint32_t> Mask(Size);
5655
1.35k
                        if constexpr (Endian::native == Endian::little) {
5656
1.35k
                          std::iota(Mask.begin(), Mask.end(), 0);
5657
                        } else {
5658
                          std::iota(Mask.begin(), Mask.end(), Size);
5659
                        }
5660
1.35k
                        V = Builder.createShuffleVector(
5661
1.35k
                            V, LLVM::Value::getUndef(VectorTy),
5662
1.35k
                            LLVM::Value::getConstVector32(LLContext, Mask));
5663
1.35k
                      }
5664
2.18k
                      return Builder.createUIToFP(V, FPVectorTy);
5665
2.18k
                    });
5666
2.18k
  }
5667
746
  void compileVectorDemote() noexcept {
5668
746
    compileVectorOp(Context.Doublex2Ty, [this](auto V) noexcept {
5669
746
      auto Demoted = Builder.createFPTrunc(
5670
746
          V, LLVM::Type::getVectorType(Context.FloatTy, 2));
5671
746
      auto ZeroV = LLVM::Value::getConstNull(Demoted.getType());
5672
746
      if constexpr (Endian::native == Endian::little) {
5673
746
        return Builder.createShuffleVector(
5674
746
            Demoted, ZeroV,
5675
746
            LLVM::Value::getConstVector32(LLContext, {0u, 1u, 2u, 3u}));
5676
      } else {
5677
        return Builder.createShuffleVector(
5678
            Demoted, ZeroV,
5679
            LLVM::Value::getConstVector32(LLContext, {3u, 2u, 1u, 0u}));
5680
      }
5681
746
    });
5682
746
  }
5683
802
  void compileVectorPromote() noexcept {
5684
802
    compileVectorOp(Context.Floatx4Ty, [this](auto V) noexcept {
5685
802
      auto UndefV = LLVM::Value::getUndef(V.getType());
5686
802
      auto Low = Builder.createShuffleVector(
5687
802
          V, UndefV, LLVM::Value::getConstVector32(LLContext, {0u, 1u}));
5688
802
      return Builder.createFPExt(
5689
802
          Low, LLVM::Type::getVectorType(Context.DoubleTy, 2));
5690
802
    });
5691
802
  }
5692
5693
27
  void compileVectorVectorMAdd(LLVM::Type VectorTy) noexcept {
5694
27
    auto C = Builder.createBitCast(stackPop(), VectorTy);
5695
27
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5696
27
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5697
27
    stackPush(Builder.createBitCast(
5698
27
        Builder.createFAdd(Builder.createFMul(LHS, RHS), C),
5699
27
        Context.Int64x2Ty));
5700
27
  }
5701
5702
90
  void compileVectorVectorNMAdd(LLVM::Type VectorTy) noexcept {
5703
90
    auto C = Builder.createBitCast(stackPop(), VectorTy);
5704
90
    auto RHS = Builder.createBitCast(stackPop(), VectorTy);
5705
90
    auto LHS = Builder.createBitCast(stackPop(), VectorTy);
5706
90
    stackPush(Builder.createBitCast(
5707
90
        Builder.createFAdd(Builder.createFMul(Builder.createFNeg(LHS), RHS), C),
5708
90
        Context.Int64x2Ty));
5709
90
  }
5710
5711
14
  void compileVectorRelaxedIntegerDotProduct() noexcept {
5712
14
    auto OriTy = Context.Int8x16Ty;
5713
14
    auto ExtTy = Context.Int16x8Ty;
5714
14
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5715
14
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5716
14
#if defined(__x86_64__)
5717
14
    if (Context.SupportSSSE3) {
5718
14
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5719
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5720
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5721
      // side to match the WebAssembly spec
5722
14
      return stackPush(Builder.createBitCast(
5723
14
          Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5724
14
                                  {RHS, LHS}),
5725
14
          Context.Int64x2Ty));
5726
14
    }
5727
0
#endif
5728
0
    auto Width = LLVM::Value::getConstInt(
5729
0
        ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5730
0
    Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5731
0
    auto EA = Builder.createBitCast(LHS, ExtTy);
5732
0
    auto EB = Builder.createBitCast(RHS, ExtTy);
5733
5734
0
    LLVM::Value AL, AR, BL, BR;
5735
0
    AL = Builder.createAShr(EA, Width);
5736
0
    AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5737
0
    BL = Builder.createAShr(EB, Width);
5738
0
    BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5739
5740
0
    return stackPush(Builder.createBitCast(
5741
0
        Builder.createAdd(Builder.createMul(AL, BL), Builder.createMul(AR, BR)),
5742
0
        Context.Int64x2Ty));
5743
14
  }
5744
5745
12
  void compileVectorRelaxedIntegerDotProductAdd() noexcept {
5746
12
    auto OriTy = Context.Int8x16Ty;
5747
12
    auto ExtTy = Context.Int16x8Ty;
5748
12
    auto FinTy = Context.Int32x4Ty;
5749
12
    auto VC = Builder.createBitCast(stackPop(), FinTy);
5750
12
    auto RHS = Builder.createBitCast(stackPop(), OriTy);
5751
12
    auto LHS = Builder.createBitCast(stackPop(), OriTy);
5752
12
    LLVM::Value IM;
5753
12
#if defined(__x86_64__)
5754
12
    if (Context.SupportSSSE3) {
5755
12
      assuming(LLVM::Core::X86SSSE3PMAddUbSw128 != LLVM::Core::NotIntrinsic);
5756
      // WebAssembly Relaxed SIMD spec: signed(LHS) * unsigned/signed(RHS)
5757
      // But PMAddUbSw128 is unsigned(LHS) * signed(RHS). Therefore swap both
5758
      // side to match the WebAssembly spec
5759
12
      IM = Builder.createIntrinsic(LLVM::Core::X86SSSE3PMAddUbSw128, {},
5760
12
                                   {RHS, LHS});
5761
12
    } else
5762
0
#endif
5763
0
    {
5764
0
      auto Width = LLVM::Value::getConstInt(
5765
0
          ExtTy.getElementType(), OriTy.getElementType().getIntegerBitWidth());
5766
0
      Width = Builder.createVectorSplat(ExtTy.getVectorSize(), Width);
5767
0
      auto EA = Builder.createBitCast(LHS, ExtTy);
5768
0
      auto EB = Builder.createBitCast(RHS, ExtTy);
5769
5770
0
      LLVM::Value AL, AR, BL, BR;
5771
0
      AL = Builder.createAShr(EA, Width);
5772
0
      AR = Builder.createAShr(Builder.createShl(EA, Width), Width);
5773
0
      BL = Builder.createAShr(EB, Width);
5774
0
      BR = Builder.createAShr(Builder.createShl(EB, Width), Width);
5775
0
      IM = Builder.createAdd(Builder.createMul(AL, BL),
5776
0
                             Builder.createMul(AR, BR));
5777
0
    }
5778
5779
12
    auto Width = LLVM::Value::getConstInt(
5780
12
        FinTy.getElementType(), ExtTy.getElementType().getIntegerBitWidth());
5781
12
    Width = Builder.createVectorSplat(FinTy.getVectorSize(), Width);
5782
12
    auto IME = Builder.createBitCast(IM, FinTy);
5783
12
    auto L = Builder.createAShr(IME, Width);
5784
12
    auto R = Builder.createAShr(Builder.createShl(IME, Width), Width);
5785
5786
12
    return stackPush(Builder.createBitCast(
5787
12
        Builder.createAdd(Builder.createAdd(L, R), VC), Context.Int64x2Ty));
5788
12
  }
5789
5790
  void
5791
  enterBlock(LLVM::BasicBlock JumpBlock, LLVM::BasicBlock NextBlock,
5792
             LLVM::BasicBlock ElseBlock, std::vector<LLVM::Value> Args,
5793
             std::pair<std::vector<ValType>, std::vector<ValType>> Type,
5794
             std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5795
22.5k
                 ReturnPHI = {}) noexcept {
5796
22.5k
    assuming(Type.first.size() == Args.size());
5797
22.5k
    for (auto &Value : Args) {
5798
4.25k
      stackPush(Value);
5799
4.25k
    }
5800
22.5k
    const auto Unreachable = isUnreachable();
5801
22.5k
    ControlStack.emplace_back(Stack.size() - Args.size(), Unreachable,
5802
22.5k
                              JumpBlock, NextBlock, ElseBlock, std::move(Args),
5803
22.5k
                              std::move(Type), std::move(ReturnPHI));
5804
22.5k
  }
5805
5806
22.5k
  Control leaveBlock() noexcept {
5807
22.5k
    Control Entry = std::move(ControlStack.back());
5808
22.5k
    ControlStack.pop_back();
5809
5810
22.5k
    auto NextBlock = Entry.NextBlock ? Entry.NextBlock : Entry.JumpBlock;
5811
22.5k
    if (!Entry.Unreachable) {
5812
13.7k
      const auto &ReturnType = Entry.Type.second;
5813
13.7k
      if (!ReturnType.empty()) {
5814
10.3k
        std::vector<LLVM::Value> Rets(ReturnType.size());
5815
21.2k
        for (size_t I = 0; I < Rets.size(); ++I) {
5816
10.8k
          const size_t J = Rets.size() - 1 - I;
5817
10.8k
          Rets[J] = stackPop();
5818
10.8k
        }
5819
10.3k
        Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5820
10.3k
      }
5821
13.7k
      Builder.createBr(NextBlock);
5822
13.7k
    } else {
5823
8.78k
      Builder.createUnreachable();
5824
8.78k
    }
5825
22.5k
    Builder.positionAtEnd(NextBlock);
5826
22.5k
    Stack.erase(Stack.begin() + static_cast<int64_t>(Entry.StackSize),
5827
22.5k
                Stack.end());
5828
22.5k
    return Entry;
5829
22.5k
  }
5830
5831
5.42k
  void checkStop() noexcept {
5832
5.42k
    if (!Interruptible) {
5833
5.42k
      return;
5834
5.42k
    }
5835
0
    auto NotStopBB = LLVM::BasicBlock::create(LLContext, F.Fn, "NotStop");
5836
0
    auto StopToken = Builder.createAtomicRMW(
5837
0
        LLVMAtomicRMWBinOpXchg, Context.getStopToken(Builder, ExecCtx),
5838
0
        LLContext.getInt32(0), LLVMAtomicOrderingMonotonic);
5839
#if LLVM_VERSION_MAJOR >= 13
5840
    StopToken.setAlignment(32);
5841
#endif
5842
0
    auto NotStop = Builder.createLikely(
5843
0
        Builder.createICmpEQ(StopToken, LLContext.getInt32(0)));
5844
0
    Builder.createCondBr(NotStop, NotStopBB,
5845
0
                         getTrapBB(ErrCode::Value::Interrupted));
5846
5847
0
    Builder.positionAtEnd(NotStopBB);
5848
0
  }
5849
5850
6.15k
  void setUnreachable() noexcept {
5851
6.15k
    if (ControlStack.empty()) {
5852
0
      IsUnreachable = true;
5853
6.15k
    } else {
5854
6.15k
      ControlStack.back().Unreachable = true;
5855
6.15k
    }
5856
6.15k
  }
5857
5858
1.66M
  bool isUnreachable() const noexcept {
5859
1.66M
    if (ControlStack.empty()) {
5860
11.2k
      return IsUnreachable;
5861
1.64M
    } else {
5862
1.64M
      return ControlStack.back().Unreachable;
5863
1.64M
    }
5864
1.66M
  }
5865
5866
  void
5867
  buildPHI(Span<const ValType> RetType,
5868
           Span<const std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5869
19.5k
               Incomings) noexcept {
5870
19.5k
    if (isVoidReturn(RetType)) {
5871
6.30k
      return;
5872
6.30k
    }
5873
13.2k
    std::vector<LLVM::Value> Nodes;
5874
13.2k
    if (Incomings.size() == 0) {
5875
2.89k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5876
2.89k
      Nodes.reserve(Types.size());
5877
3.40k
      for (LLVM::Type Type : Types) {
5878
3.40k
        Nodes.push_back(LLVM::Value::getUndef(Type));
5879
3.40k
      }
5880
10.3k
    } else if (Incomings.size() == 1) {
5881
9.18k
      Nodes = std::move(std::get<0>(Incomings.front()));
5882
9.18k
    } else {
5883
1.18k
      const auto &Types = toLLVMTypeVector(LLContext, RetType);
5884
1.18k
      Nodes.reserve(Types.size());
5885
2.47k
      for (size_t I = 0; I < Types.size(); ++I) {
5886
1.29k
        auto PHIRet = Builder.createPHI(Types[I]);
5887
3.30k
        for (auto &[Value, BB] : Incomings) {
5888
3.30k
          assuming(Value.size() == Types.size());
5889
3.30k
          PHIRet.addIncoming(Value[I], BB);
5890
3.30k
        }
5891
1.29k
        Nodes.push_back(PHIRet);
5892
1.29k
      }
5893
1.18k
    }
5894
14.2k
    for (auto &Val : Nodes) {
5895
14.2k
      stackPush(Val);
5896
14.2k
    }
5897
13.2k
  }
5898
5899
21.4k
  void setLableJumpPHI(unsigned int Index) noexcept {
5900
21.4k
    assuming(Index < ControlStack.size());
5901
21.4k
    auto &Entry = *(ControlStack.rbegin() + Index);
5902
21.4k
    if (Entry.NextBlock) { // is loop
5903
2.19k
      std::vector<LLVM::Value> Args(Entry.Type.first.size());
5904
4.67k
      for (size_t I = 0; I < Args.size(); ++I) {
5905
2.47k
        const size_t J = Args.size() - 1 - I;
5906
2.47k
        Args[J] = stackPop();
5907
2.47k
      }
5908
4.67k
      for (size_t I = 0; I < Args.size(); ++I) {
5909
2.47k
        Entry.Args[I].addIncoming(Args[I], Builder.getInsertBlock());
5910
2.47k
        stackPush(Args[I]);
5911
2.47k
      }
5912
19.2k
    } else if (!Entry.Type.second.empty()) { // has return value
5913
1.89k
      std::vector<LLVM::Value> Rets(Entry.Type.second.size());
5914
3.91k
      for (size_t I = 0; I < Rets.size(); ++I) {
5915
2.01k
        const size_t J = Rets.size() - 1 - I;
5916
2.01k
        Rets[J] = stackPop();
5917
2.01k
      }
5918
3.91k
      for (size_t I = 0; I < Rets.size(); ++I) {
5919
2.01k
        stackPush(Rets[I]);
5920
2.01k
      }
5921
1.89k
      Entry.ReturnPHI.emplace_back(std::move(Rets), Builder.getInsertBlock());
5922
1.89k
    }
5923
21.4k
  }
5924
5925
21.4k
  /// Return the jump target (JumpBlock) of the control frame `Index` levels
  /// up, where 0 is the innermost frame.
  ///
  /// \pre Index < ControlStack.size(); the same precondition that
  ///      setLableJumpPHI states for its label index.
  LLVM::BasicBlock getLabel(unsigned int Index) const noexcept {
    // rbegin() + Index must stay before rend() for the dereference to be
    // valid.
    assuming(Index < ControlStack.size());
    return (ControlStack.rbegin() + Index)->JumpBlock;
  }
5928
5929
972k
  /// Push `Value` on top of the value stack.
  void stackPush(LLVM::Value Value) noexcept {
    Stack.push_back(Value);
  }
5930
366k
  /// Pop and return the top of the value stack.
  ///
  /// \pre The stack is not empty, and when a control frame is open the stack
  ///      holds more values than the height recorded by the innermost frame.
  LLVM::Value stackPop() noexcept {
    if (ControlStack.empty()) {
      assuming(!Stack.empty());
    } else {
      assuming(Stack.size() > ControlStack.back().StackSize);
    }
    LLVM::Value Top = Stack.back();
    Stack.pop_back();
    return Top;
  }
5938
5939
22.7k
  /// Byte-swap `Value` when the native byte order is big endian; on other
  /// hosts the value is returned unchanged.
  ///
  /// Integers wider than 8 bits and one-element vectors are swapped
  /// directly. Other vectors, floats and doubles are bit-cast to an integer
  /// type, swapped, and cast back. Anything else is returned unchanged.
  LLVM::Value switchEndian(LLVM::Value Value) {
    if constexpr (Endian::native == Endian::big) {
      auto Ty = Value.getType();

      const bool IsWideInt = Ty.isIntegerTy() && Ty.getIntegerBitWidth() > 8;
      const bool IsUnitVector = Ty.isVectorTy() && Ty.getVectorSize() == 1;
      if (IsWideInt || IsUnitVector) {
        return Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
      }

      if (Ty.isVectorTy()) {
        // Swap the whole vector as a single 128- or 64-bit integer.
        LLVM::Type CarrierTy = Context.Int64Ty;
        if (Ty.getElementType().getIntegerBitWidth() == 128) {
          CarrierTy = Context.Int128Ty;
        }
        Value = Builder.createBitCast(Value, CarrierTy);
        Value = Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
        return Builder.createBitCast(Value, Ty);
      }

      if (Ty.isFloatTy() || Ty.isDoubleTy()) {
        // Swap through the integer type the cast below pairs with each
        // float type.
        LLVM::Type CarrierTy = Context.Int64Ty;
        if (Ty.isFloatTy()) {
          CarrierTy = Context.Int32Ty;
        }
        Value = Builder.createBitCast(Value, CarrierTy);
        Value = Builder.createUnaryIntrinsic(LLVM::Core::Bswap, Value);
        return Builder.createBitCast(Value, Ty);
      }
    }
    return Value;
  }
5964
5965
  // Shared compile context; checkStop() obtains the stop token through it.
  LLVM::Compiler::CompileContext &Context;
5966
  // LLVM context used to create basic blocks, constants and types.
  LLVM::Context LLContext;
5967
  // NOTE(review): presumably one (type, storage) pair per wasm local; the
  // users are outside this part of the file, confirm there.
  std::vector<std::pair<LLVM::Type, LLVM::Value>> Local;
5968
  // Value stack manipulated through stackPush() / stackPop().
  std::vector<LLVM::Value> Stack;
5969
  // NOTE(review): looks like per-function instruction-count and gas
  // accumulators; not referenced in the visible code, confirm.
  LLVM::Value LocalInstrCount = nullptr;
5970
  LLVM::Value LocalGas = nullptr;
5971
  // NOTE(review): presumably the trap block for each error code, as handed
  // out by getTrapBB(); confirm against getTrapBB().
  std::unordered_map<ErrCode::Value, LLVM::BasicBlock> TrapBB;
5972
  // Unreachable flag used while no control frame is open (see
  // setUnreachable() / isUnreachable()).
  bool IsUnreachable = false;
5973
  // When false, checkStop() emits nothing.
  bool Interruptible = false;
5974
  struct Control {
5975
    size_t StackSize;
5976
    bool Unreachable;
5977
    LLVM::BasicBlock JumpBlock;
5978
    LLVM::BasicBlock NextBlock;
5979
    LLVM::BasicBlock ElseBlock;
5980
    std::vector<LLVM::Value> Args;
5981
    std::pair<std::vector<ValType>, std::vector<ValType>> Type;
5982
    std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5983
        ReturnPHI;
5984
    Control(size_t S, bool U, LLVM::BasicBlock J, LLVM::BasicBlock N,
5985
            LLVM::BasicBlock E, std::vector<LLVM::Value> A,
5986
            std::pair<std::vector<ValType>, std::vector<ValType>> T,
5987
            std::vector<std::tuple<std::vector<LLVM::Value>, LLVM::BasicBlock>>
5988
                R) noexcept
5989
22.5k
        : StackSize(S), Unreachable(U), JumpBlock(J), NextBlock(N),
5990
22.5k
          ElseBlock(E), Args(std::move(A)), Type(std::move(T)),
5991
22.5k
          ReturnPHI(std::move(R)) {}
5992
    Control(const Control &) = default;
5993
27.8k
    Control(Control &&) = default;
5994
    Control &operator=(const Control &) = default;
5995
1.28k
    Control &operator=(Control &&) = default;
5996
  };
5997
  // NOTE(review): presumably selects lazy-JIT code generation; not read in
  // the visible code, confirm.
  bool IsLazyJIT;
5998
  // Open control frames; back() is the innermost one.
  std::vector<Control> ControlStack;
5999
  // Function under construction; checkStop() creates its new block in F.Fn.
  LLVM::FunctionCallee F;
6000
  // Execution-context value passed to Context.getStopToken() in checkStop().
  LLVM::Value ExecCtx;
6001
  // IR builder that emits every instruction of this function.
  LLVM::Builder Builder;
6002
};
6003
6004
/// Split a struct value into its elements.
///
/// \param Builder builder used to emit one extract-value per element.
/// \param Struct  value of struct type.
/// \returns the extracted elements, in element order.
std::vector<LLVM::Value> unpackStruct(LLVM::Builder &Builder,
                                      LLVM::Value Struct) noexcept {
  const auto FieldCount = Struct.getType().getStructNumElements();
  std::vector<LLVM::Value> Fields;
  Fields.reserve(FieldCount);
  unsigned Field = 0;
  while (Field < FieldCount) {
    Fields.push_back(Builder.createExtractValue(Struct, Field));
    ++Field;
  }
  return Fields;
}
6014
6015
} // namespace
6016
6017
namespace WasmEdge {
6018
namespace LLVM {
6019
6020
2.30k
/// Validate the configured proposals against what AOT/JIT supports.
///
/// The Exception Handling proposal only triggers a warning, because it is
/// part of the default (WASM 3.0) configuration. The Custom Annotation
/// Syntax proposal is rejected.
///
/// \returns success, or ErrCode::Value::InvalidAOTConfigure when the
///          Annotations proposal is enabled.
Expect<void> Compiler::checkConfigure() noexcept {
  const bool WantsExceptionHandling =
      Conf.hasProposal(Proposal::ExceptionHandling);
  if (WantsExceptionHandling) {
    spdlog::warn("Proposal Exception Handling is not yet supported in WasmEdge "
                 "AOT/JIT. The compilation will be trapped when related data "
                 "structure or instructions found in WASM.");
  }

  if (!Conf.hasProposal(Proposal::Annotations)) {
    return {};
  }

  spdlog::error(ErrCode::Value::InvalidAOTConfigure);
  spdlog::error("    Proposal Custom Annotation Syntax is not yet supported "
                "in WasmEdge AOT/JIT.");
  return Unexpect(ErrCode::Value::InvalidAOTConfigure);
}
6037
6038
/// Create the target machine for the module's triple and run the
/// optimization pipeline selected by the configured optimization level.
///
/// \param LLModule module to optimize in place.
/// \param TM       receives the newly created target machine.
/// \returns success, or ErrCode::Value::IllegalPath when no target matches
///          the module's triple. A failure reported by the new pass manager
///          (LLVM >= 13) is only logged.
Expect<void> Compiler::optimize(LLVM::Module &LLModule,
                                LLVM::TargetMachine &TM) noexcept {
  spdlog::info("optimize start"sv);

  auto Triple = LLModule.getTarget();
  auto [TheTarget, ErrorMessage] = LLVM::Target::getFromTriple(Triple);
  if (ErrorMessage) {
    spdlog::error("getFromTriple failed:{}"sv, ErrorMessage.string_view());
    return Unexpect(ErrCode::Value::IllegalPath);
  }

  const auto Level = Conf.getCompilerConfigure().getOptimizationLevel();

#if defined(__riscv) && __riscv_xlen == 64
  std::string CPUName = "generic-rv64"s;
#else
  // Generic binaries target the baseline CPU, others the host CPU.
  std::string CPUName = "generic"s;
  if (!Conf.getCompilerConfigure().isGenericBinary()) {
    CPUName = LLVM::getHostCPUName().string_view();
  }
#endif

  // On RISC-V the CPU is generic-rv64, so the default feature set is used as
  // well; host features under QEMU can be inconsistent (e.g. zvl*b without
  // v), which LLVM >= 20 rejects.
  TM = LLVM::TargetMachine::create(TheTarget, Triple, CPUName.c_str(),
#if defined(__riscv) && __riscv_xlen == 64
                                   "",
#else
                                   LLVM::getHostCPUFeatures().unwrap(),
#endif
                                   toLLVMCodeGenLevel(Level), LLVMRelocPIC,
                                   LLVMCodeModelDefault);

#if LLVM_VERSION_MAJOR >= 13
  auto PBO = LLVM::PassBuilderOptions::create();
  if (auto Error = PBO.runPasses(LLModule, toLLVMLevel(Level), TM)) {
    spdlog::error("{}"sv, Error.message().string_view());
  }
#else
  auto FP = LLVM::PassManager::createForModule(LLModule);
  auto MP = LLVM::PassManager::create();

  TM.addAnalysisPasses(MP);
  TM.addAnalysisPasses(FP);
  {
    auto PMB = LLVM::PassManagerBuilder::create();
    auto [SpeedLevel, SizeLevel] = toLLVMLevel(Level);
    PMB.setOptLevel(SpeedLevel);
    PMB.setSizeLevel(SizeLevel);
    PMB.populateFunctionPassManager(FP);
    PMB.populateModulePassManager(MP);
  }
  // At O0 and O1 tail call elimination is added explicitly.
  if (Level == CompilerConfigure::OptimizationLevel::O0 ||
      Level == CompilerConfigure::OptimizationLevel::O1) {
    FP.addTailCallEliminationPass();
  }

  FP.initializeFunctionPassManager();
  for (auto Fn = LLModule.getFirstFunction(); Fn; Fn = Fn.getNextFunction()) {
    FP.runFunctionPassManager(Fn);
  }
  FP.finalizeFunctionPassManager();
  MP.runPassManager(LLModule);
#endif

  spdlog::info("optimize done"sv);
  return {};
}
6115
6116
// Initialize the LLVM module held by the data for compilation: set the
6117
// target triple and the PIC level, and return the LLVM context.
6118
2.30k
static LLVM::Context initLLVMModule(LLVM::Data &D) noexcept {
6119
2.30k
  auto LLContext = D.extract().getLLContext();
6120
2.30k
  LLVM::Core::init(LLContext.unwrap());
6121
2.30k
  auto &LLModule = D.extract().LLModule;
6122
2.30k
  LLModule.setTarget(LLVM::getDefaultTargetTriple().unwrap());
6123
2.30k
  LLModule.addFlag(LLVMModuleFlagBehaviorError, "PIC Level"sv, 2);
6124
2.30k
  return LLContext;
6125
2.30k
}
6126
6127
2.30k
/// Compile a validated module into LLVM IR and optimize it.
///
/// The whole compilation runs under the compiler mutex. A fresh
/// CompileContext is installed for the duration of the call and removed
/// again on every exit path by the RAIICleanup guard.
///
/// \param Module validated AST module.
/// \returns the compiled data, ErrCode::Value::NotValidated for an
///          unvalidated module, or the error of the first failing step.
Expect<Data> Compiler::compile(const AST::Module &Module) noexcept {
  // Refuse modules that have not passed validation.
  if (unlikely(!Module.getIsValidated())) {
    spdlog::error(ErrCode::Value::NotValidated);
    return Unexpect(ErrCode::Value::NotValidated);
  }

  std::unique_lock Lock(Mutex);
  spdlog::info("compile start"sv);

  LLVM::Data D;
  auto LLContext = initLLVMModule(D);
  auto &LLModule = D.extract().LLModule;

  CompileContext NewContext(LLContext, LLModule,
                            Conf.getCompilerConfigure().isGenericBinary());
  RAIICleanup Cleanup(Context, &NewContext);
  Context->addVersionGlobal();

  // Sections and function declarations first ...
  compileSections(Module, false);
  // ... then every defined function body, stopping at the first failure.
  const auto BodyCount = Module.getDefinedFuncCount();
  for (uint32_t Body = 0; Body < BodyCount; ++Body) {
    EXPECTED_TRY(compileFunctionBody(Body));
  }
  // Export section. The start section needs no compilation.
  compile(Module.getExportSection());

  spdlog::info("verify start"sv);
  LLModule.verify(LLVMPrintMessageAction);

  auto &TM = D.extract().TM;
  EXPECTED_TRY(optimize(LLModule, TM));

  // Set the initializer for the constant value.
  Context->finalizeIntrinsicsTable();
  return Expect<Data>{std::move(D)};
}
6167
6168
/// Compile the type section: one wrapper symbol `t<N>` per composite type.
///
/// Every wrapper has the signature
/// `void(ExecCtx *, i8 *Func, i8 *Args, i8 *Rets)`. For a function type the
/// wrapper loads the arguments from `Args` (one kValSize slot each), calls
/// `Func` with the typed signature and stores the results to `Rets`. A
/// function type equal to an earlier one reuses that wrapper through an
/// alias (or gets its own declaration when only declarations are emitted).
/// Non-function types get a wrapper that just returns.
///
/// \param TypeSec          the type section.
/// \param DeclarationsOnly when true, wrappers are declared but no bodies or
///                         aliases are emitted.
void Compiler::compile(const AST::TypeSection &TypeSec,
                       bool DeclarationsOnly) noexcept {
  auto WrapperTy = LLVM::Type::getFunctionType(
      Context->VoidTy,
      {Context->ExecCtxPtrTy, Context->Int8PtrTy, Context->Int8PtrTy,
       Context->Int8PtrTy},
      false);
  auto SubTypes = TypeSec.getContent();
  const auto Count = SubTypes.size();
  if (Count == 0) {
    return;
  }
  Context->CompositeTypes.reserve(Count);
  Context->FunctionWrappers.reserve(Count);

  // Linkage-related and parameter attributes shared by every wrapper.
  auto SetFuncAttributes = [&](auto Wrapper) {
    Wrapper.setVisibility(LLVMProtectedVisibility);
    Wrapper.setDSOLocal(true);
    Wrapper.setDLLStorageClass(LLVMDLLExportStorageClass);
    Wrapper.addFnAttr(Context->NoStackArgProbe);
    Wrapper.addFnAttr(Context->StrictFP);
    Wrapper.addFnAttr(Context->UWTable);
    Wrapper.addParamAttr(0, Context->ReadOnly);
    Wrapper.addParamAttr(0, Context->NoAlias);
    Wrapper.addParamAttr(1, Context->NoAlias);
    Wrapper.addParamAttr(2, Context->NoAlias);
    Wrapper.addParamAttr(3, Context->NoAlias);
  };

  for (size_t Idx = 0; Idx < Count; ++Idx) {
    const auto &CompType = SubTypes[Idx].getCompositeType();
    const auto Name = fmt::format("t{}"sv, Context->CompositeTypes.size());

    if (!CompType.isFunc()) {
      // Non-function type: the wrapper is an empty function.
      auto Stub = Context->LLModule.get().addFunction(
          WrapperTy, LLVMExternalLinkage, Name.c_str());
      SetFuncAttributes(Stub);
      if (!DeclarationsOnly) {
        LLVM::Builder Builder(Context->LLContext);
        Builder.positionAtEnd(
            LLVM::BasicBlock::create(Context->LLContext, Stub, "entry"));
        Builder.createRetVoid();
      }
      Context->FunctionWrappers.push_back(Stub);
      Context->CompositeTypes.push_back(&CompType);
      continue;
    }

    // Look for an earlier function type equal to this one; Match == Idx
    // means none was found.
    size_t Match = Idx;
    for (size_t Prev = 0; Prev < Idx; ++Prev) {
      const auto Known = Context->CompositeTypes[Prev];
      if (!Known || !Known->isFunc()) {
        continue;
      }
      const auto &OldFuncType = Known->getFuncType();
      if (OldFuncType == CompType.getFuncType()) {
        Match = Prev;
        break;
      }
    }

    if (Match != Idx) {
      // Duplicate: share the earlier type entry and wrapper.
      Context->CompositeTypes.push_back(Context->CompositeTypes[Match]);
      if (DeclarationsOnly) {
        auto FDecl = Context->LLModule.get().addFunction(
            WrapperTy, LLVMExternalLinkage, Name.c_str());
        SetFuncAttributes(FDecl);
        Context->FunctionWrappers.push_back(FDecl);
      } else {
        auto Shared = Context->FunctionWrappers[Match];
        Context->FunctionWrappers.push_back(Shared);
        auto Alias =
            Context->LLModule.get().addAlias(WrapperTy, Shared, Name.c_str());
        Alias.setLinkage(LLVMExternalLinkage);
        Alias.setVisibility(LLVMProtectedVisibility);
        Alias.setDSOLocal(true);
        Alias.setDLLStorageClass(LLVMDLLExportStorageClass);
      }
      continue;
    }

    // First occurrence of this function type: create its wrapper.
    auto Wrapper = Context->LLModule.get().addFunction(
        WrapperTy, LLVMExternalLinkage, Name.c_str());
    SetFuncAttributes(Wrapper);

    if (!DeclarationsOnly) {
      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, Wrapper, "entry"));

      auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy,
                            CompType.getFuncType());
      auto RTy = FTy.getReturnType();
      std::vector<LLVM::Type> ParamTys(FTy.getNumParams());
      FTy.getParamTypes(ParamTys);

      // The first typed parameter is the execution context.
      const size_t ArgCount = ParamTys.size() - 1;
      auto ExecCtxPtr = Wrapper.getFirstParam();
      auto RawFunc = LLVM::FunctionCallee{
          FTy, Builder.createBitCast(ExecCtxPtr.getNextParam(),
                                     FTy.getPointerTo())};
      auto RawArgs = ExecCtxPtr.getNextParam().getNextParam();
      auto RawRets = RawArgs.getNextParam();

      std::vector<LLVM::Value> CallArgs;
      CallArgs.reserve(FTy.getNumParams());
      CallArgs.push_back(ExecCtxPtr);
      for (size_t Arg = 0; Arg < ArgCount; ++Arg) {
        CallArgs.push_back(Builder.createValuePtrLoad(
            ParamTys[Arg + 1], RawArgs, Context->Int8Ty, Arg * kValSize));
      }

      auto Ret = Builder.createCall(RawFunc, CallArgs);
      if (!RTy.isVoidTy()) {
        if (RTy.isStructTy()) {
          // Several results come back as a struct; store them one by one.
          auto Rets = unpackStruct(Builder, Ret);
          Builder.createArrayPtrStore(Rets, RawRets, Context->Int8Ty,
                                      kValSize);
        } else {
          Builder.createValuePtrStore(Ret, RawRets, Context->Int8Ty);
        }
      }
      Builder.createRetVoid();
    }

    Context->FunctionWrappers.push_back(Wrapper);
    Context->CompositeTypes.push_back(&CompType);
  }
}
6303
6304
2.30k
/// Compile the import section.
///
/// - Function imports get an internal function `f<ID>` that stores its
///   arguments into an array (one kValSize slot each), calls the kCall
///   intrinsic with the function ID and the argument/result arrays, and
///   returns the loaded result(s). The function is appended to
///   Context->Functions and ImportCount is incremented.
/// - Table and memory imports record the LLVM type of their address type.
/// - Global imports record the LLVM type of their value type.
/// - Tag imports are ignored for now.
void Compiler::compile(const AST::ImportSection &ImportSec) noexcept {
  for (const auto &Import : ImportSec.getContent()) {
    switch (Import.getExternalType()) {
    case ExternalType::Function: {
      const auto FuncID = static_cast<uint32_t>(Context->Functions.size());
      // Resolve the function type through the module's type index.
      uint32_t TypeIdx = Import.getExternalFuncTypeIdx();
      assuming(TypeIdx < Context->CompositeTypes.size());
      assuming(Context->CompositeTypes[TypeIdx]->isFunc());
      const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
      auto FTy =
          toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
      auto RTy = FTy.getReturnType();

      const auto Symbol = fmt::format("f{}"sv, FuncID);
      auto F = LLVM::FunctionCallee{
          FTy, Context->LLModule.get().addFunction(FTy, LLVMInternalLinkage,
                                                   Symbol.c_str())};
      F.Fn.setDSOLocal(true);
      F.Fn.addFnAttr(Context->NoStackArgProbe);
      F.Fn.addFnAttr(Context->StrictFP);
      F.Fn.addFnAttr(Context->UWTable);
      F.Fn.addParamAttr(0, Context->ReadOnly);
      F.Fn.addParamAttr(0, Context->NoAlias);

      LLVM::Builder Builder(Context->LLContext);
      Builder.positionAtEnd(
          LLVM::BasicBlock::create(Context->LLContext, F.Fn, "entry"));

      const auto ArgSize = FuncType.getParamTypes().size();
      const auto RetSize =
          RTy.isVoidTy() ? 0 : FuncType.getReturnTypes().size();

      LLVM::Value Args = Builder.createArray(ArgSize, kValSize);
      LLVM::Value Rets = Builder.createArray(RetSize, kValSize);

      // Spill the wasm arguments; the first parameter is skipped.
      auto Param = F.Fn.getFirstParam();
      for (unsigned Slot = 0; Slot < ArgSize; ++Slot) {
        Param = Param.getNextParam();
        Builder.createValuePtrStore(Param, Args, Context->Int8Ty,
                                    Slot * kValSize);
      }

      auto CallTy = LLVM::Type::getFunctionType(
          Context->VoidTy,
          {Context->Int32Ty, Context->Int8PtrTy, Context->Int8PtrTy}, false);
      auto CallIntrinsic = Context->getIntrinsic(
          Builder, Executable::Intrinsics::kCall, CallTy);
      Builder.createCall(CallIntrinsic,
                         {Context->LLContext.getInt32(FuncID), Args, Rets});

      switch (RetSize) {
      case 0:
        Builder.createRetVoid();
        break;
      case 1:
        Builder.createRet(
            Builder.createValuePtrLoad(RTy, Rets, Context->Int8Ty));
        break;
      default:
        Builder.createAggregateRet(Builder.createArrayPtrLoad(
            RetSize, RTy, Rets, Context->Int8Ty, kValSize));
        break;
      }

      Context->Functions.emplace_back(TypeIdx, F, nullptr);
      Context->ImportCount++;
      break;
    }
    case ExternalType::Table: {
      // Table address type. The external type was checked in validation.
      const auto &TabType = Import.getExternalTableType();
      const auto AddrType = TabType.getLimit().getAddrType();
      auto AddrTy = toLLVMType(Context->LLContext, AddrType);
      Context->TableAddrTypes.push_back(AddrTy);
      break;
    }
    case ExternalType::Memory: {
      // Memory address type. The external type was checked in validation.
      const auto &MemType = Import.getExternalMemoryType();
      const auto AddrType = MemType.getLimit().getAddrType();
      auto AddrTy = toLLVMType(Context->LLContext, AddrType);
      Context->MemoryAddrTypes.push_back(AddrTy);
      break;
    }
    case ExternalType::Global: {
      // Global value type. The external type was checked in validation.
      const auto &GlobType = Import.getExternalGlobalType();
      const auto &ValType = GlobType.getValType();
      auto GlobalTy = toLLVMType(Context->LLContext, ValType);
      Context->Globals.push_back(GlobalTy);
      break;
    }
    case ExternalType::Tag: {
      // TODO: EXCEPTION - implement the AOT.
      break;
    }
    default:
      assumingUnreachable();
    }
  }
}
6411
6412
2.29k
// Export section overload: the body is empty, so no code is emitted for it.
void Compiler::compile(const AST::ExportSection &) noexcept {}
6413
6414
2.30k
/// Compile the global section: append the LLVM type of every defined
/// global's value type to Context->Globals, in section order.
void Compiler::compile(const AST::GlobalSection &GlobalSec) noexcept {
  for (const auto &Segment : GlobalSec.getContent()) {
    const auto &ValueType = Segment.getGlobalType().getValType();
    Context->Globals.push_back(toLLVMType(Context->LLContext, ValueType));
  }
}
6421
6422
/// Compile the memory section: append the LLVM type of every defined
/// memory's address type to Context->MemoryAddrTypes, in section order.
/// The data section parameter is not used.
void Compiler::compile(const AST::MemorySection &MemorySec,
                       const AST::DataSection &) noexcept {
  for (const auto &Memory : MemorySec.getContent()) {
    const auto AddrType = Memory.getLimit().getAddrType();
    Context->MemoryAddrTypes.push_back(
        toLLVMType(Context->LLContext, AddrType));
  }
}
6430
6431
/// Compile the table section: append the LLVM type of every defined table's
/// address type to Context->TableAddrTypes, in section order. The element
/// section parameter is not used.
void Compiler::compile(const AST::TableSection &TableSec,
                       const AST::ElementSection &) noexcept {
  for (const auto &Table : TableSec.getContent()) {
    const auto AddrType = Table.getTableType().getLimit().getAddrType();
    Context->TableAddrTypes.push_back(
        toLLVMType(Context->LLContext, AddrType));
  }
}
6439
6440
/// Compile every section needed before function bodies, in this order:
/// types, imports, globals, memories, tables, function declarations.
///
/// \param Module           module whose sections are compiled.
/// \param DeclarationsOnly forwarded to the type-section compilation.
void Compiler::compileSections(const AST::Module &Module,
                               bool DeclarationsOnly) noexcept {
  // Type section: wrappers for the composite types.
  compile(Module.getTypeSection(), DeclarationsOnly);
  // Import section.
  compile(Module.getImportSection());
  // Global section.
  compile(Module.getGlobalSection());
  // Memory section, paired with the data section.
  compile(Module.getMemorySection(), Module.getDataSection());
  // Table section, paired with the element section.
  compile(Module.getTableSection(), Module.getElementSection());
  // Function and code sections: declarations only, bodies come later.
  compileFunctionDeclarations(Module.getFunctionSection(),
                              Module.getCodeSection());
}
6457
6458
// Declares one LLVM function per code segment without emitting a body.
//
// FunctionSec supplies the type index of each defined function and CodeSec the
// matching code segment; both lists are walked in lockstep. Each function is
// added to Context->LLModule under the name "f<FuncID>", where FuncID is the
// size of Context->Functions at the moment of insertion, and a
// (TypeIdx, callee, &Code) entry is appended to Context->Functions.
//
// The stored pointer refers to the segment inside CodeSec, so CodeSec has to
// outlive any later use of that entry.
void Compiler::compileFunctionDeclarations(
6459
    const AST::FunctionSection &FunctionSec,
6460
2.30k
    const AST::CodeSection &CodeSec) noexcept {
6461
2.30k
  const auto &TypeIdxs = FunctionSec.getContent();
6462
2.30k
  const auto &CodeSegs = CodeSec.getContent();
6463
2.30k
  // NOTE(review): `assuming` is a project macro not shown here; presumably it
  // states an invariant rather than reporting a recoverable error -- confirm.
  assuming(TypeIdxs.size() == CodeSegs.size());
6464
6465
13.5k
  for (size_t I = 0; I < CodeSegs.size(); ++I) {
6466
11.2k
    const auto &TypeIdx = TypeIdxs[I];
6467
11.2k
    const auto &Code = CodeSegs[I];
6468
11.2k
    assuming(TypeIdx < Context->CompositeTypes.size());
6469
11.2k
    assuming(Context->CompositeTypes[TypeIdx]->isFunc());
6470
11.2k
    const auto &FuncType = Context->CompositeTypes[TypeIdx]->getFuncType();
6471
11.2k
    // Taken before the emplace_back below, so it is the index this function
    // will occupy in Context->Functions.
    const auto FuncID = Context->Functions.size();
6472
11.2k
    auto FTy = toLLVMType(Context->LLContext, Context->ExecCtxPtrTy, FuncType);
6473
11.2k
    LLVM::FunctionCallee F = {FTy, Context->LLModule.get().addFunction(
6474
11.2k
                                       FTy, LLVMExternalLinkage,
6475
11.2k
                                       fmt::format("f{}"sv, FuncID).c_str())};
6476
11.2k
    // Linkage-related settings applied to every declared function.
    F.Fn.setVisibility(LLVMProtectedVisibility);
6477
11.2k
    F.Fn.setDSOLocal(true);
6478
11.2k
    F.Fn.setDLLStorageClass(LLVMDLLExportStorageClass);
6479
11.2k
    F.Fn.addFnAttr(Context->NoStackArgProbe);
6480
11.2k
    F.Fn.addFnAttr(Context->StrictFP);
6481
11.2k
    F.Fn.addFnAttr(Context->UWTable);
6482
11.2k
    // Both attributes go on parameter 0. NOTE(review): presumably that is the
    // execution-context pointer, since the function type is built from
    // ExecCtxPtrTy -- confirm against toLLVMArgsType.
    F.Fn.addParamAttr(0, Context->ReadOnly);
6483
11.2k
    F.Fn.addParamAttr(0, Context->NoAlias);
6484
6485
11.2k
    Context->Functions.emplace_back(TypeIdx, F, &Code);
6486
11.2k
  }
6487
2.30k
}
6488
6489
11.2k
// Compiles the body of one already-declared function.
//
// LocalFuncIndex counts defined functions only; Context->ImportCount is added
// to obtain the position in Context->Functions.
//
// Returns an empty Expect on success, and also when the function already has
// basic blocks (nothing is recompiled). Returns ErrCode::Value::IllegalPath
// when the resulting index is out of range or the entry has no code segment.
// A failure inside FunctionCompiler::compile goes through EXPECTED_TRY.
// NOTE(review): EXPECTED_TRY is a project macro not shown here; presumably it
// returns the error to the caller -- confirm.
Expect<void> Compiler::compileFunctionBody(uint32_t LocalFuncIndex) noexcept {
6490
  // Find the function in the Functions list
6491
  // LocalFuncIndex is relative to the defined functions (not imports)
6492
11.2k
  // NOTE(review): the sum is narrowed to uint32_t, so a very large
  // LocalFuncIndex could wrap around and still pass the range check below.
  uint32_t GlobalFuncIndex = Context->ImportCount + LocalFuncIndex;
6493
11.2k
  if (GlobalFuncIndex >= Context->Functions.size()) {
6494
0
    spdlog::error("[lazy-jit]: function index {} out of range"sv,
6495
0
                  LocalFuncIndex);
6496
0
    return Unexpect(ErrCode::Value::IllegalPath);
6497
0
  }
6498
6499
11.2k
  // T: type index, F: LLVM callee, Code: pointer to the code segment, in the
  // order they are stored by compileFunctionDeclarations.
  auto &[T, F, Code] = Context->Functions[GlobalFuncIndex];
6500
11.2k
  // A null code pointer is reported as an import function.
  if (!Code) {
6501
0
    spdlog::error("[lazy-jit]: cannot compile import function {}"sv,
6502
0
                  LocalFuncIndex);
6503
0
    return Unexpect(ErrCode::Value::IllegalPath);
6504
0
  }
6505
6506
  // Check if already compiled (function has basic blocks)
6507
11.2k
  if (F.Fn.countBasicBlocks() > 0) {
6508
0
    spdlog::debug("[lazy-jit]: function {} already compiled"sv, LocalFuncIndex);
6509
0
    return {};
6510
0
  }
6511
6512
11.2k
  spdlog::debug("[lazy-jit]: compiling function {}"sv, LocalFuncIndex);
6513
6514
11.2k
  // Expand the (count, type) pairs from the code segment into one entry per
  // local. NOTE(review): no reserve() before the loop; the total count is
  // known from the pairs, so repeated reallocation could be avoided.
  std::vector<ValType> Locals;
6515
11.2k
  for (const auto &Local : Code->getLocals()) {
6516
694k
    for (unsigned I = 0; I < Local.first; ++I) {
6517
692k
      Locals.push_back(Local.second);
6518
692k
    }
6519
1.70k
  }
6520
6521
11.2k
  // The last four constructor arguments are read from Conf: interruptible,
  // instruction counting, cost measuring, and whether the run mode is LazyJIT.
  FunctionCompiler FC(
6522
11.2k
      *Context, F, Locals, Conf.getCompilerConfigure().isInterruptible(),
6523
11.2k
      Conf.getStatisticsConfigure().isInstructionCounting(),
6524
11.2k
      Conf.getStatisticsConfigure().isCostMeasuring(),
6525
11.2k
      Conf.getRuntimeConfigure().getRunMode() == RunMode::LazyJIT);
6526
11.2k
  auto Type = Context->resolveBlockType(T);
6527
11.2k
  EXPECTED_TRY(FC.compile(*Code, std::move(Type)));
6528
11.1k
  // Only reached when FC.compile did not fail.
  F.Fn.eliminateUnreachableBlocks();
6529
6530
11.1k
  return {};
6531
11.2k
}
6532
6533
// Builds the module that holds everything except function bodies.
//
// Steps, under Mutex: create a fresh Data and LLVM module, install a temporary
// CompileContext, add the version global, run compileSections with
// DeclarationsOnly == false, compile the export section, finalize the
// intrinsics table and run the module verifier.
//
// Returns the populated Data, or ErrCode::Value::NotValidated when the module
// has not been validated.
Expect<LLVM::Data>
6534
0
LLVM::Compiler::compileInfrastructure(const AST::Module &Module) noexcept {
6535
  // Check the module is validated.
6536
0
  if (unlikely(!Module.getIsValidated())) {
6537
0
    spdlog::error(ErrCode::Value::NotValidated);
6538
0
    return Unexpect(ErrCode::Value::NotValidated);
6539
0
  }
6540
6541
0
  // Taken after the validation check, so the early return above runs
  // without the lock.
  std::unique_lock Lock(Mutex);
6542
0
  spdlog::info("[lazy-jit]: compile infrastructure start"sv);
6543
6544
0
  Data D;
6545
0
  auto LLContext = initLLVMModule(D);
6546
0
  auto &LLModule = D.extract().LLModule;
6547
6548
0
  CompileContext NewContext(LLContext, LLModule,
6549
0
                            Conf.getCompilerConfigure().isGenericBinary());
6550
0
  // Points the member Context at NewContext for the rest of this scope and
  // resets it to nullptr on exit, so Context never outlives NewContext.
  RAIICleanup Cleanup(Context, &NewContext);
6551
0
  Context->addVersionGlobal();
6552
6553
  // Compile all sections and the function declarations without bodies.
6554
0
  compileSections(Module, false);
6555
  // Compile ExportSection
6556
0
  compile(Module.getExportSection());
6557
6558
  // Set initializer for constant value
6559
0
  Context->finalizeIntrinsicsTable();
6560
0
  // NOTE(review): the outcome of verify() is not inspected here; if it
  // reports failure through its return value, that failure is dropped and
  // the function still returns success -- confirm this is intended.
  LLModule.verify(LLVMPrintMessageAction);
6561
6562
0
  spdlog::info("[lazy-jit]: infrastructure compilation done"sv);
6563
6564
0
  return Expect<Data>{std::move(D)};
6565
0
}
6566
6567
// Compiles a batch of function bodies into a fresh module inside LLData.
//
// LocalFuncIndices lists the functions to compile, as indices relative to the
// defined functions; the list is sorted and de-duplicated before use. The
// module held by LLData is reset and re-initialized, all sections are
// re-declared with DeclarationsOnly == true, each requested body is compiled,
// the module is verified and then optimized.
//
// Returns LLData (moved) on success. Returns ErrCode::Value::NotValidated for
// an unvalidated module and ErrCode::Value::IllegalPath for an empty index
// list. Failures from compileFunctionBody and optimize go through
// EXPECTED_TRY.
// NOTE(review): EXPECTED_TRY is a project macro not shown here; presumably it
// returns the error to the caller -- confirm.
Expect<LLVM::Data>
6568
Compiler::compileFunctions(Data &&LLData, const AST::Module &Module,
6569
0
                           Span<const uint32_t> LocalFuncIndices) noexcept {
6570
0
  if (unlikely(!Module.getIsValidated())) {
6571
0
    spdlog::error(ErrCode::Value::NotValidated);
6572
0
    return Unexpect(ErrCode::Value::NotValidated);
6573
0
  }
6574
0
  if (unlikely(LocalFuncIndices.empty())) {
6575
0
    spdlog::error("[lazy-jit]: compileFunctions with empty index list"sv);
6576
0
    return Unexpect(ErrCode::Value::IllegalPath);
6577
0
  }
6578
6579
0
  std::unique_lock Lock(Mutex);
6580
0
  // Work on a sorted copy with duplicates removed, so each requested
  // function is visited once and in ascending index order.
  std::vector<uint32_t> Sorted(LocalFuncIndices.begin(),
6581
0
                               LocalFuncIndices.end());
6582
0
  std::sort(Sorted.begin(), Sorted.end());
6583
0
  Sorted.erase(std::unique(Sorted.begin(), Sorted.end()), Sorted.end());
6584
6585
0
  spdlog::debug("[lazy-jit]: compile functions batch ({}) start"sv,
6586
0
                Sorted.size());
6587
6588
  // Each batch starts from a fresh module sharing the same thread-safe
6589
  // context: on success the previous batch module was consumed by the JIT,
6590
  // and after a failed batch the leftover module must be discarded so its
6591
  // declarations are not re-added on top of themselves.
6592
0
  LLData.extract().resetModule();
6593
0
  auto LLContext = initLLVMModule(LLData);
6594
0
  auto &LLModule = LLData.extract().LLModule;
6595
6596
0
  CompileContext NewContext(LLContext, LLModule,
6597
0
                            Conf.getCompilerConfigure().isGenericBinary());
6598
0
  // Points the member Context at NewContext for the rest of this scope and
  // resets it to nullptr on exit, including the early error returns below.
  RAIICleanup Cleanup(Context, &NewContext);
6599
6600
  // Emit the type wrappers as external declarations resolved against the
6601
  // infrastructure module, then declare the functions and compile the
6602
  // requested bodies.
6603
0
  compileSections(Module, true);
6604
6605
0
  // The first body that fails to compile ends the batch.
  for (uint32_t FuncIndex : Sorted) {
6606
0
    EXPECTED_TRY(compileFunctionBody(FuncIndex));
6607
0
  }
6608
6609
0
  spdlog::info("[lazy-jit]: verify batch ({} funcs) start"sv, Sorted.size());
6610
0
  // NOTE(review): the outcome of verify() is not inspected here; if it
  // reports failure through its return value, optimization still runs on the
  // module -- confirm this is intended.
  LLModule.verify(LLVMPrintMessageAction);
6611
0
  spdlog::info("[lazy-jit]: verify batch ({} funcs) done"sv, Sorted.size());
6612
6613
0
  auto &TM = LLData.extract().TM;
6614
0
  EXPECTED_TRY(optimize(LLModule, TM));
6615
6616
0
  spdlog::debug("[lazy-jit]: compile functions batch ({}) done"sv,
6617
0
                Sorted.size());
6618
0
  return Expect<Data>{std::move(LLData)};
6619
0
}
6620
6621
} // namespace LLVM
6622
} // namespace WasmEdge