Coverage Report

Created: 2025-11-15 06:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wasm3/source/m3_compile.c
Line
Count
Source
1
//
2
//  m3_compile.c
3
//
4
//  Created by Steven Massey on 4/17/19.
5
//  Copyright © 2019 Steven Massey. All rights reserved.
6
//
7
8
// Allow using opcodes for compilation process
9
#define M3_COMPILE_OPCODES
10
11
#include "m3_env.h"
12
#include "m3_compile.h"
13
#include "m3_exec.h"
14
#include "m3_exception.h"
15
#include "m3_info.h"
16
17
//----- EMIT --------------------------------------------------------------------------------------------------------------
18
19
static inline
20
pc_t GetPC (IM3Compilation o)
21
230
{
22
230
    return GetPagePC (o->page);
23
230
}
24
25
static M3_NOINLINE
26
M3Result  EnsureCodePageNumLines  (IM3Compilation o, u32 i_numLines)
27
49.3k
{
28
49.3k
    M3Result result = m3Err_none;
29
30
49.3k
    i_numLines += 2; // room for Bridge
31
32
49.3k
    if (NumFreeLines (o->page) < i_numLines)
33
35
    {
34
35
        IM3CodePage page = AcquireCodePageWithCapacity (o->runtime, i_numLines);
35
36
35
        if (page)
37
35
        {
38
35
            m3log (emit, "bridging new code page from: %d %p (free slots: %d) to: %d", o->page->info.sequence, GetPC (o), NumFreeLines (o->page), page->info.sequence);
39
35
            d_m3Assert (NumFreeLines (o->page) >= 2);
40
41
35
            EmitWord (o->page, op_Branch);
42
35
            EmitWord (o->page, GetPagePC (page));
43
44
35
            ReleaseCodePage (o->runtime, o->page);
45
46
35
            o->page = page;
47
35
        }
48
0
        else result = m3Err_mallocFailedCodePage;
49
35
    }
50
51
49.3k
    return result;
52
49.3k
}
53
54
static M3_NOINLINE
55
M3Result  EmitOp  (IM3Compilation o, IM3Operation i_operation)
56
50.3k
{
57
50.3k
    M3Result result = m3Err_none;                                 d_m3Assert (i_operation or IsStackPolymorphic (o));
58
59
    // it's OK for page to be null; when compile-walking the bytecode without emitting
60
50.3k
    if (o->page)
61
49.3k
    {
62
# if d_m3EnableOpTracing
63
        if (i_operation != op_DumpStack)
64
            o->numEmits++;
65
# endif
66
67
        // have execution jump to a new page if slots are critically low
68
49.3k
        result = EnsureCodePageNumLines (o, d_m3CodePageFreeLinesThreshold);
69
70
49.3k
        if (not result)
71
49.3k
        {                                                           if (d_m3LogEmit) log_emit (o, i_operation);
72
# if d_m3RecordBacktraces
73
            EmitMappingEntry (o->page, o->lastOpcodeStart - o->module->wasmStart);
74
# endif // d_m3RecordBacktraces
75
49.3k
            EmitWord (o->page, i_operation);
76
49.3k
        }
77
49.3k
    }
78
79
50.3k
    return result;
80
50.3k
}
81
82
// Push an immediate constant into the M3 codestream
83
static M3_NOINLINE
84
void  EmitConstant32  (IM3Compilation o, const u32 i_immediate)
85
84
{
86
84
    if (o->page)
87
84
        EmitWord32 (o->page, i_immediate);
88
84
}
89
90
static M3_NOINLINE
91
void  EmitSlotOffset  (IM3Compilation o, const i32 i_offset)
92
86.3k
{
93
86.3k
    if (o->page)
94
85.8k
        EmitWord32 (o->page, i_offset);
95
86.3k
}
96
97
static M3_NOINLINE
98
pc_t  EmitPointer  (IM3Compilation o, const void * const i_pointer)
99
1.92k
{
100
1.92k
    pc_t ptr = GetPagePC (o->page);
101
102
1.92k
    if (o->page)
103
1.33k
        EmitWord (o->page, i_pointer);
104
105
1.92k
    return ptr;
106
1.92k
}
107
108
static M3_NOINLINE
109
void * ReservePointer (IM3Compilation o)
110
56
{
111
56
    pc_t ptr = GetPagePC (o->page);
112
56
    EmitPointer (o, NULL);
113
56
    return (void *) ptr;
114
56
}
115
116
117
//-------------------------------------------------------------------------------------------------------------------------
118
119
#define d_indent "     | %s"
120
121
// just want less letters and numbers to stare at down the way in the compiler table
122
#define i_32    c_m3Type_i32
123
#define i_64    c_m3Type_i64
124
#define f_32    c_m3Type_f32
125
#define f_64    c_m3Type_f64
126
#define none    c_m3Type_none
127
#define any     (u8)-1
128
129
#if d_m3HasFloat
130
#   define FPOP(x) x
131
#else
132
#   define FPOP(x) NULL
133
#endif
134
135
static const IM3Operation c_preserveSetSlot [] = { NULL, op_PreserveSetSlot_i32,       op_PreserveSetSlot_i64,
136
                                                    FPOP(op_PreserveSetSlot_f32), FPOP(op_PreserveSetSlot_f64) };
137
static const IM3Operation c_setSetOps [] =       { NULL, op_SetSlot_i32,               op_SetSlot_i64,
138
                                                    FPOP(op_SetSlot_f32),         FPOP(op_SetSlot_f64) };
139
static const IM3Operation c_setGlobalOps [] =    { NULL, op_SetGlobal_i32,             op_SetGlobal_i64,
140
                                                    FPOP(op_SetGlobal_f32),       FPOP(op_SetGlobal_f64) };
141
static const IM3Operation c_setRegisterOps [] =  { NULL, op_SetRegister_i32,           op_SetRegister_i64,
142
                                                    FPOP(op_SetRegister_f32),     FPOP(op_SetRegister_f64) };
143
144
static const IM3Operation c_intSelectOps [2] [4] =      { { op_Select_i32_rss, op_Select_i32_srs, op_Select_i32_ssr, op_Select_i32_sss },
145
                                                          { op_Select_i64_rss, op_Select_i64_srs, op_Select_i64_ssr, op_Select_i64_sss } };
146
#if d_m3HasFloat
147
static const IM3Operation c_fpSelectOps [2] [2] [3] = { { { op_Select_f32_sss, op_Select_f32_srs, op_Select_f32_ssr },        // selector in slot
148
                                                          { op_Select_f32_rss, op_Select_f32_rrs, op_Select_f32_rsr } },      // selector in reg
149
                                                        { { op_Select_f64_sss, op_Select_f64_srs, op_Select_f64_ssr },        // selector in slot
150
                                                          { op_Select_f64_rss, op_Select_f64_rrs, op_Select_f64_rsr } } };    // selector in reg
151
#endif
152
153
// all args & returns are 64-bit aligned, so use 2 slots for a d_m3Use32BitSlots=1 build
154
static const u16 c_ioSlotCount = sizeof (u64) / sizeof (m3slot_t);
155
156
static
157
M3Result  AcquireCompilationCodePage  (IM3Compilation o, IM3CodePage * o_codePage)
158
396
{
159
396
    M3Result result = m3Err_none;
160
161
396
    IM3CodePage page = AcquireCodePage (o->runtime);
162
163
396
    if (page)
164
396
    {
165
#       if (d_m3EnableCodePageRefCounting)
166
        {
167
            if (o->function)
168
            {
169
                IM3Function func = o->function;
170
                page->info.usageCount++;
171
172
                u32 index = func->numCodePageRefs++;
173
_               (m3ReallocArray (& func->codePageRefs, IM3CodePage, func->numCodePageRefs, index));
174
                func->codePageRefs [index] = page;
175
            }
176
        }
177
#       endif
178
396
    }
179
396
    else _throw (m3Err_mallocFailedCodePage);
180
181
396
    _catch:
182
183
396
    * o_codePage = page;
184
185
396
    return result;
186
396
}
187
188
static inline
189
void  ReleaseCompilationCodePage  (IM3Compilation o)
190
388
{
191
388
    ReleaseCodePage (o->runtime, o->page);
192
388
}
193
194
static inline
195
u16 GetTypeNumSlots (u8 i_type)
196
410k
{
197
410k
#   if d_m3Use32BitSlots
198
410k
        return Is64BitType (i_type) ? 2 : 1;
199
#   else
200
        return 1;
201
#   endif
202
410k
}
203
204
static inline
205
void  AlignSlotToType  (u16 * io_slot, u8 i_type)
206
47.0k
{
207
    // align 64-bit words to even slots (if d_m3Use32BitSlots)
208
47.0k
    u16 numSlots = GetTypeNumSlots (i_type);
209
210
47.0k
    u16 mask = numSlots - 1;
211
47.0k
    * io_slot = (* io_slot + mask) & ~mask;
212
47.0k
}
213
214
static inline
215
i16  GetStackTopIndex  (IM3Compilation o)
216
4.80k
{                                                           d_m3Assert (o->stackIndex > o->stackFirstDynamicIndex or IsStackPolymorphic (o));
217
4.80k
    return o->stackIndex - 1;
218
4.80k
}
219
220
221
// Items in the static portion of the stack (args/locals) are hidden from GetStackTypeFromTop ()
222
// In other words, only "real" Wasm stack items can be inspected.  This is important when
223
// returning values, etc. and you need an accurate wasm-view of the stack.
224
static
225
u8  GetStackTypeFromTop  (IM3Compilation o, u16 i_offset)
226
18.8k
{
227
18.8k
    u8 type = c_m3Type_none;
228
229
18.8k
    ++i_offset;
230
18.8k
    if (o->stackIndex >= i_offset)
231
18.7k
    {
232
18.7k
        u16 index = o->stackIndex - i_offset;
233
234
18.7k
        if (index >= o->stackFirstDynamicIndex)
235
18.0k
            type = o->typeStack [index];
236
18.7k
    }
237
238
18.8k
    return type;
239
18.8k
}
240
241
static inline
242
u8  GetStackTopType  (IM3Compilation o)
243
14.7k
{
244
14.7k
    return GetStackTypeFromTop (o, 0);
245
14.7k
}
246
247
static inline
248
u8  GetStackTypeFromBottom  (IM3Compilation o, u16 i_offset)
249
224k
{
250
224k
    u8 type = c_m3Type_none;
251
252
224k
    if (i_offset < o->stackIndex)
253
224k
        type = o->typeStack [i_offset];
254
255
224k
    return type;
256
224k
}
257
258
259
10.5k
static inline bool  IsConstantSlot    (IM3Compilation o, u16 i_slot)  { return (i_slot >= o->slotFirstConstIndex and i_slot < o->slotMaxConstIndex); }
260
5.41k
static inline bool  IsSlotAllocated   (IM3Compilation o, u16 i_slot)  { return o->m3Slots [i_slot]; }
261
262
static inline
263
bool  IsStackIndexInRegister  (IM3Compilation o, i32 i_stackIndex)
264
39.6k
{                                                                           d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));
265
39.6k
    if (i_stackIndex >= 0 and i_stackIndex < o->stackIndex)
266
39.6k
        return (o->wasmStack [i_stackIndex] >= d_m3Reg0SlotAlias);
267
60
    else
268
60
        return false;
269
39.6k
}
270
271
771
static inline u16   GetNumBlockValuesOnStack      (IM3Compilation o)  { return o->stackIndex - o->block.blockStackIndex; }
272
273
1.63k
static inline bool  IsStackTopInRegister          (IM3Compilation o)  { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o));       }
274
298
static inline bool  IsStackTopMinus1InRegister    (IM3Compilation o)  { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 1);   }
275
13
static inline bool  IsStackTopMinus2InRegister    (IM3Compilation o)  { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 2);   }
276
277
1.05k
static inline bool  IsStackTopInSlot              (IM3Compilation o)  { return not IsStackTopInRegister (o); }
278
279
132
static inline bool  IsValidSlot                   (u16 i_slot)        { return (i_slot < d_m3MaxFunctionSlots); }
280
281
static inline
282
u16  GetStackTopSlotNumber  (IM3Compilation o)
283
816
{
284
816
    i16 i = GetStackTopIndex (o);
285
286
816
    u16 slot = c_slotUnused;
287
288
816
    if (i >= 0)
289
786
        slot = o->wasmStack [i];
290
291
816
    return slot;
292
816
}
293
294
295
// from bottom
296
static inline
297
u16  GetSlotForStackIndex  (IM3Compilation o, u16 i_stackIndex)
298
428k
{                                                                   d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));
299
428k
    u16 slot = c_slotUnused;
300
301
428k
    if (i_stackIndex < o->stackIndex)
302
428k
        slot = o->wasmStack [i_stackIndex];
303
304
428k
    return slot;
305
428k
}
306
307
static inline
308
u16  GetExtraSlotForStackIndex  (IM3Compilation o, u16 i_stackIndex)
309
155k
{
310
155k
    u16 baseSlot = GetSlotForStackIndex (o, i_stackIndex);
311
312
155k
    if (baseSlot != c_slotUnused)
313
155k
    {
314
155k
        u16 extraSlot = GetTypeNumSlots (GetStackTypeFromBottom (o, i_stackIndex)) - 1;
315
155k
        baseSlot += extraSlot;
316
155k
    }
317
318
155k
    return baseSlot;
319
155k
}
320
321
322
static inline
323
void  TouchSlot  (IM3Compilation o, u16 i_slot)
324
102k
{
325
    // op_Entry uses this value to track and detect stack overflow
326
102k
    o->maxStackSlots = M3_MAX (o->maxStackSlots, i_slot + 1);
327
102k
}
328
329
static inline
330
void  MarkSlotAllocated  (IM3Compilation o, u16 i_slot)
331
88.8k
{                                                                   d_m3Assert (o->m3Slots [i_slot] == 0); // shouldn't be already allocated
332
88.8k
    o->m3Slots [i_slot] = 1;
333
334
88.8k
    o->slotMaxAllocatedIndexPlusOne = M3_MAX (o->slotMaxAllocatedIndexPlusOne, i_slot + 1);
335
336
88.8k
    TouchSlot (o, i_slot);
337
88.8k
}
338
339
static inline
340
void  MarkSlotsAllocated  (IM3Compilation o, u16 i_slot, u16 i_numSlots)
341
62.9k
{
342
146k
    while (i_numSlots--)
343
83.2k
        MarkSlotAllocated (o, i_slot++);
344
62.9k
}
345
346
static inline
347
void  MarkSlotsAllocatedByType  (IM3Compilation o, u16 i_slot, u8 i_type)
348
25.3k
{
349
25.3k
    u16 numSlots = GetTypeNumSlots (i_type);
350
25.3k
    MarkSlotsAllocated (o, i_slot, numSlots);
351
25.3k
}
352
353
354
static
355
M3Result  AllocateSlotsWithinRange  (IM3Compilation o, u16 * o_slot, u8 i_type, u16 i_startSlot, u16 i_endSlot)
356
46.5k
{
357
46.5k
    M3Result result = m3Err_functionStackOverflow;
358
359
46.5k
    u16 numSlots = GetTypeNumSlots (i_type);
360
46.5k
    u16 searchOffset = numSlots - 1;
361
362
46.5k
    AlignSlotToType (& i_startSlot, i_type);
363
364
    // search for 1 or 2 consecutive slots in the execution stack
365
46.5k
    u16 i = i_startSlot;
366
7.50M
    while (i + searchOffset < i_endSlot)
367
7.49M
    {
368
7.49M
        if (o->m3Slots [i] == 0 and o->m3Slots [i + searchOffset] == 0)
369
37.5k
        {
370
37.5k
            MarkSlotsAllocated (o, i, numSlots);
371
372
37.5k
            * o_slot = i;
373
37.5k
            result = m3Err_none;
374
37.5k
            break;
375
37.5k
        }
376
377
        // keep 2-slot allocations even-aligned
378
7.46M
        i += numSlots;
379
7.46M
    }
380
381
46.5k
    return result;
382
46.5k
}
383
384
static inline
385
M3Result  AllocateSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
386
37.5k
{
387
37.5k
    return AllocateSlotsWithinRange (o, o_slot, i_type, o->slotFirstDynamicIndex, d_m3MaxFunctionSlots);
388
37.5k
}
389
390
static inline
391
M3Result  AllocateConstantSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
392
8.96k
{
393
8.96k
    u16 maxTableIndex = o->slotFirstConstIndex + d_m3MaxConstantTableSize;
394
8.96k
    return AllocateSlotsWithinRange (o, o_slot, i_type, o->slotFirstConstIndex, M3_MIN(o->slotFirstDynamicIndex, maxTableIndex));
395
8.96k
}
396
397
398
// TOQUE: this usage count system could be eliminated. real world code doesn't frequently trigger it.  just copy to multiple
399
// unique slots.
400
static inline
401
M3Result  IncrementSlotUsageCount  (IM3Compilation o, u16 i_slot)
402
0
{                                                                                       d_m3Assert (i_slot < d_m3MaxFunctionSlots);
403
0
    M3Result result = m3Err_none;                                                       d_m3Assert (o->m3Slots [i_slot] > 0);
404
405
    // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
406
    // would scan 'wasmStack' to determine the actual usage count
407
0
    if (o->m3Slots [i_slot] < 0xFF)
408
0
    {
409
0
        o->m3Slots [i_slot]++;
410
0
    }
411
0
    else result = "slot usage count overflow";
412
413
0
    return result;
414
0
}
415
416
static inline
417
void DeallocateSlot (IM3Compilation o, i16 i_slot, u8 i_type)
418
41.4k
{                                                                                       d_m3Assert (i_slot >= o->slotFirstDynamicIndex);
419
41.4k
                                                                                        d_m3Assert (i_slot < o->slotMaxAllocatedIndexPlusOne);
420
92.4k
    for (u16 i = 0; i < GetTypeNumSlots (i_type); ++i, ++i_slot)
421
51.0k
    {                                                                                   d_m3Assert (o->m3Slots [i_slot]);
422
51.0k
        -- o->m3Slots [i_slot];
423
51.0k
    }
424
41.4k
}
425
426
427
static inline
428
bool  IsRegisterTypeAllocated  (IM3Compilation o, u8 i_type)
429
20
{
430
20
    return IsRegisterAllocated (o, IsFpType (i_type));
431
20
}
432
433
static inline
434
void  AllocateRegister  (IM3Compilation o, u32 i_register, u16 i_stackIndex)
435
502
{                                                                                       d_m3Assert (not IsRegisterAllocated (o, i_register));
436
502
    o->regStackIndexPlusOne [i_register] = i_stackIndex + 1;
437
502
}
438
439
static inline
440
void  DeallocateRegister  (IM3Compilation o, u32 i_register)
441
483
{                                                                                       d_m3Assert (IsRegisterAllocated (o, i_register));
442
483
    o->regStackIndexPlusOne [i_register] = c_m3RegisterUnallocated;
443
483
}
444
445
static inline
446
u16  GetRegisterStackIndex  (IM3Compilation o, u32 i_register)
447
106
{                                                                                       d_m3Assert (IsRegisterAllocated (o, i_register));
448
106
    return o->regStackIndexPlusOne [i_register] - 1;
449
106
}
450
451
u16  GetMaxUsedSlotPlusOne  (IM3Compilation o)
452
204
{
453
5.47k
    while (o->slotMaxAllocatedIndexPlusOne > o->slotFirstDynamicIndex)
454
5.40k
    {
455
5.40k
        if (IsSlotAllocated (o, o->slotMaxAllocatedIndexPlusOne - 1))
456
135
            break;
457
458
5.27k
        o->slotMaxAllocatedIndexPlusOne--;
459
5.27k
    }
460
461
#   ifdef DEBUG
462
        u16 maxSlot = o->slotMaxAllocatedIndexPlusOne;
463
        while (maxSlot < d_m3MaxFunctionSlots)
464
        {
465
            d_m3Assert (o->m3Slots [maxSlot] == 0);
466
            maxSlot++;
467
        }
468
#   endif
469
470
204
    return o->slotMaxAllocatedIndexPlusOne;
471
204
}
472
473
static
474
M3Result  PreserveRegisterIfOccupied  (IM3Compilation o, u8 i_registerType)
475
1.38k
{
476
1.38k
    M3Result result = m3Err_none;
477
478
1.38k
    u32 regSelect = IsFpType (i_registerType);
479
480
1.38k
    if (IsRegisterAllocated (o, regSelect))
481
104
    {
482
104
        u16 stackIndex = GetRegisterStackIndex (o, regSelect);
483
104
        DeallocateRegister (o, regSelect);
484
485
104
        u8 type = GetStackTypeFromBottom (o, stackIndex);
486
487
        // and point to a exec slot
488
104
        u16 slot = c_slotUnused;
489
104
_       (AllocateSlots (o, & slot, type));
490
104
        o->wasmStack [stackIndex] = slot;
491
492
104
_       (EmitOp (o, c_setSetOps [type]));
493
104
        EmitSlotOffset (o, slot);
494
104
    }
495
496
1.38k
    _catch: return result;
497
1.38k
}
498
499
500
// all values must be in slots before entering loop, if, and else blocks
501
// otherwise they'd end up preserve-copied in the block to probably different locations (if/else)
502
static inline
503
M3Result  PreserveRegisters  (IM3Compilation o)
504
451
{
505
451
    M3Result result;
506
507
451
_   (PreserveRegisterIfOccupied (o, c_m3Type_f64));
508
451
_   (PreserveRegisterIfOccupied (o, c_m3Type_i64));
509
510
451
    _catch: return result;
511
451
}
512
513
static
514
M3Result  PreserveNonTopRegisters  (IM3Compilation o)
515
38
{
516
38
    M3Result result = m3Err_none;
517
518
38
    i16 stackTop = GetStackTopIndex (o);
519
520
38
    if (stackTop >= 0)
521
38
    {
522
38
        if (IsRegisterAllocated (o, 0))     // r0
523
2
        {
524
2
            if (GetRegisterStackIndex (o, 0) != stackTop)
525
2
_               (PreserveRegisterIfOccupied (o, c_m3Type_i64));
526
2
        }
527
528
38
        if (IsRegisterAllocated (o, 1))     // fp0
529
0
        {
530
0
            if (GetRegisterStackIndex (o, 1) != stackTop)
531
0
_               (PreserveRegisterIfOccupied (o, c_m3Type_f64));
532
0
        }
533
38
    }
534
535
38
    _catch: return result;
536
38
}
537
538
539
//----------------------------------------------------------------------------------------------------------------------
540
541
static
542
M3Result  Push  (IM3Compilation o, u8 i_type, u16 i_slot)
543
63.7k
{
544
63.7k
    M3Result result = m3Err_none;
545
546
#if !d_m3HasFloat
547
    if (i_type == c_m3Type_f32 || i_type == c_m3Type_f64) {
548
        return m3Err_unknownOpcode;
549
    }
550
#endif
551
552
63.7k
    u16 stackIndex = o->stackIndex++;                                       // printf ("push: %d\n", (i32) i);
553
554
63.7k
    if (stackIndex < d_m3MaxFunctionStackHeight)
555
62.7k
    {
556
62.7k
        o->wasmStack        [stackIndex] = i_slot;
557
62.7k
        o->typeStack        [stackIndex] = i_type;
558
559
62.7k
        if (IsRegisterSlotAlias (i_slot))
560
502
        {
561
502
            u32 regSelect = IsFpRegisterSlotAlias (i_slot);
562
502
            AllocateRegister (o, regSelect, stackIndex);
563
502
        }
564
565
62.7k
        if (d_m3LogWasmStack) dump_type_stack (o);
566
62.7k
    }
567
942
    else result = m3Err_functionStackOverflow;
568
569
63.7k
    return result;
570
63.7k
}
571
572
static inline
573
M3Result  PushRegister  (IM3Compilation o, u8 i_type)
574
507
{
575
507
    M3Result result = m3Err_none;                                                       d_m3Assert ((u16) d_m3Reg0SlotAlias > (u16) d_m3MaxFunctionSlots);
576
507
    u16 slot = IsFpType (i_type) ? d_m3Fp0SlotAlias : d_m3Reg0SlotAlias;                d_m3Assert (i_type or IsStackPolymorphic (o));
577
578
507
_   (Push (o, i_type, slot));
579
580
507
    _catch: return result;
581
502
}
582
583
static
584
M3Result  Pop  (IM3Compilation o)
585
45.7k
{
586
45.7k
    M3Result result = m3Err_none;
587
588
45.7k
    if (o->stackIndex > o->block.blockStackIndex)
589
42.6k
    {
590
42.6k
        o->stackIndex--;                                                //  printf ("pop: %d\n", (i32) o->stackIndex);
591
592
42.6k
        u16 slot = o->wasmStack [o->stackIndex];
593
42.6k
        u8 type = o->typeStack [o->stackIndex];
594
595
42.6k
        if (IsRegisterSlotAlias (slot))
596
379
        {
597
379
            u32 regSelect = IsFpRegisterSlotAlias (slot);
598
379
            DeallocateRegister (o, regSelect);
599
379
        }
600
42.2k
        else if (slot >= o->slotFirstDynamicIndex)
601
41.4k
        {
602
41.4k
            DeallocateSlot (o, slot, type);
603
41.4k
        }
604
42.6k
    }
605
3.07k
    else if (not IsStackPolymorphic (o))
606
0
        result = m3Err_functionStackUnderrun;
607
608
45.7k
    return result;
609
45.7k
}
610
611
static
612
M3Result  PopType  (IM3Compilation o, u8 i_type)
613
13.8k
{
614
13.8k
    M3Result result = m3Err_none;
615
616
13.8k
    u8 topType = GetStackTopType (o);
617
618
13.8k
    if (i_type == topType or o->block.isPolymorphic)
619
13.8k
    {
620
13.8k
_       (Pop (o));
621
13.8k
    }
622
13.8k
    else _throw (m3Err_typeMismatch);
623
624
13.8k
    _catch:
625
13.8k
    return result;
626
13.8k
}
627
628
static
629
M3Result  _PushAllocatedSlotAndEmit  (IM3Compilation o, u8 i_type, bool i_doEmit)
630
37.4k
{
631
37.4k
    M3Result result = m3Err_none;
632
633
37.4k
    u16 slot = c_slotUnused;
634
635
37.4k
_   (AllocateSlots (o, & slot, i_type));
636
37.4k
_   (Push (o, i_type, slot));
637
638
37.4k
    if (i_doEmit)
639
9.96k
        EmitSlotOffset (o, slot);
640
641
//    printf ("push: %d\n", (u32) slot);
642
643
37.4k
    _catch: return result;
644
37.4k
}
645
646
static inline
647
M3Result  PushAllocatedSlotAndEmit  (IM3Compilation o, u8 i_type)
648
9.96k
{
649
9.96k
    return _PushAllocatedSlotAndEmit (o, i_type, true);
650
9.96k
}
651
652
static inline
653
M3Result  PushAllocatedSlot  (IM3Compilation o, u8 i_type)
654
27.5k
{
655
27.5k
    return _PushAllocatedSlotAndEmit (o, i_type, false);
656
27.5k
}
657
658
static
659
M3Result  PushConst  (IM3Compilation o, u64 i_word, u8 i_type)
660
29.7k
{
661
29.7k
    M3Result result = m3Err_none;
662
663
    // Early-exit if we're not emitting
664
29.7k
    if (!o->page) return result;
665
666
29.7k
    bool matchFound = false;
667
8.96k
    bool is64BitType = Is64BitType (i_type);
668
669
8.96k
    u16 numRequiredSlots = GetTypeNumSlots (i_type);
670
8.96k
    u16 numUsedConstSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;
671
672
    // search for duplicate matching constant slot to reuse
673
8.96k
    if (numRequiredSlots == 2 and numUsedConstSlots >= 2)
674
3
    {
675
3
        u16 firstConstSlot = o->slotFirstConstIndex;
676
3
        AlignSlotToType (& firstConstSlot, c_m3Type_i64);
677
678
6
        for (u16 slot = firstConstSlot; slot < o->slotMaxConstIndex - 1; slot += 2)
679
3
        {
680
3
            if (IsSlotAllocated (o, slot) and IsSlotAllocated (o, slot + 1))
681
3
            {
682
3
                u64 constant;
683
3
                memcpy (&constant, &o->constants [slot - o->slotFirstConstIndex], sizeof(constant));
684
685
3
                if (constant == i_word)
686
0
                {
687
0
                    matchFound = true;
688
0
_                   (Push (o, i_type, slot));
689
0
                    break;
690
0
                }
691
3
            }
692
3
        }
693
3
    }
694
8.95k
    else if (numRequiredSlots == 1)
695
4.12k
    {
696
4.12k
        for (u16 i = 0; i < numUsedConstSlots; ++i)
697
1
        {
698
1
            u16 slot = o->slotFirstConstIndex + i;
699
700
1
            if (IsSlotAllocated (o, slot))
701
1
            {
702
1
                bool matches;
703
1
                if (is64BitType) {
704
0
                    u64 constant;
705
0
                    memcpy (&constant, &o->constants [i], sizeof(constant));
706
0
                    matches = (constant == i_word);
707
1
                } else {
708
1
                    u32 constant;
709
1
                    memcpy (&constant, &o->constants [i], sizeof(constant));
710
1
                    matches = (constant == i_word);
711
1
                }
712
1
                if (matches)
713
0
                {
714
0
                    matchFound = true;
715
0
_                   (Push (o, i_type, slot));
716
0
                    break;
717
0
                }
718
1
            }
719
1
        }
720
4.12k
    }
721
722
8.96k
    if (not matchFound)
723
8.96k
    {
724
8.96k
        u16 slot = c_slotUnused;
725
8.96k
        result = AllocateConstantSlots (o, & slot, i_type);
726
727
8.96k
        if (result || slot == c_slotUnused) // no more constant table space; use inline constants
728
8.94k
        {
729
8.94k
            result = m3Err_none;
730
731
8.94k
            if (is64BitType) {
732
4.82k
_               (EmitOp (o, op_Const64));
733
4.82k
                EmitWord64 (o->page, i_word);
734
4.82k
            } else {
735
4.11k
_               (EmitOp (o, op_Const32));
736
4.11k
                EmitWord32 (o->page, (u32) i_word);
737
4.11k
            }
738
739
8.94k
_           (PushAllocatedSlotAndEmit (o, i_type));
740
8.94k
        }
741
20
        else
742
20
        {
743
20
            u16 constTableIndex = slot - o->slotFirstConstIndex;
744
745
20
            d_m3Assert(constTableIndex < d_m3MaxConstantTableSize);
746
747
20
            if (is64BitType) {
748
9
                memcpy (& o->constants [constTableIndex], &i_word, sizeof(i_word));
749
11
            } else {
750
11
                u32 word32 = i_word;
751
11
                memcpy (& o->constants [constTableIndex], &word32, sizeof(word32));
752
11
            }
753
754
20
_           (Push (o, i_type, slot));
755
756
20
            o->slotMaxConstIndex = M3_MAX (slot + numRequiredSlots, o->slotMaxConstIndex);
757
20
        }
758
8.96k
    }
759
760
8.96k
    _catch: return result;
761
8.96k
}
762
763
static inline
764
M3Result  EmitSlotNumOfStackTopAndPop  (IM3Compilation o)
765
755
{
766
    // no emit if value is in register
767
755
    if (IsStackTopInSlot (o))
768
503
        EmitSlotOffset (o, GetStackTopSlotNumber (o));
769
770
755
    return Pop (o);
771
755
}
772
773
774
// Or, maybe: EmitTrappingOp
775
M3Result  AddTrapRecord  (IM3Compilation o)
776
212
{
777
212
    M3Result result = m3Err_none;
778
779
212
    if (o->function)
780
212
    {
781
212
    }
782
783
212
    return result;
784
212
}
785
786
static
787
M3Result  UnwindBlockStack  (IM3Compilation o)
788
267
{
789
267
    M3Result result = m3Err_none;
790
791
267
    u32 popCount = 0;
792
5.61k
    while (o->stackIndex > o->block.blockStackIndex)
793
5.34k
    {
794
5.34k
_       (Pop (o));
795
5.34k
        ++popCount;
796
5.34k
    }
797
798
267
    if (popCount)
799
122
    {
800
122
        m3log (compile, "unwound stack top: %d", popCount);
801
122
    }
802
803
267
    _catch: return result;
804
267
}
805
806
static inline
807
M3Result  SetStackPolymorphic  (IM3Compilation o)
808
213
{
809
213
    o->block.isPolymorphic = true;                              m3log (compile, "stack set polymorphic");
810
213
    return UnwindBlockStack (o);
811
213
}
812
813
static
814
void  PatchBranches  (IM3Compilation o)
815
213
{
816
213
    pc_t pc = GetPC (o);
817
818
213
    pc_t patches = o->block.patches;
819
213
    o->block.patches = NULL;
820
821
213
    while (patches)
822
0
    {                                                           m3log (compile, "patching location: %p to pc: %p", patches, pc);
823
0
        pc_t next = * (pc_t *) patches;
824
0
        * (pc_t *) patches = pc;
825
0
        patches = next;
826
0
    }
827
213
}
828
829
//-------------------------------------------------------------------------------------------------------------------------
830
831
static
832
M3Result  CopyStackIndexToSlot  (IM3Compilation o, u16 i_destSlot, u16 i_stackIndex)  // NoPushPop
833
37.7k
{
834
37.7k
    M3Result result = m3Err_none;
835
836
37.7k
    IM3Operation op;
837
838
37.7k
    u8 type = GetStackTypeFromBottom (o, i_stackIndex);
839
37.7k
    bool inRegister = IsStackIndexInRegister (o, i_stackIndex);
840
841
37.7k
    if (inRegister)
842
130
    {
843
130
        op = c_setSetOps [type];
844
130
    }
845
37.6k
    else op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;
846
847
37.7k
_   (EmitOp (o, op));
848
37.7k
    EmitSlotOffset (o, i_destSlot);
849
850
37.7k
    if (not inRegister)
851
37.6k
    {
852
37.6k
        u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
853
37.6k
        EmitSlotOffset (o, srcSlot);
854
37.6k
    }
855
856
37.7k
    _catch: return result;
857
37.7k
}
858
859
static
860
M3Result  CopyStackTopToSlot  (IM3Compilation o, u16 i_destSlot)  // NoPushPop
861
1.31k
{
862
1.31k
    M3Result result;
863
864
1.31k
    i16 stackTop = GetStackTopIndex (o);
865
1.31k
_   (CopyStackIndexToSlot (o, i_destSlot, (u16) stackTop));
866
867
1.31k
    _catch: return result;
868
1.31k
}
869
870
871
// a copy-on-write strategy is used with locals. when a get local occurs, it's not copied anywhere. the stack
872
// entry just has a index pointer to that local memory slot.
873
// then, when a previously referenced local is set, the current value needs to be preserved for those references
874
875
// TODO: consider getting rid of these specialized operations: PreserveSetSlot & PreserveCopySlot.
876
// They likely just take up space (which seems to reduce performance) without improving performance.
877
static
878
M3Result  PreservedCopyTopSlot  (IM3Compilation o, u16 i_destSlot, u16 i_preserveSlot)
879
2
{
880
2
    M3Result result = m3Err_none;             d_m3Assert (i_destSlot != i_preserveSlot);
881
882
2
    IM3Operation op;
883
884
2
    u8 type = GetStackTopType (o);
885
886
2
    if (IsStackTopInRegister (o))
887
2
    {
888
2
        op = c_preserveSetSlot [type];
889
2
    }
890
0
    else op = Is64BitType (type) ? op_PreserveCopySlot_64 : op_PreserveCopySlot_32;
891
892
2
_   (EmitOp (o, op));
893
2
    EmitSlotOffset (o, i_destSlot);
894
895
2
    if (IsStackTopInSlot (o))
896
0
        EmitSlotOffset (o, GetStackTopSlotNumber (o));
897
898
2
    EmitSlotOffset (o, i_preserveSlot);
899
900
2
    _catch: return result;
901
2
}
902
903
static
904
M3Result  CopyStackTopToRegister  (IM3Compilation o, bool i_updateStack)
905
275
{
906
275
    M3Result result = m3Err_none;
907
908
275
    if (IsStackTopInSlot (o))
909
213
    {
910
213
        u8 type = GetStackTopType (o);
911
912
213
_       (PreserveRegisterIfOccupied (o, type));
913
914
213
        IM3Operation op = c_setRegisterOps [type];
915
916
213
_       (EmitOp (o, op));
917
213
        EmitSlotOffset (o, GetStackTopSlotNumber (o));
918
919
213
        if (i_updateStack)
920
0
        {
921
0
_           (PopType (o, type));
922
0
_           (PushRegister (o, type));
923
0
        }
924
213
    }
925
926
275
    _catch: return result;
927
275
}
928
929
930
// if the local is unreferenced, * o_preservedSlotNumber will be equal to i_localSlot on return
931
static
932
M3Result  FindReferencedLocalWithinCurrentBlock  (IM3Compilation o, u16 * o_preservedSlotNumber, u32 i_localSlot)
933
13.9k
{
934
13.9k
    M3Result result = m3Err_none;
935
936
13.9k
    IM3CompilationScope scope = & o->block;
937
13.9k
    u16 startIndex = scope->blockStackIndex;
938
939
14.1k
    while (scope->opcode == c_waOp_block)
940
192
    {
941
192
        scope = scope->outer;
942
192
        if (not scope)
943
0
            break;
944
945
192
        startIndex = scope->blockStackIndex;
946
192
    }
947
948
13.9k
    * o_preservedSlotNumber = (u16) i_localSlot;
949
950
409k
    for (u32 i = startIndex; i < o->stackIndex; ++i)
951
395k
    {
952
395k
        if (o->wasmStack [i] == i_localSlot)
953
2
        {
954
2
            if (* o_preservedSlotNumber == i_localSlot)
955
2
            {
956
2
                u8 type = GetStackTypeFromBottom (o, i);                    d_m3Assert (type != c_m3Type_none)
957
958
2
_               (AllocateSlots (o, o_preservedSlotNumber, type));
959
2
            }
960
0
            else
961
2
_               (IncrementSlotUsageCount (o, * o_preservedSlotNumber));
962
963
2
            o->wasmStack [i] = * o_preservedSlotNumber;
964
2
        }
965
395k
    }
966
967
13.9k
    _catch: return result;
968
13.9k
}
969
970
static
971
M3Result  GetBlockScope  (IM3Compilation o, IM3CompilationScope * o_scope, u32 i_depth)
972
359
{
973
359
    M3Result result = m3Err_none;
974
975
359
    IM3CompilationScope scope = & o->block;
976
977
373
    while (i_depth--)
978
18
    {
979
18
        scope = scope->outer;
980
18
        _throwif ("invalid block depth", not scope);
981
14
    }
982
983
355
    * o_scope = scope;
984
985
359
    _catch:
986
359
    return result;
987
355
}
988
989
static
990
M3Result  CopyStackSlotsR  (IM3Compilation o, u16 i_targetSlotStackIndex, u16 i_stackIndex, u16 i_endStackIndex, u16 i_tempSlot)
991
20.4k
{
992
20.4k
    M3Result result = m3Err_none;
993
994
20.4k
    if (i_stackIndex < i_endStackIndex)
995
20.1k
    {
996
20.1k
        u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
997
998
20.1k
        u8 type = GetStackTypeFromBottom (o, i_stackIndex);
999
20.1k
        u16 numSlots = GetTypeNumSlots (type);
1000
20.1k
        u16 extraSlot = numSlots - 1;
1001
1002
20.1k
        u16 targetSlot = GetSlotForStackIndex (o, i_targetSlotStackIndex);
1003
1004
20.1k
        u16 preserveIndex = i_stackIndex;
1005
20.1k
        u16 collisionSlot = srcSlot;
1006
1007
20.1k
        if (targetSlot != srcSlot)
1008
19.1k
        {
1009
            // search for collisions
1010
19.1k
            u16 checkIndex = i_stackIndex + 1;
1011
161k
            while (checkIndex < i_endStackIndex)
1012
155k
            {
1013
155k
                u16 otherSlot1 = GetSlotForStackIndex (o, checkIndex);
1014
155k
                u16 otherSlot2 = GetExtraSlotForStackIndex (o, checkIndex);
1015
1016
155k
                if (targetSlot == otherSlot1 or
1017
142k
                    targetSlot == otherSlot2 or
1018
142k
                    targetSlot + extraSlot == otherSlot1)
1019
13.6k
                {
1020
13.6k
                    _throwif (m3Err_functionStackOverflow, i_tempSlot >= d_m3MaxFunctionSlots);
1021
1022
13.5k
_                   (CopyStackIndexToSlot (o, i_tempSlot, checkIndex));
1023
13.5k
                    o->wasmStack [checkIndex] = i_tempSlot;
1024
13.5k
                    i_tempSlot += GetTypeNumSlots (c_m3Type_i64);
1025
13.5k
                    TouchSlot (o, i_tempSlot - 1);
1026
1027
                    // restore this on the way back down
1028
13.5k
                    preserveIndex = checkIndex;
1029
13.5k
                    collisionSlot = otherSlot1;
1030
1031
13.5k
                    break;
1032
13.5k
                }
1033
1034
142k
                ++checkIndex;
1035
142k
            }
1036
1037
19.1k
_           (CopyStackIndexToSlot (o, targetSlot, i_stackIndex));                                               m3log (compile, " copying slot: %d to slot: %d", srcSlot, targetSlot);
1038
19.1k
            o->wasmStack [i_stackIndex] = targetSlot;
1039
1040
19.1k
        }
1041
1042
20.1k
_       (CopyStackSlotsR (o, i_targetSlotStackIndex + 1, i_stackIndex + 1, i_endStackIndex, i_tempSlot));
1043
1044
        // restore the stack state
1045
20.0k
        o->wasmStack [i_stackIndex] = srcSlot;
1046
20.0k
        o->wasmStack [preserveIndex] = collisionSlot;
1047
20.0k
    }
1048
1049
20.4k
    _catch:
1050
20.4k
    return result;
1051
20.4k
}
1052
1053
static
1054
M3Result  ResolveBlockResults  (IM3Compilation o, IM3CompilationScope i_targetBlock, bool i_isBranch)
1055
296
{
1056
296
    M3Result result = m3Err_none;                                   if (d_m3LogWasmStack) dump_type_stack (o);
1057
1058
296
    bool isLoop = (i_targetBlock->opcode == c_waOp_loop and i_isBranch);
1059
1060
296
    u16 numParams = GetFuncTypeNumParams (i_targetBlock->type);
1061
296
    u16 numResults = GetFuncTypeNumResults (i_targetBlock->type);
1062
1063
296
    u16 slotRecords = i_targetBlock->exitStackIndex;
1064
1065
296
    u16 numValues;
1066
1067
296
    if (not isLoop)
1068
294
    {
1069
294
        numValues = numResults;
1070
294
        slotRecords += numParams;
1071
294
    }
1072
2
    else numValues = numParams;
1073
1074
296
    u16 blockHeight = GetNumBlockValuesOnStack (o);
1075
1076
296
    _throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numValues) : (blockHeight != numValues));
1077
1078
295
    if (numValues)
1079
295
    {
1080
295
        u16 endIndex = GetStackTopIndex (o) + 1;
1081
295
        u16 numRemValues = numValues;
1082
1083
        // The last result is taken from _fp0. See PushBlockResults.
1084
295
        if (not isLoop and IsFpType (GetStackTopType (o)))
1085
216
        {
1086
216
_           (CopyStackTopToRegister (o, false));
1087
216
            --endIndex;
1088
216
            --numRemValues;
1089
216
        }
1090
1091
        // TODO: tempslot affects maxStackSlots, so can grow unnecess each time.
1092
295
        u16 tempSlot = o->maxStackSlots;// GetMaxUsedSlotPlusOne (o); doesn't work cause can collide with slotRecords
1093
295
        AlignSlotToType (& tempSlot, c_m3Type_i64);
1094
1095
295
_       (CopyStackSlotsR (o, slotRecords, endIndex - numRemValues, endIndex, tempSlot));
1096
1097
288
        if (d_m3LogWasmStack) dump_type_stack (o);
1098
288
    }
1099
1100
296
    _catch: return result;
1101
295
}
1102
1103
1104
static
1105
M3Result  ReturnValues  (IM3Compilation o, IM3CompilationScope i_functionBlock, bool i_isBranch)
1106
65
{
1107
65
    M3Result result = m3Err_none;                                               if (d_m3LogWasmStack) dump_type_stack (o);
1108
1109
65
    u16 numReturns = GetFuncTypeNumResults (i_functionBlock->type);     // could just o->function too...
1110
65
    u16 blockHeight = GetNumBlockValuesOnStack (o);
1111
1112
65
    if (not IsStackPolymorphic (o))
1113
64
        _throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numReturns) : (blockHeight != numReturns));
1114
1115
64
    if (numReturns)
1116
64
    {
1117
        // return slots like args are 64-bit aligned
1118
64
        u16 returnSlot = numReturns * c_ioSlotCount;
1119
64
        u16 stackTop = GetStackTopIndex (o);
1120
1121
4.12k
        for (u16 i = 0; i < numReturns; ++i)
1122
4.05k
        {
1123
4.05k
            u8 returnType = GetFuncTypeResultType (i_functionBlock->type, numReturns - 1 - i);
1124
1125
4.05k
            u8 stackType = GetStackTypeFromTop (o, i);  // using FromTop so that only dynamic items are checked
1126
1127
4.05k
            if (IsStackPolymorphic (o) and stackType == c_m3Type_none)
1128
228
                stackType = returnType;
1129
1130
4.05k
            _throwif (m3Err_typeMismatch, returnType != stackType);
1131
1132
4.05k
            if (not IsStackPolymorphic (o))
1133
3.67k
            {
1134
3.67k
                returnSlot -= c_ioSlotCount;
1135
3.67k
_               (CopyStackIndexToSlot (o, returnSlot, stackTop--));
1136
3.67k
            }
1137
4.05k
        }
1138
1139
63
        if (not i_isBranch)
1140
5
        {
1141
384
            while (numReturns--)
1142
379
_               (Pop (o));
1143
5
        }
1144
63
    }
1145
1146
65
    _catch: return result;
1147
64
}
1148
1149
1150
//-------------------------------------------------------------------------------------------------------------------------
1151
1152
static
1153
M3Result  Compile_Const_i32  (IM3Compilation o, m3opcode_t i_opcode)
1154
13.1k
{
1155
13.1k
    M3Result result;
1156
1157
13.1k
    i32 value;
1158
13.1k
_   (ReadLEB_i32 (& value, & o->wasm, o->wasmEnd));
1159
13.1k
_   (PushConst (o, value, c_m3Type_i32));                       m3log (compile, d_indent " (const i32 = %" PRIi32 ")", get_indention_string (o), value);
1160
13.1k
    _catch: return result;
1161
13.1k
}
1162
1163
static
1164
M3Result  Compile_Const_i64  (IM3Compilation o, m3opcode_t i_opcode)
1165
15.6k
{
1166
15.6k
    M3Result result;
1167
1168
15.6k
    i64 value;
1169
15.6k
_   (ReadLEB_i64 (& value, & o->wasm, o->wasmEnd));
1170
15.6k
_   (PushConst (o, value, c_m3Type_i64));                       m3log (compile, d_indent " (const i64 = %" PRIi64 ")", get_indention_string (o), value);
1171
15.6k
    _catch: return result;
1172
15.6k
}
1173
1174
1175
#if d_m3ImplementFloat
1176
static
1177
M3Result  Compile_Const_f32  (IM3Compilation o, m3opcode_t i_opcode)
1178
576
{
1179
576
    M3Result result;
1180
1181
576
    union { u32 u; f32 f; } value = { 0 };
1182
1183
576
_   (Read_f32 (& value.f, & o->wasm, o->wasmEnd));              m3log (compile, d_indent " (const f32 = %" PRIf32 ")", get_indention_string (o), value.f);
1184
576
_   (PushConst (o, value.u, c_m3Type_f32));
1185
1186
576
    _catch: return result;
1187
576
}
1188
1189
static
1190
M3Result  Compile_Const_f64  (IM3Compilation o, m3opcode_t i_opcode)
1191
352
{
1192
352
    M3Result result;
1193
1194
352
    union { u64 u; f64 f; } value = { 0 };
1195
1196
352
_   (Read_f64 (& value.f, & o->wasm, o->wasmEnd));              m3log (compile, d_indent " (const f64 = %" PRIf64 ")", get_indention_string (o), value.f);
1197
352
_   (PushConst (o, value.u, c_m3Type_f64));
1198
1199
352
    _catch: return result;
1200
352
}
1201
#endif
1202
1203
#if d_m3CascadedOpcodes
1204
1205
static
1206
M3Result  Compile_ExtendedOpcode  (IM3Compilation o, m3opcode_t i_opcode)
1207
7
{
1208
7
_try {
1209
7
    u8 opcode;
1210
7
_   (Read_u8 (& opcode, & o->wasm, o->wasmEnd));             m3log (compile, d_indent " (FC: %" PRIi32 ")", get_indention_string (o), opcode);
1211
1212
7
    i_opcode = (i_opcode << 8) | opcode;
1213
1214
    //printf("Extended opcode: 0x%x\n", i_opcode);
1215
1216
7
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
1217
7
    _throwif (m3Err_unknownOpcode, not opInfo);
1218
1219
7
    M3Compiler compiler = opInfo->compiler;
1220
7
    _throwif (m3Err_noCompiler, not compiler);
1221
1222
7
_   ((* compiler) (o, i_opcode));
1223
1224
7
    o->previousOpcode = i_opcode;
1225
1226
7
    } _catch: return result;
1227
7
}
1228
#endif
1229
1230
static
1231
M3Result  Compile_Return  (IM3Compilation o, m3opcode_t i_opcode)
1232
1
{
1233
1
    M3Result result = m3Err_none;
1234
1235
1
    if (not IsStackPolymorphic (o))
1236
1
    {
1237
1
        IM3CompilationScope functionScope;
1238
1
_       (GetBlockScope (o, & functionScope, o->block.depth));
1239
1240
1
_       (ReturnValues (o, functionScope, true));
1241
1242
1
_       (EmitOp (o, op_Return));
1243
1244
1
_       (SetStackPolymorphic (o));
1245
1
    }
1246
1247
1
    _catch: return result;
1248
1
}
1249
1250
static
1251
M3Result  ValidateBlockEnd  (IM3Compilation o)
1252
735
{
1253
735
    M3Result result = m3Err_none;
1254
/*
1255
    u16 numResults = GetFuncTypeNumResults (o->block.type);
1256
    u16 blockHeight = GetNumBlockValuesOnStack (o);
1257
1258
    if (IsStackPolymorphic (o))
1259
    {
1260
    }
1261
    else
1262
    {
1263
    }
1264
1265
735
    _catch: */ return result;
1266
735
}
1267
1268
static
1269
M3Result  Compile_End  (IM3Compilation o, m3opcode_t i_opcode)
1270
576
{
1271
576
    M3Result result = m3Err_none;                   //dump_type_stack (o);
1272
1273
    // function end:
1274
576
    if (o->block.depth == 0)
1275
522
    {
1276
522
        ValidateBlockEnd (o);
1277
1278
//      if (not IsStackPolymorphic (o))
1279
522
        {
1280
522
            if (o->function)
1281
6
            {
1282
6
_               (ReturnValues (o, & o->block, false));
1283
5
            }
1284
1285
521
_           (EmitOp (o, op_Return));
1286
521
        }
1287
521
    }
1288
1289
576
    _catch: return result;
1290
576
}
1291
1292
1293
static
1294
M3Result  Compile_SetLocal  (IM3Compilation o, m3opcode_t i_opcode)
1295
8
{
1296
8
    M3Result result;
1297
1298
8
    u32 localIndex;
1299
8
_   (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd));             //  printf ("--- set local: %d \n", localSlot);
1300
1301
8
    if (localIndex < GetFunctionNumArgsAndLocals (o->function))
1302
6
    {
1303
6
        u16 localSlot = GetSlotForStackIndex (o, localIndex);
1304
1305
6
        u16 preserveSlot;
1306
6
_       (FindReferencedLocalWithinCurrentBlock (o, & preserveSlot, localSlot));  // preserve will be different than local, if referenced
1307
1308
6
        if (preserveSlot == localSlot)
1309
4
_           (CopyStackTopToSlot (o, localSlot))
1310
2
        else
1311
2
_           (PreservedCopyTopSlot (o, localSlot, preserveSlot))
1312
1313
6
        if (i_opcode != c_waOp_teeLocal)
1314
6
_           (Pop (o));
1315
6
    }
1316
6
    else _throw ("local index out of bounds");
1317
1318
8
    _catch: return result;
1319
6
}
1320
1321
static
1322
M3Result  Compile_GetLocal  (IM3Compilation o, m3opcode_t i_opcode)
1323
11
{
1324
11
_try {
1325
1326
11
    u32 localIndex;
1327
11
_   (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd));
1328
1329
11
    if (localIndex >= GetFunctionNumArgsAndLocals (o->function))
1330
11
        _throw ("local index out of bounds");
1331
1332
11
    u8 type = GetStackTypeFromBottom (o, localIndex);
1333
11
    u16 slot = GetSlotForStackIndex (o, localIndex);
1334
1335
11
_   (Push (o, type, slot));
1336
1337
11
    } _catch: return result;
1338
11
}
1339
1340
static
1341
M3Result  Compile_GetGlobal  (IM3Compilation o, M3Global * i_global)
1342
1.01k
{
1343
1.01k
    M3Result result;
1344
1345
1.01k
    IM3Operation op = Is64BitType (i_global->type) ? op_GetGlobal_s64 : op_GetGlobal_s32;
1346
1.01k
_   (EmitOp (o, op));
1347
1.01k
    EmitPointer (o, & i_global->i64Value);
1348
1.01k
_   (PushAllocatedSlotAndEmit (o, i_global->type));
1349
1350
1.01k
    _catch: return result;
1351
1.01k
}
1352
1353
static
1354
M3Result  Compile_SetGlobal  (IM3Compilation o, M3Global * i_global)
1355
0
{
1356
0
    M3Result result = m3Err_none;
1357
1358
0
    if (i_global->isMutable)
1359
0
    {
1360
0
        IM3Operation op;
1361
0
        u8 type = GetStackTopType (o);
1362
1363
0
        if (IsStackTopInRegister (o))
1364
0
        {
1365
0
            op = c_setGlobalOps [type];
1366
0
        }
1367
0
        else op = Is64BitType (type) ? op_SetGlobal_s64 : op_SetGlobal_s32;
1368
1369
0
_      (EmitOp (o, op));
1370
0
        EmitPointer (o, & i_global->i64Value);
1371
1372
0
        if (IsStackTopInSlot (o))
1373
0
            EmitSlotOffset (o, GetStackTopSlotNumber (o));
1374
1375
0
_      (Pop (o));
1376
0
    }
1377
0
    else _throw (m3Err_settingImmutableGlobal);
1378
1379
0
    _catch: return result;
1380
0
}
1381
1382
static
1383
M3Result  Compile_GetSetGlobal  (IM3Compilation o, m3opcode_t i_opcode)
1384
1.01k
{
1385
1.01k
    M3Result result = m3Err_none;
1386
1387
1.01k
    u32 globalIndex;
1388
1.01k
_   (ReadLEB_u32 (& globalIndex, & o->wasm, o->wasmEnd));
1389
1390
1.01k
    if (globalIndex < o->module->numGlobals)
1391
1.01k
    {
1392
1.01k
        if (o->module->globals)
1393
1.01k
        {
1394
1.01k
            M3Global * global = & o->module->globals [globalIndex];
1395
1396
1.01k
_           ((i_opcode == c_waOp_getGlobal) ? Compile_GetGlobal (o, global) : Compile_SetGlobal (o, global));
1397
1.01k
        }
1398
1.01k
        else _throw (ErrorCompile (m3Err_globalMemoryNotAllocated, o, "module '%s' is missing global memory", o->module->name));
1399
1.01k
    }
1400
1.01k
    else _throw (m3Err_globaIndexOutOfBounds);
1401
1402
1.01k
    _catch: return result;
1403
1.01k
}
1404
1405
static
1406
void  EmitPatchingBranchPointer  (IM3Compilation o, IM3CompilationScope i_scope)
1407
286
{
1408
286
    pc_t patch = EmitPointer (o, i_scope->patches);                     m3log (compile, "branch patch required at: %p", patch);
1409
286
    i_scope->patches = patch;
1410
286
}
1411
1412
static
1413
M3Result  EmitPatchingBranch  (IM3Compilation o, IM3CompilationScope i_scope)
1414
286
{
1415
286
    M3Result result = m3Err_none;
1416
1417
286
_   (EmitOp (o, op_Branch));
1418
286
    EmitPatchingBranchPointer (o, i_scope);
1419
1420
286
    _catch: return result;
1421
286
}
1422
1423
static
1424
M3Result  Compile_Branch  (IM3Compilation o, m3opcode_t i_opcode)
1425
19
{
1426
19
    M3Result result;
1427
1428
19
    u32 depth;
1429
19
_   (ReadLEB_u32 (& depth, & o->wasm, o->wasmEnd));
1430
1431
19
    IM3CompilationScope scope;
1432
19
_   (GetBlockScope (o, & scope, depth));
1433
1434
    // branch target is a loop (continue)
1435
19
    if (scope->opcode == c_waOp_loop)
1436
3
    {
1437
3
        if (i_opcode == c_waOp_branchIf)
1438
3
        {
1439
3
            if (GetFuncTypeNumParams (scope->type))
1440
2
            {
1441
2
                IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
1442
1443
2
_               (EmitOp (o, op));
1444
2
_               (EmitSlotNumOfStackTopAndPop (o));
1445
1446
2
                pc_t * jumpTo = (pc_t *) ReservePointer (o);
1447
1448
2
_               (ResolveBlockResults (o, scope, /* isBranch: */ true));
1449
1450
1
_               (EmitOp (o, op_ContinueLoop));
1451
1
                EmitPointer (o, scope->pc);
1452
1453
1
                * jumpTo = GetPC (o);
1454
1
            }
1455
1
            else
1456
1
            {
1457
                // move the condition to a register
1458
1
_               (CopyStackTopToRegister (o, false));
1459
1
_               (PopType (o, c_m3Type_i32));
1460
1461
1
_               (EmitOp (o, op_ContinueLoopIf));
1462
1
                EmitPointer (o, scope->pc);
1463
1
            }
1464
1465
//          dump_type_stack(o);
1466
3
        }
1467
0
        else // is c_waOp_branch
1468
0
        {
1469
0
    _       (EmitOp (o, op_ContinueLoop));
1470
0
            EmitPointer (o, scope->pc);
1471
0
            o->block.isPolymorphic = true;
1472
0
        }
1473
3
    }
1474
16
    else // forward branch
1475
16
    {
1476
16
        pc_t * jumpTo = NULL;
1477
1478
16
        bool isReturn = (scope->depth == 0);
1479
16
        bool targetHasResults = GetFuncTypeNumResults (scope->type);
1480
1481
16
        if (i_opcode == c_waOp_branchIf)
1482
16
        {
1483
16
            if (targetHasResults or isReturn)
1484
16
            {
1485
16
                IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
1486
1487
16
    _           (EmitOp (o, op));
1488
16
    _           (EmitSlotNumOfStackTopAndPop (o)); // condition
1489
1490
                // this is continuation point, if the branch isn't taken
1491
16
                jumpTo = (pc_t *) ReservePointer (o);
1492
16
            }
1493
0
            else
1494
0
            {
1495
0
                IM3Operation op = IsStackTopInRegister (o) ? op_BranchIf_r : op_BranchIf_s;
1496
1497
0
    _           (EmitOp (o, op));
1498
0
    _           (EmitSlotNumOfStackTopAndPop (o)); // condition
1499
1500
0
                EmitPatchingBranchPointer (o, scope);
1501
0
                goto _catch;
1502
0
            }
1503
16
        }
1504
1505
16
        if (not IsStackPolymorphic (o))
1506
16
        {
1507
16
            if (isReturn)
1508
0
            {
1509
0
_               (ReturnValues (o, scope, true));
1510
0
_               (EmitOp (o, op_Return));
1511
0
            }
1512
16
            else
1513
16
            {
1514
16
_               (ResolveBlockResults (o, scope, true));
1515
16
_               (EmitPatchingBranch (o, scope));
1516
16
            }
1517
16
        }
1518
1519
16
        if (jumpTo)
1520
16
        {
1521
16
            * jumpTo = GetPC (o);
1522
16
        }
1523
1524
16
        if (i_opcode == c_waOp_branch)
1525
16
_           (SetStackPolymorphic (o));
1526
16
    }
1527
1528
19
    _catch: return result;
1529
19
}
1530
1531
static
1532
M3Result  Compile_BranchTable  (IM3Compilation o, m3opcode_t i_opcode)
1533
12
{
1534
12
_try {
1535
12
    u32 targetCount;
1536
12
_   (ReadLEB_u32 (& targetCount, & o->wasm, o->wasmEnd));
1537
1538
12
_   (PreserveRegisterIfOccupied (o, c_m3Type_i64));         // move branch operand to a slot
1539
12
    u16 slot = GetStackTopSlotNumber (o);
1540
12
_   (Pop (o));
1541
1542
    // OPTZ: according to spec: "forward branches that target a control instruction with a non-empty
1543
    // result type consume matching operands first and push them back on the operand stack after unwinding"
1544
    // So, this move-to-reg is only necessary if the target scopes have a type.
1545
1546
12
    u32 numCodeLines = targetCount + 4; // 3 => IM3Operation + slot + target_count + default_target
1547
12
_   (EnsureCodePageNumLines (o, numCodeLines));
1548
1549
12
_   (EmitOp (o, op_BranchTable));
1550
12
    EmitSlotOffset (o, slot);
1551
12
    EmitConstant32 (o, targetCount);
1552
1553
12
    IM3CodePage continueOpPage = NULL;
1554
1555
12
    ++targetCount; // include default
1556
339
    for (u32 i = 0; i < targetCount; ++i)
1557
339
    {
1558
339
        u32 target;
1559
339
_       (ReadLEB_u32 (& target, & o->wasm, o->wasmEnd));
1560
1561
339
        IM3CompilationScope scope;
1562
339
_       (GetBlockScope (o, & scope, target));
1563
1564
        // TODO: don't need codepage rigmarole for
1565
        // no-param forward-branch targets
1566
1567
335
_       (AcquireCompilationCodePage (o, & continueOpPage));
1568
1569
335
        pc_t startPC = GetPagePC (continueOpPage);
1570
335
        IM3CodePage savedPage = o->page;
1571
335
        o->page = continueOpPage;
1572
1573
335
        if (scope->opcode == c_waOp_loop)
1574
0
        {
1575
0
_           (ResolveBlockResults (o, scope, true));
1576
1577
0
_           (EmitOp (o, op_ContinueLoop));
1578
0
            EmitPointer (o, scope->pc);
1579
0
        }
1580
335
        else
1581
335
        {
1582
            // TODO: this could be fused with equivalent targets
1583
335
            if (not IsStackPolymorphic (o))
1584
335
            {
1585
335
                if (scope->depth == 0)
1586
58
                {
1587
58
_                   (ReturnValues (o, scope, true));
1588
57
_                   (EmitOp (o, op_Return));
1589
57
                }
1590
277
                else
1591
277
                {
1592
277
_                   (ResolveBlockResults (o, scope, true));
1593
1594
270
_                   (EmitPatchingBranch (o, scope));
1595
270
                }
1596
335
            }
1597
335
        }
1598
1599
327
        ReleaseCompilationCodePage (o);     // FIX: continueOpPage can get lost if thrown
1600
327
        o->page = savedPage;
1601
1602
327
        EmitPointer (o, startPC);
1603
327
    }
1604
1605
0
_   (SetStackPolymorphic (o));
1606
1607
0
    }
1608
1609
12
    _catch: return result;
1610
0
}
1611
1612
static
1613
M3Result  CompileCallArgsAndReturn  (IM3Compilation o, u16 * o_stackOffset, IM3FuncType i_type, bool i_isIndirect)
1614
160
{
1615
160
_try {
1616
1617
160
    u16 topSlot = GetMaxUsedSlotPlusOne (o);
1618
1619
    // force use of at least one stack slot; this is to help ensure
1620
    // the m3 stack overflows (and traps) before the native stack can overflow.
1621
    // e.g. see Wasm spec test 'runaway' in call.wast
1622
160
    topSlot = M3_MAX (1, topSlot);
1623
1624
    // stack frame is 64-bit aligned
1625
160
    AlignSlotToType (& topSlot, c_m3Type_i64);
1626
1627
160
    * o_stackOffset = topSlot;
1628
1629
    // wait to pop this here so that topSlot search is correct
1630
160
    if (i_isIndirect)
1631
160
_       (Pop (o));
1632
1633
160
    u16 numArgs = GetFuncTypeNumParams (i_type);
1634
160
    u16 numRets = GetFuncTypeNumResults (i_type);
1635
1636
160
    u16 argTop = topSlot + (numArgs + numRets) * c_ioSlotCount;
1637
1638
1.47k
    while (numArgs--)
1639
1.31k
    {
1640
1.31k
_       (CopyStackTopToSlot (o, argTop -= c_ioSlotCount));
1641
1.31k
_       (Pop (o));
1642
1.31k
    }
1643
1644
160
    u16 i = 0;
1645
12.0k
    while (numRets--)
1646
11.9k
    {
1647
11.9k
        u8 type = GetFuncTypeResultType (i_type, i++);
1648
1649
11.9k
_       (Push (o, type, topSlot));
1650
11.9k
        MarkSlotsAllocatedByType (o, topSlot, type);
1651
1652
11.9k
        topSlot += c_ioSlotCount;
1653
11.9k
    }
1654
1655
160
    } _catch: return result;
1656
160
}
1657
1658
static
1659
M3Result  Compile_Call  (IM3Compilation o, m3opcode_t i_opcode)
1660
136
{
1661
136
_try {
1662
136
    u32 functionIndex;
1663
136
_   (ReadLEB_u32 (& functionIndex, & o->wasm, o->wasmEnd));
1664
1665
136
    IM3Function function = Module_GetFunction (o->module, functionIndex);
1666
1667
136
    if (function)
1668
136
    {                                                                   m3log (compile, d_indent " (func= [%d] '%s'; args= %d)",
1669
136
                                                                                get_indention_string (o), functionIndex, m3_GetFunctionName (function), function->funcType->numArgs);
1670
136
        if (function->module)
1671
136
        {
1672
136
            u16 slotTop;
1673
136
_           (CompileCallArgsAndReturn (o, & slotTop, function->funcType, false));
1674
1675
136
            IM3Operation op;
1676
136
            const void * operand;
1677
1678
136
            if (function->compiled)
1679
0
            {
1680
0
                op = op_Call;
1681
0
                operand = function->compiled;
1682
0
            }
1683
136
            else
1684
136
            {
1685
136
                op = op_Compile;
1686
136
                operand = function;
1687
136
            }
1688
1689
136
_           (EmitOp     (o, op));
1690
136
            EmitPointer (o, operand);
1691
136
            EmitSlotOffset  (o, slotTop);
1692
136
        }
1693
0
        else
1694
0
        {
1695
0
            _throw (ErrorCompile (m3Err_functionImportMissing, o, "'%s.%s'", GetFunctionImportModuleName (function), m3_GetFunctionName (function)));
1696
0
        }
1697
136
    }
1698
136
    else _throw (m3Err_functionLookupFailed);
1699
1700
136
    } _catch: return result;
1701
136
}
1702
1703
static
1704
M3Result  Compile_CallIndirect  (IM3Compilation o, m3opcode_t i_opcode)
1705
24
{
1706
24
_try {
1707
24
    u32 typeIndex;
1708
24
_   (ReadLEB_u32 (& typeIndex, & o->wasm, o->wasmEnd));
1709
1710
24
    u32 tableIndex;
1711
24
_   (ReadLEB_u32 (& tableIndex, & o->wasm, o->wasmEnd));
1712
1713
24
    _throwif ("function call type index out of range", typeIndex >= o->module->numFuncTypes);
1714
1715
24
    if (IsStackTopInRegister (o))
1716
24
_       (PreserveRegisterIfOccupied (o, c_m3Type_i32));
1717
1718
24
    u16 tableIndexSlot = GetStackTopSlotNumber (o);
1719
1720
24
    u16 execTop;
1721
24
    IM3FuncType type = o->module->funcTypes [typeIndex];
1722
24
_   (CompileCallArgsAndReturn (o, & execTop, type, true));
1723
1724
22
_   (EmitOp         (o, op_CallIndirect));
1725
22
    EmitSlotOffset  (o, tableIndexSlot);
1726
22
    EmitPointer     (o, o->module);
1727
22
    EmitPointer     (o, type);              // TODO: unify all types in M3Environment
1728
22
    EmitSlotOffset  (o, execTop);
1729
1730
24
} _catch:
1731
24
    return result;
1732
22
}
1733
1734
static
1735
M3Result  Compile_Memory_Size  (IM3Compilation o, m3opcode_t i_opcode)
1736
0
{
1737
0
    M3Result result;
1738
1739
0
    i8 reserved;
1740
0
_   (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd));
1741
1742
0
_   (PreserveRegisterIfOccupied (o, c_m3Type_i32));
1743
1744
0
_   (EmitOp     (o, op_MemSize));
1745
1746
0
_   (PushRegister (o, c_m3Type_i32));
1747
1748
0
    _catch: return result;
1749
0
}
1750
1751
static
1752
M3Result  Compile_Memory_Grow  (IM3Compilation o, m3opcode_t i_opcode)
1753
53
{
1754
53
    M3Result result;
1755
1756
53
    i8 reserved;
1757
53
_   (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd));
1758
1759
53
_   (CopyStackTopToRegister (o, false));
1760
53
_   (PopType (o, c_m3Type_i32));
1761
1762
53
_   (EmitOp     (o, op_MemGrow));
1763
1764
53
_   (PushRegister (o, c_m3Type_i32));
1765
1766
53
    _catch: return result;
1767
49
}
1768
1769
static
1770
M3Result  Compile_Memory_CopyFill  (IM3Compilation o, m3opcode_t i_opcode)
1771
5
{
1772
5
    M3Result result = m3Err_none;
1773
1774
5
    u32 sourceMemoryIdx, targetMemoryIdx;
1775
5
    IM3Operation op;
1776
5
    if (i_opcode == c_waOp_memoryCopy)
1777
3
    {
1778
3
_       (ReadLEB_u32 (& sourceMemoryIdx, & o->wasm, o->wasmEnd));
1779
3
        op = op_MemCopy;
1780
3
    }
1781
2
    else op = op_MemFill;
1782
1783
5
_   (ReadLEB_u32 (& targetMemoryIdx, & o->wasm, o->wasmEnd));
1784
1785
5
_   (CopyStackTopToRegister (o, false));
1786
1787
5
_   (EmitOp  (o, op));
1788
5
_   (PopType (o, c_m3Type_i32));
1789
5
_   (EmitSlotNumOfStackTopAndPop (o));
1790
5
_   (EmitSlotNumOfStackTopAndPop (o));
1791
1792
5
    _catch: return result;
1793
5
}
1794
1795
1796
static
1797
M3Result  ReadBlockType  (IM3Compilation o, IM3FuncType * o_blockType)
1798
489
{
1799
489
    M3Result result;
1800
1801
489
    i64 type;
1802
489
_   (ReadLebSigned (& type, 33, & o->wasm, o->wasmEnd));
1803
1804
488
    if (type < 0)
1805
4
    {
1806
4
        u8 valueType;
1807
4
_       (NormalizeType (&valueType, type));                                m3log (compile, d_indent " (type: %s)", get_indention_string (o), c_waTypes [valueType]);
1808
2
        *o_blockType = o->module->environment->retFuncTypes[valueType];
1809
2
    }
1810
484
    else
1811
484
    {
1812
484
        _throwif("func type out of bounds", type >= o->module->numFuncTypes);
1813
483
        *o_blockType = o->module->funcTypes[type];                         m3log (compile, d_indent " (type: %s)", get_indention_string (o), SPrintFuncTypeSignature (*o_blockType));
1814
483
    }
1815
489
    _catch: return result;
1816
488
}
1817
1818
static
1819
M3Result  PreserveArgsAndLocals  (IM3Compilation o)
1820
489
{
1821
489
    M3Result result = m3Err_none;
1822
1823
489
    if (o->stackIndex > o->stackFirstDynamicIndex)
1824
484
    {
1825
484
        u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);
1826
1827
14.4k
        for (u32 i = 0; i < numArgsAndLocals; ++i)
1828
13.9k
        {
1829
13.9k
            u16 slot = GetSlotForStackIndex (o, i);
1830
1831
13.9k
            u16 preservedSlotNumber;
1832
13.9k
_           (FindReferencedLocalWithinCurrentBlock (o, & preservedSlotNumber, slot));
1833
1834
13.9k
            if (preservedSlotNumber != slot)
1835
0
            {
1836
0
                u8 type = GetStackTypeFromBottom (o, i);                    d_m3Assert (type != c_m3Type_none)
1837
0
                IM3Operation op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;
1838
1839
0
                EmitOp          (o, op);
1840
0
                EmitSlotOffset  (o, preservedSlotNumber);
1841
0
                EmitSlotOffset  (o, slot);
1842
0
            }
1843
13.9k
        }
1844
484
    }
1845
1846
489
    _catch:
1847
489
    return result;
1848
489
}
1849
1850
static
1851
M3Result  Compile_LoopOrBlock  (IM3Compilation o, m3opcode_t i_opcode)
1852
451
{
1853
451
    M3Result result;
1854
1855
    // TODO: these shouldn't be necessary for non-loop blocks?
1856
451
_   (PreserveRegisters (o));
1857
451
_   (PreserveArgsAndLocals (o));
1858
1859
451
    IM3FuncType blockType;
1860
451
_   (ReadBlockType (o, & blockType));
1861
1862
447
    if (i_opcode == c_waOp_loop)
1863
426
    {
1864
426
        u16 numParams = GetFuncTypeNumParams (blockType);
1865
426
        if (numParams)
1866
410
        {
1867
            // instantiate constants
1868
410
            u16 numValues = GetNumBlockValuesOnStack (o);                   // CompileBlock enforces this at comptime
1869
410
                                                                            d_m3Assert (numValues >= numParams);
1870
410
            if (numValues >= numParams)
1871
331
            {
1872
331
                u16 stackTop = GetStackTopIndex (o) + 1;
1873
1874
10.9k
                for (u16 i = stackTop - numParams; i < stackTop; ++i)
1875
10.5k
                {
1876
10.5k
                    u16 slot = GetSlotForStackIndex (o, i);
1877
10.5k
                    u8 type = GetStackTypeFromBottom (o, i);
1878
1879
10.5k
                    if (IsConstantSlot (o, slot))
1880
1
                    {
1881
1
                        u16 newSlot = c_slotUnused;
1882
1
_                       (AllocateSlots (o, & newSlot, type));
1883
1
_                       (CopyStackIndexToSlot (o, newSlot, i));
1884
1
                        o->wasmStack [i] = newSlot;
1885
1
                    }
1886
10.5k
                }
1887
331
            }
1888
410
        }
1889
1890
426
_       (EmitOp (o, op_Loop));
1891
426
    }
1892
21
    else
1893
21
    {
1894
21
    }
1895
1896
447
_   (CompileBlock (o, blockType, i_opcode));
1897
1898
451
    _catch: return result;
1899
22
}
1900
1901
static
1902
M3Result  CompileElseBlock  (IM3Compilation o, pc_t * o_startPC, IM3FuncType i_blockType)
1903
16
{
1904
16
    IM3CodePage savedPage = o->page;
1905
16
_try {
1906
1907
16
    IM3CodePage elsePage;
1908
16
_   (AcquireCompilationCodePage (o, & elsePage));
1909
1910
16
    * o_startPC = GetPagePC (elsePage);
1911
1912
16
    o->page = elsePage;
1913
1914
16
_   (CompileBlock (o, i_blockType, c_waOp_else));
1915
1916
16
_   (EmitOp (o, op_Branch));
1917
16
    EmitPointer (o, GetPagePC (savedPage));
1918
16
} _catch:
1919
16
    if(o->page != savedPage) {
1920
16
        ReleaseCompilationCodePage (o);
1921
16
    }
1922
16
    o->page = savedPage;
1923
16
    return result;
1924
16
}
1925
1926
static
1927
M3Result  Compile_If  (IM3Compilation o, m3opcode_t i_opcode)
1928
38
{
1929
    /*      [   op_If   ]
1930
            [ <else-pc> ]   ---->   [ ..else..  ]
1931
            [  ..if..   ]           [ ..block.. ]
1932
            [ ..block.. ]           [ op_Branch ]
1933
            [    end    ]  <-----   [  <end-pc> ]       */
1934
1935
38
_try {
1936
1937
38
_   (PreserveNonTopRegisters (o));
1938
38
_   (PreserveArgsAndLocals (o));
1939
1940
38
    IM3Operation op = IsStackTopInRegister (o) ? op_If_r : op_If_s;
1941
1942
38
_   (EmitOp (o, op));
1943
38
_   (EmitSlotNumOfStackTopAndPop (o));
1944
1945
38
    pc_t * pc = (pc_t *) ReservePointer (o);
1946
1947
38
    IM3FuncType blockType;
1948
38
_   (ReadBlockType (o, & blockType));
1949
1950
//  dump_type_stack (o);
1951
1952
38
    u16 stackIndex = o->stackIndex;
1953
1954
38
_   (CompileBlock (o, blockType, i_opcode));
1955
1956
16
    if (o->previousOpcode == c_waOp_else)
1957
0
    {
1958
0
        o->stackIndex = stackIndex;
1959
0
_       (CompileElseBlock (o, pc, blockType));
1960
0
    }
1961
16
    else
1962
16
    {
1963
        // if block produces values and there isn't a defined else
1964
        // case, then we need to make one up so that the pass-through
1965
        // results end up in the right place
1966
16
        if (GetFuncTypeNumResults (blockType))
1967
16
        {
1968
            // rewind to the if's end to create a fake else block
1969
16
            o->wasm--;
1970
16
            o->stackIndex = stackIndex;
1971
1972
//          dump_type_stack (o);
1973
1974
16
_           (CompileElseBlock (o, pc, blockType));
1975
16
        }
1976
0
        else * pc = GetPC (o);
1977
16
    }
1978
1979
38
    } _catch: return result;
1980
16
}
1981
1982
static
1983
M3Result  Compile_Select  (IM3Compilation o, m3opcode_t i_opcode)
1984
44
{
1985
44
    M3Result result = m3Err_none;
1986
1987
44
    u16 slots [3] = { c_slotUnused, c_slotUnused, c_slotUnused };
1988
1989
44
    u8 type = GetStackTypeFromTop (o, 1); // get type of selection
1990
1991
44
    IM3Operation op = NULL;
1992
1993
44
    if (IsFpType (type))
1994
6
    {
1995
6
#   if d_m3HasFloat
1996
        // not consuming a fp reg, so preserve
1997
6
        if (not IsStackTopMinus1InRegister (o) and
1998
4
            not IsStackTopMinus2InRegister (o))
1999
4
        {
2000
4
_           (PreserveRegisterIfOccupied (o, type));
2001
4
        }
2002
2003
6
        bool selectorInReg = IsStackTopInRegister (o);
2004
6
        slots [0] = GetStackTopSlotNumber (o);
2005
6
_       (Pop (o));
2006
2007
6
        u32 opIndex = 0;
2008
2009
18
        for (u32 i = 1; i <= 2; ++i)
2010
12
        {
2011
12
            if (IsStackTopInRegister (o))
2012
4
                opIndex = i;
2013
8
            else
2014
8
                slots [i] = GetStackTopSlotNumber (o);
2015
2016
12
_          (Pop (o));
2017
12
        }
2018
2019
6
        op = c_fpSelectOps [type - c_m3Type_f32] [selectorInReg] [opIndex];
2020
#   else
2021
        _throw (m3Err_unknownOpcode);
2022
#   endif
2023
6
    }
2024
38
    else if (IsIntType (type))
2025
22
    {
2026
        // 'sss' operation doesn't consume a register, so might have to protected its contents
2027
22
        if (not IsStackTopInRegister (o) and
2028
9
            not IsStackTopMinus1InRegister (o) and
2029
9
            not IsStackTopMinus2InRegister (o))
2030
9
        {
2031
9
_           (PreserveRegisterIfOccupied (o, type));
2032
9
        }
2033
2034
22
        u32 opIndex = 3;  // op_Select_*_sss
2035
2036
88
        for (u32 i = 0; i < 3; ++i)
2037
66
        {
2038
66
            if (IsStackTopInRegister (o))
2039
16
                opIndex = i;
2040
50
            else
2041
50
                slots [i] = GetStackTopSlotNumber (o);
2042
2043
66
_          (Pop (o));
2044
66
        }
2045
2046
22
        op = c_intSelectOps [type - c_m3Type_i32] [opIndex];
2047
22
    }
2048
16
    else if (not IsStackPolymorphic (o))
2049
44
        _throw (m3Err_functionStackUnderrun);
2050
2051
44
    EmitOp (o, op);
2052
176
    for (u32 i = 0; i < 3; i++)
2053
132
    {
2054
132
        if (IsValidSlot (slots [i]))
2055
61
            EmitSlotOffset (o, slots [i]);
2056
132
    }
2057
44
_   (PushRegister (o, type));
2058
2059
44
    _catch: return result;
2060
44
}
2061
2062
static
2063
M3Result  Compile_Drop  (IM3Compilation o, m3opcode_t i_opcode)
2064
4
{
2065
4
    M3Result result = Pop (o);                                              if (d_m3LogWasmStack) dump_type_stack (o);
2066
4
    return result;
2067
4
}
2068
2069
static
2070
M3Result  Compile_Nop  (IM3Compilation o, m3opcode_t i_opcode)
2071
28
{
2072
28
    return m3Err_none;
2073
28
}
2074
2075
static
2076
M3Result  Compile_Unreachable  (IM3Compilation o, m3opcode_t i_opcode)
2077
212
{
2078
212
    M3Result result;
2079
2080
212
_   (AddTrapRecord (o));
2081
2082
212
_   (EmitOp (o, op_Unreachable));
2083
212
_   (SetStackPolymorphic (o));
2084
2085
212
    _catch:
2086
212
    return result;
2087
212
}
2088
2089
2090
// OPTZ: currently all stack slot indices take up a full word, but
2091
// dual stack source operands could be packed together
2092
static
2093
M3Result  Compile_Operator  (IM3Compilation o, m3opcode_t i_opcode)
2094
392
{
2095
392
    M3Result result;
2096
2097
392
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
2098
392
    _throwif (m3Err_unknownOpcode, not opInfo);
2099
2100
392
    IM3Operation op;
2101
2102
    // This preserve is for for FP compare operations.
2103
    // either need additional slot destination operations or the
2104
    // easy fix, move _r0 out of the way.
2105
    // moving out the way might be the optimal solution most often?
2106
    // otherwise, the _r0 reg can get buried down in the stack
2107
    // and be idle & wasted for a moment.
2108
392
    if (IsFpType (GetStackTopType (o)) and IsIntType (opInfo->type))
2109
74
    {
2110
74
_       (PreserveRegisterIfOccupied (o, opInfo->type));
2111
74
    }
2112
2113
392
    if (opInfo->stackOffset == 0)
2114
109
    {
2115
109
        if (IsStackTopInRegister (o))
2116
57
        {
2117
57
            op = opInfo->operations [0]; // _s
2118
57
        }
2119
52
        else
2120
52
        {
2121
52
_           (PreserveRegisterIfOccupied (o, opInfo->type));
2122
52
            op = opInfo->operations [1]; // _r
2123
52
        }
2124
109
    }
2125
283
    else
2126
283
    {
2127
283
        if (IsStackTopInRegister (o))
2128
162
        {
2129
162
            op = opInfo->operations [0];  // _rs
2130
2131
162
            if (IsStackTopMinus1InRegister (o))
2132
1
            {                                       d_m3Assert (i_opcode == c_waOp_store_f32 or i_opcode == c_waOp_store_f64);
2133
1
                op = opInfo->operations [3]; // _rr for fp.store
2134
1
            }
2135
162
        }
2136
121
        else if (IsStackTopMinus1InRegister (o))
2137
8
        {
2138
8
            op = opInfo->operations [1]; // _sr
2139
2140
8
            if (not op)  // must be commutative, then
2141
0
                op = opInfo->operations [0];
2142
8
        }
2143
113
        else
2144
113
        {
2145
113
_           (PreserveRegisterIfOccupied (o, opInfo->type));     // _ss
2146
113
            op = opInfo->operations [2];
2147
113
        }
2148
283
    }
2149
2150
392
    if (op)
2151
387
    {
2152
387
_       (EmitOp (o, op));
2153
2154
387
_       (EmitSlotNumOfStackTopAndPop (o));
2155
2156
387
        if (opInfo->stackOffset < 0)
2157
387
_           (EmitSlotNumOfStackTopAndPop (o));
2158
2159
387
        if (opInfo->type != c_m3Type_none)
2160
386
_           (PushRegister (o, opInfo->type));
2161
386
    }
2162
5
    else
2163
5
    {
2164
#       ifdef DEBUG
2165
            result = ErrorCompile ("no operation found for opcode", o, "'%s'", opInfo->name);
2166
#       else
2167
5
            result = ErrorCompile ("no operation found for opcode", o, "%x", i_opcode);
2168
5
#       endif
2169
5
        _throw (result);
2170
0
    }
2171
2172
392
    _catch: return result;
2173
392
}
2174
2175
static
2176
M3Result  Compile_Convert  (IM3Compilation o, m3opcode_t i_opcode)
2177
20
{
2178
20
_try {
2179
20
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
2180
20
    _throwif (m3Err_unknownOpcode, not opInfo);
2181
2182
20
    bool destInSlot = IsRegisterTypeAllocated (o, opInfo->type);
2183
20
    bool sourceInSlot = IsStackTopInSlot (o);
2184
2185
20
    IM3Operation op = opInfo->operations [destInSlot * 2 + sourceInSlot];
2186
2187
20
_   (EmitOp (o, op));
2188
20
_   (EmitSlotNumOfStackTopAndPop (o));
2189
2190
20
    if (destInSlot)
2191
7
_       (PushAllocatedSlotAndEmit (o, opInfo->type))
2192
13
    else
2193
13
_       (PushRegister (o, opInfo->type))
2194
2195
20
}
2196
20
    _catch: return result;
2197
20
}
2198
2199
static
2200
M3Result  Compile_Load_Store  (IM3Compilation o, m3opcode_t i_opcode)
2201
73
{
2202
73
_try {
2203
73
    u32 alignHint, memoryOffset;
2204
2205
73
_   (ReadLEB_u32 (& alignHint, & o->wasm, o->wasmEnd));
2206
72
_   (ReadLEB_u32 (& memoryOffset, & o->wasm, o->wasmEnd));
2207
72
                                                                        m3log (compile, d_indent " (offset = %d)", get_indention_string (o), memoryOffset);
2208
72
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
2209
72
    _throwif (m3Err_unknownOpcode, not opInfo);
2210
2211
72
    if (IsFpType (opInfo->type))
2212
72
_       (PreserveRegisterIfOccupied (o, c_m3Type_f64));
2213
2214
72
_   (Compile_Operator (o, i_opcode));
2215
2216
72
    EmitConstant32 (o, memoryOffset);
2217
72
}
2218
73
    _catch: return result;
2219
72
}
2220
2221
2222
// Builds the code for a raw (host) function.
// Acquires a code page with room for four words and writes:
// op_CallRawFunction, i_function, io_function, i_userdata.
// io_function->compiled is set to the start of that sequence and io_function->module to io_module.
// Returns m3Err_mallocFailedCodePage when no page could be acquired, m3Err_none otherwise.
M3Result  CompileRawFunction  (IM3Module io_module,  IM3Function io_function, const void * i_function, const void * i_userdata)
{
    d_m3Assert (io_module->runtime);

    IM3CodePage page = AcquireCodePageWithCapacity (io_module->runtime, 4);

    if (not page)
        return m3Err_mallocFailedCodePage;

    io_function->compiled = GetPagePC (page);
    io_function->module = io_module;

    EmitWord (page, op_CallRawFunction);
    EmitWord (page, i_function);
    EmitWord (page, io_function);
    EmitWord (page, i_userdata);

    ReleaseCodePage (io_module->runtime, page);

    return m3Err_none;
}
2245
2246
2247
2248
// Each macro below expands to a four-entry initializer list of operations;
// entries without an operation are NULL.
// d_logOp, d_logOp2 macros aren't actually used by the compiler, just codepage decoding (d_m3LogCodePages = 1)
2249
#define d_logOp(OP)                         { op_##OP,                  NULL,                       NULL,                       NULL }
2250
#define d_logOp2(OP1,OP2)                   { op_##OP1,                 op_##OP2,                   NULL,                       NULL }
2251
2252
// no operations at all
#define d_emptyOpList                       { NULL,                     NULL,                       NULL,                       NULL }
2253
// unary operator: [0] = _r variant, [1] = _s variant
#define d_unaryOpList(TYPE, NAME)           { op_##TYPE##_##NAME##_r,   op_##TYPE##_##NAME##_s,     NULL,                       NULL }
2254
// binary operator: [0] = _rs, [1] = _sr, [2] = _ss
#define d_binOpList(TYPE, NAME)             { op_##TYPE##_##NAME##_rs,  op_##TYPE##_##NAME##_sr,    op_##TYPE##_##NAME##_ss,    NULL }
2255
// fp store: same as a binary list plus [3] = _rr
#define d_storeFpOpList(TYPE, NAME)         { op_##TYPE##_##NAME##_rs,  op_##TYPE##_##NAME##_sr,    op_##TYPE##_##NAME##_ss,    op_##TYPE##_##NAME##_rr }
2256
// commutative binary operator: no _sr entry; Compile_Operator falls back to [0] when [1] is NULL
#define d_commutativeBinOpList(TYPE, NAME)  { op_##TYPE##_##NAME##_rs,  NULL,                       op_##TYPE##_##NAME##_ss,    NULL }
2257
// conversion: [0] = _r_r, [1] = _r_s, [2] = _s_r, [3] = _s_s; Compile_Convert indexes with destInSlot * 2 + sourceInSlot
#define d_convertOpList(OP)                 { op_##OP##_r_r,            op_##OP##_r_s,              op_##OP##_s_r,              op_##OP##_s_s }
2258
2259
2260
const M3OpInfo c_operations [] =
2261
{
2262
    M3OP( "unreachable",         0, none,   d_logOp (Unreachable),              Compile_Unreachable ),  // 0x00
2263
    M3OP( "nop",                 0, none,   d_emptyOpList,                      Compile_Nop ),          // 0x01 .
2264
    M3OP( "block",               0, none,   d_emptyOpList,                      Compile_LoopOrBlock ),  // 0x02
2265
    M3OP( "loop",                0, none,   d_logOp (Loop),                     Compile_LoopOrBlock ),  // 0x03
2266
    M3OP( "if",                 -1, none,   d_emptyOpList,                      Compile_If ),           // 0x04
2267
    M3OP( "else",                0, none,   d_emptyOpList,                      Compile_Nop ),          // 0x05
2268
2269
    M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                          // 0x06...0x0a
2270
2271
    M3OP( "end",                 0, none,   d_emptyOpList,                      Compile_End ),          // 0x0b
2272
    M3OP( "br",                  0, none,   d_logOp (Branch),                   Compile_Branch ),       // 0x0c
2273
    M3OP( "br_if",              -1, none,   d_logOp2 (BranchIf_r, BranchIf_s),  Compile_Branch ),       // 0x0d
2274
    M3OP( "br_table",           -1, none,   d_logOp (BranchTable),              Compile_BranchTable ),  // 0x0e
2275
    M3OP( "return",              0, any,    d_logOp (Return),                   Compile_Return ),       // 0x0f
2276
    M3OP( "call",                0, any,    d_logOp (Call),                     Compile_Call ),         // 0x10
2277
    M3OP( "call_indirect",       0, any,    d_logOp (CallIndirect),             Compile_CallIndirect ), // 0x11
2278
    M3OP( "return_call",         0, any,    d_emptyOpList,                      Compile_Call ),         // 0x12 TODO: Optimize
2279
    M3OP( "return_call_indirect",0, any,    d_emptyOpList,                      Compile_CallIndirect ), // 0x13
2280
2281
    M3OP_RESERVED,  M3OP_RESERVED,                                                                      // 0x14...
2282
    M3OP_RESERVED,  M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                        // ...0x19
2283
2284
    M3OP( "drop",               -1, none,   d_emptyOpList,                      Compile_Drop ),         // 0x1a
2285
    M3OP( "select",             -2, any,    d_emptyOpList,                      Compile_Select  ),      // 0x1b
2286
2287
    M3OP_RESERVED,  M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                        // 0x1c...0x1f
2288
2289
    M3OP( "local.get",          1,  any,    d_emptyOpList,                      Compile_GetLocal ),     // 0x20
2290
    M3OP( "local.set",          1,  none,   d_emptyOpList,                      Compile_SetLocal ),     // 0x21
2291
    M3OP( "local.tee",          0,  any,    d_emptyOpList,                      Compile_SetLocal ),     // 0x22
2292
    M3OP( "global.get",         1,  none,   d_emptyOpList,                      Compile_GetSetGlobal ), // 0x23
2293
    M3OP( "global.set",         1,  none,   d_emptyOpList,                      Compile_GetSetGlobal ), // 0x24
2294
2295
    M3OP_RESERVED,  M3OP_RESERVED, M3OP_RESERVED,                                                       // 0x25...0x27
2296
2297
    M3OP( "i32.load",           0,  i_32,   d_unaryOpList (i32, Load_i32),      Compile_Load_Store ),   // 0x28
2298
    M3OP( "i64.load",           0,  i_64,   d_unaryOpList (i64, Load_i64),      Compile_Load_Store ),   // 0x29
2299
    M3OP_F( "f32.load",         0,  f_32,   d_unaryOpList (f32, Load_f32),      Compile_Load_Store ),   // 0x2a
2300
    M3OP_F( "f64.load",         0,  f_64,   d_unaryOpList (f64, Load_f64),      Compile_Load_Store ),   // 0x2b
2301
2302
    M3OP( "i32.load8_s",        0,  i_32,   d_unaryOpList (i32, Load_i8),       Compile_Load_Store ),   // 0x2c
2303
    M3OP( "i32.load8_u",        0,  i_32,   d_unaryOpList (i32, Load_u8),       Compile_Load_Store ),   // 0x2d
2304
    M3OP( "i32.load16_s",       0,  i_32,   d_unaryOpList (i32, Load_i16),      Compile_Load_Store ),   // 0x2e
2305
    M3OP( "i32.load16_u",       0,  i_32,   d_unaryOpList (i32, Load_u16),      Compile_Load_Store ),   // 0x2f
2306
2307
    M3OP( "i64.load8_s",        0,  i_64,   d_unaryOpList (i64, Load_i8),       Compile_Load_Store ),   // 0x30
2308
    M3OP( "i64.load8_u",        0,  i_64,   d_unaryOpList (i64, Load_u8),       Compile_Load_Store ),   // 0x31
2309
    M3OP( "i64.load16_s",       0,  i_64,   d_unaryOpList (i64, Load_i16),      Compile_Load_Store ),   // 0x32
2310
    M3OP( "i64.load16_u",       0,  i_64,   d_unaryOpList (i64, Load_u16),      Compile_Load_Store ),   // 0x33
2311
    M3OP( "i64.load32_s",       0,  i_64,   d_unaryOpList (i64, Load_i32),      Compile_Load_Store ),   // 0x34
2312
    M3OP( "i64.load32_u",       0,  i_64,   d_unaryOpList (i64, Load_u32),      Compile_Load_Store ),   // 0x35
2313
2314
    M3OP( "i32.store",          -2, none,   d_binOpList (i32, Store_i32),       Compile_Load_Store ),   // 0x36
2315
    M3OP( "i64.store",          -2, none,   d_binOpList (i64, Store_i64),       Compile_Load_Store ),   // 0x37
2316
    M3OP_F( "f32.store",        -2, none,   d_storeFpOpList (f32, Store_f32),   Compile_Load_Store ),   // 0x38
2317
    M3OP_F( "f64.store",        -2, none,   d_storeFpOpList (f64, Store_f64),   Compile_Load_Store ),   // 0x39
2318
2319
    M3OP( "i32.store8",         -2, none,   d_binOpList (i32, Store_u8),        Compile_Load_Store ),   // 0x3a
2320
    M3OP( "i32.store16",        -2, none,   d_binOpList (i32, Store_i16),       Compile_Load_Store ),   // 0x3b
2321
2322
    M3OP( "i64.store8",         -2, none,   d_binOpList (i64, Store_u8),        Compile_Load_Store ),   // 0x3c
2323
    M3OP( "i64.store16",        -2, none,   d_binOpList (i64, Store_i16),       Compile_Load_Store ),   // 0x3d
2324
    M3OP( "i64.store32",        -2, none,   d_binOpList (i64, Store_i32),       Compile_Load_Store ),   // 0x3e
2325
2326
    M3OP( "memory.size",        1,  i_32,   d_logOp (MemSize),                  Compile_Memory_Size ),  // 0x3f
2327
    M3OP( "memory.grow",        1,  i_32,   d_logOp (MemGrow),                  Compile_Memory_Grow ),  // 0x40
2328
2329
    M3OP( "i32.const",          1,  i_32,   d_logOp (Const32),                  Compile_Const_i32 ),    // 0x41
2330
    M3OP( "i64.const",          1,  i_64,   d_logOp (Const64),                  Compile_Const_i64 ),    // 0x42
2331
    M3OP_F( "f32.const",        1,  f_32,   d_emptyOpList,                      Compile_Const_f32 ),    // 0x43
2332
    M3OP_F( "f64.const",        1,  f_64,   d_emptyOpList,                      Compile_Const_f64 ),    // 0x44
2333
2334
    M3OP( "i32.eqz",            0,  i_32,   d_unaryOpList (i32, EqualToZero)        , NULL  ),          // 0x45
2335
    M3OP( "i32.eq",             -1, i_32,   d_commutativeBinOpList (i32, Equal)     , NULL  ),          // 0x46
2336
    M3OP( "i32.ne",             -1, i_32,   d_commutativeBinOpList (i32, NotEqual)  , NULL  ),          // 0x47
2337
    M3OP( "i32.lt_s",           -1, i_32,   d_binOpList (i32, LessThan)             , NULL  ),          // 0x48
2338
    M3OP( "i32.lt_u",           -1, i_32,   d_binOpList (u32, LessThan)             , NULL  ),          // 0x49
2339
    M3OP( "i32.gt_s",           -1, i_32,   d_binOpList (i32, GreaterThan)          , NULL  ),          // 0x4a
2340
    M3OP( "i32.gt_u",           -1, i_32,   d_binOpList (u32, GreaterThan)          , NULL  ),          // 0x4b
2341
    M3OP( "i32.le_s",           -1, i_32,   d_binOpList (i32, LessThanOrEqual)      , NULL  ),          // 0x4c
2342
    M3OP( "i32.le_u",           -1, i_32,   d_binOpList (u32, LessThanOrEqual)      , NULL  ),          // 0x4d
2343
    M3OP( "i32.ge_s",           -1, i_32,   d_binOpList (i32, GreaterThanOrEqual)   , NULL  ),          // 0x4e
2344
    M3OP( "i32.ge_u",           -1, i_32,   d_binOpList (u32, GreaterThanOrEqual)   , NULL  ),          // 0x4f
2345
2346
    M3OP( "i64.eqz",            0,  i_32,   d_unaryOpList (i64, EqualToZero)        , NULL  ),          // 0x50
2347
    M3OP( "i64.eq",             -1, i_32,   d_commutativeBinOpList (i64, Equal)     , NULL  ),          // 0x51
2348
    M3OP( "i64.ne",             -1, i_32,   d_commutativeBinOpList (i64, NotEqual)  , NULL  ),          // 0x52
2349
    M3OP( "i64.lt_s",           -1, i_32,   d_binOpList (i64, LessThan)             , NULL  ),          // 0x53
2350
    M3OP( "i64.lt_u",           -1, i_32,   d_binOpList (u64, LessThan)             , NULL  ),          // 0x54
2351
    M3OP( "i64.gt_s",           -1, i_32,   d_binOpList (i64, GreaterThan)          , NULL  ),          // 0x55
2352
    M3OP( "i64.gt_u",           -1, i_32,   d_binOpList (u64, GreaterThan)          , NULL  ),          // 0x56
2353
    M3OP( "i64.le_s",           -1, i_32,   d_binOpList (i64, LessThanOrEqual)      , NULL  ),          // 0x57
2354
    M3OP( "i64.le_u",           -1, i_32,   d_binOpList (u64, LessThanOrEqual)      , NULL  ),          // 0x58
2355
    M3OP( "i64.ge_s",           -1, i_32,   d_binOpList (i64, GreaterThanOrEqual)   , NULL  ),          // 0x59
2356
    M3OP( "i64.ge_u",           -1, i_32,   d_binOpList (u64, GreaterThanOrEqual)   , NULL  ),          // 0x5a
2357
2358
    M3OP_F( "f32.eq",           -1, i_32,   d_commutativeBinOpList (f32, Equal)     , NULL  ),          // 0x5b
2359
    M3OP_F( "f32.ne",           -1, i_32,   d_commutativeBinOpList (f32, NotEqual)  , NULL  ),          // 0x5c
2360
    M3OP_F( "f32.lt",           -1, i_32,   d_binOpList (f32, LessThan)             , NULL  ),          // 0x5d
2361
    M3OP_F( "f32.gt",           -1, i_32,   d_binOpList (f32, GreaterThan)          , NULL  ),          // 0x5e
2362
    M3OP_F( "f32.le",           -1, i_32,   d_binOpList (f32, LessThanOrEqual)      , NULL  ),          // 0x5f
2363
    M3OP_F( "f32.ge",           -1, i_32,   d_binOpList (f32, GreaterThanOrEqual)   , NULL  ),          // 0x60
2364
2365
    M3OP_F( "f64.eq",           -1, i_32,   d_commutativeBinOpList (f64, Equal)     , NULL  ),          // 0x61
2366
    M3OP_F( "f64.ne",           -1, i_32,   d_commutativeBinOpList (f64, NotEqual)  , NULL  ),          // 0x62
2367
    M3OP_F( "f64.lt",           -1, i_32,   d_binOpList (f64, LessThan)             , NULL  ),          // 0x63
2368
    M3OP_F( "f64.gt",           -1, i_32,   d_binOpList (f64, GreaterThan)          , NULL  ),          // 0x64
2369
    M3OP_F( "f64.le",           -1, i_32,   d_binOpList (f64, LessThanOrEqual)      , NULL  ),          // 0x65
2370
    M3OP_F( "f64.ge",           -1, i_32,   d_binOpList (f64, GreaterThanOrEqual)   , NULL  ),          // 0x66
2371
2372
    M3OP( "i32.clz",            0,  i_32,   d_unaryOpList (u32, Clz)                , NULL  ),          // 0x67
2373
    M3OP( "i32.ctz",            0,  i_32,   d_unaryOpList (u32, Ctz)                , NULL  ),          // 0x68
2374
    M3OP( "i32.popcnt",         0,  i_32,   d_unaryOpList (u32, Popcnt)             , NULL  ),          // 0x69
2375
2376
    M3OP( "i32.add",            -1, i_32,   d_commutativeBinOpList (i32, Add)       , NULL  ),          // 0x6a
2377
    M3OP( "i32.sub",            -1, i_32,   d_binOpList (i32, Subtract)             , NULL  ),          // 0x6b
2378
    M3OP( "i32.mul",            -1, i_32,   d_commutativeBinOpList (i32, Multiply)  , NULL  ),          // 0x6c
2379
    M3OP( "i32.div_s",          -1, i_32,   d_binOpList (i32, Divide)               , NULL  ),          // 0x6d
2380
    M3OP( "i32.div_u",          -1, i_32,   d_binOpList (u32, Divide)               , NULL  ),          // 0x6e
2381
    M3OP( "i32.rem_s",          -1, i_32,   d_binOpList (i32, Remainder)            , NULL  ),          // 0x6f
2382
    M3OP( "i32.rem_u",          -1, i_32,   d_binOpList (u32, Remainder)            , NULL  ),          // 0x70
2383
    M3OP( "i32.and",            -1, i_32,   d_commutativeBinOpList (u32, And)       , NULL  ),          // 0x71
2384
    M3OP( "i32.or",             -1, i_32,   d_commutativeBinOpList (u32, Or)        , NULL  ),          // 0x72
2385
    M3OP( "i32.xor",            -1, i_32,   d_commutativeBinOpList (u32, Xor)       , NULL  ),          // 0x73
2386
    M3OP( "i32.shl",            -1, i_32,   d_binOpList (u32, ShiftLeft)            , NULL  ),          // 0x74
2387
    M3OP( "i32.shr_s",          -1, i_32,   d_binOpList (i32, ShiftRight)           , NULL  ),          // 0x75
2388
    M3OP( "i32.shr_u",          -1, i_32,   d_binOpList (u32, ShiftRight)           , NULL  ),          // 0x76
2389
    M3OP( "i32.rotl",           -1, i_32,   d_binOpList (u32, Rotl)                 , NULL  ),          // 0x77
2390
    M3OP( "i32.rotr",           -1, i_32,   d_binOpList (u32, Rotr)                 , NULL  ),          // 0x78
2391
2392
    M3OP( "i64.clz",            0,  i_64,   d_unaryOpList (u64, Clz)                , NULL  ),          // 0x79
2393
    M3OP( "i64.ctz",            0,  i_64,   d_unaryOpList (u64, Ctz)                , NULL  ),          // 0x7a
2394
    M3OP( "i64.popcnt",         0,  i_64,   d_unaryOpList (u64, Popcnt)             , NULL  ),          // 0x7b
2395
2396
    M3OP( "i64.add",            -1, i_64,   d_commutativeBinOpList (i64, Add)       , NULL  ),          // 0x7c
2397
    M3OP( "i64.sub",            -1, i_64,   d_binOpList (i64, Subtract)             , NULL  ),          // 0x7d
2398
    M3OP( "i64.mul",            -1, i_64,   d_commutativeBinOpList (i64, Multiply)  , NULL  ),          // 0x7e
2399
    M3OP( "i64.div_s",          -1, i_64,   d_binOpList (i64, Divide)               , NULL  ),          // 0x7f
2400
    M3OP( "i64.div_u",          -1, i_64,   d_binOpList (u64, Divide)               , NULL  ),          // 0x80
2401
    M3OP( "i64.rem_s",          -1, i_64,   d_binOpList (i64, Remainder)            , NULL  ),          // 0x81
2402
    M3OP( "i64.rem_u",          -1, i_64,   d_binOpList (u64, Remainder)            , NULL  ),          // 0x82
2403
    M3OP( "i64.and",            -1, i_64,   d_commutativeBinOpList (u64, And)       , NULL  ),          // 0x83
2404
    M3OP( "i64.or",             -1, i_64,   d_commutativeBinOpList (u64, Or)        , NULL  ),          // 0x84
2405
    M3OP( "i64.xor",            -1, i_64,   d_commutativeBinOpList (u64, Xor)       , NULL  ),          // 0x85
2406
    M3OP( "i64.shl",            -1, i_64,   d_binOpList (u64, ShiftLeft)            , NULL  ),          // 0x86
2407
    M3OP( "i64.shr_s",          -1, i_64,   d_binOpList (i64, ShiftRight)           , NULL  ),          // 0x87
2408
    M3OP( "i64.shr_u",          -1, i_64,   d_binOpList (u64, ShiftRight)           , NULL  ),          // 0x88
2409
    M3OP( "i64.rotl",           -1, i_64,   d_binOpList (u64, Rotl)                 , NULL  ),          // 0x89
2410
    M3OP( "i64.rotr",           -1, i_64,   d_binOpList (u64, Rotr)                 , NULL  ),          // 0x8a
2411
2412
    M3OP_F( "f32.abs",          0,  f_32,   d_unaryOpList(f32, Abs)                 , NULL  ),          // 0x8b
2413
    M3OP_F( "f32.neg",          0,  f_32,   d_unaryOpList(f32, Negate)              , NULL  ),          // 0x8c
2414
    M3OP_F( "f32.ceil",         0,  f_32,   d_unaryOpList(f32, Ceil)                , NULL  ),          // 0x8d
2415
    M3OP_F( "f32.floor",        0,  f_32,   d_unaryOpList(f32, Floor)               , NULL  ),          // 0x8e
2416
    M3OP_F( "f32.trunc",        0,  f_32,   d_unaryOpList(f32, Trunc)               , NULL  ),          // 0x8f
2417
    M3OP_F( "f32.nearest",      0,  f_32,   d_unaryOpList(f32, Nearest)             , NULL  ),          // 0x90
2418
    M3OP_F( "f32.sqrt",         0,  f_32,   d_unaryOpList(f32, Sqrt)                , NULL  ),          // 0x91
2419
2420
    M3OP_F( "f32.add",          -1, f_32,   d_commutativeBinOpList (f32, Add)       , NULL  ),          // 0x92
2421
    M3OP_F( "f32.sub",          -1, f_32,   d_binOpList (f32, Subtract)             , NULL  ),          // 0x93
2422
    M3OP_F( "f32.mul",          -1, f_32,   d_commutativeBinOpList (f32, Multiply)  , NULL  ),          // 0x94
2423
    M3OP_F( "f32.div",          -1, f_32,   d_binOpList (f32, Divide)               , NULL  ),          // 0x95
2424
    M3OP_F( "f32.min",          -1, f_32,   d_commutativeBinOpList (f32, Min)       , NULL  ),          // 0x96
2425
    M3OP_F( "f32.max",          -1, f_32,   d_commutativeBinOpList (f32, Max)       , NULL  ),          // 0x97
2426
    M3OP_F( "f32.copysign",     -1, f_32,   d_binOpList (f32, CopySign)             , NULL  ),          // 0x98
2427
2428
    M3OP_F( "f64.abs",          0,  f_64,   d_unaryOpList(f64, Abs)                 , NULL  ),          // 0x99
2429
    M3OP_F( "f64.neg",          0,  f_64,   d_unaryOpList(f64, Negate)              , NULL  ),          // 0x9a
2430
    M3OP_F( "f64.ceil",         0,  f_64,   d_unaryOpList(f64, Ceil)                , NULL  ),          // 0x9b
2431
    M3OP_F( "f64.floor",        0,  f_64,   d_unaryOpList(f64, Floor)               , NULL  ),          // 0x9c
2432
    M3OP_F( "f64.trunc",        0,  f_64,   d_unaryOpList(f64, Trunc)               , NULL  ),          // 0x9d
2433
    M3OP_F( "f64.nearest",      0,  f_64,   d_unaryOpList(f64, Nearest)             , NULL  ),          // 0x9e
2434
    M3OP_F( "f64.sqrt",         0,  f_64,   d_unaryOpList(f64, Sqrt)                , NULL  ),          // 0x9f
2435
2436
    M3OP_F( "f64.add",          -1, f_64,   d_commutativeBinOpList (f64, Add)       , NULL  ),          // 0xa0
2437
    M3OP_F( "f64.sub",          -1, f_64,   d_binOpList (f64, Subtract)             , NULL  ),          // 0xa1
2438
    M3OP_F( "f64.mul",          -1, f_64,   d_commutativeBinOpList (f64, Multiply)  , NULL  ),          // 0xa2
2439
    M3OP_F( "f64.div",          -1, f_64,   d_binOpList (f64, Divide)               , NULL  ),          // 0xa3
2440
    M3OP_F( "f64.min",          -1, f_64,   d_commutativeBinOpList (f64, Min)       , NULL  ),          // 0xa4
2441
    M3OP_F( "f64.max",          -1, f_64,   d_commutativeBinOpList (f64, Max)       , NULL  ),          // 0xa5
2442
    M3OP_F( "f64.copysign",     -1, f_64,   d_binOpList (f64, CopySign)             , NULL  ),          // 0xa6
2443
2444
    M3OP( "i32.wrap/i64",       0,  i_32,   d_unaryOpList (i32, Wrap_i64),          NULL    ),          // 0xa7
2445
    M3OP_F( "i32.trunc_s/f32",  0,  i_32,   d_convertOpList (i32_Trunc_f32),        Compile_Convert ),  // 0xa8
2446
    M3OP_F( "i32.trunc_u/f32",  0,  i_32,   d_convertOpList (u32_Trunc_f32),        Compile_Convert ),  // 0xa9
2447
    M3OP_F( "i32.trunc_s/f64",  0,  i_32,   d_convertOpList (i32_Trunc_f64),        Compile_Convert ),  // 0xaa
2448
    M3OP_F( "i32.trunc_u/f64",  0,  i_32,   d_convertOpList (u32_Trunc_f64),        Compile_Convert ),  // 0xab
2449
2450
    M3OP( "i64.extend_s/i32",   0,  i_64,   d_unaryOpList (i64, Extend_i32),        NULL    ),          // 0xac
2451
    M3OP( "i64.extend_u/i32",   0,  i_64,   d_unaryOpList (i64, Extend_u32),        NULL    ),          // 0xad
2452
2453
    M3OP_F( "i64.trunc_s/f32",  0,  i_64,   d_convertOpList (i64_Trunc_f32),        Compile_Convert ),  // 0xae
2454
    M3OP_F( "i64.trunc_u/f32",  0,  i_64,   d_convertOpList (u64_Trunc_f32),        Compile_Convert ),  // 0xaf
2455
    M3OP_F( "i64.trunc_s/f64",  0,  i_64,   d_convertOpList (i64_Trunc_f64),        Compile_Convert ),  // 0xb0
2456
    M3OP_F( "i64.trunc_u/f64",  0,  i_64,   d_convertOpList (u64_Trunc_f64),        Compile_Convert ),  // 0xb1
2457
2458
    M3OP_F( "f32.convert_s/i32",0,  f_32,   d_convertOpList (f32_Convert_i32),      Compile_Convert ),  // 0xb2
2459
    M3OP_F( "f32.convert_u/i32",0,  f_32,   d_convertOpList (f32_Convert_u32),      Compile_Convert ),  // 0xb3
2460
    M3OP_F( "f32.convert_s/i64",0,  f_32,   d_convertOpList (f32_Convert_i64),      Compile_Convert ),  // 0xb4
2461
    M3OP_F( "f32.convert_u/i64",0,  f_32,   d_convertOpList (f32_Convert_u64),      Compile_Convert ),  // 0xb5
2462
2463
    M3OP_F( "f32.demote/f64",   0,  f_32,   d_unaryOpList (f32, Demote_f64),        NULL    ),          // 0xb6
2464
2465
    M3OP_F( "f64.convert_s/i32",0,  f_64,   d_convertOpList (f64_Convert_i32),      Compile_Convert ),  // 0xb7
2466
    M3OP_F( "f64.convert_u/i32",0,  f_64,   d_convertOpList (f64_Convert_u32),      Compile_Convert ),  // 0xb8
2467
    M3OP_F( "f64.convert_s/i64",0,  f_64,   d_convertOpList (f64_Convert_i64),      Compile_Convert ),  // 0xb9
2468
    M3OP_F( "f64.convert_u/i64",0,  f_64,   d_convertOpList (f64_Convert_u64),      Compile_Convert ),  // 0xba
2469
2470
    M3OP_F( "f64.promote/f32",  0,  f_64,   d_unaryOpList (f64, Promote_f32),       NULL    ),          // 0xbb
2471
2472
    M3OP_F( "i32.reinterpret/f32",0,i_32,   d_convertOpList (i32_Reinterpret_f32),  Compile_Convert ),  // 0xbc
2473
    M3OP_F( "i64.reinterpret/f64",0,i_64,   d_convertOpList (i64_Reinterpret_f64),  Compile_Convert ),  // 0xbd
2474
    M3OP_F( "f32.reinterpret/i32",0,f_32,   d_convertOpList (f32_Reinterpret_i32),  Compile_Convert ),  // 0xbe
2475
    M3OP_F( "f64.reinterpret/i64",0,f_64,   d_convertOpList (f64_Reinterpret_i64),  Compile_Convert ),  // 0xbf
2476
2477
    M3OP( "i32.extend8_s",       0,  i_32,   d_unaryOpList (i32, Extend8_s),        NULL    ),          // 0xc0
2478
    M3OP( "i32.extend16_s",      0,  i_32,   d_unaryOpList (i32, Extend16_s),       NULL    ),          // 0xc1
2479
    M3OP( "i64.extend8_s",       0,  i_64,   d_unaryOpList (i64, Extend8_s),        NULL    ),          // 0xc2
2480
    M3OP( "i64.extend16_s",      0,  i_64,   d_unaryOpList (i64, Extend16_s),       NULL    ),          // 0xc3
2481
    M3OP( "i64.extend32_s",      0,  i_64,   d_unaryOpList (i64, Extend32_s),       NULL    ),          // 0xc4
2482
2483
# ifdef DEBUG // for codepage logging. the order doesn't matter:
2484
#   define d_m3DebugOp(OP) M3OP (#OP, 0, none, { op_##OP })
2485
2486
# if d_m3HasFloat
2487
#   define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64, op_##OP##_f32, op_##OP##_f64, })
2488
# else
2489
#   define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64 })
2490
# endif
2491
2492
    d_m3DebugOp (Compile),          d_m3DebugOp (Entry),            d_m3DebugOp (End),
2493
    d_m3DebugOp (Unsupported),      d_m3DebugOp (CallRawFunction),
2494
2495
    d_m3DebugOp (GetGlobal_s32),    d_m3DebugOp (GetGlobal_s64),    d_m3DebugOp (ContinueLoop),     d_m3DebugOp (ContinueLoopIf),
2496
2497
    d_m3DebugOp (CopySlot_32),      d_m3DebugOp (PreserveCopySlot_32), d_m3DebugOp (If_s),          d_m3DebugOp (BranchIfPrologue_s),
2498
    d_m3DebugOp (CopySlot_64),      d_m3DebugOp (PreserveCopySlot_64), d_m3DebugOp (If_r),          d_m3DebugOp (BranchIfPrologue_r),
2499
2500
    d_m3DebugOp (Select_i32_rss),   d_m3DebugOp (Select_i32_srs),   d_m3DebugOp (Select_i32_ssr),   d_m3DebugOp (Select_i32_sss),
2501
    d_m3DebugOp (Select_i64_rss),   d_m3DebugOp (Select_i64_srs),   d_m3DebugOp (Select_i64_ssr),   d_m3DebugOp (Select_i64_sss),
2502
2503
# if d_m3HasFloat
2504
    d_m3DebugOp (Select_f32_sss),   d_m3DebugOp (Select_f32_srs),   d_m3DebugOp (Select_f32_ssr),
2505
    d_m3DebugOp (Select_f32_rss),   d_m3DebugOp (Select_f32_rrs),   d_m3DebugOp (Select_f32_rsr),
2506
2507
    d_m3DebugOp (Select_f64_sss),   d_m3DebugOp (Select_f64_srs),   d_m3DebugOp (Select_f64_ssr),
2508
    d_m3DebugOp (Select_f64_rss),   d_m3DebugOp (Select_f64_rrs),   d_m3DebugOp (Select_f64_rsr),
2509
# endif
2510
2511
    d_m3DebugOp (MemFill),          d_m3DebugOp (MemCopy),
2512
2513
    d_m3DebugTypedOp (SetGlobal),   d_m3DebugOp (SetGlobal_s32),    d_m3DebugOp (SetGlobal_s64),
2514
2515
    d_m3DebugTypedOp (SetRegister), d_m3DebugTypedOp (SetSlot),     d_m3DebugTypedOp (PreserveSetSlot),
2516
# endif
2517
2518
# if d_m3CascadedOpcodes
2519
    [c_waOp_extended] = M3OP( "0xFC", 0, c_m3Type_unknown,   d_emptyOpList,  Compile_ExtendedOpcode ),
2520
# endif
2521
2522
# ifdef DEBUG
2523
    M3OP( "termination", 0, c_m3Type_unknown ) // for find_operation_info
2524
# endif
2525
};
2526
2527
const M3OpInfo c_operationsFC [] =
2528
{
2529
    M3OP_F( "i32.trunc_s:sat/f32",0,  i_32,   d_convertOpList (i32_TruncSat_f32),        Compile_Convert ),  // 0x00
2530
    M3OP_F( "i32.trunc_u:sat/f32",0,  i_32,   d_convertOpList (u32_TruncSat_f32),        Compile_Convert ),  // 0x01
2531
    M3OP_F( "i32.trunc_s:sat/f64",0,  i_32,   d_convertOpList (i32_TruncSat_f64),        Compile_Convert ),  // 0x02
2532
    M3OP_F( "i32.trunc_u:sat/f64",0,  i_32,   d_convertOpList (u32_TruncSat_f64),        Compile_Convert ),  // 0x03
2533
    M3OP_F( "i64.trunc_s:sat/f32",0,  i_64,   d_convertOpList (i64_TruncSat_f32),        Compile_Convert ),  // 0x04
2534
    M3OP_F( "i64.trunc_u:sat/f32",0,  i_64,   d_convertOpList (u64_TruncSat_f32),        Compile_Convert ),  // 0x05
2535
    M3OP_F( "i64.trunc_s:sat/f64",0,  i_64,   d_convertOpList (i64_TruncSat_f64),        Compile_Convert ),  // 0x06
2536
    M3OP_F( "i64.trunc_u:sat/f64",0,  i_64,   d_convertOpList (u64_TruncSat_f64),        Compile_Convert ),  // 0x07
2537
2538
    M3OP_RESERVED, M3OP_RESERVED,
2539
2540
    M3OP( "memory.copy",            0,  none,   d_emptyOpList,                           Compile_Memory_CopyFill ), // 0x0a
2541
    M3OP( "memory.fill",            0,  none,   d_emptyOpList,                           Compile_Memory_CopyFill ), // 0x0b
2542
2543
2544
# ifdef DEBUG
2545
    M3OP( "termination", 0, c_m3Type_unknown ) // for find_operation_info
2546
# endif
2547
};
2548
2549
2550
// Returns the descriptor for a decoded opcode, or NULL when there is none.
//
// The high byte of `opcode` picks the table:
//   0x00             -> c_operations,   indexed by the opcode itself
//   c_waOp_extended  -> c_operationsFC, indexed by the opcode's low byte
// Any other high byte, or an index beyond the end of the chosen table, yields NULL.
IM3OpInfo  GetOpInfo  (m3opcode_t opcode)
{
    const m3opcode_t page = opcode >> 8;

    if (page == 0x00)
    {
        if (M3_LIKELY (opcode < M3_COUNT_OF (c_operations)))
            return & c_operations [opcode];
    }
    else if (page == c_waOp_extended)
    {
        const m3opcode_t index = opcode & 0xFF;

        if (M3_LIKELY (index < M3_COUNT_OF (c_operationsFC)))
            return & c_operationsFC [index];
    }

    return NULL;
}
2567
2568
// Compiles opcodes one at a time from o->wasm until an `end` or `else` opcode has been compiled,
// or until o->wasmEnd is reached.
//
// - When there is no owning function (o->function is null) only the const opcodes, getGlobal and
//   end are accepted; anything else fails with m3Err_restrictedOpcode.
// - An opcode without a descriptor fails with m3Err_unknownOpcode.
// - `else` is only accepted when the current block was opened by `if`.
// - Running out of input without seeing `end`/`else` fails with m3Err_wasmMalformed.
//
// Returns m3Err_none on success, otherwise the first error encountered.
M3Result  CompileBlockStatements  (IM3Compilation o)
{
    M3Result result = m3Err_none;
    bool terminated = false;        // set once `end` or a legal `else` has been compiled

    while (not terminated and o->wasm < o->wasmEnd)
    {
# if d_m3EnableOpTracing
        if (o->numEmits)
        {
            EmitOp          (o, op_DumpStack);
            EmitConstant32  (o, o->numOpcodes);
            EmitConstant32  (o, GetMaxUsedSlotPlusOne(o));
            EmitPointer     (o, o->function);

            o->numEmits = 0;
        }
# endif
        m3opcode_t opcode;
        o->lastOpcodeStart = o->wasm;
_       (Read_opcode (& opcode, & o->wasm, o->wasmEnd));                log_opcode (o, opcode);

        // Restrict opcodes when evaluating expressions
        if (not o->function)
        {
            const bool permitted =     opcode == c_waOp_i32_const or opcode == c_waOp_i64_const
                                    or opcode == c_waOp_f32_const or opcode == c_waOp_f64_const
                                    or opcode == c_waOp_getGlobal or opcode == c_waOp_end;

            _throwif (m3Err_restrictedOpcode, not permitted);
        }

        IM3OpInfo opinfo = GetOpInfo (opcode);

        if (not opinfo)
            _throw (ErrorCompile (m3Err_unknownOpcode, o, "opcode '%x' not available", opcode));

        // opcodes without a dedicated compiler go through the generic operator path
        if (not opinfo->compiler)
        {
_           (Compile_Operator (o, opcode));
        }
        else
        {
_           (opinfo->compiler (o, opcode));
        }

        o->previousOpcode = opcode;

        switch (opcode)
        {
            case c_waOp_else:
                _throwif (m3Err_wasmMalformed, o->block.opcode != c_waOp_if);
                terminated = true;
                break;

            case c_waOp_end:
                terminated = true;
                break;

            default:
                break;
        }
    }

    _throwif (m3Err_wasmMalformed, not terminated);

_catch:
    return result;
}
2632
2633
static
2634
M3Result  PushBlockResults  (IM3Compilation o)
2635
699
{
2636
699
    M3Result result = m3Err_none;
2637
2638
699
    u16 numResults = GetFuncTypeNumResults (o->block.type);
2639
2640
25.5k
    for (u16 i = 0; i < numResults; ++i)
2641
24.9k
    {
2642
24.9k
        u8 type = GetFuncTypeResultType (o->block.type, i);
2643
2644
24.9k
        if (i == numResults - 1 and IsFpType (type))
2645
34
        {
2646
34
_           (PushRegister (o, type));
2647
34
        }
2648
24.8k
        else
2649
24.8k
_           (PushAllocatedSlot (o, type));
2650
24.8k
    }
2651
2652
699
    _catch: return result;
2653
699
}
2654
2655
2656
// Compiles one structured block (its statements up to the matching `end`/`else`).
//
//   o              compilation state; o->block is replaced by a new scope for the duration of the
//                  block and restored from the saved copy on the success path only
//   i_blockType    signature of the block (params and results)
//   i_blockOpcode  opcode that opened the block; c_waOp_else and c_waOp_if get special handling below
//
// Returns m3Err_none on success, otherwise the first error raised while setting up the block
// frame, compiling its statements, or resolving its results.
M3Result  CompileBlock  (IM3Compilation o, IM3FuncType i_blockType, m3opcode_t i_blockOpcode)
{
                                                                                        d_m3Assert (not IsRegisterAllocated (o, 0));
                                                                                        d_m3Assert (not IsRegisterAllocated (o, 1));
    M3CompilationScope outerScope = o->block;
    M3CompilationScope * block = & o->block;

    block->outer            = & outerScope;
    block->pc               = GetPagePC (o->page);
    block->patches          = NULL;
    block->type             = i_blockType;
    block->depth            ++;
    block->opcode           = i_blockOpcode;

    /*
     The block stack frame is a little strange but for good reasons.  Because blocks need to be restarted to
     compile different pathways (if/else), the incoming params must be saved.  The parameters are popped
     and validated.  But, then the stack top is readjusted so they aren't subsequently overwritten.
     Next, the results are preallocated to find destination slots.  But again these are immediately popped
     (deallocated) and the stack top is readjusted to keep these records in place. This allows branch instructions
     to find their result landing pads.  Finally, the params are copied from the "dead" records and pushed back
     onto the stack as active stack items for the CompileBlockStatements () call.

    [     block      ]
    [     params     ]
    ------------------
    [     result     ]  <---- blockStackIndex
    [      slots     ]
    ------------------
    [   saved param  ]
    [     records    ]
                        <----- exitStackIndex
    */

_try {
    // validate and dealloc params ----------------------------

    u16 stackIndex = o->stackIndex;

    u16 numParams = GetFuncTypeNumParams (i_blockType);

    if (i_blockOpcode != c_waOp_else)
    {
        // params are popped in reverse declaration order (last param is on top of the stack)
        for (u16 i = 0; i < numParams; ++i)
        {
            u8 type = GetFuncTypeParamType (i_blockType, numParams - 1 - i);
_           (PopType (o, type));
        }
    }
    else {
        // else-branch: the params are dropped by index adjustment instead of being type-checked again
        if (IsStackPolymorphic (o) && o->block.blockStackIndex + numParams > o->stackIndex) {
            o->stackIndex = o->block.blockStackIndex;
        } else {
            o->stackIndex -= numParams;
        }
    }

    u16 paramIndex = o->stackIndex;
    block->exitStackIndex = paramIndex; // consume the params at block exit

    // keep copies of param slots in the stack
    o->stackIndex = stackIndex;

    // find slots for the results ----------------------------
    // The error must be propagated: if fewer than numResults entries were pushed, the pop loop
    // below would deallocate entries that do not belong to the results.
_   (PushBlockResults (o));

    stackIndex = o->stackIndex;

    // dealloc but keep record of the result slots in the stack
    // NOTE(review): the return value of Pop () is not checked here -- confirm whether it can fail at this point.
    u16 numResults = GetFuncTypeNumResults (i_blockType);
    while (numResults--)
        Pop (o);

    block->blockStackIndex = o->stackIndex = stackIndex;

    // push the params back onto the stack -------------------
    for (u16 i = 0; i < numParams; ++i)
    {
        u8 type = GetFuncTypeParamType (i_blockType, i);

        u16 slot = GetSlotForStackIndex (o, paramIndex + i);
        // NOTE(review): the return value of Push () is not checked here -- confirm whether it can fail at this point.
        Push (o, type, slot);

        if (slot >= o->slotFirstDynamicIndex && slot != c_slotUnused)
            MarkSlotsAllocatedByType (o, slot, type);
    }

    //--------------------------------------------------------

_   (CompileBlockStatements (o));

_   (ValidateBlockEnd (o));

    if (o->function)    // skip for expressions
    {
        if (not IsStackPolymorphic (o))
_           (ResolveBlockResults (o, & o->block, /* isBranch: */ false));

_       (UnwindBlockStack (o))

        // NOTE(review): numResults was consumed by the post-decrement loop above, so the u16 has
        // wrapped and is non-zero here regardless of the block's result count; as written the
        // first clause is equivalent to (i_blockOpcode == c_waOp_if).  Confirm that is intended.
        if (not ((i_blockOpcode == c_waOp_if and numResults) or o->previousOpcode == c_waOp_else))
        {
            o->stackIndex = o->block.exitStackIndex;
_           (PushBlockResults (o));
        }
    }

    PatchBranches (o);

    o->block = outerScope;

}   _catch: return result;
}
2769
2770
static
2771
M3Result  CompileLocals  (IM3Compilation o)
2772
45
{
2773
45
    M3Result result;
2774
2775
45
    u32 numLocals = 0;
2776
45
    u32 numLocalBlocks;
2777
45
_   (ReadLEB_u32 (& numLocalBlocks, & o->wasm, o->wasmEnd));
2778
2779
47
    for (u32 l = 0; l < numLocalBlocks; ++l)
2780
3
    {
2781
3
        u32 varCount;
2782
3
        i8 waType;
2783
3
        u8 localType;
2784
2785
3
_       (ReadLEB_u32 (& varCount, & o->wasm, o->wasmEnd));
2786
3
_       (ReadLEB_i7 (& waType, & o->wasm, o->wasmEnd));
2787
3
_       (NormalizeType (& localType, waType));
2788
3
        numLocals += varCount;                                                          m3log (compile, "pushing locals. count: %d; type: %s", varCount, c_waTypes [localType]);
2789
1.97k
        while (varCount--)
2790
1.97k
_           (PushAllocatedSlot (o, localType));
2791
2
    }
2792
2793
44
    if (o->function)
2794
44
        o->function->numLocals = numLocals;
2795
2796
45
    _catch: return result;
2797
44
}
2798
2799
static
2800
M3Result  ReserveConstants  (IM3Compilation o)
2801
44
{
2802
44
    M3Result result = m3Err_none;
2803
2804
    // in the interest of speed, this blindly scans the Wasm code looking for any byte
2805
    // that looks like an const opcode.
2806
44
    u16 numConstantSlots = 0;
2807
2808
44
    bytes_t wa = o->wasm;
2809
3.56k
    while (wa < o->wasmEnd)
2810
3.51k
    {
2811
3.51k
        u8 code = * wa++;
2812
3.51k
        u16 addSlots = 0;
2813
2814
3.51k
        if (code == c_waOp_i32_const or code == c_waOp_f32_const)
2815
16
            addSlots = 1;
2816
3.50k
        else if (code == c_waOp_i64_const or code == c_waOp_f64_const)
2817
14
            addSlots = GetTypeNumSlots (c_m3Type_i64);
2818
2819
3.51k
        if (numConstantSlots + addSlots >= d_m3MaxConstantTableSize)
2820
0
            break;
2821
2822
3.51k
        numConstantSlots += addSlots;
2823
3.51k
    }
2824
2825
    // if constants overflow their reserved stack space, the compiler simply emits op_Const
2826
    // operations as needed. Compiled expressions (global inits) don't pass through this
2827
    // ReserveConstants function and thus always produce inline constants.
2828
2829
44
    AlignSlotToType (& numConstantSlots, c_m3Type_i64);                                         m3log (compile, "reserved constant slots: %d", numConstantSlots);
2830
2831
44
    o->slotFirstDynamicIndex = o->slotFirstConstIndex + numConstantSlots;
2832
2833
44
    if (o->slotFirstDynamicIndex >= d_m3MaxFunctionSlots)
2834
44
        _throw (m3Err_functionStackOverflow);
2835
2836
44
    _catch:
2837
44
    return result;
2838
44
}
2839
2840
2841
// Compiles the Wasm body of io_function into the runtime's code pages.
//
// The runtime's shared compilation state (runtime->compilation) is zeroed and reused.  Slots are
// laid out in this order: return slots, argument slots, locals, reserved constants, then the
// dynamic slots.  On success the function's `compiled` entry point, maxStackSlots,
// numConstantBytes and (if any constants were used) a copy of the constant table are stored in
// io_function.  The compilation code page is released on every path after the state is set up.
//
// Returns "function body is missing" when io_function->wasm is null, m3Err_wasmMalformed when
// the body does not finish with an `end` opcode, or the first error from any compilation step.
M3Result  CompileFunction  (IM3Function io_function)
{
    if (not io_function->wasm)
        return "function body is missing";

    IM3FuncType signature = io_function->funcType;                  m3log (compile, "compiling: [%d] %s %s; wasm-size: %d",
                                                                        io_function->index, m3_GetFunctionName (io_function), SPrintFuncTypeSignature (signature), (u32) (io_function->wasmEnd - io_function->wasm));
    IM3Runtime runtime = io_function->module->runtime;

    IM3Compilation o = & runtime->compilation;                      d_m3Assert (d_m3MaxFunctionSlots >= d_m3MaxFunctionStackHeight * (d_m3Use32BitSlots + 1))  // need twice as many slots in 32-bit mode
    memset (o, 0x0, sizeof (M3Compilation));

    o->runtime      = runtime;
    o->module       = io_function->module;
    o->function     = io_function;
    o->wasm         = io_function->wasm;
    o->wasmEnd      = io_function->wasmEnd;
    o->block.type   = signature;

_try {
    // skip over code size. the end was already calculated during parse phase
    u32 bodySize;
_   (ReadLEB_u32 (& bodySize, & o->wasm, o->wasmEnd));              d_m3Assert (bodySize == (o->wasmEnd - o->wasm))

_   (AcquireCompilationCodePage (o, & o->page));

    // remember where this function's code starts; published as the entry point on success
    pc_t entryPC = GetPagePC (o->page);

    // return values occupy the lowest slots
    u16 retSlotCount = GetFunctionNumReturns (o->function) * c_ioSlotCount;

    for (u16 slot = 0; slot < retSlotCount; ++slot)
        MarkSlotAllocated (o, slot);

    o->function->numRetSlots = o->slotFirstDynamicIndex = retSlotCount;

    u16 argCount = GetFunctionNumArgs (o->function);

    // push the arg types to the type stack
    for (u16 arg = 0; arg < argCount; ++arg)
    {
        u8 argType = GetFunctionArgType (o->function, arg);
_       (PushAllocatedSlot (o, argType));

        // prevent allocator fill-in
        o->slotFirstDynamicIndex += c_ioSlotCount;
    }

    o->slotMaxAllocatedIndexPlusOne = o->function->numRetAndArgSlots = o->slotFirstLocalIndex = o->slotFirstDynamicIndex;

_   (CompileLocals (o));

    // locals end where the constant table begins
    u16 localsEnd = GetMaxUsedSlotPlusOne (o);

    o->function->numLocalBytes = (localsEnd - o->slotFirstLocalIndex) * sizeof (m3slot_t);

    o->slotFirstConstIndex = o->slotMaxConstIndex = localsEnd;

    // ReserveConstants initializes o->slotFirstDynamicIndex
_   (ReserveConstants (o));

    // start tracking the max stack used (Push() also updates this value) so that op_Entry can precisely detect stack overflow
    o->maxStackSlots = o->slotMaxAllocatedIndexPlusOne = o->slotFirstDynamicIndex;

    o->block.blockStackIndex = o->stackFirstDynamicIndex = o->stackIndex;                           m3log (compile, "start stack index: %d",
                                                                                                          (u32) o->stackFirstDynamicIndex);
_   (EmitOp (o, op_Entry));
    EmitPointer (o, io_function);

_   (CompileBlockStatements (o));

    // TODO: validate opcode sequences
    _throwif(m3Err_wasmMalformed, o->previousOpcode != c_waOp_end);

    io_function->compiled = entryPC;
    io_function->maxStackSlots = o->maxStackSlots;

    u16 usedConstSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;                             m3log (compile, "unique constant slots: %d; unused slots: %d",
                                                                                                           usedConstSlots, o->slotFirstDynamicIndex - o->slotMaxConstIndex);
    io_function->numConstantBytes = usedConstSlots * sizeof (m3slot_t);

    // the function keeps its own copy of the constants gathered during compilation
    if (usedConstSlots)
    {
        io_function->constants = m3_CopyMem (o->constants, io_function->numConstantBytes);
        _throwifnull(io_function->constants);
    }

} _catch:

    ReleaseCompilationCodePage (o);

    return result;
}