Coverage Report

Created: 2025-07-13 06:42

/src/pcre2/deps/sljit/sljit_src/sljitLir.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *    Stack-less Just-In-Time compiler
3
 *
4
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without modification, are
7
 * permitted provided that the following conditions are met:
8
 *
9
 *   1. Redistributions of source code must retain the above copyright notice, this list of
10
 *      conditions and the following disclaimer.
11
 *
12
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13
 *      of conditions and the following disclaimer in the documentation and/or other materials
14
 *      provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 */
26
27
#ifndef SLJIT_LIR_H_
28
#define SLJIT_LIR_H_
29
30
/*
31
   ------------------------------------------------------------------------
32
    Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC)
33
   ------------------------------------------------------------------------
34
35
   Short description
36
    Advantages:
37
      - The execution can be continued from any LIR instruction. In other
38
        words, it is possible to jump to any label from anywhere, even from
39
        a code fragment, which is compiled later, as long as the compiling
40
        context is the same. See sljit_emit_enter for more details.
41
      - Supports self modifying code: target of any jump and call
42
        instructions and some constant values can be dynamically modified
43
        during runtime. See SLJIT_REWRITABLE_JUMP.
44
        - although it is not suggested to do it frequently
45
        - can be used for inline caching: save an important value once
46
          in the instruction stream
47
      - A fixed stack space can be allocated for local variables
48
      - The compiler is thread-safe
49
      - The compiler is highly configurable through preprocessor macros.
50
        You can disable unneeded features (multithreading in single
51
        threaded applications), and you can use your own system functions
52
        (including memory allocators). See sljitConfig.h.
53
    Disadvantages:
54
      - The compiler is more like a platform independent assembler, so
55
        there is no built-in variable management. Registers and stack must
56
        be managed manually (the name of the compiler refers to this).
57
    In practice:
58
      - This approach is very effective for interpreters
59
        - One of the saved registers typically points to a stack interface
60
        - It can jump to any exception handler anytime (even if it belongs
61
          to another function)
62
        - Hot paths can be modified during runtime reflecting the changes
63
          of the fastest execution path of the dynamic language
64
        - SLJIT supports complex memory addressing modes
65
        - mainly position and context independent code (except some cases)
66
67
    For valgrind users:
68
      - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code"
69
*/
70
71
#if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE)
72
#include "sljitConfigPre.h"
73
#endif /* SLJIT_HAVE_CONFIG_PRE */
74
75
#include "sljitConfigCPU.h"
76
#include "sljitConfig.h"
77
78
/* The following header file defines useful macros for fine tuning
79
SLJIT based code generators. They are listed in the beginning
80
of sljitConfigInternal.h */
81
82
#include "sljitConfigInternal.h"
83
84
#if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST)
85
#include "sljitConfigPost.h"
86
#endif /* SLJIT_HAVE_CONFIG_POST */
87
88
#ifdef __cplusplus
89
extern "C" {
90
#endif /* __cplusplus */
91
92
/* Version numbers. */
93
#define SLJIT_MAJOR_VERSION 0
94
#define SLJIT_MINOR_VERSION 95
95
96
/* --------------------------------------------------------------------- */
97
/*  Error codes                                                          */
98
/* --------------------------------------------------------------------- */
99
100
/* Indicates no error. */
101
17.2G
#define SLJIT_SUCCESS     0
102
/* After the call of sljit_generate_code(), the error code of the compiler
103
   is set to this value to avoid further code generation.
104
   The compiler should be freed after sljit_generate_code(). */
105
586k
#define SLJIT_ERR_COMPILED    1
106
/* Cannot allocate non-executable memory. */
107
0
#define SLJIT_ERR_ALLOC_FAILED    2
108
/* Cannot allocate executable memory.
109
   Only sljit_generate_code() returns with this error code. */
110
0
#define SLJIT_ERR_EX_ALLOC_FAILED 3
111
/* Unsupported instruction form. */
112
2.35G
#define SLJIT_ERR_UNSUPPORTED   4
113
/* An invalid argument is passed to any SLJIT function. */
114
#define SLJIT_ERR_BAD_ARGUMENT    5
115
116
/* --------------------------------------------------------------------- */
117
/*  Registers                                                            */
118
/* --------------------------------------------------------------------- */
119
120
/*
121
  Scratch (R) registers: registers which may not preserve their values
122
  across function calls.
123
124
  Saved (S) registers: registers which preserve their values across
125
  function calls.
126
127
  The scratch and saved register sets overlap. The last scratch register
128
  is the first saved register, the one before the last is the second saved
129
  register, and so on.
130
131
  For example, in an architecture with only five registers (A-E), if two
132
  are scratch and three saved registers, they will be defined as follows:
133
134
    A |   R0   |      |  R0 always represent scratch register A
135
    B |   R1   |      |  R1 always represent scratch register B
136
    C |  [R2]  |  S2  |  R2 and S2 represent the same physical register C
137
    D |  [R3]  |  S1  |  R3 and S1 represent the same physical register D
138
    E |  [R4]  |  S0  |  R4 and S0 represent the same physical register E
139
140
  Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS will be 2 and
141
        SLJIT_NUMBER_OF_SAVED_REGISTERS will be 3.
142
143
  Note: For all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12
144
        and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers
145
        are virtual on x86-32. See below.
146
147
  The purpose of this definition is convenience: saved registers can
148
  be used as extra scratch registers. For example, building on the
149
  previous example, four registers can be specified as scratch registers
150
  and the fifth one as saved register, allowing any user code which requires
151
  four scratch registers to run unmodified. The SLJIT compiler automatically
152
  saves the content of the two extra scratch registers on the stack. Scratch
153
  registers can also be preserved by saving their value on the stack but
154
  that needs to be done manually.
155
156
  Note: To emphasize that registers assigned to R2-R4 are saved
157
        registers, they are enclosed by square brackets.
158
159
  Note: sljit_emit_enter and sljit_set_context define whether a register
160
        is S or R register. E.g: if in the previous example 3 scratches and
161
        1 saved are mapped by sljit_emit_enter, the allowed register set
162
        will be: R0-R2 and S0. Although S2 is mapped to the same register
163
        as R2, it is not available in that configuration. Furthermore
164
        the S1 register cannot be used at all.
165
*/
166
167
/* Scratch registers. */
168
10.0G
#define SLJIT_R0  1
169
0
#define SLJIT_R1  2
170
295M
#define SLJIT_R2  3
171
/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they
172
   are allocated on the stack). These registers are called virtual
173
   and cannot be used for memory addressing (cannot be part of
174
   any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such
175
   limitation on other CPUs. See sljit_get_register_index(). */
176
645M
#define SLJIT_R3  4
177
6.20M
#define SLJIT_R4  5
178
#define SLJIT_R5  6
179
#define SLJIT_R6  7
180
#define SLJIT_R7  8
181
#define SLJIT_R8  9
182
#define SLJIT_R9  10
183
/* All R registers provided by the architecture can be accessed by SLJIT_R(i)
184
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */
185
#define SLJIT_R(i)  (1 + (i))
186
187
/* Saved registers. */
188
4.20M
#define SLJIT_S0  (SLJIT_NUMBER_OF_REGISTERS)
189
1.53M
#define SLJIT_S1  (SLJIT_NUMBER_OF_REGISTERS - 1)
190
6.92M
#define SLJIT_S2  (SLJIT_NUMBER_OF_REGISTERS - 2)
191
/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they
192
   are allocated on the stack). These registers are called virtual
193
   and cannot be used for memory addressing (cannot be part of
194
   any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such
195
   limitation on other CPUs. See sljit_get_register_index(). */
196
6.96M
#define SLJIT_S3  (SLJIT_NUMBER_OF_REGISTERS - 3)
197
6.92M
#define SLJIT_S4  (SLJIT_NUMBER_OF_REGISTERS - 4)
198
#define SLJIT_S5  (SLJIT_NUMBER_OF_REGISTERS - 5)
199
#define SLJIT_S6  (SLJIT_NUMBER_OF_REGISTERS - 6)
200
#define SLJIT_S7  (SLJIT_NUMBER_OF_REGISTERS - 7)
201
#define SLJIT_S8  (SLJIT_NUMBER_OF_REGISTERS - 8)
202
#define SLJIT_S9  (SLJIT_NUMBER_OF_REGISTERS - 9)
203
/* All S registers provided by the architecture can be accessed by SLJIT_S(i)
204
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */
205
#define SLJIT_S(i)  (SLJIT_NUMBER_OF_REGISTERS - (i))
206
207
/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */
208
1.75M
#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1)
209
210
/* The SLJIT_SP provides direct access to the linear stack space allocated by
211
   sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP).
212
   The immediate offset is extended by the relative stack offset automatically.
213
   sljit_get_local_base can be used to obtain the real address of a value. */
214
235M
#define SLJIT_SP  (SLJIT_NUMBER_OF_REGISTERS + 1)
215
216
/* Return with machine word. */
217
218
1.17M
#define SLJIT_RETURN_REG  SLJIT_R0
219
220
/* --------------------------------------------------------------------- */
221
/*  Floating point registers                                             */
222
/* --------------------------------------------------------------------- */
223
224
/* Each floating point register can store a 32 or a 64 bit precision
225
   value. The FR and FS register sets overlap in the same way as R
226
   and S register sets. See above. */
227
228
/* Floating point scratch registers. */
229
0
#define SLJIT_FR0 1
230
#define SLJIT_FR1 2
231
#define SLJIT_FR2 3
232
#define SLJIT_FR3 4
233
#define SLJIT_FR4 5
234
#define SLJIT_FR5 6
235
#define SLJIT_FR6 7
236
#define SLJIT_FR7 8
237
#define SLJIT_FR8 9
238
#define SLJIT_FR9 10
239
/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i)
240
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */
241
#define SLJIT_FR(i) (1 + (i))
242
243
/* Floating point saved registers. */
244
#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS)
245
#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1)
246
#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2)
247
#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3)
248
#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4)
249
#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5)
250
#define SLJIT_FS6 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 6)
251
#define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7)
252
#define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8)
253
#define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9)
254
/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i)
255
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */
256
#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i))
257
258
/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */
259
#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1)
260
261
/* Return with floating point arg. */
262
263
0
#define SLJIT_RETURN_FREG SLJIT_FR0
264
265
/* --------------------------------------------------------------------- */
266
/*  Vector registers                                                     */
267
/* --------------------------------------------------------------------- */
268
269
/* Vector registers are storage areas, which are used for Single Instruction
270
   Multiple Data (SIMD) computations. The VR and VS register sets overlap
271
   in the same way as R and S register sets. See above.
272
273
   The storage space of vector registers often overlap with floating point
274
   registers. In this case setting the value of SLJIT_VR(i) destroys the
275
   value of SLJIT_FR(i) and vice versa. See SLJIT_SEPARATE_VECTOR_REGISTERS
276
   macro. */
277
278
/* Vector scratch registers. */
279
2.87M
#define SLJIT_VR0 1
280
2.05M
#define SLJIT_VR1 2
281
879k
#define SLJIT_VR2 3
282
812k
#define SLJIT_VR3 4
283
131k
#define SLJIT_VR4 5
284
128k
#define SLJIT_VR5 6
285
108k
#define SLJIT_VR6 7
286
#define SLJIT_VR7 8
287
#define SLJIT_VR8 9
288
#define SLJIT_VR9 10
289
/* All VR registers provided by the architecture can be accessed by SLJIT_VR(i)
290
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_VECTOR_REGISTERS. */
291
#define SLJIT_VR(i) (1 + (i))
292
293
/* Vector saved registers. */
294
#define SLJIT_VS0 (SLJIT_NUMBER_OF_VECTOR_REGISTERS)
295
#define SLJIT_VS1 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 1)
296
#define SLJIT_VS2 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 2)
297
#define SLJIT_VS3 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 3)
298
#define SLJIT_VS4 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 4)
299
#define SLJIT_VS5 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 5)
300
#define SLJIT_VS6 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 6)
301
#define SLJIT_VS7 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 7)
302
#define SLJIT_VS8 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 8)
303
#define SLJIT_VS9 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 9)
304
/* All VS registers provided by the architecture can be accessed by SLJIT_VS(i)
305
   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS. */
306
#define SLJIT_VS(i) (SLJIT_NUMBER_OF_VECTOR_REGISTERS - (i))
307
308
/* Vector registers >= SLJIT_FIRST_SAVED_VECTOR_REG are saved registers. */
309
#define SLJIT_FIRST_SAVED_VECTOR_REG (SLJIT_VS0 - SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS + 1)
310
311
/* --------------------------------------------------------------------- */
312
/*  Argument type definitions                                            */
313
/* --------------------------------------------------------------------- */
314
315
/* The following argument type definitions are used by sljit_emit_enter,
316
   sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
317
318
   For sljit_emit_call and sljit_emit_icall, the first integer argument
319
   must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
320
   Similarly the first floating point argument must be placed into SLJIT_FR0,
321
   the second one into SLJIT_FR1, and so on.
322
323
   For sljit_emit_enter, the integer arguments can be stored in scratch
324
   or saved registers. Scratch registers are identified by a _R suffix.
325
326
   If only saved registers are used, then the allocation mirrors what is
327
   done for the "call" functions but using saved registers, meaning that
328
   the first integer argument goes to SLJIT_S0, the second one goes into
329
   SLJIT_S1, and so on.
330
331
   If scratch registers are used, then the way the integer registers are
332
   allocated changes so that SLJIT_S0, SLJIT_S1, etc; will be assigned
333
   only for the arguments not using scratch registers, while SLJIT_R<n>
334
   will be used for the ones using scratch registers.
335
336
   Furthermore, the index (shown as "n" above) that will be used for the
337
   scratch register depends on how many previous integer registers
338
   (scratch or saved) were used already, starting with SLJIT_R0.
339
   Even though some indexes will likely be skipped, they still need to be
340
   accounted for in the scratches parameter of sljit_emit_enter. See below
341
   for some examples.
342
343
   The floating point arguments always use scratch registers (but not the
344
   _R suffix like the integer arguments) and must use SLJIT_FR0, SLJIT_FR1,
345
   just like in the "call" functions.
346
347
   Note: the mapping for scratch registers is part of the compiler context
348
         and therefore a new context after sljit_emit_call/sljit_emit_icall
349
         could remove access to some scratch registers that were used as
350
         arguments.
351
352
   Example function definition:
353
     sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a,
354
         sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d);
355
356
   Argument type definition:
357
     SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32)
358
        | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2)
359
        | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4)
360
361
   Short form of argument type definition:
362
     SLJIT_ARGS4(F32, P, F64, 32, F32)
363
364
   Argument passing:
365
     arg_a must be placed in SLJIT_R0
366
     arg_b must be placed in SLJIT_FR0
367
     arg_c must be placed in SLJIT_R1
368
     arg_d must be placed in SLJIT_FR1
369
370
   Examples for argument processing by sljit_emit_enter:
371
     SLJIT_ARGS4V(P, 32_R, F32, W)
372
     Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
373
     The type of the result is void.
374
375
     SLJIT_ARGS4(F32, W, W_R, W, W_R)
376
     Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
377
     The type of the result is sljit_f32.
378
379
     SLJIT_ARGS4(P, F32, W, F32, P_R)
380
     Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
381
     The type of the result is pointer.
382
383
     Note: it is recommended to pass the scratch arguments first
384
     followed by the saved arguments:
385
386
       SLJIT_ARGS4(W, W_R, W_R, W, W)
387
       Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
388
       The type of the result is sljit_sw / sljit_uw.
389
*/
390
391
/* The following flag is only allowed for the integer arguments of
392
   sljit_emit_enter. When the flag is set, the integer argument is
393
   stored in a scratch register instead of a saved register. */
394
586k
#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
395
396
/* No return value, only supported by SLJIT_ARG_RETURN. */
397
#define SLJIT_ARG_TYPE_RET_VOID   0
398
/* Machine word sized integer argument or result. */
399
#define SLJIT_ARG_TYPE_W    1
400
#define SLJIT_ARG_TYPE_W_R  (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
401
/* 32 bit integer argument or result. */
402
#define SLJIT_ARG_TYPE_32   2
403
#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
404
/* Pointer sized integer argument or result. */
405
#define SLJIT_ARG_TYPE_P    3
406
#define SLJIT_ARG_TYPE_P_R  (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
407
/* 64 bit floating point argument or result. */
408
319M
#define SLJIT_ARG_TYPE_F64    4
409
/* 32 bit floating point argument or result. */
410
#define SLJIT_ARG_TYPE_F32    5
411
412
1.04G
#define SLJIT_ARG_SHIFT 4
413
210M
#define SLJIT_ARG_RETURN(type) (type)
414
618M
#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT))
415
416
/* Simplified argument list definitions.
417
418
   The following definition:
419
       SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1)
420
421
   can be shortened to:
422
       SLJIT_ARGS1(W, F32)
423
424
   Another example where no value is returned:
425
       SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1)
426
427
   can be shortened to:
428
       SLJIT_ARGS1V(W_R)
429
*/
430
431
#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type
432
433
#define SLJIT_ARGS0(ret) \
434
210M
  SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret))
435
#define SLJIT_ARGS0V() \
436
  SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID)
437
438
#define SLJIT_ARGS1(ret, arg1) \
439
210M
  (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
440
#define SLJIT_ARGS1V(arg1) \
441
  (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
442
443
#define SLJIT_ARGS2(ret, arg1, arg2) \
444
209M
  (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
445
#define SLJIT_ARGS2V(arg1, arg2) \
446
  (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
447
448
#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \
449
199M
  (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
450
#define SLJIT_ARGS3V(arg1, arg2, arg3) \
451
  (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
452
453
#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \
454
  (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
455
#define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \
456
  (SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
457
458
/* --------------------------------------------------------------------- */
459
/*  Main structures and functions                                        */
460
/* --------------------------------------------------------------------- */
461
462
/*
463
  The following structures are private, and can be changed in the
464
  future. Keeping them here allows code inlining.
465
*/
466
467
/* One node of a chain of raw memory fragments, linked through `next`.
   Private to SLJIT: the layout may change in future versions. */
struct sljit_memory_fragment {
468
  struct sljit_memory_fragment *next;
469
  /* NOTE(review): presumably the number of bytes of `memory` already in
     use - confirm against the allocator code in sljitLir.c. */
  sljit_uw used_size;
470
  /* Must be aligned to sljit_sw. */
471
  /* NOTE(review): declared with a single element; this looks like the
     pre-C99 trailing-array idiom where the real size is chosen when the
     fragment is allocated - confirm before relying on sizeof. */
  sljit_u8 memory[1];
472
};
473
474
/* Record describing one label. Records are chained through `next`.
   Private to SLJIT: the layout may change in future versions. */
struct sljit_label {
475
  struct sljit_label *next;
476
  /* `index` and `addr` share the same storage.
     NOTE(review): presumably `index` is valid while compiling and `addr`
     after code generation - confirm in sljitLir.c. */
  union {
477
    sljit_uw index;
478
    sljit_uw addr;
479
  } u;
480
  /* The maximum size difference. */
481
  sljit_uw size;
482
};
483
484
/* Record describing one jump. Records are chained through `next`.
   Private to SLJIT: the layout may change in future versions. */
struct sljit_jump {
485
  struct sljit_jump *next;
486
  sljit_uw addr;
487
  /* Architecture dependent flags. */
488
  sljit_uw flags;
489
  /* The destination is stored either as a raw word (`target`) or as a
     pointer to a label record (`label`); both share the same storage.
     NOTE(review): which member is valid is presumably encoded in
     `flags` - confirm in sljitLir.c. */
  union {
490
    sljit_uw target;
491
    struct sljit_label *label;
492
  } u;
493
};
494
495
/* Record describing one constant. Records are chained through `next`.
   Private to SLJIT: the layout may change in future versions. */
struct sljit_const {
496
  struct sljit_const *next;
497
  /* NOTE(review): presumably the location of the constant inside the
     instruction stream - confirm in sljitLir.c. */
  sljit_uw addr;
498
};
499
500
/* Describes a pre-allocated buffer. When SLJIT_GENERATE_CODE_BUFFER is
   passed to sljit_generate_code(), its exec_allocator_data argument
   points to a structure of this type. */
struct sljit_generate_code_buffer {
501
  /* Start of the pre-allocated buffer. */
  void *buffer;
502
  /* NOTE(review): presumably the size of `buffer` in bytes - confirm. */
  sljit_uw size;
503
  /* NOTE(review): presumably the offset of the executable mapping
     relative to the writable one, in the same sense as
     sljit_get_executable_offset() - confirm. */
  sljit_sw executable_offset;
504
};
505
506
/* Record describing one read-only buffer. Records are chained through
   `next`. Private to SLJIT: the layout may change in future versions. */
struct sljit_read_only_buffer {
507
  struct sljit_read_only_buffer *next;
508
  /* NOTE(review): presumably the size of the buffer in bytes - confirm. */
  sljit_uw size;
509
  /* Label can be replaced by address after sljit_generate_code. */
510
  union {
511
    struct sljit_label *label;
512
    sljit_uw addr;
513
  } u;
514
};
515
516
/* Compiler context passed to the SLJIT API functions. It holds the error
   state, the label / jump / constant lists, the register counts and a
   number of architecture specific fields selected by SLJIT_CONFIG_*
   macros. Private to SLJIT: the layout may change in future versions and
   depends on the build configuration. */
struct sljit_compiler {
517
  /* Current error code (SLJIT_SUCCESS or an SLJIT_ERR_* value).
     Returned by sljit_get_compiler_error(). */
  sljit_s32 error;
518
  sljit_s32 options;
519
520
  /* Heads of the label / jump / constant lists. */
  struct sljit_label *labels;
521
  struct sljit_jump *jumps;
522
  struct sljit_const *consts;
523
  /* NOTE(review): presumably the most recently added entry of each
     list - confirm in sljitLir.c. */
  struct sljit_label *last_label;
524
  struct sljit_jump *last_jump;
525
  struct sljit_const *last_const;
526
527
  /* Returned by sljit_compiler_get_allocator_data(). */
  void *allocator_data;
528
  /* Accessed through sljit_compiler_set_user_data() and
     sljit_compiler_get_user_data(). */
  void *user_data;
529
  struct sljit_memory_fragment *buf;
530
  struct sljit_memory_fragment *abuf;
531
532
  /* Number of labels created by the compiler. */
533
  sljit_uw label_count;
534
  /* Available scratch registers. */
535
  sljit_s32 scratches;
536
  /* Available saved registers. */
537
  sljit_s32 saveds;
538
  /* Available float scratch registers. */
539
  sljit_s32 fscratches;
540
  /* Available float saved registers. */
541
  sljit_s32 fsaveds;
542
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) \
543
    || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
544
    || (defined SLJIT_DEBUG && SLJIT_DEBUG) \
545
    || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
546
  /* Available vector scratch registers. */
547
  sljit_s32 vscratches;
548
  /* Available vector saved registers. */
549
  sljit_s32 vsaveds;
550
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS || SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */
551
  /* Local stack size. */
552
  sljit_s32 local_size;
553
  /* Maximum code size. */
554
  sljit_uw size;
555
  /* Relative offset of the executable mapping from the writable mapping.
     Returned by sljit_get_executable_offset(). */
556
  sljit_sw executable_offset;
557
  /* Executable size for statistical purposes. */
558
  sljit_uw executable_size;
559
560
#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE)
561
  sljit_s32 status_flags_state;
562
#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */
563
564
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
565
  sljit_s32 args_size;
566
#endif /* SLJIT_CONFIG_X86_32 */
567
568
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
569
  /* Temporary fields. */
570
  sljit_s32 mode32;
571
#endif /* SLJIT_CONFIG_X86_64 */
572
573
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
574
  /* Constant pool handling. */
575
  sljit_uw *cpool;
576
  sljit_u8 *cpool_unique;
577
  sljit_uw cpool_diff;
578
  sljit_uw cpool_fill;
579
  /* Other members. */
580
  /* Contains pointer, "ldr pc, [...]" pairs. */
581
  sljit_uw patches;
582
#endif /* SLJIT_CONFIG_ARM_V6 */
583
584
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
585
  /* Temporary fields. */
586
  sljit_uw shift_imm;
587
#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */
588
589
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)
590
  sljit_uw args_size;
591
#endif /* SLJIT_CONFIG_ARM_32 && __SOFTFP__ */
592
593
#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
594
  /* Temporary fields. */
595
  sljit_u32 imm;
596
#endif /* SLJIT_CONFIG_PPC */
597
598
#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
599
  sljit_s32 delay_slot;
600
  /* Temporary fields. */
601
  sljit_s32 cache_arg;
602
  sljit_sw cache_argw;
603
#endif /* SLJIT_CONFIG_MIPS */
604
605
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
606
  sljit_uw args_size;
607
#endif /* SLJIT_CONFIG_MIPS_32 */
608
609
#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
610
  /* Temporary fields. */
611
  sljit_s32 cache_arg;
612
  sljit_sw cache_argw;
613
#endif /* SLJIT_CONFIG_RISCV */
614
615
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
616
  /* Need to allocate register save area to make calls. */
617
  /* Temporary fields. */
618
  sljit_s32 mode;
619
#endif /* SLJIT_CONFIG_S390X */
620
621
#if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
622
  /* Temporary fields. */
623
  sljit_s32 cache_arg;
624
  sljit_sw cache_argw;
625
#endif /* SLJIT_CONFIG_LOONGARCH */
626
627
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
628
  FILE* verbose;
629
#endif /* SLJIT_VERBOSE */
630
631
  /* Note: SLJIT_DEBUG enables SLJIT_ARGUMENT_CHECKS. */
632
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
633
    || (defined SLJIT_DEBUG && SLJIT_DEBUG)
634
  /* Flags specified by the last arithmetic instruction.
635
     It contains the type of the variable flag. */
636
  sljit_s32 last_flags;
637
  /* Return value type set by entry functions. */
638
  sljit_s32 last_return;
639
  /* Local size passed to entry functions. */
640
  sljit_s32 logical_local_size;
641
#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
642
643
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
644
    || (defined SLJIT_DEBUG && SLJIT_DEBUG) \
645
    || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
646
#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
647
  /* Available float scratch registers. */
648
  sljit_s32 real_fscratches;
649
  /* Available float saved registers. */
650
  sljit_s32 real_fsaveds;
651
#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
652
653
  /* Trust arguments when an API function is called.
654
     Used internally for calling API functions. */
655
  sljit_s32 skip_checks;
656
#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */
657
};
658
659
/* --------------------------------------------------------------------- */
660
/*  Main functions                                                       */
661
/* --------------------------------------------------------------------- */
662
663
/* Creates an SLJIT compiler. The allocator_data is required by some
664
   custom memory managers. This pointer is passed to SLJIT_MALLOC
665
   and SLJIT_FREE macros. Most allocators (including the default
666
   one) ignore this value, and it is recommended to pass NULL
667
   as a dummy value for allocator_data.
668
669
   Returns NULL if failed. */
670
SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data);
671
672
/* Frees everything except the compiled machine code. */
673
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler);
674
675
/* Returns the current error code. If an error occurs, future calls
676
   which use the same compiler argument return early with the same
677
   error code. Thus there is no need for checking the error after every
678
   call, it is enough to do it after the code is compiled. Removing
679
   these checks increases the performance of the compiling process. */
680
511M
static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler)
{
  /* Plain accessor: hands back the error code stored in the compiler
     without modifying any state. */
  return compiler->error;
}
681
682
/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except
683
   if an error was detected before. After the error code is set
684
   the compiler behaves as if the allocation failure happened
685
   during an SLJIT function call. This can greatly simplify error
686
   checking, since it is enough to check the compiler status
687
   after the code is compiled. */
688
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler);
689
690
/* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit,
691
   and <= 128 bytes on 64 bit architectures. The memory area is owned by the
692
   compiler, and freed by sljit_free_compiler. The returned pointer is
693
   sizeof(sljit_sw) aligned. Excellent for allocating small blocks during
694
   compiling, and no need to worry about freeing them. The size is enough
695
   to contain at most 16 pointers. If the size is outside of the range,
696
   the function will return with NULL. However, this return value does not
697
   indicate that there is no more memory (does not set the current error code
698
   of the compiler to out-of-memory status). */
699
SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size);
700
701
/* Returns the allocator data passed to sljit_create_compiler. */
702
0
static SLJIT_INLINE void* sljit_compiler_get_allocator_data(struct sljit_compiler *compiler)
{
  /* Plain accessor: reads the allocator_data field, nothing is modified. */
  return compiler->allocator_data;
}
703
/* Sets/gets the user data of a compiler. */
704
0
static SLJIT_INLINE void sljit_compiler_set_user_data(struct sljit_compiler *compiler, void *user_data)
{
  /* Stores the caller supplied pointer as is; any previously stored
     value is simply overwritten. */
  compiler->user_data = user_data;
}
705
0
static SLJIT_INLINE void* sljit_compiler_get_user_data(struct sljit_compiler *compiler)
{
  /* Plain accessor: reads the user_data field, nothing is modified. */
  return compiler->user_data;
}
706
707
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
708
/* Passing NULL disables verbose. */
709
SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
710
#endif /* SLJIT_VERBOSE */
711
712
/* Option bits for sljit_generate_code. */
713
714
/* The exec_allocator_data points to a pre-allocated
715
   buffer whose type is sljit_generate_code_buffer. */
716
#define SLJIT_GENERATE_CODE_BUFFER    0x1
717
718
/* Create executable code from the instruction stream. This is the final step
719
   of the code generation, and no more instructions can be emitted after this call.
720
721
   options is the combination of SLJIT_GENERATE_CODE_* bits
722
   exec_allocator_data is passed to SLJIT_MALLOC_EXEC and
723
                       SLJIT_FREE_EXEC functions */
724
725
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data);
726
727
/* Free executable code. */
728
729
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data);
730
731
/* When the protected executable allocator is used the JIT code is mapped
732
   twice. The first mapping has read/write and the second mapping has read/exec
733
   permissions. This function returns with the relative offset of the executable
734
   mapping using the writable mapping as the base after the machine code is
735
   successfully generated. The returned value is always 0 for the normal executable
736
   allocator, since it uses only one mapping with read/write/exec permissions.
737
   Dynamic code modifications require this value.
738
739
   Before a successful code generation, this function returns with 0. */
740
0
static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler)
{
	/* Report the executable offset recorded in the compiler. */
	return compiler->executable_offset;
}
741
742
/* The executable memory consumption of the generated code can be retrieved by
743
   this function. The returned value can be used for statistical purposes.
744
745
   Before a successful code generation, this function returns with 0. */
746
586k
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler)
{
	/* Report the executable size recorded in the compiler. */
	return compiler->executable_size;
}
747
748
/* Returns with non-zero if the feature or limitation type passed as its
749
   argument is present on the current CPU. The return value is one, if a
750
   feature is fully supported, and it is two, if partially supported.
751
752
   Some features (e.g. floating point operations) require hardware (CPU)
753
   support while others (e.g. move with update) are emulated if not available.
754
   However, even when a feature is emulated, specialized code paths may be
755
   faster than the emulation. Some limitations are emulated as well so their
756
   general case is supported but it has extra performance costs.
757
758
   Note: sljitConfigInternal.h also provides several feature detection macros. */
759
760
/* [Not emulated] Floating-point support is available. */
761
0
#define SLJIT_HAS_FPU     0
762
/* [Limitation] Some registers are virtual registers. */
763
#define SLJIT_HAS_VIRTUAL_REGISTERS 1
764
/* [Emulated] Has zero register (setting a memory location to zero is efficient). */
765
77.3k
#define SLJIT_HAS_ZERO_REGISTER   2
766
/* [Emulated] Count leading zero is supported. */
767
0
#define SLJIT_HAS_CLZ     3
768
/* [Emulated] Count trailing zero is supported. */
769
0
#define SLJIT_HAS_CTZ     4
770
/* [Emulated] Reverse the order of bytes is supported. */
771
0
#define SLJIT_HAS_REV     5
772
/* [Emulated] Rotate left/right is supported. */
773
0
#define SLJIT_HAS_ROT     6
774
/* [Emulated] Conditional move is supported. */
775
1.22G
#define SLJIT_HAS_CMOV      7
776
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
777
0
#define SLJIT_HAS_PREFETCH    8
778
/* [Emulated] Copy from/to f32 operation is available (see sljit_emit_fcopy). */
779
0
#define SLJIT_HAS_COPY_F32    9
780
/* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */
781
0
#define SLJIT_HAS_COPY_F64    10
782
/* [Not emulated] The 64 bit floating point registers can be used as
783
   two separate 32 bit floating point registers (e.g. ARM32). The
784
   second 32 bit part can be accessed by SLJIT_F64_SECOND. */
785
#define SLJIT_HAS_F64_AS_F32_PAIR 11
786
/* [Not emulated] Some SIMD operations are supported by the compiler. */
787
1.17M
#define SLJIT_HAS_SIMD      12
788
/* [Not emulated] SIMD registers are mapped to a pair of double precision
789
   floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to
790
   a simd operation represents the same 128 bit register, and both SLJIT_FR0
791
   and SLJIT_FR1 are overwritten. */
792
#define SLJIT_SIMD_REGS_ARE_PAIRS 13
793
/* [Not emulated] Atomic support is available. */
794
0
#define SLJIT_HAS_ATOMIC    14
795
/* [Not emulated] Memory barrier support is available. */
796
0
#define SLJIT_HAS_MEMORY_BARRIER    15
797
798
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
799
/* [Not emulated] AVX support is available on x86. */
800
0
#define SLJIT_HAS_AVX     100
801
/* [Not emulated] AVX2 support is available on x86. */
802
0
#define SLJIT_HAS_AVX2      101
803
#endif /* SLJIT_CONFIG_X86 */
804
805
#if (defined SLJIT_CONFIG_LOONGARCH)
806
/* [Not emulated] LASX support is available on LoongArch */
807
#define SLJIT_HAS_LASX        201
808
#endif /* SLJIT_CONFIG_LOONGARCH */
809
810
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
811
812
/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL,
813
   sljit_cmp_info returns with:
814
     zero - if the cpu supports the floating point comparison type
815
     one - if the comparison requires two machine instructions
816
     two - if the comparison requires more than two machine instructions
817
818
   When the result is non-zero, it is recommended to avoid
819
   using the specified comparison type if it is easy to do so.
820
821
   Otherwise it returns zero. */
822
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
823
824
/* The following functions generate machine code. If there is no
825
   error, they return with SLJIT_SUCCESS, otherwise they return
826
   with an error code. */
827
828
/*
829
   The executable code is a callable function from the viewpoint
830
   of the C language. Function calls must conform with the ABI
831
   (Application Binary Interface) of the target platform, which
832
   specifies the purpose of machine registers and stack handling
833
   among other things. The sljit_emit_enter function emits the
834
   necessary instructions for setting up an entry point for the
835
   executable code. This is often called the function prologue.
836
837
   The "options" argument can be used to pass configuration options
838
   to the sljit compiler which affects the generated code, until
839
   another sljit_emit_enter or sljit_set_context is called. The
840
   available options are listed before sljit_emit_enter.
841
842
   The function argument list is specified by the SLJIT_ARGSx
843
   (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently maximum four
844
   arguments are supported. See the description of SLJIT_ARGSx
845
   macros about argument passing.
846
847
   The register set used by the function must be declared as well.
848
   The number of scratch and saved registers available to the
849
   function must be passed to sljit_emit_enter. Only R registers
850
   between R0 and "scratches" argument can be used later. E.g.
851
   if "scratches" is set to two, the scratch register set will
852
   be limited to SLJIT_R0 and SLJIT_R1. The S registers are
853
   declared in a similar manner, but their count is specified
854
   by "saveds" argument. The floating point scratch and saved
855
   registers can be set by using "scratches" and "saveds" argument
856
   as well, but their value must be passed to the SLJIT_ENTER_FLOAT
857
   macro, see below.
858
859
   The sljit_emit_enter is also capable of allocating a stack
860
   space for local data. The "local_size" argument contains the
861
   size in bytes of this local area, and it can be accessed using
862
   SLJIT_MEM1(SLJIT_SP). The memory area between SLJIT_SP (inclusive)
863
   and SLJIT_SP + local_size (exclusive) can be modified freely
864
   until the function returns. The allocated stack space is an
865
   uninitialized memory area.
866
867
   Floating point scratch and saved registers must be specified
868
   by the SLJIT_ENTER_FLOAT macro, whose result value should be
869
   combined with scratches / saveds argument.
870
871
   Examples:
872
       To use three scratch and four floating point scratch
873
       registers, the "scratches" argument must be set to:
874
            3 | SLJIT_ENTER_FLOAT(4)
875
876
       To use six saved and five floating point saved
877
       registers, the "saveds" argument must be set to:
878
            6 | SLJIT_ENTER_FLOAT(5)
879
880
   Note: the following conditions must be met:
881
         0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
882
         0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS
883
         scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
884
885
         0 <= float scratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
886
         0 <= float saveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS
887
         float scratches + float saveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
888
889
   Note: the compiler can use saved registers as scratch registers,
890
         but the opposite is not supported
891
892
   Note: every call of sljit_emit_enter and sljit_set_context
893
         overwrites the previous context.
894
*/
895
896
/* The following options are available for sljit_emit_enter. */
897
898
/* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive)
899
   are not saved / restored on function enter / return. Instead,
900
   these registers can be used to pass / return data (such as
901
   global / local context pointers) across function calls. The
902
   value of n must be between 1 and 3. This option is only
903
   supported by SLJIT_ENTER_REG_ARG calling convention. */
904
#define SLJIT_ENTER_KEEP(n)   (n)
905
906
/* The compiled function uses an SLJIT specific register argument
907
   calling convention. This is a lightweight function call type where
908
   both the caller and the called functions must be compiled by
909
   SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG
910
   and all arguments must be stored in scratch registers. */
911
586k
#define SLJIT_ENTER_REG_ARG   0x00000004
912
913
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
914
/* Use VEX prefix for all SIMD operations on x86. */
915
3.64M
#define SLJIT_ENTER_USE_VEX   0x00010000
916
#endif /* SLJIT_CONFIG_X86 */
917
918
/* Macros for other sljit_emit_enter arguments. */
919
920
/* Floating point scratch and saved registers can be
921
   specified by SLJIT_ENTER_FLOAT. */
922
#define SLJIT_ENTER_FLOAT(regs)   ((regs) << 8)
923
924
/* Vector scratch and saved registers can be specified
925
   by SLJIT_ENTER_VECTOR. */
926
586k
#define SLJIT_ENTER_VECTOR(regs)  ((regs) << 16)
927
928
/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
929
739M
#define SLJIT_MAX_LOCAL_SIZE    1048576
930
931
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
932
  sljit_s32 options, sljit_s32 arg_types,
933
  sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size);
934
935
/* The SLJIT compiler has a current context (which contains the local
936
   stack space size, number of used registers, etc.) which is initialized
937
   by sljit_emit_enter. Several functions (such as sljit_emit_return)
938
   require this context to be able to generate the appropriate code.
939
   However, some code fragments (compiled separately) may have no
940
   normal entry point so their context is unknown to the compiler.
941
942
   sljit_set_context and sljit_emit_enter have the same arguments,
943
   but sljit_set_context does not generate any machine code.
944
945
   Note: every call of sljit_emit_enter and sljit_set_context overwrites
946
         the previous context. */
947
948
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
949
  sljit_s32 options, sljit_s32 arg_types,
950
  sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size);
951
952
/* Return to the caller function. The sljit_emit_return_void function
953
   does not return with any value. The sljit_emit_return function returns
954
   with a single value loaded from its source operand. The load operation
955
   can be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1) and
956
   SLJIT_MOV_F32/SLJIT_MOV_F64 (see sljit_emit_fop1) depending on the
957
   return value specified by sljit_emit_enter/sljit_set_context. */
958
959
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler);
960
961
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op,
962
  sljit_s32 src, sljit_sw srcw);
963
964
/* Restores the saved registers and free the stack area, then the execution
965
   continues from the address specified by the source operand. This
966
   operation is similar to sljit_emit_return, but it ignores the return
967
   address. The code where the execution continues should use the same context
968
   as the caller function (see sljit_set_context). A word (pointer) value
969
   can be passed in the SLJIT_RETURN_REG register. This function can be used
970
   to jump to exception handlers. */
971
972
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
973
  sljit_s32 src, sljit_sw srcw);
974
975
/*
976
   Source and destination operands for arithmetical instructions
977
    imm              - a simple immediate value (cannot be used as a destination)
978
    reg              - any of the available registers (immediate argument must be 0)
979
    [imm]            - absolute memory address
980
    [reg+imm]        - indirect memory address
981
    [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3)
982
                       useful for accessing arrays (fully supported by both x86 and
983
                       ARM architectures, and cheap operation on others)
984
*/
985
986
/*
987
   IMPORTANT NOTE: memory accesses MUST be naturally aligned unless
988
                   SLJIT_UNALIGNED macro is defined and its value is 1.
989
990
     length | alignment
991
   ---------+-----------
992
     byte   | 1 byte (any physical_address is accepted)
993
     half   | 2 byte (physical_address & 0x1 == 0)
994
     int    | 4 byte (physical_address & 0x3 == 0)
995
     word   | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
996
            | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1
997
    pointer | size of sljit_up type (4 byte on 32 bit machines, 4 or 8 byte
998
            | on 64 bit machines)
999
1000
   Note:   Different architectures have different addressing limitations.
1001
           A single instruction is enough for the following addressing
1002
           modes. Other addressing modes are emulated by instruction
1003
           sequences. This information could help to improve those code
1004
           generators which focus on only a few architectures.
1005
1006
   x86:    [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
1007
           [reg+(reg<<imm)] is supported
1008
           [imm], -2^32+1 <= imm <= 2^32-1 is supported
1009
           Write-back is not supported
1010
   arm:    [reg+imm], -4095 <= imm <= 4095 (or -255 <= imm <= 255 for signed
1011
                bytes, any halfs or floating point values)
1012
           [reg+(reg<<imm)] is supported
1013
           Write-back is supported
1014
   arm-t2: [reg+imm], -255 <= imm <= 4095
1015
           [reg+(reg<<imm)] is supported
1016
           Write back is supported only for [reg+imm], where -255 <= imm <= 255
1017
   arm64:  [reg+imm], -256 <= imm <= 255, 0 <= aligned imm <= 4095 * alignment
1018
           [reg+(reg<<imm)] is supported
1019
           Write back is supported only for [reg+imm], where -256 <= imm <= 255
1020
   ppc:    [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit
1021
                signed load on 64 bit requires immediates divisible by 4.
1022
                [reg+imm] is not supported for signed 8 bit values.
1023
           [reg+reg] is supported
1024
           Write-back is supported except for one instruction: 32 bit signed
1025
                load with [reg+imm] addressing mode on 64 bit.
1026
   mips:   [reg+imm], -65536 <= imm <= 65535
1027
           Write-back is not supported
1028
   riscv:  [reg+imm], -2048 <= imm <= 2047
1029
           Write-back is not supported
1030
   s390x:  [reg+imm], -2^19 <= imm < 2^19
1031
           [reg+reg] is supported
1032
           Write-back is not supported
1033
   loongarch:  [reg+imm], -2048 <= imm <= 2047
1034
           [reg+reg] is supported
1035
           Write-back is not supported
1036
*/
1037
1038
/* Macros for specifying operand types. */
1039
40.8G
#define SLJIT_MEM   0x80
1040
#define SLJIT_MEM0()    (SLJIT_MEM)
1041
21.2G
#define SLJIT_MEM1(r1)    (SLJIT_MEM | (r1))
1042
0
#define SLJIT_MEM2(r1, r2)  (SLJIT_MEM | (r1) | ((r2) << 8))
1043
51.0G
#define SLJIT_IMM   0x7f
1044
#define SLJIT_REG_PAIR(r1, r2)  ((r1) | ((r2) << 8))
1045
1046
/* Macros for checking operand types (only for valid arguments). */
1047
#define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM)
1048
#define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM)
1049
#define SLJIT_IS_MEM0(arg)  ((arg) == SLJIT_MEM)
1050
#define SLJIT_IS_MEM1(arg)  ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1))
1051
#define SLJIT_IS_MEM2(arg)  (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
1052
#define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM)
1053
#define SLJIT_IS_REG_PAIR(arg)  (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
1054
1055
/* Macros for extracting registers from operands. */
1056
/* Supports operands which contain a single register or
1057
   constructed using SLJIT_MEM1, SLJIT_MEM2, or SLJIT_REG_PAIR. */
1058
#define SLJIT_EXTRACT_REG(arg)    ((arg) & 0x7f)
1059
/* Supports operands which are constructed using SLJIT_MEM2, or SLJIT_REG_PAIR. */
1060
#define SLJIT_EXTRACT_SECOND_REG(arg) ((arg) >> 8)
1061
1062
/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
1063
   32 bit CPUs. When this option is set for an arithmetic operation, only
1064
   the lower 32 bits of the input registers are used, and the CPU status
1065
   flags are set according to the 32 bit result. Although the higher 32 bit
1066
   of the input and the result registers are not defined by SLJIT, it might
1067
   be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU
1068
   requirements all source registers must be the result of those operations
1069
   where this option was also set. Memory loads read 32 bit values rather
1070
   than 64 bit ones. In other words 32 bit and 64 bit operations cannot be
1071
   mixed. The only exception is SLJIT_MOV32 whose source register can hold
1072
   any 32 or 64 bit value, and it is converted to a 32 bit compatible format
1073
   first. When the source and destination registers are the same, this
1074
   conversion is free (no instructions are emitted) on most CPUs. A 32 bit
1075
   value can also be converted to a 64 bit value by SLJIT_MOV_S32
1076
   (sign extension) or SLJIT_MOV_U32 (zero extension).
1077
1078
   As for floating-point operations, this option sets 32 bit single
1079
   precision mode. Similar to the integer operations, all register arguments
1080
   must be the result of those operations where this option was also set.
1081
1082
   Note: memory addressing always uses 64 bit values on 64 bit systems so
1083
         the result of a 32 bit operation must not be used with SLJIT_MEMx
1084
         macros.
1085
1086
   This option is part of the instruction name, so there is no need to
1087
   manually set it. E.g:
1088
1089
     SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */
1090
20.6G
#define SLJIT_32    0x100
1091
1092
/* Many CPUs (x86, ARM, PPC) have status flag bits which can be set according
1093
   to the result of an operation. Other CPUs (MIPS) do not have status
1094
   flag bits, and results must be stored in registers. To cover both
1095
   architecture types efficiently only two flags are defined by SLJIT:
1096
1097
    * Zero (equal) flag: it is set if the result is zero
1098
    * Variable flag: its value is defined by the arithmetic operation
1099
1100
   SLJIT instructions can set any or both of these flags. The value of
1101
   these flags is undefined if the instruction does not specify their
1102
   value. The description of each instruction contains the list of
1103
   allowed flag types.
1104
1105
   Note: the logical or operation can be used to set flags.
1106
1107
   Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence
1108
1109
     sljit_op2(..., SLJIT_ADD, ...)
1110
       Both the zero and variable flags are undefined so they can
1111
       have any value after the operation is completed.
1112
1113
     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
1114
       Sets the zero flag if the result is zero, clears it otherwise.
1115
       The variable flag is undefined.
1116
1117
     sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...)
1118
       Sets the variable flag if an integer overflow occurs, clears
1119
       it otherwise. The zero flag is undefined.
1120
1121
     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...)
1122
       Sets the zero flag if the result is zero, clears it otherwise.
1123
       Sets the variable flag if unsigned overflow (carry) occurs,
1124
       clears it otherwise.
1125
1126
   Certain instructions (e.g. SLJIT_MOV) do not modify flags, so
1127
   status flags are unchanged.
1128
1129
   Example:
1130
1131
     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
1132
     sljit_op1(..., SLJIT_MOV, ...)
1133
       Zero flag is set according to the result of SLJIT_ADD.
1134
1135
     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
1136
     sljit_op2(..., SLJIT_ADD, ...)
1137
       Zero flag has unknown value.
1138
1139
   These flags can be used for code optimization. E.g. a fast loop can be
1140
   implemented by decreasing a counter register and set the zero flag
1141
   using a single instruction. The zero flag can be used by a
1142
   conditional jump to restart the loop. A single comparison can set a
1143
   zero and less flags to check if a value is less, equal, or greater
1144
   than another value.
1145
1146
   Motivation: although some CPUs can set a large number of flag bits,
1147
   usually their values are ignored or only a few of them are used. Emulating
1148
   a large number of flags on systems without a flag register is complicated
1149
   so SLJIT instructions must specify the flag they want to use and only
1150
   that flag is computed. The last arithmetic instruction can be repeated if
1151
   multiple flags need to be checked.
1152
*/
1153
1154
/* Set Zero status flag. */
1155
6.67G
#define SLJIT_SET_Z     0x0200
1156
/* Set the variable status flag if condition is true.
1157
   See comparison types (e.g. SLJIT_SET_LESS, SLJIT_SET_F_EQUAL). */
1158
44.5k
#define SLJIT_SET(condition)      ((condition) << 10)
1159
1160
/* Starting index of opcodes for sljit_emit_op0. */
1161
26.1M
#define SLJIT_OP0_BASE      0
1162
1163
/* Flags: - (does not modify flags)
1164
   Note: breakpoint instruction is not supported by all architectures (e.g. ppc)
1165
         It falls back to SLJIT_NOP in those cases. */
1166
0
#define SLJIT_BREAKPOINT    (SLJIT_OP0_BASE + 0)
1167
/* Flags: - (does not modify flags)
1168
   Note: may or may not cause an extra cycle wait
1169
         it can even decrease the runtime in a few cases. */
1170
0
#define SLJIT_NOP     (SLJIT_OP0_BASE + 1)
1171
/* Flags: - (may destroy flags)
1172
   Unsigned multiplication of SLJIT_R0 and SLJIT_R1.
1173
   Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
1174
0
#define SLJIT_LMUL_UW     (SLJIT_OP0_BASE + 2)
1175
/* Flags: - (may destroy flags)
1176
   Signed multiplication of SLJIT_R0 and SLJIT_R1.
1177
   Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
1178
0
#define SLJIT_LMUL_SW     (SLJIT_OP0_BASE + 3)
1179
/* Flags: - (may destroy flags)
1180
   Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
1181
   The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
1182
   Note: if SLJIT_R1 is 0, the behaviour is undefined. */
1183
0
#define SLJIT_DIVMOD_UW     (SLJIT_OP0_BASE + 4)
1184
#define SLJIT_DIVMOD_U32    (SLJIT_DIVMOD_UW | SLJIT_32)
1185
/* Flags: - (may destroy flags)
1186
   Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
1187
   The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
1188
   Note: if SLJIT_R1 is 0, the behaviour is undefined.
1189
   Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
1190
         the behaviour is undefined. */
1191
0
#define SLJIT_DIVMOD_SW     (SLJIT_OP0_BASE + 5)
1192
#define SLJIT_DIVMOD_S32    (SLJIT_DIVMOD_SW | SLJIT_32)
1193
/* Flags: - (may destroy flags)
1194
   Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
1195
   The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
1196
   Note: if SLJIT_R1 is 0, the behaviour is undefined. */
1197
0
#define SLJIT_DIV_UW      (SLJIT_OP0_BASE + 6)
1198
#define SLJIT_DIV_U32     (SLJIT_DIV_UW | SLJIT_32)
1199
/* Flags: - (may destroy flags)
1200
   Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
1201
   The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
1202
   Note: if SLJIT_R1 is 0, the behaviour is undefined.
1203
   Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
1204
         the behaviour is undefined. */
1205
0
#define SLJIT_DIV_SW      (SLJIT_OP0_BASE + 7)
1206
#define SLJIT_DIV_S32     (SLJIT_DIV_SW | SLJIT_32)
1207
/* Flags: - (does not modify flags)
1208
   May return with SLJIT_ERR_UNSUPPORTED if SLJIT_HAS_MEMORY_BARRIER
1209
   feature is not supported (calling sljit_has_cpu_feature() with
1210
   this feature option returns with 0). */
1211
0
#define SLJIT_MEMORY_BARRIER    (SLJIT_OP0_BASE + 8)
1212
/* Flags: - (does not modify flags)
1213
   ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64
1214
   when Intel Control-flow Enforcement Technology (CET) is enabled.
1215
   No instructions are emitted for other architectures. */
1216
24.9M
#define SLJIT_ENDBR     (SLJIT_OP0_BASE + 9)
1217
/* Flags: - (may destroy flags)
1218
   Skip stack frames before return when Intel Control-flow
1219
   Enforcement Technology (CET) is enabled. No instructions
1220
   are emitted for other architectures. */
1221
1.17M
#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 10)
1222
1223
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op);
1224
1225
/* Starting index of opcodes for sljit_emit_op1. */
1226
15.9G
#define SLJIT_OP1_BASE      32
1227
1228
/* The MOV instruction transfers data from source to destination.
1229
1230
   MOV instruction suffixes:
1231
1232
   U8  - unsigned 8 bit data transfer
1233
   S8  - signed 8 bit data transfer
1234
   U16 - unsigned 16 bit data transfer
1235
   S16 - signed 16 bit data transfer
1236
   U32 - unsigned int (32 bit) data transfer
1237
   S32 - signed int (32 bit) data transfer
1238
   P   - pointer (sljit_up) data transfer
1239
*/
1240
1241
/* Flags: - (does not modify flags) */
1242
11.7G
#define SLJIT_MOV     (SLJIT_OP1_BASE + 0)
1243
/* Flags: - (does not modify flags) */
1244
288M
#define SLJIT_MOV_U8      (SLJIT_OP1_BASE + 1)
1245
#define SLJIT_MOV32_U8      (SLJIT_MOV_U8 | SLJIT_32)
1246
/* Flags: - (does not modify flags) */
1247
0
#define SLJIT_MOV_S8      (SLJIT_OP1_BASE + 2)
1248
#define SLJIT_MOV32_S8      (SLJIT_MOV_S8 | SLJIT_32)
1249
/* Flags: - (does not modify flags) */
1250
150M
#define SLJIT_MOV_U16     (SLJIT_OP1_BASE + 3)
1251
#define SLJIT_MOV32_U16     (SLJIT_MOV_U16 | SLJIT_32)
1252
/* Flags: - (does not modify flags) */
1253
0
#define SLJIT_MOV_S16     (SLJIT_OP1_BASE + 4)
1254
#define SLJIT_MOV32_S16     (SLJIT_MOV_S16 | SLJIT_32)
1255
/* Flags: - (does not modify flags)
1256
   Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
1257
807M
#define SLJIT_MOV_U32     (SLJIT_OP1_BASE + 5)
1258
/* Flags: - (does not modify flags)
1259
   Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
1260
203M
#define SLJIT_MOV_S32     (SLJIT_OP1_BASE + 6)
1261
/* Flags: - (does not modify flags) */
1262
0
#define SLJIT_MOV32     (SLJIT_OP1_BASE + 7)
1263
/* Flags: - (does not modify flags)
1264
   Note: loads a pointer sized data, useful on x32 mode (a 64 bit mode
1265
         on x86-64 which uses 32 bit pointers) or similar compiling modes */
1266
7.13G
#define SLJIT_MOV_P     (SLJIT_OP1_BASE + 8)
1267
/* Count leading zeroes
1268
   Flags: - (may destroy flags)
1269
   Note: immediate source argument is not supported */
1270
0
#define SLJIT_CLZ     (SLJIT_OP1_BASE + 9)
1271
#define SLJIT_CLZ32     (SLJIT_CLZ | SLJIT_32)
1272
/* Count trailing zeroes
1273
   Flags: - (may destroy flags)
1274
   Note: immediate source argument is not supported */
1275
0
#define SLJIT_CTZ     (SLJIT_OP1_BASE + 10)
1276
#define SLJIT_CTZ32     (SLJIT_CTZ | SLJIT_32)
1277
/* Reverse the order of bytes
1278
   Flags: - (may destroy flags)
1279
   Note: converts between little and big endian formats
1280
   Note: immediate source argument is not supported */
1281
0
#define SLJIT_REV     (SLJIT_OP1_BASE + 11)
1282
#define SLJIT_REV32     (SLJIT_REV | SLJIT_32)
1283
/* Reverse the order of bytes in the lower 16 bit and extend as unsigned
1284
   Flags: - (may destroy flags)
1285
   Note: converts between little and big endian formats
1286
   Note: immediate source argument is not supported */
1287
0
#define SLJIT_REV_U16     (SLJIT_OP1_BASE + 12)
1288
#define SLJIT_REV32_U16     (SLJIT_REV_U16 | SLJIT_32)
1289
/* Reverse the order of bytes in the lower 16 bit and extend as signed
1290
   Flags: - (may destroy flags)
1291
   Note: converts between little and big endian formats
1292
   Note: immediate source argument is not supported */
1293
0
#define SLJIT_REV_S16     (SLJIT_OP1_BASE + 13)
1294
#define SLJIT_REV32_S16     (SLJIT_REV_S16 | SLJIT_32)
1295
/* Reverse the order of bytes in the lower 32 bit and extend as unsigned
1296
   Flags: - (may destroy flags)
1297
   Note: converts between little and big endian formats
1298
   Note: immediate source argument is not supported */
1299
0
#define SLJIT_REV_U32     (SLJIT_OP1_BASE + 14)
1300
/* Reverse the order of bytes in the lower 32 bit and extend as signed
1301
   Flags: - (may destroy flags)
1302
   Note: converts between little and big endian formats
1303
   Note: immediate source argument is not supported */
1304
0
#define SLJIT_REV_S32     (SLJIT_OP1_BASE + 15)
1305
1306
/* The following unary operations are supported by using sljit_emit_op2:
1307
     - binary not: SLJIT_XOR with immediate -1 as src1 or src2
1308
     - negate: SLJIT_SUB with immediate 0 as src1
1309
   Note: these operations are optimized by the compiler if the
1310
     target CPU has specialized instruction forms for them. */
1311
1312
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1313
  sljit_s32 dst, sljit_sw dstw,
1314
  sljit_s32 src, sljit_sw srcw);
1315
1316
/* Starting index of opcodes for sljit_emit_op2. */
1317
9.27G
#define SLJIT_OP2_BASE      64
1318
1319
/* Flags: Z | OVERFLOW | CARRY */
1320
1.65G
#define SLJIT_ADD     (SLJIT_OP2_BASE + 0)
1321
#define SLJIT_ADD32     (SLJIT_ADD | SLJIT_32)
1322
/* Flags: CARRY */
1323
0
#define SLJIT_ADDC      (SLJIT_OP2_BASE + 1)
1324
#define SLJIT_ADDC32      (SLJIT_ADDC | SLJIT_32)
1325
/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
1326
          SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
1327
          SIG_LESS_EQUAL | OVERFLOW | CARRY */
1328
10.5G
#define SLJIT_SUB     (SLJIT_OP2_BASE + 2)
1329
#define SLJIT_SUB32     (SLJIT_SUB | SLJIT_32)
1330
/* Flags: CARRY */
1331
0
#define SLJIT_SUBC      (SLJIT_OP2_BASE + 3)
1332
#define SLJIT_SUBC32      (SLJIT_SUBC | SLJIT_32)
1333
/* Note: integer mul
1334
   Flags: OVERFLOW */
1335
0
#define SLJIT_MUL     (SLJIT_OP2_BASE + 4)
1336
#define SLJIT_MUL32     (SLJIT_MUL | SLJIT_32)
1337
/* Flags: Z */
1338
131M
#define SLJIT_AND     (SLJIT_OP2_BASE + 5)
1339
#define SLJIT_AND32     (SLJIT_AND | SLJIT_32)
1340
/* Flags: Z */
1341
952M
#define SLJIT_OR      (SLJIT_OP2_BASE + 6)
1342
#define SLJIT_OR32      (SLJIT_OR | SLJIT_32)
1343
/* Flags: Z */
1344
443k
#define SLJIT_XOR     (SLJIT_OP2_BASE + 7)
1345
#define SLJIT_XOR32     (SLJIT_XOR | SLJIT_32)
1346
/* Flags: Z
1347
   Let bit_length be the length of the shift operation: 32 or 64.
1348
   If src2 is immediate, src2w is masked by (bit_length - 1).
1349
   Otherwise, if the content of src2 is outside the range from 0
1350
   to bit_length - 1, the result is undefined. */
1351
84.2M
#define SLJIT_SHL     (SLJIT_OP2_BASE + 8)
1352
#define SLJIT_SHL32     (SLJIT_SHL | SLJIT_32)
1353
/* Flags: Z
1354
   Same as SLJIT_SHL, except the second operand is
1355
   always masked by the length of the shift operation. */
1356
84.2M
#define SLJIT_MSHL      (SLJIT_OP2_BASE + 9)
1357
#define SLJIT_MSHL32      (SLJIT_MSHL | SLJIT_32)
1358
/* Flags: Z
1359
   Let bit_length be the length of the shift operation: 32 or 64.
1360
   If src2 is immediate, src2w is masked by (bit_length - 1).
1361
   Otherwise, if the content of src2 is outside the range from 0
1362
   to bit_length - 1, the result is undefined. */
1363
21.0M
#define SLJIT_LSHR      (SLJIT_OP2_BASE + 10)
1364
#define SLJIT_LSHR32      (SLJIT_LSHR | SLJIT_32)
1365
/* Flags: Z
1366
   Same as SLJIT_LSHR, except the second operand is
1367
   always masked by the length of the shift operation. */
1368
21.0M
#define SLJIT_MLSHR     (SLJIT_OP2_BASE + 11)
1369
#define SLJIT_MLSHR32     (SLJIT_MLSHR | SLJIT_32)
1370
/* Flags: Z
1371
   Let bit_length be the length of the shift operation: 32 or 64.
1372
   If src2 is immediate, src2w is masked by (bit_length - 1).
1373
   Otherwise, if the content of src2 is outside the range from 0
1374
   to bit_length - 1, the result is undefined. */
1375
391k
#define SLJIT_ASHR      (SLJIT_OP2_BASE + 12)
1376
#define SLJIT_ASHR32      (SLJIT_ASHR | SLJIT_32)
1377
/* Flags: Z
1378
   Same as SLJIT_ASHR, except the second operand is
1379
   always masked by the length of the shift operation. */
1380
391k
#define SLJIT_MASHR     (SLJIT_OP2_BASE + 13)
1381
#define SLJIT_MASHR32     (SLJIT_MASHR | SLJIT_32)
1382
/* Flags: - (may destroy flags)
1383
   Let bit_length be the length of the rotate operation: 32 or 64.
1384
   The second operand is always masked by (bit_length - 1). */
1385
0
#define SLJIT_ROTL      (SLJIT_OP2_BASE + 14)
1386
#define SLJIT_ROTL32      (SLJIT_ROTL | SLJIT_32)
1387
/* Flags: - (may destroy flags)
1388
   Let bit_length be the length of the rotate operation: 32 or 64.
1389
   The second operand is always masked by (bit_length - 1). */
1390
0
#define SLJIT_ROTR      (SLJIT_OP2_BASE + 15)
1391
#define SLJIT_ROTR32      (SLJIT_ROTR | SLJIT_32)
1392
1393
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1394
  sljit_s32 dst, sljit_sw dstw,
1395
  sljit_s32 src1, sljit_sw src1w,
1396
  sljit_s32 src2, sljit_sw src2w);
1397
1398
/* The sljit_emit_op2u function is the same as sljit_emit_op2
1399
   except the result is discarded. */
1400
1401
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
1402
  sljit_s32 src1, sljit_sw src1w,
1403
  sljit_s32 src2, sljit_sw src2w);
1404
1405
/* Starting index of opcodes for sljit_emit_op2r. */
1406
#define SLJIT_OP2R_BASE     96
1407
1408
/* Flags: - (may destroy flags) */
1409
#define SLJIT_MULADD      (SLJIT_OP2R_BASE + 0)
1410
#define SLJIT_MULADD32      (SLJIT_MULADD | SLJIT_32)
1411
1412
/* Similar to sljit_emit_op2, except the destination is always a register. */
1413
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
1414
  sljit_s32 dst_reg,
1415
  sljit_s32 src1, sljit_sw src1w,
1416
  sljit_s32 src2, sljit_sw src2w);
1417
1418
/* Emit a left or right shift operation, where the bits shifted
1419
   in comes from a separate source operand. All operands are
1420
   interpreted as unsigned integers.
1421
1422
   In the following, the value_mask variable is 31 for 32 bit
1423
     operations and word_size - 1 otherwise.
1424
1425
   op must be one of the following operations:
1426
     SLJIT_SHL or SLJIT_SHL32:
1427
       dst_reg = src1_reg << src3_reg
1428
       dst_reg |= ((src2_reg >> 1) >> (src3 ^ value_mask))
1429
     SLJIT_MSHL or SLJIT_MSHL32:
1430
       src3 &= value_mask
1431
       perform the SLJIT_SHL or SLJIT_SHL32 operation
1432
     SLJIT_LSHR or SLJIT_LSHR32:
1433
       dst_reg = src1_reg >> src3_reg
1434
       dst_reg |= ((src2_reg << 1) << (src3 ^ value_mask))
1435
     SLJIT_MLSHR or SLJIT_MLSHR32:
1436
       src3 &= value_mask
1437
       perform the SLJIT_LSHR or SLJIT_LSHR32 operation
1438
1439
   op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO
1440
1441
   dst_reg specifies the destination register, where dst_reg
1442
     and src2_reg cannot be the same registers
1443
   src1_reg specifies the source register
1444
   src2_reg specifies the register which is shifted into src1_reg
1445
   src3 / src3w contains the shift amount
1446
1447
   Note: a rotate operation is performed if src1_reg and
1448
         src2_reg are the same registers
1449
1450
   Flags: - (may destroy flags) */
1451
1452
/* The src3 operand contains a non-zero value. Improves
1453
   the generated code on certain architectures, which
1454
   provides a small performance improvement. */
1455
#define SLJIT_SHIFT_INTO_NON_ZERO 0x200
1456
1457
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
1458
  sljit_s32 dst_reg,
1459
  sljit_s32 src1_reg,
1460
  sljit_s32 src2_reg,
1461
  sljit_s32 src3, sljit_sw src3w);
1462
1463
/* Starting index of opcodes for sljit_emit_op_src
1464
   and sljit_emit_op_dst. */
1465
4.12M
#define SLJIT_OP_SRC_DST_BASE   112
1466
1467
/* Fast return, see SLJIT_FAST_CALL for more details.
1468
   Note: src cannot be an immediate value
1469
   Flags: - (does not modify flags) */
1470
1.33M
#define SLJIT_FAST_RETURN   (SLJIT_OP_SRC_DST_BASE + 0)
1471
/* Skip stack frames before fast return.
1472
   Note: src cannot be an immediate value
1473
   Flags: may destroy flags. */
1474
0
#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN  (SLJIT_OP_SRC_DST_BASE + 1)
1475
/* Prefetch value into the level 1 data cache
1476
   Note: if the target CPU does not support data prefetch,
1477
         no instructions are emitted.
1478
   Note: this instruction never fails, even if the memory address is invalid.
1479
   Flags: - (does not modify flags) */
1480
0
#define SLJIT_PREFETCH_L1   (SLJIT_OP_SRC_DST_BASE + 2)
1481
/* Prefetch value into the level 2 data cache
1482
   Note: same as SLJIT_PREFETCH_L1 if the target CPU
1483
         does not support this instruction form.
1484
   Note: this instruction never fails, even if the memory address is invalid.
1485
   Flags: - (does not modify flags) */
1486
0
#define SLJIT_PREFETCH_L2   (SLJIT_OP_SRC_DST_BASE + 3)
1487
/* Prefetch value into the level 3 data cache
1488
   Note: same as SLJIT_PREFETCH_L2 if the target CPU
1489
         does not support this instruction form.
1490
   Note: this instruction never fails, even if the memory address is invalid.
1491
   Flags: - (does not modify flags) */
1492
0
#define SLJIT_PREFETCH_L3   (SLJIT_OP_SRC_DST_BASE + 4)
1493
/* Prefetch a value which is only used once (and can be discarded afterwards)
1494
   Note: same as SLJIT_PREFETCH_L1 if the target CPU
1495
         does not support this instruction form.
1496
   Note: this instruction never fails, even if the memory address is invalid.
1497
   Flags: - (does not modify flags) */
1498
0
#define SLJIT_PREFETCH_ONCE   (SLJIT_OP_SRC_DST_BASE + 5)
1499
1500
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
1501
  sljit_s32 src, sljit_sw srcw);
1502
1503
/* Fast enter, see SLJIT_FAST_CALL for more details.
1504
   Flags: - (does not modify flags) */
1505
2.78M
#define SLJIT_FAST_ENTER    (SLJIT_OP_SRC_DST_BASE + 6)
1506
1507
/* Copies the return address into dst. The return address is the
1508
   address where the execution continues after the called function
1509
   returns (see: sljit_emit_return / sljit_emit_return_void).
1510
   Flags: - (does not modify flags) */
1511
0
#define SLJIT_GET_RETURN_ADDRESS  (SLJIT_OP_SRC_DST_BASE + 7)
1512
1513
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
1514
  sljit_s32 dst, sljit_sw dstw);
1515
1516
/* Starting index of opcodes for sljit_emit_fop1. */
1517
586k
#define SLJIT_FOP1_BASE     144
1518
1519
/* Flags: - (does not modify flags) */
1520
586k
#define SLJIT_MOV_F64     (SLJIT_FOP1_BASE + 0)
1521
#define SLJIT_MOV_F32     (SLJIT_MOV_F64 | SLJIT_32)
1522
/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
1523
   SRC/DST TYPE can be: F64, F32, S32, SW (SRC can also be UW or U32)
1524
   Rounding mode when the destination is SW or S32: round towards zero. */
1525
/* Flags: - (may destroy flags) */
1526
0
#define SLJIT_CONV_F64_FROM_F32   (SLJIT_FOP1_BASE + 1)
1527
#define SLJIT_CONV_F32_FROM_F64   (SLJIT_CONV_F64_FROM_F32 | SLJIT_32)
1528
/* Flags: - (may destroy flags) */
1529
0
#define SLJIT_CONV_SW_FROM_F64    (SLJIT_FOP1_BASE + 2)
1530
#define SLJIT_CONV_SW_FROM_F32    (SLJIT_CONV_SW_FROM_F64 | SLJIT_32)
1531
/* Flags: - (may destroy flags) */
1532
0
#define SLJIT_CONV_S32_FROM_F64   (SLJIT_FOP1_BASE + 3)
1533
#define SLJIT_CONV_S32_FROM_F32   (SLJIT_CONV_S32_FROM_F64 | SLJIT_32)
1534
/* Flags: - (may destroy flags) */
1535
0
#define SLJIT_CONV_F64_FROM_SW    (SLJIT_FOP1_BASE + 4)
1536
#define SLJIT_CONV_F32_FROM_SW    (SLJIT_CONV_F64_FROM_SW | SLJIT_32)
1537
/* Flags: - (may destroy flags) */
1538
0
#define SLJIT_CONV_F64_FROM_S32   (SLJIT_FOP1_BASE + 5)
1539
#define SLJIT_CONV_F32_FROM_S32   (SLJIT_CONV_F64_FROM_S32 | SLJIT_32)
1540
/* Flags: - (may destroy flags) */
1541
#define SLJIT_CONV_F64_FROM_UW    (SLJIT_FOP1_BASE + 6)
1542
#define SLJIT_CONV_F32_FROM_UW    (SLJIT_CONV_F64_FROM_UW | SLJIT_32)
1543
/* Flags: - (may destroy flags) */
1544
0
#define SLJIT_CONV_F64_FROM_U32   (SLJIT_FOP1_BASE + 7)
1545
#define SLJIT_CONV_F32_FROM_U32   (SLJIT_CONV_F64_FROM_U32 | SLJIT_32)
1546
/* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64.
1547
   Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
1548
0
#define SLJIT_CMP_F64     (SLJIT_FOP1_BASE + 8)
1549
#define SLJIT_CMP_F32     (SLJIT_CMP_F64 | SLJIT_32)
1550
/* Flags: - (may destroy flags) */
1551
0
#define SLJIT_NEG_F64     (SLJIT_FOP1_BASE + 9)
1552
#define SLJIT_NEG_F32     (SLJIT_NEG_F64 | SLJIT_32)
1553
/* Flags: - (may destroy flags) */
1554
0
#define SLJIT_ABS_F64     (SLJIT_FOP1_BASE + 10)
1555
#define SLJIT_ABS_F32     (SLJIT_ABS_F64 | SLJIT_32)
1556
1557
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1558
  sljit_s32 dst, sljit_sw dstw,
1559
  sljit_s32 src, sljit_sw srcw);
1560
1561
/* Starting index of opcodes for sljit_emit_fop2. */
1562
#define SLJIT_FOP2_BASE     176
1563
1564
/* Flags: - (may destroy flags) */
1565
#define SLJIT_ADD_F64     (SLJIT_FOP2_BASE + 0)
1566
#define SLJIT_ADD_F32     (SLJIT_ADD_F64 | SLJIT_32)
1567
/* Flags: - (may destroy flags) */
1568
#define SLJIT_SUB_F64     (SLJIT_FOP2_BASE + 1)
1569
#define SLJIT_SUB_F32     (SLJIT_SUB_F64 | SLJIT_32)
1570
/* Flags: - (may destroy flags) */
1571
#define SLJIT_MUL_F64     (SLJIT_FOP2_BASE + 2)
1572
#define SLJIT_MUL_F32     (SLJIT_MUL_F64 | SLJIT_32)
1573
/* Flags: - (may destroy flags) */
1574
#define SLJIT_DIV_F64     (SLJIT_FOP2_BASE + 3)
1575
#define SLJIT_DIV_F32     (SLJIT_DIV_F64 | SLJIT_32)
1576
1577
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1578
  sljit_s32 dst, sljit_sw dstw,
1579
  sljit_s32 src1, sljit_sw src1w,
1580
  sljit_s32 src2, sljit_sw src2w);
1581
1582
/* Starting index of opcodes for sljit_emit_fop2r. */
1583
#define SLJIT_FOP2R_BASE    192
1584
1585
/* Flags: - (may destroy flags) */
1586
#define SLJIT_COPYSIGN_F64    (SLJIT_FOP2R_BASE + 0)
1587
#define SLJIT_COPYSIGN_F32    (SLJIT_COPYSIGN_F64 | SLJIT_32)
1588
1589
/* Similar to sljit_emit_fop2, except the destination is always a register. */
1590
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
1591
  sljit_s32 dst_freg,
1592
  sljit_s32 src1, sljit_sw src1w,
1593
  sljit_s32 src2, sljit_sw src2w);
1594
1595
/* Sets a floating point register to an immediate value. */
1596
1597
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
1598
  sljit_s32 freg, sljit_f32 value);
1599
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
1600
  sljit_s32 freg, sljit_f64 value);
1601
1602
/* The following opcodes are used by sljit_emit_fcopy(). */
1603
1604
/* 64 bit: copy a 64 bit value from an integer register into a
1605
           64 bit floating point register without any modifications.
1606
   32 bit: copy a 32 bit register or register pair into a 64 bit
1607
           floating point register without any modifications. The
1608
           register, or the first register of the register pair
1609
           replaces the high order 32 bit of the floating point
1610
           register. If a register pair is passed, the low
1611
           order 32 bit is replaced by the second register.
1612
           Otherwise, the low order 32 bit is unchanged. */
1613
#define SLJIT_COPY_TO_F64   1
1614
/* Copy a 32 bit value from an integer register into a 32 bit
1615
   floating point register without any modifications. */
1616
#define SLJIT_COPY32_TO_F32   (SLJIT_COPY_TO_F64 | SLJIT_32)
1617
/* 64 bit: copy the value of a 64 bit floating point register into
1618
           an integer register without any modifications.
1619
   32 bit: copy a 64 bit floating point register into a 32 bit register
1620
           or a 32 bit register pair without any modifications. The
1621
           high order 32 bit of the floating point register is copied
1622
           into the register, or the first register of the register
1623
           pair. If a register pair is passed, the low order 32 bit
1624
           is copied into the second register. */
1625
#define SLJIT_COPY_FROM_F64   2
1626
/* Copy the value of a 32 bit floating point register into an integer
1627
   register without any modifications. The register should be processed
1628
   with 32 bit operations later. */
1629
#define SLJIT_COPY32_FROM_F32   (SLJIT_COPY_FROM_F64 | SLJIT_32)
1630
1631
/* Special data copy which involves floating point registers.
1632
1633
  op must be between SLJIT_COPY_TO_F64 and SLJIT_COPY32_FROM_F32
1634
  freg must be a floating point register
1635
  reg must be a register or register pair */
1636
1637
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
1638
  sljit_s32 freg, sljit_s32 reg);
1639
1640
/* Label and jump instructions. */
1641
1642
/* Emits a label which can be the target of jump / mov_addr instructions. */
1643
1644
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler);
1645
1646
/* Alignment values for sljit_emit_aligned_label. */
1647
1648
#define SLJIT_LABEL_ALIGN_1 0
1649
#define SLJIT_LABEL_ALIGN_2 1
1650
#define SLJIT_LABEL_ALIGN_4 2
1651
#define SLJIT_LABEL_ALIGN_8 3
1652
#define SLJIT_LABEL_ALIGN_16  4
1653
#define SLJIT_LABEL_ALIGN_W SLJIT_WORD_SHIFT
1654
#define SLJIT_LABEL_ALIGN_P SLJIT_POINTER_SHIFT
1655
1656
/* Emits a label whose address is aligned to a power of 2 value. When some
1657
   extra space needs to be added to align the label, that space is filled
1658
   with SLJIT_NOP instructions. These labels usually represent the end of a
1659
   compilation block, and a new function or some read-only data (e.g. a
1660
   jump table) follows it. In these typical cases the SLJIT_NOPs are never
1661
   executed.
1662
1663
   Optionally, buffers for storing read-only data or code can be allocated
1664
   by this operation. The buffers are passed as a chain list, and a separate
1665
   memory area is allocated for each item in the list. All buffers are aligned
1666
   to SLJIT_NOP instruction size, and their starting address is returned as
1667
   a label. The sljit_get_label_abs_addr function or the SLJIT_MOV_ABS_ADDR
1668
   operation can be used to get the real address. The label of the first buffer
1669
   is always the same as the returned label. The buffers are initially
1670
   initialized with SLJIT_NOP instructions. The alignment of the buffers can
1671
   be controlled by their starting address and sizes. If the starting address
1672
   is aligned to N, and size is also divisible by N, the next buffer is aligned
1673
   to N. I.e. if a buffer is 16 byte aligned, and its size is divisible by 4,
1674
   the next buffer is 4 byte aligned. Note: if a buffer is N (>=2) byte aligned,
1675
   it is also N/2 byte aligned.
1676
1677
   alignment represents the alignment, and its value can
1678
         be specified by SLJIT_LABEL_ALIGN_* constants
1679
1680
   buffers is a list of read-only buffers stored in a chain list.
1681
           After calling sljit_generate_code, these buffers can be
1682
           modified by sljit_read_only_buffer_start_writing() /
1683
           sljit_read_only_buffer_end_writing() functions
1684
1685
   Note: the constant pool (if present) may be stored before the label. */
1686
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
1687
  sljit_s32 alignment, struct sljit_read_only_buffer *buffers);
1688
1689
/* The SLJIT_FAST_CALL is a calling method for creating lightweight function
1690
   calls. This type of call preserves the values of all registers and stack
1691
   frame. Unlike normal function calls, the enter and return operations must
1692
   be performed by the SLJIT_FAST_ENTER and SLJIT_FAST_RETURN operations
1693
   respectively. The return address is stored in the dst argument of the
1694
   SLJIT_FAST_ENTER operation, and this return address should be passed as
1695
   the src argument for the SLJIT_FAST_RETURN operation to return from the
1696
   called function.
1697
1698
   Fast calls are cheap operations (usually only a single call instruction is
1699
   emitted) but they do not preserve any registers. However the callee function
1700
   can freely use / update any registers and the locals area which can be
1701
   efficiently exploited by various optimizations. Registers can be saved
1702
   and restored manually if needed.
1703
1704
   Although returning to a different address by SLJIT_FAST_RETURN is possible,
1705
   this address usually cannot be predicted by the return address predictor of
1706
   modern CPUs which may reduce performance. Furthermore certain security
1707
   enhancement technologies such as Intel Control-flow Enforcement Technology
1708
   (CET) may disallow returning to a different address (indirect jumps
1709
   can be used instead, see SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN). */
1710
1711
/* Invert (negate) conditional type: xor (^) with 0x1 */
1712
1713
/* Integer comparison types. */
1714
1.23G
#define SLJIT_EQUAL     0
1715
#define SLJIT_ZERO      SLJIT_EQUAL
1716
3.17G
#define SLJIT_NOT_EQUAL     1
1717
2.09G
#define SLJIT_NOT_ZERO      SLJIT_NOT_EQUAL
1718
1719
378M
#define SLJIT_LESS      2
1720
#define SLJIT_SET_LESS      SLJIT_SET(SLJIT_LESS)
1721
164M
#define SLJIT_GREATER_EQUAL   3
1722
#define SLJIT_SET_GREATER_EQUAL   SLJIT_SET(SLJIT_LESS)
1723
722M
#define SLJIT_GREATER     4
1724
#define SLJIT_SET_GREATER   SLJIT_SET(SLJIT_GREATER)
1725
75.6M
#define SLJIT_LESS_EQUAL    5
1726
#define SLJIT_SET_LESS_EQUAL    SLJIT_SET(SLJIT_GREATER)
1727
0
#define SLJIT_SIG_LESS      6
1728
#define SLJIT_SET_SIG_LESS    SLJIT_SET(SLJIT_SIG_LESS)
1729
0
#define SLJIT_SIG_GREATER_EQUAL   7
1730
#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_LESS)
1731
99.6M
#define SLJIT_SIG_GREATER   8
1732
#define SLJIT_SET_SIG_GREATER   SLJIT_SET(SLJIT_SIG_GREATER)
1733
44.5k
#define SLJIT_SIG_LESS_EQUAL    9
1734
44.5k
#define SLJIT_SET_SIG_LESS_EQUAL  SLJIT_SET(SLJIT_SIG_GREATER)
1735
1736
0
#define SLJIT_OVERFLOW      10
1737
#define SLJIT_SET_OVERFLOW    SLJIT_SET(SLJIT_OVERFLOW)
1738
0
#define SLJIT_NOT_OVERFLOW    11
1739
1740
/* Unlike other flags, sljit_emit_jump may destroy the carry flag. */
1741
378M
#define SLJIT_CARRY     12
1742
#define SLJIT_SET_CARRY     SLJIT_SET(SLJIT_CARRY)
1743
164M
#define SLJIT_NOT_CARRY     13
1744
1745
1.22G
#define SLJIT_ATOMIC_STORED   14
1746
#define SLJIT_SET_ATOMIC_STORED   SLJIT_SET(SLJIT_ATOMIC_STORED)
1747
1.08G
#define SLJIT_ATOMIC_NOT_STORED   15
1748
1749
/* Basic floating point comparison types.
1750
1751
   Note: when the comparison result is unordered, their behaviour is unspecified. */
1752
1753
1.22G
#define SLJIT_F_EQUAL       16
1754
#define SLJIT_SET_F_EQUAL     SLJIT_SET(SLJIT_F_EQUAL)
1755
1.08G
#define SLJIT_F_NOT_EQUAL     17
1756
#define SLJIT_SET_F_NOT_EQUAL     SLJIT_SET(SLJIT_F_EQUAL)
1757
378M
#define SLJIT_F_LESS        18
1758
#define SLJIT_SET_F_LESS      SLJIT_SET(SLJIT_F_LESS)
1759
164M
#define SLJIT_F_GREATER_EQUAL     19
1760
#define SLJIT_SET_F_GREATER_EQUAL   SLJIT_SET(SLJIT_F_LESS)
1761
722M
#define SLJIT_F_GREATER       20
1762
#define SLJIT_SET_F_GREATER     SLJIT_SET(SLJIT_F_GREATER)
1763
75.6M
#define SLJIT_F_LESS_EQUAL      21
1764
#define SLJIT_SET_F_LESS_EQUAL      SLJIT_SET(SLJIT_F_GREATER)
1765
1766
/* Jumps when either argument contains a NaN value. */
1767
0
#define SLJIT_UNORDERED       22
1768
#define SLJIT_SET_UNORDERED     SLJIT_SET(SLJIT_UNORDERED)
1769
/* Jumps when neither argument contains a NaN value. */
1770
0
#define SLJIT_ORDERED       23
1771
#define SLJIT_SET_ORDERED     SLJIT_SET(SLJIT_UNORDERED)
1772
1773
/* Ordered / unordered floating point comparison types.
1774
1775
   Note: each comparison type has an ordered and unordered form. Some
1776
         architectures support only one of them (see: sljit_cmp_info). */
1777
1778
0
#define SLJIT_ORDERED_EQUAL     24
1779
#define SLJIT_SET_ORDERED_EQUAL     SLJIT_SET(SLJIT_ORDERED_EQUAL)
1780
0
#define SLJIT_UNORDERED_OR_NOT_EQUAL    25
1781
#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL  SLJIT_SET(SLJIT_ORDERED_EQUAL)
1782
722M
#define SLJIT_ORDERED_LESS      26
1783
#define SLJIT_SET_ORDERED_LESS      SLJIT_SET(SLJIT_ORDERED_LESS)
1784
75.6M
#define SLJIT_UNORDERED_OR_GREATER_EQUAL  27
1785
#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL  SLJIT_SET(SLJIT_ORDERED_LESS)
1786
722M
#define SLJIT_ORDERED_GREATER     28
1787
#define SLJIT_SET_ORDERED_GREATER   SLJIT_SET(SLJIT_ORDERED_GREATER)
1788
75.6M
#define SLJIT_UNORDERED_OR_LESS_EQUAL   29
1789
#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER)
1790
1791
1.22G
#define SLJIT_UNORDERED_OR_EQUAL    30
1792
#define SLJIT_SET_UNORDERED_OR_EQUAL    SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
1793
1.08G
#define SLJIT_ORDERED_NOT_EQUAL     31
1794
#define SLJIT_SET_ORDERED_NOT_EQUAL   SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
1795
378M
#define SLJIT_UNORDERED_OR_LESS     32
1796
#define SLJIT_SET_UNORDERED_OR_LESS   SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
1797
164M
#define SLJIT_ORDERED_GREATER_EQUAL   33
1798
#define SLJIT_SET_ORDERED_GREATER_EQUAL   SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
1799
378M
#define SLJIT_UNORDERED_OR_GREATER    34
1800
#define SLJIT_SET_UNORDERED_OR_GREATER    SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
1801
164M
#define SLJIT_ORDERED_LESS_EQUAL    35
1802
#define SLJIT_SET_ORDERED_LESS_EQUAL    SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
1803
1804
/* Unconditional jump types. */
1805
18.0G
#define SLJIT_JUMP      36
1806
/* Fast calling method. See the description above. */
1807
#define SLJIT_FAST_CALL     37
1808
/* Default C calling convention. */
1809
209M
#define SLJIT_CALL      38
1810
/* Called function must be compiled by SLJIT.
1811
   See SLJIT_ENTER_REG_ARG option. */
1812
109M
#define SLJIT_CALL_REG_ARG    39
1813
1814
/* The target can be changed during runtime (see: sljit_set_jump_addr). */
1815
14.2G
#define SLJIT_REWRITABLE_JUMP   0x10000
1816
/* When this flag is passed, the execution of the current function ends and
1817
   the called function returns to the caller of the current function. The
1818
   stack usage is reduced before the call, but it is not necessarily reduced
1819
   to zero. In the latter case the compiler needs to allocate space for some
1820
   arguments and the return address must be stored on the stack as well. */
1821
219M
#define SLJIT_CALL_RETURN   0x20000
1822
1823
/* Emit a jump instruction. The destination is not set, only the type of the jump.
1824
    type must be between SLJIT_EQUAL and SLJIT_FAST_CALL
1825
    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
1826
1827
   Flags: does not modify flags. */
1828
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type);
1829
1830
/* Emit a C compiler (ABI) compatible function call.
1831
    type must be SLJIT_CALL or SLJIT_CALL_REG_ARG
1832
    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and/or SLJIT_CALL_RETURN
1833
    arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros
1834
1835
   Flags: destroy all flags. */
1836
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types);
1837
1838
/* Integer comparison operation. In most architectures it is implemented
1839
   as a compare (sljit_emit_op2u with SLJIT_SUB) operation followed by
1840
   an sljit_emit_jump. However, some architectures (e.g: ARM64 or RISCV)
1841
   may optimize the generated code further. It is suggested to use this
1842
   comparison form when appropriate.
1843
    type must be between SLJIT_EQUAL and SLJIT_SIG_LESS_EQUAL
1844
    type can be combined (or'ed) with SLJIT_32 or SLJIT_REWRITABLE_JUMP
1845
1846
   Flags: may destroy flags. */
1847
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
1848
  sljit_s32 src1, sljit_sw src1w,
1849
  sljit_s32 src2, sljit_sw src2w);
1850
1851
/* Floating point comparison operation. In most architectures it is
1852
   implemented as a SLJIT_CMP_F32/64 operation (setting appropriate
1853
   flags) followed by a sljit_emit_jump. However, some architectures
1854
   (e.g: MIPS) may optimize the generated code further. It is suggested
1855
   to use this comparison form when appropriate.
1856
    type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL
1857
    type can be combined (or'ed) with SLJIT_32 or SLJIT_REWRITABLE_JUMP
1858
1859
   Flags: destroy flags.
1860
   Note: when any operand is NaN the behaviour depends on the comparison type. */
1861
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
1862
  sljit_s32 src1, sljit_sw src1w,
1863
  sljit_s32 src2, sljit_sw src2w);
1864
1865
/* The following flags are used by sljit_emit_op2cmpz(). */
1866
#define SLJIT_JUMP_IF_NON_ZERO    0
1867
#define SLJIT_JUMP_IF_ZERO    SLJIT_SET_Z
1868
1869
/* Perform an integer arithmetic operation, then its result is compared to
1870
   zero. In most architectures it is implemented as an sljit_emit_op2
1871
   followed by an sljit_emit_jump. However, some architectures (e.g: RISCV)
1872
   may optimize the generated code further. It is suggested to use this
1873
   operation form when appropriate (e.g. for loops with counters).
1874
1875
   op must be an sljit_emit_op2 operation where zero flag can be set,
1876
   op can be combined with SLJIT_SET_* status flag setters except
1877
     SLJIT_SET_Z, SLJIT_REWRITABLE_JUMP or SLJIT_JUMP_IF_* option bits.
1878
1879
   Note: SLJIT_JUMP_IF_NON_ZERO is the default operation if neither
1880
      SLJIT_JUMP_IF_ZERO nor SLJIT_JUMP_IF_NON_ZERO is specified.
1881
   Flags: sets the variable flag depending on op argument, the
1882
      zero flag is undefined. */
1883
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op2cmpz(struct sljit_compiler *compiler, sljit_s32 op,
1884
  sljit_s32 dst, sljit_sw dstw,
1885
  sljit_s32 src1, sljit_sw src1w,
1886
  sljit_s32 src2, sljit_sw src2w);
1887
1888
/* Set the destination of the jump to this label. */
1889
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label);
1890
/* Set the destination of the jump to this address (target). */
1891
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
1892
1893
/* Emit an indirect jump or fast call.
1894
   Direct form: set src to SLJIT_IMM() and srcw to the address
1895
   Indirect form: any other valid addressing mode
1896
    type must be between SLJIT_JUMP and SLJIT_FAST_CALL
1897
1898
   Flags: does not modify flags. */
1899
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);
1900
1901
/* Emit a C compiler (ABI) compatible function call.
1902
   Direct form: set src to SLJIT_IMM() and srcw to the address
1903
   Indirect form: any other valid addressing mode
1904
    type must be SLJIT_CALL or SLJIT_CALL_REG_ARG
1905
    type can be combined (or'ed) with SLJIT_CALL_RETURN
1906
    arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros
1907
1908
   Flags: destroy all flags. */
1909
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw);
1910
1911
/* Perform an operation using the conditional flags as the second argument.
1912
   Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL.
1913
   The value represented by the type is 1, if the condition represented
1914
   by the type is fulfilled, and 0 otherwise.
1915
1916
   When op is SLJIT_MOV or SLJIT_MOV32:
1917
     Set dst to the value represented by the type (0 or 1).
1918
     Flags: - (does not modify flags)
1919
   When op is SLJIT_AND, SLJIT_AND32, SLJIT_OR, SLJIT_OR32, SLJIT_XOR, or SLJIT_XOR32
1920
     Performs the binary operation using dst as the first, and the value
1921
     represented by type as the second argument. Result is written into dst.
1922
     Flags: Z (may destroy flags) */
1923
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
1924
  sljit_s32 dst, sljit_sw dstw,
1925
  sljit_s32 type);
1926
1927
/* Emit a conditional select instruction which moves src1 to dst_reg,
1928
   if the condition is satisfied, or src2_reg to dst_reg otherwise.
1929
1930
   type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
1931
   type can be combined (or'ed) with SLJIT_32 to move 32 bit
1932
       register values instead of word sized ones
1933
   dst_reg and src2_reg must be valid registers
1934
   src1 must be a valid operand
1935
1936
   Note: if src1 is a memory operand, its value
1937
         might be loaded even if the condition is false.
1938
1939
   Flags: - (does not modify flags) */
1940
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
1941
  sljit_s32 dst_reg,
1942
  sljit_s32 src1, sljit_sw src1w,
1943
  sljit_s32 src2_reg);
1944
1945
/* Emit a conditional floating point select instruction which moves
1946
   src1 to dst_freg, if the condition is satisfied, or src2_freg to
1947
   dst_freg otherwise.
1948
1949
   type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
1950
   type can be combined (or'ed) with SLJIT_32 to move 32 bit
1951
       floating point values instead of 64 bit ones
1952
   dst_freg and src2_freg must be valid floating point registers
1953
   src1 must be a valid operand
1954
1955
   Note: if src1 is a memory operand, its value
1956
         might be loaded even if the condition is false.
1957
1958
   Flags: - (does not modify flags) */
1959
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
1960
  sljit_s32 dst_freg,
1961
  sljit_s32 src1, sljit_sw src1w,
1962
  sljit_s32 src2_freg);
1963
1964
/* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(),
1965
   sljit_emit_fmem(), and sljit_emit_fmem_update(). */
1966
1967
/* Memory load operation. This is the default. */
1968
#define SLJIT_MEM_LOAD    0x000000
1969
/* Memory store operation. */
1970
28.6k
#define SLJIT_MEM_STORE   0x000200
1971
1972
/* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */
1973
1974
/* Load or store data from an unaligned (byte aligned) address. */
1975
#define SLJIT_MEM_UNALIGNED 0x000400
1976
/* Load or store data from a 16 bit aligned address. */
1977
#define SLJIT_MEM_ALIGNED_16  0x000800
1978
/* Load or store data from a 32 bit aligned address. */
1979
#define SLJIT_MEM_ALIGNED_32  0x001000
1980
1981
/* The following flags are used by sljit_emit_mem_update(),
1982
   and sljit_emit_fmem_update(). */
1983
1984
/* Base register is updated before the memory access (default). */
1985
1.48M
#define SLJIT_MEM_PRE   0x000000
1986
/* Base register is updated after the memory access. */
1987
33.3k
#define SLJIT_MEM_POST    0x000400
1988
1989
/* When SLJIT_MEM_SUPP is passed, no instructions are emitted.
1990
   Instead the function returns with SLJIT_SUCCESS if the instruction
1991
   form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag
1992
   allows runtime checking of available instruction forms. */
1993
1.52M
#define SLJIT_MEM_SUPP    0x000800
1994
1995
/* The sljit_emit_mem emits instructions for various memory operations:
1996
1997
   When SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 /
1998
        SLJIT_MEM_ALIGNED_32 is set in type argument:
1999
     Emit instructions for unaligned memory loads or stores. When
2000
     SLJIT_UNALIGNED is not defined, the only way to access unaligned
2001
     memory data is using sljit_emit_mem. Otherwise all operations (e.g.
2002
     sljit_emit_op1/2, or sljit_emit_fop1/2) support unaligned access.
2003
     In general, the performance of unaligned memory accesses is often
2004
     lower than aligned and should be avoided.
2005
2006
   When a pair of registers is passed in reg argument:
2007
     Emit instructions for moving data between a register pair and
2008
     memory. The register pair can be specified by the SLJIT_REG_PAIR
2009
     macro. The first register is loaded from or stored into the
2010
     location specified by the mem/memw arguments, and the end address
2011
     of this operation is the starting address of the data transfer
2012
     between the second register and memory. The type argument must
2013
     be SLJIT_MOV. The SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_*
2014
     options are allowed for this operation.
2015
2016
   type must be between SLJIT_MOV and SLJIT_MOV_P and can be
2017
     combined (or'ed) with SLJIT_MEM_* flags
2018
   reg is a register or register pair, which is the source or
2019
     destination of the operation
2020
   mem must be a memory operand
2021
2022
   Flags: - (does not modify flags) */
2023
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
2024
  sljit_s32 reg,
2025
  sljit_s32 mem, sljit_sw memw);
2026
2027
/* Emit a single memory load or store with update instruction.
2028
   When the requested instruction form is not supported by the CPU,
2029
   it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the
2030
   instruction. This allows specializing tight loops based on
2031
   the supported instruction forms (see SLJIT_MEM_SUPP flag).
2032
   Absolute address (SLJIT_MEM0) forms are never supported
2033
   and the base (first) register specified by the mem argument
2034
   must not be SLJIT_SP and must also be different from the
2035
   register specified by the reg argument.
2036
2037
   type must be between SLJIT_MOV and SLJIT_MOV_P and can be
2038
     combined (or'ed) with SLJIT_MEM_* flags
2039
   reg is the source or destination register of the operation
2040
   mem must be a memory operand
2041
2042
   Flags: - (does not modify flags) */
2043
2044
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
2045
  sljit_s32 reg,
2046
  sljit_s32 mem, sljit_sw memw);
2047
2048
/* Same as sljit_emit_mem except for the following:
2049
2050
   Loading or storing a pair of registers is not supported.
2051
2052
   type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be
2053
     combined (or'ed) with SLJIT_MEM_* flags.
2054
   freg is the source or destination floating point register
2055
     of the operation
2056
   mem must be a memory operand
2057
2058
   Flags: - (does not modify flags) */
2059
2060
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
2061
  sljit_s32 freg,
2062
  sljit_s32 mem, sljit_sw memw);
2063
2064
/* Same as sljit_emit_mem_update except for the following:
2065
2066
   type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be
2067
     combined (or'ed) with SLJIT_MEM_* flags
2068
   freg is the source or destination floating point register
2069
     of the operation
2070
   mem must be a memory operand
2071
2072
   Flags: - (does not modify flags) */
2073
2074
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
2075
  sljit_s32 freg,
2076
  sljit_s32 mem, sljit_sw memw);
2077
2078
/* The following options are used by several simd operations. */
2079
2080
/* Load data into a vector register, this is the default */
2081
#define SLJIT_SIMD_LOAD     0x000000
2082
/* Store data from a vector register */
2083
2.16M
#define SLJIT_SIMD_STORE    0x000001
2084
/* The vector register contains floating point values */
2085
4.71M
#define SLJIT_SIMD_FLOAT    0x000400
2086
/* Tests whether the operation is available */
2087
3.64M
#define SLJIT_SIMD_TEST     0x000800
2088
/* Move data to/from a 64 bit (8 byte) long vector register */
2089
#define SLJIT_SIMD_REG_64   (3 << 12)
2090
/* Move data to/from a 128 bit (16 byte) long vector register */
2091
12.8M
#define SLJIT_SIMD_REG_128    (4 << 12)
2092
/* Move data to/from a 256 bit (32 byte) long vector register */
2093
5.99M
#define SLJIT_SIMD_REG_256    (5 << 12)
2094
/* Move data to/from a 512 bit (64 byte) long vector register */
2095
0
#define SLJIT_SIMD_REG_512    (6 << 12)
2096
/* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */
2097
974k
#define SLJIT_SIMD_ELEM_8   (0 << 18)
2098
/* Element size is 16 bit long, usually cannot be combined with SLJIT_SIMD_FLOAT */
2099
#define SLJIT_SIMD_ELEM_16    (1 << 18)
2100
/* Element size is 32 bit long */
2101
1.12M
#define SLJIT_SIMD_ELEM_32    (2 << 18)
2102
/* Element size is 64 bit long */
2103
#define SLJIT_SIMD_ELEM_64    (3 << 18)
2104
/* Element size is 128 bit long */
2105
#define SLJIT_SIMD_ELEM_128   (4 << 18)
2106
/* Element size is 256 bit long */
2107
#define SLJIT_SIMD_ELEM_256   (5 << 18)
2108
2109
/* The following options are used by sljit_emit_simd_mov()
2110
   and sljit_emit_simd_op2(). */
2111
2112
/* Memory address is unaligned (this is the default) */
2113
#define SLJIT_SIMD_MEM_UNALIGNED  (0 << 24)
2114
/* Memory address is 16 bit aligned */
2115
#define SLJIT_SIMD_MEM_ALIGNED_16 (1 << 24)
2116
/* Memory address is 32 bit aligned */
2117
#define SLJIT_SIMD_MEM_ALIGNED_32 (2 << 24)
2118
/* Memory address is 64 bit aligned */
2119
#define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24)
2120
/* Memory address is 128 bit aligned */
2121
1.94M
#define SLJIT_SIMD_MEM_ALIGNED_128  (4 << 24)
2122
/* Memory address is 256 bit aligned */
2123
0
#define SLJIT_SIMD_MEM_ALIGNED_256  (5 << 24)
2124
/* Memory address is 512 bit aligned */
2125
#define SLJIT_SIMD_MEM_ALIGNED_512  (6 << 24)
2126
2127
/* Moves data between a vector register and memory.
2128
2129
   If the operation is not supported, it returns with
2130
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2131
   it does not emit any instructions.
2132
2133
   type must be a combination of SLJIT_SIMD_* and
2134
     SLJIT_SIMD_MEM_* options
2135
   vreg is the source or destination vector register
2136
     of the operation
2137
   srcdst must be a memory operand or a vector register
2138
2139
   Note:
2140
       The alignment and element size must be
2141
       less than or equal to the vector register size.
2142
2143
   Flags: - (does not modify flags) */
2144
2145
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
2146
  sljit_s32 vreg,
2147
  sljit_s32 srcdst, sljit_sw srcdstw);
2148
2149
/* Replicates a scalar value to all lanes of a vector
2150
   register.
2151
2152
   If the operation is not supported, it returns with
2153
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2154
   it does not emit any instructions.
2155
2156
   type must be a combination of SLJIT_SIMD_* options
2157
     except SLJIT_SIMD_STORE.
2158
   vreg is the destination vector register of the operation
2159
   src is the value which is replicated
2160
2161
   Note:
2162
       The src == SLJIT_IMM and srcw == 0 can be used to
2163
       clear a register even when SLJIT_SIMD_FLOAT is set.
2164
2165
   Flags: - (does not modify flags) */
2166
2167
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
2168
  sljit_s32 vreg,
2169
  sljit_s32 src, sljit_sw srcw);
2170
2171
/* The following options are used by sljit_emit_simd_lane_mov(). */
2172
2173
/* Clear all bits of the simd register before loading the lane. */
2174
1.12M
#define SLJIT_SIMD_LANE_ZERO    0x000002
2175
/* Sign extend the integer value stored from the lane. */
2176
0
#define SLJIT_SIMD_LANE_SIGNED    0x000004
2177
2178
/* Moves data between a vector register lane and a register or
2179
   memory. If the srcdst argument is a register, it must be
2180
   a floating point register when SLJIT_SIMD_FLOAT is specified,
2181
   or a general purpose register otherwise.
2182
2183
   If the operation is not supported, it returns with
2184
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2185
   it does not emit any instructions.
2186
2187
   type must be a combination of SLJIT_SIMD_* options
2188
     Further options:
2189
       SLJIT_32 - when SLJIT_SIMD_FLOAT is not set
2190
       SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE
2191
           is set and SLJIT_SIMD_FLOAT is not set
2192
       SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD
2193
           is specified
2194
   vreg is the source or destination vector register
2195
     of the operation
2196
   lane_index is the index of the lane
2197
   srcdst is the source operand for loads, and
2198
     destination operand for stores
2199
2200
   Note:
2201
       The element size must be smaller than the register size.
2202
2203
   Flags: - (does not modify flags) */
2204
2205
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
2206
  sljit_s32 vreg, sljit_s32 lane_index,
2207
  sljit_s32 srcdst, sljit_sw srcdstw);
2208
2209
/* Replicates a scalar value from a lane to all lanes
2210
   of a vector register.
2211
2212
   If the operation is not supported, it returns with
2213
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2214
   it does not emit any instructions.
2215
2216
   type must be a combination of SLJIT_SIMD_* options
2217
     except SLJIT_SIMD_STORE.
2218
   vreg is the destination vector register of the operation
2219
   src is the vector register whose lane is replicated
2220
   src_lane_index is the lane index of the src register
2221
2222
   Flags: - (does not modify flags) */
2223
2224
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
2225
  sljit_s32 vreg,
2226
  sljit_s32 src, sljit_s32 src_lane_index);
2227
2228
/* The following options are used by sljit_emit_simd_extend(). */
2229
2230
/* Sign extend the integer elements */
2231
#define SLJIT_SIMD_EXTEND_SIGNED  0x000002
2232
/* Extend data to 16 bit */
2233
#define SLJIT_SIMD_EXTEND_16    (1 << 24)
2234
/* Extend data to 32 bit */
2235
#define SLJIT_SIMD_EXTEND_32    (2 << 24)
2236
/* Extend data to 64 bit */
2237
#define SLJIT_SIMD_EXTEND_64    (3 << 24)
2238
2239
/* Extend elements and stores them in a vector register.
2240
   The extension operation increases the size of the
2241
   elements (e.g. from 16 bit to 64 bit). For integer
2242
   values, the extension can be signed or unsigned.
2243
2244
   If the operation is not supported, it returns with
2245
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2246
   it does not emit any instructions.
2247
2248
   type must be a combination of SLJIT_SIMD_*, and
2249
     SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE
2250
   vreg is the destination vector register of the operation
2251
   src must be a memory operand or a vector register.
2252
     In the latter case, the source elements are stored
2253
     in the lower half of the register.
2254
2255
   Flags: - (does not modify flags) */
2256
2257
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
2258
  sljit_s32 vreg,
2259
  sljit_s32 src, sljit_sw srcw);
2260
2261
/* Extract the highest bit (usually the sign bit) from
2262
   each element of a vector.
2263
2264
   If the operation is not supported, it returns with
2265
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2266
   it does not emit any instructions.
2267
2268
   type must be a combination of SLJIT_SIMD_* and SLJIT_32
2269
     options except SLJIT_SIMD_LOAD
2270
   vreg is the source vector register of the operation
2271
   dst is the destination operand
2272
2273
   Flags: - (does not modify flags) */
2274
2275
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
2276
  sljit_s32 vreg,
2277
  sljit_s32 dst, sljit_sw dstw);
2278
2279
/* The following operations are used by sljit_emit_simd_op2(). */
2280
2281
/* Binary 'and' operation */
2282
433k
#define SLJIT_SIMD_OP2_AND    0x000001
2283
/* Binary 'or' operation */
2284
0
#define SLJIT_SIMD_OP2_OR   0x000002
2285
/* Binary 'xor' operation */
2286
0
#define SLJIT_SIMD_OP2_XOR    0x000003
2287
/* Shuffle bytes of src1 using the indices in src2 */
2288
0
#define SLJIT_SIMD_OP2_SHUFFLE    0x000004
2289
2290
/* Perform simd operations using vector registers.
2291
2292
   If the operation is not supported, it returns with
2293
   SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
2294
   it does not emit any instructions.
2295
2296
   type must be a combination of SLJIT_SIMD_*, SLJIT_SIMD_MEM_*
2297
     and SLJIT_SIMD_OP2_* options except SLJIT_SIMD_LOAD
2298
     and SLJIT_SIMD_STORE
2299
   dst_vreg is the destination register of the operation
2300
   src1_vreg is the first source register of the operation
2301
   src2 is the second source operand of the operation
2302
2303
   Flags: - (does not modify flags) */
2304
2305
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
2306
  sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w);
2307
2308
/* The following operations are used by sljit_emit_atomic_load() and
2309
   sljit_emit_atomic_store() operations. */
2310
2311
/* Tests whether the atomic operation is available (does not generate
2312
   any instructions). When a load form is allowed, its corresponding
2313
   store form is allowed and vice versa. */
2314
#define SLJIT_ATOMIC_TEST 0x10000
2315
/* The compiler must generate compare and swap instruction.
2316
   When this bit is set, calling sljit_emit_atomic_load() is optional. */
2317
#define SLJIT_ATOMIC_USE_CAS 0x20000
2318
/* The compiler must generate load-acquire and store-release instructions.
2319
   When this bit is set, the temp_reg for sljit_emit_atomic_store is not used. */
2320
#define SLJIT_ATOMIC_USE_LS 0x40000
2321
2322
/* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair
2323
   can perform an atomic read-modify-write operation. First, an unsigned
2324
   value must be loaded from memory using sljit_emit_atomic_load. Then,
2325
   the updated value must be written back to the same memory location by
2326
   sljit_emit_atomic_store. A thread can only perform a single atomic
2327
   operation at a time.
2328
2329
   The following conditions must be satisfied, or the operation
2330
   is undefined:
2331
     - the address provided in mem_reg must be divisible by the size of
2332
       the value (only naturally aligned updates are supported)
2333
     - no memory operations are allowed between the load and store operations
2334
     - the memory operation (op) and the base address (stored in mem_reg)
2335
       passed to the load/store operations must be the same (the mem_reg
2336
       can be a different register, only its value must be the same)
2337
     - a store must always follow a load for the same transaction.
2338
2339
   op must be between SLJIT_MOV and SLJIT_MOV_P
2340
   dst_reg is the register where the data will be loaded into
2341
   mem_reg is the base address of the memory load (it cannot be
2342
     SLJIT_SP or a virtual register on x86-32)
2343
2344
   Flags: - (does not modify flags) */
2345
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
2346
  sljit_s32 dst_reg,
2347
  sljit_s32 mem_reg);
2348
2349
/* The sljit_emit_atomic_load and sljit_emit_atomic_store operations
2350
   allow performing an atomic read-modify-write operation. See the
2351
   description of sljit_emit_atomic_load.
2352
2353
   op must be between SLJIT_MOV and SLJIT_MOV_P
2354
   src_reg is the register whose value is stored into the memory
2355
   mem_reg is the base address of the memory store (it cannot be
2356
     SLJIT_SP or a virtual register on x86-32)
2357
   temp_reg is a scratch register, which must be initialized with
2358
     the value loaded into the dst_reg during the corresponding
2359
     sljit_emit_atomic_load operation, or the operation is undefined.
2360
     The temp_reg register preserves its value, if the memory store
2361
     is successful. Otherwise, its value is undefined.
2362
2363
   Flags: ATOMIC_STORED
2364
     if ATOMIC_STORED flag is set, it represents that the memory
2365
     is updated with a new value. Otherwise the memory is unchanged. */
2366
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
2367
  sljit_s32 src_reg,
2368
  sljit_s32 mem_reg,
2369
  sljit_s32 temp_reg);
2370
2371
/* Copies the base address of SLJIT_SP + offset to dst. The offset can
2372
   represent the starting address of a value in the local data (stack).
2373
   The offset is not limited by the local data limits, it can be any value.
2374
   For example if an array of bytes are stored on the stack from
2375
   offset 0x40, and R0 contains the offset of an array item plus 0x120,
2376
   this item can be changed by two SLJIT instructions:
2377
2378
   sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120);
2379
   sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5);
2380
2381
   Flags: - (may destroy flags) */
2382
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
2383
2384
/* Store a value that can be changed at runtime. The constant
2385
   can be managed by sljit_get_const_addr and sljit_set_const.
2386
2387
   op must be SLJIT_MOV, SLJIT_MOV32, SLJIT_MOV_S32,
2388
     SLJIT_MOV_U8, SLJIT_MOV32_U8
2389
2390
   Note: when SLJIT_MOV_U8 is used, and dst is a register,
2391
         init_value supports a 9 bit signed value between [-256..255]
2392
2393
   Flags: - (does not modify flags) */
2394
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
2395
  sljit_s32 dst, sljit_sw dstw,
2396
  sljit_sw init_value);
2397
2398
/* Opcodes for sljit_emit_op_addr. */
2399
2400
/* The address is suitable for jump/call target. */
2401
16.6M
#define SLJIT_MOV_ADDR 0
2402
/* The address is suitable for reading memory. */
2403
#define SLJIT_MOV_ABS_ADDR 1
2404
/* Add absolute address. */
2405
8.32M
#define SLJIT_ADD_ABS_ADDR 2
2406
2407
/* Store the value of a label (see: sljit_set_label / sljit_set_target)
2408
   Flags: - (does not modify flags) */
2409
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
2410
  sljit_s32 dst, sljit_sw dstw);
2411
2412
/* Returns the address of a label after sljit_generate_code is called, and
2413
   before the compiler is freed by sljit_free_compiler. It is recommended
2414
   to save these addresses elsewhere before sljit_free_compiler is called.
2415
2416
   The address returned by sljit_get_label_addr is suitable for a jump/call
2417
   target, and the address returned by sljit_get_label_abs_addr is suitable
2418
   for reading memory. */
2419
2420
0
/* Reads back the address recorded in the label (label->u.addr). */
static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label)
{
  return label->u.addr;
}
2421
#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
2422
/* Thumb2 variant: yields label->u.addr with its lowest bit cleared. */
static SLJIT_INLINE sljit_uw sljit_get_label_abs_addr(struct sljit_label *label)
{
  const sljit_uw lowest_bit = (sljit_uw)1;

  return label->u.addr & ~lowest_bit;
}
2423
#else /* !SLJIT_CONFIG_ARM_THUMB2 */
2424
0
/* Non-Thumb2 variant: yields label->u.addr unchanged. */
static SLJIT_INLINE sljit_uw sljit_get_label_abs_addr(struct sljit_label *label)
{
  return label->u.addr;
}
2425
#endif /* SLJIT_CONFIG_ARM_THUMB2 */
2426
2427
/* Returns the address of jump and const instructions after sljit_generate_code
2428
   is called, and before the compiler is freed by sljit_free_compiler. It is
2429
   recommended to save these addresses elsewhere before sljit_free_compiler is called. */
2430
2431
0
/* Reads back the address recorded in the jump (jump->addr). */
static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump)
{
  return jump->addr;
}
2432
0
/* Reads back the address recorded in the constant (const_->addr). */
static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_)
{
  return const_->addr;
}
2433
2434
/* Only the address and executable offset are required to perform dynamic
2435
   code modifications. See sljit_get_executable_offset function. */
2436
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset);
2437
/* The op opcode must be set to the same value that was passed to sljit_emit_const. */
2438
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset);
2439
2440
/* Only a single buffer is writable at a time, so sljit_read_only_buffer_end_writing()
2441
   must be called before sljit_read_only_buffer_start_writing() is called again. */
2442
SLJIT_API_FUNC_ATTRIBUTE void* sljit_read_only_buffer_start_writing(sljit_uw addr, sljit_uw size, sljit_sw executable_offset);
2443
SLJIT_API_FUNC_ATTRIBUTE void sljit_read_only_buffer_end_writing(sljit_uw addr, sljit_uw size, sljit_sw executable_offset);
2444
2445
/* --------------------------------------------------------------------- */
2446
/*  CPU specific functions                                               */
2447
/* --------------------------------------------------------------------- */
2448
2449
/* Types for sljit_get_register_index */
2450
2451
/* General purpose (integer) registers. */
2452
10.8M
#define SLJIT_GP_REGISTER 0
2453
/* Floating point registers. */
2454
4.76M
#define SLJIT_FLOAT_REGISTER 1
2455
2456
/* The following function is a helper function for sljit_emit_op_custom.
2457
   It returns with the real machine register index ( >=0 ) of any register.
2458
2459
   When type is SLJIT_GP_REGISTER:
2460
      reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register
2461
2462
   When type is SLJIT_FLOAT_REGISTER:
2463
      reg must be an SLJIT_FR(i) or SLJIT_FS(i) register
2464
2465
   When type is SLJIT_SIMD_REG_64 / 128 / 256 / 512 :
2466
      reg must be an SLJIT_FR(i) or SLJIT_FS(i) register
2467
2468
   Note: it returns with -1 for unknown registers, such as virtual
2469
         registers on x86-32 or unsupported simd registers. */
2470
2471
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg);
2472
2473
/* Any instruction can be inserted into the instruction stream by
2474
   sljit_emit_op_custom. It has a similar purpose as inline assembly.
2475
   The size parameter must match the instruction size of the target
2476
   architecture:
2477
2478
         x86: 0 < size <= 15, the instruction argument can be byte aligned.
2479
      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
2480
              if size == 4, the instruction argument must be 4 byte aligned.
2481
       s390x: size can be 2, 4, or 6, the instruction argument can be byte aligned.
2482
   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
2483
2484
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2485
  void *instruction, sljit_u32 size);
2486
2487
/* Flags were set by a 32 bit operation. */
2488
#define SLJIT_CURRENT_FLAGS_32      SLJIT_32
2489
2490
/* Flags were set by an ADD or ADDC operation. */
2491
#define SLJIT_CURRENT_FLAGS_ADD     0x01
2492
/* Flags were set by a SUB or SUBC operation. */
2493
44.5k
#define SLJIT_CURRENT_FLAGS_SUB     0x02
2494
2495
/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode.
2496
   Must be combined with SLJIT_CURRENT_FLAGS_SUB. */
2497
44.5k
#define SLJIT_CURRENT_FLAGS_COMPARE   0x04
2498
2499
/* Flags were set by sljit_emit_op2cmpz operation. */
2500
#define SLJIT_CURRENT_FLAGS_OP2CMPZ   0x08
2501
2502
/* Define the currently available CPU status flags. It is usually used after
2503
   an sljit_emit_label or sljit_emit_op_custom operation to define which CPU
2504
   status flags are available.
2505
2506
   The current_flags must be a valid combination of SLJIT_SET_* and
2507
   SLJIT_CURRENT_FLAGS_* constants. */
2508
2509
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler,
2510
  sljit_s32 current_flags);
2511
2512
/* --------------------------------------------------------------------- */
2513
/*  Serialization functions                                              */
2514
/* --------------------------------------------------------------------- */
2515
2516
/* Label/jump/const enumeration functions. The items in each group
2517
   are enumerated in creation order. Serialization / deserialization
2518
   preserves this order for each group. For example the fifth label
2519
   after deserialization refers to the same machine code location as
2520
   the fifth label before the serialization. */
2521
0
/* Entry point for label enumeration: yields compiler->labels. */
static SLJIT_INLINE struct sljit_label *sljit_get_first_label(struct sljit_compiler *compiler)
{
  return compiler->labels;
}
2522
0
/* Entry point for jump enumeration: yields compiler->jumps. */
static SLJIT_INLINE struct sljit_jump *sljit_get_first_jump(struct sljit_compiler *compiler)
{
  return compiler->jumps;
}
2523
0
/* Entry point for constant enumeration: yields compiler->consts. */
static SLJIT_INLINE struct sljit_const *sljit_get_first_const(struct sljit_compiler *compiler)
{
  return compiler->consts;
}
2524
2525
0
/* Advances the label enumeration: yields label->next. */
static SLJIT_INLINE struct sljit_label *sljit_get_next_label(struct sljit_label *label)
{
  return label->next;
}
2526
0
/* Advances the jump enumeration: yields jump->next. */
static SLJIT_INLINE struct sljit_jump *sljit_get_next_jump(struct sljit_jump *jump)
{
  return jump->next;
}
2527
0
/* Advances the constant enumeration: yields const_->next. */
static SLJIT_INLINE struct sljit_const *sljit_get_next_const(struct sljit_const *const_)
{
  return const_->next;
}
2528
2529
/* A number starting from 0 is assigned to each label, which
2530
represents its creation index. The first label created by the
2531
compiler has index 0, the second one has index 1, the third one
2532
has index 2, and so on. The returned value is unspecified after
2533
sljit_generate_code() is called.
2534
2535
It is recommended to use this function to get the creation index
2536
of a label, since sljit_emit_label() may return with the last label,
2537
if no code is generated since the last sljit_emit_label() call. */
2538
SLJIT_API_FUNC_ATTRIBUTE sljit_uw sljit_get_label_index(struct sljit_label *label);
2539
2540
/* The sljit_jump_has_label() and sljit_jump_has_target() functions
2541
return a non-zero value if a label or target is set for the jump
2542
respectively. Both may return with a zero value. The other two
2543
functions return the value assigned to the jump. */
2544
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump);
2545
0
/* Yields jump->u.label without checking whether a label was assigned;
   NOTE(review): presumably only meaningful when sljit_jump_has_label()
   reports non-zero - confirm against the implementation. */
static SLJIT_INLINE struct sljit_label *sljit_jump_get_label(struct sljit_jump *jump)
{
  return jump->u.label;
}
2546
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump);
2547
0
/* Yields jump->u.target without checking whether a target was assigned;
   NOTE(review): presumably only meaningful when sljit_jump_has_target()
   reports non-zero - confirm against the implementation. */
static SLJIT_INLINE sljit_uw sljit_jump_get_target(struct sljit_jump *jump)
{
  return jump->u.target;
}
2548
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump);
2549
2550
/* Option bits for sljit_serialize_compiler. */
2551
2552
/* When debugging is enabled, the serialized buffer contains
2553
debugging information unless this option is specified. */
2554
#define SLJIT_SERIALIZE_IGNORE_DEBUG    0x1
2555
2556
/* Serialize the internal structure of the compiler into a buffer.
2557
If the serialization is successful, the returned value is a newly
2558
allocated buffer which is allocated by the memory allocator assigned
2559
to the compiler. Otherwise the returned value is NULL. Unlike
2560
sljit_generate_code(), serialization does not modify the internal
2561
state of the compiler, so the code generation can be continued.
2562
2563
  options must be the combination of SLJIT_SERIALIZE_* option bits
2564
  size is an output argument, which is set to the byte size of
2565
    the result buffer if the operation is successful
2566
2567
Notes:
2568
  - This function is useful for ahead-of-time compilation (AOT).
2569
  - The returned buffer must be freed later by the caller.
2570
    The SLJIT_FREE() macro is suitable for this purpose:
2571
    SLJIT_FREE(returned_buffer, sljit_get_allocator_data(compiler))
2572
  - Memory allocated by sljit_alloc_memory() is not serialized.
2573
  - The type of the returned buffer is sljit_uw* to emphasize that
2574
    the buffer is word aligned. However, the 'size' output argument
2575
    contains the byte size, so this value is always divisible by
2576
    sizeof(sljit_uw).
2577
*/
2578
SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler,
2579
  sljit_s32 options, sljit_uw *size);
2580
2581
/* Construct a new compiler instance from a buffer produced by
2582
sljit_serialize_compiler(). If the operation is successful, the new
2583
compiler instance is returned. Otherwise the returned value is NULL.
2584
2585
  buffer points to a word aligned memory data which was
2586
    created by sljit_serialize_compiler()
2587
  size is the byte size of the buffer
2588
  options must be 0
2589
  allocator_data specifies allocator specific data, see
2590
                 sljit_create_compiler() for further details
2591
2592
Notes:
2593
  - Labels assigned to jumps are restored with their
2594
    corresponding label in the label set created by
2595
    the deserializer. Target addresses assigned to
2596
    jumps are also restored. Uninitialized jumps
2597
    remain uninitialized.
2598
  - After the deserialization, sljit_generate_code() does
2599
    not need to be the next operation on the returned
2600
    compiler, the code generation can be continued.
2601
    Even sljit_serialize_compiler() can be called again.
2602
  - When debugging is enabled, a buffer without debug
2603
    information cannot be deserialized.
2604
*/
2605
SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size,
2606
  sljit_s32 options, void *allocator_data);
2607
2608
/* --------------------------------------------------------------------- */
2609
/*  Miscellaneous utility functions                                      */
2610
/* --------------------------------------------------------------------- */
2611
2612
/* Get the human readable name of the platform. Can be useful on platforms
2613
   like ARM, where ARM and Thumb2 functions can be mixed, and it is useful
2614
   to know the type of the code generator. */
2615
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void);
2616
2617
/* Portable helper macro to get the byte offset of a member.
2618
   Same as offsetof() macro defined in stddef.h
   base must be a type name (it is used in a pointer cast) and member
   a member of that type. The macro takes the address of the member of
   a fictitious object placed at address 0x10, casts it to sljit_sw and
   subtracts 0x10 again.
   NOTE(review): the non-zero base address is presumably used to avoid
   compiler diagnostics about null pointer arithmetic - confirm. */
2619
25.1G
#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10)
2620
2621
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
2622
2623
/* The sljit_stack structure and its manipulation functions provide
2624
   an implementation for a top-down stack. The stack top is stored
2625
   in the end field of the sljit_stack structure and the stack goes
2626
   down to the min_start field, so the memory region reserved for
2627
   this stack is between min_start (inclusive) and end (exclusive)
2628
   fields. However the application can only use the region between
2629
   start (inclusive) and end (exclusive) fields. The sljit_stack_resize
2630
   function can be used to extend this region up to min_start.
2631
2632
   This feature uses the "address space reserve" feature of modern
2633
   operating systems. Instead of allocating a large memory block
2634
   applications can allocate a small memory region and extend it
2635
   later without moving the content of the memory area. Therefore
2636
   after a successful resize by sljit_stack_resize all pointers into
2637
   this region are still valid.
2638
2639
   Note:
2640
     this structure may not be supported by all operating systems.
2641
     end and min_start fields are aligned to PAGE_SIZE bytes (usually
2642
         4 Kbyte or more).
2643
     stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */
2644
2645
/* Descriptor of a top-down stack. The reserved memory region is
   min_start (inclusive) .. end (exclusive); the application may only
   use start (inclusive) .. end (exclusive). */
struct sljit_stack {
2646
  /* User data, anything can be stored here.
2647
     Initialized to the same value as the end field. */
2648
  sljit_u8 *top;
2649
/* These members are read only. */
2650
  /* End address of the stack (exclusive). This is the stack top,
     the stack grows downwards from here. */
2651
  sljit_u8 *end;
2652
  /* Current start address of the stack (inclusive): the lowest
     address the application may use at the moment. */
2653
  sljit_u8 *start;
2654
  /* Lowest start address of the stack (inclusive). sljit_stack_resize
     can extend the usable region down to this address. */
2655
  sljit_u8 *min_start;
2656
};
2657
2658
/* Allocates a new stack. Returns NULL if unsuccessful.
2659
   Note: see sljit_create_compiler for the explanation of allocator_data. */
2660
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data);
2661
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
2662
2663
/* Can be used to increase (extend) or decrease (shrink) the stack
2664
   memory area. Returns new_start if successful and NULL otherwise.
2665
   It always fails if new_start is less than min_start or greater than or
2666
   equal to the end field. The fields of the stack are not changed if the returned
2667
   value is NULL (the current memory content is never lost). */
2668
SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start);
2669
2670
#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
2671
2672
#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
2673
2674
/* Get the entry address of a given function (signed, unsigned result). */
2675
209M
/* Signed (sljit_sw) entry address of func_name. The argument is
   parenthesized so that the cast applies to the whole expression
   passed to the macro, not only to its first operand. */
#define SLJIT_FUNC_ADDR(func_name)  ((sljit_sw)(func_name))
2676
/* Unsigned (sljit_uw) entry address of func_name. The argument is
   parenthesized so that the cast applies to the whole expression
   passed to the macro, not only to its first operand. */
#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)(func_name))
2677
2678
#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
2679
2680
/* All JIT related code should be placed in the same context (library, binary, etc.). */
2681
2682
/* Get the entry address of a given function (signed, unsigned result). */
2683
/* Signed (sljit_sw) entry address, read from the first word of the
   data func_name points to. The argument is parenthesized so that the
   casts and the dereference apply to the whole expression passed to
   the macro. */
#define SLJIT_FUNC_ADDR(func_name)  (*(sljit_sw*)(void*)(func_name))
2684
/* Unsigned (sljit_uw) entry address, read from the first word of the
   data func_name points to. The argument is parenthesized so that the
   casts and the dereference apply to the whole expression passed to
   the macro. */
#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)(func_name))
2685
2686
/* For powerpc64, the function pointers point to a context descriptor. */
2687
struct sljit_function_context {
2688
  /* Entry address of the function (see sljit_set_function_context). */
  sljit_uw addr;
2689
  /* NOTE(review): presumably the value for the r2 register (TOC pointer
     in the PowerPC64 ELF ABI) - confirm. */
  sljit_uw r2;
2690
  /* NOTE(review): presumably the value for the r11 register (environment
     pointer in the PowerPC64 ELF ABI) - confirm. */
  sljit_uw r11;
2691
};
2692
2693
/* Fill the context arguments using the addr and the function.
2694
   If func_ptr is NULL, it will not be set to the address of context.
2695
   If addr is 0, the function address also comes from the func pointer. */
2696
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func);
2697
2698
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
2699
2700
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
2701
/* Free unused executable memory. The allocator keeps some free memory
2702
   around to reduce the number of OS executable memory allocations.
2703
   This improves performance since these calls are costly. However
2704
   it is sometimes desired to free all unused memory regions, e.g.
2705
   before the application terminates. */
2706
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
2707
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
2708
2709
#ifdef __cplusplus
2710
} /* extern "C" */
2711
#endif /* __cplusplus */
2712
2713
#endif /* SLJIT_LIR_H_ */