/src/pcre2/deps/sljit/sljit_src/sljitLir.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Stack-less Just-In-Time compiler |
3 | | * |
4 | | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without modification, are |
7 | | * permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this list of |
10 | | * conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, this list |
13 | | * of conditions and the following disclaimer in the documentation and/or other materials |
14 | | * provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY |
17 | | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT |
19 | | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
21 | | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
22 | | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
24 | | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | #ifndef SLJIT_LIR_H_ |
28 | | #define SLJIT_LIR_H_ |
29 | | |
30 | | /* |
31 | | ------------------------------------------------------------------------ |
32 | | Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) |
33 | | ------------------------------------------------------------------------ |
34 | | |
35 | | Short description |
36 | | Advantages: |
37 | | - The execution can be continued from any LIR instruction. In other |
38 | | words, it is possible to jump to any label from anywhere, even from |
39 | | a code fragment, which is compiled later, as long as the compiling |
40 | | context is the same. See sljit_emit_enter for more details. |
41 | | - Supports self modifying code: target of any jump and call |
42 | | instructions and some constant values can be dynamically modified |
43 | | during runtime. See SLJIT_REWRITABLE_JUMP. |
44 | | - although it is not suggested to do it frequently |
45 | | - can be used for inline caching: save an important value once |
46 | | in the instruction stream |
47 | | - A fixed stack space can be allocated for local variables |
48 | | - The compiler is thread-safe |
49 | | - The compiler is highly configurable through preprocessor macros. |
50 | | You can disable unneeded features (multithreading in single |
51 | | threaded applications), and you can use your own system functions |
52 | | (including memory allocators). See sljitConfig.h. |
53 | | Disadvantages: |
54 | | - The compiler is more like a platform independent assembler, so |
55 | | there is no built-in variable management. Registers and stack must |
56 | | be managed manually (the name of the compiler refers to this). |
57 | | In practice: |
58 | | - This approach is very effective for interpreters |
59 | | - One of the saved registers typically points to a stack interface |
60 | | - It can jump to any exception handler anytime (even if it belongs |
61 | | to another function) |
62 | | - Hot paths can be modified during runtime reflecting the changes |
63 | | of the fastest execution path of the dynamic language |
64 | | - SLJIT supports complex memory addressing modes |
65 | | - mainly position and context independent code (except some cases) |
66 | | |
67 | | For valgrind users: |
68 | | - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" |
69 | | */ |
70 | | |
71 | | #if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE) |
72 | | #include "sljitConfigPre.h" |
73 | | #endif /* SLJIT_HAVE_CONFIG_PRE */ |
74 | | |
75 | | #include "sljitConfigCPU.h" |
76 | | #include "sljitConfig.h" |
77 | | |
78 | | /* The following header file defines useful macros for fine tuning |
79 | | SLJIT based code generators. They are listed in the beginning |
80 | | of sljitConfigInternal.h */ |
81 | | |
82 | | #include "sljitConfigInternal.h" |
83 | | |
84 | | #if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST) |
85 | | #include "sljitConfigPost.h" |
86 | | #endif /* SLJIT_HAVE_CONFIG_POST */ |
87 | | |
88 | | #ifdef __cplusplus |
89 | | extern "C" { |
90 | | #endif /* __cplusplus */ |
91 | | |
92 | | /* Version numbers. */ |
93 | | #define SLJIT_MAJOR_VERSION 0 |
94 | | #define SLJIT_MINOR_VERSION 95 |
95 | | |
96 | | /* --------------------------------------------------------------------- */ |
97 | | /* Error codes */ |
98 | | /* --------------------------------------------------------------------- */ |
99 | | |
100 | | /* Indicates no error. */ |
101 | 17.2G | #define SLJIT_SUCCESS 0 |
102 | | /* After the call of sljit_generate_code(), the error code of the compiler |
103 | | is set to this value to avoid further code generation. |
104 | | The compiler should be freed after sljit_generate_code(). */ |
105 | 586k | #define SLJIT_ERR_COMPILED 1 |
106 | | /* Cannot allocate non-executable memory. */ |
107 | 0 | #define SLJIT_ERR_ALLOC_FAILED 2 |
108 | | /* Cannot allocate executable memory. |
109 | | Only sljit_generate_code() returns with this error code. */ |
110 | 0 | #define SLJIT_ERR_EX_ALLOC_FAILED 3 |
111 | | /* Unsupported instruction form. */ |
112 | 2.35G | #define SLJIT_ERR_UNSUPPORTED 4 |
113 | | /* An invalid argument is passed to any SLJIT function. */ |
114 | | #define SLJIT_ERR_BAD_ARGUMENT 5 |
115 | | |
116 | | /* --------------------------------------------------------------------- */ |
117 | | /* Registers */ |
118 | | /* --------------------------------------------------------------------- */ |
119 | | |
120 | | /* |
121 | | Scratch (R) registers: registers which may not preserve their values |
122 | | across function calls. |
123 | | |
124 | | Saved (S) registers: registers which preserve their values across |
125 | | function calls. |
126 | | |
127 | | The scratch and saved register sets overlap. The last scratch register |
128 | | is the first saved register, the one before the last is the second saved |
129 | | register, and so on. |
130 | | |
131 | | For example, in an architecture with only five registers (A-E), if two |
132 | | are scratch and three saved registers, they will be defined as follows: |
133 | | |
134 | | A | R0 | | R0 always represents scratch register A |
135 | | B | R1 | | R1 always represents scratch register B |
136 | | C | [R2] | S2 | R2 and S2 represent the same physical register C |
137 | | D | [R3] | S1 | R3 and S1 represent the same physical register D |
138 | | E | [R4] | S0 | R4 and S0 represent the same physical register E |
139 | | |
140 | | Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS will be 2 and |
141 | | SLJIT_NUMBER_OF_SAVED_REGISTERS will be 3. |
142 | | |
143 | | Note: For all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 |
144 | | and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers |
145 | | are virtual on x86-32. See below. |
146 | | |
147 | | The purpose of this definition is convenience: saved registers can |
148 | | be used as extra scratch registers. For example, building on the |
149 | | previous example, four registers can be specified as scratch registers |
150 | | and the fifth one as saved register, allowing any user code which requires |
151 | | four scratch registers to run unmodified. The SLJIT compiler automatically |
152 | | saves the content of the two extra scratch registers on the stack. Scratch |
153 | | registers can also be preserved by saving their value on the stack but |
154 | | that needs to be done manually. |
155 | | |
156 | | Note: To emphasize that registers assigned to R2-R4 are saved |
157 | | registers, they are enclosed by square brackets. |
158 | | |
159 | | Note: sljit_emit_enter and sljit_set_context define whether a register |
160 | | is S or R register. E.g: if in the previous example 3 scratches and |
161 | | 1 saved are mapped by sljit_emit_enter, the allowed register set |
162 | | will be: R0-R2 and S0. Although S2 is mapped to the same register |
163 | | as R2, it is not available in that configuration. Furthermore |
164 | | the S1 register cannot be used at all. |
165 | | */ |
166 | | |
167 | | /* Scratch registers. */ |
168 | 10.0G | #define SLJIT_R0 1 |
169 | 0 | #define SLJIT_R1 2 |
170 | 295M | #define SLJIT_R2 3 |
171 | | /* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they |
172 | | are allocated on the stack). These registers are called virtual |
173 | | and cannot be used for memory addressing (cannot be part of |
174 | | any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such |
175 | | limitation on other CPUs. See sljit_get_register_index(). */ |
176 | 645M | #define SLJIT_R3 4 |
177 | 6.20M | #define SLJIT_R4 5 |
178 | | #define SLJIT_R5 6 |
179 | | #define SLJIT_R6 7 |
180 | | #define SLJIT_R7 8 |
181 | | #define SLJIT_R8 9 |
182 | | #define SLJIT_R9 10 |
183 | | /* All R registers provided by the architecture can be accessed by SLJIT_R(i) |
184 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ |
185 | | #define SLJIT_R(i) (1 + (i)) |
186 | | |
187 | | /* Saved registers. */ |
188 | 4.20M | #define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) |
189 | 1.53M | #define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) |
190 | 6.92M | #define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) |
191 | | /* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they |
192 | | are allocated on the stack). These registers are called virtual |
193 | | and cannot be used for memory addressing (cannot be part of |
194 | | any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such |
195 | | limitation on other CPUs. See sljit_get_register_index(). */ |
196 | 6.96M | #define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) |
197 | 6.92M | #define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) |
198 | | #define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) |
199 | | #define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) |
200 | | #define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) |
201 | | #define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) |
202 | | #define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) |
203 | | /* All S registers provided by the architecture can be accessed by SLJIT_S(i) |
204 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ |
205 | | #define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) |
206 | | |
207 | | /* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ |
208 | 1.75M | #define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) |
209 | | |
210 | | /* The SLJIT_SP provides direct access to the linear stack space allocated by |
211 | | sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). |
212 | | The immediate offset is extended by the relative stack offset automatically. |
213 | | sljit_get_local_base can be used to obtain the real address of a value. */ |
214 | 235M | #define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) |
215 | | |
216 | | /* Return with machine word. */ |
217 | | |
218 | 1.17M | #define SLJIT_RETURN_REG SLJIT_R0 |
219 | | |
220 | | /* --------------------------------------------------------------------- */ |
221 | | /* Floating point registers */ |
222 | | /* --------------------------------------------------------------------- */ |
223 | | |
224 | | /* Each floating point register can store a 32 or a 64 bit precision |
225 | | value. The FR and FS register sets overlap in the same way as R |
226 | | and S register sets. See above. */ |
227 | | |
228 | | /* Floating point scratch registers. */ |
229 | 0 | #define SLJIT_FR0 1 |
230 | | #define SLJIT_FR1 2 |
231 | | #define SLJIT_FR2 3 |
232 | | #define SLJIT_FR3 4 |
233 | | #define SLJIT_FR4 5 |
234 | | #define SLJIT_FR5 6 |
235 | | #define SLJIT_FR6 7 |
236 | | #define SLJIT_FR7 8 |
237 | | #define SLJIT_FR8 9 |
238 | | #define SLJIT_FR9 10 |
239 | | /* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) |
240 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ |
241 | | #define SLJIT_FR(i) (1 + (i)) |
242 | | |
243 | | /* Floating point saved registers. */ |
244 | | #define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) |
245 | | #define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) |
246 | | #define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) |
247 | | #define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) |
248 | | #define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) |
249 | | #define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) |
250 | | #define SLJIT_FS6 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 6) |
251 | | #define SLJIT_FS7 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 7) |
252 | | #define SLJIT_FS8 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 8) |
253 | | #define SLJIT_FS9 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 9) |
254 | | /* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) |
255 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ |
256 | | #define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) |
257 | | |
258 | | /* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ |
259 | | #define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) |
260 | | |
261 | | /* Return with floating point arg. */ |
262 | | |
263 | 0 | #define SLJIT_RETURN_FREG SLJIT_FR0 |
264 | | |
265 | | /* --------------------------------------------------------------------- */ |
266 | | /* Vector registers */ |
267 | | /* --------------------------------------------------------------------- */ |
268 | | |
269 | | /* Vector registers are storage areas, which are used for Single Instruction |
270 | | Multiple Data (SIMD) computations. The VR and VS register sets overlap |
271 | | in the same way as R and S register sets. See above. |
272 | | |
273 | | The storage space of vector registers often overlap with floating point |
274 | | registers. In this case setting the value of SLJIT_VR(i) destroys the |
275 | | value of SLJIT_FR(i) and vice versa. See SLJIT_SEPARATE_VECTOR_REGISTERS |
276 | | macro. */ |
277 | | |
278 | | /* Vector scratch registers. */ |
279 | 2.87M | #define SLJIT_VR0 1 |
280 | 2.05M | #define SLJIT_VR1 2 |
281 | 879k | #define SLJIT_VR2 3 |
282 | 812k | #define SLJIT_VR3 4 |
283 | 131k | #define SLJIT_VR4 5 |
284 | 128k | #define SLJIT_VR5 6 |
285 | 108k | #define SLJIT_VR6 7 |
286 | | #define SLJIT_VR7 8 |
287 | | #define SLJIT_VR8 9 |
288 | | #define SLJIT_VR9 10 |
289 | | /* All VR registers provided by the architecture can be accessed by SLJIT_VR(i) |
290 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_VECTOR_REGISTERS. */ |
291 | | #define SLJIT_VR(i) (1 + (i)) |
292 | | |
293 | | /* Vector saved registers. */ |
294 | | #define SLJIT_VS0 (SLJIT_NUMBER_OF_VECTOR_REGISTERS) |
295 | | #define SLJIT_VS1 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 1) |
296 | | #define SLJIT_VS2 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 2) |
297 | | #define SLJIT_VS3 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 3) |
298 | | #define SLJIT_VS4 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 4) |
299 | | #define SLJIT_VS5 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 5) |
300 | | #define SLJIT_VS6 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 6) |
301 | | #define SLJIT_VS7 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 7) |
302 | | #define SLJIT_VS8 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 8) |
303 | | #define SLJIT_VS9 (SLJIT_NUMBER_OF_VECTOR_REGISTERS - 9) |
304 | | /* All VS registers provided by the architecture can be accessed by SLJIT_VS(i) |
305 | | The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS. */ |
306 | | #define SLJIT_VS(i) (SLJIT_NUMBER_OF_VECTOR_REGISTERS - (i)) |
307 | | |
308 | | /* Vector registers >= SLJIT_FIRST_SAVED_VECTOR_REG are saved registers. */ |
309 | | #define SLJIT_FIRST_SAVED_VECTOR_REG (SLJIT_VS0 - SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS + 1) |
310 | | |
311 | | /* --------------------------------------------------------------------- */ |
312 | | /* Argument type definitions */ |
313 | | /* --------------------------------------------------------------------- */ |
314 | | |
315 | | /* The following argument type definitions are used by sljit_emit_enter, |
316 | | sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. |
317 | | |
318 | | For sljit_emit_call and sljit_emit_icall, the first integer argument |
319 | | must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. |
320 | | Similarly the first floating point argument must be placed into SLJIT_FR0, |
321 | | the second one into SLJIT_FR1, and so on. |
322 | | |
323 | | For sljit_emit_enter, the integer arguments can be stored in scratch |
324 | | or saved registers. Scratch registers are identified by a _R suffix. |
325 | | |
326 | | If only saved registers are used, then the allocation mirrors what is |
327 | | done for the "call" functions but using saved registers, meaning that |
328 | | the first integer argument goes to SLJIT_S0, the second one goes into |
329 | | SLJIT_S1, and so on. |
330 | | |
331 | | If scratch registers are used, then the way the integer registers are |
332 | | allocated changes so that SLJIT_S0, SLJIT_S1, etc; will be assigned |
333 | | only for the arguments not using scratch registers, while SLJIT_R<n> |
334 | | will be used for the ones using scratch registers. |
335 | | |
336 | | Furthermore, the index (shown as "n" above) that will be used for the |
337 | | scratch register depends on how many previous integer registers |
338 | | (scratch or saved) were used already, starting with SLJIT_R0. |
339 | | Even though some indexes will likely be skipped, they still need to be |
340 | | accounted for in the scratches parameter of sljit_emit_enter. See below |
341 | | for some examples. |
342 | | |
343 | | The floating point arguments always use scratch registers (but not the |
344 | | _R suffix like the integer arguments) and must use SLJIT_FR0, SLJIT_FR1, |
345 | | just like in the "call" functions. |
346 | | |
347 | | Note: the mapping for scratch registers is part of the compiler context |
348 | | and therefore a new context after sljit_emit_call/sljit_emit_icall |
349 | | could remove access to some scratch registers that were used as |
350 | | arguments. |
351 | | |
352 | | Example function definition: |
353 | | sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, |
354 | | sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); |
355 | | |
356 | | Argument type definition: |
357 | | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) |
358 | | | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) |
359 | | | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) |
360 | | |
361 | | Short form of argument type definition: |
362 | | SLJIT_ARGS4(F32, P, F64, 32, F32) |
363 | | |
364 | | Argument passing: |
365 | | arg_a must be placed in SLJIT_R0 |
366 | | arg_b must be placed in SLJIT_FR0 |
367 | | arg_c must be placed in SLJIT_R1 |
368 | | arg_d must be placed in SLJIT_FR1 |
369 | | |
370 | | Examples for argument processing by sljit_emit_enter: |
371 | | SLJIT_ARGS4V(P, 32_R, F32, W) |
372 | | Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 |
373 | | The type of the result is void. |
374 | | |
375 | | SLJIT_ARGS4(F32, W, W_R, W, W_R) |
376 | | Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 |
377 | | The type of the result is sljit_f32. |
378 | | |
379 | | SLJIT_ARGS4(P, F32, W, F32, P_R) |
380 | | Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 |
381 | | The type of the result is pointer. |
382 | | |
383 | | Note: it is recommended to pass the scratch arguments first |
384 | | followed by the saved arguments: |
385 | | |
386 | | SLJIT_ARGS4(W, W_R, W_R, W, W) |
387 | | Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 |
388 | | The type of the result is sljit_sw / sljit_uw. |
389 | | */ |
390 | | |
391 | | /* The following flag is only allowed for the integer arguments of |
392 | | sljit_emit_enter. When the flag is set, the integer argument is |
393 | | stored in a scratch register instead of a saved register. */ |
394 | 586k | #define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 |
395 | | |
396 | | /* No return value, only supported by SLJIT_ARG_RETURN. */ |
397 | | #define SLJIT_ARG_TYPE_RET_VOID 0 |
398 | | /* Machine word sized integer argument or result. */ |
399 | | #define SLJIT_ARG_TYPE_W 1 |
400 | | #define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) |
401 | | /* 32 bit integer argument or result. */ |
402 | | #define SLJIT_ARG_TYPE_32 2 |
403 | | #define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) |
404 | | /* Pointer sized integer argument or result. */ |
405 | | #define SLJIT_ARG_TYPE_P 3 |
406 | | #define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) |
407 | | /* 64 bit floating point argument or result. */ |
408 | 319M | #define SLJIT_ARG_TYPE_F64 4 |
409 | | /* 32 bit floating point argument or result. */ |
410 | | #define SLJIT_ARG_TYPE_F32 5 |
411 | | |
412 | 1.04G | #define SLJIT_ARG_SHIFT 4 |
413 | 210M | #define SLJIT_ARG_RETURN(type) (type) |
414 | 618M | #define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) |
415 | | |
416 | | /* Simplified argument list definitions. |
417 | | |
418 | | The following definition: |
419 | | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) |
420 | | |
421 | | can be shortened to: |
422 | | SLJIT_ARGS1(W, F32) |
423 | | |
424 | | Another example where no value is returned: |
425 | | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1) |
426 | | |
427 | | can be shortened to: |
428 | | SLJIT_ARGS1V(W_R) |
429 | | */ |
430 | | |
431 | | #define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type |
432 | | |
433 | | #define SLJIT_ARGS0(ret) \ |
434 | 210M | SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) |
435 | | #define SLJIT_ARGS0V() \ |
436 | | SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) |
437 | | |
438 | | #define SLJIT_ARGS1(ret, arg1) \ |
439 | 210M | (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) |
440 | | #define SLJIT_ARGS1V(arg1) \ |
441 | | (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) |
442 | | |
443 | | #define SLJIT_ARGS2(ret, arg1, arg2) \ |
444 | 209M | (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) |
445 | | #define SLJIT_ARGS2V(arg1, arg2) \ |
446 | | (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) |
447 | | |
448 | | #define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ |
449 | 199M | (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) |
450 | | #define SLJIT_ARGS3V(arg1, arg2, arg3) \ |
451 | | (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) |
452 | | |
453 | | #define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ |
454 | | (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) |
455 | | #define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \ |
456 | | (SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) |
457 | | |
458 | | /* --------------------------------------------------------------------- */ |
459 | | /* Main structures and functions */ |
460 | | /* --------------------------------------------------------------------- */ |
461 | | |
462 | | /* |
463 | | The following structures are private, and can be changed in the |
464 | | future. Keeping them here allows code inlining. |
465 | | */ |
466 | | |
467 | | struct sljit_memory_fragment { /* One node of a singly linked chain of memory fragments. */ |
468 | | struct sljit_memory_fragment *next; /* Following fragment in the chain. */ |
469 | | sljit_uw used_size; /* NOTE(review): presumably the number of bytes of memory[] already handed out - confirm. */ |
470 | | /* Must be aligned to sljit_sw. */ |
471 | | sljit_u8 memory[1]; /* NOTE(review): looks like the one-element trailing-array idiom for a larger buffer - confirm against the allocator. */ |
472 | | }; |
473 | | |
474 | | struct sljit_label { /* Label record; records are linked through next. */ |
475 | | struct sljit_label *next; |
476 | | union { |
477 | | sljit_uw index; |
478 | | sljit_uw addr; |
479 | | } u; /* index and addr share storage. NOTE(review): presumably index is replaced by the final address during code generation - confirm. */ |
480 | | /* The maximum size difference. */ |
481 | | sljit_uw size; |
482 | | }; |
483 | | |
484 | | struct sljit_jump { /* Jump record; records are linked through next. */ |
485 | | struct sljit_jump *next; |
486 | | sljit_uw addr; |
487 | | /* Architecture dependent flags. */ |
488 | | sljit_uw flags; |
489 | | union { |
490 | | sljit_uw target; |
491 | | struct sljit_label *label; |
492 | | } u; /* Either a raw target value or a pointer to a target label (shared storage). NOTE(review): which member is active is presumably recorded in flags - confirm. */ |
493 | | }; |
494 | | |
495 | | struct sljit_const { /* Constant record; records are linked through next. */ |
496 | | struct sljit_const *next; |
497 | | sljit_uw addr; /* NOTE(review): presumably the location of the constant in the generated code - confirm. */ |
498 | | }; |
499 | | |
500 | | struct sljit_generate_code_buffer { /* Describes a memory buffer by pointer, size and executable offset. */ |
501 | | void *buffer; |
502 | | sljit_uw size; /* NOTE(review): presumably the size of buffer in bytes - confirm. */ |
503 | | sljit_sw executable_offset; /* NOTE(review): presumably the same meaning as sljit_compiler.executable_offset - confirm. */ |
504 | | }; |
505 | | |
506 | | struct sljit_read_only_buffer { /* Read-only buffer record; records are linked through next. */ |
507 | | struct sljit_read_only_buffer *next; |
508 | | sljit_uw size; |
509 | | /* Label can be replaced by address after sljit_generate_code. */ |
510 | | union { |
511 | | struct sljit_label *label; |
512 | | sljit_uw addr; |
513 | | } u; /* label and addr share storage, so only one of them is valid at a time. */ |
514 | | }; |
515 | | |
516 | | struct sljit_compiler { /* Compiler state; created by sljit_create_compiler() and released by sljit_free_compiler(). */ |
517 | | sljit_s32 error; /* Current error code; this is the value returned by sljit_get_compiler_error(). */ |
518 | | sljit_s32 options; |
519 | | |
520 | | struct sljit_label *labels; /* NOTE(review): labels/jumps/consts look like list heads with last_* as the matching tails - confirm. */ |
521 | | struct sljit_jump *jumps; |
522 | | struct sljit_const *consts; |
523 | | struct sljit_label *last_label; |
524 | | struct sljit_jump *last_jump; |
525 | | struct sljit_const *last_const; |
526 | | |
527 | | void *allocator_data; /* NOTE(review): presumably the allocator_data passed to sljit_create_compiler() - confirm. */ |
528 | | void *user_data; |
529 | | struct sljit_memory_fragment *buf; |
530 | | struct sljit_memory_fragment *abuf; |
531 | | |
532 | | /* Number of labels created by the compiler. */ |
533 | | sljit_uw label_count; |
534 | | /* Available scratch registers. */ |
535 | | sljit_s32 scratches; |
536 | | /* Available saved registers. */ |
537 | | sljit_s32 saveds; |
538 | | /* Available float scratch registers. */ |
539 | | sljit_s32 fscratches; |
540 | | /* Available float saved registers. */ |
541 | | sljit_s32 fsaveds; |
542 | | #if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) \ |
543 | | || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ |
544 | | || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ |
545 | | || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
546 | | /* Available vector scratch registers. */ |
547 | | sljit_s32 vscratches; |
548 | | /* Available vector saved registers. */ |
549 | | sljit_s32 vsaveds; |
550 | | #endif /* SLJIT_SEPARATE_VECTOR_REGISTERS || SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ |
551 | | /* Local stack size. */ |
552 | | sljit_s32 local_size; |
553 | | /* Maximum code size. */ |
554 | | sljit_uw size; |
555 | | /* Relative offset of the executable mapping from the writable mapping. */ |
556 | | sljit_sw executable_offset; |
557 | | /* Executable size for statistical purposes. */ |
558 | | sljit_uw executable_size; |
559 | | |
560 | | #if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) |
561 | | sljit_s32 status_flags_state; |
562 | | #endif /* SLJIT_HAS_STATUS_FLAGS_STATE */ |
563 | | |
564 | | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
565 | | sljit_s32 args_size; |
566 | | #endif /* SLJIT_CONFIG_X86_32 */ |
567 | | |
568 | | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
569 | | /* Temporary fields. */ |
570 | | sljit_s32 mode32; |
571 | | #endif /* SLJIT_CONFIG_X86_64 */ |
572 | | |
573 | | #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) |
574 | | /* Constant pool handling. */ |
575 | | sljit_uw *cpool; |
576 | | sljit_u8 *cpool_unique; |
577 | | sljit_uw cpool_diff; |
578 | | sljit_uw cpool_fill; |
579 | | /* Other members. */ |
580 | | /* Contains pointer, "ldr pc, [...]" pairs. */ |
581 | | sljit_uw patches; |
582 | | #endif /* SLJIT_CONFIG_ARM_V6 */ |
583 | | |
584 | | #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) |
585 | | /* Temporary fields. */ |
586 | | sljit_uw shift_imm; |
587 | | #endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 */ |
588 | | |
589 | | #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) |
590 | | sljit_uw args_size; |
591 | | #endif /* SLJIT_CONFIG_ARM_32 && __SOFTFP__ */ |
592 | | |
593 | | #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) |
594 | | /* Temporary fields. */ |
595 | | sljit_u32 imm; |
596 | | #endif /* SLJIT_CONFIG_PPC */ |
597 | | |
598 | | #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) |
599 | | sljit_s32 delay_slot; |
600 | | /* Temporary fields. */ |
601 | | sljit_s32 cache_arg; |
602 | | sljit_sw cache_argw; |
603 | | #endif /* SLJIT_CONFIG_MIPS */ |
604 | | |
605 | | #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) |
606 | | sljit_uw args_size; |
607 | | #endif /* SLJIT_CONFIG_MIPS_32 */ |
608 | | |
609 | | #if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) |
610 | | /* Temporary fields. */ |
611 | | sljit_s32 cache_arg; |
612 | | sljit_sw cache_argw; |
613 | | #endif /* SLJIT_CONFIG_RISCV */ |
614 | | |
615 | | #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) |
616 | | /* Need to allocate register save area to make calls. */ |
617 | | /* Temporary fields. */ |
618 | | sljit_s32 mode; |
619 | | #endif /* SLJIT_CONFIG_S390X */ |
620 | | |
621 | | #if (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH) |
622 | | /* Temporary fields. */ |
623 | | sljit_s32 cache_arg; |
624 | | sljit_sw cache_argw; |
625 | | #endif /* SLJIT_CONFIG_LOONGARCH */ |
626 | | |
627 | | #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
628 | | FILE* verbose; |
629 | | #endif /* SLJIT_VERBOSE */ |
630 | | |
631 | | /* Note: SLJIT_DEBUG enables SLJIT_ARGUMENT_CHECKS. */ |
632 | | #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ |
633 | | || (defined SLJIT_DEBUG && SLJIT_DEBUG) |
634 | | /* Flags specified by the last arithmetic instruction. |
635 | | It contains the type of the variable flag. */ |
636 | | sljit_s32 last_flags; |
637 | | /* Return value type set by entry functions. */ |
638 | | sljit_s32 last_return; |
639 | | /* Local size passed to entry functions. */ |
640 | | sljit_s32 logical_local_size; |
641 | | #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */ |
642 | | |
643 | | #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ |
644 | | || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ |
645 | | || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
646 | | #if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) |
647 | | /* Available float scratch registers. NOTE(review): same wording as the comment on fscratches; presumably this is the float-only count when vector registers are not separate - confirm. */ |
648 | | sljit_s32 real_fscratches; |
649 | | /* Available float saved registers. NOTE(review): same wording as the comment on fsaveds; presumably this is the float-only count when vector registers are not separate - confirm. */ |
650 | | sljit_s32 real_fsaveds; |
651 | | #endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ |
652 | | |
653 | | /* Trust arguments when an API function is called. |
654 | | Used internally for calling API functions. */ |
655 | | sljit_s32 skip_checks; |
656 | | #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG || SLJIT_VERBOSE */ |
657 | | }; |
658 | | |
659 | | /* --------------------------------------------------------------------- */ |
660 | | /* Main functions */ |
661 | | /* --------------------------------------------------------------------- */ |
662 | | |
663 | | /* Creates an SLJIT compiler. The allocator_data is required by some |
664 | | custom memory managers. This pointer is passed to SLJIT_MALLOC |
665 | | and SLJIT_FREE macros. Most allocators (including the default |
666 | | one) ignore this value, and it is recommended to pass NULL |
667 | | as a dummy value for allocator_data. |
668 | | |
669 | | Returns NULL if failed. */ |
670 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data); |
671 | | |
672 | | /* Frees everything except the compiled machine code. */ |
673 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); |
674 | | |
675 | | /* Returns the current error code. If an error occurs, future calls |
676 | | which use the same compiler argument return early with the same |
677 | | error code. Thus there is no need for checking the error after every |
678 | | call, it is enough to do it after the code is compiled. Removing |
679 | | these checks increases the performance of the compiling process. */ |
680 | 511M | static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } |
681 | | |
682 | | /* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except |
683 | | if an error was detected before. After the error code is set |
684 | | the compiler behaves as if the allocation failure happened |
685 | | during an SLJIT function call. This can greatly simplify error |
686 | | checking, since it is enough to check the compiler status |
687 | | after the code is compiled. */ |
688 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); |
689 | | |
690 | | /* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, |
691 | | and <= 128 bytes on 64 bit architectures. The memory area is owned by the |
692 | | compiler, and freed by sljit_free_compiler. The returned pointer is |
693 | | sizeof(sljit_sw) aligned. Excellent for allocating small blocks during |
694 | | compiling, and no need to worry about freeing them. The size is enough |
695 | | to contain at most 16 pointers. If the size is outside of the range, |
696 | | the function will return with NULL. However, this return value does not |
697 | | indicate that there is no more memory (does not set the current error code |
698 | | of the compiler to out-of-memory status). */ |
699 | | SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); |
700 | | |
701 | | /* Returns the allocator data passed to sljit_create_compiler. */ |
702 | 0 | static SLJIT_INLINE void* sljit_compiler_get_allocator_data(struct sljit_compiler *compiler) { return compiler->allocator_data; } |
703 | | /* Sets/gets the user data for a compiler. */ |
704 | 0 | static SLJIT_INLINE void sljit_compiler_set_user_data(struct sljit_compiler *compiler, void *user_data) { compiler->user_data = user_data; } |
705 | 0 | static SLJIT_INLINE void* sljit_compiler_get_user_data(struct sljit_compiler *compiler) { return compiler->user_data; } |
706 | | |
707 | | #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) |
708 | | /* Passing NULL disables verbose. */ |
709 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); |
710 | | #endif /* SLJIT_VERBOSE */ |
711 | | |
712 | | /* Option bits for sljit_generate_code. */ |
713 | | |
714 | | /* The exec_allocator_data points to a pre-allocated |
715 | | buffer whose type is sljit_generate_code_buffer. */ |
716 | | #define SLJIT_GENERATE_CODE_BUFFER 0x1 |
717 | | |
718 | | /* Create executable code from the instruction stream. This is the final step |
719 | | of the code generation, and no more instructions can be emitted after this call. |
720 | | |
721 | | options is the combination of SLJIT_GENERATE_CODE_* bits |
722 | | exec_allocator_data is passed to SLJIT_MALLOC_EXEC and |
723 | | SLJIT_FREE_EXEC functions */ |
724 | | |
725 | | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data); |
726 | | |
727 | | /* Free executable code. */ |
728 | | |
729 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data); |
730 | | |
731 | | /* When the protected executable allocator is used the JIT code is mapped |
732 | | twice. The first mapping has read/write and the second mapping has read/exec |
733 | | permissions. This function returns with the relative offset of the executable |
734 | | mapping using the writable mapping as the base after the machine code is |
735 | | successfully generated. The returned value is always 0 for the normal executable |
736 | | allocator, since it uses only one mapping with read/write/exec permissions. |
737 | | Dynamic code modifications require this value. |
738 | | |
739 | | Before a successful code generation, this function returns with 0. */ |
740 | 0 | static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } |
741 | | |
742 | | /* The executable memory consumption of the generated code can be retrieved by |
743 | | this function. The returned value can be used for statistical purposes. |
744 | | |
745 | | Before a successful code generation, this function returns with 0. */ |
746 | 586k | static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } |
747 | | |
748 | | /* Returns with non-zero if the feature or limitation type passed as its |
749 | | argument is present on the current CPU. The return value is one, if a |
750 | | feature is fully supported, and it is two, if partially supported. |
751 | | |
752 | | Some features (e.g. floating point operations) require hardware (CPU) |
753 | | support while others (e.g. move with update) are emulated if not available. |
754 | | However, even when a feature is emulated, specialized code paths may be |
755 | | faster than the emulation. Some limitations are emulated as well so their |
756 | | general case is supported but it has extra performance costs. |
757 | | |
758 | | Note: sljitConfigInternal.h also provides several feature detection macros. */ |
759 | | |
760 | | /* [Not emulated] Floating-point support is available. */ |
761 | 0 | #define SLJIT_HAS_FPU 0 |
762 | | /* [Limitation] Some registers are virtual registers. */ |
763 | | #define SLJIT_HAS_VIRTUAL_REGISTERS 1 |
764 | | /* [Emulated] Has zero register (setting a memory location to zero is efficient). */ |
765 | 77.3k | #define SLJIT_HAS_ZERO_REGISTER 2 |
766 | | /* [Emulated] Count leading zero is supported. */ |
767 | 0 | #define SLJIT_HAS_CLZ 3 |
768 | | /* [Emulated] Count trailing zero is supported. */ |
769 | 0 | #define SLJIT_HAS_CTZ 4 |
770 | | /* [Emulated] Reverse the order of bytes is supported. */ |
771 | 0 | #define SLJIT_HAS_REV 5 |
772 | | /* [Emulated] Rotate left/right is supported. */ |
773 | 0 | #define SLJIT_HAS_ROT 6 |
774 | | /* [Emulated] Conditional move is supported. */ |
775 | 1.22G | #define SLJIT_HAS_CMOV 7 |
776 | | /* [Emulated] Prefetch instruction is available (emulated as a nop). */ |
777 | 0 | #define SLJIT_HAS_PREFETCH 8 |
778 | | /* [Emulated] Copy from/to f32 operation is available (see sljit_emit_fcopy). */ |
779 | 0 | #define SLJIT_HAS_COPY_F32 9 |
780 | | /* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */ |
781 | 0 | #define SLJIT_HAS_COPY_F64 10 |
782 | | /* [Not emulated] The 64 bit floating point registers can be used as |
783 | | two separate 32 bit floating point registers (e.g. ARM32). The |
784 | | second 32 bit part can be accessed by SLJIT_F64_SECOND. */ |
785 | | #define SLJIT_HAS_F64_AS_F32_PAIR 11 |
786 | | /* [Not emulated] Some SIMD operations are supported by the compiler. */ |
787 | 1.17M | #define SLJIT_HAS_SIMD 12 |
788 | | /* [Not emulated] SIMD registers are mapped to a pair of double precision |
789 | | floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to |
790 | | a simd operation represents the same 128 bit register, and both SLJIT_FR0 |
791 | | and SLJIT_FR1 are overwritten. */ |
792 | | #define SLJIT_SIMD_REGS_ARE_PAIRS 13 |
793 | | /* [Not emulated] Atomic support is available. */ |
794 | 0 | #define SLJIT_HAS_ATOMIC 14 |
795 | | /* [Not emulated] Memory barrier support is available. */ |
796 | 0 | #define SLJIT_HAS_MEMORY_BARRIER 15 |
797 | | |
798 | | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
799 | | /* [Not emulated] AVX support is available on x86. */ |
800 | 0 | #define SLJIT_HAS_AVX 100 |
801 | | /* [Not emulated] AVX2 support is available on x86. */ |
802 | 0 | #define SLJIT_HAS_AVX2 101 |
803 | | #endif /* SLJIT_CONFIG_X86 */ |
804 | | |
805 | | #if (defined SLJIT_CONFIG_LOONGARCH) |
806 | | /* [Not emulated] LASX support is available on LoongArch */ |
807 | | #define SLJIT_HAS_LASX 201 |
808 | | #endif /* SLJIT_CONFIG_LOONGARCH */ |
809 | | |
810 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); |
811 | | |
812 | | /* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, |
813 | | sljit_cmp_info returns with: |
814 | | zero - if the cpu supports the floating point comparison type |
815 | | one - if the comparison requires two machine instructions |
816 | | two - if the comparison requires more than two machine instructions |
817 | | |
818 | | When the result is non-zero, it is recommended to avoid |
819 | | using the specified comparison type if it is easy to do so. |
820 | | |
821 | | Otherwise it returns zero. */ |
822 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); |
823 | | |
824 | | /* The following functions generate machine code. If there is no |
825 | | error, they return with SLJIT_SUCCESS, otherwise they return |
826 | | with an error code. */ |
827 | | |
828 | | /* |
829 | | The executable code is a callable function from the viewpoint |
830 | | of the C language. Function calls must conform with the ABI |
831 | | (Application Binary Interface) of the target platform, which |
832 | | specify the purpose of machine registers and stack handling |
833 | | among other things. The sljit_emit_enter function emits the |
834 | | necessary instructions for setting up an entry point for the |
835 | | executable code. This is often called the function prologue. |
836 | | |
837 | | The "options" argument can be used to pass configuration options |
838 | | to the sljit compiler which affects the generated code, until |
839 | | another sljit_emit_enter or sljit_set_context is called. The |
840 | | available options are listed before sljit_emit_enter. |
841 | | |
842 | | The function argument list is specified by the SLJIT_ARGSx |
843 | | (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently at most four |
844 | | arguments are supported. See the description of SLJIT_ARGSx |
845 | | macros about argument passing. |
846 | | |
847 | | The register set used by the function must be declared as well. |
848 | | The number of scratch and saved registers available to the |
849 | | function must be passed to sljit_emit_enter. Only R registers |
850 | | between R0 and "scratches" argument can be used later. E.g. |
851 | | if "scratches" is set to two, the scratch register set will |
852 | | be limited to SLJIT_R0 and SLJIT_R1. The S registers are |
853 | | declared in a similar manner, but their count is specified |
854 | | by "saveds" argument. The floating point scratch and saved |
855 | | registers can be set by using "scratches" and "saveds" argument |
856 | | as well, but their value must be passed to the SLJIT_ENTER_FLOAT |
857 | | macro, see below. |
858 | | |
859 | | The sljit_emit_enter is also capable of allocating a stack |
860 | | space for local data. The "local_size" argument contains the |
861 | | size in bytes of this local area, and it can be accessed using |
862 | | SLJIT_MEM1(SLJIT_SP). The memory area between SLJIT_SP (inclusive) |
863 | | and SLJIT_SP + local_size (exclusive) can be modified freely |
864 | | until the function returns. The allocated stack space is an |
865 | | uninitialized memory area. |
866 | | |
867 | | Floating point scratch and saved registers must be specified |
868 | | by the SLJIT_ENTER_FLOAT macro, whose result value should be |
869 | | combined with scratches / saveds argument. |
870 | | |
871 | | Examples: |
872 | | To use three scratch and four floating point scratch |
873 | | registers, the "scratches" argument must be set to: |
874 | | 3 | SLJIT_ENTER_FLOAT(4) |
875 | | |
876 | | To use six saved and five floating point saved |
877 | | registers, the "saveds" argument must be set to: |
878 | | 6 | SLJIT_ENTER_FLOAT(5) |
879 | | |
880 | | Note: the following conditions must be met: |
881 | | 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS |
882 | | 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS |
883 | | scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS |
884 | | |
885 | | 0 <= float scratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS |
886 | | 0 <= float saveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS |
887 | | float scratches + float saveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS |
888 | | |
889 | | Note: the compiler can use saved registers as scratch registers, |
890 | | but the opposite is not supported |
891 | | |
892 | | Note: every call of sljit_emit_enter and sljit_set_context |
893 | | overwrites the previous context. |
894 | | */ |
895 | | |
896 | | /* The following options are available for sljit_emit_enter. */ |
897 | | |
898 | | /* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive) |
899 | | are not saved / restored on function enter / return. Instead, |
900 | | these registers can be used to pass / return data (such as |
901 | | global / local context pointers) across function calls. The |
902 | | value of n must be between 1 and 3. This option is only |
903 | | supported by SLJIT_ENTER_REG_ARG calling convention. */ |
904 | | #define SLJIT_ENTER_KEEP(n) (n) |
905 | | |
906 | | /* The compiled function uses an SLJIT specific register argument |
907 | | calling convention. This is a lightweight function call type where |
908 | | both the caller and the called functions must be compiled by |
909 | | SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG |
910 | | and all arguments must be stored in scratch registers. */ |
911 | 586k | #define SLJIT_ENTER_REG_ARG 0x00000004 |
912 | | |
913 | | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
914 | | /* Use VEX prefix for all SIMD operations on x86. */ |
915 | 3.64M | #define SLJIT_ENTER_USE_VEX 0x00010000 |
916 | | #endif /* SLJIT_CONFIG_X86 */ |
917 | | |
918 | | /* Macros for other sljit_emit_enter arguments. */ |
919 | | |
920 | | /* Floating point scratch and saved registers can be |
921 | | specified by SLJIT_ENTER_FLOAT. */ |
922 | | #define SLJIT_ENTER_FLOAT(regs) ((regs) << 8) |
923 | | |
924 | | /* Vector scratch and saved registers can be specified |
925 | | by SLJIT_ENTER_VECTOR. */ |
926 | 586k | #define SLJIT_ENTER_VECTOR(regs) ((regs) << 16) |
927 | | |
928 | | /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ |
929 | 739M | #define SLJIT_MAX_LOCAL_SIZE 1048576 |
930 | | |
931 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, |
932 | | sljit_s32 options, sljit_s32 arg_types, |
933 | | sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size); |
934 | | |
935 | | /* The SLJIT compiler has a current context (which contains the local |
936 | | stack space size, number of used registers, etc.) which is initialized |
937 | | by sljit_emit_enter. Several functions (such as sljit_emit_return) |
938 | | require this context to be able to generate the appropriate code. |
939 | | However, some code fragments (compiled separately) may have no |
940 | | normal entry point so their context is unknown to the compiler. |
941 | | |
942 | | sljit_set_context and sljit_emit_enter have the same arguments, |
943 | | but sljit_set_context does not generate any machine code. |
944 | | |
945 | | Note: every call of sljit_emit_enter and sljit_set_context overwrites |
946 | | the previous context. */ |
947 | | |
948 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, |
949 | | sljit_s32 options, sljit_s32 arg_types, |
950 | | sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size); |
951 | | |
952 | | /* Return to the caller function. The sljit_emit_return_void function |
953 | | does not return with any value. The sljit_emit_return function returns |
954 | | with a single value loaded from its source operand. The load operation |
955 | | can be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1) and |
956 | | SLJIT_MOV_F32/SLJIT_MOV_F64 (see sljit_emit_fop1) depending on the |
957 | | return value specified by sljit_emit_enter/sljit_set_context. */ |
958 | | |
959 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler); |
960 | | |
961 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, |
962 | | sljit_s32 src, sljit_sw srcw); |
963 | | |
964 | | /* Restores the saved registers and frees the stack area, then the execution |
965 | | continues from the address specified by the source operand. This |
966 | | operation is similar to sljit_emit_return, but it ignores the return |
967 | | address. The code where the execution continues should use the same context |
968 | | as the caller function (see sljit_set_context). A word (pointer) value |
969 | | can be passed in the SLJIT_RETURN_REG register. This function can be used |
970 | | to jump to exception handlers. */ |
971 | | |
972 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, |
973 | | sljit_s32 src, sljit_sw srcw); |
974 | | |
975 | | /* |
976 | | Source and destination operands for arithmetical instructions |
977 | | imm - a simple immediate value (cannot be used as a destination) |
978 | | reg - any of the available registers (immediate argument must be 0) |
979 | | [imm] - absolute memory address |
980 | | [reg+imm] - indirect memory address |
981 | | [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3) |
982 | | useful for accessing arrays (fully supported by both x86 and |
983 | | ARM architectures, and cheap operation on others) |
984 | | */ |
985 | | |
986 | | /* |
987 | | IMPORTANT NOTE: memory accesses MUST be naturally aligned unless |
988 | | SLJIT_UNALIGNED macro is defined and its value is 1. |
989 | | |
990 | | length | alignment |
991 | | ---------+----------- |
992 | | byte | 1 byte (any physical_address is accepted) |
993 | | half | 2 byte (physical_address & 0x1 == 0) |
994 | | int | 4 byte (physical_address & 0x3 == 0) |
995 | | word | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 |
996 | | | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 |
997 | | pointer | size of sljit_up type (4 byte on 32 bit machines, 4 or 8 byte |
998 | | | on 64 bit machines) |
999 | | |
1000 | | Note: Different architectures have different addressing limitations. |
1001 | | A single instruction is enough for the following addressing |
1002 | | modes. Other addressing modes are emulated by instruction |
1003 | | sequences. This information could help to improve those code |
1004 | | generators which focus on only a few architectures. |
1005 | | |
1006 | | x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32) |
1007 | | [reg+(reg<<imm)] is supported |
1008 | | [imm], -2^32+1 <= imm <= 2^32-1 is supported |
1009 | | Write-back is not supported |
1010 | | arm: [reg+imm], -4095 <= imm <= 4095 (or -255 <= imm <= 255 for signed |
1011 | | bytes, any halfs or floating point values) |
1012 | | [reg+(reg<<imm)] is supported |
1013 | | Write-back is supported |
1014 | | arm-t2: [reg+imm], -255 <= imm <= 4095 |
1015 | | [reg+(reg<<imm)] is supported |
1016 | | Write back is supported only for [reg+imm], where -255 <= imm <= 255 |
1017 | | arm64: [reg+imm], -256 <= imm <= 255, 0 <= aligned imm <= 4095 * alignment |
1018 | | [reg+(reg<<imm)] is supported |
1019 | | Write back is supported only for [reg+imm], where -256 <= imm <= 255 |
1020 | | ppc: [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit |
1021 | | signed load on 64 bit requires immediates divisible by 4. |
1022 | | [reg+imm] is not supported for signed 8 bit values. |
1023 | | [reg+reg] is supported |
1024 | | Write-back is supported except for one instruction: 32 bit signed |
1025 | | load with [reg+imm] addressing mode on 64 bit. |
1026 | | mips: [reg+imm], -65536 <= imm <= 65535 |
1027 | | Write-back is not supported |
1028 | | riscv: [reg+imm], -2048 <= imm <= 2047 |
1029 | | Write-back is not supported |
1030 | | s390x: [reg+imm], -2^19 <= imm < 2^19 |
1031 | | [reg+reg] is supported |
1032 | | Write-back is not supported |
1033 | | loongarch: [reg+imm], -2048 <= imm <= 2047 |
1034 | | [reg+reg] is supported |
1035 | | Write-back is not supported |
1036 | | */ |
1037 | | |
1038 | | /* Macros for specifying operand types. */ |
1039 | 40.8G | #define SLJIT_MEM 0x80 |
1040 | | #define SLJIT_MEM0() (SLJIT_MEM) |
1041 | 21.2G | #define SLJIT_MEM1(r1) (SLJIT_MEM | (r1)) |
1042 | 0 | #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) |
1043 | 51.0G | #define SLJIT_IMM 0x7f |
1044 | | #define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8)) |
1045 | | |
1046 | | /* Macros for checking operand types (only for valid arguments). */ |
1047 | | #define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM) |
1048 | | #define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM) |
1049 | | #define SLJIT_IS_MEM0(arg) ((arg) == SLJIT_MEM) |
1050 | | #define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1)) |
1051 | | #define SLJIT_IS_MEM2(arg) (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) |
1052 | | #define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM) |
1053 | | #define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1)) |
1054 | | |
1055 | | /* Macros for extracting registers from operands. */ |
1056 | | /* Supports operands which contain a single register or are |
1057 | | constructed using SLJIT_MEM1, SLJIT_MEM2, or SLJIT_REG_PAIR. */ |
1058 | | #define SLJIT_EXTRACT_REG(arg) ((arg) & 0x7f) |
1059 | | /* Supports operands which are constructed using SLJIT_MEM2 or SLJIT_REG_PAIR. */ |
1060 | | #define SLJIT_EXTRACT_SECOND_REG(arg) ((arg) >> 8) |
1061 | | |
1062 | | /* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on |
1063 | | 32 bit CPUs. When this option is set for an arithmetic operation, only |
1064 | | the lower 32 bits of the input registers are used, and the CPU status |
1065 | | flags are set according to the 32 bit result. Although the higher 32 bit |
1066 | | of the input and the result registers are not defined by SLJIT, it might |
1067 | | be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU |
1068 | | requirements all source registers must be the result of those operations |
1069 | | where this option was also set. Memory loads read 32 bit values rather |
1070 | | than 64 bit ones. In other words 32 bit and 64 bit operations cannot be |
1071 | | mixed. The only exception is SLJIT_MOV32 whose source register can hold |
1072 | | any 32 or 64 bit value, and it is converted to a 32 bit compatible format |
1073 | | first. When the source and destination registers are the same, this |
1074 | | conversion is free (no instructions are emitted) on most CPUs. A 32 bit |
1075 | | value can also be converted to a 64 bit value by SLJIT_MOV_S32 |
1076 | | (sign extension) or SLJIT_MOV_U32 (zero extension). |
1077 | | |
1078 | | As for floating-point operations, this option sets 32 bit single |
1079 | | precision mode. Similar to the integer operations, all register arguments |
1080 | | must be the result of those operations where this option was also set. |
1081 | | |
1082 | | Note: memory addressing always uses 64 bit values on 64 bit systems so |
1083 | | the result of a 32 bit operation must not be used with SLJIT_MEMx |
1084 | | macros. |
1085 | | |
1086 | | This option is part of the instruction name, so there is no need to |
1087 | | manually set it. E.g: |
1088 | | |
1089 | | SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */ |
1090 | 20.6G | #define SLJIT_32 0x100 |
1091 | | |
1092 | | /* Many CPUs (x86, ARM, PPC) have status flag bits which can be set according |
1093 | | to the result of an operation. Other CPUs (MIPS) do not have status |
1094 | | flag bits, and results must be stored in registers. To cover both |
1095 | | architecture types efficiently only two flags are defined by SLJIT: |
1096 | | |
1097 | | * Zero (equal) flag: it is set if the result is zero |
1098 | | * Variable flag: its value is defined by the arithmetic operation |
1099 | | |
1100 | | SLJIT instructions can set either or both of these flags. The value of |
1101 | | these flags is undefined if the instruction does not specify their |
1102 | | value. The description of each instruction contains the list of |
1103 | | allowed flag types. |
1104 | | |
1105 | | Note: the logical or operation can be used to set flags. |
1106 | | |
1107 | | Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence |
1108 | | |
1109 | | sljit_op2(..., SLJIT_ADD, ...) |
1110 | | Both the zero and variable flags are undefined so they can |
1111 | | have any value after the operation is completed. |
1112 | | |
1113 | | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
1114 | | Sets the zero flag if the result is zero, clears it otherwise. |
1115 | | The variable flag is undefined. |
1116 | | |
1117 | | sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...) |
1118 | | Sets the variable flag if an integer overflow occurs, clears |
1119 | | it otherwise. The zero flag is undefined. |
1120 | | |
1121 | | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...) |
1122 | | Sets the zero flag if the result is zero, clears it otherwise. |
1123 | | Sets the variable flag if unsigned overflow (carry) occurs, |
1124 | | clears it otherwise. |
1125 | | |
1126 | | Certain instructions (e.g. SLJIT_MOV) do not modify flags, so |
1127 | | status flags are unchanged. |
1128 | | |
1129 | | Example: |
1130 | | |
1131 | | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
1132 | | sljit_op1(..., SLJIT_MOV, ...) |
1133 | | Zero flag is set according to the result of SLJIT_ADD. |
1134 | | |
1135 | | sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) |
1136 | | sljit_op2(..., SLJIT_ADD, ...) |
1137 | | Zero flag has unknown value. |
1138 | | |
1139 | | These flags can be used for code optimization. E.g. a fast loop can be |
1140 | | implemented by decreasing a counter register and setting the zero flag |
1141 | | using a single instruction. The zero flag can then be used by a |
1142 | | conditional jump to restart the loop. A single comparison can set the |
1143 | | zero and less flags to check if a value is less, equal, or greater |
1144 | | than another value. |
1145 | | |
1146 | | Motivation: although some CPUs can set a large number of flag bits, |
1147 | | usually their values are ignored or only a few of them are used. Emulating |
1148 | | a large number of flags on systems without a flag register is complicated |
1149 | | so SLJIT instructions must specify the flag they want to use and only |
1150 | | that flag is computed. The last arithmetic instruction can be repeated if |
1151 | | multiple flags need to be checked. |
1152 | | */ |
1153 | | |
1154 | | /* Set Zero status flag. */ |
1155 | 6.67G | #define SLJIT_SET_Z 0x0200 |
1156 | | /* Set the variable status flag if condition is true. |
1157 | | See comparison types (e.g. SLJIT_SET_LESS, SLJIT_SET_F_EQUAL). */ |
1158 | 44.5k | #define SLJIT_SET(condition) ((condition) << 10) |
1159 | | |
1160 | | /* Starting index of opcodes for sljit_emit_op0. */ |
1161 | 26.1M | #define SLJIT_OP0_BASE 0 |
1162 | | |
1163 | | /* Flags: - (does not modify flags) |
1164 | | Note: breakpoint instruction is not supported by all architectures (e.g. ppc) |
1165 | | It falls back to SLJIT_NOP in those cases. */ |
1166 | 0 | #define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) |
1167 | | /* Flags: - (does not modify flags) |
1168 | | Note: may or may not cause an extra cycle wait |
1169 | | it can even decrease the runtime in a few cases. */ |
1170 | 0 | #define SLJIT_NOP (SLJIT_OP0_BASE + 1) |
1171 | | /* Flags: - (may destroy flags) |
1172 | | Unsigned multiplication of SLJIT_R0 and SLJIT_R1. |
1173 | | Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ |
1174 | 0 | #define SLJIT_LMUL_UW (SLJIT_OP0_BASE + 2) |
1175 | | /* Flags: - (may destroy flags) |
1176 | | Signed multiplication of SLJIT_R0 and SLJIT_R1. |
1177 | | Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ |
1178 | 0 | #define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3) |
1179 | | /* Flags: - (may destroy flags) |
1180 | | Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
1181 | | The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. |
1182 | | Note: if SLJIT_R1 is 0, the behaviour is undefined. */ |
1183 | 0 | #define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) |
1184 | | #define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32) |
1185 | | /* Flags: - (may destroy flags) |
1186 | | Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
1187 | | The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. |
1188 | | Note: if SLJIT_R1 is 0, the behaviour is undefined. |
1189 | | Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), |
1190 | | the behaviour is undefined. */ |
1191 | 0 | #define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) |
1192 | | #define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32) |
1193 | | /* Flags: - (may destroy flags) |
1194 | | Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
1195 | | The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. |
1196 | | Note: if SLJIT_R1 is 0, the behaviour is undefined. */ |
1197 | 0 | #define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) |
1198 | | #define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32) |
1199 | | /* Flags: - (may destroy flags) |
1200 | | Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. |
1201 | | The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. |
1202 | | Note: if SLJIT_R1 is 0, the behaviour is undefined. |
1203 | | Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), |
1204 | | the behaviour is undefined. */ |
1205 | 0 | #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) |
1206 | | #define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) |
1207 | | /* Flags: - (does not modify flags) |
1208 | | May return with SLJIT_ERR_UNSUPPORTED if SLJIT_HAS_MEMORY_BARRIER |
1209 | | feature is not supported (calling sljit_has_cpu_feature() with |
1210 | | this feature option returns with 0). */ |
1211 | 0 | #define SLJIT_MEMORY_BARRIER (SLJIT_OP0_BASE + 8) |
1212 | | /* Flags: - (does not modify flags) |
1213 | | ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 |
1214 | | when Intel Control-flow Enforcement Technology (CET) is enabled. |
1215 | | No instructions are emitted for other architectures. */ |
1216 | 24.9M | #define SLJIT_ENDBR (SLJIT_OP0_BASE + 9) |
1217 | | /* Flags: - (may destroy flags) |
1218 | | Skip stack frames before return when Intel Control-flow |
1219 | | Enforcement Technology (CET) is enabled. No instructions |
1220 | | are emitted for other architectures. */ |
1221 | 1.17M | #define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 10) |
1222 | | |
1223 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op); |
1224 | | |
1225 | | /* Starting index of opcodes for sljit_emit_op1. */ |
1226 | 15.9G | #define SLJIT_OP1_BASE 32 |
1227 | | |
1228 | | /* The MOV instruction transfers data from source to destination. |
1229 | | |
1230 | | MOV instruction suffixes: |
1231 | | |
1232 | | U8 - unsigned 8 bit data transfer |
1233 | | S8 - signed 8 bit data transfer |
1234 | | U16 - unsigned 16 bit data transfer |
1235 | | S16 - signed 16 bit data transfer |
1236 | | U32 - unsigned int (32 bit) data transfer |
1237 | | S32 - signed int (32 bit) data transfer |
1238 | | P - pointer (sljit_up) data transfer |
1239 | | */ |
1240 | | |
1241 | | /* Flags: - (does not modify flags) */ |
1242 | 11.7G | #define SLJIT_MOV (SLJIT_OP1_BASE + 0) |
1243 | | /* Flags: - (does not modify flags) */ |
1244 | 288M | #define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) |
1245 | | #define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32) |
1246 | | /* Flags: - (does not modify flags) */ |
1247 | 0 | #define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) |
1248 | | #define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32) |
1249 | | /* Flags: - (does not modify flags) */ |
1250 | 150M | #define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) |
1251 | | #define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32) |
1252 | | /* Flags: - (does not modify flags) */ |
1253 | 0 | #define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) |
1254 | | #define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32) |
1255 | | /* Flags: - (does not modify flags) |
1256 | | Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ |
1257 | 807M | #define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) |
1258 | | /* Flags: - (does not modify flags) |
1259 | | Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ |
1260 | 203M | #define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) |
1261 | | /* Flags: - (does not modify flags) */ |
1262 | 0 | #define SLJIT_MOV32 (SLJIT_OP1_BASE + 7) |
1263 | | /* Flags: - (does not modify flags) |
1264 | | Note: loads a pointer sized data, useful on x32 mode (a 64 bit mode |
1265 | | on x86-64 which uses 32 bit pointers) or similar compiling modes */ |
1266 | 7.13G | #define SLJIT_MOV_P (SLJIT_OP1_BASE + 8) |
1267 | | /* Count leading zeroes |
1268 | | Flags: - (may destroy flags) |
1269 | | Note: immediate source argument is not supported */ |
1270 | 0 | #define SLJIT_CLZ (SLJIT_OP1_BASE + 9) |
1271 | | #define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) |
1272 | | /* Count trailing zeroes |
1273 | | Flags: - (may destroy flags) |
1274 | | Note: immediate source argument is not supported */ |
1275 | 0 | #define SLJIT_CTZ (SLJIT_OP1_BASE + 10) |
1276 | | #define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32) |
1277 | | /* Reverse the order of bytes |
1278 | | Flags: - (may destroy flags) |
1279 | | Note: converts between little and big endian formats |
1280 | | Note: immediate source argument is not supported */ |
1281 | 0 | #define SLJIT_REV (SLJIT_OP1_BASE + 11) |
1282 | | #define SLJIT_REV32 (SLJIT_REV | SLJIT_32) |
1283 | | /* Reverse the order of bytes in the lower 16 bit and extend as unsigned |
1284 | | Flags: - (may destroy flags) |
1285 | | Note: converts between little and big endian formats |
1286 | | Note: immediate source argument is not supported */ |
1287 | 0 | #define SLJIT_REV_U16 (SLJIT_OP1_BASE + 12) |
1288 | | #define SLJIT_REV32_U16 (SLJIT_REV_U16 | SLJIT_32) |
1289 | | /* Reverse the order of bytes in the lower 16 bit and extend as signed |
1290 | | Flags: - (may destroy flags) |
1291 | | Note: converts between little and big endian formats |
1292 | | Note: immediate source argument is not supported */ |
1293 | 0 | #define SLJIT_REV_S16 (SLJIT_OP1_BASE + 13) |
1294 | | #define SLJIT_REV32_S16 (SLJIT_REV_S16 | SLJIT_32) |
1295 | | /* Reverse the order of bytes in the lower 32 bit and extend as unsigned |
1296 | | Flags: - (may destroy flags) |
1297 | | Note: converts between little and big endian formats |
1298 | | Note: immediate source argument is not supported */ |
1299 | 0 | #define SLJIT_REV_U32 (SLJIT_OP1_BASE + 14) |
1300 | | /* Reverse the order of bytes in the lower 32 bit and extend as signed |
1301 | | Flags: - (may destroy flags) |
1302 | | Note: converts between little and big endian formats |
1303 | | Note: immediate source argument is not supported */ |
1304 | 0 | #define SLJIT_REV_S32 (SLJIT_OP1_BASE + 15) |
1305 | | |
1306 | | /* The following unary operations are supported by using sljit_emit_op2: |
1307 | | - binary not: SLJIT_XOR with immediate -1 as src1 or src2 |
1308 | | - negate: SLJIT_SUB with immediate 0 as src1 |
1309 | | Note: these operations are optimized by the compiler if the |
1310 | | target CPU has specialized instruction forms for them. */ |
1311 | | |
1312 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, |
1313 | | sljit_s32 dst, sljit_sw dstw, |
1314 | | sljit_s32 src, sljit_sw srcw); |
1315 | | |
1316 | | /* Starting index of opcodes for sljit_emit_op2. */ |
1317 | 9.27G | #define SLJIT_OP2_BASE 64 |
1318 | | |
1319 | | /* Flags: Z | OVERFLOW | CARRY */ |
1320 | 1.65G | #define SLJIT_ADD (SLJIT_OP2_BASE + 0) |
1321 | | #define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32) |
1322 | | /* Flags: CARRY */ |
1323 | 0 | #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) |
1324 | | #define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32) |
1325 | | /* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL |
1326 | | SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER |
1327 | | SIG_LESS_EQUAL | OVERFLOW | CARRY */ |
1328 | 10.5G | #define SLJIT_SUB (SLJIT_OP2_BASE + 2) |
1329 | | #define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32) |
1330 | | /* Flags: CARRY */ |
1331 | 0 | #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) |
1332 | | #define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32) |
1333 | | /* Note: integer mul |
1334 | | Flags: OVERFLOW */ |
1335 | 0 | #define SLJIT_MUL (SLJIT_OP2_BASE + 4) |
1336 | | #define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32) |
1337 | | /* Flags: Z */ |
1338 | 131M | #define SLJIT_AND (SLJIT_OP2_BASE + 5) |
1339 | | #define SLJIT_AND32 (SLJIT_AND | SLJIT_32) |
1340 | | /* Flags: Z */ |
1341 | 952M | #define SLJIT_OR (SLJIT_OP2_BASE + 6) |
1342 | | #define SLJIT_OR32 (SLJIT_OR | SLJIT_32) |
1343 | | /* Flags: Z */ |
1344 | 443k | #define SLJIT_XOR (SLJIT_OP2_BASE + 7) |
1345 | | #define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32) |
1346 | | /* Flags: Z |
1347 | | Let bit_length be the length of the shift operation: 32 or 64. |
1348 | | If src2 is immediate, src2w is masked by (bit_length - 1). |
1349 | | Otherwise, if the content of src2 is outside the range from 0 |
1350 | | to bit_length - 1, the result is undefined. */ |
1351 | 84.2M | #define SLJIT_SHL (SLJIT_OP2_BASE + 8) |
1352 | | #define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32) |
1353 | | /* Flags: Z |
1354 | | Same as SLJIT_SHL, except the second operand is |
1355 | | always masked by the length of the shift operation. */ |
1356 | 84.2M | #define SLJIT_MSHL (SLJIT_OP2_BASE + 9) |
1357 | | #define SLJIT_MSHL32 (SLJIT_MSHL | SLJIT_32) |
1358 | | /* Flags: Z |
1359 | | Let bit_length be the length of the shift operation: 32 or 64. |
1360 | | If src2 is immediate, src2w is masked by (bit_length - 1). |
1361 | | Otherwise, if the content of src2 is outside the range from 0 |
1362 | | to bit_length - 1, the result is undefined. */ |
1363 | 21.0M | #define SLJIT_LSHR (SLJIT_OP2_BASE + 10) |
1364 | | #define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32) |
1365 | | /* Flags: Z |
1366 | | Same as SLJIT_LSHR, except the second operand is |
1367 | | always masked by the length of the shift operation. */ |
1368 | 21.0M | #define SLJIT_MLSHR (SLJIT_OP2_BASE + 11) |
1369 | | #define SLJIT_MLSHR32 (SLJIT_MLSHR | SLJIT_32) |
1370 | | /* Flags: Z |
1371 | | Let bit_length be the length of the shift operation: 32 or 64. |
1372 | | If src2 is immediate, src2w is masked by (bit_length - 1). |
1373 | | Otherwise, if the content of src2 is outside the range from 0 |
1374 | | to bit_length - 1, the result is undefined. */ |
1375 | 391k | #define SLJIT_ASHR (SLJIT_OP2_BASE + 12) |
1376 | | #define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32) |
1377 | | /* Flags: Z |
1378 | | Same as SLJIT_ASHR, except the second operand is |
1379 | | always masked by the length of the shift operation. */ |
1380 | 391k | #define SLJIT_MASHR (SLJIT_OP2_BASE + 13) |
1381 | | #define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32) |
1382 | | /* Flags: - (may destroy flags) |
1383 | | Let bit_length be the length of the rotate operation: 32 or 64. |
1384 | | The second operand is always masked by (bit_length - 1). */ |
1385 | 0 | #define SLJIT_ROTL (SLJIT_OP2_BASE + 14) |
1386 | | #define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32) |
1387 | | /* Flags: - (may destroy flags) |
1388 | | Let bit_length be the length of the rotate operation: 32 or 64. |
1389 | | The second operand is always masked by (bit_length - 1). */ |
1390 | 0 | #define SLJIT_ROTR (SLJIT_OP2_BASE + 15) |
1391 | | #define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32) |
1392 | | |
1393 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, |
1394 | | sljit_s32 dst, sljit_sw dstw, |
1395 | | sljit_s32 src1, sljit_sw src1w, |
1396 | | sljit_s32 src2, sljit_sw src2w); |
1397 | | |
1398 | | /* The sljit_emit_op2u function is the same as sljit_emit_op2 |
1399 | | except the result is discarded. */ |
1400 | | |
1401 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, |
1402 | | sljit_s32 src1, sljit_sw src1w, |
1403 | | sljit_s32 src2, sljit_sw src2w); |
1404 | | |
1405 | | /* Starting index of opcodes for sljit_emit_op2r. */ |
1406 | | #define SLJIT_OP2R_BASE 96 |
1407 | | |
1408 | | /* Flags: - (may destroy flags) */ |
1409 | | #define SLJIT_MULADD (SLJIT_OP2R_BASE + 0) |
1410 | | #define SLJIT_MULADD32 (SLJIT_MULADD | SLJIT_32) |
1411 | | |
1412 | | /* Similar to sljit_emit_op2, except the destination is always a register. */ |
1413 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op, |
1414 | | sljit_s32 dst_reg, |
1415 | | sljit_s32 src1, sljit_sw src1w, |
1416 | | sljit_s32 src2, sljit_sw src2w); |
1417 | | |
1418 | | /* Emit a left or right shift operation, where the bits shifted |
1419 | | in come from a separate source operand. All operands are |
1420 | | interpreted as unsigned integers. |
1421 | | |
1422 | | In the following, the value_mask variable is 31 for 32 bit |
1423 | | operations and word_size - 1 otherwise. |
1424 | | |
1425 | | op must be one of the following operations: |
1426 | | SLJIT_SHL or SLJIT_SHL32: |
1427 | | dst_reg = src1_reg << src3_reg |
1428 | | dst_reg |= ((src2_reg >> 1) >> (src3 ^ value_mask)) |
1429 | | SLJIT_MSHL or SLJIT_MSHL32: |
1430 | | src3 &= value_mask |
1431 | | perform the SLJIT_SHL or SLJIT_SHL32 operation |
1432 | | SLJIT_LSHR or SLJIT_LSHR32: |
1433 | | dst_reg = src1_reg >> src3_reg |
1434 | | dst_reg |= ((src2_reg << 1) << (src3 ^ value_mask)) |
1435 | | SLJIT_MLSHR or SLJIT_MLSHR32: |
1436 | | src3 &= value_mask |
1437 | | perform the SLJIT_LSHR or SLJIT_LSHR32 operation |
1438 | | |
1439 | | op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO |
1440 | | |
1441 | | dst_reg specifies the destination register, where dst_reg |
1442 | | and src2_reg cannot be the same registers |
1443 | | src1_reg specifies the source register |
1444 | | src2_reg specifies the register which is shifted into src1_reg |
1445 | | src3 / src3w contains the shift amount |
1446 | | |
1447 | | Note: a rotate operation is performed if src1_reg and |
1448 | | src2_reg are the same registers |
1449 | | |
1450 | | Flags: - (may destroy flags) */ |
1451 | | |
1452 | | /* The src3 operand contains a non-zero value. Improves |
1453 | | the generated code on certain architectures, which |
1454 | | provides a small performance improvement. */ |
1455 | | #define SLJIT_SHIFT_INTO_NON_ZERO 0x200 |
1456 | | |
1457 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, |
1458 | | sljit_s32 dst_reg, |
1459 | | sljit_s32 src1_reg, |
1460 | | sljit_s32 src2_reg, |
1461 | | sljit_s32 src3, sljit_sw src3w); |
1462 | | |
1463 | | /* Starting index of opcodes for sljit_emit_op_src |
1464 | | and sljit_emit_op_dst. */ |
1465 | 4.12M | #define SLJIT_OP_SRC_DST_BASE 112 |
1466 | | |
1467 | | /* Fast return, see SLJIT_FAST_CALL for more details. |
1468 | | Note: src cannot be an immediate value |
1469 | | Flags: - (does not modify flags) */ |
1470 | 1.33M | #define SLJIT_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 0) |
1471 | | /* Skip stack frames before fast return. |
1472 | | Note: src cannot be an immediate value |
1473 | | Flags: may destroy flags. */ |
1474 | 0 | #define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_DST_BASE + 1) |
1475 | | /* Prefetch value into the level 1 data cache |
1476 | | Note: if the target CPU does not support data prefetch, |
1477 | | no instructions are emitted. |
1478 | | Note: this instruction never fails, even if the memory address is invalid. |
1479 | | Flags: - (does not modify flags) */ |
1480 | 0 | #define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_DST_BASE + 2) |
1481 | | /* Prefetch value into the level 2 data cache |
1482 | | Note: same as SLJIT_PREFETCH_L1 if the target CPU |
1483 | | does not support this instruction form. |
1484 | | Note: this instruction never fails, even if the memory address is invalid. |
1485 | | Flags: - (does not modify flags) */ |
1486 | 0 | #define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_DST_BASE + 3) |
1487 | | /* Prefetch value into the level 3 data cache |
1488 | | Note: same as SLJIT_PREFETCH_L2 if the target CPU |
1489 | | does not support this instruction form. |
1490 | | Note: this instruction never fails, even if the memory address is invalid. |
1491 | | Flags: - (does not modify flags) */ |
1492 | 0 | #define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_DST_BASE + 4) |
1493 | | /* Prefetch a value which is only used once (and can be discarded afterwards) |
1494 | | Note: same as SLJIT_PREFETCH_L1 if the target CPU |
1495 | | does not support this instruction form. |
1496 | | Note: this instruction never fails, even if the memory address is invalid. |
1497 | | Flags: - (does not modify flags) */ |
1498 | 0 | #define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_DST_BASE + 5) |
1499 | | |
1500 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, |
1501 | | sljit_s32 src, sljit_sw srcw); |
1502 | | |
1503 | | /* Fast enter, see SLJIT_FAST_CALL for more details. |
1504 | | Flags: - (does not modify flags) */ |
1505 | 2.78M | #define SLJIT_FAST_ENTER (SLJIT_OP_SRC_DST_BASE + 6) |
1506 | | |
1507 | | /* Copies the return address into dst. The return address is the |
1508 | | address where the execution continues after the called function |
1509 | | returns (see: sljit_emit_return / sljit_emit_return_void). |
1510 | | Flags: - (does not modify flags) */ |
1511 | 0 | #define SLJIT_GET_RETURN_ADDRESS (SLJIT_OP_SRC_DST_BASE + 7) |
1512 | | |
1513 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, |
1514 | | sljit_s32 dst, sljit_sw dstw); |
1515 | | |
1516 | | /* Starting index of opcodes for sljit_emit_fop1. */ |
1517 | 586k | #define SLJIT_FOP1_BASE 144 |
1518 | | |
1519 | | /* Flags: - (does not modify flags) */ |
1520 | 586k | #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) |
1521 | | #define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32) |
1522 | | /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] |
1523 | | SRC/DST TYPE can be: F64, F32, S32, SW |
1524 | | Rounding mode when the destination is SW or S32: round towards zero. */ |
1525 | | /* Flags: - (may destroy flags) */ |
1526 | 0 | #define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) |
1527 | | #define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) |
1528 | | /* Flags: - (may destroy flags) */ |
1529 | 0 | #define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) |
1530 | | #define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) |
1531 | | /* Flags: - (may destroy flags) */ |
1532 | 0 | #define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) |
1533 | | #define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) |
1534 | | /* Flags: - (may destroy flags) */ |
1535 | 0 | #define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) |
1536 | | #define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) |
1537 | | /* Flags: - (may destroy flags) */ |
1538 | 0 | #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) |
1539 | | #define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) |
1540 | | /* Flags: - (may destroy flags) */ |
1541 | | #define SLJIT_CONV_F64_FROM_UW (SLJIT_FOP1_BASE + 6) |
1542 | | #define SLJIT_CONV_F32_FROM_UW (SLJIT_CONV_F64_FROM_UW | SLJIT_32) |
1543 | | /* Flags: - (may destroy flags) */ |
1544 | 0 | #define SLJIT_CONV_F64_FROM_U32 (SLJIT_FOP1_BASE + 7) |
1545 | | #define SLJIT_CONV_F32_FROM_U32 (SLJIT_CONV_F64_FROM_U32 | SLJIT_32) |
1546 | | /* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64. |
1547 | | Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ |
1548 | 0 | #define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 8) |
1549 | | #define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) |
1550 | | /* Flags: - (may destroy flags) */ |
1551 | 0 | #define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 9) |
1552 | | #define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) |
1553 | | /* Flags: - (may destroy flags) */ |
1554 | 0 | #define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 10) |
1555 | | #define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) |
1556 | | |
1557 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, |
1558 | | sljit_s32 dst, sljit_sw dstw, |
1559 | | sljit_s32 src, sljit_sw srcw); |
1560 | | |
1561 | | /* Starting index of opcodes for sljit_emit_fop2. */ |
1562 | | #define SLJIT_FOP2_BASE 176 |
1563 | | |
1564 | | /* Flags: - (may destroy flags) */ |
1565 | | #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) |
1566 | | #define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) |
1567 | | /* Flags: - (may destroy flags) */ |
1568 | | #define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) |
1569 | | #define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) |
1570 | | /* Flags: - (may destroy flags) */ |
1571 | | #define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) |
1572 | | #define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) |
1573 | | /* Flags: - (may destroy flags) */ |
1574 | | #define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) |
1575 | | #define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) |
1576 | | |
1577 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, |
1578 | | sljit_s32 dst, sljit_sw dstw, |
1579 | | sljit_s32 src1, sljit_sw src1w, |
1580 | | sljit_s32 src2, sljit_sw src2w); |
1581 | | |
1582 | | /* Starting index of opcodes for sljit_emit_fop2r. */ |
1583 | | #define SLJIT_FOP2R_BASE 192 |
1584 | | |
1585 | | /* Flags: - (may destroy flags) */ |
1586 | | #define SLJIT_COPYSIGN_F64 (SLJIT_FOP2R_BASE + 0) |
1587 | | #define SLJIT_COPYSIGN_F32 (SLJIT_COPYSIGN_F64 | SLJIT_32) |
1588 | | |
1589 | | /* Similar to sljit_emit_fop2, except the destination is always a register. */ |
1590 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op, |
1591 | | sljit_s32 dst_freg, |
1592 | | sljit_s32 src1, sljit_sw src1w, |
1593 | | sljit_s32 src2, sljit_sw src2w); |
1594 | | |
1595 | | /* Sets a floating point register to an immediate value. */ |
1596 | | |
1597 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, |
1598 | | sljit_s32 freg, sljit_f32 value); |
1599 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, |
1600 | | sljit_s32 freg, sljit_f64 value); |
1601 | | |
1602 | | /* The following opcodes are used by sljit_emit_fcopy(). */ |
1603 | | |
1604 | | /* 64 bit: copy a 64 bit value from an integer register into a |
1605 | | 64 bit floating point register without any modifications. |
1606 | | 32 bit: copy a 32 bit register or register pair into a 64 bit |
1607 | | floating point register without any modifications. The |
1608 | | register, or the first register of the register pair |
1609 | | replaces the high order 32 bit of the floating point |
1610 | | register. If a register pair is passed, the low |
1611 | | order 32 bit is replaced by the second register. |
1612 | | Otherwise, the low order 32 bit is unchanged. */ |
1613 | | #define SLJIT_COPY_TO_F64 1 |
1614 | | /* Copy a 32 bit value from an integer register into a 32 bit |
1615 | | floating point register without any modifications. */ |
1616 | | #define SLJIT_COPY32_TO_F32 (SLJIT_COPY_TO_F64 | SLJIT_32) |
1617 | | /* 64 bit: copy the value of a 64 bit floating point register into |
1618 | | an integer register without any modifications. |
1619 | | 32 bit: copy a 64 bit floating point register into a 32 bit register |
1620 | | or a 32 bit register pair without any modifications. The |
1621 | | high order 32 bit of the floating point register is copied |
1622 | | into the register, or the first register of the register |
1623 | | pair. If a register pair is passed, the low order 32 bit |
1624 | | is copied into the second register. */ |
1625 | | #define SLJIT_COPY_FROM_F64 2 |
1626 | | /* Copy the value of a 32 bit floating point register into an integer |
1627 | | register without any modifications. The register should be processed |
1628 | | with 32 bit operations later. */ |
1629 | | #define SLJIT_COPY32_FROM_F32 (SLJIT_COPY_FROM_F64 | SLJIT_32) |
1630 | | |
1631 | | /* Special data copy which involves floating point registers. |
1632 | | |
1633 | | op must be between SLJIT_COPY_TO_F64 and SLJIT_COPY32_FROM_F32 |
1634 | | freg must be a floating point register |
1635 | | reg must be a register or register pair */ |
1636 | | |
1637 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, |
1638 | | sljit_s32 freg, sljit_s32 reg); |
1639 | | |
1640 | | /* Label and jump instructions. */ |
1641 | | |
1642 | | /* Emits a label which can be the target of jump / mov_addr instructions. */ |
1643 | | |
1644 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); |
1645 | | |
1646 | | /* Alignment values for sljit_emit_aligned_label. */ |
1647 | | |
1648 | | #define SLJIT_LABEL_ALIGN_1 0 |
1649 | | #define SLJIT_LABEL_ALIGN_2 1 |
1650 | | #define SLJIT_LABEL_ALIGN_4 2 |
1651 | | #define SLJIT_LABEL_ALIGN_8 3 |
1652 | | #define SLJIT_LABEL_ALIGN_16 4 |
1653 | | #define SLJIT_LABEL_ALIGN_W SLJIT_WORD_SHIFT |
1654 | | #define SLJIT_LABEL_ALIGN_P SLJIT_POINTER_SHIFT |
1655 | | |
1656 | | /* Emits a label whose address is aligned to a power of 2 value. When some |
1657 | | extra space needs to be added to align the label, that space is filled |
1658 | | with SLJIT_NOP instructions. These labels usually represent the end of a |
1659 | | compilation block, and a new function or some read-only data (e.g. a |
1660 | | jump table) follows it. In these typical cases the SLJIT_NOPs are never |
1661 | | executed. |
1662 | | |
1663 | | Optionally, buffers for storing read-only data or code can be allocated |
1664 | | by this operation. The buffers are passed as a chain list, and a separate |
1665 | | memory area is allocated for each item in the list. All buffers are aligned |
1666 | | to SLJIT_NOP instruction size, and their starting address is returned as |
1667 | | as a label. The sljit_get_label_abs_addr function or the SLJIT_MOV_ABS_ADDR |
1668 | | operation can be used to get the real address. The label of the first buffer |
1669 | | is always the same as the returned label. The buffers are initially |
1670 | | initialized with SLJIT_NOP instructions. The alignment of the buffers can |
1671 | | be controlled by their starting address and sizes. If the starting address |
1672 | | is aligned to N, and size is also divisible by N, the next buffer is aligned |
1673 | | to N. I.e. if a buffer is 16 byte aligned, and its size is divisible by 4, |
1674 | | the next buffer is 4 byte aligned. Note: if a buffer is N (>=2) byte aligned, |
1675 | | it is also N/2 byte aligned. |
1676 | | |
1677 | | alignment represents the alignment, and its value can |
1678 | | be specified by SLJIT_LABEL_ALIGN_* constants |
1679 | | |
1680 | | buffers is a list of read-only buffers stored in a chain list. |
1681 | | After calling sljit_generate_code, these buffers can be |
1682 | | modified by sljit_read_only_buffer_start_writing() / |
1683 | | sljit_read_only_buffer_end_writing() functions |
1684 | | |
1685 | | Note: the constant pool (if present) may be stored before the label. */ |
1686 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler, |
1687 | | sljit_s32 alignment, struct sljit_read_only_buffer *buffers); |
1688 | | |
1689 | | /* The SLJIT_FAST_CALL is a calling method for creating lightweight function |
1690 | | calls. This type of call preserves the values of all registers and stack |
1691 | | frame. Unlike normal function calls, the enter and return operations must |
1692 | | be performed by the SLJIT_FAST_ENTER and SLJIT_FAST_RETURN operations |
1693 | | respectively. The return address is stored in the dst argument of the |
1694 | | SLJIT_FAST_ENTER operation, and this return address should be passed as |
1695 | | the src argument for the SLJIT_FAST_RETURN operation to return from the |
1696 | | called function. |
1697 | | |
1698 | | Fast calls are cheap operations (usually only a single call instruction is |
1699 | | emitted) but they do not preserve any registers. However the callee function |
1700 | | can freely use / update any registers and the locals area which can be |
1701 | | efficiently exploited by various optimizations. Registers can be saved |
1702 | | and restored manually if needed. |
1703 | | |
1704 | | Although returning to different address by SLJIT_FAST_RETURN is possible, |
1705 | | this address usually cannot be predicted by the return address predictor of |
1706 | | modern CPUs which may reduce performance. Furthermore certain security |
1707 | | enhancement technologies such as Intel Control-flow Enforcement Technology |
1708 | | (CET) may disallow returning to a different address (indirect jumps |
1709 | | can be used instead, see SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN). */ |
1710 | | |
1711 | | /* Invert (negate) conditional type: xor (^) with 0x1 */ |
1712 | | |
1713 | | /* Integer comparison types. */ |
1714 | 1.23G | #define SLJIT_EQUAL 0 |
1715 | | #define SLJIT_ZERO SLJIT_EQUAL |
1716 | 3.17G | #define SLJIT_NOT_EQUAL 1 |
1717 | 2.09G | #define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL |
1718 | | |
1719 | 378M | #define SLJIT_LESS 2 |
1720 | | #define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) |
1721 | 164M | #define SLJIT_GREATER_EQUAL 3 |
1722 | | #define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_LESS) |
1723 | 722M | #define SLJIT_GREATER 4 |
1724 | | #define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) |
1725 | 75.6M | #define SLJIT_LESS_EQUAL 5 |
1726 | | #define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_GREATER) |
1727 | 0 | #define SLJIT_SIG_LESS 6 |
1728 | | #define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) |
1729 | 0 | #define SLJIT_SIG_GREATER_EQUAL 7 |
1730 | | #define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_LESS) |
1731 | 99.6M | #define SLJIT_SIG_GREATER 8 |
1732 | | #define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) |
1733 | 44.5k | #define SLJIT_SIG_LESS_EQUAL 9 |
1734 | 44.5k | #define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_GREATER) |
1735 | | |
1736 | 0 | #define SLJIT_OVERFLOW 10 |
1737 | | #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) |
1738 | 0 | #define SLJIT_NOT_OVERFLOW 11 |
1739 | | |
1740 | | /* Unlike other flags, sljit_emit_jump may destroy the carry flag. */ |
1741 | 378M | #define SLJIT_CARRY 12 |
1742 | | #define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) |
1743 | 164M | #define SLJIT_NOT_CARRY 13 |
1744 | | |
1745 | 1.22G | #define SLJIT_ATOMIC_STORED 14 |
1746 | | #define SLJIT_SET_ATOMIC_STORED SLJIT_SET(SLJIT_ATOMIC_STORED) |
1747 | 1.08G | #define SLJIT_ATOMIC_NOT_STORED 15 |
1748 | | |
1749 | | /* Basic floating point comparison types. |
1750 | | |
1751 | | Note: when the comparison result is unordered, their behaviour is unspecified. */ |
1752 | | |
1753 | 1.22G | #define SLJIT_F_EQUAL 16 |
1754 | | #define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL) |
1755 | 1.08G | #define SLJIT_F_NOT_EQUAL 17 |
1756 | | #define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_EQUAL) |
1757 | 378M | #define SLJIT_F_LESS 18 |
1758 | | #define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS) |
1759 | 164M | #define SLJIT_F_GREATER_EQUAL 19 |
1760 | | #define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_LESS) |
1761 | 722M | #define SLJIT_F_GREATER 20 |
1762 | | #define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER) |
1763 | 75.6M | #define SLJIT_F_LESS_EQUAL 21 |
1764 | | #define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_GREATER) |
1765 | | |
1766 | | /* Jumps when either argument contains a NaN value. */ |
1767 | 0 | #define SLJIT_UNORDERED 22 |
1768 | | #define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED) |
1769 | | /* Jumps when neither argument contains a NaN value. */ |
1770 | 0 | #define SLJIT_ORDERED 23 |
1771 | | #define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_UNORDERED) |
1772 | | |
1773 | | /* Ordered / unordered floating point comparison types. |
1774 | | |
1775 | | Note: each comparison type has an ordered and unordered form. Some |
1776 | | architectures support only one of them (see: sljit_cmp_info). */ |
1777 | | |
1778 | 0 | #define SLJIT_ORDERED_EQUAL 24 |
1779 | | #define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) |
1780 | 0 | #define SLJIT_UNORDERED_OR_NOT_EQUAL 25 |
1781 | | #define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) |
1782 | 722M | #define SLJIT_ORDERED_LESS 26 |
1783 | | #define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS) |
1784 | 75.6M | #define SLJIT_UNORDERED_OR_GREATER_EQUAL 27 |
1785 | | #define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS) |
1786 | 722M | #define SLJIT_ORDERED_GREATER 28 |
1787 | | #define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER) |
1788 | 75.6M | #define SLJIT_UNORDERED_OR_LESS_EQUAL 29 |
1789 | | #define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER) |
1790 | | |
1791 | 1.22G | #define SLJIT_UNORDERED_OR_EQUAL 30 |
1792 | | #define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) |
1793 | 1.08G | #define SLJIT_ORDERED_NOT_EQUAL 31 |
1794 | | #define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) |
1795 | 378M | #define SLJIT_UNORDERED_OR_LESS 32 |
1796 | | #define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS) |
1797 | 164M | #define SLJIT_ORDERED_GREATER_EQUAL 33 |
1798 | | #define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS) |
1799 | 378M | #define SLJIT_UNORDERED_OR_GREATER 34 |
1800 | | #define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) |
1801 | 164M | #define SLJIT_ORDERED_LESS_EQUAL 35 |
1802 | | #define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) |
1803 | | |
1804 | | /* Unconditional jump types. */ |
1805 | 18.0G | #define SLJIT_JUMP 36 |
1806 | | /* Fast calling method. See the description above. */ |
1807 | | #define SLJIT_FAST_CALL 37 |
1808 | | /* Default C calling convention. */ |
1809 | 209M | #define SLJIT_CALL 38 |
1810 | | /* Called function must be compiled by SLJIT. |
1811 | | See SLJIT_ENTER_REG_ARG option. */ |
1812 | 109M | #define SLJIT_CALL_REG_ARG 39 |
1813 | | |
1814 | | /* The target can be changed during runtime (see: sljit_set_jump_addr). */ |
1815 | 14.2G | #define SLJIT_REWRITABLE_JUMP 0x10000 |
1816 | | /* When this flag is passed, the execution of the current function ends and |
1817 | | the called function returns to the caller of the current function. The |
1818 | | stack usage is reduced before the call, but it is not necessarily reduced |
1819 | | to zero. In the latter case the compiler needs to allocate space for some |
1820 | | arguments and the return address must be stored on the stack as well. */ |
1821 | 219M | #define SLJIT_CALL_RETURN 0x20000 |
1822 | | |
1823 | | /* Emit a jump instruction. The destination is not set, only the type of the jump. |
1824 | | type must be between SLJIT_EQUAL and SLJIT_FAST_CALL |
1825 | | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP |
1826 | | |
1827 | | Flags: does not modify flags. */ |
1828 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); |
1829 | | |
1830 | | /* Emit a C compiler (ABI) compatible function call. |
1831 | | type must be SLJIT_CALL or SLJIT_CALL_REG_ARG |
1832 | | type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and/or SLJIT_CALL_RETURN |
1833 | | arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros |
1834 | | |
1835 | | Flags: destroy all flags. */ |
1836 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); |
1837 | | |
1838 | | /* Integer comparison operation. In most architectures it is implemented |
1839 | | as a compare (sljit_emit_op2u with SLJIT_SUB) operation followed by |
1840 | | an sljit_emit_jump. However, some architectures (e.g: ARM64 or RISCV) |
1841 | | may optimize the generated code further. It is suggested to use this |
1842 | | comparison form when appropriate. |
1843 | | type must be between SLJIT_EQUAL and SLJIT_SIG_LESS_EQUAL |
1844 | | type can be combined (or'ed) with SLJIT_32 or SLJIT_REWRITABLE_JUMP |
1845 | | |
1846 | | Flags: may destroy flags. */ |
1847 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, |
1848 | | sljit_s32 src1, sljit_sw src1w, |
1849 | | sljit_s32 src2, sljit_sw src2w); |
1850 | | |
1851 | | /* Floating point comparison operation. In most architectures it is |
1852 | | implemented as a SLJIT_CMP_F32/64 operation (setting appropriate |
1853 | | flags) followed by a sljit_emit_jump. However, some architectures |
1854 | | (e.g: MIPS) may optimize the generated code further. It is suggested |
1855 | | to use this comparison form when appropriate. |
1856 | | type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL |
1857 | | type can be combined (or'ed) with SLJIT_32 or SLJIT_REWRITABLE_JUMP |
1858 | | |
1859 | | Flags: destroy flags. |
1860 | | Note: when any operand is NaN the behaviour depends on the comparison type. */ |
1861 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, |
1862 | | sljit_s32 src1, sljit_sw src1w, |
1863 | | sljit_s32 src2, sljit_sw src2w); |
1864 | | |
1865 | | /* The following flags are used by sljit_emit_op2cmpz(). */ |
1866 | | #define SLJIT_JUMP_IF_NON_ZERO 0 |
1867 | | #define SLJIT_JUMP_IF_ZERO SLJIT_SET_Z |
1868 | | |
1869 | | /* Perform an integer arithmetic operation, then its result is compared to |
1870 | | zero. In most architectures it is implemented as an sljit_emit_op2 |
1871 | | followed by an sljit_emit_jump. However, some architectures (e.g: RISCV) |
1872 | | may optimize the generated code further. It is suggested to use this |
1873 | | operation form when appropriate (e.g. for loops with counters). |
1874 | | |
1875 | | op must be an sljit_emit_op2 operation where zero flag can be set, |
1876 | | op can be combined with SLJIT_SET_* status flag setters except |
1877 | | SLJIT_SET_Z, SLJIT_REWRITABLE_JUMP or SLJIT_JUMP_IF_* option bits. |
1878 | | |
1879 | | Note: SLJIT_JUMP_IF_NON_ZERO is the default operation if neither |
1880 | | SLJIT_JUMP_IF_ZERO nor SLJIT_JUMP_IF_NON_ZERO is specified. |
1881 | | Flags: sets the variable flag depending on op argument, the |
1882 | | zero flag is undefined. */ |
1883 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op2cmpz(struct sljit_compiler *compiler, sljit_s32 op, |
1884 | | sljit_s32 dst, sljit_sw dstw, |
1885 | | sljit_s32 src1, sljit_sw src1w, |
1886 | | sljit_s32 src2, sljit_sw src2w); |
1887 | | |
1888 | | /* Set the destination of the jump to this label. */ |
1889 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); |
1890 | | /* Set the destination of the jump to this target address. */ |
1891 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); |
1892 | | |
1893 | | /* Emit an indirect jump or fast call. |
1894 | | Direct form: set src to SLJIT_IMM() and srcw to the address |
1895 | | Indirect form: any other valid addressing mode |
1896 | | type must be between SLJIT_JUMP and SLJIT_FAST_CALL |
1897 | | |
1898 | | Flags: does not modify flags. */ |
1899 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); |
1900 | | |
1901 | | /* Emit a C compiler (ABI) compatible function call. |
1902 | | Direct form: set src to SLJIT_IMM() and srcw to the address |
1903 | | Indirect form: any other valid addressing mode |
1904 | | type must be SLJIT_CALL or SLJIT_CALL_REG_ARG |
1905 | | type can be combined (or'ed) with SLJIT_CALL_RETURN |
1906 | | arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros |
1907 | | |
1908 | | Flags: destroy all flags. */ |
1909 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw); |
1910 | | |
1911 | | /* Perform an operation using the conditional flags as the second argument. |
1912 | | Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL. |
1913 | | The value represented by the type is 1, if the condition represented |
1914 | | by the type is fulfilled, and 0 otherwise. |
1915 | | |
1916 | | When op is SLJIT_MOV or SLJIT_MOV32: |
1917 | | Set dst to the value represented by the type (0 or 1). |
1918 | | Flags: - (does not modify flags) |
1919 | | When op is SLJIT_AND, SLJIT_AND32, SLJIT_OR, SLJIT_OR32, SLJIT_XOR, or SLJIT_XOR32 |
1920 | | Performs the binary operation using dst as the first, and the value |
1921 | | represented by type as the second argument. Result is written into dst. |
1922 | | Flags: Z (may destroy flags) */ |
1923 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, |
1924 | | sljit_s32 dst, sljit_sw dstw, |
1925 | | sljit_s32 type); |
1926 | | |
1927 | | /* Emit a conditional select instruction which moves src1 to dst_reg, |
1928 | | if the condition is satisfied, or src2_reg to dst_reg otherwise. |
1929 | | |
1930 | | type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL |
1931 | | type can be combined (or'ed) with SLJIT_32 to move 32 bit |
1932 | | register values instead of word sized ones |
1933 | | dst_reg and src2_reg must be valid registers |
1934 | | src1 must be valid operand |
1935 | | |
1936 | | Note: if src1 is a memory operand, its value |
1937 | | might be loaded even if the condition is false. |
1938 | | |
1939 | | Flags: - (does not modify flags) */ |
1940 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, |
1941 | | sljit_s32 dst_reg, |
1942 | | sljit_s32 src1, sljit_sw src1w, |
1943 | | sljit_s32 src2_reg); |
1944 | | |
1945 | | /* Emit a conditional floating point select instruction which moves |
1946 | | src1 to dst_reg, if the condition is satisfied, or src2_reg to |
1947 | | dst_reg otherwise. |
1948 | | |
1949 | | type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL |
1950 | | type can be combined (or'ed) with SLJIT_32 to move 32 bit |
1951 | | floating point values instead of 64 bit ones |
1952 | | dst_freg and src2_freg must be valid floating point registers |
1953 | | src1 must be valid operand |
1954 | | |
1955 | | Note: if src1 is a memory operand, its value |
1956 | | might be loaded even if the condition is false. |
1957 | | |
1958 | | Flags: - (does not modify flags) */ |
1959 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, |
1960 | | sljit_s32 dst_freg, |
1961 | | sljit_s32 src1, sljit_sw src1w, |
1962 | | sljit_s32 src2_freg); |
1963 | | |
1964 | | /* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(), |
1965 | | sljit_emit_fmem(), and sljit_emit_fmem_update(). */ |
1966 | | |
1967 | | /* Memory load operation. This is the default. */ |
1968 | | #define SLJIT_MEM_LOAD 0x000000 |
1969 | | /* Memory store operation. */ |
1970 | 28.6k | #define SLJIT_MEM_STORE 0x000200 |
1971 | | |
1972 | | /* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */ |
1973 | | |
1974 | | /* Load or store data from an unaligned (byte aligned) address. */ |
1975 | | #define SLJIT_MEM_UNALIGNED 0x000400 |
1976 | | /* Load or store data from a 16 bit aligned address. */ |
1977 | | #define SLJIT_MEM_ALIGNED_16 0x000800 |
1978 | | /* Load or store data from a 32 bit aligned address. */ |
1979 | | #define SLJIT_MEM_ALIGNED_32 0x001000 |
1980 | | |
1981 | | /* The following flags are used by sljit_emit_mem_update(), |
1982 | | and sljit_emit_fmem_update(). */ |
1983 | | |
1984 | | /* Base register is updated before the memory access (default). */ |
1985 | 1.48M | #define SLJIT_MEM_PRE 0x000000 |
1986 | | /* Base register is updated after the memory access. */ |
1987 | 33.3k | #define SLJIT_MEM_POST 0x000400 |
1988 | | |
1989 | | /* When SLJIT_MEM_SUPP is passed, no instructions are emitted. |
1990 | | Instead the function returns with SLJIT_SUCCESS if the instruction |
1991 | | form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag |
1992 | | allows runtime checking of available instruction forms. */ |
1993 | 1.52M | #define SLJIT_MEM_SUPP 0x000800 |
1994 | | |
1995 | | /* The sljit_emit_mem emits instructions for various memory operations: |
1996 | | |
1997 | | When SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_16 / |
1998 | | SLJIT_MEM_ALIGNED_32 is set in type argument: |
1999 | | Emit instructions for unaligned memory loads or stores. When |
2000 | | SLJIT_UNALIGNED is not defined, the only way to access unaligned |
2001 | | memory data is using sljit_emit_mem. Otherwise all operations (e.g. |
2002 | | sljit_emit_op1/2, or sljit_emit_fop1/2) support unaligned access. |
2003 | | In general, the performance of unaligned memory accesses is often |
2004 | | lower than that of aligned ones, so they should be avoided. |
2005 | | |
2006 | | When a pair of registers is passed in reg argument: |
2007 | | Emit instructions for moving data between a register pair and |
2008 | | memory. The register pair can be specified by the SLJIT_REG_PAIR |
2009 | | macro. The first register is loaded from or stored into the |
2010 | | location specified by the mem/memw arguments, and the end address |
2011 | | of this operation is the starting address of the data transfer |
2012 | | between the second register and memory. The type argument must |
2013 | | be SLJIT_MOV. The SLJIT_MEM_UNALIGNED / SLJIT_MEM_ALIGNED_* |
2014 | | options are allowed for this operation. |
2015 | | |
2016 | | type must be between SLJIT_MOV and SLJIT_MOV_P and can be |
2017 | | combined (or'ed) with SLJIT_MEM_* flags |
2018 | | reg is a register or register pair, which is the source or |
2019 | | destination of the operation |
2020 | | mem must be a memory operand |
2021 | | |
2022 | | Flags: - (does not modify flags) */ |
2023 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, |
2024 | | sljit_s32 reg, |
2025 | | sljit_s32 mem, sljit_sw memw); |
2026 | | |
2027 | | /* Emit a single memory load or store with update instruction. |
2028 | | When the requested instruction form is not supported by the CPU, |
2029 | | it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the |
2030 | | instruction. This allows specializing tight loops based on |
2031 | | the supported instruction forms (see SLJIT_MEM_SUPP flag). |
2032 | | Absolute address (SLJIT_MEM0) forms are never supported |
2033 | | and the base (first) register specified by the mem argument |
2034 | | must not be SLJIT_SP and must also be different from the |
2035 | | register specified by the reg argument. |
2036 | | |
2037 | | type must be between SLJIT_MOV and SLJIT_MOV_P and can be |
2038 | | combined (or'ed) with SLJIT_MEM_* flags |
2039 | | reg is the source or destination register of the operation |
2040 | | mem must be a memory operand |
2041 | | |
2042 | | Flags: - (does not modify flags) */ |
2043 | | |
2044 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, |
2045 | | sljit_s32 reg, |
2046 | | sljit_s32 mem, sljit_sw memw); |
2047 | | |
2048 | | /* Same as sljit_emit_mem except for the following: |
2049 | | |
2050 | | Loading or storing a pair of registers is not supported. |
2051 | | |
2052 | | type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be |
2053 | | combined (or'ed) with SLJIT_MEM_* flags. |
2054 | | freg is the source or destination floating point register |
2055 | | of the operation |
2056 | | mem must be a memory operand |
2057 | | |
2058 | | Flags: - (does not modify flags) */ |
2059 | | |
2060 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, |
2061 | | sljit_s32 freg, |
2062 | | sljit_s32 mem, sljit_sw memw); |
2063 | | |
2064 | | /* Same as sljit_emit_mem_update except for the following: |
2065 | | |
2066 | | type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be |
2067 | | combined (or'ed) with SLJIT_MEM_* flags |
2068 | | freg is the source or destination floating point register |
2069 | | of the operation |
2070 | | mem must be a memory operand |
2071 | | |
2072 | | Flags: - (does not modify flags) */ |
2073 | | |
2074 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, |
2075 | | sljit_s32 freg, |
2076 | | sljit_s32 mem, sljit_sw memw); |
2077 | | |
2078 | | /* The following options are used by several simd operations. */ |
2079 | | |
2080 | | /* Load data into a vector register, this is the default */ |
2081 | | #define SLJIT_SIMD_LOAD 0x000000 |
2082 | | /* Store data from a vector register */ |
2083 | 2.16M | #define SLJIT_SIMD_STORE 0x000001 |
2084 | | /* The vector register contains floating point values */ |
2085 | 4.71M | #define SLJIT_SIMD_FLOAT 0x000400 |
2086 | | /* Tests whether the operation is available */ |
2087 | 3.64M | #define SLJIT_SIMD_TEST 0x000800 |
2088 | | /* Move data to/from a 64 bit (8 byte) long vector register */ |
2089 | | #define SLJIT_SIMD_REG_64 (3 << 12) |
2090 | | /* Move data to/from a 128 bit (16 byte) long vector register */ |
2091 | 12.8M | #define SLJIT_SIMD_REG_128 (4 << 12) |
2092 | | /* Move data to/from a 256 bit (32 byte) long vector register */ |
2093 | 5.99M | #define SLJIT_SIMD_REG_256 (5 << 12) |
2094 | | /* Move data to/from a 512 bit (64 byte) long vector register */ |
2095 | 0 | #define SLJIT_SIMD_REG_512 (6 << 12) |
2096 | | /* Element size is 8 bit long (this is the default), usually cannot be combined with SLJIT_SIMD_FLOAT */ |
2097 | 974k | #define SLJIT_SIMD_ELEM_8 (0 << 18) |
2098 | | /* Element size is 16 bit long, usually cannot be combined with SLJIT_SIMD_FLOAT */ |
2099 | | #define SLJIT_SIMD_ELEM_16 (1 << 18) |
2100 | | /* Element size is 32 bit long */ |
2101 | 1.12M | #define SLJIT_SIMD_ELEM_32 (2 << 18) |
2102 | | /* Element size is 64 bit long */ |
2103 | | #define SLJIT_SIMD_ELEM_64 (3 << 18) |
2104 | | /* Element size is 128 bit long */ |
2105 | | #define SLJIT_SIMD_ELEM_128 (4 << 18) |
2106 | | /* Element size is 256 bit long */ |
2107 | | #define SLJIT_SIMD_ELEM_256 (5 << 18) |
2108 | | |
2109 | | /* The following options are used by sljit_emit_simd_mov() |
2110 | | and sljit_emit_simd_op2(). */ |
2111 | | |
2112 | | /* Memory address is unaligned (this is the default) */ |
2113 | | #define SLJIT_SIMD_MEM_UNALIGNED (0 << 24) |
2114 | | /* Memory address is 16 bit aligned */ |
2115 | | #define SLJIT_SIMD_MEM_ALIGNED_16 (1 << 24) |
2116 | | /* Memory address is 32 bit aligned */ |
2117 | | #define SLJIT_SIMD_MEM_ALIGNED_32 (2 << 24) |
2118 | | /* Memory address is 64 bit aligned */ |
2119 | | #define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24) |
2120 | | /* Memory address is 128 bit aligned */ |
2121 | 1.94M | #define SLJIT_SIMD_MEM_ALIGNED_128 (4 << 24) |
2122 | | /* Memory address is 256 bit aligned */ |
2123 | 0 | #define SLJIT_SIMD_MEM_ALIGNED_256 (5 << 24) |
2124 | | /* Memory address is 512 bit aligned */ |
2125 | | #define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24) |
2126 | | |
2127 | | /* Moves data between a vector register and memory. |
2128 | | |
2129 | | If the operation is not supported, it returns with |
2130 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2131 | | it does not emit any instructions. |
2132 | | |
2133 | | type must be a combination of SLJIT_SIMD_* and |
2134 | | SLJIT_SIMD_MEM_* options |
2135 | | vreg is the source or destination vector register |
2136 | | of the operation |
2137 | | srcdst must be a memory operand or a vector register |
2138 | | |
2139 | | Note: |
2140 | | The alignment and element size must be |
2141 | | less than or equal to the vector register size. |
2142 | | |
2143 | | Flags: - (does not modify flags) */ |
2144 | | |
2145 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, |
2146 | | sljit_s32 vreg, |
2147 | | sljit_s32 srcdst, sljit_sw srcdstw); |
2148 | | |
2149 | | /* Replicates a scalar value to all lanes of a vector |
2150 | | register. |
2151 | | |
2152 | | If the operation is not supported, it returns with |
2153 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2154 | | it does not emit any instructions. |
2155 | | |
2156 | | type must be a combination of SLJIT_SIMD_* options |
2157 | | except SLJIT_SIMD_STORE. |
2158 | | vreg is the destination vector register of the operation |
2159 | | src is the value which is replicated |
2160 | | |
2161 | | Note: |
2162 | | The src == SLJIT_IMM and srcw == 0 can be used to |
2163 | | clear a register even when SLJIT_SIMD_FLOAT is set. |
2164 | | |
2165 | | Flags: - (does not modify flags) */ |
2166 | | |
2167 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, |
2168 | | sljit_s32 vreg, |
2169 | | sljit_s32 src, sljit_sw srcw); |
2170 | | |
2171 | | /* The following options are used by sljit_emit_simd_lane_mov(). */ |
2172 | | |
2173 | | /* Clear all bits of the simd register before loading the lane. */ |
2174 | 1.12M | #define SLJIT_SIMD_LANE_ZERO 0x000002 |
2175 | | /* Sign extend the integer value stored from the lane. */ |
2176 | 0 | #define SLJIT_SIMD_LANE_SIGNED 0x000004 |
2177 | | |
2178 | | /* Moves data between a vector register lane and a register or |
2179 | | memory. If the srcdst argument is a register, it must be |
2180 | | a floating point register when SLJIT_SIMD_FLOAT is specified, |
2181 | | or a general purpose register otherwise. |
2182 | | |
2183 | | If the operation is not supported, it returns with |
2184 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2185 | | it does not emit any instructions. |
2186 | | |
2187 | | type must be a combination of SLJIT_SIMD_* options |
2188 | | Further options: |
2189 | | SLJIT_32 - when SLJIT_SIMD_FLOAT is not set |
2190 | | SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE |
2191 | | is set and SLJIT_SIMD_FLOAT is not set |
2192 | | SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD |
2193 | | is specified |
2194 | | vreg is the source or destination vector register |
2195 | | of the operation |
2196 | | lane_index is the index of the lane |
2197 | | srcdst is the destination operand for loads, and |
2198 | | source operand for stores |
2199 | | |
2200 | | Note: |
2201 | | The elem size must be lower than register size. |
2202 | | |
2203 | | Flags: - (does not modify flags) */ |
2204 | | |
2205 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, |
2206 | | sljit_s32 vreg, sljit_s32 lane_index, |
2207 | | sljit_s32 srcdst, sljit_sw srcdstw); |
2208 | | |
2209 | | /* Replicates a scalar value from a lane to all lanes |
2210 | | of a vector register. |
2211 | | |
2212 | | If the operation is not supported, it returns with |
2213 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2214 | | it does not emit any instructions. |
2215 | | |
2216 | | type must be a combination of SLJIT_SIMD_* options |
2217 | | except SLJIT_SIMD_STORE. |
2218 | | vreg is the destination vector register of the operation |
2219 | | src is the vector register which lane is replicated |
2220 | | src_lane_index is the lane index of the src register |
2221 | | |
2222 | | Flags: - (does not modify flags) */ |
2223 | | |
2224 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, |
2225 | | sljit_s32 vreg, |
2226 | | sljit_s32 src, sljit_s32 src_lane_index); |
2227 | | |
2228 | | /* The following options are used by sljit_emit_simd_extend(). */ |
2229 | | |
2230 | | /* Sign extend the integer elements */ |
2231 | | #define SLJIT_SIMD_EXTEND_SIGNED 0x000002 |
2232 | | /* Extend data to 16 bit */ |
2233 | | #define SLJIT_SIMD_EXTEND_16 (1 << 24) |
2234 | | /* Extend data to 32 bit */ |
2235 | | #define SLJIT_SIMD_EXTEND_32 (2 << 24) |
2236 | | /* Extend data to 64 bit */ |
2237 | | #define SLJIT_SIMD_EXTEND_64 (3 << 24) |
2238 | | |
2239 | | /* Extends elements and stores them in a vector register. |
2240 | | The extension operation increases the size of the |
2241 | | elements (e.g. from 16 bit to 64 bit). For integer |
2242 | | values, the extension can be signed or unsigned. |
2243 | | |
2244 | | If the operation is not supported, it returns with |
2245 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2246 | | it does not emit any instructions. |
2247 | | |
2248 | | type must be a combination of SLJIT_SIMD_*, and |
2249 | | SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE |
2250 | | vreg is the destination vector register of the operation |
2251 | | src must be a memory operand or a vector register. |
2252 | | In the latter case, the source elements are stored |
2253 | | in the lower half of the register. |
2254 | | |
2255 | | Flags: - (does not modify flags) */ |
2256 | | |
2257 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, |
2258 | | sljit_s32 vreg, |
2259 | | sljit_s32 src, sljit_sw srcw); |
2260 | | |
2261 | | /* Extract the highest bit (usually the sign bit) from |
2262 | | each element of a vector. |
2263 | | |
2264 | | If the operation is not supported, it returns with |
2265 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2266 | | it does not emit any instructions. |
2267 | | |
2268 | | type must be a combination of SLJIT_SIMD_* and SLJIT_32 |
2269 | | options except SLJIT_SIMD_LOAD |
2270 | | vreg is the source vector register of the operation |
2271 | | dst is the destination operand |
2272 | | |
2273 | | Flags: - (does not modify flags) */ |
2274 | | |
2275 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, |
2276 | | sljit_s32 vreg, |
2277 | | sljit_s32 dst, sljit_sw dstw); |
2278 | | |
2279 | | /* The following operations are used by sljit_emit_simd_op2(). */ |
2280 | | |
2281 | | /* Binary 'and' operation */ |
2282 | 433k | #define SLJIT_SIMD_OP2_AND 0x000001 |
2283 | | /* Binary 'or' operation */ |
2284 | 0 | #define SLJIT_SIMD_OP2_OR 0x000002 |
2285 | | /* Binary 'xor' operation */ |
2286 | 0 | #define SLJIT_SIMD_OP2_XOR 0x000003 |
2287 | | /* Shuffle bytes of src1 using the indices in src2 */ |
2288 | 0 | #define SLJIT_SIMD_OP2_SHUFFLE 0x000004 |
2289 | | |
2290 | | /* Perform simd operations using vector registers. |
2291 | | |
2292 | | If the operation is not supported, it returns with |
2293 | | SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed, |
2294 | | it does not emit any instructions. |
2295 | | |
2296 | | type must be a combination of SLJIT_SIMD_*, SLJIT_SIMD_MEM_* |
2297 | | and SLJIT_SIMD_OP2_* options except SLJIT_SIMD_LOAD |
2298 | | and SLJIT_SIMD_STORE |
2299 | | dst_vreg is the destination register of the operation |
2300 | | src1_vreg is the first source register of the operation |
2301 | | src2 is the second source operand of the operation |
2302 | | |
2303 | | Flags: - (does not modify flags) */ |
2304 | | |
2305 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, |
2306 | | sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w); |
2307 | | |
2308 | | /* The following options are used by the sljit_emit_atomic_load() and |
2309 | | sljit_emit_atomic_store() operations. */ |
2310 | | |
2311 | | /* Tests whether the atomic operation is available (does not generate |
2312 | | any instructions). When a load form is allowed, its corresponding |
2313 | | store form is allowed and vice versa. */ |
2314 | | #define SLJIT_ATOMIC_TEST 0x10000 |
2315 | | /* The compiler must generate compare and swap instruction. |
2316 | | When this bit is set, calling sljit_emit_atomic_load() is optional. */ |
2317 | | #define SLJIT_ATOMIC_USE_CAS 0x20000 |
2318 | | /* The compiler must generate load-acquire and store-release instructions. |
2319 | | When this bit is set, the temp_reg for sljit_emit_atomic_store is not used. */ |
2320 | | #define SLJIT_ATOMIC_USE_LS 0x40000 |
2321 | | |
2322 | | /* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair |
2323 | | can perform an atomic read-modify-write operation. First, an unsigned |
2324 | | value must be loaded from memory using sljit_emit_atomic_load. Then, |
2325 | | the updated value must be written back to the same memory location by |
2326 | | sljit_emit_atomic_store. A thread can only perform a single atomic |
2327 | | operation at a time. |
2328 | | |
2329 | | The following conditions must be satisfied, or the operation |
2330 | | is undefined: |
2331 | | - the address provided in mem_reg must be divisible by the size of |
2332 | | the value (only naturally aligned updates are supported) |
2333 | | - no memory operations are allowed between the load and store operations |
2334 | | - the memory operation (op) and the base address (stored in mem_reg) |
2335 | | passed to the load/store operations must be the same (the mem_reg |
2336 | | can be a different register, only its value must be the same) |
2337 | | - a store must always follow a load for the same transaction. |
2338 | | |
2339 | | op must be between SLJIT_MOV and SLJIT_MOV_P |
2340 | | dst_reg is the register where the data will be loaded into |
2341 | | mem_reg is the base address of the memory load (it cannot be |
2342 | | SLJIT_SP or a virtual register on x86-32) |
2343 | | |
2344 | | Flags: - (does not modify flags) */ |
2345 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, |
2346 | | sljit_s32 dst_reg, |
2347 | | sljit_s32 mem_reg); |
2348 | | |
2349 | | /* The sljit_emit_atomic_load and sljit_emit_atomic_store operations |
2350 | | allow performing an atomic read-modify-write operation. See the |
2351 | | description of sljit_emit_atomic_load. |
2352 | | |
2353 | | op must be between SLJIT_MOV and SLJIT_MOV_P |
2354 | | src_reg is the register which value is stored into the memory |
2355 | | mem_reg is the base address of the memory store (it cannot be |
2356 | | SLJIT_SP or a virtual register on x86-32) |
2357 | | temp_reg is a scratch register, which must be initialized with |
2358 | | the value loaded into the dst_reg during the corresponding |
2359 | | sljit_emit_atomic_load operation, or the operation is undefined. |
2360 | | The temp_reg register preserves its value, if the memory store |
2361 | | is successful. Otherwise, its value is undefined. |
2362 | | |
2363 | | Flags: ATOMIC_STORED |
2364 | | if ATOMIC_STORED flag is set, it represents that the memory |
2365 | | is updated with a new value. Otherwise the memory is unchanged. */ |
2366 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, |
2367 | | sljit_s32 src_reg, |
2368 | | sljit_s32 mem_reg, |
2369 | | sljit_s32 temp_reg); |
2370 | | |
2371 | | /* Copies the base address of SLJIT_SP + offset to dst. The offset can |
2372 | | represent the starting address of a value in the local data (stack). |
2373 | | The offset is not limited by the local data limits, it can be any value. |
2374 | | For example, if an array of bytes is stored on the stack from |
2375 | | offset 0x40, and R0 contains the offset of an array item plus 0x120, |
2376 | | this item can be changed by two SLJIT instructions: |
2377 | | |
2378 | | sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120); |
2379 | | sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5); |
2380 | | |
2381 | | Flags: - (may destroy flags) */ |
2382 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset); |
2383 | | |
2384 | | /* Store a value that can be changed at runtime. The constant |
2385 | | can be managed by sljit_get_const_addr and sljit_set_const. |
2386 | | |
2387 | | op must be SLJIT_MOV, SLJIT_MOV32, SLJIT_MOV_S32, |
2388 | | SLJIT_MOV_U8, SLJIT_MOV32_U8 |
2389 | | |
2390 | | Note: when SLJIT_MOV_U8 is used, and dst is a register, |
2391 | | init_value supports a 9 bit signed value between [-256..255] |
2392 | | |
2393 | | Flags: - (does not modify flags) */ |
2394 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op, |
2395 | | sljit_s32 dst, sljit_sw dstw, |
2396 | | sljit_sw init_value); |
2397 | | |
2398 | | /* Opcodes for sljit_emit_op_addr. */ |
2399 | | |
2400 | | /* The address is suitable for jump/call target. */ |
2401 | 16.6M | #define SLJIT_MOV_ADDR 0 |
2402 | | /* The address is suitable for reading memory. */ |
2403 | | #define SLJIT_MOV_ABS_ADDR 1 |
2404 | | /* Add absolute address. */ |
2405 | 8.32M | #define SLJIT_ADD_ABS_ADDR 2 |
2406 | | |
2407 | | /* Store the value of a label (see: sljit_set_label / sljit_set_target) |
2408 | | Flags: - (does not modify flags) */ |
2409 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op, |
2410 | | sljit_s32 dst, sljit_sw dstw); |
2411 | | |
2412 | | /* Returns the address of a label after sljit_generate_code is called, and |
2413 | | before the compiler is freed by sljit_free_compiler. It is recommended |
2414 | | to save these addresses elsewhere before sljit_free_compiler is called. |
2415 | | |
2416 | | The address returned by sljit_get_label_addr is suitable for a jump/call |
2417 | | target, and the address returned by sljit_get_label_abs_addr is suitable |
2418 | | for reading memory (with SLJIT_CONFIG_ARM_THUMB2 the lowest address bit is cleared). */ |
2419 | | |
2420 | 0 | static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->u.addr; } |
2421 | | #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) |
2422 | | static SLJIT_INLINE sljit_uw sljit_get_label_abs_addr(struct sljit_label *label) { return label->u.addr & ~(sljit_uw)1; } |
2423 | | #else /* !SLJIT_CONFIG_ARM_THUMB2 */ |
2424 | 0 | static SLJIT_INLINE sljit_uw sljit_get_label_abs_addr(struct sljit_label *label) { return label->u.addr; } |
2425 | | #endif /* SLJIT_CONFIG_ARM_THUMB2 */ |
2426 | | |
2427 | | /* Returns the address of jump and const instructions after sljit_generate_code |
2428 | | is called, and before the compiler is freed by sljit_free_compiler. It is |
2429 | | recommended to save these addresses elsewhere before sljit_free_compiler is called. Both getters only read the addr member of the object passed in. */ |
2430 | | |
2431 | 0 | static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } |
2432 | 0 | static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } |
2433 | | |
2434 | | /* Only the address and executable offset are required to perform dynamic |
2435 | | code modifications. See sljit_get_executable_offset function. */ |
2436 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); |
2437 | | /* The op opcode must be set to the same value that was passed to sljit_emit_const. */ |
2438 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset); |
2439 | | |
2440 | | /* Only a single buffer is writable at a time, so sljit_read_only_buffer_end_writing() |
2441 | | must be called before sljit_read_only_buffer_start_writing() is called again. */ |
2442 | | SLJIT_API_FUNC_ATTRIBUTE void* sljit_read_only_buffer_start_writing(sljit_uw addr, sljit_uw size, sljit_sw executable_offset); |
2443 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_read_only_buffer_end_writing(sljit_uw addr, sljit_uw size, sljit_sw executable_offset); |
2444 | | |
2445 | | /* --------------------------------------------------------------------- */ |
2446 | | /* CPU specific functions */ |
2447 | | /* --------------------------------------------------------------------- */ |
2448 | | |
2449 | | /* Types for sljit_get_register_index */ |
2450 | | |
2451 | | /* General purpose (integer) registers. */ |
2452 | 10.8M | #define SLJIT_GP_REGISTER 0 |
2453 | | /* Floating point registers. */ |
2454 | 4.76M | #define SLJIT_FLOAT_REGISTER 1 |
2455 | | |
2456 | | /* The following function is a helper function for sljit_emit_op_custom. |
2457 | | It returns the real machine register index ( >=0 ) of the given register. |
2458 | | |
2459 | | When type is SLJIT_GP_REGISTER: |
2460 | | reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register |
2461 | | |
2462 | | When type is SLJIT_FLOAT_REGISTER: |
2463 | | reg must be an SLJIT_FR(i) or SLJIT_FS(i) register |
2464 | | |
2465 | | When type is SLJIT_SIMD_REG_64 / 128 / 256 / 512 : |
2466 | | reg must be an SLJIT_FR(i) or SLJIT_FS(i) register |
2467 | | |
2468 | | Note: it returns with -1 for unknown registers, such as virtual |
2469 | | registers on x86-32 or unsupported simd registers. */ |
2470 | | |
2471 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg); |
2472 | | |
2473 | | /* Any instruction can be inserted into the instruction stream by |
2474 | | sljit_emit_op_custom. It has a similar purpose as inline assembly. |
2475 | | The size parameter must match the instruction size of the target |
2476 | | architecture: |
2477 | | |
2478 | | x86: 0 < size <= 15, the instruction argument can be byte aligned. |
2479 | | Thumb2: if size == 2, the instruction argument must be 2 byte aligned. |
2480 | | if size == 4, the instruction argument must be 4 byte aligned. |
2481 | | s390x: size can be 2, 4, or 6, the instruction argument can be byte aligned. |
2482 | | Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ |
2483 | | |
2484 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, |
2485 | | void *instruction, sljit_u32 size); |
2486 | | |
2487 | | /* Flags were set by a 32 bit operation. */ |
2488 | | #define SLJIT_CURRENT_FLAGS_32 SLJIT_32 |
2489 | | |
2490 | | /* Flags were set by an ADD or ADDC operation. */ |
2491 | | #define SLJIT_CURRENT_FLAGS_ADD 0x01 |
2492 | | /* Flags were set by a SUB or SUBC operation. */ |
2493 | 44.5k | #define SLJIT_CURRENT_FLAGS_SUB 0x02 |
2494 | | |
2495 | | /* Flags were set by sljit_emit_op2u with the SLJIT_SUB opcode. |
2496 | | Must be combined with SLJIT_CURRENT_FLAGS_SUB. */ |
2497 | 44.5k | #define SLJIT_CURRENT_FLAGS_COMPARE 0x04 |
2498 | | |
2499 | | /* Flags were set by an sljit_emit_op2cmpz operation. */ |
2500 | | #define SLJIT_CURRENT_FLAGS_OP2CMPZ 0x08 |
2501 | | |
2502 | | /* Define the currently available CPU status flags. It is usually used after |
2503 | | an sljit_emit_label or sljit_emit_op_custom operation to define which CPU |
2504 | | status flags are available. |
2505 | | |
2506 | | The current_flags must be a valid combination of SLJIT_SET_* and |
2507 | | SLJIT_CURRENT_FLAGS_* constants. */ |
2508 | | |
2509 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, |
2510 | | sljit_s32 current_flags); |
2511 | | |
2512 | | /* --------------------------------------------------------------------- */ |
2513 | | /* Serialization functions */ |
2514 | | /* --------------------------------------------------------------------- */ |
2515 | | |
2516 | | /* Label/jump/const enumeration functions. The items in each group |
2517 | | are enumerated in creation order. Serialization / deserialization |
2518 | | preserves this order for each group. For example the fifth label |
2519 | | after deserialization refers to the same machine code location as |
2520 | | the fifth label before the serialization. The sljit_get_first_* functions return the labels / jumps / consts member of the compiler, and the sljit_get_next_* functions return the next member of the given item. NOTE(review): the end of each list is presumably marked by NULL - confirm. */ |
2521 | 0 | static SLJIT_INLINE struct sljit_label *sljit_get_first_label(struct sljit_compiler *compiler) { return compiler->labels; } |
2522 | 0 | static SLJIT_INLINE struct sljit_jump *sljit_get_first_jump(struct sljit_compiler *compiler) { return compiler->jumps; } |
2523 | 0 | static SLJIT_INLINE struct sljit_const *sljit_get_first_const(struct sljit_compiler *compiler) { return compiler->consts; } |
2524 | | |
2525 | 0 | static SLJIT_INLINE struct sljit_label *sljit_get_next_label(struct sljit_label *label) { return label->next; } |
2526 | 0 | static SLJIT_INLINE struct sljit_jump *sljit_get_next_jump(struct sljit_jump *jump) { return jump->next; } |
2527 | 0 | static SLJIT_INLINE struct sljit_const *sljit_get_next_const(struct sljit_const *const_) { return const_->next; } |
2528 | | |
2529 | | /* A number starting from 0 is assigned to each label, which |
2530 | | represents its creation index. The first label created by the |
2531 | | compiler has index 0, the second one has index 1, the third one |
2532 | | has index 2, and so on. The returned value is unspecified after |
2533 | | sljit_generate_code() is called. |
2534 | | |
2535 | | It is recommended to use this function to get the creation index |
2536 | | of a label, since sljit_emit_label() may return with the last label, |
2537 | | if no code is generated since the last sljit_emit_label() call. */ |
2538 | | SLJIT_API_FUNC_ATTRIBUTE sljit_uw sljit_get_label_index(struct sljit_label *label); |
2539 | | |
2540 | | /* The sljit_jump_has_label() and sljit_jump_has_target() functions |
2541 | | return a non-zero value if a label or a target is set for the jump, |
2542 | | respectively. Both may return zero. sljit_jump_get_label() and sljit_jump_get_target() |
2543 | | return the value assigned to the jump by reading jump->u.label and jump->u.target. NOTE(review): u looks like a union, so presumably only the getter matching a non-zero has_* result is meaningful - confirm. */ |
2544 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump); |
2545 | 0 | static SLJIT_INLINE struct sljit_label *sljit_jump_get_label(struct sljit_jump *jump) { return jump->u.label; } |
2546 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump); |
2547 | 0 | static SLJIT_INLINE sljit_uw sljit_jump_get_target(struct sljit_jump *jump) { return jump->u.target; } |
2548 | | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump); |
2549 | | |
2550 | | /* Option bits for sljit_serialize_compiler. */ |
2551 | | |
2552 | | /* When debugging is enabled, the serialized buffer contains |
2553 | | debugging information unless this option is specified. */ |
2554 | | #define SLJIT_SERIALIZE_IGNORE_DEBUG 0x1 |
2555 | | |
2556 | | /* Serialize the internal structure of the compiler into a buffer. |
2557 | | If the serialization is successful, the returned value is a newly |
2558 | | allocated buffer which is allocated by the memory allocator assigned |
2559 | | to the compiler. Otherwise the returned value is NULL. Unlike |
2560 | | sljit_generate_code(), serialization does not modify the internal |
2561 | | state of the compiler, so the code generation can be continued. |
2562 | | |
2563 | | options must be a combination of SLJIT_SERIALIZE_* option bits |
2564 | | size is an output argument, which is set to the byte size of |
2565 | | the result buffer if the operation is successful |
2566 | | |
2567 | | Notes: |
2568 | | - This function is useful for ahead-of-time compilation (AOT). |
2569 | | - The returned buffer must be freed later by the caller. |
2570 | | The SLJIT_FREE() macro is suitable for this purpose: |
2571 | | SLJIT_FREE(returned_buffer, sljit_get_allocator_data(compiler)) |
2572 | | - Memory allocated by sljit_alloc_memory() is not serialized. |
2573 | | - The type of the returned buffer is sljit_uw* to emphasize that |
2574 | | the buffer is word aligned. However, the 'size' output argument |
2575 | | contains the byte size, so this value is always divisible by |
2576 | | sizeof(sljit_uw). |
2577 | | */ |
2578 | | SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler, |
2579 | | sljit_s32 options, sljit_uw *size); |
2580 | | |
2581 | | /* Construct a new compiler instance from a buffer produced by |
2582 | | sljit_serialize_compiler(). If the operation is successful, the new |
2583 | | compiler instance is returned. Otherwise the returned value is NULL. |
2584 | | |
2585 | | buffer points to a word aligned memory data which was |
2586 | | created by sljit_serialize_compiler() |
2587 | | size is the byte size of the buffer |
2588 | | options must be 0 |
2589 | | allocator_data specifies allocator specific data, see |
2590 | | sljit_create_compiler() for further details |
2591 | | |
2592 | | Notes: |
2593 | | - Labels assigned to jumps are restored with their |
2594 | | corresponding label in the label set created by |
2595 | | the deserializer. Target addresses assigned to |
2596 | | jumps are also restored. Uninitialized jumps |
2597 | | remain uninitialized. |
2598 | | - After the deserialization, sljit_generate_code() does |
2599 | | not need to be the next operation on the returned |
2600 | | compiler, the code generation can be continued. |
2601 | | Even sljit_serialize_compiler() can be called again. |
2602 | | - When debugging is enabled, a buffer without debug |
2603 | | information cannot be deserialized. |
2604 | | */ |
2605 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size, |
2606 | | sljit_s32 options, void *allocator_data); |
2607 | | |
2608 | | /* --------------------------------------------------------------------- */ |
2609 | | /* Miscellaneous utility functions */ |
2610 | | /* --------------------------------------------------------------------- */ |
2611 | | |
2612 | | /* Get the human readable name of the platform. Can be useful on platforms |
2613 | | like ARM, where ARM and Thumb2 functions can be mixed, and it is useful |
2614 | | to know the type of the code generator. */ |
2615 | | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); |
2616 | | |
2617 | | /* Portable helper macro to get the byte offset of a member within the type named by base. |
2618 | | Same as the offsetof() macro defined in stddef.h. It takes the address of the member in a pretend object located at address 0x10, casts that address to sljit_sw and subtracts 0x10 again. NOTE(review): the non-zero base address is presumably chosen to avoid null pointer diagnostics - confirm. */ |
2619 | 25.1G | #define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) |
2620 | | |
2621 | | #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) |
2622 | | |
2623 | | /* The sljit_stack structure and its manipulation functions provide |
2624 | | an implementation for a top-down stack. The stack top is stored |
2625 | | in the end field of the sljit_stack structure and the stack goes |
2626 | | down to the min_start field, so the memory region reserved for |
2627 | | this stack is between min_start (inclusive) and end (exclusive) |
2628 | | fields. However the application can only use the region between |
2629 | | start (inclusive) and end (exclusive) fields. The sljit_stack_resize |
2630 | | function can be used to extend this region up to min_start. |
2631 | | |
2632 | | This feature uses the "address space reserve" feature of modern |
2633 | | operating systems. Instead of allocating a large memory block |
2634 | | applications can allocate a small memory region and extend it |
2635 | | later without moving the content of the memory area. Therefore |
2636 | | after a successful resize by sljit_stack_resize all pointers into |
2637 | | this region are still valid. |
2638 | | |
2639 | | Note: |
2640 | | this structure may not be supported by all operating systems. |
2641 | | end and min_start fields are aligned to PAGE_SIZE bytes (usually |
2642 | | 4 Kbyte or more). |
2643 | | stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */ |
2644 | | |
2645 | | struct sljit_stack { |
2646 | | /* User data, anything can be stored here (it is not one of the read only members below). |
2647 | | Initialized to the same value as the end field. */ |
2648 | | sljit_u8 *top; |
2649 | | /* The members below are read only for the application. */ |
2650 | | /* End address of the stack (exclusive); the stack top is stored here. */ |
2651 | | sljit_u8 *end; |
2652 | | /* Current start address of the stack (inclusive); the application may only use the region between start and end. */ |
2653 | | sljit_u8 *start; |
2654 | | /* Lowest start address of the stack (inclusive); sljit_stack_resize can extend the usable region down to this address. */ |
2655 | | sljit_u8 *min_start; |
2656 | | }; |
2657 | | |
2658 | | /* Allocates a new stack. Returns NULL if unsuccessful. |
2659 | | Note: see sljit_create_compiler for the explanation of allocator_data. */ |
2660 | | SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data); |
2661 | | SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data); |
2662 | | |
2663 | | /* Can be used to increase (extend) or decrease (shrink) the stack |
2664 | | memory area. Returns with new_start if successful and NULL otherwise. |
2665 | | It always fails if new_start is less than the min_start field, or greater than |
2666 | | or equal to the end field. The fields of the stack are not changed if the returned |
2667 | | value is NULL (the current memory content is never lost). */ |
2668 | | SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start); |
2669 | | |
2670 | | #endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ |
2671 | | |
2672 | | #if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) |
2673 | | |
2674 | | /* Get the entry address of a given function (signed, unsigned result). Without indirect calls the function name itself is converted to the address. */ |
2675 | 209M | #define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name) |
2676 | | #define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name) |
2677 | | |
2678 | | #else /* (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ |
2679 | | |
2680 | | /* All JIT related code should be placed in the same context (library, binary, etc.). */ |
2681 | | |
2682 | | /* Get the entry address of a given function (signed, unsigned result). With indirect calls the entry address is loaded from the first word of the memory func_name points to. */ |
2683 | | #define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name) |
2684 | | #define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name) |
2685 | | |
2686 | | /* For powerpc64, the function pointers point to a context descriptor. NOTE(review): addr is presumably the entry address, and r2 / r11 the values loaded into those registers on a call - confirm. */ |
2687 | | struct sljit_function_context { |
2688 | | sljit_uw addr; |
2689 | | sljit_uw r2; |
2690 | | sljit_uw r11; |
2691 | | }; |
2692 | | |
2693 | | /* Fill the context arguments using the addr and the function. |
2694 | | If func_ptr is NULL, it will not be set to the address of context. |
2695 | | If addr is NULL, the function address also comes from the func pointer. */ |
2696 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func); |
2697 | | |
2698 | | #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ |
2699 | | |
2700 | | #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) |
2701 | | /* Free unused executable memory. The allocator keeps some free memory |
2702 | | around to reduce the number of OS executable memory allocations. |
2703 | | This improves performance since these calls are costly. However |
2704 | | it is sometimes desired to free all unused memory regions, e.g. |
2705 | | before the application terminates. */ |
2706 | | SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); |
2707 | | #endif /* SLJIT_EXECUTABLE_ALLOCATOR */ |
2708 | | |
2709 | | #ifdef __cplusplus |
2710 | | } /* extern "C" */ |
2711 | | #endif /* __cplusplus */ |
2712 | | |
2713 | | #endif /* SLJIT_LIR_H_ */ |