Coverage Report

Created: 2026-02-26 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c
Line
Count
Source
1
/*
2
 *    Stack-less Just-In-Time compiler
3
 *
4
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without modification, are
7
 * permitted provided that the following conditions are met:
8
 *
9
 *   1. Redistributions of source code must retain the above copyright notice, this list of
10
 *      conditions and the following disclaimer.
11
 *
12
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13
 *      of conditions and the following disclaimer in the documentation and/or other materials
14
 *      provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 */
26
27
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28
0
{
29
0
  return "x86" SLJIT_CPUINFO;
30
0
}
31
32
/*
33
   32b register indexes:
34
     0 - EAX
35
     1 - ECX
36
     2 - EDX
37
     3 - EBX
38
     4 - ESP
39
     5 - EBP
40
     6 - ESI
41
     7 - EDI
42
*/
43
44
/*
45
   64b register indexes:
46
     0 - RAX
47
     1 - RCX
48
     2 - RDX
49
     3 - RBX
50
     4 - RSP
51
     5 - RBP
52
     6 - RSI
53
     7 - RDI
54
     8 - R8   - From now on REX prefix is required
55
     9 - R9
56
    10 - R10
57
    11 - R11
58
    12 - R12
59
    13 - R13
60
    14 - R14
61
    15 - R15
62
*/
63
64
2.11G
#define TMP_REG1  (SLJIT_NUMBER_OF_REGISTERS + 2)
65
0
#define TMP_FREG  (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
66
67
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
68
69
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70
  0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
71
};
72
73
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
74
  0, 1, 2, 3, 4, 5, 6, 7, 0
75
};
76
77
#define CHECK_EXTRA_REGS(p, w, do) \
78
  if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
79
    w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
80
    p = SLJIT_MEM1(SLJIT_SP); \
81
    do; \
82
  }
83
84
#else /* SLJIT_CONFIG_X86_32 */
85
86
355M
#define TMP_REG2  (SLJIT_NUMBER_OF_REGISTERS + 3)
87
88
/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
89
   Note: avoid to use r12 and r13 for memory addressing
90
   therefore r12 is better to be a higher saved register. */
91
#ifndef _WIN64
92
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
93
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
94
  0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
95
};
96
/* low-map. reg_map & 0x7. */
97
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
98
  0, 0, 6, 7, 1, 0,  3,  2,  4, 5,  5,  6,  7, 3, 4, 2, 1
99
};
100
#else
101
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
102
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
103
  0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
104
};
105
/* low-map. reg_map & 0x7. */
106
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
107
  0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
108
};
109
#endif
110
111
/* Args: xmm0-xmm3 */
112
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
113
  0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
114
};
115
/* low-map. freg_map & 0x7. */
116
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
117
  0, 0, 1, 2, 3, 5, 6, 7, 0, 1,  2,  3,  4,  5,  6,  7, 4
118
};
119
120
7.97G
#define REX_W   0x48
121
809M
#define REX_R   0x44
122
37.6M
#define REX_X   0x42
123
3.20G
#define REX_B   0x41
124
6.27k
#define REX   0x40
125
126
#ifndef _WIN64
127
26.3G
#define HALFWORD_MAX 0x7fffffffl
128
14.0G
#define HALFWORD_MIN -0x80000000l
129
#else
130
#define HALFWORD_MAX 0x7fffffffll
131
#define HALFWORD_MIN -0x80000000ll
132
#endif
133
134
5.57G
#define IS_HALFWORD(x)    ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
135
4.75G
#define NOT_HALFWORD(x)   ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
136
137
#define CHECK_EXTRA_REGS(p, w, do)
138
139
#endif /* SLJIT_CONFIG_X86_32 */
140
141
32.5G
#define U8(v)     ((sljit_u8)(v))
142
143
/* Size flags for emit_x86_instruction: */
144
12.3G
#define EX86_BIN_INS    ((sljit_uw)0x000010)
145
10.3G
#define EX86_SHIFT_INS    ((sljit_uw)0x000020)
146
7.12G
#define EX86_BYTE_ARG   ((sljit_uw)0x000040)
147
2.39G
#define EX86_HALF_ARG   ((sljit_uw)0x000080)
148
/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
149
645M
#define EX86_REX    ((sljit_uw)0x000100)
150
7.18G
#define EX86_NO_REXW    ((sljit_uw)0x000200)
151
15.6G
#define EX86_PREF_66    ((sljit_uw)0x000400)
152
15.6G
#define EX86_PREF_F2    ((sljit_uw)0x000800)
153
15.6G
#define EX86_PREF_F3    ((sljit_uw)0x001000)
154
9.94G
#define EX86_SSE2_OP1   ((sljit_uw)0x002000)
155
4.00G
#define EX86_SSE2_OP2   ((sljit_uw)0x004000)
156
1.25M
#define EX86_SSE2   (EX86_SSE2_OP1 | EX86_SSE2_OP2)
157
7.82G
#define EX86_VEX_EXT    ((sljit_uw)0x008000)
158
/* Op flags for emit_vex_instruction: */
159
172k
#define VEX_OP_0F38   ((sljit_uw)0x010000)
160
172k
#define VEX_OP_0F3A   ((sljit_uw)0x020000)
161
0
#define VEX_SSE2_OPV    ((sljit_uw)0x040000)
162
0
#define VEX_AUTO_W    ((sljit_uw)0x080000)
163
0
#define VEX_W     ((sljit_uw)0x100000)
164
1.08M
#define VEX_256     ((sljit_uw)0x200000)
165
166
0
#define EX86_SELECT_66(op)  (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
167
#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
168
169
/* --------------------------------------------------------------------- */
170
/*  Instruction forms                                                    */
171
/* --------------------------------------------------------------------- */
172
173
#define ADD     (/* BINARY */ 0 << 3)
174
670M
#define ADD_EAX_i32   0x05
175
670M
#define ADD_r_rm    0x03
176
678M
#define ADD_rm_r    0x01
177
#define ADDSD_x_xm    0x58
178
#define ADC     (/* BINARY */ 2 << 3)
179
0
#define ADC_EAX_i32   0x15
180
0
#define ADC_r_rm    0x13
181
0
#define ADC_rm_r    0x11
182
#define AND     (/* BINARY */ 4 << 3)
183
28.5M
#define AND_EAX_i32   0x25
184
28.5M
#define AND_r_rm    0x23
185
28.5M
#define AND_rm_r    0x21
186
0
#define ANDPD_x_xm    0x54
187
#define BSR_r_rm    (/* GROUP_0F */ 0xbd)
188
#define BSF_r_rm    (/* GROUP_0F */ 0xbc)
189
0
#define BSWAP_r     (/* GROUP_0F */ 0xc8)
190
327M
#define CALL_i32    0xe8
191
#define CALL_rm     (/* GROUP_FF */ 2 << 3)
192
0
#define CDQ     0x99
193
#define CMOVE_r_rm    (/* GROUP_0F */ 0x44)
194
#define CMP     (/* BINARY */ 7 << 3)
195
#define CMP_EAX_i32   0x3d
196
753M
#define CMP_r_rm    0x3b
197
12.8M
#define CMP_rm_r    0x39
198
#define CMPS_x_xm   0xc2
199
#define CMPXCHG_rm_r    0xb1
200
#define CMPXCHG_rm8_r   0xb0
201
#define CVTPD2PS_x_xm   0x5a
202
#define CVTPS2PD_x_xm   0x5a
203
#define CVTSI2SD_x_rm   0x2a
204
#define CVTTSD2SI_r_xm    0x2c
205
0
#define DIV     (/* GROUP_F7 */ 6 << 3)
206
#define DIVSD_x_xm    0x5e
207
#define EXTRACTPS_x_xm    0x17
208
#define FLDS      0xd9
209
#define FLDL      0xdd
210
#define FSTPS     0xd9
211
#define FSTPD     0xdd
212
#define INSERTPS_x_xm   0x21
213
0
#define INT3      0xcc
214
0
#define IDIV      (/* GROUP_F7 */ 7 << 3)
215
0
#define IMUL      (/* GROUP_F7 */ 5 << 3)
216
#define IMUL_r_rm   (/* GROUP_0F */ 0xaf)
217
0
#define IMUL_r_rm_i8    0x6b
218
0
#define IMUL_r_rm_i32   0x69
219
0
#define JL_i8     0x7c
220
#define JE_i8     0x74
221
#define JNC_i8      0x73
222
#define JNE_i8      0x75
223
83.6M
#define JMP_i8      0xeb
224
762M
#define JMP_i32     0xe9
225
#define JMP_rm      (/* GROUP_FF */ 4 << 3)
226
144M
#define LEA_r_m     0x8d
227
#define LOOP_i8     0xe2
228
#define LZCNT_r_rm    (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
229
1.17G
#define MOV_r_rm    0x8b
230
115M
#define MOV_r_i32   0xb8
231
1.58G
#define MOV_rm_r    0x89
232
985M
#define MOV_rm_i32    0xc7
233
0
#define MOV_rm8_i8    0xc6
234
0
#define MOV_rm8_r8    0x88
235
0
#define MOVAPS_x_xm   0x28
236
#define MOVAPS_xm_x   0x29
237
573k
#define MOVD_x_rm   0x6e
238
#define MOVD_rm_x   0x7e
239
0
#define MOVDDUP_x_xm    0x12
240
1.08M
#define MOVDQA_x_xm   0x6f
241
0
#define MOVDQA_xm_x   0x7f
242
0
#define MOVDQU_x_xm   0x6f
243
0
#define MOVHLPS_x_x   0x12
244
0
#define MOVHPD_m_x    0x17
245
0
#define MOVHPD_x_m    0x16
246
#define MOVLHPS_x_x   0x16
247
0
#define MOVLPD_m_x    0x13
248
0
#define MOVLPD_x_m    0x12
249
913k
#define MOVMSKPS_r_x    (/* GROUP_0F */ 0x50)
250
0
#define MOVQ_x_xm   (/* GROUP_0F */ 0x7e)
251
0
#define MOVSD_x_xm    0x10
252
0
#define MOVSD_xm_x    0x11
253
0
#define MOVSHDUP_x_xm   0x16
254
161M
#define MOVSXD_r_rm   0x63
255
#define MOVSX_r_rm8   (/* GROUP_0F */ 0xbe)
256
#define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
257
0
#define MOVUPS_x_xm   0x10
258
397M
#define MOVZX_r_rm8   (/* GROUP_0F */ 0xb6)
259
#define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
260
0
#define MUL     (/* GROUP_F7 */ 4 << 3)
261
#define MULSD_x_xm    0x59
262
1.76M
#define NEG_rm      (/* GROUP_F7 */ 3 << 3)
263
0
#define NOP     0x90
264
0
#define NOT_rm      (/* GROUP_F7 */ 2 << 3)
265
#define OR      (/* BINARY */ 1 << 3)
266
43.6M
#define OR_r_rm     0x0b
267
43.6M
#define OR_EAX_i32    0x0d
268
43.6M
#define OR_rm_r     0x09
269
13.1M
#define OR_rm8_r8   0x08
270
0
#define ORPD_x_xm   0x56
271
#define PACKSSWB_x_xm   (/* GROUP_0F */ 0x63)
272
344k
#define PAND_x_xm   0xdb
273
#define PCMPEQD_x_xm    0x76
274
0
#define PINSRB_x_rm_i8    0x20
275
0
#define PINSRW_x_rm_i8    0xc4
276
0
#define PINSRD_x_rm_i8    0x22
277
0
#define PEXTRB_rm_x_i8    0x14
278
0
#define PEXTRW_rm_x_i8    0x15
279
0
#define PEXTRD_rm_x_i8    0x16
280
913k
#define PMOVMSKB_r_x    (/* GROUP_0F */ 0xd7)
281
#define PMOVSXBD_x_xm   0x21
282
#define PMOVSXBQ_x_xm   0x22
283
#define PMOVSXBW_x_xm   0x20
284
#define PMOVSXDQ_x_xm   0x25
285
#define PMOVSXWD_x_xm   0x23
286
#define PMOVSXWQ_x_xm   0x24
287
#define PMOVZXBD_x_xm   0x31
288
#define PMOVZXBQ_x_xm   0x32
289
#define PMOVZXBW_x_xm   0x30
290
#define PMOVZXDQ_x_xm   0x35
291
#define PMOVZXWD_x_xm   0x33
292
#define PMOVZXWQ_x_xm   0x34
293
#define POP_r     0x58
294
658k
#define POP_rm      0x8f
295
#define POPF      0x9d
296
0
#define POR_x_xm    0xeb
297
0
#define PREFETCH    0x18
298
0
#define PSHUFB_x_xm   0x00
299
0
#define PSHUFD_x_xm   0x70
300
0
#define PSHUFLW_x_xm    0x70
301
#define PSRLDQ_x    0x73
302
0
#define PSLLD_x_i8    0x72
303
0
#define PSLLQ_x_i8    0x73
304
#define PUSH_i32    0x68
305
#define PUSH_r      0x50
306
0
#define PUSH_rm     (/* GROUP_FF */ 6 << 3)
307
#define PUSHF     0x9c
308
0
#define PXOR_x_xm   0xef
309
0
#define ROL     (/* SHIFT */ 0 << 3)
310
0
#define ROR     (/* SHIFT */ 1 << 3)
311
1.98M
#define RET_near    0xc3
312
#define RET_i16     0xc2
313
#define SBB     (/* BINARY */ 3 << 3)
314
0
#define SBB_EAX_i32   0x1d
315
0
#define SBB_r_rm    0x1b
316
0
#define SBB_rm_r    0x19
317
404k
#define SAR     (/* SHIFT */ 7 << 3)
318
58.7M
#define SHL     (/* SHIFT */ 4 << 3)
319
#define SHLD      (/* GROUP_0F */ 0xa5)
320
#define SHRD      (/* GROUP_0F */ 0xad)
321
16.7M
#define SHR     (/* SHIFT */ 5 << 3)
322
0
#define SHUFPS_x_xm   0xc6
323
#define SUB     (/* BINARY */ 5 << 3)
324
507M
#define SUB_EAX_i32   0x2d
325
507M
#define SUB_r_rm    0x2b
326
507M
#define SUB_rm_r    0x29
327
#define SUBSD_x_xm    0x5c
328
#define TEST_EAX_i32    0xa9
329
6.52M
#define TEST_rm_r   0x85
330
#define TZCNT_r_rm    (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
331
0
#define UCOMISD_x_xm    0x2e
332
#define UNPCKLPD_x_xm   0x14
333
#define UNPCKLPS_x_xm   0x14
334
0
#define VBROADCASTSD_x_xm 0x19
335
0
#define VBROADCASTSS_x_xm 0x18
336
#define VEXTRACTF128_x_ym 0x19
337
#define VEXTRACTI128_x_ym 0x39
338
#define VINSERTF128_y_y_xm  0x18
339
#define VINSERTI128_y_y_xm  0x38
340
0
#define VPBROADCASTB_x_xm 0x78
341
0
#define VPBROADCASTD_x_xm 0x58
342
0
#define VPBROADCASTQ_x_xm 0x59
343
0
#define VPBROADCASTW_x_xm 0x79
344
#define VPERMPD_y_ym    0x01
345
#define VPERMQ_y_ym   0x00
346
#define XCHG_EAX_r    0x90
347
#define XCHG_r_rm   0x87
348
0
#define XOR     (/* BINARY */ 6 << 3)
349
472k
#define XOR_EAX_i32   0x35
350
472k
#define XOR_r_rm    0x33
351
472k
#define XOR_rm_r    0x31
352
0
#define XORPD_x_xm    0x57
353
354
3.11G
#define GROUP_0F    0x0f
355
#define GROUP_66    0x66
356
#define GROUP_F3    0xf3
357
43.9M
#define GROUP_F7    0xf7
358
116M
#define GROUP_FF    0xff
359
324M
#define GROUP_BINARY_81   0x81
360
1.43G
#define GROUP_BINARY_83   0x83
361
16.4M
#define GROUP_SHIFT_1   0xd1
362
45.6M
#define GROUP_SHIFT_N   0xc1
363
13.8M
#define GROUP_SHIFT_CL    0xd3
364
#define GROUP_LOCK    0xf0
365
366
410M
#define MOD_REG     0xc0
367
#define MOD_DISP8   0x40
368
369
9.70G
#define INC_SIZE(s)   (*inst++ = U8(s), compiler->size += (s))
370
371
4.40M
#define PUSH_REG(r)   (*inst++ = U8(PUSH_r + (r)))
372
3.24M
#define POP_REG(r)    (*inst++ = U8(POP_r + (r)))
373
1.38M
#define RET()     (*inst++ = RET_near)
374
#define RET_I16(n)    (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
375
376
2.96G
#define SLJIT_INST_LABEL  255
377
7.03G
#define SLJIT_INST_JUMP   254
378
14.9M
#define SLJIT_INST_MOV_ADDR 253
379
14.7G
#define SLJIT_INST_CONST  252
380
381
/* Multithreading does not affect these static variables, since they store
382
   built-in CPU features. Therefore they can be overwritten by different threads
383
   if they detect the CPU features in the same time. */
384
9
#define CPU_FEATURE_DETECTED    0x001
385
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
386
#define CPU_FEATURE_SSE2    0x002
387
#endif
388
9
#define CPU_FEATURE_SSE41   0x004
389
9
#define CPU_FEATURE_LZCNT   0x008
390
9
#define CPU_FEATURE_TZCNT   0x010
391
552M
#define CPU_FEATURE_CMOV    0x020
392
3.31M
#define CPU_FEATURE_AVX     0x040
393
573k
#define CPU_FEATURE_AVX2    0x080
394
18
#define CPU_FEATURE_OSXSAVE   0x100
395
396
static sljit_u32 cpu_feature_list = 0;
397
398
#ifdef _WIN32_WCE
399
#include <cmnintrin.h>
400
#elif defined(_MSC_VER) && _MSC_VER >= 1400
401
#include <intrin.h>
402
#elif defined(__INTEL_COMPILER)
403
#include <cpuid.h>
404
#endif
405
406
#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
407
  || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))
408
#include <immintrin.h>
409
#endif
410
411
/******************************************************/
412
/*    Unaligned-store functions                       */
413
/******************************************************/
414
415
static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
416
0
{
417
0
  SLJIT_MEMCPY(addr, &value, sizeof(value));
418
0
}
419
420
static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
421
5.76G
{
422
5.76G
  SLJIT_MEMCPY(addr, &value, sizeof(value));
423
5.76G
}
424
425
static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
426
398M
{
427
398M
  SLJIT_MEMCPY(addr, &value, sizeof(value));
428
398M
}
429
430
/******************************************************/
431
/*    Utility functions                               */
432
/******************************************************/
433
434
static void execute_cpu_id(sljit_u32 info[4])
435
45
{
436
#if (defined(_MSC_VER) && _MSC_VER >= 1400) \
437
  || (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7)
438
439
  __cpuidex((int*)info, (int)info[0], (int)info[2]);
440
441
#elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900)
442
443
  __get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3);
444
445
#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \
446
  && (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)
447
448
  /* Intel syntax. */
449
  __asm {
450
    mov esi, info
451
    mov eax, [esi]
452
    mov ecx, [esi + 8]
453
    cpuid
454
    mov [esi], eax
455
    mov [esi + 4], ebx
456
    mov [esi + 8], ecx
457
    mov [esi + 12], edx
458
  }
459
460
#else
461
462
45
  __asm__ __volatile__ (
463
45
    "cpuid\n"
464
45
    : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
465
45
    : "0" (info[0]), "2" (info[2])
466
45
  );
467
468
45
#endif
469
45
}
470
471
static sljit_u32 execute_get_xcr0_low(void)
472
9
{
473
9
  sljit_u32 xcr0;
474
475
#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
476
  || (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))
477
478
  xcr0 = (sljit_u32)_xgetbv(0);
479
480
#elif defined(__TINYC__)
481
482
  __asm__ (
483
    "xorl %%ecx, %%ecx\n"
484
    ".byte 0x0f\n"
485
    ".byte 0x01\n"
486
    ".byte 0xd0\n"
487
    : "=a" (xcr0)
488
    :
489
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
490
    : "ecx", "edx"
491
#else /* !SLJIT_CONFIG_X86_32 */
492
    : "rcx", "rdx"
493
#endif /* SLJIT_CONFIG_X86_32 */
494
  );
495
496
#elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \
497
  || (defined(__clang__) && __clang_major__ < 14) \
498
  || (defined(__GNUC__) && __GNUC__ < 3) \
499
  || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
500
501
  /* AT&T syntax. */
502
  __asm__ (
503
    "xorl %%ecx, %%ecx\n"
504
    "xgetbv\n"
505
    : "=a" (xcr0)
506
    :
507
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
508
    : "ecx", "edx"
509
#else /* !SLJIT_CONFIG_X86_32 */
510
    : "rcx", "rdx"
511
#endif /* SLJIT_CONFIG_X86_32 */
512
  );
513
514
#elif defined(_MSC_VER)
515
516
  /* Intel syntax. */
517
  __asm {
518
    xor ecx, ecx
519
    xgetbv
520
    mov xcr0, eax
521
  }
522
523
#else
524
525
9
  __asm__ (
526
9
    "xor{l %%ecx, %%ecx | ecx, ecx}\n"
527
9
    "xgetbv\n"
528
9
    : "=a" (xcr0)
529
9
    :
530
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
531
    : "ecx", "edx"
532
#else /* !SLJIT_CONFIG_X86_32 */
533
9
    : "rcx", "rdx"
534
9
#endif /* SLJIT_CONFIG_X86_32 */
535
9
  );
536
537
9
#endif
538
9
  return xcr0;
539
9
}
540
541
static void get_cpu_features(void)
542
9
{
543
9
  sljit_u32 feature_list = CPU_FEATURE_DETECTED;
544
9
  sljit_u32 info[4] = {0};
545
9
  sljit_u32 max_id;
546
547
9
  execute_cpu_id(info);
548
9
  max_id = info[0];
549
550
9
  if (max_id >= 7) {
551
9
    info[0] = 7;
552
9
    info[2] = 0;
553
9
    execute_cpu_id(info);
554
555
9
    if (info[1] & 0x8)
556
9
      feature_list |= CPU_FEATURE_TZCNT;
557
9
    if (info[1] & 0x20)
558
9
      feature_list |= CPU_FEATURE_AVX2;
559
9
  }
560
561
9
  if (max_id >= 1) {
562
9
    info[0] = 1;
563
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
564
    /* Winchip 2 and Cyrix MII bugs */
565
    info[1] = info[2] = 0;
566
#endif
567
9
    execute_cpu_id(info);
568
569
9
    if (info[2] & 0x80000)
570
9
      feature_list |= CPU_FEATURE_SSE41;
571
9
    if (info[2] & 0x8000000)
572
9
      feature_list |= CPU_FEATURE_OSXSAVE;
573
9
    if (info[2] & 0x10000000)
574
9
      feature_list |= CPU_FEATURE_AVX;
575
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
576
    if (info[3] & 0x4000000)
577
      feature_list |= CPU_FEATURE_SSE2;
578
#endif
579
9
    if (info[3] & 0x8000)
580
9
      feature_list |= CPU_FEATURE_CMOV;
581
9
  }
582
583
9
  info[0] = 0x80000000;
584
9
  execute_cpu_id(info);
585
9
  max_id = info[0];
586
587
9
  if (max_id >= 0x80000001) {
588
9
    info[0] = 0x80000001;
589
9
    execute_cpu_id(info);
590
591
9
    if (info[2] & 0x20)
592
9
      feature_list |= CPU_FEATURE_LZCNT;
593
9
  }
594
595
9
  if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
596
0
    feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);
597
598
9
  cpu_feature_list = feature_list;
599
9
}
600
601
static sljit_u8 get_jump_code(sljit_uw type)
602
3.16G
{
603
3.16G
  switch (type) {
604
1.09G
  case SLJIT_EQUAL:
605
1.09G
  case SLJIT_ATOMIC_STORED:
606
1.09G
  case SLJIT_F_EQUAL:
607
1.09G
  case SLJIT_UNORDERED_OR_EQUAL:
608
1.09G
    return 0x84 /* je */;
609
610
799M
  case SLJIT_NOT_EQUAL:
611
799M
  case SLJIT_ATOMIC_NOT_STORED:
612
799M
  case SLJIT_F_NOT_EQUAL:
613
799M
  case SLJIT_ORDERED_NOT_EQUAL:
614
799M
    return 0x85 /* jne */;
615
616
342M
  case SLJIT_LESS:
617
342M
  case SLJIT_CARRY:
618
342M
  case SLJIT_F_LESS:
619
342M
  case SLJIT_UNORDERED_OR_LESS:
620
342M
  case SLJIT_UNORDERED_OR_GREATER:
621
342M
    return 0x82 /* jc */;
622
623
116M
  case SLJIT_GREATER_EQUAL:
624
116M
  case SLJIT_NOT_CARRY:
625
116M
  case SLJIT_F_GREATER_EQUAL:
626
116M
  case SLJIT_ORDERED_GREATER_EQUAL:
627
116M
  case SLJIT_ORDERED_LESS_EQUAL:
628
116M
    return 0x83 /* jae */;
629
630
647M
  case SLJIT_GREATER:
631
647M
  case SLJIT_F_GREATER:
632
647M
  case SLJIT_ORDERED_LESS:
633
647M
  case SLJIT_ORDERED_GREATER:
634
647M
    return 0x87 /* jnbe */;
635
636
57.5M
  case SLJIT_LESS_EQUAL:
637
57.5M
  case SLJIT_F_LESS_EQUAL:
638
57.5M
  case SLJIT_UNORDERED_OR_GREATER_EQUAL:
639
57.5M
  case SLJIT_UNORDERED_OR_LESS_EQUAL:
640
57.5M
    return 0x86 /* jbe */;
641
642
0
  case SLJIT_SIG_LESS:
643
0
    return 0x8c /* jl */;
644
645
0
  case SLJIT_SIG_GREATER_EQUAL:
646
0
    return 0x8d /* jnl */;
647
648
102M
  case SLJIT_SIG_GREATER:
649
102M
    return 0x8f /* jnle */;
650
651
41.0k
  case SLJIT_SIG_LESS_EQUAL:
652
41.0k
    return 0x8e /* jle */;
653
654
0
  case SLJIT_OVERFLOW:
655
0
    return 0x80 /* jo */;
656
657
0
  case SLJIT_NOT_OVERFLOW:
658
0
    return 0x81 /* jno */;
659
660
0
  case SLJIT_UNORDERED:
661
0
  case SLJIT_ORDERED_EQUAL: /* NaN. */
662
0
    return 0x8a /* jp */;
663
664
0
  case SLJIT_ORDERED:
665
0
  case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
666
0
    return 0x8b /* jpo */;
667
3.16G
  }
668
0
  return 0;
669
3.16G
}
670
671
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
672
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
673
#else /* !SLJIT_CONFIG_X86_32 */
674
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
675
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
676
#endif /* SLJIT_CONFIG_X86_32 */
677
678
static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
679
3.51G
{
680
3.51G
  sljit_uw type = jump->flags >> TYPE_SHIFT;
681
3.51G
  sljit_s32 short_jump;
682
3.51G
  sljit_uw label_addr;
683
3.51G
  sljit_uw jump_addr;
684
685
3.51G
  jump_addr = (sljit_uw)code_ptr;
686
3.51G
  if (!(jump->flags & JUMP_ADDR)) {
687
3.40G
    label_addr = (sljit_uw)(code + jump->u.label->size);
688
689
3.40G
    if (jump->u.label->size > jump->addr)
690
2.90G
      jump_addr = (sljit_uw)(code + jump->addr);
691
3.40G
  } else
692
115M
    label_addr = jump->u.target - (sljit_uw)executable_offset;
693
694
3.51G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
695
3.51G
  if ((sljit_sw)(label_addr - (jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN)
696
115M
    return detect_far_jump_type(jump, code_ptr);
697
3.40G
#endif /* SLJIT_CONFIG_X86_64 */
698
699
3.40G
  short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f;
700
701
3.40G
  if (type == SLJIT_JUMP) {
702
846M
    if (short_jump)
703
83.6M
      *code_ptr++ = JMP_i8;
704
762M
    else
705
762M
      *code_ptr++ = JMP_i32;
706
2.55G
  } else if (type > SLJIT_JUMP) {
707
327M
    short_jump = 0;
708
327M
    *code_ptr++ = CALL_i32;
709
2.22G
  } else if (short_jump) {
710
769M
    *code_ptr++ = U8(get_jump_code(type) - 0x10);
711
1.45G
  } else {
712
1.45G
    *code_ptr++ = GROUP_0F;
713
1.45G
    *code_ptr++ = get_jump_code(type);
714
1.45G
  }
715
716
3.40G
  jump->addr = (sljit_uw)code_ptr;
717
718
3.40G
  if (short_jump) {
719
853M
    jump->flags |= PATCH_MB;
720
853M
    code_ptr += sizeof(sljit_s8);
721
2.54G
  } else {
722
2.54G
    jump->flags |= PATCH_MW;
723
2.54G
    code_ptr += sizeof(sljit_s32);
724
2.54G
  }
725
726
3.40G
  return code_ptr;
727
3.51G
}
728
729
static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
730
3.52G
{
731
3.52G
  sljit_uw flags = jump->flags;
732
3.52G
  sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
733
3.52G
  sljit_uw jump_addr = jump->addr;
734
3.52G
  SLJIT_UNUSED_ARG(executable_offset);
735
736
3.52G
  if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
737
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
738
    sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
739
#else /* SLJIT_CONFIG_X86_32 */
740
7.45M
    if (flags & PATCH_MD) {
741
0
      SLJIT_ASSERT(addr > HALFWORD_MAX);
742
0
      sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
743
0
      return;
744
0
    }
745
746
7.45M
    if (flags & PATCH_MW) {
747
7.45M
      addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
748
7.45M
      SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
749
7.45M
    } else {
750
0
      SLJIT_ASSERT(addr <= HALFWORD_MAX);
751
0
    }
752
7.45M
    sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
753
7.45M
#endif /* !SLJIT_CONFIG_X86_32 */
754
7.45M
    return;
755
7.45M
  }
756
757
3.51G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
758
3.51G
  if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
759
0
    SLJIT_ASSERT(!(flags & JUMP_ADDR));
760
0
    sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
761
0
    return;
762
0
  }
763
3.51G
#endif /* SLJIT_CONFIG_X86_64 */
764
765
3.51G
  addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
766
767
3.51G
  if (flags & PATCH_MB) {
768
853M
    addr -= sizeof(sljit_s8);
769
853M
    SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
770
853M
    *(sljit_u8*)jump_addr = U8(addr);
771
853M
    return;
772
2.66G
  } else if (flags & PATCH_MW) {
773
2.54G
    addr -= sizeof(sljit_s32);
774
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
775
    sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
776
#else /* !SLJIT_CONFIG_X86_32 */
777
2.54G
    SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
778
2.54G
    sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
779
2.54G
#endif /* SLJIT_CONFIG_X86_32 */
780
2.54G
  }
781
3.51G
}
782
783
static sljit_u8 *process_extended_label(sljit_u8 *code_ptr, struct sljit_extended_label *ext_label)
784
0
{
785
0
  sljit_uw mask;
786
0
  sljit_u8 *ptr = code_ptr;
787
788
0
  SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);
789
0
  mask = ext_label->data;
790
791
0
  code_ptr = (sljit_u8*)(((sljit_uw)code_ptr + mask) & ~mask);
792
793
0
  while (ptr < code_ptr)
794
0
    *ptr++ = NOP;
795
796
0
  return code_ptr;
797
0
}
798
799
static void reduce_code_size(struct sljit_compiler *compiler)
800
604k
{
801
604k
  struct sljit_label *label;
802
604k
  struct sljit_jump *jump;
803
604k
  sljit_uw next_label_size;
804
604k
  sljit_uw next_jump_addr;
805
604k
  sljit_uw next_min_addr;
806
604k
  sljit_uw size_reduce = 0;
807
604k
  sljit_sw diff;
808
604k
  sljit_uw type;
809
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
810
  sljit_uw size_reduce_max;
811
#endif /* SLJIT_DEBUG */
812
813
604k
  label = compiler->labels;
814
604k
  jump = compiler->jumps;
815
816
604k
  next_label_size = SLJIT_GET_NEXT_SIZE(label);
817
604k
  next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
818
819
4.70G
  while (1) {
820
4.70G
    next_min_addr = next_label_size;
821
4.70G
    if (next_jump_addr < next_min_addr)
822
3.22G
      next_min_addr = next_jump_addr;
823
824
4.70G
    if (next_min_addr == SLJIT_MAX_ADDRESS)
825
604k
      break;
826
827
4.70G
    if (next_min_addr == next_label_size) {
828
1.48G
      label->size -= size_reduce;
829
830
1.48G
      label = label->next;
831
1.48G
      next_label_size = SLJIT_GET_NEXT_SIZE(label);
832
1.48G
    }
833
834
4.70G
    if (next_min_addr != next_jump_addr)
835
1.18G
      continue;
836
837
3.52G
    jump->addr -= size_reduce;
838
3.52G
    if (!(jump->flags & JUMP_MOV_ADDR)) {
839
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
840
      size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
841
#endif /* SLJIT_DEBUG */
842
843
3.51G
      if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
844
3.51G
        if (jump->flags & JUMP_ADDR) {
845
115M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
846
115M
          if (jump->u.target <= 0xffffffffl)
847
0
            size_reduce += sizeof(sljit_s32);
848
115M
#endif /* SLJIT_CONFIG_X86_64 */
849
3.40G
        } else {
850
          /* Unit size: instruction. */
851
3.40G
          diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
852
3.40G
          if (jump->u.label->size > jump->addr) {
853
2.90G
            SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
854
2.90G
            diff -= (sljit_sw)size_reduce;
855
2.90G
          }
856
3.40G
          type = jump->flags >> TYPE_SHIFT;
857
858
3.40G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
859
3.40G
          if (type == SLJIT_JUMP) {
860
846M
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
861
75.2M
              size_reduce += JUMP_MAX_SIZE - 2;
862
771M
            else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
863
771M
              size_reduce += JUMP_MAX_SIZE - 5;
864
2.55G
          } else if (type < SLJIT_JUMP) {
865
2.22G
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
866
636M
              size_reduce += CJUMP_MAX_SIZE - 2;
867
1.59G
            else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
868
1.59G
              size_reduce += CJUMP_MAX_SIZE - 6;
869
2.22G
          } else  {
870
327M
            if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
871
327M
              size_reduce += JUMP_MAX_SIZE - 5;
872
327M
          }
873
#else /* !SLJIT_CONFIG_X86_64 */
874
          if (type == SLJIT_JUMP) {
875
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
876
              size_reduce += JUMP_MAX_SIZE - 2;
877
          } else if (type < SLJIT_JUMP) {
878
            if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
879
              size_reduce += CJUMP_MAX_SIZE - 2;
880
          }
881
#endif /* SLJIT_CONFIG_X86_64 */
882
3.40G
        }
883
3.51G
      }
884
885
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
886
      jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
887
#endif /* SLJIT_DEBUG */
888
3.51G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
889
3.51G
    } else {
890
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
891
      size_reduce_max = size_reduce + 10;
892
#endif /* SLJIT_DEBUG */
893
894
7.45M
      if (!(jump->flags & JUMP_ADDR)) {
895
7.45M
        diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3);
896
897
7.45M
        if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
898
7.45M
          size_reduce += 3;
899
7.45M
      } else if (jump->u.target <= 0xffffffffl)
900
0
        size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5;
901
902
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
903
      jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
904
#endif /* SLJIT_DEBUG */
905
7.45M
#endif /* SLJIT_CONFIG_X86_64 */
906
7.45M
    }
907
908
3.52G
    jump = jump->next;
909
3.52G
    next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
910
3.52G
  }
911
912
604k
  compiler->size -= size_reduce;
913
604k
}
914
915
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
916
604k
{
917
604k
  struct sljit_memory_fragment *buf;
918
604k
  sljit_u8 *code;
919
604k
  sljit_u8 *code_ptr;
920
604k
  sljit_u8 *buf_ptr;
921
604k
  sljit_u8 *buf_end;
922
604k
  sljit_u8 len;
923
604k
  sljit_sw executable_offset;
924
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
925
  sljit_uw addr;
926
#endif /* SLJIT_DEBUG */
927
928
604k
  struct sljit_label *label;
929
604k
  struct sljit_jump *jump;
930
604k
  struct sljit_const *const_;
931
932
604k
  CHECK_ERROR_PTR();
933
604k
  CHECK_PTR(check_sljit_generate_code(compiler, options));
934
935
604k
  reduce_code_size(compiler);
936
937
  /* Second code generation pass. */
938
604k
  code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
939
604k
  PTR_FAIL_WITH_EXEC_IF(code);
940
941
604k
  reverse_buf(compiler);
942
604k
  buf = compiler->buf;
943
944
604k
  code_ptr = code;
945
604k
  label = compiler->labels;
946
604k
  jump = compiler->jumps;
947
604k
  const_ = compiler->consts;
948
949
16.3M
  do {
950
16.3M
    buf_ptr = buf->memory;
951
16.3M
    buf_end = buf_ptr + buf->used_size;
952
14.7G
    do {
953
14.7G
      len = *buf_ptr++;
954
14.7G
      SLJIT_ASSERT(len > 0);
955
14.7G
      if (len < SLJIT_INST_CONST) {
956
        /* The code is already generated. */
957
9.70G
        SLJIT_MEMCPY(code_ptr, buf_ptr, len);
958
9.70G
        code_ptr += len;
959
9.70G
        buf_ptr += len;
960
9.70G
      } else {
961
5.00G
        switch (len) {
962
1.48G
        case SLJIT_INST_LABEL:
963
1.48G
          if (label->u.index >= SLJIT_LABEL_ALIGNED)
964
0
            code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
965
966
1.48G
          label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
967
1.48G
          label->size = (sljit_uw)(code_ptr - code);
968
1.48G
          label = label->next;
969
1.48G
          break;
970
3.51G
        case SLJIT_INST_JUMP:
971
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
972
          addr = (sljit_uw)code_ptr;
973
#endif /* SLJIT_DEBUG */
974
3.51G
          if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
975
3.51G
            code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
976
0
          else {
977
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
978
            code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
979
#else /* !SLJIT_CONFIG_X86_32 */
980
0
            code_ptr = detect_far_jump_type(jump, code_ptr);
981
0
#endif /* SLJIT_CONFIG_X86_32 */
982
0
          }
983
984
3.51G
          SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0xff));
985
3.51G
          jump = jump->next;
986
3.51G
          break;
987
7.45M
        case SLJIT_INST_MOV_ADDR:
988
7.45M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
989
7.45M
          code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
990
7.45M
#endif /* SLJIT_CONFIG_X86_64 */
991
7.45M
          jump->addr = (sljit_uw)code_ptr;
992
7.45M
          jump = jump->next;
993
7.45M
          break;
994
0
        default:
995
0
          SLJIT_ASSERT(len == SLJIT_INST_CONST);
996
0
          const_->addr = (sljit_uw)code_ptr;
997
0
          const_ = const_->next;
998
0
          break;
999
5.00G
        }
1000
5.00G
      }
1001
14.7G
    } while (buf_ptr < buf_end);
1002
1003
16.3M
    SLJIT_ASSERT(buf_ptr == buf_end);
1004
16.3M
    buf = buf->next;
1005
16.3M
  } while (buf);
1006
1007
604k
  SLJIT_ASSERT(!label);
1008
604k
  SLJIT_ASSERT(!jump);
1009
604k
  SLJIT_ASSERT(!const_);
1010
604k
  SLJIT_ASSERT(code_ptr <= code + compiler->size);
1011
1012
604k
  jump = compiler->jumps;
1013
3.52G
  while (jump) {
1014
3.52G
    generate_jump_or_mov_addr(jump, executable_offset);
1015
3.52G
    jump = jump->next;
1016
3.52G
  }
1017
1018
604k
  compiler->error = SLJIT_ERR_COMPILED;
1019
604k
  compiler->executable_offset = executable_offset;
1020
604k
  compiler->executable_size = (sljit_uw)(code_ptr - code);
1021
1022
604k
  code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1023
1024
604k
  SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
1025
604k
  return (void*)code;
1026
604k
}
1027
1028
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1029
552M
{
1030
552M
  switch (feature_type) {
1031
0
  case SLJIT_HAS_FPU:
1032
#ifdef SLJIT_IS_FPU_AVAILABLE
1033
    return (SLJIT_IS_FPU_AVAILABLE) != 0;
1034
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
1035
    if (cpu_feature_list == 0)
1036
      get_cpu_features();
1037
    return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
1038
#else /* SLJIT_DETECT_SSE2 */
1039
0
    return 1;
1040
0
#endif /* SLJIT_DETECT_SSE2 */
1041
1042
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1043
  case SLJIT_HAS_VIRTUAL_REGISTERS:
1044
    return 1;
1045
#endif /* SLJIT_CONFIG_X86_32 */
1046
1047
0
  case SLJIT_HAS_CLZ:
1048
0
    if (cpu_feature_list == 0)
1049
0
      get_cpu_features();
1050
1051
0
    return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
1052
1053
0
  case SLJIT_HAS_CTZ:
1054
0
    if (cpu_feature_list == 0)
1055
0
      get_cpu_features();
1056
1057
0
    return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
1058
1059
552M
  case SLJIT_HAS_CMOV:
1060
552M
    if (cpu_feature_list == 0)
1061
0
      get_cpu_features();
1062
552M
    return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
1063
1064
0
  case SLJIT_HAS_REV:
1065
0
  case SLJIT_HAS_ROT:
1066
0
  case SLJIT_HAS_PREFETCH:
1067
0
  case SLJIT_HAS_COPY_F32:
1068
0
  case SLJIT_HAS_COPY_F64:
1069
0
  case SLJIT_HAS_ATOMIC:
1070
0
  case SLJIT_HAS_MEMORY_BARRIER:
1071
0
    return 1;
1072
1073
0
#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
1074
0
  case SLJIT_HAS_AVX:
1075
0
    if (cpu_feature_list == 0)
1076
0
      get_cpu_features();
1077
0
    return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
1078
0
  case SLJIT_HAS_AVX2:
1079
0
    if (cpu_feature_list == 0)
1080
0
      get_cpu_features();
1081
0
    return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
1082
0
  case SLJIT_HAS_SIMD:
1083
0
    if (cpu_feature_list == 0)
1084
0
      get_cpu_features();
1085
0
    return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
1086
0
#endif /* SLJIT_IS_FPU_AVAILABLE */
1087
66.9k
  default:
1088
66.9k
    return 0;
1089
552M
  }
1090
552M
}
1091
1092
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1093
0
{
1094
0
  switch (type) {
1095
0
  case SLJIT_ORDERED_EQUAL:
1096
0
  case SLJIT_UNORDERED_OR_NOT_EQUAL:
1097
0
    return 2;
1098
0
  }
1099
0
1100
0
  return 0;
1101
0
}
1102
1103
/* --------------------------------------------------------------------- */
1104
/*  Operators                                                            */
1105
/* --------------------------------------------------------------------- */
1106
1107
1.25G
#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
1108
1109
#define BINARY_IMM32(op_imm, immw, arg, argw) \
1110
1.76G
  do { \
1111
1.76G
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1112
1.76G
    FAIL_IF(!inst); \
1113
1.76G
    *(inst + 1) |= (op_imm); \
1114
1.76G
  } while (0)
1115
1116
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1117
1118
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1119
1.98G
  do { \
1120
1.98G
    if (IS_HALFWORD(immw) || compiler->mode32) { \
1121
1.75G
      BINARY_IMM32(op_imm, immw, arg, argw); \
1122
1.75G
    } \
1123
1.98G
    else { \
1124
228M
      FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
1125
228M
      inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1126
228M
      FAIL_IF(!inst); \
1127
228M
      *inst = (op_mr); \
1128
228M
    } \
1129
1.98G
  } while (0)
1130
1131
#define BINARY_EAX_IMM(op_eax_imm, immw) \
1132
1.17G
  FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1133
1134
#else /* !SLJIT_CONFIG_X86_64 */
1135
1136
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1137
  BINARY_IMM32(op_imm, immw, arg, argw)
1138
1139
#define BINARY_EAX_IMM(op_eax_imm, immw) \
1140
  FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1141
1142
#endif /* SLJIT_CONFIG_X86_64 */
1143
1144
static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
1145
1.60M
{
1146
1.60M
  sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1147
1.60M
  FAIL_IF(!inst);
1148
1.60M
  INC_SIZE(1);
1149
1.60M
  *inst = byte;
1150
1.60M
  return SLJIT_SUCCESS;
1151
1.60M
}
1152
1153
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1154
  sljit_s32 dst, sljit_sw dstw,
1155
  sljit_s32 src, sljit_sw srcw);
1156
1157
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
1158
3.28G
  FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1159
1160
static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
1161
  sljit_uw op,
1162
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1163
1164
static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
1165
  sljit_uw op,
1166
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1167
1168
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
1169
  sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
1170
1171
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
1172
  sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
1173
1174
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1175
  sljit_s32 src1, sljit_sw src1w,
1176
  sljit_s32 src2, sljit_sw src2w);
1177
1178
static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1179
  sljit_s32 dst_reg,
1180
  sljit_s32 src, sljit_sw srcw);
1181
1182
static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
1183
8.05M
{
1184
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
1185
  /* Emit endbr32/endbr64 when CET is enabled.  */
1186
  sljit_u8 *inst;
1187
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1188
  FAIL_IF(!inst);
1189
  INC_SIZE(4);
1190
  inst[0] = GROUP_F3;
1191
  inst[1] = GROUP_0F;
1192
  inst[2] = 0x1e;
1193
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1194
  inst[3] = 0xfb;
1195
#else /* !SLJIT_CONFIG_X86_32 */
1196
  inst[3] = 0xfa;
1197
#endif /* SLJIT_CONFIG_X86_32 */
1198
#else /* !SLJIT_CONFIG_X86_CET */
1199
8.05M
  SLJIT_UNUSED_ARG(compiler);
1200
8.05M
#endif /* SLJIT_CONFIG_X86_CET */
1201
8.05M
  return SLJIT_SUCCESS;
1202
8.05M
}
1203
1204
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1205
1206
static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
1207
{
1208
  sljit_u8 *inst;
1209
  sljit_s32 size;
1210
1211
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1212
  size = 5;
1213
#else
1214
  size = 4;
1215
#endif
1216
1217
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1218
  FAIL_IF(!inst);
1219
  INC_SIZE(size);
1220
  *inst++ = GROUP_F3;
1221
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1222
  *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1223
#endif
1224
  inst[0] = GROUP_0F;
1225
  inst[1] = 0x1e;
1226
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1227
  inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
1228
#else
1229
  inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
1230
#endif
1231
  return SLJIT_SUCCESS;
1232
}
1233
1234
static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
1235
{
1236
  sljit_u8 *inst;
1237
  sljit_s32 size;
1238
1239
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1240
  size = 5;
1241
#else
1242
  size = 4;
1243
#endif
1244
1245
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1246
  FAIL_IF(!inst);
1247
  INC_SIZE(size);
1248
  *inst++ = GROUP_F3;
1249
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1250
  *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1251
#endif
1252
  inst[0] = GROUP_0F;
1253
  inst[1] = 0xae;
1254
  inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
1255
  return SLJIT_SUCCESS;
1256
}
1257
1258
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1259
1260
static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
1261
604k
{
1262
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1263
  return _get_ssp() != 0;
1264
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1265
604k
  return 0;
1266
604k
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1267
604k
}
1268
1269
static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
1270
  sljit_s32 src, sljit_sw srcw)
1271
0
{
1272
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1273
  sljit_u8 *inst, *jz_after_cmp_inst;
1274
  sljit_uw size_jz_after_cmp_inst;
1275
1276
  sljit_uw size_before_rdssp_inst = compiler->size;
1277
1278
  /* Generate "RDSSP TMP_REG1". */
1279
  FAIL_IF(emit_rdssp(compiler, TMP_REG1));
1280
1281
  /* Load return address on shadow stack into TMP_REG1. */
1282
  EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
1283
1284
  /* Compare return address against TMP_REG1. */
1285
  FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
1286
1287
  /* Generate JZ to skip shadow stack ajdustment when shadow
1288
     stack matches normal stack. */
1289
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1290
  FAIL_IF(!inst);
1291
  INC_SIZE(2);
1292
  *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
1293
  size_jz_after_cmp_inst = compiler->size;
1294
  jz_after_cmp_inst = inst;
1295
1296
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1297
  /* REX_W is not necessary. */
1298
  compiler->mode32 = 1;
1299
#endif
1300
  /* Load 1 into TMP_REG1. */
1301
  EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
1302
1303
  /* Generate "INCSSP TMP_REG1". */
1304
  FAIL_IF(emit_incssp(compiler, TMP_REG1));
1305
1306
  /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
1307
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1308
  FAIL_IF(!inst);
1309
  INC_SIZE(2);
1310
  inst[0] = JMP_i8;
1311
  inst[1] = size_before_rdssp_inst - compiler->size;
1312
1313
  *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
1314
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1315
0
  SLJIT_UNUSED_ARG(compiler);
1316
0
  SLJIT_UNUSED_ARG(src);
1317
0
  SLJIT_UNUSED_ARG(srcw);
1318
0
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1319
0
  return SLJIT_SUCCESS;
1320
0
}
1321
1322
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1323
#include "sljitNativeX86_32.c"
1324
#else
1325
#include "sljitNativeX86_64.c"
1326
#endif
1327
1328
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
1329
  sljit_s32 dst, sljit_sw dstw,
1330
  sljit_s32 src, sljit_sw srcw)
1331
3.41G
{
1332
3.41G
  sljit_u8* inst;
1333
1334
3.41G
  if (FAST_IS_REG(src)) {
1335
1.44G
    inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
1336
1.44G
    FAIL_IF(!inst);
1337
1.44G
    *inst = MOV_rm_r;
1338
1.44G
    return SLJIT_SUCCESS;
1339
1.44G
  }
1340
1341
1.96G
  if (src == SLJIT_IMM) {
1342
790M
    if (FAST_IS_REG(dst)) {
1343
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1344
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1345
#else
1346
61.5M
      if (!compiler->mode32) {
1347
61.2M
        if (NOT_HALFWORD(srcw))
1348
9.30M
          return emit_load_imm64(compiler, dst, srcw);
1349
61.2M
      }
1350
391k
      else
1351
391k
        return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
1352
61.5M
#endif
1353
61.5M
    }
1354
780M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1355
780M
    if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
1356
      /* Immediate to memory move. Only SLJIT_MOV operation copies
1357
         an immediate directly into memory so TMP_REG1 can be used. */
1358
539k
      FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
1359
539k
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1360
539k
      FAIL_IF(!inst);
1361
539k
      *inst = MOV_rm_r;
1362
539k
      return SLJIT_SUCCESS;
1363
539k
    }
1364
779M
#endif
1365
779M
    inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
1366
779M
    FAIL_IF(!inst);
1367
779M
    *inst = MOV_rm_i32;
1368
779M
    return SLJIT_SUCCESS;
1369
779M
  }
1370
1.17G
  if (FAST_IS_REG(dst)) {
1371
1.14G
    inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
1372
1.14G
    FAIL_IF(!inst);
1373
1.14G
    *inst = MOV_r_rm;
1374
1.14G
    return SLJIT_SUCCESS;
1375
1.14G
  }
1376
1377
  /* Memory to memory move. Only SLJIT_MOV operation copies
1378
     data from memory to memory so TMP_REG1 can be used. */
1379
28.2M
  inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
1380
28.2M
  FAIL_IF(!inst);
1381
28.2M
  *inst = MOV_r_rm;
1382
28.2M
  inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1383
28.2M
  FAIL_IF(!inst);
1384
28.2M
  *inst = MOV_rm_r;
1385
28.2M
  return SLJIT_SUCCESS;
1386
28.2M
}
1387
1388
static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
1389
  sljit_s32 dst_reg,
1390
  sljit_s32 src, sljit_sw srcw)
1391
0
{
1392
0
  sljit_u8* inst;
1393
0
  sljit_uw size;
1394
1395
0
  SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
1396
1397
0
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1398
0
  FAIL_IF(!inst);
1399
0
  INC_SIZE(2);
1400
0
  inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);
1401
1402
0
  size = compiler->size;
1403
0
  EMIT_MOV(compiler, dst_reg, 0, src, srcw);
1404
1405
0
  inst[1] = U8(compiler->size - size);
1406
0
  return SLJIT_SUCCESS;
1407
0
}
1408
1409
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1410
8.05M
{
1411
8.05M
  sljit_u8 *inst;
1412
8.05M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1413
8.05M
  sljit_uw size;
1414
8.05M
#endif
1415
1416
8.05M
  CHECK_ERROR();
1417
8.05M
  CHECK(check_sljit_emit_op0(compiler, op));
1418
1419
8.05M
  switch (GET_OPCODE(op)) {
1420
0
  case SLJIT_BREAKPOINT:
1421
0
    return emit_byte(compiler, INT3);
1422
0
  case SLJIT_NOP:
1423
0
    return emit_byte(compiler, NOP);
1424
0
  case SLJIT_LMUL_UW:
1425
0
  case SLJIT_LMUL_SW:
1426
0
  case SLJIT_DIVMOD_UW:
1427
0
  case SLJIT_DIVMOD_SW:
1428
0
  case SLJIT_DIV_UW:
1429
0
  case SLJIT_DIV_SW:
1430
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1431
#ifdef _WIN64
1432
    SLJIT_ASSERT(
1433
      reg_map[SLJIT_R0] == 0
1434
      && reg_map[SLJIT_R1] == 2
1435
      && reg_map[TMP_REG1] > 7);
1436
#else
1437
0
    SLJIT_ASSERT(
1438
0
      reg_map[SLJIT_R0] == 0
1439
0
      && reg_map[SLJIT_R1] < 7
1440
0
      && reg_map[TMP_REG1] == 2);
1441
0
#endif
1442
0
    compiler->mode32 = op & SLJIT_32;
1443
0
#endif
1444
0
    SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1445
1446
0
    op = GET_OPCODE(op);
1447
0
    if ((op | 0x2) == SLJIT_DIV_UW) {
1448
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1449
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1450
      inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1451
#else
1452
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1453
0
#endif
1454
0
      FAIL_IF(!inst);
1455
0
      *inst = XOR_r_rm;
1456
0
    }
1457
1458
0
    if ((op | 0x2) == SLJIT_DIV_SW) {
1459
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1460
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1461
#endif
1462
1463
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1464
      FAIL_IF(emit_byte(compiler, CDQ));
1465
#else
1466
0
      if (!compiler->mode32) {
1467
0
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1468
0
        FAIL_IF(!inst);
1469
0
        INC_SIZE(2);
1470
0
        inst[0] = REX_W;
1471
0
        inst[1] = CDQ;
1472
0
      } else
1473
0
        FAIL_IF(emit_byte(compiler, CDQ));
1474
0
#endif
1475
0
    }
1476
1477
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1478
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1479
    FAIL_IF(!inst);
1480
    INC_SIZE(2);
1481
    inst[0] = GROUP_F7;
1482
    inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1483
#else /* !SLJIT_CONFIG_X86_32 */
1484
#ifdef _WIN64
1485
    size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1486
#else /* !_WIN64 */
1487
0
    size = (!compiler->mode32) ? 3 : 2;
1488
0
#endif /* _WIN64 */
1489
0
    inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1490
0
    FAIL_IF(!inst);
1491
0
    INC_SIZE(size);
1492
#ifdef _WIN64
1493
    if (!compiler->mode32)
1494
      *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1495
    else if (op >= SLJIT_DIVMOD_UW)
1496
      *inst++ = REX_B;
1497
    inst[0] = GROUP_F7;
1498
    inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1499
#else /* !_WIN64 */
1500
0
    if (!compiler->mode32)
1501
0
      *inst++ = REX_W;
1502
0
    inst[0] = GROUP_F7;
1503
0
    inst[1] = MOD_REG | reg_map[SLJIT_R1];
1504
0
#endif /* _WIN64 */
1505
0
#endif /* SLJIT_CONFIG_X86_32 */
1506
0
    switch (op) {
1507
0
    case SLJIT_LMUL_UW:
1508
0
      inst[1] |= MUL;
1509
0
      break;
1510
0
    case SLJIT_LMUL_SW:
1511
0
      inst[1] |= IMUL;
1512
0
      break;
1513
0
    case SLJIT_DIVMOD_UW:
1514
0
    case SLJIT_DIV_UW:
1515
0
      inst[1] |= DIV;
1516
0
      break;
1517
0
    case SLJIT_DIVMOD_SW:
1518
0
    case SLJIT_DIV_SW:
1519
0
      inst[1] |= IDIV;
1520
0
      break;
1521
0
    }
1522
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1523
0
    if (op <= SLJIT_DIVMOD_SW)
1524
0
      EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1525
#else
1526
    if (op >= SLJIT_DIV_UW)
1527
      EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1528
#endif
1529
0
    break;
1530
0
  case SLJIT_MEMORY_BARRIER:
1531
0
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
1532
0
    FAIL_IF(!inst);
1533
0
    INC_SIZE(3);
1534
0
    inst[0] = GROUP_0F;
1535
0
    inst[1] = 0xae;
1536
0
    inst[2] = 0xf0;
1537
0
    return SLJIT_SUCCESS;
1538
7.45M
  case SLJIT_ENDBR:
1539
7.45M
    return emit_endbranch(compiler);
1540
604k
  case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1541
604k
    return skip_frames_before_return(compiler);
1542
8.05M
  }
1543
1544
0
  return SLJIT_SUCCESS;
1545
8.05M
}
1546
1547
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1548
  sljit_s32 dst, sljit_sw dstw,
1549
  sljit_s32 src, sljit_sw srcw)
1550
182M
{
1551
182M
  sljit_u8* inst;
1552
182M
  sljit_s32 dst_r;
1553
1554
182M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1555
182M
  compiler->mode32 = 0;
1556
182M
#endif
1557
1558
182M
  if (src == SLJIT_IMM) {
1559
0
    if (FAST_IS_REG(dst)) {
1560
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1561
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1562
#else
1563
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1564
0
      FAIL_IF(!inst);
1565
0
      *inst = MOV_rm_i32;
1566
0
      return SLJIT_SUCCESS;
1567
0
#endif
1568
0
    }
1569
0
    inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1570
0
    FAIL_IF(!inst);
1571
0
    *inst = MOV_rm8_i8;
1572
0
    return SLJIT_SUCCESS;
1573
0
  }
1574
1575
182M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1576
1577
182M
  if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1578
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1579
    if (reg_map[src] >= 4) {
1580
      SLJIT_ASSERT(dst_r == TMP_REG1);
1581
      EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1582
    } else
1583
      dst_r = src;
1584
#else
1585
0
    dst_r = src;
1586
0
#endif
1587
182M
  } else {
1588
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1589
    if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1590
      /* Both src and dst are registers. */
1591
      SLJIT_ASSERT(FAST_IS_REG(dst));
1592
1593
      if (src == dst && !sign) {
1594
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1595
        FAIL_IF(!inst);
1596
        *(inst + 1) |= AND;
1597
        return SLJIT_SUCCESS;
1598
      }
1599
1600
      EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1601
      src = TMP_REG1;
1602
      srcw = 0;
1603
    }
1604
#endif /* !SLJIT_CONFIG_X86_32 */
1605
1606
    /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
1607
182M
    FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
1608
182M
  }
1609
1610
182M
  if (dst & SLJIT_MEM) {
1611
0
    inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1612
0
    FAIL_IF(!inst);
1613
0
    *inst = MOV_rm8_r8;
1614
0
  }
1615
1616
182M
  return SLJIT_SUCCESS;
1617
182M
}
1618
1619
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1620
  sljit_s32 src, sljit_sw srcw)
1621
0
{
1622
0
  sljit_u8* inst;
1623
1624
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1625
0
  compiler->mode32 = 1;
1626
0
#endif
1627
1628
0
  inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1629
0
  FAIL_IF(!inst);
1630
0
  inst[0] = GROUP_0F;
1631
0
  inst[1] = PREFETCH;
1632
1633
0
  if (op == SLJIT_PREFETCH_L1)
1634
0
    inst[2] |= (1 << 3);
1635
0
  else if (op == SLJIT_PREFETCH_L2)
1636
0
    inst[2] |= (2 << 3);
1637
0
  else if (op == SLJIT_PREFETCH_L3)
1638
0
    inst[2] |= (3 << 3);
1639
1640
0
  return SLJIT_SUCCESS;
1641
0
}
1642
1643
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1644
  sljit_s32 dst, sljit_sw dstw,
1645
  sljit_s32 src, sljit_sw srcw)
1646
142M
{
1647
142M
  sljit_u8* inst;
1648
142M
  sljit_s32 dst_r;
1649
1650
142M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1651
142M
  compiler->mode32 = 0;
1652
142M
#endif
1653
1654
142M
  if (src == SLJIT_IMM) {
1655
0
    if (FAST_IS_REG(dst)) {
1656
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1657
      return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1658
#else
1659
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1660
0
      FAIL_IF(!inst);
1661
0
      *inst = MOV_rm_i32;
1662
0
      return SLJIT_SUCCESS;
1663
0
#endif
1664
0
    }
1665
0
    inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1666
0
    FAIL_IF(!inst);
1667
0
    *inst = MOV_rm_i32;
1668
0
    return SLJIT_SUCCESS;
1669
0
  }
1670
1671
142M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1672
1673
142M
  if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1674
0
    dst_r = src;
1675
142M
  else
1676
142M
    FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));
1677
1678
142M
  if (dst & SLJIT_MEM) {
1679
0
    inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1680
0
    FAIL_IF(!inst);
1681
0
    *inst = MOV_rm_r;
1682
0
  }
1683
1684
142M
  return SLJIT_SUCCESS;
1685
142M
}
1686
1687
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1688
  sljit_s32 dst, sljit_sw dstw,
1689
  sljit_s32 src, sljit_sw srcw)
1690
1.76M
{
1691
1.76M
  sljit_u8* inst;
1692
1693
1.76M
  if (dst == src && dstw == srcw) {
1694
    /* Same input and output */
1695
1.76M
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1696
1.76M
    FAIL_IF(!inst);
1697
1.76M
    inst[0] = GROUP_F7;
1698
1.76M
    inst[1] |= opcode;
1699
1.76M
    return SLJIT_SUCCESS;
1700
1.76M
  }
1701
1702
0
  if (FAST_IS_REG(dst)) {
1703
0
    EMIT_MOV(compiler, dst, 0, src, srcw);
1704
0
    inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1705
0
    FAIL_IF(!inst);
1706
0
    inst[0] = GROUP_F7;
1707
0
    inst[1] |= opcode;
1708
0
    return SLJIT_SUCCESS;
1709
0
  }
1710
1711
0
  EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1712
0
  inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1713
0
  FAIL_IF(!inst);
1714
0
  inst[0] = GROUP_F7;
1715
0
  inst[1] |= opcode;
1716
0
  EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1717
0
  return SLJIT_SUCCESS;
1718
0
}
1719
1720
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1721
static const sljit_sw emit_clz_arg = 32 + 31;
1722
static const sljit_sw emit_ctz_arg = 32;
1723
#endif
1724
1725
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
1726
  sljit_s32 dst, sljit_sw dstw,
1727
  sljit_s32 src, sljit_sw srcw)
1728
0
{
1729
0
  sljit_u8* inst;
1730
0
  sljit_s32 dst_r;
1731
0
  sljit_sw max;
1732
1733
0
  SLJIT_ASSERT(cpu_feature_list != 0);
1734
1735
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1736
1737
0
  if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
1738
0
    FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));
1739
1740
0
    if (dst & SLJIT_MEM)
1741
0
      EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1742
0
    return SLJIT_SUCCESS;
1743
0
  }
1744
1745
0
  FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));
1746
1747
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1748
  max = is_clz ? (32 + 31) : 32;
1749
1750
  if (cpu_feature_list & CPU_FEATURE_CMOV) {
1751
    if (dst_r != TMP_REG1) {
1752
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
1753
      inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1754
    }
1755
    else
1756
      inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
1757
1758
    FAIL_IF(!inst);
1759
    inst[0] = GROUP_0F;
1760
    inst[1] = CMOVE_r_rm;
1761
  }
1762
  else
1763
    FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1764
1765
  if (is_clz) {
1766
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1767
    FAIL_IF(!inst);
1768
    *(inst + 1) |= XOR;
1769
  }
1770
#else
1771
0
  if (is_clz)
1772
0
    max = compiler->mode32 ? (32 + 31) : (64 + 63);
1773
0
  else
1774
0
    max = compiler->mode32 ? 32 : 64;
1775
1776
0
  if (cpu_feature_list & CPU_FEATURE_CMOV) {
1777
0
    EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
1778
0
    FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
1779
0
  } else
1780
0
    FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
1781
1782
0
  if (is_clz) {
1783
0
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
1784
0
    FAIL_IF(!inst);
1785
0
    *(inst + 1) |= XOR;
1786
0
  }
1787
0
#endif
1788
1789
0
  if (dst & SLJIT_MEM)
1790
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1791
0
  return SLJIT_SUCCESS;
1792
0
}
1793
1794
static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
1795
  sljit_s32 op,
1796
  sljit_s32 dst, sljit_sw dstw,
1797
  sljit_s32 src, sljit_sw srcw)
1798
0
{
1799
0
  sljit_u8 *inst;
1800
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1801
0
  sljit_uw size;
1802
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1803
0
  sljit_u8 rex = 0;
1804
#else /* !SLJIT_CONFIG_X86_64 */
1805
  sljit_s32 dst_is_ereg = op & SLJIT_32;
1806
#endif /* SLJIT_CONFIG_X86_64 */
1807
1808
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1809
0
  if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
1810
0
    compiler->mode32 = 1;
1811
#else /* !SLJIT_CONFIG_X86_64 */
1812
  op &= ~SLJIT_32;
1813
#endif /* SLJIT_CONFIG_X86_64 */
1814
1815
0
  if (src != dst_r) {
1816
    /* Only the lower 16 bit is read for eregs. */
1817
0
    if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1818
0
      FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
1819
0
    else
1820
0
      EMIT_MOV(compiler, dst_r, 0, src, srcw);
1821
0
  }
1822
1823
0
  size = 2;
1824
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1825
0
  if (!compiler->mode32)
1826
0
    rex = REX_W;
1827
1828
0
  if (reg_map[dst_r] >= 8)
1829
0
    rex |= REX_B;
1830
1831
0
  if (rex != 0)
1832
0
    size++;
1833
0
#endif /* SLJIT_CONFIG_X86_64 */
1834
1835
0
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1836
0
  FAIL_IF(!inst);
1837
0
  INC_SIZE(size);
1838
1839
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1840
0
  if (rex != 0)
1841
0
    *inst++ = rex;
1842
1843
0
  inst[0] = GROUP_0F;
1844
0
  inst[1] = BSWAP_r | reg_lmap[dst_r];
1845
#else /* !SLJIT_CONFIG_X86_64 */
1846
  inst[0] = GROUP_0F;
1847
  inst[1] = BSWAP_r | reg_map[dst_r];
1848
#endif /* SLJIT_CONFIG_X86_64 */
1849
1850
0
  if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1851
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1852
0
    size = compiler->mode32 ? 16 : 48;
1853
#else /* !SLJIT_CONFIG_X86_64 */
1854
    size = 16;
1855
#endif /* SLJIT_CONFIG_X86_64 */
1856
1857
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
1858
0
    FAIL_IF(!inst);
1859
0
    if (op == SLJIT_REV_U16)
1860
0
      inst[1] |= SHR;
1861
0
    else
1862
0
      inst[1] |= SAR;
1863
0
  }
1864
1865
0
  if (dst & SLJIT_MEM) {
1866
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1867
    if (dst_is_ereg)
1868
      op = SLJIT_REV;
1869
#endif /* SLJIT_CONFIG_X86_32 */
1870
0
    if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1871
0
      return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
1872
1873
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1874
0
  }
1875
1876
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1877
0
  if (op == SLJIT_REV_S32) {
1878
0
    compiler->mode32 = 0;
1879
0
    inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1880
0
    FAIL_IF(!inst);
1881
0
    *inst = MOVSXD_r_rm;
1882
0
  }
1883
0
#endif /* SLJIT_CONFIG_X86_64 */
1884
1885
0
  return SLJIT_SUCCESS;
1886
0
}
1887
1888
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1889
  sljit_s32 dst, sljit_sw dstw,
1890
  sljit_s32 src, sljit_sw srcw)
1891
3.89G
{
1892
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1893
  sljit_s32 dst_is_ereg = 0;
1894
#else /* !SLJIT_CONFIG_X86_32 */
1895
3.89G
  sljit_s32 op_flags = GET_ALL_FLAGS(op);
1896
3.89G
#endif /* SLJIT_CONFIG_X86_32 */
1897
1898
3.89G
  CHECK_ERROR();
1899
3.89G
  CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1900
3.89G
  ADJUST_LOCAL_OFFSET(dst, dstw);
1901
3.89G
  ADJUST_LOCAL_OFFSET(src, srcw);
1902
1903
3.89G
  CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1904
3.89G
  CHECK_EXTRA_REGS(src, srcw, (void)0);
1905
3.89G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1906
3.89G
  compiler->mode32 = op_flags & SLJIT_32;
1907
3.89G
#endif /* SLJIT_CONFIG_X86_64 */
1908
1909
3.89G
  op = GET_OPCODE(op);
1910
1911
3.89G
  if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1912
3.89G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1913
3.89G
    compiler->mode32 = 0;
1914
3.89G
#endif /* SLJIT_CONFIG_X86_64 */
1915
1916
3.89G
    if (FAST_IS_REG(src) && src == dst) {
1917
0
      if (!TYPE_CAST_NEEDED(op))
1918
0
        return SLJIT_SUCCESS;
1919
0
    }
1920
1921
3.89G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1922
3.89G
    if (op_flags & SLJIT_32) {
1923
0
      if (src & SLJIT_MEM) {
1924
0
        if (op == SLJIT_MOV_S32)
1925
0
          op = SLJIT_MOV_U32;
1926
0
      }
1927
0
      else if (src == SLJIT_IMM) {
1928
0
        if (op == SLJIT_MOV_U32)
1929
0
          op = SLJIT_MOV_S32;
1930
0
      }
1931
0
    }
1932
3.89G
#endif /* SLJIT_CONFIG_X86_64 */
1933
1934
3.89G
    if (src == SLJIT_IMM) {
1935
969M
      switch (op) {
1936
0
      case SLJIT_MOV_U8:
1937
0
        srcw = (sljit_u8)srcw;
1938
0
        break;
1939
0
      case SLJIT_MOV_S8:
1940
0
        srcw = (sljit_s8)srcw;
1941
0
        break;
1942
0
      case SLJIT_MOV_U16:
1943
0
        srcw = (sljit_u16)srcw;
1944
0
        break;
1945
0
      case SLJIT_MOV_S16:
1946
0
        srcw = (sljit_s16)srcw;
1947
0
        break;
1948
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1949
205M
      case SLJIT_MOV_U32:
1950
205M
        srcw = (sljit_u32)srcw;
1951
205M
        break;
1952
0
      case SLJIT_MOV_S32:
1953
0
        srcw = (sljit_s32)srcw;
1954
0
        break;
1955
969M
#endif /* SLJIT_CONFIG_X86_64 */
1956
969M
      }
1957
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1958
      if (SLJIT_UNLIKELY(dst_is_ereg))
1959
        return emit_mov(compiler, dst, dstw, src, srcw);
1960
#endif /* SLJIT_CONFIG_X86_32 */
1961
969M
    }
1962
1963
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1964
    if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1965
      SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1966
      dst = TMP_REG1;
1967
    }
1968
#endif /* SLJIT_CONFIG_X86_32 */
1969
1970
3.89G
    switch (op) {
1971
2.89G
    case SLJIT_MOV:
1972
2.89G
    case SLJIT_MOV_P:
1973
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1974
    case SLJIT_MOV_U32:
1975
    case SLJIT_MOV_S32:
1976
    case SLJIT_MOV32:
1977
#endif /* SLJIT_CONFIG_X86_32 */
1978
2.89G
      EMIT_MOV(compiler, dst, dstw, src, srcw);
1979
2.89G
      break;
1980
182M
    case SLJIT_MOV_U8:
1981
182M
      FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1982
182M
      break;
1983
182M
    case SLJIT_MOV_S8:
1984
0
      FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1985
0
      break;
1986
142M
    case SLJIT_MOV_U16:
1987
142M
      FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1988
142M
      break;
1989
142M
    case SLJIT_MOV_S16:
1990
0
      FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1991
0
      break;
1992
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1993
514M
    case SLJIT_MOV_U32:
1994
514M
      FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1995
514M
      break;
1996
514M
    case SLJIT_MOV_S32:
1997
161M
      FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1998
161M
      break;
1999
161M
    case SLJIT_MOV32:
2000
0
      compiler->mode32 = 1;
2001
0
      EMIT_MOV(compiler, dst, dstw, src, srcw);
2002
0
      compiler->mode32 = 0;
2003
0
      break;
2004
3.89G
#endif /* SLJIT_CONFIG_X86_64 */
2005
3.89G
    }
2006
2007
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2008
    if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
2009
      return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
2010
#endif /* SLJIT_CONFIG_X86_32 */
2011
3.89G
    return SLJIT_SUCCESS;
2012
3.89G
  }
2013
2014
0
  switch (op) {
2015
0
  case SLJIT_CLZ:
2016
0
  case SLJIT_CTZ:
2017
0
    return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
2018
0
  case SLJIT_REV:
2019
0
  case SLJIT_REV_U16:
2020
0
  case SLJIT_REV_S16:
2021
0
  case SLJIT_REV_U32:
2022
0
  case SLJIT_REV_S32:
2023
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2024
    if (dst_is_ereg)
2025
      op |= SLJIT_32;
2026
#endif /* SLJIT_CONFIG_X86_32 */
2027
0
    return emit_bswap(compiler, op, dst, dstw, src, srcw);
2028
0
  }
2029
2030
0
  return SLJIT_SUCCESS;
2031
0
}
2032
2033
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
2034
  sljit_u32 op_types,
2035
  sljit_s32 dst, sljit_sw dstw,
2036
  sljit_s32 src1, sljit_sw src1w,
2037
  sljit_s32 src2, sljit_sw src2w)
2038
743M
{
2039
743M
  sljit_u8* inst;
2040
743M
  sljit_u8 op_eax_imm = U8(op_types >> 24);
2041
743M
  sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2042
743M
  sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2043
743M
  sljit_u8 op_imm = U8(op_types & 0xff);
2044
2045
743M
  if (dst == src1 && dstw == src1w) {
2046
729M
    if (src2 == SLJIT_IMM) {
2047
699M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2048
699M
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2049
#else
2050
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2051
#endif
2052
18.1M
        BINARY_EAX_IMM(op_eax_imm, src2w);
2053
18.1M
      }
2054
680M
      else {
2055
680M
        BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2056
680M
      }
2057
699M
    }
2058
30.8M
    else if (FAST_IS_REG(dst)) {
2059
30.6M
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2060
30.6M
      FAIL_IF(!inst);
2061
30.6M
      *inst = op_rm;
2062
30.6M
    }
2063
199k
    else if (FAST_IS_REG(src2)) {
2064
      /* Special exception for sljit_emit_op_flags. */
2065
199k
      inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2066
199k
      FAIL_IF(!inst);
2067
199k
      *inst = op_mr;
2068
199k
    }
2069
0
    else {
2070
0
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2071
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2072
0
      FAIL_IF(!inst);
2073
0
      *inst = op_mr;
2074
0
    }
2075
729M
    return SLJIT_SUCCESS;
2076
729M
  }
2077
2078
  /* Only for cumulative operations. */
2079
13.3M
  if (dst == src2 && dstw == src2w) {
2080
0
    if (src1 == SLJIT_IMM) {
2081
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2082
0
      if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2083
#else
2084
      if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
2085
#endif
2086
0
        BINARY_EAX_IMM(op_eax_imm, src1w);
2087
0
      }
2088
0
      else {
2089
0
        BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
2090
0
      }
2091
0
    }
2092
0
    else if (FAST_IS_REG(dst)) {
2093
0
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
2094
0
      FAIL_IF(!inst);
2095
0
      *inst = op_rm;
2096
0
    }
2097
0
    else if (FAST_IS_REG(src1)) {
2098
0
      inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
2099
0
      FAIL_IF(!inst);
2100
0
      *inst = op_mr;
2101
0
    }
2102
0
    else {
2103
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2104
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2105
0
      FAIL_IF(!inst);
2106
0
      *inst = op_mr;
2107
0
    }
2108
0
    return SLJIT_SUCCESS;
2109
0
  }
2110
2111
  /* General version. */
2112
13.3M
  if (FAST_IS_REG(dst)) {
2113
13.3M
    EMIT_MOV(compiler, dst, 0, src1, src1w);
2114
13.3M
    if (src2 == SLJIT_IMM) {
2115
13.3M
      BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2116
13.3M
    }
2117
0
    else {
2118
0
      inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2119
0
      FAIL_IF(!inst);
2120
0
      *inst = op_rm;
2121
0
    }
2122
13.3M
  }
2123
0
  else {
2124
    /* This version requires less memory writing. */
2125
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2126
0
    if (src2 == SLJIT_IMM) {
2127
0
      BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2128
0
    }
2129
0
    else {
2130
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2131
0
      FAIL_IF(!inst);
2132
0
      *inst = op_rm;
2133
0
    }
2134
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2135
0
  }
2136
2137
13.3M
  return SLJIT_SUCCESS;
2138
13.3M
}
2139
2140
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
2141
  sljit_u32 op_types,
2142
  sljit_s32 dst, sljit_sw dstw,
2143
  sljit_s32 src1, sljit_sw src1w,
2144
  sljit_s32 src2, sljit_sw src2w)
2145
508M
{
2146
508M
  sljit_u8* inst;
2147
508M
  sljit_u8 op_eax_imm = U8(op_types >> 24);
2148
508M
  sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2149
508M
  sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2150
508M
  sljit_u8 op_imm = U8(op_types & 0xff);
2151
2152
508M
  if (dst == src1 && dstw == src1w) {
2153
481M
    if (src2 == SLJIT_IMM) {
2154
473M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2155
473M
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2156
#else
2157
      if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2158
#endif
2159
32.1M
        BINARY_EAX_IMM(op_eax_imm, src2w);
2160
32.1M
      }
2161
440M
      else {
2162
440M
        BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2163
440M
      }
2164
473M
    }
2165
8.13M
    else if (FAST_IS_REG(dst)) {
2166
8.13M
      inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2167
8.13M
      FAIL_IF(!inst);
2168
8.13M
      *inst = op_rm;
2169
8.13M
    }
2170
0
    else if (FAST_IS_REG(src2)) {
2171
0
      inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2172
0
      FAIL_IF(!inst);
2173
0
      *inst = op_mr;
2174
0
    }
2175
0
    else {
2176
0
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2177
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2178
0
      FAIL_IF(!inst);
2179
0
      *inst = op_mr;
2180
0
    }
2181
481M
    return SLJIT_SUCCESS;
2182
481M
  }
2183
2184
  /* General version. */
2185
27.7M
  if (FAST_IS_REG(dst) && dst != src2) {
2186
27.7M
    EMIT_MOV(compiler, dst, 0, src1, src1w);
2187
27.7M
    if (src2 == SLJIT_IMM) {
2188
21.2M
      BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2189
21.2M
    }
2190
6.44M
    else {
2191
6.44M
      inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2192
6.44M
      FAIL_IF(!inst);
2193
6.44M
      *inst = op_rm;
2194
6.44M
    }
2195
27.7M
  }
2196
0
  else {
2197
    /* This version requires less memory writing. */
2198
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2199
0
    if (src2 == SLJIT_IMM) {
2200
0
      BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2201
0
    }
2202
0
    else {
2203
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2204
0
      FAIL_IF(!inst);
2205
0
      *inst = op_rm;
2206
0
    }
2207
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2208
0
  }
2209
2210
27.7M
  return SLJIT_SUCCESS;
2211
27.7M
}
2212
2213
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
2214
  sljit_s32 dst, sljit_sw dstw,
2215
  sljit_s32 src1, sljit_sw src1w,
2216
  sljit_s32 src2, sljit_sw src2w)
2217
0
{
2218
0
  sljit_u8* inst;
2219
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2220
2221
  /* Register destination. */
2222
0
  if (dst_r == src1 && src2 != SLJIT_IMM) {
2223
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2224
0
  } else if (dst_r == src2 && src1 != SLJIT_IMM) {
2225
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
2226
0
  } else if (src1 == SLJIT_IMM) {
2227
0
    if (src2 == SLJIT_IMM) {
2228
0
      EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
2229
0
      src2 = dst_r;
2230
0
      src2w = 0;
2231
0
    }
2232
2233
0
    if (src1w <= 127 && src1w >= -128) {
2234
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2235
0
      FAIL_IF(!inst);
2236
0
      *inst = IMUL_r_rm_i8;
2237
2238
0
      FAIL_IF(emit_byte(compiler, U8(src1w)));
2239
0
    }
2240
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2241
    else {
2242
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2243
      FAIL_IF(!inst);
2244
      *inst = IMUL_r_rm_i32;
2245
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2246
      FAIL_IF(!inst);
2247
      INC_SIZE(4);
2248
      sljit_unaligned_store_sw(inst, src1w);
2249
    }
2250
#else
2251
0
    else if (IS_HALFWORD(src1w)) {
2252
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2253
0
      FAIL_IF(!inst);
2254
0
      *inst = IMUL_r_rm_i32;
2255
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2256
0
      FAIL_IF(!inst);
2257
0
      INC_SIZE(4);
2258
0
      sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
2259
0
    }
2260
0
    else {
2261
0
      if (dst_r != src2)
2262
0
        EMIT_MOV(compiler, dst_r, 0, src2, src2w);
2263
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
2264
0
      FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2265
0
    }
2266
0
#endif
2267
0
  }
2268
0
  else if (src2 == SLJIT_IMM) {
2269
    /* Note: src1 is NOT immediate. */
2270
2271
0
    if (src2w <= 127 && src2w >= -128) {
2272
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2273
0
      FAIL_IF(!inst);
2274
0
      *inst = IMUL_r_rm_i8;
2275
2276
0
      FAIL_IF(emit_byte(compiler, U8(src2w)));
2277
0
    }
2278
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2279
    else {
2280
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2281
      FAIL_IF(!inst);
2282
      *inst = IMUL_r_rm_i32;
2283
2284
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2285
      FAIL_IF(!inst);
2286
      INC_SIZE(4);
2287
      sljit_unaligned_store_sw(inst, src2w);
2288
    }
2289
#else
2290
0
    else if (IS_HALFWORD(src2w)) {
2291
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2292
0
      FAIL_IF(!inst);
2293
0
      *inst = IMUL_r_rm_i32;
2294
2295
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2296
0
      FAIL_IF(!inst);
2297
0
      INC_SIZE(4);
2298
0
      sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2299
0
    } else {
2300
0
      if (dst_r != src1)
2301
0
        EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2302
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2303
0
      FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2304
0
    }
2305
0
#endif
2306
0
  } else {
2307
    /* Neither argument is immediate. */
2308
0
    if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2309
0
      dst_r = TMP_REG1;
2310
0
    EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2311
0
    FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2312
0
  }
2313
2314
0
  if (dst & SLJIT_MEM)
2315
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2316
2317
0
  return SLJIT_SUCCESS;
2318
0
}
2319
2320
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2321
  sljit_s32 dst, sljit_sw dstw,
2322
  sljit_s32 src1, sljit_sw src1w,
2323
  sljit_s32 src2, sljit_sw src2w)
2324
1.19G
{
2325
1.19G
  sljit_u8* inst;
2326
1.19G
  sljit_s32 dst_r, done = 0;
2327
2328
  /* These cases better be left to handled by normal way. */
2329
1.19G
  if (dst == src1 && dstw == src1w)
2330
1.04G
    return SLJIT_ERR_UNSUPPORTED;
2331
150M
  if (dst == src2 && dstw == src2w)
2332
0
    return SLJIT_ERR_UNSUPPORTED;
2333
2334
150M
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2335
2336
150M
  if (FAST_IS_REG(src1)) {
2337
128M
    if (FAST_IS_REG(src2)) {
2338
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2339
0
      FAIL_IF(!inst);
2340
0
      *inst = LEA_r_m;
2341
0
      done = 1;
2342
0
    }
2343
128M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2344
128M
    if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
2345
128M
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2346
#else
2347
    if (src2 == SLJIT_IMM) {
2348
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2349
#endif
2350
128M
      FAIL_IF(!inst);
2351
128M
      *inst = LEA_r_m;
2352
128M
      done = 1;
2353
128M
    }
2354
128M
  }
2355
21.7M
  else if (FAST_IS_REG(src2)) {
2356
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2357
0
    if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
2358
0
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2359
#else
2360
    if (src1 == SLJIT_IMM) {
2361
      inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2362
#endif
2363
0
      FAIL_IF(!inst);
2364
0
      *inst = LEA_r_m;
2365
0
      done = 1;
2366
0
    }
2367
0
  }
2368
2369
150M
  if (done) {
2370
128M
    if (dst_r == TMP_REG1)
2371
1.75M
      return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2372
127M
    return SLJIT_SUCCESS;
2373
128M
  }
2374
21.7M
  return SLJIT_ERR_UNSUPPORTED;
2375
150M
}
2376
2377
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2378
  sljit_s32 src1, sljit_sw src1w,
2379
  sljit_s32 src2, sljit_sw src2w)
2380
2.71G
{
2381
2.71G
  sljit_u8* inst;
2382
2383
2.71G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2384
2.71G
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2385
#else
2386
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2387
#endif
2388
1.12G
    BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2389
1.12G
    return SLJIT_SUCCESS;
2390
1.12G
  }
2391
2392
1.59G
  if (FAST_IS_REG(src1)) {
2393
1.58G
    if (src2 == SLJIT_IMM) {
2394
829M
      BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2395
829M
    }
2396
753M
    else {
2397
753M
      inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2398
753M
      FAIL_IF(!inst);
2399
753M
      *inst = CMP_r_rm;
2400
753M
    }
2401
1.58G
    return SLJIT_SUCCESS;
2402
1.58G
  }
2403
2404
15.4M
  if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
2405
12.8M
    inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2406
12.8M
    FAIL_IF(!inst);
2407
12.8M
    *inst = CMP_rm_r;
2408
12.8M
    return SLJIT_SUCCESS;
2409
12.8M
  }
2410
2411
2.67M
  if (src2 == SLJIT_IMM) {
2412
2.57M
    if (src1 == SLJIT_IMM) {
2413
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2414
0
      src1 = TMP_REG1;
2415
0
      src1w = 0;
2416
0
    }
2417
2.57M
    BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2418
2.57M
  }
2419
99.4k
  else {
2420
99.4k
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2421
99.4k
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2422
99.4k
    FAIL_IF(!inst);
2423
99.4k
    *inst = CMP_r_rm;
2424
99.4k
  }
2425
2.67M
  return SLJIT_SUCCESS;
2426
2.67M
}
2427
2428
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2429
  sljit_s32 src1, sljit_sw src1w,
2430
  sljit_s32 src2, sljit_sw src2w)
2431
51.5M
{
2432
51.5M
  sljit_u8* inst;
2433
2434
51.5M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2435
51.5M
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2436
#else
2437
  if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2438
#endif
2439
2.90M
    BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2440
2.90M
    return SLJIT_SUCCESS;
2441
2.90M
  }
2442
2443
48.6M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2444
48.6M
  if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2445
#else
2446
  if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
2447
#endif
2448
0
    BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2449
0
    return SLJIT_SUCCESS;
2450
0
  }
2451
2452
48.6M
  if (src1 != SLJIT_IMM) {
2453
48.6M
    if (src2 == SLJIT_IMM) {
2454
42.1M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2455
42.1M
      if (IS_HALFWORD(src2w) || compiler->mode32) {
2456
42.1M
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2457
42.1M
        FAIL_IF(!inst);
2458
42.1M
        *inst = GROUP_F7;
2459
42.1M
      } else {
2460
0
        FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
2461
0
        inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
2462
0
        FAIL_IF(!inst);
2463
0
        *inst = TEST_rm_r;
2464
0
      }
2465
#else
2466
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2467
      FAIL_IF(!inst);
2468
      *inst = GROUP_F7;
2469
#endif
2470
42.1M
      return SLJIT_SUCCESS;
2471
42.1M
    }
2472
6.52M
    else if (FAST_IS_REG(src1)) {
2473
6.52M
      inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2474
6.52M
      FAIL_IF(!inst);
2475
6.52M
      *inst = TEST_rm_r;
2476
6.52M
      return SLJIT_SUCCESS;
2477
6.52M
    }
2478
48.6M
  }
2479
2480
0
  if (src2 != SLJIT_IMM) {
2481
0
    if (src1 == SLJIT_IMM) {
2482
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2483
0
      if (IS_HALFWORD(src1w) || compiler->mode32) {
2484
0
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2485
0
        FAIL_IF(!inst);
2486
0
        *inst = GROUP_F7;
2487
0
      }
2488
0
      else {
2489
0
        FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2490
0
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2491
0
        FAIL_IF(!inst);
2492
0
        *inst = TEST_rm_r;
2493
0
      }
2494
#else
2495
      inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2496
      FAIL_IF(!inst);
2497
      *inst = GROUP_F7;
2498
#endif
2499
0
      return SLJIT_SUCCESS;
2500
0
    }
2501
0
    else if (FAST_IS_REG(src2)) {
2502
0
      inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2503
0
      FAIL_IF(!inst);
2504
0
      *inst = TEST_rm_r;
2505
0
      return SLJIT_SUCCESS;
2506
0
    }
2507
0
  }
2508
2509
0
  EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2510
0
  if (src2 == SLJIT_IMM) {
2511
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2512
0
    if (IS_HALFWORD(src2w) || compiler->mode32) {
2513
0
      inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2514
0
      FAIL_IF(!inst);
2515
0
      *inst = GROUP_F7;
2516
0
    }
2517
0
    else {
2518
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2519
0
      inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2520
0
      FAIL_IF(!inst);
2521
0
      *inst = TEST_rm_r;
2522
0
    }
2523
#else
2524
    inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2525
    FAIL_IF(!inst);
2526
    *inst = GROUP_F7;
2527
#endif
2528
0
  }
2529
0
  else {
2530
0
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2531
0
    FAIL_IF(!inst);
2532
0
    *inst = TEST_rm_r;
2533
0
  }
2534
0
  return SLJIT_SUCCESS;
2535
0
}
2536
2537
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2538
  sljit_u8 mode,
2539
  sljit_s32 dst, sljit_sw dstw,
2540
  sljit_s32 src1, sljit_sw src1w,
2541
  sljit_s32 src2, sljit_sw src2w)
2542
68.1M
{
2543
68.1M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2544
68.1M
  sljit_s32 mode32;
2545
68.1M
#endif
2546
68.1M
  sljit_u8* inst;
2547
2548
68.1M
  if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
2549
68.1M
    if (dst == src1 && dstw == src1w) {
2550
44.5M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2551
44.5M
      FAIL_IF(!inst);
2552
44.5M
      inst[1] |= mode;
2553
44.5M
      return SLJIT_SUCCESS;
2554
44.5M
    }
2555
23.5M
    if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2556
6.49M
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2557
6.49M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2558
6.49M
      FAIL_IF(!inst);
2559
6.49M
      inst[1] |= mode;
2560
6.49M
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2561
6.49M
      return SLJIT_SUCCESS;
2562
6.49M
    }
2563
17.0M
    if (FAST_IS_REG(dst)) {
2564
17.0M
      EMIT_MOV(compiler, dst, 0, src1, src1w);
2565
17.0M
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2566
17.0M
      FAIL_IF(!inst);
2567
17.0M
      inst[1] |= mode;
2568
17.0M
      return SLJIT_SUCCESS;
2569
17.0M
    }
2570
2571
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2572
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2573
0
    FAIL_IF(!inst);
2574
0
    inst[1] |= mode;
2575
0
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2576
0
    return SLJIT_SUCCESS;
2577
0
  }
2578
2579
12.5k
  if (dst == SLJIT_PREF_SHIFT_REG) {
2580
12.5k
    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2581
12.5k
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2582
12.5k
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2583
12.5k
    FAIL_IF(!inst);
2584
12.5k
    inst[1] |= mode;
2585
12.5k
    return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2586
12.5k
  }
2587
2588
0
  if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2589
0
    if (src1 != dst)
2590
0
      EMIT_MOV(compiler, dst, 0, src1, src1w);
2591
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2592
0
    mode32 = compiler->mode32;
2593
0
    compiler->mode32 = 0;
2594
0
#endif
2595
0
    EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2596
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2597
0
    compiler->mode32 = mode32;
2598
0
#endif
2599
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2600
0
    inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2601
0
    FAIL_IF(!inst);
2602
0
    inst[1] |= mode;
2603
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2604
0
    compiler->mode32 = 0;
2605
0
#endif
2606
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2607
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2608
0
    compiler->mode32 = mode32;
2609
0
#endif
2610
0
    return SLJIT_SUCCESS;
2611
0
  }
2612
2613
  /* This case is complex since ecx itself may be used for
2614
     addressing, and this case must be supported as well. */
2615
0
  EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2616
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2617
  EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2618
#else /* !SLJIT_CONFIG_X86_32 */
2619
0
  mode32 = compiler->mode32;
2620
0
  compiler->mode32 = 0;
2621
0
  EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2622
0
  compiler->mode32 = mode32;
2623
0
#endif /* SLJIT_CONFIG_X86_32 */
2624
2625
0
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2626
0
  inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2627
0
  FAIL_IF(!inst);
2628
0
  inst[1] |= mode;
2629
2630
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2631
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2632
#else
2633
0
  compiler->mode32 = 0;
2634
0
  EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2635
0
  compiler->mode32 = mode32;
2636
0
#endif /* SLJIT_CONFIG_X86_32 */
2637
2638
0
  if (dst != TMP_REG1)
2639
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2640
2641
0
  return SLJIT_SUCCESS;
2642
0
}
2643
2644
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2645
  sljit_u8 mode, sljit_s32 set_flags,
2646
  sljit_s32 dst, sljit_sw dstw,
2647
  sljit_s32 src1, sljit_sw src1w,
2648
  sljit_s32 src2, sljit_sw src2w)
2649
68.1M
{
2650
  /* The CPU does not set flags if the shift count is 0. */
2651
68.1M
  if (src2 == SLJIT_IMM) {
2652
54.2M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2653
54.2M
    src2w &= compiler->mode32 ? 0x1f : 0x3f;
2654
#else /* !SLJIT_CONFIG_X86_64 */
2655
    src2w &= 0x1f;
2656
#endif /* SLJIT_CONFIG_X86_64 */
2657
54.2M
    if (src2w != 0)
2658
54.2M
      return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2659
2660
0
    if (!set_flags)
2661
0
      return emit_mov(compiler, dst, dstw, src1, src1w);
2662
    /* OR dst, src, 0 */
2663
0
    return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2664
0
      dst, dstw, src1, src1w, SLJIT_IMM, 0);
2665
0
  }
2666
2667
13.8M
  if (!set_flags)
2668
13.8M
    return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2669
2670
0
  if (!FAST_IS_REG(dst))
2671
0
    FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2672
2673
0
  FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2674
2675
0
  if (FAST_IS_REG(dst))
2676
0
    return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2677
0
  return SLJIT_SUCCESS;
2678
0
}
2679
2680
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2681
  sljit_s32 dst, sljit_sw dstw,
2682
  sljit_s32 src1, sljit_sw src1w,
2683
  sljit_s32 src2, sljit_sw src2w)
2684
1.34G
{
2685
1.34G
  CHECK_ERROR();
2686
1.34G
  CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2687
1.34G
  ADJUST_LOCAL_OFFSET(dst, dstw);
2688
1.34G
  ADJUST_LOCAL_OFFSET(src1, src1w);
2689
1.34G
  ADJUST_LOCAL_OFFSET(src2, src2w);
2690
2691
1.34G
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
2692
1.34G
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2693
1.34G
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2694
1.34G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2695
1.34G
  compiler->mode32 = op & SLJIT_32;
2696
1.34G
#endif
2697
2698
1.34G
  switch (GET_OPCODE(op)) {
2699
685M
  case SLJIT_ADD:
2700
685M
    if (!HAS_FLAGS(op)) {
2701
685M
      if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2702
14.7M
        return compiler->error;
2703
685M
    }
2704
670M
    return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2705
670M
      dst, dstw, src1, src1w, src2, src2w);
2706
0
  case SLJIT_ADDC:
2707
0
    return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2708
0
      dst, dstw, src1, src1w, src2, src2w);
2709
515M
  case SLJIT_SUB:
2710
515M
    if (src1 == SLJIT_IMM && src1w == 0)
2711
0
      return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2712
2713
515M
    if (!HAS_FLAGS(op)) {
2714
413M
      if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2715
6.99M
        return compiler->error;
2716
406M
      if (FAST_IS_REG(dst) && src2 == dst) {
2717
1.76M
        FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2718
1.76M
        return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2719
1.76M
      }
2720
406M
    }
2721
2722
507M
    return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2723
507M
      dst, dstw, src1, src1w, src2, src2w);
2724
0
  case SLJIT_SUBC:
2725
0
    return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2726
0
      dst, dstw, src1, src1w, src2, src2w);
2727
0
  case SLJIT_MUL:
2728
0
    return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2729
28.5M
  case SLJIT_AND:
2730
28.5M
    return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2731
28.5M
      dst, dstw, src1, src1w, src2, src2w);
2732
43.6M
  case SLJIT_OR:
2733
43.6M
    return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2734
43.6M
      dst, dstw, src1, src1w, src2, src2w);
2735
472k
  case SLJIT_XOR:
2736
472k
    if (!HAS_FLAGS(op)) {
2737
447k
      if (src2 == SLJIT_IMM && src2w == -1)
2738
0
        return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
2739
447k
      if (src1 == SLJIT_IMM && src1w == -1)
2740
0
        return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
2741
447k
    }
2742
2743
472k
    return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2744
472k
      dst, dstw, src1, src1w, src2, src2w);
2745
50.9M
  case SLJIT_SHL:
2746
50.9M
  case SLJIT_MSHL:
2747
50.9M
    return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2748
50.9M
      dst, dstw, src1, src1w, src2, src2w);
2749
16.7M
  case SLJIT_LSHR:
2750
16.7M
  case SLJIT_MLSHR:
2751
16.7M
    return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2752
16.7M
      dst, dstw, src1, src1w, src2, src2w);
2753
404k
  case SLJIT_ASHR:
2754
404k
  case SLJIT_MASHR:
2755
404k
    return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2756
404k
      dst, dstw, src1, src1w, src2, src2w);
2757
0
  case SLJIT_ROTL:
2758
0
    return emit_shift_with_flags(compiler, ROL, 0,
2759
0
      dst, dstw, src1, src1w, src2, src2w);
2760
0
  case SLJIT_ROTR:
2761
0
    return emit_shift_with_flags(compiler, ROR, 0,
2762
0
      dst, dstw, src1, src1w, src2, src2w);
2763
1.34G
  }
2764
2765
0
  return SLJIT_SUCCESS;
2766
1.34G
}
2767
2768
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2769
  sljit_s32 src1, sljit_sw src1w,
2770
  sljit_s32 src2, sljit_sw src2w)
2771
2.77G
{
2772
2.77G
  sljit_s32 opcode = GET_OPCODE(op);
2773
2774
2.77G
  CHECK_ERROR();
2775
2.77G
  CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2776
2777
2.77G
  if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2778
0
    SLJIT_SKIP_CHECKS(compiler);
2779
0
    return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2780
0
  }
2781
2782
2.77G
  ADJUST_LOCAL_OFFSET(src1, src1w);
2783
2.77G
  ADJUST_LOCAL_OFFSET(src2, src2w);
2784
2785
2.77G
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2786
2.77G
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2787
2.77G
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2788
2.77G
  compiler->mode32 = op & SLJIT_32;
2789
2.77G
#endif
2790
2791
2.77G
  if (opcode == SLJIT_SUB)
2792
2.71G
    return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2793
2794
51.5M
  return emit_test_binary(compiler, src1, src1w, src2, src2w);
2795
2.77G
}
2796
2797
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2798
  sljit_s32 dst_reg,
2799
  sljit_s32 src1, sljit_sw src1w,
2800
  sljit_s32 src2, sljit_sw src2w)
2801
0
{
2802
0
  sljit_u8* inst;
2803
0
  sljit_sw dstw = 0;
2804
0
2805
0
  CHECK_ERROR();
2806
0
  CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2807
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
2808
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
2809
0
2810
0
  CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2811
0
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
2812
0
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
2813
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2814
0
  compiler->mode32 = op & SLJIT_32;
2815
0
#endif
2816
0
2817
0
  switch (GET_OPCODE(op)) {
2818
0
  case SLJIT_MULADD:
2819
0
    FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
2820
0
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
2821
0
    FAIL_IF(!inst);
2822
0
    *inst = ADD_rm_r;
2823
0
    return SLJIT_SUCCESS;
2824
0
  }
2825
0
2826
0
  return SLJIT_SUCCESS;
2827
0
}
2828
2829
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2830
  sljit_s32 dst_reg,
2831
  sljit_s32 src1_reg,
2832
  sljit_s32 src2_reg,
2833
  sljit_s32 src3, sljit_sw src3w)
2834
0
{
2835
0
  sljit_s32 is_rotate, is_left, move_src1;
2836
0
  sljit_u8* inst;
2837
0
  sljit_sw src1w = 0;
2838
0
  sljit_sw dstw = 0;
2839
0
  /* The whole register must be saved even for 32 bit operations. */
2840
0
  sljit_u8 restore_ecx = 0;
2841
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2842
0
  sljit_sw src2w = 0;
2843
0
  sljit_s32 restore_sp4 = 0;
2844
0
#endif /* SLJIT_CONFIG_X86_32 */
2845
0
2846
0
  CHECK_ERROR();
2847
0
  CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2848
0
  ADJUST_LOCAL_OFFSET(src3, src3w);
2849
0
2850
0
  CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2851
0
  CHECK_EXTRA_REGS(src3, src3w, (void)0);
2852
0
2853
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2854
0
  compiler->mode32 = op & SLJIT_32;
2855
0
#endif /* SLJIT_CONFIG_X86_64 */
2856
0
2857
0
  if (src3 == SLJIT_IMM) {
2858
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2859
0
    src3w &= 0x1f;
2860
0
#else /* !SLJIT_CONFIG_X86_32 */
2861
0
    src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2862
0
#endif /* SLJIT_CONFIG_X86_32 */
2863
0
2864
0
    if (src3w == 0)
2865
0
      return SLJIT_SUCCESS;
2866
0
  }
2867
0
2868
0
  is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2869
0
2870
0
  is_rotate = (src1_reg == src2_reg);
2871
0
  CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
2872
0
  CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
2873
0
2874
0
  if (is_rotate)
2875
0
    return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
2876
0
2877
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2878
0
  if (src2_reg & SLJIT_MEM) {
2879
0
    EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
2880
0
    src2_reg = TMP_REG1;
2881
0
  }
2882
0
#endif /* SLJIT_CONFIG_X86_32 */
2883
0
2884
0
  if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
2885
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2886
0
    EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2887
0
    src1_reg = TMP_REG1;
2888
0
    src1w = 0;
2889
0
#else /* !SLJIT_CONFIG_X86_64 */
2890
0
    if (src2_reg != TMP_REG1) {
2891
0
      EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2892
0
      src1_reg = TMP_REG1;
2893
0
      src1w = 0;
2894
0
    } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2895
0
      restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2896
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2897
0
      EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2898
0
      src1_reg = restore_sp4;
2899
0
      src1w = 0;
2900
0
    } else {
2901
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2902
0
      restore_sp4 = src1_reg;
2903
0
    }
2904
0
#endif /* SLJIT_CONFIG_X86_64 */
2905
0
2906
0
    if (src3 != SLJIT_PREF_SHIFT_REG)
2907
0
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2908
0
  } else {
2909
0
    if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2910
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2911
0
      compiler->mode32 = 0;
2912
0
#endif /* SLJIT_CONFIG_X86_64 */
2913
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2914
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2915
0
      compiler->mode32 = op & SLJIT_32;
2916
0
#endif /* SLJIT_CONFIG_X86_64 */
2917
0
      src2_reg = TMP_REG1;
2918
0
      restore_ecx = 1;
2919
0
    }
2920
0
2921
0
    move_src1 = 0;
2922
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2923
0
    if (dst_reg != src1_reg) {
2924
0
      if (dst_reg != src3) {
2925
0
        EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2926
0
        src1_reg = dst_reg;
2927
0
        src1w = 0;
2928
0
      } else
2929
0
        move_src1 = 1;
2930
0
    }
2931
0
#else /* !SLJIT_CONFIG_X86_64 */
2932
0
    if (dst_reg & SLJIT_MEM) {
2933
0
      if (src2_reg != TMP_REG1) {
2934
0
        EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2935
0
        src1_reg = TMP_REG1;
2936
0
        src1w = 0;
2937
0
      } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2938
0
        restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2939
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2940
0
        EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2941
0
        src1_reg = restore_sp4;
2942
0
        src1w = 0;
2943
0
      } else {
2944
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2945
0
        restore_sp4 = src1_reg;
2946
0
      }
2947
0
    } else if (dst_reg != src1_reg) {
2948
0
      if (dst_reg != src3) {
2949
0
        EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2950
0
        src1_reg = dst_reg;
2951
0
        src1w = 0;
2952
0
      } else
2953
0
        move_src1 = 1;
2954
0
    }
2955
0
#endif /* SLJIT_CONFIG_X86_64 */
2956
0
2957
0
    if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2958
0
      if (!restore_ecx) {
2959
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2960
0
        compiler->mode32 = 0;
2961
0
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2962
0
        compiler->mode32 = op & SLJIT_32;
2963
0
        restore_ecx = 1;
2964
0
#else /* !SLJIT_CONFIG_X86_64 */
2965
0
        if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
2966
0
          EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2967
0
          restore_ecx = 1;
2968
0
        } else {
2969
0
          EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2970
0
          restore_ecx = 2;
2971
0
        }
2972
0
#endif /* SLJIT_CONFIG_X86_64 */
2973
0
      }
2974
0
      EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2975
0
    }
2976
0
2977
0
    if (move_src1) {
2978
0
      EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2979
0
      src1_reg = dst_reg;
2980
0
      src1w = 0;
2981
0
    }
2982
0
  }
2983
0
2984
0
  inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
2985
0
  FAIL_IF(!inst);
2986
0
  inst[0] = GROUP_0F;
2987
0
2988
0
  if (src3 == SLJIT_IMM) {
2989
0
    inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2990
0
2991
0
    /* Immediate argument is added separately. */
2992
0
    FAIL_IF(emit_byte(compiler, U8(src3w)));
2993
0
  } else
2994
0
    inst[1] = U8(is_left ? SHLD : SHRD);
2995
0
2996
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2997
0
  if (restore_ecx) {
2998
0
    compiler->mode32 = 0;
2999
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
3000
0
  }
3001
0
3002
0
  if (src1_reg != dst_reg) {
3003
0
    compiler->mode32 = op & SLJIT_32;
3004
0
    return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
3005
0
  }
3006
0
#else /* !SLJIT_CONFIG_X86_64 */
3007
0
  if (restore_ecx)
3008
0
    EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
3009
0
3010
0
  if (src1_reg != dst_reg)
3011
0
    EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
3012
0
3013
0
  if (restore_sp4)
3014
0
    return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
3015
0
#endif /* SLJIT_CONFIG_X86_32 */
3016
0
3017
0
  return SLJIT_SUCCESS;
3018
0
}
3019
3020
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,
3021
  sljit_s32 dst, sljit_sw dstw,
3022
  sljit_s32 src1, sljit_sw src1w,
3023
  sljit_s32 src2, sljit_sw src2w,
3024
  sljit_sw shift_arg)
3025
15.6M
{
3026
15.6M
  sljit_s32 dst_r;
3027
15.6M
  int use_lea = 0;
3028
15.6M
  sljit_u8* inst;
3029
3030
15.6M
  CHECK_ERROR();
3031
15.6M
  CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));
3032
15.6M
  ADJUST_LOCAL_OFFSET(dst, dstw);
3033
15.6M
  ADJUST_LOCAL_OFFSET(src1, src1w);
3034
15.6M
  ADJUST_LOCAL_OFFSET(src2, src2w);
3035
3036
15.6M
  shift_arg &= (sljit_sw)((sizeof(sljit_sw) * 8) - 1);
3037
3038
15.6M
  if (src2 == SLJIT_IMM) {
3039
0
    src2w = src2w << shift_arg;
3040
0
    shift_arg = 0;
3041
0
  }
3042
3043
15.6M
  if (shift_arg == 0) {
3044
0
    SLJIT_SKIP_CHECKS(compiler);
3045
0
    return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);
3046
0
  }
3047
3048
15.6M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3049
15.6M
  CHECK_EXTRA_REGS(src1, src1w, (void)0);
3050
15.6M
  CHECK_EXTRA_REGS(src2, src2w, (void)0);
3051
3052
15.6M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3053
15.6M
  compiler->mode32 = 0;
3054
15.6M
#endif
3055
3056
15.6M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3057
15.6M
  if (shift_arg <= 3) {
3058
7.81M
    use_lea = 1;
3059
7.81M
    if (!FAST_IS_REG(src2)) {
3060
0
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
3061
0
      src2 = TMP_REG1;
3062
0
    }
3063
3064
7.81M
    if (!FAST_IS_REG(src1)) {
3065
0
      EMIT_MOV(compiler, src2 == TMP_REG1 ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
3066
0
      src1 = src2 == TMP_REG1 ? TMP_REG2 : TMP_REG1;
3067
0
    }
3068
7.81M
  }
3069
#else /* !SLJIT_CONFIG_X86_64 */
3070
  if (shift_arg <= 3 && (FAST_IS_REG(src1) || (FAST_IS_REG(src2) && src2 != TMP_REG1))) {
3071
    use_lea = 1;
3072
    if (!FAST_IS_REG(src2)) {
3073
      EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
3074
      src2 = TMP_REG1;
3075
    }
3076
3077
    if (!FAST_IS_REG(src1)) {
3078
      EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
3079
      src1 = TMP_REG1;
3080
    }
3081
  }
3082
#endif /* SLJIT_CONFIG_X86_64 */
3083
3084
15.6M
  if (use_lea) {
3085
7.81M
    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3086
3087
7.81M
    inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), shift_arg);
3088
7.81M
    FAIL_IF(!inst);
3089
7.81M
    *inst = LEA_r_m;
3090
3091
7.81M
    if (!FAST_IS_REG(dst))
3092
0
      return emit_mov(compiler, dst, dstw, dst_r, 0);
3093
3094
7.81M
    return SLJIT_SUCCESS;
3095
7.81M
  }
3096
3097
7.82M
  if ((op & SLJIT_SRC2_UNDEFINED) != 0 && FAST_IS_REG(src2) && src1 != src2)
3098
7.82M
    dst_r = src2;
3099
0
  else {
3100
0
    dst_r = FAST_IS_REG(dst) && (dst != src1) ? dst : TMP_REG1;
3101
3102
0
    if (src2 != dst_r) {
3103
0
      EMIT_MOV(compiler, dst_r, 0, src2, src2w);
3104
0
    }
3105
0
  }
3106
3107
7.82M
  inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, shift_arg, dst_r, 0);
3108
7.82M
  FAIL_IF(!inst);
3109
7.82M
  inst[1] |= SHL;
3110
3111
7.82M
  if (dst == src1 && dstw == src1w) {
3112
7.82M
    inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
3113
7.82M
    FAIL_IF(!inst);
3114
7.82M
    *inst = ADD_rm_r;
3115
7.82M
    return SLJIT_SUCCESS;
3116
7.82M
  }
3117
3118
0
  if (FAST_IS_REG(dst) && FAST_IS_REG(src1)) {
3119
0
    inst = emit_x86_instruction(compiler, 1, dst, 0, SLJIT_MEM2(src1, dst_r), 0);
3120
0
    FAIL_IF(!inst);
3121
0
    *inst = LEA_r_m;
3122
0
    return SLJIT_SUCCESS;
3123
0
  }
3124
3125
0
  if (src1 == SLJIT_IMM) {
3126
0
    BINARY_IMM(ADD, ADD_rm_r, src1w, dst_r, 0);
3127
0
  } else {
3128
0
    inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
3129
0
    FAIL_IF(!inst);
3130
0
    *inst = ADD_r_rm;
3131
0
  }
3132
3133
0
  if (dst != dst_r)
3134
0
    return emit_mov(compiler, dst, dstw, dst_r, 0);
3135
3136
0
  return SLJIT_SUCCESS;
3137
0
}
3138
3139
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3140
  sljit_s32 src, sljit_sw srcw)
3141
1.38M
{
3142
1.38M
  CHECK_ERROR();
3143
1.38M
  CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3144
1.38M
  ADJUST_LOCAL_OFFSET(src, srcw);
3145
3146
1.38M
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3147
3148
1.38M
  switch (op) {
3149
1.38M
  case SLJIT_FAST_RETURN:
3150
1.38M
    return emit_fast_return(compiler, src, srcw);
3151
0
  case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3152
    /* Don't adjust shadow stack if it isn't enabled.  */
3153
0
    if (!cpu_has_shadow_stack ())
3154
0
      return SLJIT_SUCCESS;
3155
0
    return adjust_shadow_stack(compiler, src, srcw);
3156
0
  case SLJIT_PREFETCH_L1:
3157
0
  case SLJIT_PREFETCH_L2:
3158
0
  case SLJIT_PREFETCH_L3:
3159
0
  case SLJIT_PREFETCH_ONCE:
3160
0
    return emit_prefetch(compiler, op, src, srcw);
3161
1.38M
  }
3162
3163
0
  return SLJIT_SUCCESS;
3164
1.38M
}
3165
3166
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3167
  sljit_s32 dst, sljit_sw dstw)
3168
1.30M
{
3169
1.30M
  CHECK_ERROR();
3170
1.30M
  CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3171
1.30M
  ADJUST_LOCAL_OFFSET(dst, dstw);
3172
3173
1.30M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3174
3175
1.30M
  switch (op) {
3176
1.30M
  case SLJIT_FAST_ENTER:
3177
1.30M
    return emit_fast_enter(compiler, dst, dstw);
3178
0
  case SLJIT_GET_RETURN_ADDRESS:
3179
0
    return sljit_emit_get_return_address(compiler, dst, dstw);
3180
1.30M
  }
3181
3182
0
  return SLJIT_SUCCESS;
3183
1.30M
}
3184
3185
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3186
7.01M
{
3187
7.01M
  CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3188
3189
7.01M
  if (type == SLJIT_GP_REGISTER) {
3190
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3191
    if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
3192
      return -1;
3193
#endif /* SLJIT_CONFIG_X86_32 */
3194
4.84M
    return reg_map[reg];
3195
4.84M
  }
3196
3197
2.17M
  if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
3198
0
    return -1;
3199
3200
2.17M
  return freg_map[reg];
3201
2.17M
}
3202
3203
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3204
  void *instruction, sljit_u32 size)
3205
1.84M
{
3206
1.84M
  sljit_u8 *inst;
3207
3208
1.84M
  CHECK_ERROR();
3209
1.84M
  CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3210
3211
1.84M
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3212
1.84M
  FAIL_IF(!inst);
3213
1.84M
  INC_SIZE(size);
3214
1.84M
  SLJIT_MEMCPY(inst, instruction, size);
3215
1.84M
  return SLJIT_SUCCESS;
3216
1.84M
}
3217
3218
/* --------------------------------------------------------------------- */
3219
/*  Floating point operators                                             */
3220
/* --------------------------------------------------------------------- */
3221
3222
/* Alignment(3) + 4 * 16 bytes. */
3223
static sljit_u32 sse2_data[3 + (4 * 4)];
3224
static sljit_u32 *sse2_buffer;
3225
3226
static void init_compiler(void)
3227
9
{
3228
9
  get_cpu_features();
3229
3230
  /* Align to 16 bytes. */
3231
9
  sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
3232
3233
  /* Single precision constants (each constant is 16 byte long). */
3234
9
  sse2_buffer[0] = 0x80000000;
3235
9
  sse2_buffer[4] = 0x7fffffff;
3236
  /* Double precision constants (each constant is 16 byte long). */
3237
9
  sse2_buffer[8] = 0;
3238
9
  sse2_buffer[9] = 0x80000000;
3239
9
  sse2_buffer[12] = 0xffffffff;
3240
9
  sse2_buffer[13] = 0x7fffffff;
3241
9
}
3242
3243
static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
3244
  sljit_uw op,
3245
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3246
851M
{
3247
851M
  sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
3248
851M
  FAIL_IF(!inst);
3249
851M
  inst[0] = GROUP_0F;
3250
851M
  inst[1] = op & 0xff;
3251
851M
  return SLJIT_SUCCESS;
3252
851M
}
3253
3254
static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
3255
  sljit_uw op,
3256
  sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3257
0
{
3258
0
  sljit_u8 *inst;
3259
3260
0
  SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
3261
3262
0
  inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
3263
0
  FAIL_IF(!inst);
3264
0
  inst[0] = GROUP_0F;
3265
0
  inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
3266
0
  inst[2] = op & 0xff;
3267
0
  return SLJIT_SUCCESS;
3268
0
}
3269
3270
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
3271
  sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3272
0
{
3273
0
  return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
3274
0
}
3275
3276
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
3277
  sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
3278
0
{
3279
0
  return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
3280
0
}
3281
3282
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3283
  sljit_s32 dst, sljit_sw dstw,
3284
  sljit_s32 src, sljit_sw srcw)
3285
0
{
3286
0
  sljit_s32 dst_r;
3287
3288
0
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3289
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3290
3291
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3292
0
  if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3293
0
    compiler->mode32 = 0;
3294
0
#endif
3295
3296
0
  FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
3297
3298
0
  if (dst & SLJIT_MEM)
3299
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3300
0
  return SLJIT_SUCCESS;
3301
0
}
3302
3303
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3304
  sljit_s32 dst, sljit_sw dstw,
3305
  sljit_s32 src, sljit_sw srcw)
3306
0
{
3307
0
  sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3308
3309
0
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3310
3311
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3312
0
  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3313
0
    compiler->mode32 = 0;
3314
0
#endif
3315
3316
0
  if (src == SLJIT_IMM) {
3317
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3318
0
    if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3319
0
      srcw = (sljit_s32)srcw;
3320
0
#endif
3321
0
    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
3322
0
    src = TMP_REG1;
3323
0
    srcw = 0;
3324
0
  }
3325
3326
0
  FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
3327
3328
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3329
0
  compiler->mode32 = 1;
3330
0
#endif
3331
0
  if (dst_r == TMP_FREG)
3332
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3333
0
  return SLJIT_SUCCESS;
3334
0
}
3335
3336
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3337
  sljit_s32 src1, sljit_sw src1w,
3338
  sljit_s32 src2, sljit_sw src2w)
3339
0
{
3340
0
  switch (GET_FLAG_TYPE(op)) {
3341
0
  case SLJIT_ORDERED_EQUAL:
3342
    /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
3343
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3344
0
    FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
3345
3346
    /* EQ */
3347
0
    FAIL_IF(emit_byte(compiler, 0));
3348
3349
0
    src1 = TMP_FREG;
3350
0
    src2 = TMP_FREG;
3351
0
    src2w = 0;
3352
0
    break;
3353
3354
0
  case SLJIT_ORDERED_LESS:
3355
0
  case SLJIT_UNORDERED_OR_GREATER:
3356
    /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL  */
3357
0
    if (!FAST_IS_REG(src2)) {
3358
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3359
0
      src2 = TMP_FREG;
3360
0
    }
3361
3362
0
    return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
3363
0
  }
3364
3365
0
  if (!FAST_IS_REG(src1)) {
3366
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3367
0
    src1 = TMP_FREG;
3368
0
  }
3369
3370
0
  return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
3371
0
}
3372
3373
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3374
  sljit_s32 dst, sljit_sw dstw,
3375
  sljit_s32 src, sljit_sw srcw)
3376
0
{
3377
0
  sljit_s32 dst_r;
3378
0
  sljit_u8 *inst;
3379
3380
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3381
0
  compiler->mode32 = 1;
3382
0
#endif
3383
3384
0
  CHECK_ERROR();
3385
0
  SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3386
3387
0
  if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3388
0
    if (FAST_IS_REG(dst))
3389
0
      return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3390
0
    if (FAST_IS_REG(src))
3391
0
      return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3392
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3393
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3394
0
  }
3395
3396
0
  if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
3397
0
    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3398
0
    if (FAST_IS_REG(src)) {
3399
      /* We overwrite the high bits of source. From SLJIT point of view,
3400
         this is not an issue.
3401
         Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
3402
0
      FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3403
0
    } else {
3404
0
      FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3405
0
      src = TMP_FREG;
3406
0
    }
3407
3408
0
    FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3409
0
    if (dst_r == TMP_FREG)
3410
0
      return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3411
0
    return SLJIT_SUCCESS;
3412
0
  }
3413
3414
0
  if (FAST_IS_REG(dst)) {
3415
0
    dst_r = (dst == src) ? TMP_FREG : dst;
3416
3417
0
    if (src & SLJIT_MEM)
3418
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3419
3420
0
    FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3421
3422
0
    inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
3423
0
    inst[0] = GROUP_0F;
3424
    /* Same as PSRLD_x / PSRLQ_x */
3425
0
    inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3426
3427
0
    if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3428
0
      inst[2] |= 2 << 3;
3429
0
      FAIL_IF(emit_byte(compiler, 1));
3430
0
    } else {
3431
0
      inst[2] |= 6 << 3;
3432
0
      FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3433
0
    }
3434
3435
0
    if (dst_r != TMP_FREG)
3436
0
      dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3437
0
    return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3438
0
  }
3439
3440
0
  FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3441
3442
0
  switch (GET_OPCODE(op)) {
3443
0
  case SLJIT_NEG_F64:
3444
0
    FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3445
0
    break;
3446
3447
0
  case SLJIT_ABS_F64:
3448
0
    FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3449
0
    break;
3450
0
  }
3451
3452
0
  return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3453
0
}
3454
3455
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3456
  sljit_s32 dst, sljit_sw dstw,
3457
  sljit_s32 src1, sljit_sw src1w,
3458
  sljit_s32 src2, sljit_sw src2w)
3459
0
{
3460
0
  sljit_s32 dst_r;
3461
0
3462
0
  CHECK_ERROR();
3463
0
  CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3464
0
  ADJUST_LOCAL_OFFSET(dst, dstw);
3465
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3466
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
3467
0
3468
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3469
0
  compiler->mode32 = 1;
3470
0
#endif
3471
0
3472
0
  if (FAST_IS_REG(dst)) {
3473
0
    dst_r = dst;
3474
0
    if (dst == src1)
3475
0
      ; /* Do nothing here. */
3476
0
    else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {
3477
0
      /* Swap arguments. */
3478
0
      src2 = src1;
3479
0
      src2w = src1w;
3480
0
    } else if (dst != src2)
3481
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3482
0
    else {
3483
0
      dst_r = TMP_FREG;
3484
0
      FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3485
0
    }
3486
0
  } else {
3487
0
    dst_r = TMP_FREG;
3488
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3489
0
  }
3490
0
3491
0
  switch (GET_OPCODE(op)) {
3492
0
  case SLJIT_ADD_F64:
3493
0
    FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3494
0
    break;
3495
0
3496
0
  case SLJIT_SUB_F64:
3497
0
    FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3498
0
    break;
3499
0
3500
0
  case SLJIT_MUL_F64:
3501
0
    FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3502
0
    break;
3503
0
3504
0
  case SLJIT_DIV_F64:
3505
0
    FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3506
0
    break;
3507
0
  }
3508
0
3509
0
  if (dst_r != dst)
3510
0
    return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3511
0
  return SLJIT_SUCCESS;
3512
0
}
3513
3514
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3515
  sljit_s32 dst_freg,
3516
  sljit_s32 src1, sljit_sw src1w,
3517
  sljit_s32 src2, sljit_sw src2w)
3518
0
{
3519
0
  sljit_uw pref;
3520
0
3521
0
  CHECK_ERROR();
3522
0
  CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3523
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3524
0
  ADJUST_LOCAL_OFFSET(src2, src2w);
3525
0
3526
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3527
0
  compiler->mode32 = 1;
3528
0
#endif
3529
0
3530
0
  if (dst_freg == src1) {
3531
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3532
0
    pref = EX86_SELECT_66(op) | EX86_SSE2;
3533
0
    FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
3534
0
    FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3535
0
    return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
3536
0
  }
3537
0
3538
0
  if (src1 & SLJIT_MEM) {
3539
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3540
0
    src1 = TMP_FREG;
3541
0
    src1w = 0;
3542
0
  }
3543
0
3544
0
  if (dst_freg != src2)
3545
0
    FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
3546
0
3547
0
  pref = EX86_SELECT_66(op) | EX86_SSE2;
3548
0
  FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
3549
0
  FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3550
0
  return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
3551
0
}
3552
3553
/* --------------------------------------------------------------------- */
3554
/*  Conditional instructions                                             */
3555
/* --------------------------------------------------------------------- */
3556
3557
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3558
1.78G
{
3559
1.78G
  sljit_u8 *inst;
3560
1.78G
  struct sljit_label *label;
3561
3562
1.78G
  CHECK_ERROR_PTR();
3563
1.78G
  CHECK_PTR(check_sljit_emit_label(compiler));
3564
3565
1.78G
  if (compiler->last_label && compiler->last_label->size == compiler->size)
3566
303M
    return compiler->last_label;
3567
3568
1.48G
  label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3569
1.48G
  PTR_FAIL_IF(!label);
3570
1.48G
  set_label(label, compiler);
3571
3572
1.48G
  inst = (sljit_u8*)ensure_buf(compiler, 1);
3573
1.48G
  PTR_FAIL_IF(!inst);
3574
1.48G
  inst[0] = SLJIT_INST_LABEL;
3575
3576
1.48G
  return label;
3577
1.48G
}
3578
3579
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
3580
  sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
3581
0
{
3582
0
  sljit_uw mask, size;
3583
0
  sljit_u8 *inst;
3584
0
  struct sljit_label *label;
3585
0
  struct sljit_label *next_label;
3586
0
  struct sljit_extended_label *ext_label;
3587
0
3588
0
  CHECK_ERROR_PTR();
3589
0
  CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));
3590
0
3591
0
  sljit_reset_read_only_buffers(buffers);
3592
0
3593
0
  if (alignment <= SLJIT_LABEL_ALIGN_1) {
3594
0
    SLJIT_SKIP_CHECKS(compiler);
3595
0
    label = sljit_emit_label(compiler);
3596
0
    PTR_FAIL_IF(!label);
3597
0
  } else {
3598
0
    /* The used space is filled with NOPs. */
3599
0
    mask = ((sljit_uw)1 << alignment) - 1;
3600
0
    compiler->size += mask;
3601
0
3602
0
    inst = (sljit_u8*)ensure_buf(compiler, 1);
3603
0
    PTR_FAIL_IF(!inst);
3604
0
    inst[0] = SLJIT_INST_LABEL;
3605
0
3606
0
    ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
3607
0
    PTR_FAIL_IF(!ext_label);
3608
0
    set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
3609
0
    label = &ext_label->label;
3610
0
  }
3611
0
3612
0
  if (buffers == NULL)
3613
0
    return label;
3614
0
3615
0
  next_label = label;
3616
0
3617
0
  while (1) {
3618
0
    buffers->u.label = next_label;
3619
0
    size = buffers->size;
3620
0
3621
0
    while (size >= 4) {
3622
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
3623
0
      PTR_FAIL_IF(!inst);
3624
0
      INC_SIZE(4);
3625
0
      inst[0] = NOP;
3626
0
      inst[1] = NOP;
3627
0
      inst[2] = NOP;
3628
0
      inst[3] = NOP;
3629
0
      size -= 4;
3630
0
    }
3631
0
3632
0
    if (size > 0) {
3633
0
      inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3634
0
      PTR_FAIL_IF(!inst);
3635
0
      INC_SIZE(size);
3636
0
3637
0
      do {
3638
0
        *inst++ = NOP;
3639
0
      } while (--size != 0);
3640
0
    }
3641
0
3642
0
    buffers = buffers->next;
3643
0
3644
0
    if (buffers == NULL)
3645
0
      break;
3646
0
3647
0
    SLJIT_SKIP_CHECKS(compiler);
3648
0
    next_label = sljit_emit_label(compiler);
3649
0
    PTR_FAIL_IF(!next_label);
3650
0
  }
3651
0
3652
0
  return label;
3653
0
}
3654
3655
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3656
3.40G
{
3657
3.40G
  sljit_u8 *inst;
3658
3.40G
  struct sljit_jump *jump;
3659
3660
3.40G
  CHECK_ERROR_PTR();
3661
3.40G
  CHECK_PTR(check_sljit_emit_jump(compiler, type));
3662
3663
3.40G
  jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3664
3.40G
  PTR_FAIL_IF_NULL(jump);
3665
3.40G
  set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3666
3.40G
  type &= 0xff;
3667
3668
3.40G
  jump->addr = compiler->size;
3669
  /* Worst case size. */
3670
3.40G
  compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
3671
3.40G
  inst = (sljit_u8*)ensure_buf(compiler, 1);
3672
3.40G
  PTR_FAIL_IF_NULL(inst);
3673
3674
3.40G
  inst[0] = SLJIT_INST_JUMP;
3675
3.40G
  return jump;
3676
3.40G
}
3677
3678
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3679
116M
{
3680
116M
  sljit_u8 *inst;
3681
116M
  struct sljit_jump *jump;
3682
3683
116M
  CHECK_ERROR();
3684
116M
  CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3685
116M
  ADJUST_LOCAL_OFFSET(src, srcw);
3686
3687
116M
  CHECK_EXTRA_REGS(src, srcw, (void)0);
3688
3689
116M
  if (src == SLJIT_IMM) {
3690
115M
    jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3691
115M
    FAIL_IF_NULL(jump);
3692
115M
    set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3693
115M
    jump->u.target = (sljit_uw)srcw;
3694
3695
115M
    jump->addr = compiler->size;
3696
    /* Worst case size. */
3697
115M
    compiler->size += JUMP_MAX_SIZE;
3698
115M
    inst = (sljit_u8*)ensure_buf(compiler, 1);
3699
115M
    FAIL_IF_NULL(inst);
3700
3701
115M
    inst[0] = SLJIT_INST_JUMP;
3702
115M
  } else {
3703
1.05M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3704
    /* REX_W is not necessary (src is not immediate). */
3705
1.05M
    compiler->mode32 = 1;
3706
1.05M
#endif
3707
1.05M
    inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3708
1.05M
    FAIL_IF(!inst);
3709
1.05M
    inst[0] = GROUP_FF;
3710
1.05M
    inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3711
1.05M
  }
3712
116M
  return SLJIT_SUCCESS;
3713
116M
}
3714
3715
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3716
  sljit_s32 dst, sljit_sw dstw,
3717
  sljit_s32 type)
3718
410M
{
3719
410M
  sljit_u8 *inst;
3720
410M
  sljit_u8 cond_set;
3721
410M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3722
410M
  sljit_s32 reg;
3723
410M
  sljit_uw size;
3724
410M
#endif /* !SLJIT_CONFIG_X86_64 */
3725
  /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3726
410M
  sljit_s32 dst_save = dst;
3727
410M
  sljit_sw dstw_save = dstw;
3728
3729
410M
  CHECK_ERROR();
3730
410M
  CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3731
3732
410M
  ADJUST_LOCAL_OFFSET(dst, dstw);
3733
410M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
3734
3735
  /* setcc = jcc + 0x10. */
3736
410M
  cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3737
3738
410M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3739
410M
  if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3740
13.1M
    size = 3 + 2;
3741
13.1M
    if (reg_map[TMP_REG1] >= 4)
3742
0
      size += 1 + 1;
3743
13.1M
    else if (reg_map[dst] >= 4)
3744
0
      size++;
3745
3746
13.1M
    inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3747
13.1M
    FAIL_IF(!inst);
3748
13.1M
    INC_SIZE(size);
3749
    /* Set low register to conditional flag. */
3750
13.1M
    if (reg_map[TMP_REG1] >= 4)
3751
0
      *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3752
3753
13.1M
    inst[0] = GROUP_0F;
3754
13.1M
    inst[1] = cond_set;
3755
13.1M
    inst[2] = MOD_REG | reg_lmap[TMP_REG1];
3756
13.1M
    inst += 3;
3757
3758
13.1M
    if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4)
3759
0
      *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3760
3761
13.1M
    inst[0] = OR_rm8_r8;
3762
13.1M
    inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3763
13.1M
    return SLJIT_SUCCESS;
3764
13.1M
  }
3765
3766
397M
  reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3767
3768
397M
  size = 3 + (reg_map[reg] >= 4) + 4;
3769
397M
  inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3770
397M
  FAIL_IF(!inst);
3771
397M
  INC_SIZE(size);
3772
  /* Set low register to conditional flag. */
3773
3774
397M
  if (reg_map[reg] >= 4)
3775
6.27k
    *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
3776
3777
397M
  inst[0] = GROUP_0F;
3778
397M
  inst[1] = cond_set;
3779
397M
  inst[2] = MOD_REG | reg_lmap[reg];
3780
3781
397M
  inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3782
  /* The movzx instruction does not affect flags. */
3783
397M
  inst[4] = GROUP_0F;
3784
397M
  inst[5] = MOVZX_r_rm8;
3785
397M
  inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3786
3787
397M
  if (reg != TMP_REG1)
3788
392M
    return SLJIT_SUCCESS;
3789
3790
4.14M
  if (GET_OPCODE(op) < SLJIT_ADD) {
3791
0
    compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3792
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3793
0
  }
3794
3795
4.14M
  SLJIT_SKIP_CHECKS(compiler);
3796
4.14M
  return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3797
3798
#else /* !SLJIT_CONFIG_X86_64 */
3799
  SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
3800
3801
  /* The SLJIT_CONFIG_X86_32 code path starts here. */
3802
  if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3803
    /* Low byte is accessible. */
3804
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3805
    FAIL_IF(!inst);
3806
    INC_SIZE(3 + 3);
3807
    /* Set low byte to conditional flag. */
3808
    inst[0] = GROUP_0F;
3809
    inst[1] = cond_set;
3810
    inst[2] = U8(MOD_REG | reg_map[dst]);
3811
3812
    inst[3] = GROUP_0F;
3813
    inst[4] = MOVZX_r_rm8;
3814
    inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3815
    return SLJIT_SUCCESS;
3816
  }
3817
3818
  if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3819
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
3820
    FAIL_IF(!inst);
3821
    INC_SIZE(3 + 2);
3822
3823
    /* Set low byte to conditional flag. */
3824
    inst[0] = GROUP_0F;
3825
    inst[1] = cond_set;
3826
    inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3827
3828
    inst[3] = OR_rm8_r8;
3829
    inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
3830
    return SLJIT_SUCCESS;
3831
  }
3832
3833
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3834
  FAIL_IF(!inst);
3835
  INC_SIZE(3 + 3);
3836
  /* Set low byte to conditional flag. */
3837
  inst[0] = GROUP_0F;
3838
  inst[1] = cond_set;
3839
  inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3840
3841
  inst[3] = GROUP_0F;
3842
  inst[4] = MOVZX_r_rm8;
3843
  inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
3844
3845
  if (GET_OPCODE(op) < SLJIT_ADD)
3846
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3847
3848
  SLJIT_SKIP_CHECKS(compiler);
3849
  return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3850
#endif /* SLJIT_CONFIG_X86_64 */
3851
4.14M
}
3852
3853
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3854
  sljit_s32 dst_freg,
3855
  sljit_s32 src1, sljit_sw src1w,
3856
  sljit_s32 src2_freg)
3857
0
{
3858
0
  sljit_u8* inst;
3859
0
  sljit_uw size;
3860
0
3861
0
  CHECK_ERROR();
3862
0
  CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3863
0
3864
0
  ADJUST_LOCAL_OFFSET(src1, src1w);
3865
0
3866
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3867
0
  compiler->mode32 = 1;
3868
0
#endif /* SLJIT_CONFIG_X86_64 */
3869
0
3870
0
  if (dst_freg != src2_freg) {
3871
0
    if (dst_freg == src1) {
3872
0
      src1 = src2_freg;
3873
0
      src1w = 0;
3874
0
      type ^= 0x1;
3875
0
    } else
3876
0
      FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
3877
0
  }
3878
0
3879
0
  inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
3880
0
  FAIL_IF(!inst);
3881
0
  INC_SIZE(2);
3882
0
  inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
3883
0
3884
0
  size = compiler->size;
3885
0
  FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
3886
0
3887
0
  inst[1] = U8(compiler->size - size);
3888
0
  return SLJIT_SUCCESS;
3889
0
}
3890
3891
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3892
  sljit_s32 vreg,
3893
  sljit_s32 srcdst, sljit_sw srcdstw)
3894
1.08M
{
3895
1.08M
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3896
1.08M
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3897
1.08M
  sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3898
1.08M
  sljit_uw op;
3899
3900
1.08M
  CHECK_ERROR();
3901
1.08M
  CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));
3902
3903
1.08M
  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3904
3905
1.08M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3906
1.08M
  compiler->mode32 = 1;
3907
1.08M
#endif /* SLJIT_CONFIG_X86_64 */
3908
3909
1.08M
  switch (reg_size) {
3910
1.08M
  case 4:
3911
1.08M
    op = EX86_SSE2;
3912
1.08M
    break;
3913
0
  case 5:
3914
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3915
0
      return SLJIT_ERR_UNSUPPORTED;
3916
0
    op = EX86_SSE2 | VEX_256;
3917
0
    break;
3918
0
  default:
3919
0
    return SLJIT_ERR_UNSUPPORTED;
3920
1.08M
  }
3921
3922
1.08M
  if (!(srcdst & SLJIT_MEM))
3923
0
    alignment = reg_size;
3924
3925
1.08M
  if (type & SLJIT_SIMD_FLOAT) {
3926
0
    if (elem_size == 2 || elem_size == 3) {
3927
0
      op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
3928
3929
0
      if (elem_size == 3)
3930
0
        op |= EX86_PREF_66;
3931
3932
0
      if (type & SLJIT_SIMD_STORE)
3933
0
        op += 1;
3934
0
    } else
3935
0
      return SLJIT_ERR_UNSUPPORTED;
3936
1.08M
  } else {
3937
1.08M
    op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
3938
1.08M
      | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
3939
1.08M
  }
3940
3941
1.08M
  if (type & SLJIT_SIMD_TEST)
3942
0
    return SLJIT_SUCCESS;
3943
3944
1.08M
  if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
3945
0
    return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw);
3946
3947
1.08M
  return emit_groupf(compiler, op, vreg, srcdst, srcdstw);
3948
1.08M
}
3949
3950
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3951
  sljit_s32 vreg,
3952
  sljit_s32 src, sljit_sw srcw)
3953
0
{
3954
0
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3955
0
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3956
0
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3957
0
  sljit_u8 *inst;
3958
0
  sljit_u8 opcode = 0;
3959
0
  sljit_uw op;
3960
0
3961
0
  CHECK_ERROR();
3962
0
  CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));
3963
0
3964
0
  ADJUST_LOCAL_OFFSET(src, srcw);
3965
0
3966
0
  if (!(type & SLJIT_SIMD_FLOAT)) {
3967
0
    CHECK_EXTRA_REGS(src, srcw, (void)0);
3968
0
  }
3969
0
3970
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3971
0
  if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3972
0
    return SLJIT_ERR_UNSUPPORTED;
3973
0
#else /* !SLJIT_CONFIG_X86_32 */
3974
0
  compiler->mode32 = 1;
3975
0
3976
0
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3977
0
    return SLJIT_ERR_UNSUPPORTED;
3978
0
#endif /* SLJIT_CONFIG_X86_32 */
3979
0
3980
0
  if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
3981
0
    return SLJIT_ERR_UNSUPPORTED;
3982
0
3983
0
  if (type & SLJIT_SIMD_TEST)
3984
0
    return SLJIT_SUCCESS;
3985
0
3986
0
  if (reg_size == 5)
3987
0
    use_vex = 1;
3988
0
3989
0
  if (use_vex && src != SLJIT_IMM) {
3990
0
    op = 0;
3991
0
3992
0
    switch (elem_size) {
3993
0
    case 0:
3994
0
      if (cpu_feature_list & CPU_FEATURE_AVX2)
3995
0
        op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3996
0
      break;
3997
0
    case 1:
3998
0
      if (cpu_feature_list & CPU_FEATURE_AVX2)
3999
0
        op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4000
0
      break;
4001
0
    case 2:
4002
0
      if (type & SLJIT_SIMD_FLOAT) {
4003
0
        if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
4004
0
          op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4005
0
      } else if (cpu_feature_list & CPU_FEATURE_AVX2)
4006
0
        op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4007
0
      break;
4008
0
    default:
4009
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4010
0
      if (!(type & SLJIT_SIMD_FLOAT)) {
4011
0
        if (cpu_feature_list & CPU_FEATURE_AVX2)
4012
0
          op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4013
0
        break;
4014
0
      }
4015
0
#endif /* SLJIT_CONFIG_X86_64 */
4016
0
4017
0
      if (reg_size == 5)
4018
0
        op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4019
0
      break;
4020
0
    }
4021
0
4022
0
    if (op != 0) {
4023
0
      if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
4024
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4025
0
        if (elem_size >= 3)
4026
0
          compiler->mode32 = 0;
4027
0
#endif /* SLJIT_CONFIG_X86_64 */
4028
0
        FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
4029
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4030
0
        compiler->mode32 = 1;
4031
0
#endif /* SLJIT_CONFIG_X86_64 */
4032
0
        src = vreg;
4033
0
        srcw = 0;
4034
0
      }
4035
0
4036
0
      if (reg_size == 5)
4037
0
        op |= VEX_256;
4038
0
4039
0
      return emit_vex_instruction(compiler, op, vreg, 0, src, srcw);
4040
0
    }
4041
0
  }
4042
0
4043
0
  if (type & SLJIT_SIMD_FLOAT) {
4044
0
    if (src == SLJIT_IMM) {
4045
0
      if (use_vex)
4046
0
        return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);
4047
0
4048
0
      return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0);
4049
0
    }
4050
0
4051
0
    SLJIT_ASSERT(reg_size == 4);
4052
0
4053
0
    if (use_vex) {
4054
0
      if (elem_size == 3)
4055
0
        return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw);
4056
0
4057
0
      SLJIT_ASSERT(!(src & SLJIT_MEM));
4058
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
4059
0
      return emit_byte(compiler, 0);
4060
0
    }
4061
0
4062
0
    if (elem_size == 2 && vreg != src) {
4063
0
      FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw));
4064
0
      src = vreg;
4065
0
      srcw = 0;
4066
0
    }
4067
0
4068
0
    op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
4069
0
    FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw));
4070
0
4071
0
    if (elem_size == 2)
4072
0
      return emit_byte(compiler, 0);
4073
0
    return SLJIT_SUCCESS;
4074
0
  }
4075
0
4076
0
  if (src == SLJIT_IMM) {
4077
0
    if (elem_size == 0) {
4078
0
      srcw = (sljit_u8)srcw;
4079
0
      srcw |= srcw << 8;
4080
0
      srcw |= srcw << 16;
4081
0
      elem_size = 2;
4082
0
    } else if (elem_size == 1) {
4083
0
      srcw = (sljit_u16)srcw;
4084
0
      srcw |= srcw << 16;
4085
0
      elem_size = 2;
4086
0
    }
4087
0
4088
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4089
0
    if (elem_size == 2 && (sljit_s32)srcw == -1)
4090
0
      srcw = -1;
4091
0
#endif /* SLJIT_CONFIG_X86_64 */
4092
0
4093
0
    if (srcw == 0 || srcw == -1) {
4094
0
      if (use_vex)
4095
0
        return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);
4096
0
4097
0
      return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0);
4098
0
    }
4099
0
4100
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4101
0
    if (elem_size == 3)
4102
0
      FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
4103
0
    else
4104
0
#endif /* SLJIT_CONFIG_X86_64 */
4105
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
4106
0
4107
0
    src = TMP_REG1;
4108
0
    srcw = 0;
4109
0
4110
0
  }
4111
0
4112
0
  op = 2;
4113
0
  opcode = MOVD_x_rm;
4114
0
4115
0
  switch (elem_size) {
4116
0
  case 0:
4117
0
    if (!FAST_IS_REG(src)) {
4118
0
      opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
4119
0
      op = 3;
4120
0
    }
4121
0
    break;
4122
0
  case 1:
4123
0
    if (!FAST_IS_REG(src))
4124
0
      opcode = PINSRW_x_rm_i8;
4125
0
    break;
4126
0
  case 2:
4127
0
    break;
4128
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4129
0
  case 3:
4130
0
    /* MOVQ */
4131
0
    compiler->mode32 = 0;
4132
0
    break;
4133
0
#endif /* SLJIT_CONFIG_X86_64 */
4134
0
  }
4135
0
4136
0
  if (use_vex) {
4137
0
    if (opcode != MOVD_x_rm) {
4138
0
      op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
4139
0
      FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw));
4140
0
    } else
4141
0
      FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
4142
0
  } else {
4143
0
    inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw);
4144
0
    FAIL_IF(!inst);
4145
0
    inst[0] = GROUP_0F;
4146
0
    inst[1] = opcode;
4147
0
4148
0
    if (op == 3) {
4149
0
      SLJIT_ASSERT(opcode == 0x3a);
4150
0
      inst[2] = PINSRB_x_rm_i8;
4151
0
    }
4152
0
  }
4153
0
4154
0
  if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) {
4155
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4156
0
    op = VPBROADCASTD_x_xm;
4157
0
#else /* !SLJIT_CONFIG_X86_32 */
4158
0
    op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
4159
0
#endif /* SLJIT_CONFIG_X86_32 */
4160
0
    return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4161
0
  }
4162
0
4163
0
  SLJIT_ASSERT(reg_size == 4);
4164
0
4165
0
  if (opcode != MOVD_x_rm)
4166
0
    FAIL_IF(emit_byte(compiler, 0));
4167
0
4168
0
  switch (elem_size) {
4169
0
  case 0:
4170
0
    if (use_vex) {
4171
0
      FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4172
0
      return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0);
4173
0
    }
4174
0
    FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4175
0
    return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
4176
0
  case 1:
4177
0
    if (use_vex)
4178
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0));
4179
0
    else
4180
0
      FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0));
4181
0
    FAIL_IF(emit_byte(compiler, 0));
4182
0
    SLJIT_FALLTHROUGH
4183
0
  default:
4184
0
    if (use_vex)
4185
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
4186
0
    else
4187
0
      FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
4188
0
    return emit_byte(compiler, 0);
4189
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4190
0
  case 3:
4191
0
    compiler->mode32 = 1;
4192
0
    if (use_vex)
4193
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
4194
0
    else
4195
0
      FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
4196
0
    return emit_byte(compiler, 0x44);
4197
0
#endif /* SLJIT_CONFIG_X86_64 */
4198
0
  }
4199
0
}
4200
4201
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4202
  sljit_s32 vreg, sljit_s32 lane_index,
4203
  sljit_s32 srcdst, sljit_sw srcdstw)
4204
573k
{
4205
573k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4206
573k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4207
573k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4208
573k
  sljit_u8 *inst;
4209
573k
  sljit_u8 opcode = 0;
4210
573k
  sljit_uw op;
4211
573k
  sljit_s32 vreg_orig = vreg;
4212
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4213
  sljit_s32 srcdst_is_ereg = 0;
4214
  sljit_s32 srcdst_orig = 0;
4215
  sljit_sw srcdstw_orig = 0;
4216
#endif /* SLJIT_CONFIG_X86_32 */
4217
4218
573k
  CHECK_ERROR();
4219
573k
  CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));
4220
4221
573k
  ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4222
4223
573k
  if (reg_size == 5) {
4224
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4225
0
      return SLJIT_ERR_UNSUPPORTED;
4226
0
    use_vex = 1;
4227
573k
  } else if (reg_size != 4)
4228
0
    return SLJIT_ERR_UNSUPPORTED;
4229
4230
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4231
  if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
4232
    return SLJIT_ERR_UNSUPPORTED;
4233
#else /* SLJIT_CONFIG_X86_32 */
4234
573k
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4235
0
    return SLJIT_ERR_UNSUPPORTED;
4236
573k
#endif /* SLJIT_CONFIG_X86_32 */
4237
4238
573k
  if (type & SLJIT_SIMD_TEST)
4239
0
    return SLJIT_SUCCESS;
4240
4241
573k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4242
573k
  compiler->mode32 = 1;
4243
#else /* !SLJIT_CONFIG_X86_64 */
4244
  if (!(type & SLJIT_SIMD_FLOAT)) {
4245
    CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
4246
4247
    if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
4248
      srcdst_orig = srcdst;
4249
      srcdstw_orig = srcdstw;
4250
      srcdst = TMP_REG1;
4251
      srcdstw = 0;
4252
    }
4253
  }
4254
#endif /* SLJIT_CONFIG_X86_64 */
4255
4256
573k
  if (type & SLJIT_SIMD_LANE_ZERO) {
4257
573k
    if (lane_index == 0) {
4258
573k
      if (!(type & SLJIT_SIMD_FLOAT)) {
4259
573k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4260
573k
        if (elem_size == 3) {
4261
0
          compiler->mode32 = 0;
4262
0
          elem_size = 2;
4263
0
        }
4264
573k
#endif /* SLJIT_CONFIG_X86_64 */
4265
573k
        if (srcdst == SLJIT_IMM) {
4266
391k
          if (elem_size == 0)
4267
0
            srcdstw = (sljit_u8)srcdstw;
4268
391k
          else if (elem_size == 1)
4269
0
            srcdstw = (sljit_u16)srcdstw;
4270
4271
391k
          EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4272
391k
          srcdst = TMP_REG1;
4273
391k
          srcdstw = 0;
4274
391k
          elem_size = 2;
4275
391k
        }
4276
4277
573k
        if (elem_size == 2) {
4278
573k
          if (use_vex)
4279
0
            return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
4280
573k
          return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw);
4281
573k
        }
4282
573k
      } else if (srcdst & SLJIT_MEM) {
4283
0
        SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
4284
4285
0
        if (use_vex)
4286
0
          return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw);
4287
0
        return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw);
4288
0
      } else if (elem_size == 3) {
4289
0
        if (use_vex)
4290
0
          return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0);
4291
0
        return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0);
4292
0
      } else if (use_vex) {
4293
0
        FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4294
0
        return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0);
4295
0
      }
4296
573k
    }
4297
4298
0
    if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4299
0
      vreg = TMP_FREG;
4300
0
      lane_index -= (1 << (4 - elem_size));
4301
0
    } else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
4302
0
      if (use_vex)
4303
0
        FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
4304
0
      else
4305
0
        FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
4306
0
      srcdst = TMP_FREG;
4307
0
      srcdstw = 0;
4308
0
    }
4309
4310
0
    op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
4311
0
      | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
4312
4313
0
    if (use_vex)
4314
0
      FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
4315
0
    else
4316
0
      FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0));
4317
0
  } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4318
0
    FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
4319
0
    FAIL_IF(emit_byte(compiler, 1));
4320
4321
0
    vreg = TMP_FREG;
4322
0
    lane_index -= (1 << (4 - elem_size));
4323
0
  }
4324
4325
0
  if (type & SLJIT_SIMD_FLOAT) {
4326
0
    if (elem_size == 3) {
4327
0
      if (srcdst & SLJIT_MEM) {
4328
0
        if (type & SLJIT_SIMD_STORE)
4329
0
          op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
4330
0
        else
4331
0
          op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
4332
4333
        /* VEX prefix clears upper bits of the target register. */
4334
0
        if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG))
4335
0
          FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
4336
0
            | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw));
4337
0
        else
4338
0
          FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw));
4339
4340
        /* In case of store, vreg is not TMP_FREG. */
4341
0
      } else if (type & SLJIT_SIMD_STORE) {
4342
0
        if (lane_index == 1) {
4343
0
          if (use_vex)
4344
0
            return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
4345
0
          return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0);
4346
0
        }
4347
0
        if (use_vex)
4348
0
          return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
4349
0
        return emit_sse2_load(compiler, 0, srcdst, vreg, 0);
4350
0
      } else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) {
4351
0
        if (lane_index == 1)
4352
0
          FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
4353
0
        else
4354
0
          FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
4355
0
      } else {
4356
0
        if (lane_index == 1)
4357
0
          FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0));
4358
0
        else
4359
0
          FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0));
4360
0
      }
4361
0
    } else if (type & SLJIT_SIMD_STORE) {
4362
0
      if (lane_index == 0) {
4363
0
        if (use_vex)
4364
0
          return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV),
4365
0
            vreg, ((srcdst & SLJIT_MEM) ? 0 : srcdst), srcdst, srcdstw);
4366
0
        return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg);
4367
0
      }
4368
4369
0
      if (srcdst & SLJIT_MEM) {
4370
0
        if (use_vex)
4371
0
          FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw));
4372
0
        else
4373
0
          FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
4374
0
        return emit_byte(compiler, U8(lane_index));
4375
0
      }
4376
4377
0
      if (use_vex) {
4378
0
        FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0));
4379
0
        return emit_byte(compiler, U8(lane_index));
4380
0
      }
4381
4382
0
      if (srcdst == vreg)
4383
0
        op = SHUFPS_x_xm | EX86_SSE2;
4384
0
      else {
4385
0
        switch (lane_index) {
4386
0
        case 1:
4387
0
          op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
4388
0
          break;
4389
0
        case 2:
4390
0
          op = MOVHLPS_x_x | EX86_SSE2;
4391
0
          break;
4392
0
        default:
4393
0
          SLJIT_ASSERT(lane_index == 3);
4394
0
          op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
4395
0
          break;
4396
0
        }
4397
0
      }
4398
4399
0
      FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0));
4400
4401
0
      op &= 0xff;
4402
0
      if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
4403
0
        return emit_byte(compiler, U8(lane_index));
4404
4405
0
      return SLJIT_SUCCESS;
4406
0
    } else {
4407
0
      if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
4408
0
        FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
4409
0
        FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
4410
0
      } else
4411
0
        FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst));
4412
0
    }
4413
4414
0
    if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE))
4415
0
      return SLJIT_SUCCESS;
4416
4417
0
    SLJIT_ASSERT(reg_size == 5);
4418
4419
0
    if (type & SLJIT_SIMD_LANE_ZERO) {
4420
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
4421
0
      return emit_byte(compiler, 0x4e);
4422
0
    }
4423
4424
0
    FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
4425
0
    return emit_byte(compiler, 1);
4426
0
  }
4427
4428
0
  if (srcdst == SLJIT_IMM) {
4429
0
    EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4430
0
    srcdst = TMP_REG1;
4431
0
    srcdstw = 0;
4432
0
  }
4433
4434
0
  op = 3;
4435
4436
0
  switch (elem_size) {
4437
0
  case 0:
4438
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4439
0
    break;
4440
0
  case 1:
4441
0
    if (!(type & SLJIT_SIMD_STORE)) {
4442
0
      op = 2;
4443
0
      opcode = PINSRW_x_rm_i8;
4444
0
    } else
4445
0
      opcode = PEXTRW_rm_x_i8;
4446
0
    break;
4447
0
  case 2:
4448
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4449
0
    break;
4450
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4451
0
  case 3:
4452
    /* PINSRQ / PEXTRQ */
4453
0
    opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4454
0
    compiler->mode32 = 0;
4455
0
    break;
4456
0
#endif /* SLJIT_CONFIG_X86_64 */
4457
0
  }
4458
4459
0
  if (use_vex && (type & SLJIT_SIMD_STORE)) {
4460
0
    op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
4461
0
    FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw));
4462
0
  } else {
4463
0
    inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
4464
0
    FAIL_IF(!inst);
4465
0
    inst[0] = GROUP_0F;
4466
4467
0
    if (op == 3) {
4468
0
      inst[1] = 0x3a;
4469
0
      inst[2] = opcode;
4470
0
    } else
4471
0
      inst[1] = opcode;
4472
0
  }
4473
4474
0
  FAIL_IF(emit_byte(compiler, U8(lane_index)));
4475
4476
0
  if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4477
0
    if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4478
0
      SLJIT_ASSERT(reg_size == 5);
4479
4480
0
      if (type & SLJIT_SIMD_LANE_ZERO) {
4481
0
        FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
4482
0
        return emit_byte(compiler, 0x4e);
4483
0
      }
4484
4485
0
      FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
4486
0
      return emit_byte(compiler, 1);
4487
0
    }
4488
4489
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4490
    if (srcdst_orig & SLJIT_MEM)
4491
      return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4492
#endif /* SLJIT_CONFIG_X86_32 */
4493
0
    return SLJIT_SUCCESS;
4494
0
  }
4495
4496
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4497
0
  if (elem_size >= 3)
4498
0
    return SLJIT_SUCCESS;
4499
4500
0
  compiler->mode32 = (type & SLJIT_32);
4501
4502
0
  op = 2;
4503
4504
0
  if (elem_size == 0)
4505
0
    op |= EX86_REX;
4506
4507
0
  if (elem_size == 2) {
4508
0
    if (type & SLJIT_32)
4509
0
      return SLJIT_SUCCESS;
4510
4511
0
    SLJIT_ASSERT(!(compiler->mode32));
4512
0
    op = 1;
4513
0
  }
4514
4515
0
  inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
4516
0
  FAIL_IF(!inst);
4517
4518
0
  if (op != 1) {
4519
0
    inst[0] = GROUP_0F;
4520
0
    inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4521
0
  } else
4522
0
    inst[0] = MOVSXD_r_rm;
4523
#else /* !SLJIT_CONFIG_X86_64 */
4524
  if (elem_size >= 2)
4525
    return SLJIT_SUCCESS;
4526
4527
  FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4528
    (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4529
4530
  if (srcdst_orig & SLJIT_MEM)
4531
    return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4532
#endif /* SLJIT_CONFIG_X86_64 */
4533
0
  return SLJIT_SUCCESS;
4534
0
}
4535
4536
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4537
  sljit_s32 vreg,
4538
  sljit_s32 src, sljit_s32 src_lane_index)
4539
573k
{
4540
573k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4541
573k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4542
573k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4543
573k
  sljit_uw pref;
4544
573k
  sljit_u8 byte;
4545
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4546
  sljit_s32 opcode3 = TMP_REG1;
4547
#else /* !SLJIT_CONFIG_X86_32 */
4548
573k
  sljit_s32 opcode3 = SLJIT_S0;
4549
573k
#endif /* SLJIT_CONFIG_X86_32 */
4550
4551
573k
  CHECK_ERROR();
4552
573k
  CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));
4553
4554
573k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4555
573k
  compiler->mode32 = 1;
4556
573k
#endif /* SLJIT_CONFIG_X86_64 */
4557
573k
  SLJIT_ASSERT(reg_map[opcode3] == 3);
4558
4559
573k
  if (reg_size == 5) {
4560
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4561
0
      return SLJIT_ERR_UNSUPPORTED;
4562
0
    use_vex = 1;
4563
573k
  } else if (reg_size != 4)
4564
0
    return SLJIT_ERR_UNSUPPORTED;
4565
4566
573k
  if (type & SLJIT_SIMD_FLOAT) {
4567
0
    pref = 0;
4568
0
    byte = U8(src_lane_index);
4569
4570
0
    if (elem_size == 3) {
4571
0
      if (type & SLJIT_SIMD_TEST)
4572
0
        return SLJIT_SUCCESS;
4573
4574
0
      if (reg_size == 5) {
4575
0
        if (src_lane_index == 0)
4576
0
          return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4577
4578
0
        FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4579
4580
0
        byte = U8(byte | (byte << 2));
4581
0
        return emit_byte(compiler, U8(byte | (byte << 4)));
4582
0
      }
4583
4584
0
      if (src_lane_index == 0) {
4585
0
        if (use_vex)
4586
0
          return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0);
4587
0
        return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0);
4588
0
      }
4589
4590
      /* Changes it to SHUFPD_x_xm. */
4591
0
      pref = EX86_PREF_66;
4592
0
    } else if (elem_size != 2)
4593
0
      return SLJIT_ERR_UNSUPPORTED;
4594
0
    else if (type & SLJIT_SIMD_TEST)
4595
0
      return SLJIT_SUCCESS;
4596
4597
0
    if (reg_size == 5) {
4598
0
      SLJIT_ASSERT(elem_size == 2);
4599
4600
0
      if (src_lane_index == 0)
4601
0
        return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4602
4603
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4604
4605
0
      byte = 0x44;
4606
0
      if (src_lane_index >= 4) {
4607
0
        byte = 0xee;
4608
0
        src_lane_index -= 4;
4609
0
      }
4610
4611
0
      FAIL_IF(emit_byte(compiler, byte));
4612
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
4613
0
      byte = U8(src_lane_index);
4614
0
    } else if (use_vex) {
4615
0
      FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
4616
0
    } else {
4617
0
      if (vreg != src)
4618
0
        FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0));
4619
4620
0
      FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0));
4621
0
    }
4622
4623
0
    if (elem_size == 2) {
4624
0
      byte = U8(byte | (byte << 2));
4625
0
      byte = U8(byte | (byte << 4));
4626
0
    } else
4627
0
      byte = U8(byte | (byte << 1));
4628
4629
0
    return emit_byte(compiler, U8(byte));
4630
0
  }
4631
4632
573k
  if (type & SLJIT_SIMD_TEST)
4633
0
    return SLJIT_SUCCESS;
4634
4635
573k
  if (elem_size == 0) {
4636
0
    if (reg_size == 5 && src_lane_index >= 16) {
4637
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4638
0
      FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
4639
0
      src_lane_index &= 0x7;
4640
0
      src = vreg;
4641
0
    }
4642
4643
0
    if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
4644
0
      pref = 0;
4645
4646
0
      if ((src_lane_index & 0x3) == 0) {
4647
0
        pref = EX86_PREF_66;
4648
0
        byte = U8(src_lane_index >> 2);
4649
0
      } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
4650
0
        pref = EX86_PREF_F2;
4651
0
        byte = U8(src_lane_index >> 1);
4652
0
      } else {
4653
0
        if (!use_vex) {
4654
0
          if (vreg != src)
4655
0
            FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));
4656
4657
0
          FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0));
4658
0
        } else
4659
0
          FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0));
4660
4661
0
        FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
4662
0
      }
4663
4664
0
      if (pref != 0) {
4665
0
        if (use_vex)
4666
0
          FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
4667
0
        else
4668
0
          FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
4669
0
        FAIL_IF(emit_byte(compiler, byte));
4670
0
      }
4671
4672
0
      src = vreg;
4673
0
    }
4674
4675
0
    if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
4676
0
      return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);
4677
4678
0
    SLJIT_ASSERT(reg_size == 4);
4679
0
    FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4680
0
    return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
4681
0
  }
4682
4683
573k
  if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
4684
0
    switch (elem_size) {
4685
0
    case 1:
4686
0
      pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4687
0
      break;
4688
0
    case 2:
4689
0
      pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4690
0
      break;
4691
0
    default:
4692
0
      pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4693
0
      break;
4694
0
    }
4695
4696
0
    if (reg_size == 5)
4697
0
      pref |= VEX_256;
4698
4699
0
    return emit_vex_instruction(compiler, pref, vreg, 0, src, 0);
4700
0
  }
4701
4702
573k
  if (reg_size == 5) {
4703
0
    switch (elem_size) {
4704
0
    case 1:
4705
0
      byte = U8(src_lane_index & 0x3);
4706
0
      src_lane_index >>= 2;
4707
0
      pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
4708
0
      break;
4709
0
    case 2:
4710
0
      byte = U8(src_lane_index & 0x3);
4711
0
      src_lane_index >>= 1;
4712
0
      pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
4713
0
      break;
4714
0
    case 3:
4715
0
      pref = 0;
4716
0
      break;
4717
0
    default:
4718
0
      FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4719
0
      return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
4720
0
    }
4721
4722
0
    if (pref != 0) {
4723
0
      FAIL_IF(emit_vex_instruction(compiler, pref, vreg, 0, src, 0));
4724
0
      byte = U8(byte | (byte << 2));
4725
0
      FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4726
4727
0
      if (src_lane_index == 0)
4728
0
        return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4729
4730
0
      src = vreg;
4731
0
    }
4732
4733
0
    FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
4734
0
    byte = U8(src_lane_index);
4735
0
    byte = U8(byte | (byte << 2));
4736
0
    return emit_byte(compiler, U8(byte | (byte << 4)));
4737
0
  }
4738
4739
573k
  switch (elem_size) {
4740
0
  case 1:
4741
0
    byte = U8(src_lane_index & 0x3);
4742
0
    src_lane_index >>= 1;
4743
0
    pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
4744
4745
0
    if (use_vex)
4746
0
      FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
4747
0
    else
4748
0
      FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
4749
0
    byte = U8(byte | (byte << 2));
4750
0
    FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4751
4752
0
    if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
4753
0
      return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
4754
4755
0
    src = vreg;
4756
0
    SLJIT_FALLTHROUGH
4757
573k
  case 2:
4758
573k
    byte = U8(src_lane_index);
4759
573k
    byte = U8(byte | (byte << 2));
4760
573k
    break;
4761
0
  default:
4762
0
    byte = U8(src_lane_index << 1);
4763
0
    byte = U8(byte | (byte << 2) | 0x4);
4764
0
    break;
4765
573k
  }
4766
4767
573k
  if (use_vex)
4768
0
    FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, src, 0));
4769
573k
  else
4770
573k
    FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));
4771
573k
  return emit_byte(compiler, U8(byte | (byte << 4)));
4772
573k
}
4773
4774
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4775
  sljit_s32 vreg,
4776
  sljit_s32 src, sljit_sw srcw)
4777
0
{
4778
0
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4779
0
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4780
0
  sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4781
0
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4782
0
  sljit_u8 opcode;
4783
0
4784
0
  CHECK_ERROR();
4785
0
  CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));
4786
0
4787
0
  ADJUST_LOCAL_OFFSET(src, srcw);
4788
0
4789
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4790
0
  compiler->mode32 = 1;
4791
0
#endif /* SLJIT_CONFIG_X86_64 */
4792
0
4793
0
  if (reg_size == 5) {
4794
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4795
0
      return SLJIT_ERR_UNSUPPORTED;
4796
0
    use_vex = 1;
4797
0
  } else if (reg_size != 4)
4798
0
    return SLJIT_ERR_UNSUPPORTED;
4799
0
4800
0
  if (type & SLJIT_SIMD_FLOAT) {
4801
0
    if (elem_size != 2 || elem2_size != 3)
4802
0
      return SLJIT_ERR_UNSUPPORTED;
4803
0
4804
0
    if (type & SLJIT_SIMD_TEST)
4805
0
      return SLJIT_SUCCESS;
4806
0
4807
0
    if (use_vex)
4808
0
      return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw);
4809
0
    return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw);
4810
0
  }
4811
0
4812
0
  switch (elem_size) {
4813
0
  case 0:
4814
0
    if (elem2_size == 1)
4815
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4816
0
    else if (elem2_size == 2)
4817
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4818
0
    else if (elem2_size == 3)
4819
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4820
0
    else
4821
0
      return SLJIT_ERR_UNSUPPORTED;
4822
0
    break;
4823
0
  case 1:
4824
0
    if (elem2_size == 2)
4825
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4826
0
    else if (elem2_size == 3)
4827
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4828
0
    else
4829
0
      return SLJIT_ERR_UNSUPPORTED;
4830
0
    break;
4831
0
  case 2:
4832
0
    if (elem2_size == 3)
4833
0
      opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4834
0
    else
4835
0
      return SLJIT_ERR_UNSUPPORTED;
4836
0
    break;
4837
0
  default:
4838
0
    return SLJIT_ERR_UNSUPPORTED;
4839
0
  }
4840
0
4841
0
  if (type & SLJIT_SIMD_TEST)
4842
0
    return SLJIT_SUCCESS;
4843
0
4844
0
  if (use_vex)
4845
0
    return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw);
4846
0
  return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw);
4847
0
}
4848
4849
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4850
  sljit_s32 vreg,
4851
  sljit_s32 dst, sljit_sw dstw)
4852
913k
{
4853
913k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4854
913k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4855
913k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4856
913k
  sljit_s32 dst_r;
4857
913k
  sljit_uw op;
4858
913k
  sljit_u8 *inst;
4859
4860
913k
  CHECK_ERROR();
4861
913k
  CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));
4862
4863
913k
  ADJUST_LOCAL_OFFSET(dst, dstw);
4864
4865
913k
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
4866
913k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4867
913k
  compiler->mode32 = 1;
4868
913k
#endif /* SLJIT_CONFIG_X86_64 */
4869
4870
913k
  if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4871
0
    return SLJIT_ERR_UNSUPPORTED;
4872
4873
913k
  if (reg_size == 4) {
4874
913k
    if (type & SLJIT_SIMD_TEST)
4875
0
      return SLJIT_SUCCESS;
4876
4877
913k
    op = EX86_PREF_66 | EX86_SSE2_OP2;
4878
4879
913k
    switch (elem_size) {
4880
0
    case 1:
4881
0
      if (use_vex)
4882
0
        FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0));
4883
0
      else
4884
0
        FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0));
4885
0
      vreg = TMP_FREG;
4886
0
      break;
4887
0
    case 2:
4888
0
      op = EX86_SSE2_OP2;
4889
0
      break;
4890
913k
    }
4891
4892
913k
    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4893
913k
    op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x;
4894
4895
913k
    if (use_vex)
4896
0
      FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
4897
913k
    else
4898
913k
      FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0));
4899
4900
913k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4901
913k
    compiler->mode32 = type & SLJIT_32;
4902
913k
#endif /* SLJIT_CONFIG_X86_64 */
4903
4904
913k
    if (elem_size == 1) {
4905
0
      inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
4906
0
      FAIL_IF(!inst);
4907
0
      inst[1] |= SHR;
4908
0
    }
4909
4910
913k
    if (dst_r == TMP_REG1)
4911
0
      return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4912
4913
913k
    return SLJIT_SUCCESS;
4914
913k
  }
4915
4916
0
  if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
4917
0
    return SLJIT_ERR_UNSUPPORTED;
4918
4919
0
  if (type & SLJIT_SIMD_TEST)
4920
0
    return SLJIT_SUCCESS;
4921
4922
0
  dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4923
4924
0
  if (elem_size == 1) {
4925
0
    FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
4926
0
    FAIL_IF(emit_byte(compiler, 1));
4927
0
    FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0));
4928
0
    FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
4929
0
  } else {
4930
0
    op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
4931
4932
0
    if (elem_size == 0)
4933
0
      op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
4934
0
    else if (elem_size == 3)
4935
0
      op |= EX86_PREF_66;
4936
4937
0
    FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
4938
0
  }
4939
4940
0
  if (dst_r == TMP_REG1) {
4941
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4942
0
    compiler->mode32 = type & SLJIT_32;
4943
0
#endif /* SLJIT_CONFIG_X86_64 */
4944
0
    return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4945
0
  }
4946
4947
0
  return SLJIT_SUCCESS;
4948
0
}
4949
4950
static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4951
  sljit_s32 dst_vreg, sljit_s32 src_vreg)
4952
0
{
4953
0
  sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4954
4955
0
  SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4956
4957
0
  if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4958
0
    op |= EX86_PREF_66;
4959
4960
0
  return emit_groupf(compiler, op, dst_vreg, src_vreg, 0);
4961
0
}
4962
4963
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4964
  sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
4965
172k
{
4966
172k
  sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4967
172k
  sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4968
172k
  sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4969
172k
  sljit_uw op = 0;
4970
172k
  sljit_uw mov_op = 0;
4971
4972
172k
  CHECK_ERROR();
4973
172k
  CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
4974
172k
  ADJUST_LOCAL_OFFSET(src2, src2w);
4975
4976
172k
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4977
172k
  compiler->mode32 = 1;
4978
172k
#endif /* SLJIT_CONFIG_X86_64 */
4979
4980
172k
  if (reg_size == 5) {
4981
0
    if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4982
0
      return SLJIT_ERR_UNSUPPORTED;
4983
172k
  } else if (reg_size != 4)
4984
0
    return SLJIT_ERR_UNSUPPORTED;
4985
4986
172k
  if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4987
0
    return SLJIT_ERR_UNSUPPORTED;
4988
4989
172k
  switch (SLJIT_SIMD_GET_OPCODE(type)) {
4990
172k
  case SLJIT_SIMD_OP2_AND:
4991
172k
    op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4992
4993
172k
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4994
172k
      op |= EX86_PREF_66;
4995
172k
    break;
4996
0
  case SLJIT_SIMD_OP2_OR:
4997
0
    op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4998
4999
0
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
5000
0
      op |= EX86_PREF_66;
5001
0
    break;
5002
0
  case SLJIT_SIMD_OP2_XOR:
5003
0
    op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
5004
5005
0
    if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
5006
0
      op |= EX86_PREF_66;
5007
0
    break;
5008
5009
0
  case SLJIT_SIMD_OP2_SHUFFLE:
5010
0
    if (reg_size != 4)
5011
0
      return SLJIT_ERR_UNSUPPORTED;
5012
5013
0
    op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38;
5014
0
    break;
5015
172k
  }
5016
5017
172k
  if (type & SLJIT_SIMD_TEST)
5018
0
    return SLJIT_SUCCESS;
5019
5020
172k
  if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) {
5021
0
    mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2;
5022
0
    if (use_vex)
5023
0
      FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w));
5024
0
    else
5025
0
      FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w));
5026
5027
0
    src2 = TMP_FREG;
5028
0
    src2w = 0;
5029
0
  }
5030
5031
172k
  if (reg_size == 5 || use_vex) {
5032
0
    if (reg_size == 5)
5033
0
      op |= VEX_256;
5034
5035
0
    return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w);
5036
0
  }
5037
5038
172k
  if (dst_vreg != src1_vreg) {
5039
0
    if (dst_vreg == src2) {
5040
0
      if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
5041
0
        FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2));
5042
0
        FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
5043
0
        src2 = TMP_FREG;
5044
0
        src2w = 0;
5045
0
      } else
5046
0
        src2 = src1_vreg;
5047
0
    } else
5048
0
      FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
5049
0
  }
5050
5051
172k
  if (op & (VEX_OP_0F38 | VEX_OP_0F3A))
5052
0
    return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
5053
172k
  return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
5054
172k
}
5055
5056
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
5057
  sljit_s32 dst_reg,
5058
  sljit_s32 mem_reg)
5059
0
{
5060
0
  CHECK_ERROR();
5061
0
  CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
5062
0
5063
0
  if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
5064
0
    return SLJIT_ERR_UNSUPPORTED;
5065
0
5066
0
  if (op & SLJIT_ATOMIC_TEST)
5067
0
    return SLJIT_SUCCESS;
5068
0
5069
0
  SLJIT_SKIP_CHECKS(compiler);
5070
0
  return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
5071
0
}
5072
5073
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
5074
  sljit_s32 src_reg,
5075
  sljit_s32 mem_reg,
5076
  sljit_s32 temp_reg)
5077
0
{
5078
0
  sljit_uw pref;
5079
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5080
0
  sljit_s32 saved_reg = TMP_REG1;
5081
0
  sljit_s32 swap_tmp = 0;
5082
0
  sljit_sw srcw = 0;
5083
0
  sljit_sw tempw = 0;
5084
0
#endif /* SLJIT_CONFIG_X86_32 */
5085
0
5086
0
  CHECK_ERROR();
5087
0
  CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
5088
0
  CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
5089
0
  CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
5090
0
5091
0
  SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
5092
0
  SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
5093
0
5094
0
  if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
5095
0
    return SLJIT_ERR_UNSUPPORTED;
5096
0
5097
0
  if (op & SLJIT_ATOMIC_TEST)
5098
0
    return SLJIT_SUCCESS;
5099
0
5100
0
  op = GET_OPCODE(op);
5101
0
5102
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5103
0
  if (temp_reg == SLJIT_TMP_DEST_REG) {
5104
0
    FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));
5105
0
5106
0
    if (src_reg == SLJIT_R0)
5107
0
      src_reg = TMP_REG1;
5108
0
    if (mem_reg == SLJIT_R0)
5109
0
      mem_reg = TMP_REG1;
5110
0
5111
0
    temp_reg = SLJIT_R0;
5112
0
    swap_tmp = 1;
5113
0
  }
5114
0
5115
0
  /* Src is virtual register or its low byte is not accessible. */
5116
0
  if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
5117
0
    SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG);
5118
0
5119
0
    if (swap_tmp) {
5120
0
      saved_reg = (mem_reg != SLJIT_R1) ? SLJIT_R1 : SLJIT_R2;
5121
0
5122
0
      EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0);
5123
0
      EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw);
5124
0
    } else
5125
0
      EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
5126
0
5127
0
    src_reg = saved_reg;
5128
0
5129
0
    if (mem_reg == src_reg)
5130
0
      mem_reg = saved_reg;
5131
0
  }
5132
0
#endif /* SLJIT_CONFIG_X86_32 */
5133
0
5134
0
  if (temp_reg != SLJIT_R0) {
5135
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5136
0
    compiler->mode32 = 0;
5137
0
5138
0
    EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0);
5139
0
    EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);
5140
0
5141
0
    if (src_reg == SLJIT_R0)
5142
0
      src_reg = TMP_REG2;
5143
0
    if (mem_reg == SLJIT_R0)
5144
0
      mem_reg = TMP_REG2;
5145
0
#else /* !SLJIT_CONFIG_X86_64 */
5146
0
    SLJIT_ASSERT(!swap_tmp);
5147
0
5148
0
    if (src_reg == TMP_REG1) {
5149
0
      if (mem_reg == SLJIT_R0) {
5150
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
5151
0
        EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
5152
0
        EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5153
0
5154
0
        mem_reg = SLJIT_R1;
5155
0
        saved_reg = SLJIT_R1;
5156
0
      } else {
5157
0
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
5158
0
        EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5159
0
        saved_reg = SLJIT_R0;
5160
0
      }
5161
0
    } else {
5162
0
      EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0);
5163
0
      EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
5164
0
5165
0
      if (src_reg == SLJIT_R0)
5166
0
        src_reg = TMP_REG1;
5167
0
      if (mem_reg == SLJIT_R0)
5168
0
        mem_reg = TMP_REG1;
5169
0
    }
5170
0
#endif /* SLJIT_CONFIG_X86_64 */
5171
0
  }
5172
0
5173
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5174
0
  compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
5175
0
#endif /* SLJIT_CONFIG_X86_64 */
5176
0
5177
0
  /* Lock prefix. */
5178
0
  FAIL_IF(emit_byte(compiler, GROUP_LOCK));
5179
0
5180
0
  pref = 0;
5181
0
  if (op == SLJIT_MOV_U16)
5182
0
    pref = EX86_HALF_ARG | EX86_PREF_66;
5183
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5184
0
  if (op == SLJIT_MOV_U8)
5185
0
    pref = EX86_REX;
5186
0
#endif /* SLJIT_CONFIG_X86_64 */
5187
0
5188
0
  FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
5189
0
5190
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5191
0
  if (swap_tmp) {
5192
0
    SLJIT_ASSERT(temp_reg == SLJIT_R0);
5193
0
    FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));
5194
0
5195
0
    if (saved_reg != TMP_REG1)
5196
0
      return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0);
5197
0
    return SLJIT_SUCCESS;
5198
0
  }
5199
0
#endif /* SLJIT_CONFIG_X86_32 */
5200
0
5201
0
  if (temp_reg != SLJIT_R0) {
5202
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5203
0
    compiler->mode32 = 0;
5204
0
    return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0);
5205
0
#else /* !SLJIT_CONFIG_X86_64 */
5206
0
    EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0);
5207
0
    if (saved_reg == SLJIT_R1)
5208
0
      return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0);
5209
0
#endif /* SLJIT_CONFIG_X86_64 */
5210
0
  }
5211
0
  return SLJIT_SUCCESS;
5212
0
}
5213
5214
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
5215
107M
{
5216
107M
  CHECK_ERROR();
5217
107M
  CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
5218
107M
  ADJUST_LOCAL_OFFSET(dst, dstw);
5219
107M
  ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
5220
5221
107M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
5222
5223
107M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5224
107M
  compiler->mode32 = 0;
5225
107M
#endif
5226
5227
107M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5228
107M
  if (NOT_HALFWORD(offset)) {
5229
0
    FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
5230
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
5231
    SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
5232
    return compiler->error;
5233
#else
5234
0
    return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
5235
0
#endif
5236
0
  }
5237
107M
#endif
5238
5239
107M
  if (offset != 0)
5240
107M
    return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
5241
82.0k
  return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
5242
107M
}
5243
5244
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
5245
  sljit_s32 dst, sljit_sw dstw,
5246
  sljit_sw init_value)
5247
0
{
5248
0
  sljit_u8 *inst;
5249
0
  struct sljit_const *const_;
5250
0
  sljit_s32 reg;
5251
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5252
0
  sljit_s32 dst_is_ereg = 0;
5253
0
#endif /* !SLJIT_CONFIG_X86_32 */
5254
0
5255
0
  CHECK_ERROR_PTR();
5256
0
  CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
5257
0
  ADJUST_LOCAL_OFFSET(dst, dstw);
5258
0
5259
0
  CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
5260
0
5261
0
  const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
5262
0
  PTR_FAIL_IF(!const_);
5263
0
  set_const(const_, compiler);
5264
0
5265
0
  switch (GET_OPCODE(op)) {
5266
0
  case SLJIT_MOV_U8:
5267
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5268
0
    compiler->mode32 = (op & SLJIT_32);
5269
0
#endif /* SLJIT_CONFIG_X86_64 */
5270
0
5271
0
    if ((init_value & 0x100) != 0)
5272
0
      init_value = init_value | -(sljit_sw)0x100;
5273
0
    else
5274
0
      init_value = (sljit_u8)init_value;
5275
0
5276
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5277
0
    if (dst_is_ereg) {
5278
0
      if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))
5279
0
        return NULL;
5280
0
      dst = 0;
5281
0
      break;
5282
0
    }
5283
0
#endif /* !SLJIT_CONFIG_X86_32 */
5284
0
5285
0
    reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
5286
0
5287
0
    if (emit_mov(compiler, reg, 0, SLJIT_IMM, init_value))
5288
0
      return NULL;
5289
0
    break;
5290
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5291
0
  case SLJIT_MOV:
5292
0
    compiler->mode32 = 0;
5293
0
    reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
5294
0
5295
0
    if (emit_load_imm64(compiler, reg, init_value))
5296
0
      return NULL;
5297
0
    break;
5298
0
#endif /* SLJIT_CONFIG_X86_64 */
5299
0
  default:
5300
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5301
0
    compiler->mode32 = (op == SLJIT_MOV32);
5302
0
#endif /* SLJIT_CONFIG_X86_64 */
5303
0
5304
0
    if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))
5305
0
      return NULL;
5306
0
    dst = 0;
5307
0
    break;
5308
0
  }
5309
0
5310
0
  inst = (sljit_u8*)ensure_buf(compiler, 1);
5311
0
  PTR_FAIL_IF(!inst);
5312
0
5313
0
  inst[0] = SLJIT_INST_CONST;
5314
0
5315
0
  if (dst & SLJIT_MEM) {
5316
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5317
0
    if (op == SLJIT_MOV) {
5318
0
      if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
5319
0
        return NULL;
5320
0
      return const_;
5321
0
    }
5322
0
#endif
5323
0
5324
0
    if (emit_mov_byte(compiler, 0, dst, dstw, TMP_REG1, 0))
5325
0
      return NULL;
5326
0
  }
5327
0
5328
0
  return const_;
5329
0
}
5330
5331
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
5332
  sljit_s32 dst, sljit_sw dstw)
5333
7.45M
{
5334
7.45M
  struct sljit_jump *jump;
5335
7.45M
  sljit_u8 *inst;
5336
7.45M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5337
7.45M
  sljit_s32 reg;
5338
7.45M
#endif /* SLJIT_CONFIG_X86_64 */
5339
7.45M
  SLJIT_UNUSED_ARG(op);
5340
5341
7.45M
  CHECK_ERROR_PTR();
5342
7.45M
  CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
5343
7.45M
  ADJUST_LOCAL_OFFSET(dst, dstw);
5344
5345
7.45M
  CHECK_EXTRA_REGS(dst, dstw, (void)0);
5346
5347
7.45M
  jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
5348
7.45M
  PTR_FAIL_IF(!jump);
5349
7.45M
  set_mov_addr(jump, compiler, 0);
5350
5351
7.45M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5352
7.45M
  compiler->mode32 = 0;
5353
7.45M
  if (dst & SLJIT_MEM)
5354
7.45M
    reg = TMP_REG1;
5355
0
  else
5356
0
    reg = (op != SLJIT_ADD_ABS_ADDR) ? dst : TMP_REG2;
5357
5358
7.45M
  PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
5359
7.45M
  jump->addr = compiler->size;
5360
5361
7.45M
  if (reg_map[reg] >= 8)
5362
0
    jump->flags |= MOV_ADDR_HI;
5363
#else /* !SLJIT_CONFIG_X86_64 */
5364
  if (op == SLJIT_ADD_ABS_ADDR) {
5365
    if (dst != SLJIT_R0) {
5366
      /* Must not be a signed byte argument. */
5367
      inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0x100, dst, dstw);
5368
      PTR_FAIL_IF(!inst);
5369
      *(inst + 1) |= ADD;
5370
    } else
5371
      PTR_FAIL_IF(emit_do_imm(compiler, ADD_EAX_i32, 0));
5372
  } else {
5373
    PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
5374
  }
5375
#endif /* SLJIT_CONFIG_X86_64 */
5376
5377
7.45M
  inst = (sljit_u8*)ensure_buf(compiler, 1);
5378
7.45M
  PTR_FAIL_IF(!inst);
5379
5380
7.45M
  inst[0] = SLJIT_INST_MOV_ADDR;
5381
5382
7.45M
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5383
7.45M
  if (op == SLJIT_ADD_ABS_ADDR) {
5384
0
    inst = emit_x86_instruction(compiler, 1, reg, 0, dst, dstw);
5385
0
    PTR_FAIL_IF(!inst);
5386
0
    *inst = ADD_rm_r;
5387
7.45M
  } else if (dst & SLJIT_MEM)
5388
7.45M
    PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
5389
7.45M
#endif /* SLJIT_CONFIG_X86_64 */
5390
5391
7.45M
  return jump;
5392
7.45M
}
5393
5394
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
5395
0
{
5396
0
  SLJIT_UNUSED_ARG(executable_offset);
5397
0
5398
0
  SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
5399
0
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
5400
0
  sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
5401
0
#else
5402
0
  sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
5403
0
#endif
5404
0
  SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
5405
0
}
5406
5407
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
5408
0
{
5409
0
  void *start_addr;
5410
0
  SLJIT_UNUSED_ARG(executable_offset);
5411
0
5412
0
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
5413
0
  if (op == SLJIT_MOV) {
5414
0
    start_addr = (void*)(addr - sizeof(sljit_sw));
5415
0
    SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);
5416
0
    sljit_unaligned_store_sw(start_addr, new_constant);
5417
0
    SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);
5418
0
    return;
5419
0
  }
5420
0
#endif
5421
0
5422
0
  start_addr = (void*)(addr - sizeof(sljit_s32));
5423
0
5424
0
  if ((op | SLJIT_32) == SLJIT_MOV32_U8) {
5425
0
    if ((new_constant & 0x100) != 0)
5426
0
      new_constant = new_constant | -(sljit_sw)0x100;
5427
0
    else
5428
0
      new_constant = (sljit_u8)new_constant;
5429
0
  }
5430
0
5431
0
  SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);
5432
0
  sljit_unaligned_store_s32(start_addr, (sljit_s32)new_constant);
5433
0
  SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);
5434
0
}