Coverage Report

Created: 2025-11-16 06:23

/src/php-src/ext/opcache/jit/ir/ir_emit.c
Every instrumented line below has an execution count of 0: nothing in ir_emit.c was executed.

/*
 * IR - Lightweight JIT Compilation Framework
 * (Native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

#include "ir.h"

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif

#include "ir_private.h"
#ifndef _WIN32
# include <dlfcn.h>
#else
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <psapi.h>
#endif

#if defined(__linux__) || defined(__sun)
# include <alloca.h>
#endif

#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      size_t _limit = sizeof(t) * DASM_SEC2POS(1); \
      if (_need > _limit) { \
        Dst_REF->status = DASM_S_NOMEM; \
        return; \
      } \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      if (_sz > _limit) _sz = _limit; \
      (p) = (t *)ir_mem_realloc((p), _sz); \
      (sz) = _sz; \
    } \
  } while(0)
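
/*
 * The macro above grows a DynAsm buffer with a capped doubling policy:
 * start at 16 elements, double until the request fits, clamp to _limit,
 * and fail with DASM_S_NOMEM when the request itself exceeds the limit.
 * A minimal standalone sketch of the same policy (grow_buffer is an
 * illustrative helper, not part of this file):
 */
static void *grow_buffer(void *p, size_t *sz, size_t need, size_t limit)
{
  size_t new_sz = *sz;

  if (new_sz >= need) {
    return p;                              /* already large enough */
  }
  if (need > limit) {
    return NULL;                           /* caller must report DASM_S_NOMEM */
  }
  if (new_sz < 16) new_sz = 16;            /* minimal starting size */
  while (new_sz < need) new_sz += new_sz;  /* double until the request fits */
  if (new_sz > limit) new_sz = limit;      /* clamp to the hard cap */
  *sz = new_sz;
  return ir_mem_realloc(p, new_sz);
}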

#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)

#ifdef IR_DEBUG
# define DASM_CHECKS
#endif

typedef struct _ir_copy {
  ir_type type;
  ir_reg  from;
  ir_reg  to;
} ir_copy;

typedef struct _ir_dessa_copy {
  ir_type type;
  int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
  int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
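
/*
 * A minimal sketch of how the ir_dessa_copy operand encoding above can be
 * decoded (the operand_kind enum and decode_dessa_operand helper are
 * illustrative names, not part of this file; negative values past
 * ctx->consts_count additionally encode local variable addresses, as
 * ir_emit_dessa_move below shows):
 */
typedef enum { OPERAND_CONST, OPERAND_CPU_REG, OPERAND_VREG } operand_kind;

static operand_kind decode_dessa_operand(int32_t op, int32_t *out)
{
  if (op < 0) {
    *out = -op;               /* constant table index (or local var address) */
    return OPERAND_CONST;
  } else if (op < IR_REG_NUM) {
    *out = op;                /* physical CPU register */
    return OPERAND_CPU_REG;
  } else {
    *out = op - IR_REG_NUM;   /* virtual register number */
    return OPERAND_VREG;
  }
}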

#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif

static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
{
  if (IR_IS_CONST_REF(insn->op2)) {
    const ir_insn *func = &ctx->ir_base[insn->op2];

    if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
      if (func->proto) {
        return (const ir_proto_t *)ir_get_str(ctx, func->proto);
      }
    }
  } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
    return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
  }
  return NULL;
}

#ifdef IR_HAVE_FASTCALL
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
  if (sizeof(void*) == 4) {
    if (IR_IS_CONST_REF(insn->op2)) {
      const ir_insn *func = &ctx->ir_base[insn->op2];

      if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
        if (func->proto) {
          const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

          return (proto->flags & IR_FASTCALL_FUNC) != 0;
        }
      }
    } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
      const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);

      return (proto->flags & IR_FASTCALL_FUNC) != 0;
    }
    return 0;
  }
  return 0;
}
#else
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
  return 0;
}
#endif

bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
{
  const ir_proto_t *proto = ir_call_proto(ctx, insn);

  if (proto) {
    return (proto->flags & IR_VARARG_FUNC) != 0;
  }
  return 0;
}

IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  return ctx->rules[ref];
}

IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
  return ref > ctx->bb_start;
}


static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
  ir_use_list *use_list = &ctx->use_lists[1];
  int i;
  ir_ref use, *p;
  ir_insn *insn;
  int int_param = 0;
  int fp_param = 0;
  int int_reg_params_count = IR_REG_INT_ARGS;
  int fp_reg_params_count = IR_REG_FP_ARGS;
  const int8_t *int_reg_params = _ir_int_reg_params;
  const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
  if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
    int_reg_params_count = IR_REG_INT_FCARGS;
    fp_reg_params_count = IR_REG_FP_FCARGS;
    int_reg_params = _ir_int_fc_reg_params;
    fp_reg_params = _ir_fp_fc_reg_params;
  }
#endif

  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
    use = *p;
    insn = &ctx->ir_base[use];
    if (insn->op == IR_PARAM) {
      if (IR_IS_TYPE_INT(insn->type)) {
        if (use == ref) {
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
          if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
            /* struct passed by value on the stack */
            return IR_REG_NONE;
          } else
#endif
          if (int_param < int_reg_params_count) {
            return int_reg_params[int_param];
          } else {
            return IR_REG_NONE;
          }
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
        } else {
          if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
            /* struct passed by value on the stack */
            continue;
          }
#endif
        }
        int_param++;
#ifdef _WIN64
        /* the WIN64 calling convention uses a common counter for int and fp registers */
        fp_param++;
#endif
      } else {
        IR_ASSERT(IR_IS_TYPE_FP(insn->type));
        if (use == ref) {
          if (fp_param < fp_reg_params_count) {
            return fp_reg_params[fp_param];
          } else {
            return IR_REG_NONE;
          }
        }
        fp_param++;
#ifdef _WIN64
        /* the WIN64 calling convention uses a common counter for int and fp registers */
        int_param++;
#endif
      }
    }
  }
  return IR_REG_NONE;
}
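
/*
 * The _WIN64 increments above reflect an ABI difference: the Windows x64
 * convention advances one shared argument-slot counter for both register
 * files, while SysV-style conventions count integer and FP argument
 * registers independently. A minimal sketch of the two counting schemes
 * (count_arg_slot is an illustrative helper, not part of this file):
 */
static void count_arg_slot(bool is_fp, bool win64, int *int_param, int *fp_param)
{
  if (is_fp) {
    (*fp_param)++;
    if (win64) (*int_param)++;  /* WIN64: both counters advance together */
  } else {
    (*int_param)++;
    if (win64) (*fp_param)++;
  }
}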

static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
  int j, n;
  ir_type type;
  int int_param = 0;
  int fp_param = 0;
  int count = 0;
  int int_reg_params_count = IR_REG_INT_ARGS;
  int fp_reg_params_count = IR_REG_FP_ARGS;
  const int8_t *int_reg_params = _ir_int_reg_params;
  const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
  if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
    int_reg_params_count = IR_REG_INT_FCARGS;
    fp_reg_params_count = IR_REG_FP_FCARGS;
    int_reg_params = _ir_int_fc_reg_params;
    fp_reg_params = _ir_fp_fc_reg_params;
  }
#endif

  n = insn->inputs_count;
  n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
  for (j = 3; j <= n; j++) {
    ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
    type = arg->type;
    if (IR_IS_TYPE_INT(type)) {
      if (arg->op == IR_ARGVAL) {
        continue;
      } else if (int_param < int_reg_params_count) {
        regs[j] = int_reg_params[int_param];
        count = j + 1;
      } else {
        regs[j] = IR_REG_NONE;
      }
      int_param++;
#ifdef _WIN64
      /* the WIN64 calling convention uses a common counter for int and fp registers */
      fp_param++;
#endif
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      if (fp_param < fp_reg_params_count) {
        regs[j] = fp_reg_params[fp_param];
        count = j + 1;
      } else {
        regs[j] = IR_REG_NONE;
      }
      fp_param++;
#ifdef _WIN64
      /* the WIN64 calling convention uses a common counter for int and fp registers */
      int_param++;
#endif
    }
  }
  return count;
}

static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
  ir_live_interval *ival1;
  int32_t o1;

  if (IR_IS_CONST_REF(r1)) {
    return 0;
  }

  IR_ASSERT(ctx->vregs[r1]);
  ival1 = ctx->live_intervals[ctx->vregs[r1]];
  IR_ASSERT(ival1);
  o1 = ival1->stack_spill_pos;
  IR_ASSERT(o1 != -1);
  return o1 == offset;
}

void *ir_resolve_sym_name(const char *name)
{
  void *addr;

#ifndef _WIN32
  void *handle = NULL;
# ifdef RTLD_DEFAULT
  handle = RTLD_DEFAULT;
# endif
  addr = dlsym(handle, name);
#else
  HMODULE mods[256];
  DWORD cbNeeded;
  uint32_t i = 0;

  addr = NULL;

  EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);

  while(i < (cbNeeded / sizeof(HMODULE))) {
    addr = GetProcAddress(mods[i], name);
    if (addr) {
      return addr;
    }
    i++;
  }
#endif
  return addr;
}
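
/*
 * A minimal usage sketch: on POSIX targets the function above reduces to
 * dlsym(RTLD_DEFAULT, name), so any symbol visible in the current process
 * can be resolved by name. resolve_example is an illustrative helper, not
 * part of this file, and "printf" is just an example symbol:
 */
static void *resolve_example(void)
{
  return ir_resolve_sym_name("printf");   /* NULL if the symbol is not found */
}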

#ifdef IR_SNAPSHOT_HANDLER_DCL
  IR_SNAPSHOT_HANDLER_DCL();
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *name = ir_get_str(ctx, addr_insn->val.name);
  void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
    ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
    ir_resolve_sym_name(name);

  return addr;
}
#endif

static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *name = ir_get_str(ctx, addr_insn->val.name);
  void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
    ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
    ir_resolve_sym_name(name);

  IR_ASSERT(addr);
  return addr;
}

static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  void *addr;

  IR_ASSERT(addr_insn->type == IR_ADDR);
  if (addr_insn->op == IR_FUNC) {
    addr = ir_sym_val(ctx, addr_insn);
  } else {
    IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
    addr = (void*)addr_insn->val.addr;
  }
  return addr;
}

static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  void *addr = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
  if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
    addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
  }
#endif
  return addr;
}

static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op)
{
  if (ctx->fused_regs) {
    char key[10];
    ir_ref val;

    memcpy(key, &root, sizeof(ir_ref));
    memcpy(key + 4, &ref_and_op, sizeof(ir_ref));

    val = ir_strtab_find(ctx->fused_regs, key, 8);
    if (val) {
      return val;
    }
  }
  return ((int8_t*)ctx->regs)[ref_and_op];
}
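
/*
 * The strtab lookup above packs two 32-bit values into one fixed 8-byte
 * string key. A minimal sketch of that construction (make_fused_key is an
 * illustrative helper, not part of this file; like the memcpy offsets
 * above, it assumes sizeof(ir_ref) == 4):
 */
static void make_fused_key(char key[8], ir_ref root, ir_ref ref_and_op)
{
  memcpy(key, &root, sizeof(ir_ref));           /* bytes 0..3: fusion root */
  memcpy(key + 4, &ref_and_op, sizeof(ir_ref)); /* bytes 4..7: ref and operand */
}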

#if defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Warray-bounds"
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "dynasm/dasm_proto.h"
# include "dynasm/dasm_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "dynasm/dasm_proto.h"
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
# define DASM_ADD_VENEER ir_add_veneer
# include "dynasm/dasm_arm64.h"
#else
# error "Unknown IR target"
#endif

#if defined(__GNUC__)
# pragma GCC diagnostic pop
#endif

/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg);
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);

typedef struct _ir_common_backend_data {
  ir_reg_alloc_data  ra_data;
  uint32_t           dessa_from_block;
  dasm_State        *dasm_state;
  ir_bitset          emit_constants;
} ir_common_backend_data;

static int ir_const_label(ir_ctx *ctx, ir_ref ref)
{
  ir_common_backend_data *data = ctx->data;
  int label = ctx->cfg_blocks_count - ref;

  IR_ASSERT(IR_IS_CONST_REF(ref));
  ir_bitset_incl(data->emit_constants, -ref);
  return label;
}

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include <ir_emit_x86.h>
#elif defined(IR_TARGET_AARCH64)
# include <ir_emit_aarch64.h>
#else
# error "Unknown IR target"
#endif

static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
  int pos = 0, count, i;
  ir_ref ref;

  IR_ASSERT(ctx->binding);
  IR_ASSERT(list);
  while (1) {
    i = ir_list_at(list, pos);
    if (b == i) {
      break;
    }
    IR_ASSERT(i != 0); /* end marker */
    pos++;
    count = ir_list_at(list, pos);
    pos += count + 1;
  }
  pos++;
  count = ir_list_at(list, pos);
  pos++;

  for (i = 0; i < count; i++, pos++) {
    ref = ir_list_at(list, pos);
    IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
    if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
      /* not spilled */
      ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
      ir_type type = ctx->ir_base[ref].type;
      int32_t offset = -ir_binding_find(ctx, ref);

      IR_ASSERT(offset > 0);
      ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
    } else {
      IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
    }
  }
}
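
/*
 * Layout of ctx->osr_entry_loads as consumed above (a sketch inferred from
 * this scan loop, not a definitive format description):
 *
 *   [ b1, n1, ref ... (n1 refs),
 *     b2, n2, ref ... (n2 refs),
 *     ...,
 *     0 ]   <- end marker
 *
 * The loop skips whole (block, count, refs...) groups until it reaches the
 * group for block b, then loads each recorded binding that still lives in
 * a register.
 */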

/*
 * Parallel copy sequentialization algorithm
 *
 * The implementation is based on Algorithm 1 described in
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
 */
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  int i;
  int8_t *pred, *loc, *types;
  ir_reg to, from;
  ir_type type;
  ir_regset todo, ready, srcs;

  if (count == 1) {
    to = copies[0].to;
    from = copies[0].from;
    IR_ASSERT(from != to);
    type = copies[0].type;
    if (IR_IS_TYPE_INT(type)) {
      ir_emit_mov(ctx, type, to, from);
    } else {
      ir_emit_fp_mov(ctx, type, to, from);
    }
    return 1;
  }

  loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
  pred = loc + IR_REG_NUM;
  types = pred + IR_REG_NUM;
  todo = IR_REGSET_EMPTY;
  srcs = IR_REGSET_EMPTY;

  for (i = 0; i < count; i++) {
    from = copies[i].from;
    to = copies[i].to;
    IR_ASSERT(from != to);
    IR_REGSET_INCL(srcs, from);
    loc[from] = from;
    pred[to] = from;
    types[from] = copies[i].type;
    IR_ASSERT(!IR_REGSET_IN(todo, to));
    IR_REGSET_INCL(todo, to);
  }

  ready = IR_REGSET_DIFFERENCE(todo, srcs);

  if (ready == todo) {
    for (i = 0; i < count; i++) {
      from = copies[i].from;
      to = copies[i].to;
      IR_ASSERT(from != to);
      type = copies[i].type;
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov(ctx, type, to, from);
      } else {
        ir_emit_fp_mov(ctx, type, to, from);
      }
    }
    return 1;
  }

  /* the temporary registers can't be among the destinations */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));

  /* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
  while (ready != IR_REGSET_EMPTY) {
    ir_reg r;

    to = ir_regset_pop_first(&ready);
    from = pred[to];
    r = loc[from];
    type = types[from];
    if (IR_IS_TYPE_INT(type)) {
      ir_emit_mov_ext(ctx, type, to, r);
    } else {
      ir_emit_fp_mov(ctx, type, to, r);
    }
    IR_REGSET_EXCL(todo, to);
    loc[from] = to;
    if (from == r && IR_REGSET_IN(todo, from)) {
      IR_REGSET_INCL(ready, from);
    }
  }
  if (todo == IR_REGSET_EMPTY) {
    return 1;
  }

  /* at this point the sources that are the same as the temporaries have already been moved */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);

  /* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
  while (todo != IR_REGSET_EMPTY) {
    to = ir_regset_pop_first(&todo);
    from = pred[to];
    IR_ASSERT(to != loc[from]);
    type = types[from];
    if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
      if (pred[from] == to) {
        if (ir_type_size[types[to]] > ir_type_size[type]) {
          type = types[to];
        }
        ir_emit_swap(ctx, type, to, from);
        IR_REGSET_EXCL(todo, from);
        loc[to] = from;
        loc[from] = to;
        continue;
      }
#endif
      IR_ASSERT(tmp_reg != IR_REG_NONE);
      IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
      ir_emit_mov(ctx, type, tmp_reg, to);
      loc[to] = tmp_reg;
    } else {
#ifdef IR_HAVE_SWAP_FP
      if (pred[from] == to && types[to] == type) {
        ir_emit_swap_fp(ctx, type, to, from);
        IR_REGSET_EXCL(todo, from);
        loc[to] = from;
        loc[from] = to;
        continue;
      }
#endif
      IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
      IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
      loc[to] = tmp_fp_reg;
    }
    while (1) {
      ir_reg r;

      from = pred[to];
      r = loc[from];
      type = types[from];
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov_ext(ctx, type, to, r);
      } else {
        ir_emit_fp_mov(ctx, type, to, r);
      }
      IR_REGSET_EXCL(todo, to);
      loc[from] = to;
      if (from == r && IR_REGSET_IN(todo, from)) {
        to = from;
      } else {
        break;
      }
    }
  }

  return 1;
}
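
/*
 * A self-contained sketch of the same sequentialization idea on plain
 * integers, assuming nothing from this file (demo_parallel_copy and TMP
 * are illustrative names). The copy set {0->1, 1->0, 2->3} contains one
 * tree edge and one cycle: the tree edge is emitted first (a "windmill
 * blade"), then the 2-cycle is broken through the temporary (the "axle"):
 */
static void demo_parallel_copy(int regs[5])
{
  enum { TMP = 4 };           /* scratch slot, distinct from all destinations */

  /* tree edge first: destination 3 is not the source of any pending copy */
  regs[3] = regs[2];          /* 2 -> 3 */

  /* the 2-cycle {0 -> 1, 1 -> 0} needs the temporary */
  regs[TMP] = regs[1];        /* save one side of the cycle */
  regs[1]   = regs[0];        /* 0 -> 1 */
  regs[0]   = regs[TMP];      /* 1 -> 0, via TMP */

  /* starting from {10, 20, 30, 40}, this yields {20, 10, 30, 30}: every
     destination received its source's pre-copy value, as if all three
     copies had executed simultaneously */
}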

static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_mem mem_from, mem_to;

  IR_ASSERT(from != to);
  if (to < IR_REG_NUM) {
    if (IR_IS_CONST_REF(from)) {
      if (-from < ctx->consts_count) {
        /* constant reference */
        ir_emit_load(ctx, type, to, from);
      } else {
        /* local variable address */
        ir_load_local_addr(ctx, to, -from - ctx->consts_count);
      }
    } else if (from < IR_REG_NUM) {
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov(ctx, type, to, from);
      } else {
        ir_emit_fp_mov(ctx, type, to, from);
      }
    } else {
      mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
      ir_emit_load_mem(ctx, type, to, mem_from);
    }
  } else {
    mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
    if (IR_IS_CONST_REF(from)) {
      if (-from < ctx->consts_count) {
        /* constant reference */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
        if (IR_IS_TYPE_INT(type)
         && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
         && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
          ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
          return;
        }
#endif
        ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
        IR_ASSERT(tmp != IR_REG_NONE);
        ir_emit_load(ctx, type, tmp, from);
        ir_emit_store_mem(ctx, type, mem_to, tmp);
      } else {
        /* local variable address */
        IR_ASSERT(IR_IS_TYPE_INT(type));
        IR_ASSERT(tmp_reg != IR_REG_NONE);
        ir_load_local_addr(ctx, tmp_reg, -from - ctx->consts_count);
        ir_emit_store_mem(ctx, type, mem_to, tmp_reg);
      }
    } else if (from < IR_REG_NUM) {
      ir_emit_store_mem(ctx, type, mem_to, from);
    } else {
      mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
      IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
      ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
      IR_ASSERT(tmp != IR_REG_NONE);
      ir_emit_load_mem(ctx, type, tmp, mem_from);
      ir_emit_store_mem(ctx, type, mem_to, tmp);
    }
  }
}

IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_ref from;
  ir_mem tmp_spill_slot;
  ir_type type;

  IR_MEM_VAL(tmp_spill_slot) = 0;
  IR_ASSERT(!IR_IS_CONST_REF(to));
  from = pred[to];
  type = types[from];
  IR_ASSERT(!IR_IS_CONST_REF(from));
  IR_ASSERT(from != to);
  IR_ASSERT(loc[from] == from);

  if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
      /* a simple cycle of 2 elements */
      if (ir_type_size[types[to]] > ir_type_size[type]) {
        type = types[to];
      }
      ir_emit_swap(ctx, type, to, from);
      ir_bitset_excl(todo, from);
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
    loc[to] = tmp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_mov(ctx, type, tmp_reg, to);
    } else {
      ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  } else {
#ifdef IR_HAVE_SWAP_FP
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM && types[to] == type) {
      /* a simple cycle of 2 elements */
      ir_emit_swap_fp(ctx, type, to, from);
      ir_bitset_excl(todo, from);   /* todo is an ir_bitset here, not an ir_regset */
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
    loc[to] = tmp_fp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
    } else {
      ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  }

  while (1) {
    int32_t r;

    from = pred[to];
    r = loc[from];
    type = types[to];

    if (from == r && ir_bitset_in(todo, from)) {
      /* a memory-to-memory move inside an isolated or "blocked" cycle requires an additional temporary register */
      if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
        ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;

        if (!IR_MEM_VAL(tmp_spill_slot)) {
          /* free a register, saving it in a temporary spill slot */
          tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
          ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
        }
        ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      } else {
        ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
      }
      ir_bitset_excl(todo, to);
      loc[from] = to;
      to = from;
    } else {
      break;
    }
  }

  type = types[to];
  if (IR_MEM_VAL(tmp_spill_slot)) {
    ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
  }
  ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
  ir_bitset_excl(todo, to);
  loc[from] = to;
}

static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  int i;
  int32_t *pred, *loc, to, from;
  int8_t *types;
  ir_type type;
  uint32_t len;
  ir_bitset todo, ready, srcs, visited;

  if (count == 1) {
    to = copies[0].to;
    from = copies[0].from;
    IR_ASSERT(from != to);
    type = copies[0].type;
    ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    return 1;
  }

  len = IR_REG_NUM + ctx->vregs_count + 1;
  todo = ir_bitset_malloc(len);
  srcs = ir_bitset_malloc(len);
  loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
  pred = loc + len;
  types = (int8_t*)(pred + len);

  for (i = 0; i < count; i++) {
    from = copies[i].from;
    to = copies[i].to;
    IR_ASSERT(from != to);
    if (!IR_IS_CONST_REF(from)) {
      ir_bitset_incl(srcs, from);
      loc[from] = from;
    }
    pred[to] = from;
    types[to] = copies[i].type;
    IR_ASSERT(!ir_bitset_in(todo, to));
    ir_bitset_incl(todo, to);
  }

  /* the temporary registers can't be among the sources */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

  /* first we resolve all "windmill blades" - trees that don't set the temporary registers */
  ready = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_difference(ready, srcs, ir_bitset_len(len));
  if (tmp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_reg);
  }
  if (tmp_fp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_fp_reg);
  }
  while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_excl(todo, to);
    type = types[to];
    from = pred[to];
    if (IR_IS_CONST_REF(from)) {
      ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    } else {
      int32_t r = loc[from];
      ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      loc[from] = to;
      if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
        ir_bitset_incl(ready, from);
      }
    }
  }

  /* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
  visited = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
  while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_clear(visited, ir_bitset_len(len));
    ir_bitset_incl(visited, to);
    to = pred[to];
    while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
      to = pred[to];
      if (IR_IS_CONST_REF(to)) {
        break;
      } else if (ir_bitset_in(visited, to)) {
        /* We found a cycle. Resolve it. */
        ir_bitset_incl(visited, to);
        ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg);
        break;
      }
      ir_bitset_incl(visited, to);
    }
    ir_bitset_difference(ready, visited, ir_bitset_len(len));
  }

  /* finally we resolve the remaining "windmill blades" - trees that set the temporary registers */
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_difference(ready, srcs, ir_bitset_len(len));
  while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_excl(todo, to);
    type = types[to];
    from = pred[to];
    if (IR_IS_CONST_REF(from)) {
      ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    } else {
      int32_t r = loc[from];
      ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      loc[from] = to;
      if (from == r && ir_bitset_in(todo, from)) {
        ir_bitset_incl(ready, from);
      }
    }
  }

  IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));

  ir_mem_free(visited);
  ir_mem_free(ready);
  ir_mem_free(loc);
  ir_mem_free(srcs);
  ir_mem_free(todo);
  return 1;
}
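
/*
 * Worked example of the three phases above (a sketch; rN is a CPU register
 * and vN the spill slot of virtual register N): given the copy set
 * {c -> r2, r0 -> v1, v1 -> r0}, phase 1 emits c -> r2 (a "blade": r2 is
 * nobody's source, assuming r2 is not tmp_reg), phase 2 finds the r0/v1
 * cycle and breaks it through tmp_reg (borrowing tmp_spill_slot only for
 * memory-to-memory moves, as ir_dessa_resolve_cycle shows), and phase 3
 * has nothing left to do.
 */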

static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
  uint32_t succ, k, n = 0;
  ir_block *succ_bb;
  ir_use_list *use_list;
  ir_ref i, *p;
  ir_dessa_copy *copies;
  ir_reg tmp_reg = ctx->regs[bb->end][0];
  ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

  IR_ASSERT(bb->successors_count == 1);
  succ = ctx->cfg_edges[bb->successors];
  succ_bb = &ctx->cfg_blocks[succ];
  IR_ASSERT(succ_bb->predecessors_count > 1);
  use_list = &ctx->use_lists[succ_bb->start];
  k = ir_phi_input_number(ctx, succ_bb, b);

  copies = alloca(use_list->count * sizeof(ir_dessa_copy));

  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
    ir_ref ref = *p;
    ir_insn *insn = &ctx->ir_base[ref];

    if (insn->op == IR_PHI) {
      ir_ref input = ir_insn_op(insn, k);
      ir_reg src = ir_get_alocated_reg(ctx, ref, k);
      ir_reg dst = ctx->regs[ref][0];
      ir_ref from, to;

      IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
      if (IR_IS_CONST_REF(input)) {
        from = input;
      } else if (ir_rule(ctx, input) == IR_STATIC_ALLOCA) {
        /* encode local variable address */
        from = -(ctx->consts_count + input);
      } else {
        from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
          (ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
      }
      to = (dst != IR_REG_NONE) ?
        (ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
      if (to != from) {
        if (to >= IR_REG_NUM
         && from >= IR_REG_NUM
         && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
            IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
          /* It's possible that different virtual registers share the same special spill slot */
          // TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
          continue;
        }
        copies[n].type = insn->type;
        copies[n].from = from;
        copies[n].to = to;
        n++;
      }
    }
  }

  if (n > 0) {
    ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
  }
}

int ir_match(ir_ctx *ctx)
{
  uint32_t b;
  ir_ref start, ref, *prev_ref;
  ir_block *bb;
  ir_insn *insn;
  uint32_t entries_count = 0;

  ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

  prev_ref = ctx->prev_ref;
  if (!prev_ref) {
    ir_build_prev_refs(ctx);
    prev_ref = ctx->prev_ref;
  }

  if (ctx->entries_count) {
    ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
  }

  for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
    start = bb->start;
    if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
      IR_ASSERT(entries_count < ctx->entries_count);
      insn = &ctx->ir_base[start];
      IR_ASSERT(insn->op == IR_ENTRY);
      insn->op3 = entries_count;
      ctx->entries[entries_count] = b;
      entries_count++;
    }
    ctx->rules[start] = IR_SKIPPED | IR_NOP;
    ref = bb->end;
    if (bb->successors_count == 1) {
      insn = &ctx->ir_base[ref];
      if (insn->op == IR_END || insn->op == IR_LOOP_END) {
        ctx->rules[ref] = insn->op;
        ref = prev_ref[ref];
        if (ref == start && ctx->cfg_edges[bb->successors] != b) {
          if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
            bb->flags |= IR_BB_EMPTY;
          } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
            bb->flags |= IR_BB_EMPTY;
            if (ctx->cfg_edges[bb->successors] == b + 1) {
              (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
            }
          }
          continue;
        }
      }
    }

    ctx->bb_start = start; /* bb_start is used by the matcher to avoid fusion of insns from different blocks */

    while (ref != start) {
      uint32_t rule = ctx->rules[ref];

      if (!rule) {
        ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
      }
      ir_match_insn2(ctx, ref, rule);
      ref = prev_ref[ref];
    }
  }

  if (ctx->entries_count) {
    ctx->entries_count = entries_count;
    if (!entries_count) {
      ir_mem_free(ctx->entries);
      ctx->entries = NULL;
    }
  }

  return 1;
}

int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
{
  int32_t offset;

  IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
  offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
  IR_ASSERT(offset != -1);
  return IR_SPILL_POS_TO_OFFSET(offset);
}