Coverage Report

Created: 2025-09-27 06:26

/src/php-src/ext/opcache/jit/ir/ir_emit.c
/*
 * IR - Lightweight JIT Compilation Framework
 * (Native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

#include "ir.h"

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif

#include "ir_private.h"
#ifndef _WIN32
# include <dlfcn.h>
#else
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <psapi.h>
#endif

#if defined(__linux__) || defined(__sun)
# include <alloca.h>
#endif

#define DASM_M_GROW(ctx, t, p, sz, need) \
  do { \
    size_t _sz = (sz), _need = (need); \
    if (_sz < _need) { \
      if (_sz < 16) _sz = 16; \
      while (_sz < _need) _sz += _sz; \
      (p) = (t *)ir_mem_realloc((p), _sz); \
      (sz) = _sz; \
    } \
  } while(0)

#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
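/*
 * Illustrative sketch (not part of the original file): DASM_M_GROW grows a
 * buffer geometrically, doubling from a floor of 16 bytes until it covers
 * "need". For sz = 0 and need = 100 the capacity steps through 16, 32, 64,
 * 128, so the buffer ends up reallocated to 128 bytes.
 */
#if 0 /* hypothetical usage example, not part of the build */
static void grow_example(void)
{
  char  *buf = NULL;
  size_t cap = 0;

  DASM_M_GROW(NULL, char, buf, cap, 100); /* cap == 128 after the call */
  DASM_M_FREE(NULL, buf, cap);
}
#endif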
#ifdef IR_DEBUG
# define DASM_CHECKS
#endif

typedef struct _ir_copy {
  ir_type type;
  ir_reg  from;
  ir_reg  to;
} ir_copy;

typedef struct _ir_dessa_copy {
  ir_type type;
  int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
  int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
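/*
 * Illustrative sketch (not part of the original file): an ir_dessa_copy
 * location is a single int32_t. Assuming IR_REG_NUM == 32 for the example,
 * from = -5 names constant #5, from = 3 names CPU register 3, and
 * from = 32 + 7 names virtual register 7 (i.e. its spill slot):
 */
#if 0 /* hypothetical encoding example, not part of the build */
static const ir_dessa_copy copy_example = {
  IR_I32, /* type */
  -5,     /* from: constant reference #5 */
  32 + 7, /* to:   virtual register 7, assuming IR_REG_NUM == 32 */
};
#endif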
#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif

static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
{
  if (IR_IS_CONST_REF(insn->op2)) {
    const ir_insn *func = &ctx->ir_base[insn->op2];

    if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
      if (func->proto) {
        return (const ir_proto_t *)ir_get_str(ctx, func->proto);
      }
    }
  } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
    return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
  }
  return NULL;
}

#ifdef IR_HAVE_FASTCALL
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
  if (sizeof(void*) == 4) {
    if (IR_IS_CONST_REF(insn->op2)) {
      const ir_insn *func = &ctx->ir_base[insn->op2];

      if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
        if (func->proto) {
          const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

          return (proto->flags & IR_FASTCALL_FUNC) != 0;
        }
      }
    } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
      const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);

      return (proto->flags & IR_FASTCALL_FUNC) != 0;
    }
    return 0;
  }
  return 0;
}
#else
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
  return 0;
}
#endif

bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
{
  const ir_proto_t *proto = ir_call_proto(ctx, insn);

  if (proto) {
    return (proto->flags & IR_VARARG_FUNC) != 0;
  }
  return 0;
}

IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  return ctx->rules[ref];
}

IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
  return ref > ctx->bb_start;
}


static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
  ir_use_list *use_list = &ctx->use_lists[1];
  int i;
  ir_ref use, *p;
  ir_insn *insn;
  int int_param = 0;
  int fp_param = 0;
  int int_reg_params_count = IR_REG_INT_ARGS;
  int fp_reg_params_count = IR_REG_FP_ARGS;
  const int8_t *int_reg_params = _ir_int_reg_params;
  const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
  if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
    int_reg_params_count = IR_REG_INT_FCARGS;
    fp_reg_params_count = IR_REG_FP_FCARGS;
    int_reg_params = _ir_int_fc_reg_params;
    fp_reg_params = _ir_fp_fc_reg_params;
  }
#endif

  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
    use = *p;
    insn = &ctx->ir_base[use];
    if (insn->op == IR_PARAM) {
      if (IR_IS_TYPE_INT(insn->type)) {
        if (use == ref) {
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
          if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
            /* struct passed by value on stack */
            return IR_REG_NONE;
          } else
#endif
          if (int_param < int_reg_params_count) {
            return int_reg_params[int_param];
          } else {
            return IR_REG_NONE;
          }
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
        } else {
          if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
            /* struct passed by value on stack */
            continue;
          }
#endif
        }
        int_param++;
#ifdef _WIN64
        /* the WIN64 calling convention uses a common counter for int and fp registers */
        fp_param++;
#endif
      } else {
        IR_ASSERT(IR_IS_TYPE_FP(insn->type));
        if (use == ref) {
          if (fp_param < fp_reg_params_count) {
            return fp_reg_params[fp_param];
          } else {
            return IR_REG_NONE;
          }
        }
        fp_param++;
#ifdef _WIN64
        /* the WIN64 calling convention uses a common counter for int and fp registers */
        int_param++;
#endif
      }
    }
  }
  return IR_REG_NONE;
}
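/*
 * Illustrative sketch (not part of the original file): ir_get_param_reg walks
 * the PARAMs attached to the start node and counts each class separately
 * until it reaches "ref". A standalone model of the counting logic, assuming
 * the SysV x86-64 register tables:
 */
#if 0 /* hypothetical example, not part of the build */
#include <stdio.h>

static const char *sysv_int_regs[6] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
static const char *sysv_fp_regs[8]  = {"xmm0", "xmm1", "xmm2", "xmm3",
                                       "xmm4", "xmm5", "xmm6", "xmm7"};

/* is_int[i]: 1 = integer class, 0 = floating-point class */
static void assign_params(const int *is_int, int n)
{
  int int_param = 0, fp_param = 0, i;

  for (i = 0; i < n; i++) {
    if (is_int[i]) {
      printf("param %d -> %s\n", i,
        int_param < 6 ? sysv_int_regs[int_param] : "stack");
      int_param++;
    } else {
      printf("param %d -> %s\n", i,
        fp_param < 8 ? sysv_fp_regs[fp_param] : "stack");
      fp_param++;
    }
  }
}

/* assign_params((int[]){1, 0, 1}, 3) prints rdi, xmm0, rsi:
   the FP parameter does not consume an integer slot */
#endif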
static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
  int j, n;
  ir_type type;
  int int_param = 0;
  int fp_param = 0;
  int count = 0;
  int int_reg_params_count = IR_REG_INT_ARGS;
  int fp_reg_params_count = IR_REG_FP_ARGS;
  const int8_t *int_reg_params = _ir_int_reg_params;
  const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
  if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
    int_reg_params_count = IR_REG_INT_FCARGS;
    fp_reg_params_count = IR_REG_FP_FCARGS;
    int_reg_params = _ir_int_fc_reg_params;
    fp_reg_params = _ir_fp_fc_reg_params;
  }
#endif

  n = insn->inputs_count;
  n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
  for (j = 3; j <= n; j++) {
    ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
    type = arg->type;
    if (IR_IS_TYPE_INT(type)) {
      if (arg->op == IR_ARGVAL) {
        continue;
      } else if (int_param < int_reg_params_count) {
        regs[j] = int_reg_params[int_param];
        count = j + 1;
      } else {
        regs[j] = IR_REG_NONE;
      }
      int_param++;
#ifdef _WIN64
      /* the WIN64 calling convention uses a common counter for int and fp registers */
      fp_param++;
#endif
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      if (fp_param < fp_reg_params_count) {
        regs[j] = fp_reg_params[fp_param];
        count = j + 1;
      } else {
        regs[j] = IR_REG_NONE;
      }
      fp_param++;
#ifdef _WIN64
      /* the WIN64 calling convention uses a common counter for int and fp registers */
      int_param++;
#endif
    }
  }
  return count;
}
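/*
 * Illustrative sketch (not part of the original file): under the WIN64
 * convention the int and fp counters advance in lockstep, so each argument
 * consumes its positional slot in both register files. For
 * f(int a, double b, int c, double d) that yields a -> rcx, b -> xmm1,
 * c -> r8, d -> xmm3, whereas SysV x86-64 would assign a -> rdi, b -> xmm0,
 * c -> rsi, d -> xmm1.
 */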
static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
  ir_live_interval *ival1;
  int32_t o1;

  if (IR_IS_CONST_REF(r1)) {
    return 0;
  }

  IR_ASSERT(ctx->vregs[r1]);
  ival1 = ctx->live_intervals[ctx->vregs[r1]];
  IR_ASSERT(ival1);
  o1 = ival1->stack_spill_pos;
  IR_ASSERT(o1 != -1);
  return o1 == offset;
}

void *ir_resolve_sym_name(const char *name)
{
  void *addr;

#ifndef _WIN32
  void *handle = NULL;
# ifdef RTLD_DEFAULT
  handle = RTLD_DEFAULT;
# endif
  addr = dlsym(handle, name);
#else
  HMODULE mods[256];
  DWORD cbNeeded;
  uint32_t i = 0;

  addr = NULL;

  EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);

  while(i < (cbNeeded / sizeof(HMODULE))) {
    addr = GetProcAddress(mods[i], name);
    if (addr) {
      return addr;
    }
    i++;
  }
#endif
  return addr;
}
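/*
 * Illustrative sketch (not part of the original file): on POSIX targets the
 * lookup is a single dlsym() over the whole process image, while on Windows
 * each loaded module has its own export table, hence the EnumProcessModules
 * loop above.
 */
#if 0 /* hypothetical usage example, not part of the build */
static void resolve_example(void)
{
  /* resolves the C library's printf on most POSIX systems */
  int (*my_printf)(const char *, ...) =
    (int (*)(const char *, ...))ir_resolve_sym_name("printf");

  if (my_printf) {
    my_printf("resolved at %p\n", (void*)my_printf);
  }
}
#endif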
#ifdef IR_SNAPSHOT_HANDLER_DCL
  IR_SNAPSHOT_HANDLER_DCL();
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *name = ir_get_str(ctx, addr_insn->val.name);
  void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
    ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) :
    ir_resolve_sym_name(name);

  return addr;
}
#endif

static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *name = ir_get_str(ctx, addr_insn->val.name);
  void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
    ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) :
    ir_resolve_sym_name(name);

  IR_ASSERT(addr);
  return addr;
}

static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  void *addr;

  IR_ASSERT(addr_insn->type == IR_ADDR);
  if (addr_insn->op == IR_FUNC) {
    addr = ir_sym_val(ctx, addr_insn);
  } else {
    IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
    addr = (void*)addr_insn->val.addr;
  }
  return addr;
}

static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  void *addr = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
  if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
    addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
  }
#endif
  return addr;
}

static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op)
{
  if (ctx->fused_regs) {
    char key[10];
    ir_ref val;

    memcpy(key, &root, sizeof(ir_ref));
    memcpy(key + 4, &ref_and_op, sizeof(ir_ref));

    val = ir_strtab_find(ctx->fused_regs, key, 8);
    if (val) {
      return val;
    }
  }
  return ((int8_t*)ctx->regs)[ref_and_op];
}

#if defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Warray-bounds"
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "dynasm/dasm_proto.h"
# include "dynasm/dasm_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "dynasm/dasm_proto.h"
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
# define DASM_ADD_VENEER ir_add_veneer
# include "dynasm/dasm_arm64.h"
#else
# error "Unknown IR target"
#endif

#if defined(__GNUC__)
# pragma GCC diagnostic pop
#endif

/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg);
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);

typedef struct _ir_common_backend_data {
  ir_reg_alloc_data  ra_data;
  uint32_t           dessa_from_block;
  dasm_State        *dasm_state;
  ir_bitset          emit_constants;
} ir_common_backend_data;

static int ir_const_label(ir_ctx *ctx, ir_ref ref)
{
  ir_common_backend_data *data = ctx->data;
  int label = ctx->cfg_blocks_count - ref;

  IR_ASSERT(IR_IS_CONST_REF(ref));
  ir_bitset_incl(data->emit_constants, -ref);
  return label;
}

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include <ir_emit_x86.h>
#elif defined(IR_TARGET_AARCH64)
# include <ir_emit_aarch64.h>
#else
# error "Unknown IR target"
#endif

static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
  int pos = 0, count, i;
  ir_ref ref;

  IR_ASSERT(ctx->binding);
  IR_ASSERT(list);
  while (1) {
    i = ir_list_at(list, pos);
    if (b == i) {
      break;
    }
    IR_ASSERT(i != 0); /* end marker */
    pos++;
    count = ir_list_at(list, pos);
    pos += count + 1;
  }
  pos++;
  count = ir_list_at(list, pos);
  pos++;

  for (i = 0; i < count; i++, pos++) {
    ref = ir_list_at(list, pos);
    IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
    if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
      /* not spilled */
      ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
      ir_type type = ctx->ir_base[ref].type;
      int32_t offset = -ir_binding_find(ctx, ref);

      IR_ASSERT(offset > 0);
      ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
    } else {
      IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
    }
  }
}

/*
 * Parallel copy sequentialization algorithm
 *
 * The implementation is based on Algorithm 1 described in
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
 */
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  int i;
  int8_t *pred, *loc, *types;
  ir_reg to, from;
  ir_type type;
  ir_regset todo, ready, srcs;

  if (count == 1) {
    to = copies[0].to;
    from = copies[0].from;
    IR_ASSERT(from != to);
    type = copies[0].type;
    if (IR_IS_TYPE_INT(type)) {
      ir_emit_mov(ctx, type, to, from);
    } else {
      ir_emit_fp_mov(ctx, type, to, from);
    }
    return 1;
  }

  loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
  pred = loc + IR_REG_NUM;
  types = pred + IR_REG_NUM;
  todo = IR_REGSET_EMPTY;
  srcs = IR_REGSET_EMPTY;

  for (i = 0; i < count; i++) {
    from = copies[i].from;
    to = copies[i].to;
    IR_ASSERT(from != to);
    IR_REGSET_INCL(srcs, from);
    loc[from] = from;
    pred[to] = from;
    types[from] = copies[i].type;
    IR_ASSERT(!IR_REGSET_IN(todo, to));
    IR_REGSET_INCL(todo, to);
  }

  ready = IR_REGSET_DIFFERENCE(todo, srcs);

  if (ready == todo) {
    for (i = 0; i < count; i++) {
      from = copies[i].from;
      to = copies[i].to;
      IR_ASSERT(from != to);
      type = copies[i].type;
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov(ctx, type, to, from);
      } else {
        ir_emit_fp_mov(ctx, type, to, from);
      }
    }
    return 1;
  }

  /* temporary registers can't be the same as some of the destinations */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));

  /* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
  while (ready != IR_REGSET_EMPTY) {
    ir_reg r;

    to = ir_regset_pop_first(&ready);
    from = pred[to];
    r = loc[from];
    type = types[from];
    if (IR_IS_TYPE_INT(type)) {
      ir_emit_mov_ext(ctx, type, to, r);
    } else {
      ir_emit_fp_mov(ctx, type, to, r);
    }
    IR_REGSET_EXCL(todo, to);
    loc[from] = to;
    if (from == r && IR_REGSET_IN(todo, from)) {
      IR_REGSET_INCL(ready, from);
    }
  }
  if (todo == IR_REGSET_EMPTY) {
    return 1;
  }

  /* at this point the sources that are the same as temporaries are already moved */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);

  /* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
  while (todo != IR_REGSET_EMPTY) {
    to = ir_regset_pop_first(&todo);
    from = pred[to];
    IR_ASSERT(to != loc[from]);
    type = types[from];
    if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
      if (pred[from] == to) {
        if (ir_type_size[types[to]] > ir_type_size[type]) {
          type = types[to];
        }
        ir_emit_swap(ctx, type, to, from);
        IR_REGSET_EXCL(todo, from);
        loc[to] = from;
        loc[from] = to;
        continue;
      }
#endif
      IR_ASSERT(tmp_reg != IR_REG_NONE);
      IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
      ir_emit_mov(ctx, type, tmp_reg, to);
      loc[to] = tmp_reg;
    } else {
#ifdef IR_HAVE_SWAP_FP
      if (pred[from] == to && types[to] == type) {
        ir_emit_swap_fp(ctx, type, to, from);
        IR_REGSET_EXCL(todo, from);
        loc[to] = from;
        loc[from] = to;
        continue;
      }
#endif
      IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
      IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
      loc[to] = tmp_fp_reg;
    }
    while (1) {
      ir_reg r;

      from = pred[to];
      r = loc[from];
      type = types[from];
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov_ext(ctx, type, to, r);
      } else {
        ir_emit_fp_mov(ctx, type, to, r);
      }
      IR_REGSET_EXCL(todo, to);
      loc[from] = to;
      if (from == r && IR_REGSET_IN(todo, from)) {
        to = from;
      } else {
        break;
      }
    }
  }

  return 1;
}
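/*
 * Illustrative sketch (not part of the original file): a worked trace of the
 * blade/axle decomposition above. The parallel copy {r2 <- r1, r3 <- r2,
 * r1 <- r3} is a pure cycle: every destination is also a source, so "ready"
 * starts empty and the axle loop breaks the cycle with the temporary
 * register (a 2-element cycle would instead become a single swap when
 * IR_HAVE_SWAP_INT is available):
 *
 *   mov tmp, r1
 *   mov r1, r3
 *   mov r3, r2
 *   mov r2, tmp
 *
 * A tree such as {r2 <- r1, r3 <- r1} needs no temporary: r2 and r3 are not
 * sources, so both moves are emitted directly in the blade loop.
 */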
static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_mem mem_from, mem_to;

  IR_ASSERT(from != to);
  if (to < IR_REG_NUM) {
    if (IR_IS_CONST_REF(from)) {
      if (-from < ctx->consts_count) {
        /* constant reference */
        ir_emit_load(ctx, type, to, from);
      } else {
        /* local variable address */
        ir_load_local_addr(ctx, to, -from - ctx->consts_count);
      }
    } else if (from < IR_REG_NUM) {
      if (IR_IS_TYPE_INT(type)) {
        ir_emit_mov(ctx, type, to, from);
      } else {
        ir_emit_fp_mov(ctx, type, to, from);
      }
    } else {
      mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
      ir_emit_load_mem(ctx, type, to, mem_from);
    }
  } else {
    mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
    if (IR_IS_CONST_REF(from)) {
      if (-from < ctx->consts_count) {
        /* constant reference */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
        if (IR_IS_TYPE_INT(type)
         && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
         && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
          ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
          return;
        }
#endif
        ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
        IR_ASSERT(tmp != IR_REG_NONE);
        ir_emit_load(ctx, type, tmp, from);
        ir_emit_store_mem(ctx, type, mem_to, tmp);
      } else {
        /* local variable address */
        IR_ASSERT(IR_IS_TYPE_INT(type));
        IR_ASSERT(tmp_reg != IR_REG_NONE);
        ir_load_local_addr(ctx, tmp_reg, -from - ctx->consts_count);
        ir_emit_store_mem(ctx, type, mem_to, tmp_reg);
      }
    } else if (from < IR_REG_NUM) {
      ir_emit_store_mem(ctx, type, mem_to, from);
    } else {
      mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
      IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
      ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
      IR_ASSERT(tmp != IR_REG_NONE);
      ir_emit_load_mem(ctx, type, tmp, mem_from);
      ir_emit_store_mem(ctx, type, mem_to, tmp);
    }
  }
}

IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_ref from;
  ir_mem tmp_spill_slot;
  ir_type type;

  IR_MEM_VAL(tmp_spill_slot) = 0;
  IR_ASSERT(!IR_IS_CONST_REF(to));
  from = pred[to];
  type = types[from];
  IR_ASSERT(!IR_IS_CONST_REF(from));
  IR_ASSERT(from != to);
  IR_ASSERT(loc[from] == from);

  if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
      /* a simple cycle of 2 elements */
      if (ir_type_size[types[to]] > ir_type_size[type]) {
        type = types[to];
      }
      ir_emit_swap(ctx, type, to, from);
      ir_bitset_excl(todo, from);
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
    loc[to] = tmp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_mov(ctx, type, tmp_reg, to);
    } else {
      ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  } else {
#ifdef IR_HAVE_SWAP_FP
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM && types[to] == type) {
      /* a simple cycle of 2 elements */
      ir_emit_swap_fp(ctx, type, to, from);
      ir_bitset_excl(todo, from);
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
    loc[to] = tmp_fp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
    } else {
      ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  }

  while (1) {
    int32_t r;

    from = pred[to];
    r = loc[from];
    type = types[to];

    if (from == r && ir_bitset_in(todo, from)) {
      /* a memory-to-memory move inside an isolated or "blocked" cycle requires an additional temporary register */
      if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
        ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;

        if (!IR_MEM_VAL(tmp_spill_slot)) {
          /* free a register, saving it in a temporary spill slot */
          tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
          ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
        }
        ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      } else {
        ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
      }
      ir_bitset_excl(todo, to);
      loc[from] = to;
      to = from;
    } else {
      break;
    }
  }

  type = types[to];
  if (IR_MEM_VAL(tmp_spill_slot)) {
    ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
  }
  ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
  ir_bitset_excl(todo, to);
  loc[from] = to;
}

static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  int i;
  int32_t *pred, *loc, to, from;
  int8_t *types;
  ir_type type;
  uint32_t len;
  ir_bitset todo, ready, srcs, visited;

  if (count == 1) {
    to = copies[0].to;
    from = copies[0].from;
    IR_ASSERT(from != to);
    type = copies[0].type;
    ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    return 1;
  }

  len = IR_REG_NUM + ctx->vregs_count + 1;
  todo = ir_bitset_malloc(len);
  srcs = ir_bitset_malloc(len);
  loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
  pred = loc + len;
  types = (int8_t*)(pred + len);

  for (i = 0; i < count; i++) {
    from = copies[i].from;
    to = copies[i].to;
    IR_ASSERT(from != to);
    if (!IR_IS_CONST_REF(from)) {
      ir_bitset_incl(srcs, from);
      loc[from] = from;
    }
    pred[to] = from;
    types[to] = copies[i].type;
    IR_ASSERT(!ir_bitset_in(todo, to));
    ir_bitset_incl(todo, to);
  }

  /* temporary registers can't be the same as some of the sources */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

  /* first we resolve all "windmill blades" - trees that don't set temporary registers */
  ready = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_difference(ready, srcs, ir_bitset_len(len));
  if (tmp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_reg);
  }
  if (tmp_fp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_fp_reg);
  }
  while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_excl(todo, to);
    type = types[to];
    from = pred[to];
    if (IR_IS_CONST_REF(from)) {
      ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    } else {
      int32_t r = loc[from];
      ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      loc[from] = to;
      if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
        ir_bitset_incl(ready, from);
      }
    }
  }

  /* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
  visited = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
  while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_clear(visited, ir_bitset_len(len));
    ir_bitset_incl(visited, to);
    to = pred[to];
    while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
      to = pred[to];
      if (IR_IS_CONST_REF(to)) {
        break;
      } else if (ir_bitset_in(visited, to)) {
        /* We found a cycle. Resolve it. */
        ir_bitset_incl(visited, to);
        ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg);
        break;
      }
      ir_bitset_incl(visited, to);
    }
    ir_bitset_difference(ready, visited, ir_bitset_len(len));
  }

  /* finally we resolve remaining "windmill blades" - trees that set temporary registers */
  ir_bitset_copy(ready, todo, ir_bitset_len(len));
  ir_bitset_difference(ready, srcs, ir_bitset_len(len));
  while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
    ir_bitset_excl(todo, to);
    type = types[to];
    from = pred[to];
    if (IR_IS_CONST_REF(from)) {
      ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
    } else {
      int32_t r = loc[from];
      ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      loc[from] = to;
      if (from == r && ir_bitset_in(todo, from)) {
        ir_bitset_incl(ready, from);
      }
    }
  }

  IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));

  ir_mem_free(visited);
  ir_mem_free(ready);
  ir_mem_free(loc);
  ir_mem_free(srcs);
  ir_mem_free(todo);
  return 1;
}
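/*
 * Illustrative sketch (not part of the original file): unlike
 * ir_parallel_copy(), which only shuffles CPU registers, this routine also
 * handles constants and spill slots. Assuming IR_REG_NUM == 32, the copy
 * list
 *
 *   { IR_I32, -2,     1      }   // CPU reg 1   <- constant #2
 *   { IR_I32, 32 + 5, 32 + 6 }   // vreg 6 slot <- vreg 5 slot
 *
 * loads constant #2 directly into register 1, and performs the
 * memory-to-memory move as a load/store pair through tmp_reg.
 */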
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
  uint32_t succ, k, n = 0;
  ir_block *succ_bb;
  ir_use_list *use_list;
  ir_ref i, *p;
  ir_dessa_copy *copies;
  ir_reg tmp_reg = ctx->regs[bb->end][0];
  ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

  IR_ASSERT(bb->successors_count == 1);
  succ = ctx->cfg_edges[bb->successors];
  succ_bb = &ctx->cfg_blocks[succ];
  IR_ASSERT(succ_bb->predecessors_count > 1);
  use_list = &ctx->use_lists[succ_bb->start];
  k = ir_phi_input_number(ctx, succ_bb, b);

  copies = alloca(use_list->count * sizeof(ir_dessa_copy));

  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
    ir_ref ref = *p;
    ir_insn *insn = &ctx->ir_base[ref];

    if (insn->op == IR_PHI) {
      ir_ref input = ir_insn_op(insn, k);
      ir_reg src = ir_get_alocated_reg(ctx, ref, k);
      ir_reg dst = ctx->regs[ref][0];
      ir_ref from, to;

      IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
      if (IR_IS_CONST_REF(input)) {
        from = input;
      } else if (ir_rule(ctx, input) == IR_STATIC_ALLOCA) {
        /* encode local variable address */
        from = -(ctx->consts_count + input);
      } else {
        from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
          (ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
      }
      to = (dst != IR_REG_NONE) ?
        (ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
      if (to != from) {
        if (to >= IR_REG_NUM
         && from >= IR_REG_NUM
         && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
            IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
          /* It's possible that different virtual registers share the same special spill slot */
          // TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
          continue;
        }
        copies[n].type = insn->type;
        copies[n].from = from;
        copies[n].to = to;
        n++;
      }
    }
  }

  if (n > 0) {
    ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
  }
}
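/*
 * Illustrative sketch (not part of the original file): for a successor block
 * that starts with
 *
 *   x = PHI(merge, v_a, v_b)
 *
 * and is entered from this block as the k-th predecessor, the loop above
 * records one ir_dessa_copy from the location of the k-th PHI input to the
 * location of x; each location is either a CPU register or IR_REG_NUM + vreg
 * for a spill slot, and constants keep their negative refs. The collected
 * batch is then sequentialized by ir_dessa_parallel_copy() using the scratch
 * registers reserved at bb->end.
 */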
int ir_match(ir_ctx *ctx)
{
  uint32_t b;
  ir_ref start, ref, *prev_ref;
  ir_block *bb;
  ir_insn *insn;
  uint32_t entries_count = 0;

  ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

  prev_ref = ctx->prev_ref;
  if (!prev_ref) {
    ir_build_prev_refs(ctx);
    prev_ref = ctx->prev_ref;
  }

  if (ctx->entries_count) {
    ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
  }

  for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
    start = bb->start;
    if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
      IR_ASSERT(entries_count < ctx->entries_count);
      insn = &ctx->ir_base[start];
      IR_ASSERT(insn->op == IR_ENTRY);
      insn->op3 = entries_count;
      ctx->entries[entries_count] = b;
      entries_count++;
    }
    ctx->rules[start] = IR_SKIPPED | IR_NOP;
    ref = bb->end;
    if (bb->successors_count == 1) {
      insn = &ctx->ir_base[ref];
      if (insn->op == IR_END || insn->op == IR_LOOP_END) {
        ctx->rules[ref] = insn->op;
        ref = prev_ref[ref];
        if (ref == start && ctx->cfg_edges[bb->successors] != b) {
          if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
            bb->flags |= IR_BB_EMPTY;
          } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
            bb->flags |= IR_BB_EMPTY;
            if (ctx->cfg_edges[bb->successors] == b + 1) {
              (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
            }
          }
          continue;
        }
      }
    }

    ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */

    while (ref != start) {
      uint32_t rule = ctx->rules[ref];

      if (!rule) {
        ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
      }
      ir_match_insn2(ctx, ref, rule);
      ref = prev_ref[ref];
    }
  }

  if (ctx->entries_count) {
    ctx->entries_count = entries_count;
    if (!entries_count) {
      ir_mem_free(ctx->entries);
      ctx->entries = NULL;
    }
  }

  return 1;
}

int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
{
  int32_t offset;

  IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
  offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
  IR_ASSERT(offset != -1);
  return IR_SPILL_POS_TO_OFFSET(offset);
}