Coverage Report

Created: 2026-04-01 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/opcache/jit/ir/ir_emit.c
Line
Count
Source
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (Native code generator based on DynAsm)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 */
7
8
#ifndef _GNU_SOURCE
9
# define _GNU_SOURCE
10
#endif
11
12
#include "ir.h"
13
14
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
15
# include "ir_x86.h"
16
#elif defined(IR_TARGET_AARCH64)
17
# include "ir_aarch64.h"
18
#else
19
# error "Unknown IR target"
20
#endif
21
22
#include "ir_private.h"
23
#ifndef _WIN32
24
# include <dlfcn.h>
25
#else
26
# define WIN32_LEAN_AND_MEAN
27
# include <windows.h>
28
# include <psapi.h>
29
#endif
30
31
#if defined(__linux__) || defined(__sun)
32
# include <alloca.h>
33
#endif
34
35
/*
 * Buffer growth hook: makes sure buffer "p" (elements of type "t", current size "sz")
 * can hold at least "need".
 *
 * Nothing happens when sz >= need. Otherwise the size is doubled, starting from at least 16,
 * until it reaches "need", is capped at sizeof(t) * DASM_SEC2POS(1), and the buffer is
 * reallocated with ir_mem_realloc(); "p" and "sz" are updated in place.
 *
 * When "need" exceeds that cap, Dst_REF->status is set to DASM_S_NOMEM and the macro
 * executes "return;" - i.e. it returns from the function that expanded it, which therefore
 * must return void. The "ctx" argument is not used by the expansion.
 */
#define DASM_M_GROW(ctx, t, p, sz, need) \
36
0
  do { \
37
0
    size_t _sz = (sz), _need = (need); \
38
0
    if (_sz < _need) { \
39
0
      size_t _limit = sizeof(t) * DASM_SEC2POS(1); \
40
0
      if (_need > _limit) { \
41
0
        Dst_REF->status = DASM_S_NOMEM; \
42
0
        return; \
43
0
      } \
44
0
      if (_sz < 16) _sz = 16; \
45
0
      while (_sz < _need) _sz += _sz; \
46
0
      if (_sz > _limit) _sz = _limit; \
47
0
      (p) = (t *)ir_mem_realloc((p), _sz); \
48
0
      (sz) = _sz; \
49
0
    } \
50
0
  } while(0)
51
52
0
#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
53
54
#ifdef IR_DEBUG
55
# define DASM_CHECKS
56
#endif
57
58
/* One register-to-register move of a parallel copy, as consumed by ir_parallel_copy(). */
typedef struct _ir_copy {
59
  ir_type type; /* type of the moved value; selects an integer or floating point move */
60
  ir_reg  from; /* source CPU register */
61
  ir_reg  to;   /* destination CPU register */
62
} ir_copy;
63
64
/*
 * One move of the parallel copy built by ir_emit_dessa_moves() for the PHIs of a
 * successor block. Unlike ir_copy, the end points are encoded as integers so that they
 * can also name constants and virtual registers.
 */
typedef struct _ir_dessa_copy {
65
  ir_type type;
66
  int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
67
  int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg  */
68
} ir_dessa_copy;
69
70
const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn)
71
0
{
72
0
  if (IR_IS_CONST_REF(insn->op2)) {
73
0
    const ir_insn *func = &ctx->ir_base[insn->op2];
74
75
0
    if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
76
0
      if (func->proto) {
77
0
        return (const ir_proto_t *)ir_get_str(ctx, func->proto);
78
0
      }
79
0
    }
80
0
  } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
81
0
    return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
82
0
  }
83
0
  return NULL;
84
0
}
85
86
/* Return the rule stored in ctx->rules for a non-constant instruction reference. */
IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
  const uint32_t *rules = ctx->rules;

  IR_ASSERT(!IR_IS_CONST_REF(ref));
  return rules[ref];
}
91
92
/* True when "ref" is located after ctx->bb_start, the start of the block being matched. */
IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
  return ctx->bb_start < ref;
}
96
97
98
/*
 * Return the CPU register in which the PARAM instruction "ref" is passed under the
 * calling convention selected by ctx->flags, or IR_REG_NONE if it has no register.
 *
 * The PARAMs are visited in the order they appear among the users of instruction 1,
 * counting the integer and floating point registers consumed so far.
 */
static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
  const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(ctx->flags);
  const ir_use_list *uses = &ctx->use_lists[1];
  const ir_ref *edge = &ctx->use_edges[uses->refs];
  int left;
  int gp_used = 0;
  int fp_used = 0;

  for (left = uses->count; left > 0; left--, edge++) {
    ir_ref use = *edge;
    const ir_insn *insn = &ctx->ir_base[use];

    if (insn->op != IR_PARAM) {
      continue;
    }

    if (IR_IS_TYPE_INT(insn->type)) {
      /* struct passed by value on stack: takes no register and consumes none */
      bool by_val_on_stack =
        ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val;

      if (use == ref) {
        if (by_val_on_stack || gp_used >= cc->int_param_regs_count) {
          return IR_REG_NONE;
        }
        return cc->int_param_regs[gp_used];
      }
      if (by_val_on_stack) {
        continue;
      }
      gp_used++;
      if (cc->shadow_param_regs) {
        /* with shadow registers every parameter consumes a slot in both register files */
        fp_used++;
      }
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(insn->type));
      if (use == ref) {
        if (fp_used >= cc->fp_param_regs_count) {
          return IR_REG_NONE;
        }
        return cc->fp_param_regs[fp_used];
      }
      fp_used++;
      if (cc->shadow_param_regs) {
        gp_used++;
      }
    }
  }
  return IR_REG_NONE;
}
148
149
static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, const ir_call_conv_dsc *cc, int8_t *regs)
150
0
{
151
0
  int j, n;
152
0
  ir_type type;
153
0
  int int_param = 0;
154
0
  int fp_param = 0;
155
0
  int count = 0;
156
157
0
  n = insn->inputs_count;
158
0
  n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
159
0
  for (j = 3; j <= n; j++) {
160
0
    ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
161
0
    type = arg->type;
162
0
    if (IR_IS_TYPE_INT(type)) {
163
0
      if (int_param < cc->int_param_regs_count && arg->op != IR_ARGVAL) {
164
0
        regs[j] = cc->int_param_regs[int_param];
165
0
        count = j + 1;
166
0
        int_param++;
167
0
        if (cc->shadow_param_regs) {
168
0
          fp_param++;
169
0
        }
170
0
      } else {
171
0
        regs[j] = IR_REG_NONE;
172
0
      }
173
0
    } else {
174
0
      IR_ASSERT(IR_IS_TYPE_FP(type));
175
0
      if (fp_param < cc->fp_param_regs_count) {
176
0
        regs[j] = cc->fp_param_regs[fp_param];
177
0
        count = j + 1;
178
0
        fp_param++;
179
0
        if (cc->shadow_param_regs) {
180
0
          int_param++;
181
0
        }
182
0
      } else {
183
0
        regs[j] = IR_REG_NONE;
184
0
      }
185
0
    }
186
0
  }
187
0
  return count;
188
0
}
189
190
/*
 * Tell whether the spill position recorded in the live interval of "r1" equals "offset".
 * Constant references never match.
 */
static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
  ir_live_interval *interval;
  int32_t spill_pos;

  if (IR_IS_CONST_REF(r1)) {
    return false;
  }

  IR_ASSERT(ctx->vregs[r1]);
  interval = ctx->live_intervals[ctx->vregs[r1]];
  IR_ASSERT(interval);

  spill_pos = interval->stack_spill_pos;
  IR_ASSERT(spill_pos != -1);

  return spill_pos == offset;
}
206
207
/*
 * Resolve a symbol name to an address within the current process.
 *
 * Non-Windows: a single dlsym() lookup, using RTLD_DEFAULT when the platform defines it
 * and a NULL handle otherwise.
 *
 * Windows: the modules of the current process are enumerated and GetProcAddress() is
 * tried on each in turn; the first hit wins. At most 256 modules (the size of the local
 * handle array) are searched.
 *
 * Returns NULL when the symbol is not found or the module list cannot be obtained.
 */
void *ir_resolve_sym_name(const char *name)
{
  void *addr;

#ifndef _WIN32
  void *handle = NULL;
# ifdef RTLD_DEFAULT
  handle = RTLD_DEFAULT;
# endif
  addr = dlsym(handle, name);
#else
  HMODULE mods[256];
  DWORD cbNeeded = 0;
  DWORD count, i;

  addr = NULL;

  /* On failure cbNeeded is not meaningful, so the module list must not be walked. */
  if (!EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded)) {
    return NULL;
  }

  /*
   * cbNeeded is the size required for ALL module handles and may exceed the size of
   * mods[]; only the entries that were actually stored may be read.
   */
  if (cbNeeded > sizeof(mods)) {
    cbNeeded = sizeof(mods);
  }
  count = cbNeeded / sizeof(HMODULE);

  for (i = 0; i < count; i++) {
    addr = (void*)GetProcAddress(mods[i], name);
    if (addr) {
      return addr;
    }
  }
#endif
  return addr;
}
236
237
#ifdef IR_SNAPSHOT_HANDLER_DCL
238
  IR_SNAPSHOT_HANDLER_DCL();
239
#endif
240
241
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
242
/*
 * Resolve the symbol named by a constant instruction without requiring success.
 * A loader-supplied resolver is preferred (called with IR_RESOLVE_SYM_SILENT);
 * otherwise ir_resolve_sym_name() is used. The result may be NULL.
 */
static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *sym = ir_get_str(ctx, addr_insn->val.name);

  if (ctx->loader && ctx->loader->resolve_sym_name) {
    return ctx->loader->resolve_sym_name(ctx->loader, sym, IR_RESOLVE_SYM_SILENT);
  }
  return ir_resolve_sym_name(sym);
}
251
#endif
252
253
/*
 * Resolve the symbol named by a constant instruction; the result is asserted non-NULL.
 * A loader-supplied resolver is preferred and receives IR_RESOLVE_SYM_ADD_THUNK for FUNC
 * constants (0 otherwise); without one, ir_resolve_sym_name() is used.
 */
static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
  const char *sym = ir_get_str(ctx, addr_insn->val.name);
  void *resolved;

  if (ctx->loader && ctx->loader->resolve_sym_name) {
    resolved = ctx->loader->resolve_sym_name(ctx->loader, sym,
      addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0);
  } else {
    resolved = ir_resolve_sym_name(sym);
  }

  IR_ASSERT(resolved);
  return resolved;
}
263
264
/*
 * Return the target address described by the constant "addr_insn": FUNC constants are
 * resolved by name through ir_sym_val(), ADDR / FUNC_ADDR constants carry the address
 * directly. "insn" is not used here.
 */
static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  IR_ASSERT(addr_insn->type == IR_ADDR);

  if (addr_insn->op == IR_FUNC) {
    return ir_sym_val(ctx, addr_insn);
  }

  IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
  return (void*)addr_insn->val.addr;
}
277
278
/*
 * Return the jump target for "insn". It starts out as ir_call_addr()'s result; when
 * IR_SNAPSHOT_HANDLER is configured and insn->op1 is a SNAPSHOT, the handler is given
 * the chance to substitute another address.
 */
static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
  void *target = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
  {
    ir_insn *snapshot = &ctx->ir_base[insn->op1];

    if (snapshot->op == IR_SNAPSHOT) {
      target = IR_SNAPSHOT_HANDLER(ctx, insn->op1, snapshot, target);
    }
  }
#endif
  return target;
}
289
290
/*
 * Return the register recorded for operand slot "ref_and_op" as seen from "root".
 *
 * When ctx->fused_regs exists it is searched first with an 8-byte key made of the raw
 * bytes of "root" followed by those of "ref_and_op"; a non-zero hit is returned.
 * Otherwise the value comes from ctx->regs viewed as a flat int8_t array.
 */
static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op)
{
  int8_t *flat_regs = (int8_t*)ctx->regs;

  if (!ctx->fused_regs) {
    return flat_regs[ref_and_op];
  } else {
    char key[10];
    ir_ref found;

    memcpy(key, &root, sizeof(ir_ref));
    memcpy(key + 4, &ref_and_op, sizeof(ir_ref));

    found = ir_strtab_find(ctx->fused_regs, key, 8);
    if (found) {
      return found;
    }
  }
  return flat_regs[ref_and_op];
}
306
307
#if defined(__GNUC__)
308
# pragma GCC diagnostic push
309
# pragma GCC diagnostic ignored "-Warray-bounds"
310
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
311
#endif
312
313
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
314
# include "dynasm/dasm_proto.h"
315
# include "dynasm/dasm_x86.h"
316
#elif defined(IR_TARGET_AARCH64)
317
# include "dynasm/dasm_proto.h"
318
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
319
# define DASM_ADD_VENEER ir_add_veneer
320
# include "dynasm/dasm_arm64.h"
321
#else
322
# error "Unknown IR target"
323
#endif
324
325
#if defined(__GNUC__)
326
# pragma GCC diagnostic pop
327
#endif
328
329
/* Forward Declarations */
330
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
331
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg);
332
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);
333
334
/*
 * Backend state reached through ctx->data (see ir_get_const_label()).
 * NOTE(review): presumably the common prefix of the target specific backend data
 * declared in the included ir_emit_*.h files - confirm there.
 */
typedef struct _ir_common_backend_data {
335
    ir_reg_alloc_data  ra_data;        /* register allocator data; not accessed in this part of the file */
336
  dasm_State        *dasm_state;     /* DynASM state */
337
  ir_bitset          emit_constants; /* bit -ref is set for every constant a label was requested for */
338
} ir_common_backend_data;
339
340
/*
 * Return the label number used for constant "ref" (cfg_blocks_count - ref, i.e. the
 * numbers following the block labels) and mark the constant in data->emit_constants.
 */
static int ir_get_const_label(ir_ctx *ctx, ir_ref ref)
{
  ir_common_backend_data *data = ctx->data;

  IR_ASSERT(IR_IS_CONST_REF(ref));
  ir_bitset_incl(data->emit_constants, -ref);

  return ctx->cfg_blocks_count - ref;
}
349
350
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
351
# include <ir_emit_x86.h>
352
#elif defined(IR_TARGET_AARCH64)
353
# include <ir_emit_aarch64.h>
354
#else
355
# error "Unknown IR target"
356
#endif
357
358
/*
 * Emit the loads needed on entry to block "b".
 *
 * ctx->osr_entry_loads is walked as a sequence of records
 *   [block number, count, ref_1 ... ref_count]
 * where block number 0 is the end marker. The record of "b" must exist. For each of its
 * refs whose live interval is not spilled, a load from the bound slot
 * (ctx->spill_base - ir_binding_find(ctx, ref)) into the allocated register is emitted;
 * spilled intervals are only checked to be of the "special spill" kind.
 */
static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
  int cursor = 0;
  int count, n;

  IR_ASSERT(ctx->binding);
  IR_ASSERT(list);

  /* find the record that belongs to block b */
  for (;;) {
    int block = ir_list_at(list, cursor);

    if (block == b) {
      break;
    }
    IR_ASSERT(block != 0); /* end marker */
    /* skip the block number, the count and "count" refs */
    cursor += ir_list_at(list, cursor + 1) + 2;
  }

  count = ir_list_at(list, cursor + 1);
  cursor += 2;

  for (n = 0; n < count; n++) {
    ir_ref ref = ir_list_at(list, cursor + n);
    ir_live_interval *ival;

    IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
    ival = ctx->live_intervals[ctx->vregs[ref]];

    if (ival->flags & IR_LIVE_INTERVAL_SPILLED) {
      IR_ASSERT(ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
    } else {
      /* not spilled */
      ir_reg reg = ival->reg;
      ir_type type = ctx->ir_base[ref].type;
      int32_t offset = -ir_binding_find(ctx, ref);

      IR_ASSERT(offset > 0);
      ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
    }
  }
}
396
397
/*
398
 * Parallel copy sequentialization algorithm
399
 *
400
 * The implementation is based on algorithm 1 described in
401
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
402
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
403
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
404
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
405
 */
406
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
407
0
{
408
0
  int i;
409
0
  int8_t *pred, *loc, *types;
410
0
  ir_reg to, from;
411
0
  ir_type type;
412
0
  ir_regset todo, ready, srcs;
413
414
0
  if (count == 1) {
415
0
    to = copies[0].to;
416
0
    from = copies[0].from;
417
0
    IR_ASSERT(from != to);
418
0
    type = copies[0].type;
419
0
    if (IR_IS_TYPE_INT(type)) {
420
0
      ir_emit_mov(ctx, type, to, from);
421
0
    } else {
422
0
      ir_emit_fp_mov(ctx, type, to, from);
423
0
    }
424
0
    return 1;
425
0
  }
426
427
0
  loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
428
0
  pred = loc + IR_REG_NUM;
429
0
  types = pred + IR_REG_NUM;
430
0
  todo = IR_REGSET_EMPTY;
431
0
  srcs = IR_REGSET_EMPTY;
432
433
0
  for (i = 0; i < count; i++) {
434
0
    from = copies[i].from;
435
0
    to = copies[i].to;
436
0
    IR_ASSERT(from != to);
437
0
    IR_REGSET_INCL(srcs, from);
438
0
    loc[from] = from;
439
0
    pred[to] = from;
440
0
    types[from] = copies[i].type;
441
0
    IR_ASSERT(!IR_REGSET_IN(todo, to));
442
0
    IR_REGSET_INCL(todo, to);
443
0
  }
444
445
0
  ready = IR_REGSET_DIFFERENCE(todo, srcs);
446
447
0
  if (ready == todo) {
448
0
    for (i = 0; i < count; i++) {
449
0
      from = copies[i].from;
450
0
      to = copies[i].to;
451
0
      IR_ASSERT(from != to);
452
0
      type = copies[i].type;
453
0
      if (IR_IS_TYPE_INT(type)) {
454
0
        ir_emit_mov(ctx, type, to, from);
455
0
      } else {
456
0
        ir_emit_fp_mov(ctx, type, to, from);
457
0
      }
458
0
    }
459
0
    return 1;
460
0
  }
461
462
  /* temporary registers can't be the same as some of the destinations */
463
0
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
464
0
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));
465
466
  /* first we resolve all "windmill blades" - trees (this doesn't requre temporary registers) */
467
0
  while (ready != IR_REGSET_EMPTY) {
468
0
    ir_reg r;
469
470
0
    to = ir_regset_pop_first(&ready);
471
0
    from = pred[to];
472
0
    r = loc[from];
473
0
    type = types[from];
474
0
    if (IR_IS_TYPE_INT(type)) {
475
0
      ir_emit_mov_ext(ctx, type, to, r);
476
0
    } else {
477
0
      ir_emit_fp_mov(ctx, type, to, r);
478
0
    }
479
0
    IR_REGSET_EXCL(todo, to);
480
0
    loc[from] = to;
481
0
    if (from == r && IR_REGSET_IN(todo, from)) {
482
0
      IR_REGSET_INCL(ready, from);
483
0
    }
484
0
  }
485
0
  if (todo == IR_REGSET_EMPTY) {
486
0
    return 1;
487
0
  }
488
489
  /* at this point the sources that are the same as temoraries are already moved */
490
0
  IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
491
0
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);
492
493
  /* now we resolve all "windmill axles" - cycles (this reuires temporary registers) */
494
0
  while (todo != IR_REGSET_EMPTY) {
495
0
    to = ir_regset_pop_first(&todo);
496
0
    from = pred[to];
497
0
    IR_ASSERT(to != loc[from]);
498
0
    type = types[from];
499
0
    if (IR_IS_TYPE_INT(type)) {
500
0
#ifdef IR_HAVE_SWAP_INT
501
0
      if (pred[from] == to) {
502
0
        if (ir_type_size[types[to]] > ir_type_size[type]) {
503
0
          type = types[to];
504
0
        }
505
0
        ir_emit_swap(ctx, type, to, from);
506
0
        IR_REGSET_EXCL(todo, from);
507
0
        loc[to] = from;
508
0
        loc[from] = to;
509
0
        continue;
510
0
      }
511
0
#endif
512
0
      IR_ASSERT(tmp_reg != IR_REG_NONE);
513
0
      IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
514
0
      ir_emit_mov(ctx, type, tmp_reg, to);
515
0
      loc[to] = tmp_reg;
516
0
    } else {
517
#ifdef IR_HAVE_SWAP_FP
518
      if (pred[from] == to && types[to] == type) {
519
        ir_emit_swap_fp(ctx, type, to, from);
520
        IR_REGSET_EXCL(todo, from);
521
        loc[to] = from;
522
        loc[from] = to;
523
        continue;
524
      }
525
#endif
526
0
      IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
527
0
      IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
528
0
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
529
0
      loc[to] = tmp_fp_reg;
530
0
    }
531
0
    while (1) {
532
0
      ir_reg r;
533
534
0
      from = pred[to];
535
0
      r = loc[from];
536
0
      type = types[from];
537
0
      if (IR_IS_TYPE_INT(type)) {
538
0
        ir_emit_mov_ext(ctx, type, to, r);
539
0
      } else {
540
0
        ir_emit_fp_mov(ctx, type, to, r);
541
0
      }
542
0
      IR_REGSET_EXCL(todo, to);
543
0
      loc[from] = to;
544
0
      if (from == r && IR_REGSET_IN(todo, from)) {
545
0
        to = from;
546
0
      } else {
547
0
        break;
548
0
      }
549
0
    }
550
0
  }
551
552
0
  return 1;
553
0
}
554
555
/*
 * Emit a single move between two locations in the ir_dessa_copy encoding.
 *
 *   to:   [0..IR_REG_NUM) - CPU register, [IR_REG_NUM...) - spill slot of a virtual register
 *   from: as above, or negative: a constant ref while -from < ctx->consts_count, otherwise
 *         the address of the local variable -from - ctx->consts_count
 *
 * "tmp_reg" / "tmp_fp_reg" are scratch registers. One is required whenever the destination
 * is a spill slot and the source is neither a CPU register nor (on x86 targets) an integer
 * constant that can be stored as an immediate.
 */
static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_mem src_slot, dst_slot;
  ir_reg scratch;

  IR_ASSERT(from != to);

  if (to < IR_REG_NUM) {
    /* the destination is a CPU register */
    if (IR_IS_CONST_REF(from)) {
      if (-from < ctx->consts_count) {
        /* constant reference */
        ir_emit_load(ctx, type, to, from);
      } else {
        /* local variable address */
        ir_load_local_addr(ctx, to, -from - ctx->consts_count);
      }
    } else if (from >= IR_REG_NUM) {
      src_slot = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
      ir_emit_load_mem(ctx, type, to, src_slot);
    } else if (IR_IS_TYPE_INT(type)) {
      ir_emit_mov(ctx, type, to, from);
    } else {
      ir_emit_fp_mov(ctx, type, to, from);
    }
    return;
  }

  /* the destination is a spill slot */
  dst_slot = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);

  if (!IR_IS_CONST_REF(from)) {
    if (from < IR_REG_NUM) {
      ir_emit_store_mem(ctx, type, dst_slot, from);
      return;
    }
    /* memory to memory: go through a scratch register */
    src_slot = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
    IR_ASSERT(IR_MEM_VAL(dst_slot) != IR_MEM_VAL(src_slot));
    scratch = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
    IR_ASSERT(scratch != IR_REG_NONE);
    ir_emit_load_mem(ctx, type, scratch, src_slot);
    ir_emit_store_mem(ctx, type, dst_slot, scratch);
    return;
  }

  if (-from >= ctx->consts_count) {
    /* local variable address */
    IR_ASSERT(IR_IS_TYPE_INT(type));
    IR_ASSERT(tmp_reg != IR_REG_NONE);
    ir_load_local_addr(ctx, tmp_reg, -from - ctx->consts_count);
    ir_emit_store_mem(ctx, type, dst_slot, tmp_reg);
    return;
  }

  /* constant reference */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
  if (IR_IS_TYPE_INT(type)
   && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
   && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
    /* the value fits into an immediate operand: no scratch register is needed */
    ir_emit_store_mem_imm(ctx, type, dst_slot, ctx->ir_base[from].val.i32);
    return;
  }
#endif
  scratch = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
  IR_ASSERT(scratch != IR_REG_NONE);
  ir_emit_load(ctx, type, scratch, from);
  ir_emit_store_mem(ctx, type, dst_slot, scratch);
}
615
616
/*
 * Resolve one cycle of the parallel copy handled by ir_dessa_parallel_copy().
 *
 * Locations use the ir_dessa_copy encoding ([0..IR_REG_NUM) - CPU register,
 * [IR_REG_NUM...) - spill slot of a virtual register).
 *
 *   pred[d]  - the location whose value must end up in d
 *   loc[s]   - the location currently holding the original value of s
 *   types[d] - the type of the copy whose destination is d
 *   todo     - bitset of destinations not written yet; every destination written here
 *              is removed from it
 *   to       - a destination that lies on the cycle
 *
 * A cycle of two CPU registers is resolved with a swap when the target provides one.
 * Otherwise the value of "to" is first saved in tmp_reg / tmp_fp_reg, the cycle is walked
 * backwards filling each destination from its predecessor, and the last destination
 * finally receives the saved value.
 *
 * A memory-to-memory step inside the cycle needs the scratch register while it still
 * holds the saved value, so the scratch register is stored once to a temporary slot at
 * offset -16 from the stack pointer and reloaded before the final move.
 */
IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  ir_ref from;
  ir_mem tmp_spill_slot;
  ir_type type;

  /* a zero value means that the scratch register has not been saved */
  IR_MEM_VAL(tmp_spill_slot) = 0;
  IR_ASSERT(!IR_IS_CONST_REF(to));
  from = pred[to];
  type = types[from];
  IR_ASSERT(!IR_IS_CONST_REF(from));
  IR_ASSERT(from != to);
  IR_ASSERT(loc[from] == from);

  if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
      /* a simple cycle from 2 elements */
      if (ir_type_size[types[to]] > ir_type_size[type]) {
        type = types[to];
      }
      ir_emit_swap(ctx, type, to, from);
      ir_bitset_excl(todo, from);
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
    /* open the cycle: save the current value of "to" in the scratch register */
    loc[to] = tmp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_mov(ctx, type, tmp_reg, to);
    } else {
      ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  } else {
#ifdef IR_HAVE_SWAP_FP
    if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM && types[to] == type) {
      /* a simple cycle from 2 elements */
      ir_emit_swap_fp(ctx, type, to, from);
      /* "todo" is an ir_bitset here (not an ir_regset), same as in the integer branch */
      ir_bitset_excl(todo, from);
      ir_bitset_excl(todo, to);
      loc[to] = from;
      loc[from] = to;
      return;
    }
#endif
    IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
    IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
    loc[to] = tmp_fp_reg;
    if (to < IR_REG_NUM) {
      ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
    } else {
      ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
    }
  }

  /* fill the destinations one by one while the predecessor still holds its original value */
  while (1) {
    int32_t r;

    from = pred[to];
    r = loc[from];
    type = types[to];

    if (from == r && ir_bitset_in(todo, from)) {
      /* Memory to memory move inside an isolated or "blocked" cycle requires an additional temporary register */
      if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
        ir_reg tmp = IR_IS_TYPE_INT(type) ?  tmp_reg : tmp_fp_reg;

        if (!IR_MEM_VAL(tmp_spill_slot)) {
          /* Free a register, saving it in a temporary spill slot */
          tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
          ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
        }
        ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
      } else {
        ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
      }
      ir_bitset_excl(todo, to);
      loc[from] = to;
      to = from;
    } else {
      /* the predecessor's value has been relocated (e.g. to the scratch register): last step */
      break;
    }
  }

  type = types[to];
  if (IR_MEM_VAL(tmp_spill_slot)) {
    /* bring back the value that was saved to free the scratch register */
    ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
  }
  ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
  ir_bitset_excl(todo, to);
  loc[from] = to;
}
712
713
/*
 * Emit code equivalent to performing all "count" copies simultaneously, where the end
 * points use the ir_dessa_copy encoding (constants, CPU registers and virtual registers
 * kept in spill slots).
 *
 * Every destination must be distinct and no copy may have from == to. "tmp_reg" and
 * "tmp_fp_reg" are scratch registers (or IR_REG_NONE); they must not be sources.
 *
 * The work is done in three passes:
 *   1. trees whose destinations are not the scratch registers,
 *   2. cycles (see ir_dessa_resolve_cycle()),
 *   3. the remaining trees, i.e. those that write the scratch registers.
 *
 * Always returns 1.
 */
static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
  int32_t *loc, *pred;
  int8_t *types;
  int32_t dst, src, cur;
  ir_bitset todo, srcs, ready, visited;
  uint32_t len, words;
  int n;

  if (count == 1) {
    IR_ASSERT(copies[0].from != copies[0].to);
    ir_emit_dessa_move(ctx, copies[0].type, copies[0].to, copies[0].from, tmp_reg, tmp_fp_reg);
    return 1;
  }

  /* one index per CPU register and per virtual register */
  len = IR_REG_NUM + ctx->vregs_count + 1;
  words = ir_bitset_len(len);
  todo = ir_bitset_malloc(len);
  srcs = ir_bitset_malloc(len);
  /* loc[], pred[] (int32_t) and types[] (int8_t) share a single allocation */
  loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
  pred = loc + len;
  types = (int8_t*)(pred + len);

  for (n = 0; n < count; n++) {
    src = copies[n].from;
    dst = copies[n].to;
    IR_ASSERT(src != dst);
    if (!IR_IS_CONST_REF(src)) {
      /* constants are never overwritten, so they are not tracked as sources */
      ir_bitset_incl(srcs, src);
      loc[src] = src;
    }
    pred[dst] = src;
    types[dst] = copies[n].type;
    IR_ASSERT(!ir_bitset_in(todo, dst));
    ir_bitset_incl(todo, dst);
  }

  /* temporary registers can't be the same as some of the sources */
  IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
  IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

  /* first we resolve all "windmill blades" - trees, that don't set temporary registers */
  ready = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, words);
  ir_bitset_difference(ready, srcs, words);
  if (tmp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_reg);
  }
  if (tmp_fp_reg != IR_REG_NONE) {
    ir_bitset_excl(ready, tmp_fp_reg);
  }
  for (;;) {
    dst = ir_bitset_pop_first(ready, words);
    if (dst < 0) {
      break;
    }
    ir_bitset_excl(todo, dst);
    src = pred[dst];
    if (IR_IS_CONST_REF(src)) {
      ir_emit_dessa_move(ctx, types[dst], dst, src, tmp_reg, tmp_fp_reg);
      continue;
    }
    cur = loc[src];
    ir_emit_dessa_move(ctx, types[dst], dst, cur, tmp_reg, tmp_fp_reg);
    loc[src] = dst;
    if (src == cur && ir_bitset_in(todo, src) && src != tmp_reg && src != tmp_fp_reg) {
      /* the source was read from its original place and may now be overwritten */
      ir_bitset_incl(ready, src);
    }
  }

  /* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
  visited = ir_bitset_malloc(len);
  ir_bitset_copy(ready, todo, words);
  ir_bitset_intersection(ready, srcs, words);
  for (;;) {
    dst = ir_bitset_first(ready, words);
    if (dst < 0) {
      break;
    }
    ir_bitset_clear(visited, words);
    ir_bitset_incl(visited, dst);
    dst = pred[dst];
    /* follow the predecessor chain until it leaves the candidates or closes a cycle */
    while (!IR_IS_CONST_REF(dst) && ir_bitset_in(ready, dst)) {
      dst = pred[dst];
      if (IR_IS_CONST_REF(dst)) {
        break;
      }
      if (ir_bitset_in(visited, dst)) {
        /* We found a cycle. Resolve it. */
        ir_bitset_incl(visited, dst);
        ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, dst, tmp_reg, tmp_fp_reg);
        break;
      }
      ir_bitset_incl(visited, dst);
    }
    ir_bitset_difference(ready, visited, words);
  }

  /* finally we resolve remaining "windmill blades" - trees that set temporary registers */
  ir_bitset_copy(ready, todo, words);
  ir_bitset_difference(ready, srcs, words);
  for (;;) {
    dst = ir_bitset_pop_first(ready, words);
    if (dst < 0) {
      break;
    }
    ir_bitset_excl(todo, dst);
    src = pred[dst];
    if (IR_IS_CONST_REF(src)) {
      ir_emit_dessa_move(ctx, types[dst], dst, src, tmp_reg, tmp_fp_reg);
      continue;
    }
    cur = loc[src];
    ir_emit_dessa_move(ctx, types[dst], dst, cur, tmp_reg, tmp_fp_reg);
    loc[src] = dst;
    if (src == cur && ir_bitset_in(todo, src)) {
      ir_bitset_incl(ready, src);
    }
  }

  IR_ASSERT(ir_bitset_empty(todo, words));

  ir_mem_free(visited);
  ir_mem_free(ready);
  ir_mem_free(loc);
  ir_mem_free(srcs);
  ir_mem_free(todo);
  return 1;
}
833
834
/*
 * Emit, at the end of block "b", the moves that implement the PHIs of its only successor.
 *
 * For each PHI among the users of the successor's start instruction, the input that
 * corresponds to "b" is copied into the PHI's location. All such copies form one parallel
 * copy which is handed to ir_dessa_parallel_copy() together with the scratch registers
 * recorded in ctx->regs[bb->end][0] (integer) and [1] (floating point).
 */
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
  ir_reg tmp_reg = ctx->regs[bb->end][0];
  ir_reg tmp_fp_reg = ctx->regs[bb->end][1];
  uint32_t succ, phi_input, used = 0;
  ir_block *succ_bb;
  ir_use_list *users;
  ir_dessa_copy *copies;
  ir_ref left, *edge;

  IR_ASSERT(bb->successors_count == 1);
  succ = ctx->cfg_edges[bb->successors];
  succ_bb = &ctx->cfg_blocks[succ];
  IR_ASSERT(succ_bb->predecessors_count > 1);
  users = &ctx->use_lists[succ_bb->start];
  phi_input = ir_phi_input_number(ctx, succ_bb, b);

  /* at most one copy per user of the successor's start instruction */
  copies = alloca(users->count * sizeof(ir_dessa_copy));

  edge = &ctx->use_edges[users->refs];
  for (left = users->count; left > 0; left--, edge++) {
    ir_ref ref = *edge;
    ir_insn *insn = &ctx->ir_base[ref];
    ir_ref input, from, to;
    ir_reg src, dst;

    if (insn->op != IR_PHI) {
      continue;
    }

    input = ir_insn_op(insn, phi_input);
    src = ir_get_alocated_reg(ctx, ref, phi_input);
    dst = ctx->regs[ref][0];

    IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));

    if (IR_IS_CONST_REF(input)) {
      from = input;
    } else if (ir_rule(ctx, input) == IR_STATIC_ALLOCA) {
      /* encode local variable address */
      from = -(ctx->consts_count + input);
    } else if (src != IR_REG_NONE && !IR_REG_SPILLED(src)) {
      from = (ir_ref)src;
    } else {
      from = (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
    }

    if (dst != IR_REG_NONE) {
      to = (ir_ref)dst;
    } else {
      to = (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
    }

    if (to == from) {
      continue;
    }
    if (to >= IR_REG_NUM
     && from >= IR_REG_NUM
     && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
        IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
      /* It's possible that different virtual registers share the same special spill slot */
      // TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
      continue;
    }

    copies[used].type = insn->type;
    copies[used].from = from;
    copies[used].to = to;
    used++;
  }

  if (used > 0) {
    ir_dessa_parallel_copy(ctx, copies, used, tmp_reg, tmp_fp_reg);
  }
}
896
897
/*
 * Assign a rule to every instruction (stored in the newly allocated ctx->rules).
 *
 * Blocks are visited from the last to the first and the instructions of each block from
 * the end to the start (through ctx->prev_ref, which is built on demand). Instructions
 * that still have no rule get one from ir_match_insn(); ir_match_insn2() is then called
 * for every visited instruction.
 *
 * In addition:
 *  - ENTRY blocks are numbered and collected in ctx->entries; ctx->entries_count is
 *    updated to the number actually found;
 *  - IR_HAS_BLOCK_ADDR is set when a BEGIN with a non-zero op2 is seen;
 *  - a block consisting only of its start and an END / LOOP_END that does not jump to
 *    itself is flagged IR_BB_EMPTY (for ENTRY blocks only with IR_MERGE_EMPTY_ENTRIES)
 *    and is not matched any further.
 *
 * Always returns 1.
 */
int ir_match(ir_ctx *ctx)
{
  uint32_t found_entries = 0;
  uint32_t b;
  ir_block *bb;
  ir_ref *prev_ref;

  ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

  if (!ctx->prev_ref) {
    ir_build_prev_refs(ctx);
  }
  prev_ref = ctx->prev_ref;

  if (ctx->entries_count) {
    ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
  }

  for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
    ir_ref start = bb->start;
    ir_ref ref = bb->end;

    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));

    if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
      ir_insn *entry = &ctx->ir_base[start];

      IR_ASSERT(found_entries < ctx->entries_count);
      IR_ASSERT(entry->op == IR_ENTRY);
      entry->op3 = found_entries;
      ctx->entries[found_entries] = b;
      found_entries++;
    }

    ctx->rules[start] = IR_SKIPPED | IR_NOP;
    if (ctx->ir_base[start].op == IR_BEGIN && ctx->ir_base[start].op2) {
      ctx->flags2 |= IR_HAS_BLOCK_ADDR;
    }

    if (bb->successors_count == 1) {
      ir_insn *last = &ctx->ir_base[ref];

      if (last->op == IR_END || last->op == IR_LOOP_END) {
        if (!ctx->rules[ref]) {
          ctx->rules[ref] = last->op;
        }
        ref = prev_ref[ref];
        if (ref == start) {
          uint32_t succ = ctx->cfg_edges[bb->successors];

          if (succ != b) {
            /* nothing between the start and the end of the block */
            if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
              bb->flags |= IR_BB_EMPTY;
            } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
              bb->flags |= IR_BB_EMPTY;
              if (succ == b + 1) {
                (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
              }
            }
            continue;
          }
        }
      }
    }

    ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */

    for (; ref != start; ref = prev_ref[ref]) {
      uint32_t rule = ctx->rules[ref];

      if (!rule) {
        rule = ir_match_insn(ctx, ref);
        ctx->rules[ref] = rule;
      }
      ir_match_insn2(ctx, ref, rule);
    }
  }

  if (ctx->entries_count) {
    ctx->entries_count = found_entries;
    if (!found_entries) {
      ir_mem_free(ctx->entries);
      ctx->entries = NULL;
    }
  }

  return 1;
}
977
978
/*
 * Return the spill slot offset of instruction "ref": the stack_spill_pos of its live
 * interval converted with IR_SPILL_POS_TO_OFFSET(). The instruction must have a virtual
 * register, a live interval and an assigned spill position.
 */
int32_t ir_get_spill_slot_offset(const ir_ctx *ctx, ir_ref ref)
{
  const ir_live_interval *ival;
  int32_t spill_pos;

  IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
  ival = ctx->live_intervals[ctx->vregs[ref]];
  spill_pos = ival->stack_spill_pos;
  IR_ASSERT(spill_pos != -1);
  return IR_SPILL_POS_TO_OFFSET(spill_pos);
}
987
988
const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags)
989
0
{
990
#ifdef IR_TARGET_X86
991
  if ((flags & IR_CALL_CONV_MASK) == IR_CC_FASTCALL) {
992
    return &ir_call_conv_x86_fastcall;
993
  }
994
#elif defined(IR_TARGET_X64)
995
0
  switch (flags & IR_CALL_CONV_MASK) {
996
0
    case IR_CC_DEFAULT:              return &ir_call_conv_default;
997
0
    case IR_CC_FASTCALL:             return &ir_call_conv_default;
998
0
    case IR_CC_PRESERVE_NONE:        return &ir_call_conv_x86_64_preserve_none;
999
0
    case IR_CC_X86_64_SYSV:          return &ir_call_conv_x86_64_sysv;
1000
0
    case IR_CC_X86_64_MS:            return &ir_call_conv_x86_64_ms;
1001
0
    default: break;
1002
0
  }
1003
#elif defined(IR_TARGET_AARCH64)
1004
  switch (flags & IR_CALL_CONV_MASK) {
1005
    case IR_CC_DEFAULT:              return &ir_call_conv_default;
1006
    case IR_CC_FASTCALL:             return &ir_call_conv_default;
1007
    case IR_CC_PRESERVE_NONE:        return &ir_call_conv_aarch64_preserve_none;
1008
    case IR_CC_AARCH64_SYSV:         return &ir_call_conv_aarch64_sysv;
1009
    case IR_CC_AARCH64_DARWIN:       return &ir_call_conv_aarch64_darwin;
1010
    default: break;
1011
  }
1012
#endif
1013
0
  IR_ASSERT((flags & IR_CALL_CONV_MASK) == IR_CC_DEFAULT || (flags & IR_CALL_CONV_MASK) == IR_CC_BUILTIN);
1014
0
  return &ir_call_conv_default;
1015
0
}