Coverage Report

Created: 2025-12-31 07:28

/src/php-src/ext/opcache/jit/ir/ir_ra.c
/*
 * IR - Lightweight JIT Compilation Framework
 * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 *
 * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
 * Michael Franz, CGO'10 (2010)
 * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
 * Christian Wimmer, VEE'05 (2005)
 */

#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif

#include <stdlib.h>
#include "ir.h"

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif

#include "ir_private.h"

int ir_regs_number(void)
{
  return IR_REG_NUM;
}

bool ir_reg_is_int(int32_t reg)
{
  IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
  return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
}

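/*
 * Slow path: without target rules we walk the CFG blocks directly and give
 * a virtual register to every data node that produces a reusable value.
 */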
static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
{
  uint32_t *vregs;
  uint32_t vregs_count = 0;
  uint32_t b;
  ir_ref i, n;
  ir_block *bb;
  ir_insn *insn;
  uint32_t flags;

  /* Assign a unique virtual register to each data node */
  vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
  n = 1;
  for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
    i = bb->start;

    /* skip first instruction */
    insn = ctx->ir_base + i;
    n = ir_insn_len(insn);
    i += n;
    insn += n;
    while (i < bb->end) {
      flags = ir_op_flags[insn->op];
      if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
        if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
          vregs[i] = ++vregs_count;
        }
      }
      n = ir_insn_len(insn);
      i += n;
      insn += n;
    }
  }
  ctx->vregs_count = vregs_count;
  ctx->vregs = vregs;

  return 1;
}

int ir_assign_virtual_registers(ir_ctx *ctx)
{
  uint32_t *vregs;
  uint32_t vregs_count = 0;
  ir_ref i;
  ir_insn *insn;

  if (!ctx->rules) {
    return ir_assign_virtual_registers_slow(ctx);
  }

  /* Assign a unique virtual register to each rule that needs it */
  vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));

  for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
    uint32_t v = 0;

    if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
      uint32_t flags = ir_op_flags[insn->op];

      if ((flags & IR_OP_FLAG_DATA)
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
        v = ++vregs_count;
      }
    }
    vregs[i] = v;
  }

  ctx->vregs_count = vregs_count;
  ctx->vregs = vregs;

  return 1;
}

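/*
 * Note: each IR instruction is expanded into several consecutive live
 * sub-positions (the IR_*_LIVE_POS_FROM_REF() macros from ir_private.h),
 * so a range may start or end "between" instructions, e.g. at the point
 * where inputs are loaded or where the result is saved.
 */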
/* Lifetime intervals construction */

static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
{
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));

  ival->type = IR_VOID;
  ival->reg = IR_REG_NONE;
  ival->flags = 0;
  ival->vreg = v;
  ival->stack_spill_pos = -1; // not allocated
  ival->range.start = start;
  ival->range.end = ival->end = end;
  ival->range.next = NULL;
  ival->use_pos = NULL;
  ival->next = NULL;

  ctx->live_intervals[v] = ival;
  return ival;
}

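/*
 * Adds [start, end) to the interval of virtual register 'v', keeping the
 * range list sorted and merging any ranges that touch or overlap. Range
 * structures freed by merging are recycled through ctx->unused_ranges.
 */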
static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
{
  ir_live_interval *ival = ctx->live_intervals[v];
  ir_live_range *p, *q;

  if (!ival) {
    return ir_new_live_range(ctx, v, start, end);
  }

  p = &ival->range;
  if (end >= p->start) {
    ir_live_range *prev = NULL;

    do {
      if (p->end >= start) {
        if (start < p->start) {
          p->start = start;
        }
        if (end > p->end) {
          /* merge with next */
          ir_live_range *next = p->next;

          p->end = end;
          while (next && p->end >= next->start) {
            if (next->end > p->end) {
              p->end = next->end;
            }
            p->next = next->next;
            /* remember in the "unused_ranges" list */
            next->next = ctx->unused_ranges;
            ctx->unused_ranges = next;
            next = p->next;
          }
          if (!p->next) {
            ival->end = p->end;
          }
        }
        return ival;
      }
      prev = p;
      p = prev->next;
    } while (p && end >= p->start);
    if (!p) {
      ival->end = end;
    }
    if (prev) {
      if (ctx->unused_ranges) {
        /* reuse */
        q = ctx->unused_ranges;
        ctx->unused_ranges = q->next;
      } else {
        q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
      }
      prev->next = q;
      q->start = start;
      q->end = end;
      q->next = p;
      return ival;
    }
  }

  if (ctx->unused_ranges) {
    /* reuse */
    q = ctx->unused_ranges;
    ctx->unused_ranges = q->next;
  } else {
    q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
  }
  q->start = p->start;
  q->end = p->end;
  q->next = p->next;
  p->start = start;
  p->end = end;
  p->next = q;
  return ival;
}

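/*
 * Fast path for the backward block scan: when the new range ends exactly
 * where the interval currently starts, just extend the first range.
 */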
IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
{
  ir_live_interval *ival = ctx->live_intervals[v];

  if (ival && ival->range.start == end) {
    ival->range.start = start;
    return ival;
  }
  return ir_add_live_range(ctx, v, start, end);
}

static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end)
{
  int v = ctx->vregs_count + 1 + reg;
  ir_live_interval *ival = ctx->live_intervals[v];
  ir_live_range *q;

  if (!ival) {
    ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
    ival->type = IR_VOID;
    ival->reg = reg;
    ival->flags = IR_LIVE_INTERVAL_FIXED;
    ival->vreg = v;
    ival->stack_spill_pos = -1; // not allocated
    ival->range.start = start;
    ival->range.end = ival->end = end;
    ival->range.next = NULL;
    ival->use_pos = NULL;
    ival->next = NULL;

    ctx->live_intervals[v] = ival;
  } else if (EXPECTED(end < ival->range.start)) {
    if (ctx->unused_ranges) {
      /* reuse */
      q = ctx->unused_ranges;
      ctx->unused_ranges = q->next;
    } else {
      q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
    }

    q->start = ival->range.start;
    q->end = ival->range.end;
    q->next = ival->range.next;
    ival->range.start = start;
    ival->range.end = end;
    ival->range.next = q;
  } else if (end == ival->range.start) {
    ival->range.start = start;
  } else {
    ir_add_live_range(ctx, v, start, end);
  }
}

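/*
 * Temporary-register intervals do not belong to any virtual register; they
 * are collected in ctx->live_intervals[0] as a list ordered by start position.
 */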
static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg)
{
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));

  ival->type = tmp_reg.type;
  ival->reg = IR_REG_NONE;
  ival->flags = IR_LIVE_INTERVAL_TEMP;
  ival->tmp_ref = tmp_ref;
  ival->tmp_op_num = tmp_op_num;
  ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start;
  ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end;
  ival->range.next = NULL;
  ival->use_pos = NULL;

  if (!ctx->live_intervals[0]) {
    ival->next = NULL;
    ctx->live_intervals[0] = ival;
  } else if (ival->range.start >= ctx->live_intervals[0]->range.start) {
    ir_live_interval *prev = ctx->live_intervals[0];

    while (prev->next && ival->range.start >= prev->next->range.start) {
      prev = prev->next;
    }
    ival->next = prev->next;
    prev->next = ival;
  } else {
    ir_live_interval *next = ctx->live_intervals[0];

    ival->next = next;
    ctx->live_intervals[0] = ival;
  }
  return;
}

static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)
{
  ir_live_interval *ival = ctx->live_intervals[0];

  if (ival) {
    while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) {
      if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) {
        return 1;
      }
      ival = ival->next;
    }
  }
  return 0;
}

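/*
 * Called when the defining instruction of 'v' is reached: the range that was
 * conservatively opened at the block start is trimmed to the real def position.
 */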
static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start)
{
  ir_live_interval *ival = ctx->live_intervals[v];
  ir_live_range *p = &ival->range;

#if 0
  while (p && p->start < old_start) {
    p = p->next;
  }
#endif
  IR_ASSERT(ival && p->start == old_start);
  p->start = new_start;
  return ival;
}

static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
{
  ir_use_pos *p = ival->use_pos;

  if (EXPECTED(!p || p->pos > use_pos->pos)) {
    use_pos->next = p;
    ival->use_pos = use_pos;
  } else {
    ir_use_pos *prev;

    do {
      prev = p;
      p = p->next;
    } while (p && p->pos < use_pos->pos);

    use_pos->next = prev->next;
    prev->next = use_pos;
  }
}


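/*
 * Records a use of 'ival' at 'pos'. A register hint or a hint reference
 * (another instruction whose register we would like to share) is remembered
 * on the interval so the allocator can honor it later.
 */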
IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref)
{
  ir_use_pos *use_pos;

  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
  use_pos->op_num = op_num;
  use_pos->hint = hint;
  use_pos->flags = use_flags;
  use_pos->hint_ref = hint_ref;
  use_pos->pos = pos;

  if (hint != IR_REG_NONE) {
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
  }
  if (hint_ref > 0) {
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
  }

  ir_add_use_pos(ctx, ival, use_pos);
}

static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref)
{
  ir_use_pos *use_pos;

  IR_ASSERT(phi_ref > 0);
  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
  use_pos->op_num = op_num;
  use_pos->hint = IR_REG_NONE;
  use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ???
  use_pos->hint_ref = -phi_ref;
  use_pos->pos = pos;

  ir_add_use_pos(ctx, ival, use_pos);
}

static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
{
  ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];

  if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) {
    ir_use_pos *use_pos = ival->use_pos;

    while (use_pos) {
      if (use_pos->pos == pos) {
        if (use_pos->hint == IR_REG_NONE) {
          use_pos->hint = hint;
          ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
        }
      }
      use_pos = use_pos->next;
    }
  }
}

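/*
 * Propagates register hints backwards: when the definition of an interval is
 * hinted to another instruction (hint_ref) and a later use carries a concrete
 * register hint, that register is copied onto the referenced definition.
 */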
static void ir_hint_propagation(ir_ctx *ctx)
{
  int i;
  ir_live_interval *ival;
  ir_use_pos *use_pos;
  ir_use_pos *hint_use_pos;

  for (i = ctx->vregs_count; i > 0; i--) {
    ival = ctx->live_intervals[i];
    if (ival
     && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
      use_pos = ival->use_pos;
      hint_use_pos = NULL;
      while (use_pos) {
        if (use_pos->op_num == 0) {
          if (use_pos->hint_ref > 0) {
            hint_use_pos = use_pos;
          }
        } else if (use_pos->hint != IR_REG_NONE) {
          if (hint_use_pos) {
            ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
            hint_use_pos = NULL;
          }
        }
        use_pos = use_pos->next;
      }
    }
  }
}

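/*
 * Two alternative liveness implementations follow. With IR_BITSET_LIVENESS
 * defined, live sets are dense bitsets computed during a reverse block scan;
 * otherwise liveness is derived per variable by exploring paths from uses to
 * the definition, with live-out sets stored as linked lists.
 */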
#ifdef IR_BITSET_LIVENESS
/* DFS + Loop-Forest liveness for SSA using bitset(s) */
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
{
  bool ok = 1;
  int count = 0;
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
  ir_ref i;

  IR_BITSET_FOREACH(live, len, i) {
    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);

    if (use_pos->op_num) {
      ir_ref *ops = ctx->ir_base[ref].ops;
      ref = ops[use_pos->op_num];
    }

    if (ctx->ir_base[ref].op == IR_PARAM) {
      continue;
    }
    if (ctx->binding) {
      ir_ref var = ir_binding_find(ctx, ref);
      if (var < 0) {
        /* We may load the value at OSR entry-point */
        if (!count) {
          bb->flags &= ~IR_BB_EMPTY;
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
          if (!ctx->osr_entry_loads) {
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
            ir_list_init(list, 16);
          }
          ir_list_push(list, b);
          ir_list_push(list, 0);
        }
        ir_list_push(list, ref);
        count++;
        continue;
      }
    }
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
    ok = 0;
  } IR_BITSET_FOREACH_END();

  if (!ok) {
    IR_ASSERT(0);
  }
  if (count) {
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);

#if 0
    /* ENTRY "clobbers" all registers */
    ir_ref ref = ctx->ir_base[bb->start].op1;
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
      IR_DEF_LIVE_POS_FROM_REF(ref),
      IR_SAVE_LIVE_POS_FROM_REF(ref));
#endif
  }
}

static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live)
{
  ir_ref stack[4];
  int stack_pos = 0;
  ir_target_constraints constraints;
  ir_insn *insn;
  uint32_t j, n, flags, def_flags;
  ir_ref *p, child;
  uint8_t use_flags;
  ir_reg reg;
  ir_live_pos use_pos;
  ir_live_interval *ival;

  while (1) {
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);

    if (!(ctx->rules[input] & IR_SIMPLE)) {
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
      n = constraints.tmps_count;
      while (n > 0) {
        n--;
        if (constraints.tmp_regs[n].type) {
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
        } else {
          /* CPU specific constraints */
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
        }
      }
    } else {
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
      constraints.hints_count = 0;
    }

    insn = &ctx->ir_base[input];
    flags = ir_op_flags[insn->op];
    n = IR_INPUT_EDGES_COUNT(flags);
    j = 1;
    p = insn->ops + j;
    if (flags & IR_OP_FLAG_CONTROL) {
      j++;
      p++;
    }
    for (; j <= n; j++, p++) {
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
      child = *p;
      if (child > 0) {
        uint32_t v = ctx->vregs[child];

        if (v) {
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          if (EXPECTED(reg == IR_REG_NONE)) {
            use_pos += IR_USE_SUB_REF;
          }

          if (!ir_bitset_in(live, v)) {
            /* live.add(opd) */
            ir_bitset_incl(live, v);
            /* intervals[opd].addRange(b.from, op.id) */
            ival = ir_add_live_range(ctx, v,
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
          } else {
            ival = ctx->live_intervals[v];
          }
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
        } else if (ctx->rules[child] & IR_FUSED) {
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
          stack[stack_pos++] = child;
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
        }
      }
    }
    if (!stack_pos) {
      break;
    }
    input = stack[--stack_pos];
  }
}

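/*
 * Backward scan over the linearized blocks, in the style of Wimmer & Franz
 * (CGO'10): live-out of a block is the union of the live-in sets of its
 * successors, definitions close ranges, uses open them, and everything live
 * at a loop header is extended over the whole loop body.
 */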
int ir_compute_live_ranges(ir_ctx *ctx)
{
  uint32_t b, i, j, k, n, succ, *p;
  ir_ref ref;
  uint32_t len;
  ir_insn *insn;
  ir_block *bb, *succ_bb;
#ifdef IR_DEBUG
  ir_bitset visited;
#endif
  ir_bitset live, bb_live;
  ir_bitset loops = NULL;
  ir_bitqueue queue;
  ir_live_interval *ival;

  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
    return 0;
  }

  if (ctx->rules) {
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
  }

  /* Root of the list of IR_VARs */
  ctx->vars = IR_UNUSED;

  /* Compute Live Ranges */
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
  len = ir_bitset_len(ctx->vregs_count + 1);
  bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));

  /* vregs + tmp + fixed + SCRATCH + ALL */
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

#ifdef IR_DEBUG
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
#endif

  if (!ctx->arena) {
    ctx->arena = ir_arena_create(16 * 1024);
  }

  /* for each basic block in reverse order */
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
    bb = &ctx->cfg_blocks[b];
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
    /* for each successor of b */

#ifdef IR_DEBUG
    ir_bitset_incl(visited, b);
#endif
    live = bb_live + (len * b);
    n = bb->successors_count;
    if (n == 0) {
      ir_bitset_clear(live, len);
    } else {
      p = &ctx->cfg_edges[bb->successors];
      succ = *p;

#ifdef IR_DEBUG
      /* blocks must be ordered so that all dominators of a block come before it */
      IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
#endif

      /* live = union of successors.liveIn */
      if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
        ir_bitset_copy(live, bb_live + (len * succ), len);
      } else {
        IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
        ir_bitset_clear(live, len);
      }
      if (n > 1) {
        for (p++, n--; n > 0; p++, n--) {
          succ = *p;
          if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
            ir_bitset_union(live, bb_live + (len * succ), len);
          } else {
            IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
          }
        }
      }

      /* for each opd in live */
      IR_BITSET_FOREACH(live, len, i) {
        /* intervals[opd].addRange(b.from, b.to) */
        ir_add_prev_live_range(ctx, i,
          IR_START_LIVE_POS_FROM_REF(bb->start),
          IR_END_LIVE_POS_FROM_REF(bb->end));
      } IR_BITSET_FOREACH_END();
    }

    if (bb->successors_count == 1) {
      /* for each phi function phi of successor */
      succ = ctx->cfg_edges[bb->successors];
      succ_bb = &ctx->cfg_blocks[succ];
      if (succ_bb->flags & IR_BB_HAS_PHI) {
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];

        k = ir_phi_input_number(ctx, succ_bb, b);
        IR_ASSERT(k != 0);
        for (ref = 0; ref < use_list->count; ref++) {
          ir_ref use = ctx->use_edges[use_list->refs + ref];
          insn = &ctx->ir_base[use];
          if (insn->op == IR_PHI) {
            ir_ref input = ir_insn_op(insn, k);
            if (input > 0) {
              uint32_t v = ctx->vregs[input];

              /* live.add(phi.inputOf(b)) */
              IR_ASSERT(v);
              ir_bitset_incl(live, v);
              /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */
              ival = ir_add_prev_live_range(ctx, v,
                IR_START_LIVE_POS_FROM_REF(bb->start),
                IR_END_LIVE_POS_FROM_REF(bb->end));
              ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
            }
          }
        }
      }
    }

    /* for each operation op of b in reverse order */
    ref = bb->end;
    insn = &ctx->ir_base[ref];
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
      ref = ctx->prev_ref[ref];
    }
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
      uint32_t def_flags;
      uint32_t flags;
      ir_ref *p;
      ir_target_constraints constraints;
      uint32_t v;

      if (ctx->rules) {
        int n;

        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
           && ctx->use_lists[ref].count > 0) {
            insn = &ctx->ir_base[ref];
            if (insn->op != IR_VADDR) {
              insn->op3 = ctx->vars;
              ctx->vars = ref;
            }
          }
          continue;
        }

        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
        n = constraints.tmps_count;
        while (n > 0) {
          n--;
          if (constraints.tmp_regs[n].type) {
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
          } else {
            /* CPU specific constraints */
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
          }
        }
      } else {
        def_flags = 0;
        constraints.def_reg = IR_REG_NONE;
        constraints.hints_count = 0;
      }

      insn = &ctx->ir_base[ref];
      v = ctx->vregs[ref];
      if (v) {
        IR_ASSERT(ir_bitset_in(live, v));

        if (insn->op != IR_PHI) {
          ir_live_pos def_pos;
          ir_ref hint_ref = 0;
          ir_reg reg = constraints.def_reg;

          if (reg != IR_REG_NONE) {
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
              /* parameter register must be kept before it's copied */
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
            }
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
              hint_ref = insn->op1;
            }
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          } else {
            if (insn->op == IR_PARAM) {
              /* We may reuse parameter stack slot for spilling */
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
            }
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
          }
          /* live.remove(opd) */
          ir_bitset_excl(live, v);
          /* intervals[opd].setFrom(op.id) */
          ival = ir_fix_live_range(ctx, v,
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
          ival->type = insn->type;
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
        } else {
          /* live.remove(opd) */
          ir_bitset_excl(live, v);
          /* PHIs inputs must not be processed */
          ival = ctx->live_intervals[v];
          if (UNEXPECTED(!ival)) {
            /* Dead PHI */
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
          }
          ival->type = insn->type;
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
          continue;
        }
      }

      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
      flags = ir_op_flags[insn->op];
      j = 1;
      p = insn->ops + 1;
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
        j++;
        p++;
      }
      for (; j <= insn->inputs_count; j++, p++) {
        ir_ref input = *p;
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
        ir_live_pos use_pos;
        ir_ref hint_ref = 0;
        uint32_t v;

        if (input > 0) {
          v = ctx->vregs[input];
          if (v) {
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
            if (reg != IR_REG_NONE) {
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
              if (j == 1) {
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
                IR_ASSERT(ctx->vregs[ref]);
                hint_ref = ref;
              } else if (input == insn->op1) {
                /* Input is the same as "op1" */
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              }
            }
            if (!ir_bitset_in(live, v)) {
              /* live.add(opd) */
              ir_bitset_incl(live, v);
              /* intervals[opd].addRange(b.from, op.id) */
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
            } else {
              ival = ctx->live_intervals[v];
            }
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
          } else {
            if (ctx->rules) {
              if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
                ir_add_fusion_ranges(ctx, ref, input, bb, live);
              } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
                ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
              }
            }
            if (reg != IR_REG_NONE) {
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
            }
          }
        } else if (reg != IR_REG_NONE) {
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
        }
      }
    }

    /* if b is loop header */
    if ((bb->flags & IR_BB_LOOP_HEADER)
     && !ir_bitset_empty(live, len)) {
      /* variables live at loop header are alive at the whole loop body */
      uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
      uint32_t child;
      ir_block *child_bb;
      ir_bitset child_live_in;

      if (!loops) {
        loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
        ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
      } else {
        ir_bitset_clear(loops, bb_set_len);
        ir_bitqueue_clear(&queue);
      }
      ir_bitset_incl(loops, b);
      child = b;
      do {
        child_bb = &ctx->cfg_blocks[child];
        child_live_in = bb_live + (len * child);

        IR_BITSET_FOREACH(live, len, i) {
          ir_bitset_incl(child_live_in, i);
          ir_add_live_range(ctx, i,
            IR_START_LIVE_POS_FROM_REF(child_bb->start),
            IR_END_LIVE_POS_FROM_REF(child_bb->end));
        } IR_BITSET_FOREACH_END();

        child = child_bb->dom_child;
        while (child) {
          child_bb = &ctx->cfg_blocks[child];
          if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
            ir_bitqueue_add(&queue, child);
            if (child_bb->flags & IR_BB_LOOP_HEADER) {
              ir_bitset_incl(loops, child);
            }
          }
          child = child_bb->dom_next_child;
        }
      } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
    }
  }

  if (ctx->entries) {
    for (i = 0; i < ctx->entries_count; i++) {
      b = ctx->entries[i];
      bb = &ctx->cfg_blocks[b];
      live = bb_live + (len * b);
      ir_add_osr_entry_loads(ctx, bb, live, len, b);
    }
    if (ctx->osr_entry_loads) {
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
    }
  }

  if (loops) {
    ir_mem_free(loops);
    ir_bitqueue_free(&queue);
  }

  ir_mem_free(bb_live);
#ifdef IR_DEBUG
  ir_mem_free(visited);
#endif

  return 1;
}

#else
/* Path exploration by definition liveness for SSA using sets represented by linked lists */

#define IS_LIVE_IN_BLOCK(v, b) \
  (live_in_block[v] == b)
#define SET_LIVE_IN_BLOCK(v, b) do { \
    live_in_block[v] = b; \
  } while (0)

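/*
 * live_in_block[] holds, for each virtual register, the last block in which
 * it was seen live; comparing against the current block number avoids adding
 * the same range twice without keeping a full per-block set.
 */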
/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
{
#if 0
  return live_outs[b];
#else
  if (!live_outs[b]) {
    return -1;
  }
  return ir_list_at(live_lists, live_outs[b]);
#endif
}

/* Remember a virtual register alive at the end of the block */
IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
{
#if 0
  ir_block *bb = &ctx->cfg_blocks[b];
  live_outs[b] = v;
  ir_add_prev_live_range(ctx, v,
    IR_START_LIVE_POS_FROM_REF(bb->start),
    IR_END_LIVE_POS_FROM_REF(bb->end));
#else
  if (live_lists->len >= live_lists->a.size) {
    ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
  }
  /* Form a linked list of virtual registers live at the end of the block */
  ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
  live_outs[b] = ir_list_len(live_lists);           /* remember the new root */
  ir_list_push_unchecked(live_lists, v);            /* push a virtual register */
#endif
}

/*
 * Computes live-out sets for each basic-block per variable using def-use chains.
 *
 * The implementation is based on algorithms 6 and 7 described in
 * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot,
 * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. TR Inria RR-7503, 2011
 */
static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
{
  ir_list block_queue, fuse_queue;
  ir_ref i;

  ir_list_init(&fuse_queue, 16);
  ir_list_init(&block_queue, 256);

  /* For each virtual register explore paths from all uses to the definition */
  for (i = ctx->insns_count - 1; i > 0; i--) {
    uint32_t v = ctx->vregs[i];

    if (v) {
      uint32_t def_block = ctx->cfg_map[i];
      ir_use_list *use_list = &ctx->use_lists[i];
      ir_ref *p, n = use_list->count;

      /* Collect all blocks where 'v' is used into a 'block_queue' */
      for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
        ir_ref use = *p;
        ir_insn *insn = &ctx->ir_base[use];

        if (UNEXPECTED(insn->op == IR_PHI)) {
          ir_ref n = insn->inputs_count - 1;
          ir_ref *p = insn->ops + 2; /* PHI data inputs */
          ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */

          for (; n > 0; p++, q++, n--) {
            if (*p == i) {
              uint32_t pred_block = ctx->cfg_map[*q];

              if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
                ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
                if (pred_block != def_block) {
                  ir_list_push(&block_queue, pred_block);
                }
              }
            }
          }
        } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) {
          while (1) {
            ir_use_list *use_list = &ctx->use_lists[use];
            ir_ref *p, n = use_list->count;

            for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
              ir_ref use = *p;

              if (ctx->rules[use] & IR_FUSED) {
                ir_list_push(&fuse_queue, use);
              } else {
                uint32_t use_block = ctx->cfg_map[use];

                if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
                  ir_list_push(&block_queue, use_block);
                }
              }
            }
            if (!ir_list_len(&fuse_queue)) {
              break;
            }
            use = ir_list_pop(&fuse_queue);
          }
        } else {
          uint32_t use_block = ctx->cfg_map[use];

          /* Check if the virtual register is alive at the start of 'use_block' */
          if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
            ir_list_push(&block_queue, use_block);
          }
        }
      }

      /* UP_AND_MARK: Traverse through predecessor blocks until we reach the block where 'v' is defined */
      while (ir_list_len(&block_queue)) {
        uint32_t b = ir_list_pop(&block_queue);
        ir_block *bb = &ctx->cfg_blocks[b];
        uint32_t *p, n = bb->predecessors_count;

        if (bb->flags & IR_BB_ENTRY) {
          /* live_in_push(ENTRY, v) */
          ir_insn *insn = &ctx->ir_base[bb->start];

          IR_ASSERT(insn->op == IR_ENTRY);
          IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count);
          if (live_lists->len >= live_lists->a.size) {
            ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
          }
          ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]);
          ir_list_push_unchecked(live_lists, v);
          live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1;
          continue;
        }
        for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
          uint32_t pred_block = *p;

          /* Check if 'pred_block' wasn't traversed before */
          if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
            /* Mark a virtual register 'v' alive at the end of 'pred_block' */
            ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
            if (pred_block != def_block) {
              ir_list_push(&block_queue, pred_block);
            }
          }
        }
      }
    }
  }

  ir_list_free(&block_queue);
  ir_list_free(&fuse_queue);
}

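/*
 * Walks the live-out list of an ENTRY block and records the values that may
 * be loaded at the OSR entry point (those bound to stack variables); PARAMs
 * are skipped, and anything else live at the entry is reported as an error.
 */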
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, uint32_t b)
{
  bool ok = 1;
  int count = 0;
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
  ir_ref i;

  while (pos) {
    i = ir_list_at(live_lists, pos);
    pos = ir_list_at(live_lists, pos - 1);

    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);

    if (use_pos->op_num) {
      ir_ref *ops = ctx->ir_base[ref].ops;
      ref = ops[use_pos->op_num];
    }

    if (ctx->ir_base[ref].op == IR_PARAM) {
      continue;
    }
    if (ctx->binding) {
      ir_ref var = ir_binding_find(ctx, ref);
      if (var < 0) {
        /* We may load the value at OSR entry-point */
        if (!count) {
          bb->flags &= ~IR_BB_EMPTY;
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
          if (!ctx->osr_entry_loads) {
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
            ir_list_init(list, 16);
          }
          ir_list_push(list, b);
          ir_list_push(list, 0);
        }
        ir_list_push(list, ref);
        count++;
        continue;
      }
    }
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
    ok = 0;
  }

  if (!ok) {
    IR_ASSERT(0);
  }
  if (count) {
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);

#if 0
    /* ENTRY "clobbers" all registers */
    ir_ref ref = ctx->ir_base[bb->start].op1;
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
      IR_DEF_LIVE_POS_FROM_REF(ref),
      IR_SAVE_LIVE_POS_FROM_REF(ref));
#endif
  }
}

static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b)
{
  ir_ref stack[4];
  int stack_pos = 0;
  ir_target_constraints constraints;
  ir_insn *insn;
  uint32_t j, n, flags, def_flags;
  ir_ref *p, child;
  uint8_t use_flags;
  ir_reg reg;
  ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref);
  ir_live_pos use_pos;
  ir_live_interval *ival;

  while (1) {
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);

    if (!(ctx->rules[input] & IR_SIMPLE)) {
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
      n = constraints.tmps_count;
      while (n > 0) {
        n--;
        if (constraints.tmp_regs[n].type) {
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
        } else {
          /* CPU specific constraints */
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
            pos + constraints.tmp_regs[n].start,
            pos + constraints.tmp_regs[n].end);
        }
      }
    } else {
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
      constraints.hints_count = 0;
    }

    insn = &ctx->ir_base[input];
    flags = ir_op_flags[insn->op];
    IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
    n = IR_INPUT_EDGES_COUNT(flags);
    j = 1;
    p = insn->ops + j;
    if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_PINNED)) {
      j++;
      p++;
    }
    for (; j <= n; j++, p++) {
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
      child = *p;
      if (child > 0) {
        uint32_t v = ctx->vregs[child];

        if (v) {
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
          use_pos = pos;
          if (EXPECTED(reg == IR_REG_NONE)) {
            use_pos += IR_USE_SUB_REF;
          }

          if (!IS_LIVE_IN_BLOCK(v, b)) {
            /* live.add(opd) */
            SET_LIVE_IN_BLOCK(v, b);
            /* intervals[opd].addRange(b.from, op.id) */
            ival = ir_add_live_range(ctx, v,
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
          } else {
            ival = ctx->live_intervals[v];
          }
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
        } else if (ctx->rules[child] & IR_FUSED) {
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
          stack[stack_pos++] = child;
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
        }
      }
    }
    if (!stack_pos) {
      break;
    }
    input = stack[--stack_pos];
  }
}

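/*
 * Same reverse block scan as the bitset variant, but it starts from the
 * live-out lists precomputed by ir_compute_live_sets(), so no per-block
 * bitsets and no extra loop-header handling are needed.
 */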
int ir_compute_live_ranges(ir_ctx *ctx)
{
  uint32_t b, i, j, k, n, succ;
  ir_ref ref;
  ir_insn *insn;
  ir_block *bb, *succ_bb;
  uint32_t *live_outs;
  uint32_t *live_in_block;
  ir_list live_lists;
  ir_live_interval *ival;

  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
    return 0;
  }

  if (ctx->rules) {
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
  }

  /* Root of the list of IR_VARs */
  ctx->vars = IR_UNUSED;

  /* Compute Live Ranges */
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;

  /* vregs + tmp + fixed + SCRATCH + ALL */
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

  if (!ctx->arena) {
    ctx->arena = ir_arena_create(16 * 1024);
  }

  live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
  ir_list_init(&live_lists, 1024);
  ir_compute_live_sets(ctx, live_outs, &live_lists);
  live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));

  /* for each basic block in reverse order */
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
    bb = &ctx->cfg_blocks[b];
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));

    /* For all virtual registers alive at the end of the block */
    n = live_outs[b];
    while (n != 0) {
      i = ir_list_at(&live_lists, n);
      SET_LIVE_IN_BLOCK(i, b);
      ir_add_prev_live_range(ctx, i,
        IR_START_LIVE_POS_FROM_REF(bb->start),
        IR_END_LIVE_POS_FROM_REF(bb->end));
      n = ir_list_at(&live_lists, n - 1);
    }

    if (bb->successors_count == 1) {
      /* for each phi function of the successor */
      succ = ctx->cfg_edges[bb->successors];
      succ_bb = &ctx->cfg_blocks[succ];
      if (succ_bb->flags & IR_BB_HAS_PHI) {
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
        ir_ref n, *p;

        k = ir_phi_input_number(ctx, succ_bb, b);
        IR_ASSERT(k != 0);
        n = use_list->count;
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
          ir_ref use = *p;
          insn = &ctx->ir_base[use];
          if (insn->op == IR_PHI) {
            ir_ref input = ir_insn_op(insn, k);
            if (input > 0) {
              uint32_t v = ctx->vregs[input];

              if (v) {
                ival = ctx->live_intervals[v];
                ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
              }
            }
          }
        }
      }
    }

    /* for each operation of the block in reverse order */
    ref = bb->end;
    insn = &ctx->ir_base[ref];
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
      ref = ctx->prev_ref[ref];
    }
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
      uint32_t def_flags;
      uint32_t flags;
      ir_ref *p;
      ir_target_constraints constraints;
      uint32_t v;

      if (ctx->rules) {
        int n;

        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
           && ctx->use_lists[ref].count > 0) {
            insn = &ctx->ir_base[ref];
            if (insn->op != IR_VADDR && insn->op != IR_PARAM) {
              insn->op3 = ctx->vars;
              ctx->vars = ref;
            }
          }
          continue;
        }

        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
        n = constraints.tmps_count;
        while (n > 0) {
          n--;
          if (constraints.tmp_regs[n].type) {
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
          } else {
            /* CPU specific constraints */
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
          }
        }
      } else {
        def_flags = 0;
        constraints.def_reg = IR_REG_NONE;
        constraints.hints_count = 0;
      }

      insn = &ctx->ir_base[ref];
      v = ctx->vregs[ref];
      if (v) {
        if (insn->op != IR_PHI) {
          ir_live_pos def_pos;
          ir_ref hint_ref = 0;
          ir_reg reg = constraints.def_reg;

          if (reg != IR_REG_NONE) {
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
              /* parameter register must be kept before it's copied */
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
            }
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
              hint_ref = insn->op1;
            }
            if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
              def_pos = IR_USE_LIVE_POS_FROM_REF(ref);
            } else {
              def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
            }
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          } else {
            if (insn->op == IR_PARAM) {
              /* We may reuse parameter stack slot for spilling */
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
            }
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
          }
          /* intervals[opd].setFrom(op.id) */
          ival = ir_fix_live_range(ctx, v,
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
          ival->type = insn->type;
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
        } else {
          /* PHIs inputs must not be processed */
          ival = ctx->live_intervals[v];
          if (UNEXPECTED(!ival)) {
            /* Dead PHI */
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
          }
          ival->type = insn->type;
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
          continue;
        }
      }

      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
      flags = ir_op_flags[insn->op];
      j = 1;
      p = insn->ops + 1;
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
        j++;
        p++;
      }
      for (; j <= insn->inputs_count; j++, p++) {
        ir_ref input = *p;
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
        ir_live_pos use_pos;
        ir_ref hint_ref = 0;
        uint32_t v;

        if (input > 0) {
          v = ctx->vregs[input];
          if (v) {
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
            if (reg != IR_REG_NONE) {
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
              if (j == 1) {
                if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
                  use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
                } else {
                  use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
                }
                IR_ASSERT(ctx->vregs[ref]);
                hint_ref = ref;
              } else if (input == insn->op1) {
                /* Input is the same as "op1" */
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              }
            }
            if (!IS_LIVE_IN_BLOCK(v, b)) {
              /* live.add(opd) */
              SET_LIVE_IN_BLOCK(v, b);
              /* intervals[opd].addRange(b.from, op.id) */
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
            } else {
              ival = ctx->live_intervals[v];
            }
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
          } else {
            if (ctx->rules) {
              if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
                ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
              } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
                ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
              }
            }
            if (reg != IR_REG_NONE) {
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
            }
          }
        } else if (reg != IR_REG_NONE) {
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
        }
      }
    }
  }

  if (ctx->entries) {
    for (i = 0; i < ctx->entries_count; i++) {
      b = ctx->entries[i];
      bb = &ctx->cfg_blocks[b];
      IR_ASSERT(bb->predecessors_count == 1);
      ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
    }
    if (ctx->osr_entry_loads) {
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
    }
  }

  ir_list_free(&live_lists);
  ir_mem_free(live_outs);
  ir_mem_free(live_in_block);

  return 1;
}

#endif

/* Live Ranges coalescing */

static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2)
{
  while (1) {
    if (lrg2->start < lrg1->end) {
      if (lrg1->start < lrg2->end) {
        return IR_MAX(lrg1->start, lrg2->start);
      } else {
        lrg2 = lrg2->next;
        if (!lrg2) {
          return 0;
        }
      }
    } else {
      lrg1 = lrg1->next;
      if (!lrg1) {
        return 0;
      }
    }
  }
}

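/*
 * Both range lists are sorted, so a single merge-style walk (above) finds the
 * first overlapping position, or 0 when the two intervals are disjoint.
 */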
static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2)
{
  ir_live_interval *ival1 = ctx->live_intervals[r1];
  ir_live_interval *ival2 = ctx->live_intervals[r2];

#if 0
  if (ival2->range.start >= ival1->end
   || ival1->range.start >= ival2->end) {
    return 0;
  }
#endif
  return ir_ivals_overlap(&ival1->range, &ival2->range);
}

static bool ir_ivals_inside(ir_live_range *parent, ir_live_range *child)
{
  do {
    while (parent && parent->end < child->start) {
      parent = parent->next;
    }
    if (!parent || parent->start > child->start || parent->end < child->end) {
      return 0;
    }
    child = child->next;
  } while (child);
  return 1;
}

1561
static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
1562
0
{
1563
0
  ir_live_interval *child_ival = ctx->live_intervals[child];
1564
0
  ir_live_interval *parent_ival = ctx->live_intervals[parent];
1565
1566
0
  if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
1567
    // TODO: Support valid cases with an already coalesced "parent_ival"
1568
0
    return 0;
1569
0
  }
1570
#if 0
1571
  if (child_ival->end >= parent_ival->end) {
1572
    return 0;
1573
  }
1574
#endif
1575
0
  return ir_ivals_inside(&parent_ival->range, &child_ival->range);
1576
0
}
1577
1578
static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1579
0
{
1580
0
  ir_live_interval *ival = ctx->live_intervals[r2];
1581
0
  ir_live_range *live_range = &ival->range;
1582
0
  ir_live_range *next;
1583
0
  ir_use_pos *use_pos, *next_pos, **prev;
1584
1585
#if 0
1586
  fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);
1587
#endif
1588
1589
0
  ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1590
0
  live_range = live_range->next;
1591
0
  while (live_range) {
1592
0
    next = live_range->next;
1593
0
    live_range->next = ctx->unused_ranges;
1594
0
    ctx->unused_ranges = live_range;
1595
0
    ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1596
0
    live_range = next;
1597
0
  }
1598
1599
  /* merge sorted use_pos lists */
1600
0
  prev = &ctx->live_intervals[r1]->use_pos;
1601
0
  use_pos = ival->use_pos;
1602
0
  while (use_pos) {
1603
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) {
1604
0
      use_pos->hint_ref = 0;
1605
0
    }
1606
0
    while (*prev && ((*prev)->pos < use_pos->pos ||
1607
0
      ((*prev)->pos == use_pos->pos &&
1608
0
        (use_pos->op_num == 0 || ((*prev)->op_num != 0 && (*prev)->op_num < use_pos->op_num))))) {
1609
0
      if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
1610
0
        (*prev)->hint_ref = 0;
1611
0
      }
1612
0
      prev = &(*prev)->next;
1613
0
    }
1614
0
    next_pos = use_pos->next;
1615
0
    use_pos->next = *prev;
1616
0
    *prev = use_pos;
1617
0
    prev = &use_pos->next;
1618
0
    use_pos = next_pos;
1619
0
  }
1620
0
  use_pos = *prev;
1621
0
  while (use_pos) {
1622
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) {
1623
0
      use_pos->hint_ref = 0;
1624
0
    }
1625
0
    use_pos = use_pos->next;
1626
0
  }
1627
1628
0
  ctx->live_intervals[r1]->flags |=
1629
0
    IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
1630
0
  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
1631
0
    IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM));
1632
0
    ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1633
0
  }
1634
0
  ctx->live_intervals[r2] = NULL;
1635
1636
  // TODO: remember to reuse ???
1637
  //ir_mem_free(ival);
1638
0
}
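
The use-position merge above is the standard pointer-to-pointer splice of one sorted singly-linked list into another. A stripped-down sketch of the pattern with a hypothetical node type, ordered by pos alone (the real loop additionally tie-breaks on op_num and clears hint_refs that now point into the joined interval):

struct node { int pos; struct node *next; };

/* Splice the sorted list src into the sorted list at *head, in place. */
static void merge_sorted(struct node **head, struct node *src)
{
    struct node **prev = head;

    while (src) {
        struct node *next = src->next;

        while (*prev && (*prev)->pos <= src->pos) {
            prev = &(*prev)->next;     /* keep smaller-or-equal elements in front */
        }
        src->next = *prev;             /* link src before the first larger element */
        *prev = src;
        prev = &src->next;
        src = next;
    }
}
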
1639
1640
static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)
1641
0
{
1642
0
  ir_ref i;
1643
0
  uint16_t f1 = ctx->live_intervals[v1]->flags;
1644
0
  uint16_t f2 = ctx->live_intervals[v2]->flags;
1645
1646
#if 0
1647
  if (ctx->binding) {
1648
    ir_ref b1 = ir_binding_find(ctx, from);
1649
    ir_ref b2 = ir_binding_find(ctx, to);
1650
    IR_ASSERT(b1 == b2);
1651
  }
1652
#endif
1653
0
  if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
1654
0
    ir_vregs_join(ctx, v1, v2);
1655
0
    ctx->vregs[to] = v1;
1656
0
  } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) {
1657
0
    ir_vregs_join(ctx, v2, v1);
1658
0
    ctx->vregs[from] = v2;
1659
0
  } else if (from < to) {
1660
0
    ir_vregs_join(ctx, v1, v2);
1661
0
    if (f2 & IR_LIVE_INTERVAL_COALESCED) {
1662
0
      for (i = 1; i < ctx->insns_count; i++) {
1663
0
        if (ctx->vregs[i] == v2) {
1664
0
          ctx->vregs[i] = v1;
1665
0
        }
1666
0
      }
1667
0
    } else {
1668
0
      ctx->vregs[to] = v1;
1669
0
    }
1670
0
  } else {
1671
0
    ir_vregs_join(ctx, v2, v1);
1672
0
    if (f1 & IR_LIVE_INTERVAL_COALESCED) {
1673
0
      for (i = 1; i < ctx->insns_count; i++) {
1674
0
        if (ctx->vregs[i] == v1) {
1675
0
          ctx->vregs[i] = v2;
1676
0
        }
1677
0
      }
1678
0
    } else {
1679
0
      ctx->vregs[from] = v2;
1680
0
    }
1681
0
  }
1682
0
}
1683
1684
static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
1685
0
{
1686
0
  if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
1687
0
    ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
1688
0
    ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
1689
0
    ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
1690
#if 0
1691
    fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
1692
#endif
1693
0
  }
1694
0
}
1695
1696
typedef struct _ir_coalesce_block {
1697
  uint32_t b;
1698
  uint32_t loop_depth;
1699
} ir_coalesce_block;
1700
1701
static int ir_block_cmp(const void *b1, const void *b2)
1702
0
{
1703
0
  ir_coalesce_block *d1 = (ir_coalesce_block*)b1;
1704
0
  ir_coalesce_block *d2 = (ir_coalesce_block*)b2;
1705
1706
0
  if (d1->loop_depth > d2->loop_depth) {
1707
0
    return -1;
1708
0
  } else if (d1->loop_depth == d2->loop_depth) {
1709
0
    if (d1->b < d2->b) {
1710
0
      return -1;
1711
0
    } else {
1712
0
      return 1;
1713
0
    }
1714
0
  } else {
1715
0
    return 1;
1716
0
  }
1717
0
}
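
With this comparator, qsort() arranges the collected blocks by descending loop depth and, within equal depth, by ascending block id, so moves inside the hottest (innermost) loops get coalesced first. An illustrative call with made-up {b, loop_depth} pairs:

ir_coalesce_block blocks[] = {{5, 0}, {9, 2}, {2, 2}, {7, 1}};

qsort(blocks, 4, sizeof(ir_coalesce_block), ir_block_cmp);
/* resulting order: {2,2} {9,2} {7,1} {5,0} */
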
1718
1719
static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1720
0
{
1721
0
  ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1722
0
  ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1723
0
  ir_live_interval *ival;
1724
0
  ir_live_range *r;
1725
0
  ir_use_pos *p, *p1 = NULL, *p2 = NULL;
1726
0
  ir_ref tmp;
1727
1728
0
  tmp = insn->op1;
1729
0
  insn->op1 = insn->op2;
1730
0
  insn->op2 = tmp;
1731
1732
0
  ival = ctx->live_intervals[ctx->vregs[insn->op1]];
1733
0
  p = ival->use_pos;
1734
0
  while (p) {
1735
0
    if (p->pos == pos) {
1736
0
      p->pos = load_pos;
1737
0
      p->op_num = 1;
1738
0
      p1 = p;
1739
0
      break;
1740
0
    }
1741
0
    p = p->next;
1742
0
  }
1743
1744
0
  ival = ctx->live_intervals[ctx->vregs[i]];
1745
0
  p = ival->use_pos;
1746
0
  while (p) {
1747
0
    if (p->pos == load_pos) {
1748
0
      p->hint_ref = insn->op1;
1749
0
      break;
1750
0
    }
1751
0
    p = p->next;
1752
0
  }
1753
1754
0
  if (insn->op2 > 0 && ctx->vregs[insn->op2]) {
1755
0
    ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1756
0
    r = &ival->range;
1757
0
    while (r) {
1758
0
      if (r->end == load_pos) {
1759
0
        r->end = pos;
1760
0
        if (!r->next) {
1761
0
          ival->end = pos;
1762
0
        }
1763
0
        break;
1764
0
      }
1765
0
      r = r->next;
1766
0
    }
1767
0
    p = ival->use_pos;
1768
0
    while (p) {
1769
0
      if (p->pos == load_pos) {
1770
0
        p->pos = pos;
1771
0
        p->op_num = 2;
1772
0
        p2 = p;
1773
0
        break;
1774
0
      }
1775
0
      p = p->next;
1776
0
    }
1777
0
  }
1778
0
  if (p1 && p2) {
1779
0
    uint8_t tmp = p1->flags;
1780
0
    p1->flags = p2->flags;
1781
0
    p2->flags = tmp;
1782
0
  }
1783
0
}
1784
1785
static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)
1786
0
{
1787
0
  ir_use_pos *p;
1788
0
  ir_reg r1 = IR_REG_NONE;
1789
0
  ir_reg r2 = IR_REG_NONE;
1790
1791
0
  p = ctx->live_intervals[use]->use_pos;
1792
0
  while (p) {
1793
0
    if (IR_LIVE_POS_TO_REF(p->pos) == ref) {
1794
0
      break;
1795
0
    }
1796
0
    if (p->hint != IR_REG_NONE) {
1797
0
      r1 = p->hint;
1798
0
    }
1799
0
    p = p->next;
1800
0
  }
1801
1802
0
  p = ctx->live_intervals[def]->use_pos;
1803
0
  while (p) {
1804
0
    if (IR_LIVE_POS_TO_REF(p->pos) > ref) {
1805
0
      if (p->hint != IR_REG_NONE) {
1806
0
        r2 = p->hint;
1807
0
        break;
1808
0
      }
1809
0
    }
1810
0
    p = p->next;
1811
0
  }
1812
0
  return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE;
1813
0
}
1814
1815
static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1816
0
{
1817
0
  if (ctx->vregs[insn->op1]
1818
0
   && ctx->vregs[insn->op1] != ctx->vregs[i]
1819
0
   && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
1820
0
   && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {
1821
    /* pass */
1822
0
  } else {
1823
0
    if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {
1824
0
      ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1825
0
      ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1826
0
      ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1827
0
      ir_live_range *r = &ival->range;
1828
1829
0
      if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) {
1830
0
        return 0;
1831
0
      }
1832
0
      while (r) {
1833
0
        if (r->end == pos) {
1834
0
          r->end = load_pos;
1835
0
          if (!r->next) {
1836
0
            ival->end = load_pos;
1837
0
          }
1838
0
          if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
1839
0
           && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
1840
0
            ir_swap_operands(ctx, i, insn);
1841
0
            return 1;
1842
0
          } else {
1843
0
            r->end = pos;
1844
0
            if (!r->next) {
1845
0
              ival->end = pos;
1846
0
            }
1847
0
          }
1848
0
          break;
1849
0
        }
1850
0
        r = r->next;
1851
0
      }
1852
0
    }
1853
0
  }
1854
0
  return 0;
1855
0
}
1856
1857
int ir_coalesce(ir_ctx *ctx)
1858
0
{
1859
0
  uint32_t b, n, succ, pred_b, count = 0;
1860
0
  ir_ref *p, use, input, k, j;
1861
0
  ir_block *bb, *succ_bb;
1862
0
  ir_use_list *use_list;
1863
0
  ir_insn *insn;
1864
0
  ir_bitset visited;
1865
0
  ir_coalesce_block *list;
1866
0
  bool compact = 0;
1867
1868
  /* Collect a list of blocks that are predecessors of blocks with phi functions */
1869
0
  list = ir_mem_malloc(sizeof(ir_coalesce_block) * ctx->cfg_blocks_count);
1870
0
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
1871
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
1872
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1873
0
    if (bb->flags & IR_BB_HAS_PHI) {
1874
0
      k = bb->predecessors_count;
1875
0
      if (k > 1) {
1876
0
        use_list = &ctx->use_lists[bb->start];
1877
0
        n = use_list->count;
1878
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
1879
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1880
0
          use = *p;
1881
0
          insn = &ctx->ir_base[use];
1882
0
          if (insn->op == IR_PHI) {
1883
0
            do {
1884
0
              k--;
1885
0
              pred_b = ctx->cfg_edges[bb->predecessors + k];
1886
0
              if (!ir_bitset_in(visited, pred_b)) {
1887
0
                ir_bitset_incl(visited, pred_b);
1888
0
                list[count].b = pred_b;
1889
0
                list[count].loop_depth = ctx->cfg_blocks[pred_b].loop_depth;
1890
0
                count++;
1891
0
              }
1892
0
            } while (k > 0);
1893
0
            break;
1894
0
          }
1895
0
        }
1896
0
      }
1897
0
    }
1898
0
  }
1899
0
  ir_mem_free(visited);
1900
1901
  /* Sort blocks according to their loop depth */
1902
0
  qsort(list, count, sizeof(ir_coalesce_block), ir_block_cmp);
1903
1904
0
  while (count > 0) {
1905
1906
0
    count--;
1907
0
    b = list[count].b;
1908
0
    bb = &ctx->cfg_blocks[b];
1909
0
    IR_ASSERT(bb->successors_count == 1);
1910
0
    succ = ctx->cfg_edges[bb->successors];
1911
0
    succ_bb = &ctx->cfg_blocks[succ];
1912
0
    IR_ASSERT(succ_bb->predecessors_count > 1);
1913
0
    k = ir_phi_input_number(ctx, succ_bb, b);
1914
0
    use_list = &ctx->use_lists[succ_bb->start];
1915
0
    n = use_list->count;
1916
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1917
0
      use = *p;
1918
0
      insn = &ctx->ir_base[use];
1919
0
      if (insn->op == IR_PHI) {
1920
0
        input = ir_insn_op(insn, k);
1921
0
        if (input > 0 && ctx->vregs[input]) {
1922
0
          uint32_t v1 = ctx->vregs[input];
1923
0
          uint32_t v2 = ctx->vregs[use];
1924
1925
0
          if (v1 == v2) {
1926
            /* already coalesced */
1927
0
          } else {
1928
0
            if (!ir_vregs_overlap(ctx, v1, v2)) {
1929
0
              ir_vregs_coalesce(ctx, v1, v2, input, use);
1930
0
              compact = 1;
1931
0
            } else {
1932
0
#if 1
1933
0
              if (ctx->rules && (ctx->rules[input] & IR_MAY_SWAP)) {
1934
0
                ir_insn *input_insn = &ctx->ir_base[input];
1935
1936
0
                IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE);
1937
0
                if (input_insn->op2 == use
1938
0
                 && input_insn->op1 != use
1939
0
                 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)) {
1940
0
                  ir_live_range *r = &ctx->live_intervals[v2]->range;
1941
1942
0
                  do {
1943
0
                    if (r->end == IR_USE_LIVE_POS_FROM_REF(input)) {
1944
0
                      break;
1945
0
                    }
1946
0
                    r = r->next;
1947
0
                  } while (r);
1948
0
                  if (r) {
1949
0
                    r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1950
0
                    if (!r->next) {
1951
0
                      ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1952
0
                    }
1953
0
                    if (ir_vregs_overlap(ctx, v1, v2)) {
1954
0
                      r->end = IR_USE_LIVE_POS_FROM_REF(input);
1955
0
                      if (!r->next) {
1956
0
                        ctx->live_intervals[v2]->end = IR_USE_LIVE_POS_FROM_REF(input);
1957
0
                      }
1958
0
                    } else {
1959
0
                      ir_swap_operands(ctx, input, input_insn);
1960
0
                      IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
1961
0
                      ir_vregs_coalesce(ctx, v1, v2, input, use);
1962
0
                      compact = 1;
1963
0
                      continue;
1964
0
                    }
1965
0
                  }
1966
0
                }
1967
0
              }
1968
0
#endif
1969
0
              ir_add_phi_move(ctx, b, input, use);
1970
0
            }
1971
0
          }
1972
0
        } else {
1973
          /* Move for constant input */
1974
0
          ir_add_phi_move(ctx, b, input, use);
1975
0
        }
1976
0
      }
1977
0
    }
1978
0
  }
1979
0
  ir_mem_free(list);
1980
1981
0
  ir_hint_propagation(ctx);
1982
1983
0
  if (ctx->rules) {
1984
    /* try to swap operands of commutative instructions for better register allocation */
1985
0
    uint32_t *rule = ctx->rules + 1;
1986
0
    ir_ref i;
1987
1988
0
    for (i = 1; i < ctx->insns_count; rule++, i++) {
1989
0
      if ((*rule) & (IR_MAY_SWAP|IR_MAY_REUSE)) {
1990
0
        insn = &ctx->ir_base[i];
1991
0
        IR_ASSERT(ctx->vregs[i]);
1992
0
        if ((*rule) & IR_MAY_SWAP) {
1993
0
          IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE);
1994
0
          if (ctx->live_intervals[ctx->vregs[i]]->use_pos
1995
0
           && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
1996
0
           && insn->op2 > 0
1997
0
           && insn->op1 > 0
1998
0
           && insn->op1 != insn->op2) {
1999
0
            ir_try_swap_operands(ctx, i, insn);
2000
0
          }
2001
0
        } else {
2002
0
          IR_ASSERT((*rule) & IR_MAY_REUSE);
2003
0
          if (insn->op1 > 0
2004
0
           && ctx->vregs[insn->op1]
2005
0
           && ctx->vregs[i] != ctx->vregs[insn->op1]) {
2006
0
            if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
2007
0
              if (ctx->binding) {
2008
0
                ir_ref b1 = ir_binding_find(ctx, i);
2009
0
                ir_ref b2 = ir_binding_find(ctx, insn->op1);
2010
0
                if (b1 && b1 != b2) {
2011
0
                  continue;
2012
0
                }
2013
0
              }
2014
0
              ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
2015
0
              compact = 1;
2016
0
            }
2017
0
          }
2018
0
        }
2019
0
      }
2020
0
    }
2021
0
  }
2022
2023
0
  if (compact) {
2024
0
    ir_ref i, n;
2025
0
    uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t));
2026
2027
0
    for (i = 1, n = 1; i <= ctx->vregs_count; i++) {
2028
0
      if (ctx->live_intervals[i]) {
2029
0
        xlat[i] = n;
2030
0
        if (i != n) {
2031
0
          ctx->live_intervals[n] = ctx->live_intervals[i];
2032
0
          ctx->live_intervals[n]->vreg = n;
2033
0
        }
2034
0
        n++;
2035
0
      }
2036
0
    }
2037
0
    n--;
2038
0
    if (n != ctx->vregs_count) {
2039
0
      j = ctx->vregs_count - n;
2040
      /* vregs + tmp + fixed + SCRATCH + ALL */
2041
0
      for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
2042
0
        ctx->live_intervals[i] = ctx->live_intervals[i + j];
2043
0
        if (ctx->live_intervals[i]) {
2044
0
          ctx->live_intervals[i]->vreg = i;
2045
0
        }
2046
0
      }
2047
0
      for (j = 1; j < ctx->insns_count; j++) {
2048
0
        if (ctx->vregs[j]) {
2049
0
          ctx->vregs[j] = xlat[ctx->vregs[j]];
2050
0
        }
2051
0
      }
2052
0
      ctx->vregs_count = n;
2053
0
    }
2054
0
    ir_mem_free(xlat);
2055
0
  }
2056
2057
0
  return 1;
2058
0
}
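
When any joins happened (compact is set), the surviving intervals are renumbered densely at the end of ir_coalesce(): xlat maps each old vreg to its new index, the IR_REG_NUM + 2 interval slots that follow the vregs (per the comment above: tmp, fixed, SCRATCH and ALL) are shifted down by the number of freed slots, and every live ctx->vregs[] entry is rewritten through xlat. For example, if only vregs {1, 3, 5} of 5 survive, xlat maps 1->1, 3->2, 5->3 and the special intervals move down by two positions.
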
2059
2060
/* SSA Deconstruction */
2061
2062
int ir_compute_dessa_moves(ir_ctx *ctx)
2063
0
{
2064
0
  uint32_t b, n;
2065
0
  ir_ref j, k, *p, use;
2066
0
  ir_block *bb;
2067
0
  ir_use_list *use_list;
2068
0
  ir_insn *insn;
2069
2070
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
2071
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
2072
0
    k = bb->predecessors_count;
2073
0
    if (k > 1) {
2074
0
      use_list = &ctx->use_lists[bb->start];
2075
0
      n = use_list->count;
2076
0
      if (n > 1) {
2077
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
2078
0
        k++;
2079
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2080
0
          use = *p;
2081
0
          insn = &ctx->ir_base[use];
2082
0
          if (insn->op == IR_PHI) {
2083
0
            for (j = 2; j <= k; j++) {
2084
0
              if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
2085
0
                int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
2086
0
                ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
2087
0
                ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
2088
0
                ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
2089
0
              }
2090
0
            }
2091
0
          }
2092
0
        }
2093
0
      }
2094
0
    }
2095
0
  }
2096
0
  return 1;
2097
0
}
2098
2099
/*
2100
 * Parallel copy sequentialization algorithm
2101
 *
2102
 * The implementation is based on Algorithm 1 described in
2103
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
2104
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
2105
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
2106
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
2107
 */
2108
int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
2109
0
{
2110
0
  uint32_t succ, k, n = 0;
2111
0
  ir_block *bb, *succ_bb;
2112
0
  ir_use_list *use_list;
2113
0
  ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
2114
0
  ir_ref s, d;
2115
0
  ir_insn *insn;
2116
0
  uint32_t len;
2117
0
  ir_bitset todo, ready;
2118
0
  bool have_constants_or_addresses = 0;
2119
2120
0
  bb = &ctx->cfg_blocks[b];
2121
0
  if (!(bb->flags & IR_BB_DESSA_MOVES)) {
2122
0
    return 0;
2123
0
  }
2124
0
  IR_ASSERT(bb->successors_count == 1);
2125
0
  succ = ctx->cfg_edges[bb->successors];
2126
0
  succ_bb = &ctx->cfg_blocks[succ];
2127
0
  IR_ASSERT(succ_bb->predecessors_count > 1);
2128
0
  use_list = &ctx->use_lists[succ_bb->start];
2129
2130
0
  k = ir_phi_input_number(ctx, succ_bb, b);
2131
2132
0
  loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
2133
0
  pred = loc + ctx->vregs_count + 1;
2134
0
  src = pred + ctx->vregs_count + 1;
2135
0
  dst = src + ctx->vregs_count + 1;
2136
0
  len = ir_bitset_len(ctx->vregs_count + 1);
2137
0
  todo = ir_bitset_malloc(ctx->vregs_count + 1);
2138
2139
0
  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2140
0
    ref = *p;
2141
0
    insn = &ctx->ir_base[ref];
2142
0
    if (insn->op == IR_PHI) {
2143
0
      input = ir_insn_op(insn, k);
2144
0
      if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2145
0
        have_constants_or_addresses = 1;
2146
0
      } else if (ctx->vregs[input] != ctx->vregs[ref]) {
2147
0
        s = ctx->vregs[input];
2148
0
        d = ctx->vregs[ref];
2149
0
        src[s] = input;
2150
0
        dst[d] = ref;
2151
0
        loc[d] = pred[s] = 0;
2152
0
        ir_bitset_incl(todo, d);
2153
0
        n++;
2154
0
      }
2155
0
    }
2156
0
  }
2157
2158
0
  if (n > 0) {
2159
0
    src[0] = dst[0] = 0;
2160
0
    ready = ir_bitset_malloc(ctx->vregs_count + 1);
2161
0
    IR_BITSET_FOREACH(todo, len, d) {
2162
0
      ref = dst[d];
2163
0
      insn = &ctx->ir_base[ref];
2164
0
      IR_ASSERT(insn->op == IR_PHI);
2165
0
      input = ir_insn_op(insn, k);
2166
0
      s = ctx->vregs[input];
2167
0
      loc[s] = s;
2168
0
      pred[d] = s;
2169
0
    } IR_BITSET_FOREACH_END();
2170
2171
0
    IR_BITSET_FOREACH(todo, len, i) {
2172
0
      if (!loc[i]) {
2173
0
        ir_bitset_incl(ready, i);
2174
0
      }
2175
0
    } IR_BITSET_FOREACH_END();
2176
2177
0
    while (1) {
2178
0
      ir_ref a, b, c;
2179
2180
0
      while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
2181
0
        a = pred[b];
2182
0
        c = loc[a];
2183
0
        emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
2184
0
        ir_bitset_excl(todo, b);
2185
0
        loc[a] = b;
2186
0
        src[b] = dst[b];
2187
0
        if (a == c && pred[a]) {
2188
0
          ir_bitset_incl(ready, a);
2189
0
        }
2190
0
      }
2191
0
      b = ir_bitset_pop_first(todo, len);
2192
0
      if (b < 0) {
2193
0
        break;
2194
0
      }
2195
0
      IR_ASSERT(b != loc[pred[b]]);
2196
0
      emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
2197
0
      loc[b] = 0;
2198
0
      ir_bitset_incl(ready, b);
2199
0
    }
2200
2201
0
    ir_mem_free(ready);
2202
0
  }
2203
2204
0
  ir_mem_free(todo);
2205
0
  ir_mem_free(loc);
2206
2207
0
  if (have_constants_or_addresses) {
2208
0
    for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2209
0
      ref = *p;
2210
0
      insn = &ctx->ir_base[ref];
2211
0
      if (insn->op == IR_PHI) {
2212
0
        input = ir_insn_op(insn, k);
2213
0
        if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2214
0
          emit_copy(ctx, insn->type, input, ref);
2215
0
        }
2216
0
      }
2217
0
    }
2218
0
  }
2219
2220
0
  return 1;
2221
0
}
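
To see the ready/todo discipline in action, consider the parallel copy {1 <- 2, 2 <- 1}: neither move can go first without clobbering the other's source, so the algorithm parks one value in a temporary — the emit_copy(ctx, type, src, 0) call above plays exactly that role. A self-contained toy version over small integer locations (location 0 stands in for the temporary; the arrays and names are illustrative, not the ir_ra.c API):

#include <stdio.h>

enum { N = 8, TMP = 0 };     /* locations 1..N-1; location 0 is the spare temporary */

static int pred[N];          /* pred[b] = source location of destination b (0: none) */
static int loc[N];           /* loc[a]  = where the value of location a currently is */

static void emit(int from, int to) { printf("%d -> %d\n", from, to); }

static void sequentialize(void)
{
    int ready[N], todo[N], nr = 0, nt = 0, a, b, c, i;

    for (b = 1; b < N; b++) if (pred[b]) { loc[pred[b]] = pred[b]; todo[nt++] = b; }
    for (i = 0; i < nt; i++) if (!loc[todo[i]]) ready[nr++] = todo[i];

    while (nt > 0) {
        while (nr > 0) {
            b = ready[--nr];
            a = pred[b]; c = loc[a];
            emit(c, b);                   /* b's source value is currently at c */
            loc[a] = b;
            for (i = 0; i < nt; i++) {    /* drop b from todo */
                if (todo[i] == b) { todo[i] = todo[--nt]; break; }
            }
            if (a == c && pred[a]) ready[nr++] = a;   /* a's own slot is now free */
        }
        if (nt > 0) {                     /* only cycles remain: break one */
            b = todo[nt - 1];
            emit(b, TMP);                 /* park b's current value in the temp */
            loc[b] = TMP;
            ready[nr++] = b;
        }
    }
}

int main(void)
{
    pred[1] = 2; pred[2] = 1;             /* the swap {1 <- 2, 2 <- 1} */
    sequentialize();                      /* prints: 2 -> 0, 1 -> 2, 0 -> 1 */
    return 0;
}
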
2222
2223
/* Linear Scan Register Allocation */
2224
2225
#ifdef IR_DEBUG
2226
0
# define IR_LOG_LSRA(action, ival, comment) do { \
2227
0
    if (ctx->flags & IR_DEBUG_RA) { \
2228
0
      ir_live_interval *_ival = (ival); \
2229
0
      ir_live_pos _start = _ival->range.start; \
2230
0
      ir_live_pos _end = _ival->end; \
2231
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
2232
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2233
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2234
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
2235
0
    } \
2236
0
  } while (0)
2237
0
# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
2238
0
    if (ctx->flags & IR_DEBUG_RA) { \
2239
0
      ir_live_interval *_ival = (ival); \
2240
0
      ir_live_pos _start = _ival->range.start; \
2241
0
      ir_live_pos _end = _ival->end; \
2242
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
2243
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2244
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2245
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2246
0
        ir_reg_name(_ival->reg, _ival->type)); \
2247
0
    } \
2248
0
  } while (0)
2249
0
# define IR_LOG_LSRA_SPLIT(ival, pos) do { \
2250
0
    if (ctx->flags & IR_DEBUG_RA) { \
2251
0
      ir_live_interval *_ival = (ival); \
2252
0
      ir_live_pos _start = _ival->range.start; \
2253
0
      ir_live_pos _end = _ival->end; \
2254
0
      ir_live_pos _pos = (pos); \
2255
0
      fprintf(stderr, "      ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
2256
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2257
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2258
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2259
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2260
0
    } \
2261
0
  } while (0)
2262
0
# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
2263
0
    if (ctx->flags & IR_DEBUG_RA) { \
2264
0
      ir_live_interval *_ival = (ival); \
2265
0
      ir_live_pos _start = _ival->range.start; \
2266
0
      ir_live_pos _end = _ival->end; \
2267
0
      ir_live_pos _pos = (pos); \
2268
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
2269
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2270
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2271
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2272
0
        ir_reg_name(_ival->reg, _ival->type), \
2273
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2274
0
    } \
2275
0
  } while (0)
2276
#else
2277
# define IR_LOG_LSRA(action, ival, comment)
2278
# define IR_LOG_LSRA_ASSIGN(action, ival, comment)
2279
# define IR_LOG_LSRA_SPLIT(ival, pos)
2280
# define IR_LOG_LSRA_CONFLICT(action, ival, pos)
2281
#endif
2282
2283
static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position)
2284
0
{
2285
0
  ir_live_range *live_range = &ival->range;
2286
2287
0
  do {
2288
0
    if (position < live_range->end) {
2289
0
      return position >= live_range->start;
2290
0
    }
2291
0
    live_range = live_range->next;
2292
0
  } while (live_range);
2293
2294
0
  return 0;
2295
0
}
2296
2297
static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to)
2298
0
{
2299
0
  ir_live_range *r = &ival->range;
2300
2301
0
  while (r) {
2302
0
    if (from < r->start) {
2303
0
      return 1;
2304
0
    } else if (to <= r->end) {
2305
0
      return 0;
2306
0
    }
2307
0
    r = r->next;
2308
0
  }
2309
0
  return 0;
2310
0
}
2311
2312
2313
static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2314
0
{
2315
0
  ir_live_pos ret = 0;
2316
0
  ir_use_pos *p = ival->use_pos;
2317
2318
0
  while (p && p->pos <= pos) {
2319
0
    if (p->flags & flags) {
2320
0
      ret = p->pos;
2321
0
    }
2322
0
    p = p->next;
2323
0
  }
2324
0
  return ret;
2325
0
}
2326
2327
static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2328
0
{
2329
0
  ir_use_pos *p = ival->use_pos;
2330
2331
0
  while (p && p->pos < pos) {
2332
0
    p = p->next;
2333
0
  }
2334
0
  if (p && p->pos == pos && p->op_num != 0) {
2335
0
    p = p->next;
2336
0
  }
2337
0
  while (p && !(p->flags & flags)) {
2338
0
    p = p->next;
2339
0
  }
2340
0
  return p ? p->pos : 0x7fffffff;
2341
0
}
2342
2343
static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
2344
0
{
2345
0
  ir_ref ref = IR_LIVE_POS_TO_REF(pos);
2346
0
  uint32_t b = ctx->cfg_map[ref];
2347
2348
0
  while (!b) {
2349
0
    ref--;
2350
0
    IR_ASSERT(ref > 0);
2351
0
    b = ctx->cfg_map[ref];
2352
0
  }
2353
0
  IR_ASSERT(b <= ctx->cfg_blocks_count);
2354
0
  return &ctx->cfg_blocks[b];
2355
0
}
2356
2357
static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max)
2358
0
{
2359
0
  ir_block *min_bb, *max_bb;
2360
2361
0
  if (min_pos == max_pos) {
2362
0
    return max_pos;
2363
0
  }
2364
2365
0
  IR_ASSERT(min_pos < max_pos);
2366
0
  IR_ASSERT(min_pos >= ival->range.start);
2367
0
  IR_ASSERT(max_pos < ival->end);
2368
2369
0
  min_bb = ir_block_from_live_pos(ctx, min_pos);
2370
0
  max_bb = ir_block_from_live_pos(ctx, max_pos);
2371
2372
0
  if (min_bb == max_bb
2373
0
   || ir_ival_has_hole_between(ival, min_pos, max_pos)) {  // TODO: ???
2374
0
    return (prefer_max) ? max_pos : min_pos;
2375
0
  }
2376
2377
0
  if (max_bb->loop_depth > 0) {
2378
    /* Split at the end of the block that dominates the loop header, i.e. before the loop is entered */
2379
0
    do {
2380
0
      ir_block *bb;
2381
2382
0
      if (max_bb->flags & IR_BB_LOOP_HEADER) {
2383
0
        bb = max_bb;
2384
0
      } else {
2385
0
        IR_ASSERT(max_bb->loop_header);
2386
0
        bb = &ctx->cfg_blocks[max_bb->loop_header];
2387
0
      }
2388
0
      bb = &ctx->cfg_blocks[bb->idom];
2389
0
      if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) {
2390
0
        break;
2391
0
      }
2392
0
      max_bb = bb;
2393
0
    } while (max_bb->loop_depth > 0);
2394
2395
0
    if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) {
2396
0
      return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
2397
0
    }
2398
0
  }
2399
2400
0
  if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) {
2401
0
    return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
2402
0
  } else {
2403
    // TODO: "min_bb" is in a deeper loop than "max_bb" ???
2404
0
    return max_pos;
2405
0
  }
2406
0
}
2407
2408
static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos)
2409
0
{
2410
0
  ir_live_interval *child;
2411
0
  ir_live_range *p, *prev;
2412
0
  ir_use_pos *use_pos, *prev_use_pos;
2413
2414
0
  IR_LOG_LSRA_SPLIT(ival, pos);
2415
0
  IR_ASSERT(pos > ival->range.start);
2416
0
  ctx->flags2 |= IR_RA_HAVE_SPLITS;
2417
2418
0
  p = &ival->range;
2419
0
  prev = NULL;
2420
0
  while (p && pos >= p->end) {
2421
0
    prev = p;
2422
0
    p = prev->next;
2423
0
  }
2424
0
  IR_ASSERT(p);
2425
2426
0
  if (pos < p->start) {
2427
    /* split between ranges */
2428
0
    pos = p->start;
2429
0
  }
2430
2431
0
  use_pos = ival->use_pos;
2432
0
  prev_use_pos = NULL;
2433
2434
0
  ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS);
2435
0
  if (p->start == pos) {
2436
0
    while (use_pos && pos > use_pos->pos) {
2437
0
      if (use_pos->hint != IR_REG_NONE) {
2438
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2439
0
      }
2440
0
      if (use_pos->hint_ref > 0) {
2441
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2442
0
      }
2443
0
      prev_use_pos = use_pos;
2444
0
      use_pos = use_pos->next;
2445
0
    }
2446
0
  } else {
2447
0
    while (use_pos && pos >= use_pos->pos) {
2448
0
      if (use_pos->hint != IR_REG_NONE) {
2449
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2450
0
      }
2451
0
      if (use_pos->hint_ref > 0) {
2452
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2453
0
      }
2454
0
      prev_use_pos = use_pos;
2455
0
      use_pos = use_pos->next;
2456
0
    }
2457
0
  }
2458
2459
0
  child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
2460
0
  child->type = ival->type;
2461
0
  child->reg = IR_REG_NONE;
2462
0
  child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD;
2463
0
  child->vreg = ival->vreg;
2464
0
  child->stack_spill_pos = -1; // not allocated
2465
0
  child->range.start = pos;
2466
0
  child->range.end = p->end;
2467
0
  child->range.next = p->next;
2468
0
  child->end = ival->end;
2469
0
  child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;
2470
2471
0
  child->next = ival->next;
2472
0
  ival->next = child;
2473
2474
0
  if (pos == p->start) {
2475
0
    prev->next = NULL;
2476
0
    ival->end = prev->end;
2477
    /* Cache to reuse */
2478
0
    p->next = ctx->unused_ranges;
2479
0
    ctx->unused_ranges = p;
2480
0
  } else {
2481
0
    p->end = ival->end = pos;
2482
0
    p->next = NULL;
2483
0
  }
2484
0
  if (prev_use_pos) {
2485
0
    prev_use_pos->next = NULL;
2486
0
  } else {
2487
0
    ival->use_pos = NULL;
2488
0
  }
2489
2490
0
  use_pos = child->use_pos;
2491
0
  while (use_pos) {
2492
0
    if (use_pos->hint != IR_REG_NONE) {
2493
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2494
0
    }
2495
0
    if (use_pos->hint_ref > 0) {
2496
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2497
0
    }
2498
0
    use_pos = use_pos->next;
2499
0
  }
2500
2501
0
  return child;
2502
0
}
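
At its core the split cuts the sorted range list at pos: ranges wholly before pos stay with ival, the tail moves to the child, and a range straddling pos is cut in two (the p->end = ival->end = pos branch above). A standalone sketch of just that cut, with a caller-supplied spare node standing in for the arena allocation and the caller guaranteeing pos > the first range's start, as the IR_ASSERT above does:

struct range { int start, end; struct range *next; };

/* Cut a sorted range list at pos; returns the tail that lives at or after pos. */
static struct range *split_ranges(struct range *r, int pos, struct range *spare)
{
    struct range *prev = NULL;

    while (r && pos >= r->end) {   /* skip ranges that end at or before pos */
        prev = r;
        r = r->next;
    }
    if (!r) {
        return NULL;               /* pos lies past the last range */
    }
    if (pos <= r->start) {         /* pos falls into a hole: detach whole ranges */
        prev->next = NULL;         /* prev != NULL because pos > first start */
        return r;
    }
    spare->start = pos;            /* cut [start, end) into [start, pos) + [pos, end) */
    spare->end = r->end;
    spare->next = r->next;
    r->end = pos;
    r->next = NULL;
    return spare;
}
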
2503
2504
static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_alloc_data *data)
2505
0
{
2506
0
  int32_t ret;
2507
2508
0
  IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
2509
0
  if (data->handled && data->handled[size]) {
2510
0
    ret = data->handled[size]->stack_spill_pos;
2511
0
    data->handled[size] = data->handled[size]->list_next;
2512
0
  } else if (size == 8) {
2513
0
    ret = ctx->stack_frame_size;
2514
0
    ctx->stack_frame_size += 8;
2515
0
  } else if (size == 4) {
2516
0
    if (data->unused_slot_4) {
2517
0
      ret = data->unused_slot_4;
2518
0
      data->unused_slot_4 = 0;
2519
0
    } else if (data->handled && data->handled[8]) {
2520
0
      ret = data->handled[8]->stack_spill_pos;
2521
0
      data->handled[8] = data->handled[8]->list_next;
2522
0
      data->unused_slot_4 = ret + 4;
2523
0
    } else {
2524
0
      ret = ctx->stack_frame_size;
2525
0
      if (sizeof(void*) == 8) {
2526
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2527
0
        ctx->stack_frame_size += 8;
2528
0
      } else {
2529
0
        ctx->stack_frame_size += 4;
2530
0
      }
2531
0
    }
2532
0
  } else if (size == 2) {
2533
0
    if (data->unused_slot_2) {
2534
0
      ret = data->unused_slot_2;
2535
0
      data->unused_slot_2 = 0;
2536
0
    } else if (data->unused_slot_4) {
2537
0
      ret = data->unused_slot_4;
2538
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2539
0
      data->unused_slot_4 = 0;
2540
0
    } else if (data->handled && data->handled[4]) {
2541
0
      ret = data->handled[4]->stack_spill_pos;
2542
0
      data->handled[4] = data->handled[4]->list_next;
2543
0
      data->unused_slot_2 = ret + 2;
2544
0
    } else if (data->handled && data->handled[8]) {
2545
0
      ret = data->handled[8]->stack_spill_pos;
2546
0
      data->handled[8] = data->handled[8]->list_next;
2547
0
      data->unused_slot_2 = ret + 2;
2548
0
      data->unused_slot_4 = ret + 4;
2549
0
    } else {
2550
0
      ret = ctx->stack_frame_size;
2551
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2552
0
      if (sizeof(void*) == 8) {
2553
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2554
0
        ctx->stack_frame_size += 8;
2555
0
      } else {
2556
0
        ctx->stack_frame_size += 4;
2557
0
      }
2558
0
    }
2559
0
  } else if (size == 1) {
2560
0
    if (data->unused_slot_1) {
2561
0
      ret = data->unused_slot_1;
2562
0
      data->unused_slot_1 = 0;
2563
0
    } else if (data->unused_slot_2) {
2564
0
      ret = data->unused_slot_2;
2565
0
      data->unused_slot_1 = data->unused_slot_2 + 1;
2566
0
      data->unused_slot_2 = 0;
2567
0
    } else if (data->unused_slot_4) {
2568
0
      ret = data->unused_slot_4;
2569
0
      data->unused_slot_1 = data->unused_slot_4 + 1;
2570
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2571
0
      data->unused_slot_4 = 0;
2572
0
    } else if (data->handled && data->handled[2]) {
2573
0
      ret = data->handled[2]->stack_spill_pos;
2574
0
      data->handled[2] = data->handled[2]->list_next;
2575
0
      data->unused_slot_1 = ret + 1;
2576
0
    } else if (data->handled && data->handled[4]) {
2577
0
      ret = data->handled[4]->stack_spill_pos;
2578
0
      data->handled[4] = data->handled[4]->list_next;
2579
0
      data->unused_slot_1 = ret + 1;
2580
0
      data->unused_slot_2 = ret + 2;
2581
0
    } else if (data->handled && data->handled[8]) {
2582
0
      ret = data->handled[8]->stack_spill_pos;
2583
0
      data->handled[8] = data->handled[8]->list_next;
2584
0
      data->unused_slot_1 = ret + 1;
2585
0
      data->unused_slot_2 = ret + 2;
2586
0
      data->unused_slot_4 = ret + 4;
2587
0
    } else {
2588
0
      ret = ctx->stack_frame_size;
2589
0
      data->unused_slot_1 = ctx->stack_frame_size + 1;
2590
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2591
0
      if (sizeof(void*) == 8) {
2592
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2593
0
        ctx->stack_frame_size += 8;
2594
0
      } else {
2595
0
        ctx->stack_frame_size += 4;
2596
0
      }
2597
0
    }
2598
0
  } else {
2599
0
    ret = IR_NULL;
2600
0
  }
2601
0
  return ret;
2602
0
}
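
The slot allocator wastes no padding: on a 64-bit target a fresh 1-byte request claims an 8-byte slot and records the leftover 1-, 2- and 4-byte pieces for later requests. An illustrative trace, assuming an empty frame, a zeroed ir_reg_alloc_data and no handled intervals to recycle (the function is file-static, so this is a trace, not client code):

ir_reg_alloc_data data = {0};                            /* handled == NULL, no free pieces */
ctx->stack_frame_size = 0;

int32_t a = ir_allocate_small_spill_slot(ctx, 1, &data); /* a == 0; frame -> 8; holes at 1, 2, 4 */
int32_t b = ir_allocate_small_spill_slot(ctx, 2, &data); /* b == 2; reuses the 2-byte hole */
int32_t c = ir_allocate_small_spill_slot(ctx, 4, &data); /* c == 4; reuses the 4-byte hole */
int32_t d = ir_allocate_small_spill_slot(ctx, 1, &data); /* d == 1; reuses the 1-byte hole */
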
2603
2604
int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data)
2605
0
{
2606
0
  return ir_allocate_small_spill_slot(ctx, ir_type_size[type], data);
2607
0
}
2608
2609
static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_alloc_data *data)
2610
0
{
2611
0
  int32_t ret;
2612
2613
0
  if (size <= 8) {
2614
0
    if (size == 3) {
2615
0
      size = 4;
2616
0
    } else if (size > 4 && size < 8) {
2617
0
      size = 8;
2618
0
    }
2619
0
    return ir_allocate_small_spill_slot(ctx, size, data);
2620
0
  }
2621
2622
  /* Align stack-allocated data to a 16-byte boundary */
2623
0
  ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
2624
0
  ret = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
2625
0
  size = IR_ALIGNED_SIZE(size, 8);
2626
0
  ctx->stack_frame_size = ret + size;
2627
2628
0
  return ret;
2629
0
}
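
Requests larger than 8 bytes are rounded up to a multiple of 8 and placed at the next 16-byte boundary, and the whole frame is flagged for 16-byte alignment. For example, with stack_frame_size == 20, a 24-byte request returns offset 32 and leaves the frame at 56 bytes.
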
2630
2631
static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2632
0
{
2633
0
  ir_use_pos *use_pos;
2634
0
  ir_reg reg;
2635
2636
0
  use_pos = ival->use_pos;
2637
0
  while (use_pos) {
2638
0
    reg = use_pos->hint;
2639
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2640
0
      return reg;
2641
0
    }
2642
0
    use_pos = use_pos->next;
2643
0
  }
2644
2645
0
  return IR_REG_NONE;
2646
0
}
2647
2648
static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos)
2649
0
{
2650
0
  ir_use_pos *use_pos;
2651
0
  ir_reg reg;
2652
2653
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2654
0
    use_pos = ival->use_pos;
2655
0
    while (use_pos) {
2656
0
      reg = use_pos->hint;
2657
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2658
0
        if (ival->end <= freeUntilPos[reg]) {
2659
          /* register available for the whole interval */
2660
0
          return reg;
2661
0
        }
2662
0
      }
2663
0
      use_pos = use_pos->next;
2664
0
    }
2665
0
  }
2666
2667
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) {
2668
0
    use_pos = ival->use_pos;
2669
0
    while (use_pos) {
2670
0
      if (use_pos->hint_ref > 0) {
2671
0
        reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2672
0
        if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2673
0
          if (ival->end <= freeUntilPos[reg]) {
2674
            /* register available for the whole interval */
2675
0
            return reg;
2676
0
          }
2677
0
        }
2678
0
      }
2679
0
      use_pos = use_pos->next;
2680
0
    }
2681
0
  }
2682
2683
0
  return IR_REG_NONE;
2684
0
}
2685
2686
static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2687
0
{
2688
0
  ir_use_pos *use_pos;
2689
0
  ir_reg reg;
2690
2691
0
  use_pos = ival->use_pos;
2692
0
  while (use_pos) {
2693
0
    reg = use_pos->hint;
2694
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2695
0
      return reg;
2696
0
    } else if (use_pos->hint_ref > 0) {
2697
0
      reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2698
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2699
0
        return reg;
2700
0
      }
2701
0
    }
2702
0
    use_pos = use_pos->next;
2703
0
  }
2704
2705
0
  return IR_REG_NONE;
2706
0
}
2707
2708
static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2709
0
{
2710
0
  ir_live_pos pos = ival->range.start;
2711
2712
0
  if (*unhandled == NULL
2713
0
   || pos < (*unhandled)->range.start
2714
0
   || (pos == (*unhandled)->range.start
2715
0
    && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2716
0
    && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2717
0
   || (pos == (*unhandled)->range.start
2718
0
    && ival->vreg > (*unhandled)->vreg)) {
2719
0
    ival->list_next = *unhandled;
2720
0
    *unhandled = ival;
2721
0
  } else {
2722
0
    ir_live_interval *prev = *unhandled;
2723
2724
0
    while (prev->list_next) {
2725
0
      if (pos < prev->list_next->range.start
2726
0
       || (pos == prev->list_next->range.start
2727
0
        && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2728
0
        && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2729
0
       || (pos == prev->list_next->range.start
2730
0
        && ival->vreg > prev->list_next->vreg)) {
2731
0
        break;
2732
0
      }
2733
0
      prev = prev->list_next;
2734
0
    }
2735
0
    ival->list_next = prev->list_next;
2736
0
    prev->list_next = ival;
2737
0
  }
2738
0
}
2739
2740
/* merge sorted lists */
2741
static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2742
0
{
2743
0
  ir_live_interval **prev;
2744
0
  ir_live_pos pos;
2745
2746
0
  if (*unhandled == NULL) {
2747
0
    *unhandled = ival;
2748
0
    while (ival) {
2749
0
      ival = ival->list_next = ival->next;
2750
0
    }
2751
0
  } else {
2752
0
    prev = unhandled;
2753
0
    while (ival) {
2754
0
      pos = ival->range.start;
2755
0
      while (*prev && pos >= (*prev)->range.start) {
2756
0
        prev = &(*prev)->list_next;
2757
0
      }
2758
0
      ival->list_next = *prev;
2759
0
      *prev = ival;
2760
0
      prev = &ival->list_next;
2761
0
      ival = ival->next;
2762
0
    }
2763
0
  }
2764
0
#ifdef IR_DEBUG
2765
0
  ival = *unhandled;
2766
0
  pos = 0;
2767
2768
0
  while (ival) {
2769
0
    IR_ASSERT(ival->range.start >= pos);
2770
0
    pos = ival->range.start;
2771
0
    ival = ival->list_next;
2772
0
  }
2773
0
#endif
2774
0
}
2775
2776
static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival)
2777
0
{
2778
0
  ir_live_pos pos = ival->range.start;
2779
2780
0
  if (*unhandled == NULL
2781
0
   || pos <= (*unhandled)->range.start) {
2782
0
    ival->list_next = *unhandled;
2783
0
    *unhandled = ival;
2784
0
  } else {
2785
0
    ir_live_interval *prev = *unhandled;
2786
2787
0
    while (prev->list_next) {
2788
0
      if (pos <= prev->list_next->range.start) {
2789
0
        break;
2790
0
      }
2791
0
      prev = prev->list_next;
2792
0
    }
2793
0
    ival->list_next = prev->list_next;
2794
0
    prev->list_next = ival;
2795
0
  }
2796
0
}
2797
2798
static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
2799
0
{
2800
0
  ir_live_pos freeUntilPos[IR_REG_NUM];
2801
0
  int i, reg;
2802
0
  ir_live_pos pos, next;
2803
0
  ir_live_interval *other;
2804
0
  ir_regset available, overlapped, scratch;
2805
2806
0
  if (IR_IS_TYPE_FP(ival->type)) {
2807
0
    available = IR_REGSET_FP;
2808
    /* set freeUntilPos of all physical registers to maxInt */
2809
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
2810
0
      freeUntilPos[i] = 0x7fffffff;
2811
0
    }
2812
0
  } else {
2813
0
    available = IR_REGSET_GP;
2814
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
2815
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
2816
0
    }
2817
#if defined(IR_TARGET_X86)
2818
    if (ir_type_size[ival->type] == 1) {
2819
      /* TODO: if no registers are available, we may use one of these registers for an already allocated interval ??? */
2820
      IR_REGSET_EXCL(available, IR_REG_RBP);
2821
      IR_REGSET_EXCL(available, IR_REG_RSI);
2822
      IR_REGSET_EXCL(available, IR_REG_RDI);
2823
    }
2824
#endif
2825
    /* set freeUntilPos of all physical registers to maxInt */
2826
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
2827
0
      freeUntilPos[i] = 0x7fffffff;
2828
0
    }
2829
0
  }
2830
2831
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
2832
2833
  /* for each interval it in active */
2834
0
  other = *active;
2835
0
  while (other) {
2836
    /* freeUntilPos[it.reg] = 0 */
2837
0
    reg = other->reg;
2838
0
    IR_ASSERT(reg >= 0);
2839
0
    if (reg >= IR_REG_SCRATCH) {
2840
0
      if (reg == IR_REG_SCRATCH) {
2841
0
        available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
2842
0
      } else {
2843
0
        IR_ASSERT(reg == IR_REG_ALL);
2844
0
        available = IR_REGSET_EMPTY;
2845
0
      }
2846
0
    } else {
2847
0
      IR_REGSET_EXCL(available, reg);
2848
0
    }
2849
0
    other = other->list_next;
2850
0
  }
2851
2852
  /* for each interval it in inactive intersecting with current
2853
   *
2854
   * This loop is not necessary for a program in SSA form (see LSRA on SSA, fig. 6),
2855
   * but it is still necessary after coalescing and splitting
2856
   */
2857
0
  overlapped = IR_REGSET_EMPTY;
2858
0
  other = inactive;
2859
0
  pos = ival->end;
2860
0
  while (other) {
2861
    /* freeUntilPos[it.reg] = next intersection of it with current */
2862
0
    if (other->current_range->start < pos) {
2863
0
      next = ir_ivals_overlap(&ival->range, other->current_range);
2864
0
      if (next) {
2865
0
        reg = other->reg;
2866
0
        IR_ASSERT(reg >= 0);
2867
0
        if (reg >= IR_REG_SCRATCH) {
2868
0
          ir_regset regset;
2869
2870
0
          if (reg == IR_REG_SCRATCH) {
2871
0
            regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2872
0
          } else {
2873
0
            IR_ASSERT(reg == IR_REG_ALL);
2874
0
            regset = available;
2875
0
          }
2876
0
          overlapped = IR_REGSET_UNION(overlapped, regset);
2877
0
          IR_REGSET_FOREACH(regset, reg) {
2878
0
            if (next < freeUntilPos[reg]) {
2879
0
              freeUntilPos[reg] = next;
2880
0
            }
2881
0
          } IR_REGSET_FOREACH_END();
2882
0
        } else if (IR_REGSET_IN(available, reg)) {
2883
0
          IR_REGSET_INCL(overlapped, reg);
2884
0
          if (next < freeUntilPos[reg]) {
2885
0
            freeUntilPos[reg] = next;
2886
0
          }
2887
0
        }
2888
0
      }
2889
0
    }
2890
0
    other = other->list_next;
2891
0
  }
2892
2893
0
  available = IR_REGSET_DIFFERENCE(available, overlapped);
2894
0
  if (available != IR_REGSET_EMPTY) {
2895
2896
0
    if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
2897
      /* Try to use hint */
2898
0
      reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);
2899
0
      if (reg != IR_REG_NONE) {
2900
0
        ival->reg = reg;
2901
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (hint available without spilling)");
2902
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2903
0
          ival->list_next = *active;
2904
0
          *active = ival;
2905
0
        }
2906
0
        return reg;
2907
0
      }
2908
0
    }
2909
2910
0
    if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) {
2911
      /* Try to reuse the register previously allocated for the split interval */
2912
0
      reg = ctx->live_intervals[ival->vreg]->reg;
2913
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2914
0
        ival->reg = reg;
2915
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
2916
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2917
0
          ival->list_next = *active;
2918
0
          *active = ival;
2919
0
        }
2920
0
        return reg;
2921
0
      }
2922
0
    }
2923
2924
    /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2925
0
    scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2926
0
    if (scratch != IR_REGSET_EMPTY) {
2927
      /* prefer registers that don't conflict with the hints for the following unhandled intervals */
2928
0
      if (1) {
2929
0
        ir_regset non_conflicting = scratch;
2930
2931
0
        other = *unhandled;
2932
0
        while (other && other->range.start < ival->range.end) {
2933
0
          if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2934
0
            reg = ir_get_first_reg_hint(ctx, other, non_conflicting);
2935
2936
0
            if (reg >= 0) {
2937
0
              IR_REGSET_EXCL(non_conflicting, reg);
2938
0
              if (non_conflicting == IR_REGSET_EMPTY) {
2939
0
                break;
2940
0
              }
2941
0
            }
2942
0
          }
2943
0
          other = other->list_next;
2944
0
        }
2945
0
        if (non_conflicting != IR_REGSET_EMPTY) {
2946
0
          reg = IR_REGSET_FIRST(non_conflicting);
2947
0
        } else {
2948
0
          reg = IR_REGSET_FIRST(scratch);
2949
0
        }
2950
0
      } else {
2951
0
        reg = IR_REGSET_FIRST(scratch);
2952
0
      }
2953
0
    } else {
2954
0
      reg = IR_REGSET_FIRST(available);
2955
0
    }
2956
0
    ival->reg = reg;
2957
0
    IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
2958
0
    if (*unhandled && ival->end > (*unhandled)->range.start) {
2959
0
      ival->list_next = *active;
2960
0
      *active = ival;
2961
0
    }
2962
0
    return reg;
2963
0
  }
2964
2965
  /* reg = register with highest freeUntilPos */
2966
0
  reg = IR_REG_NONE;
2967
0
  pos = 0;
2968
0
  IR_REGSET_FOREACH(overlapped, i) {
2969
0
    if (freeUntilPos[i] > pos) {
2970
0
      pos = freeUntilPos[i];
2971
0
      reg = i;
2972
0
    } else if (freeUntilPos[i] == pos
2973
0
        && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg)
2974
0
        && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) {
2975
      /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2976
0
      pos = freeUntilPos[i];
2977
0
      reg = i;
2978
0
    }
2979
0
  } IR_REGSET_FOREACH_END();
2980
2981
0
  if (pos > ival->range.start) {
2982
    /* register available for the first part of the interval */
2983
    /* split current before freeUntilPos[reg] */
2984
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival, pos,
2985
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
2986
0
    if (split_pos > ival->range.start) {
2987
0
      split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
2988
0
      other = ir_split_interval_at(ctx, ival, split_pos);
2989
0
      if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
2990
0
        ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);
2991
2992
0
        if (pref_reg != IR_REG_NONE) {
2993
0
          ival->reg = pref_reg;
2994
0
        } else {
2995
0
          ival->reg = reg;
2996
0
        }
2997
0
      } else {
2998
0
        ival->reg = reg;
2999
0
      }
3000
0
      IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling for the first part)");
3001
0
      if (*unhandled && ival->end > (*unhandled)->range.start) {
3002
0
        ival->list_next = *active;
3003
0
        *active = ival;
3004
0
      }
3005
0
      ir_add_to_unhandled(unhandled, other);
3006
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3007
0
      return reg;
3008
0
    }
3009
0
  }
3010
0
  return IR_REG_NONE;
3011
0
}
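
The selection policy above is the classic linear-scan rule: if some register's freeUntilPos covers the whole interval, take it outright; otherwise take the register that stays free the longest and split the interval before that position (or report failure so the caller falls back to ir_allocate_blocked_reg). A condensed sketch of that decision over a plain array instead of the IR_REGSET machinery (names are illustrative):

/* Returns the chosen register, or -1 when no register is usable at all.
 * If *split_pos comes back nonzero, the caller must split the interval there first. */
static int pick_free_reg(const int *freeUntilPos, int nregs,
                         int ival_start, int ival_end, int *split_pos)
{
    int reg = -1, best = 0, i;

    for (i = 0; i < nregs; i++) {
        if (freeUntilPos[i] > best) { best = freeUntilPos[i]; reg = i; }
    }
    *split_pos = 0;
    if (reg < 0 || best <= ival_start) return -1;  /* no register is free long enough */
    if (best < ival_end) *split_pos = best;        /* free only for a prefix: split   */
    return reg;
}
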
3012
3013
static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled)
3014
0
{
3015
0
  ir_live_pos nextUsePos[IR_REG_NUM];
3016
0
  ir_live_pos blockPos[IR_REG_NUM];
3017
0
  int i, reg;
3018
0
  ir_live_pos pos, next_use_pos;
3019
0
  ir_live_interval *other, *prev;
3020
0
  ir_use_pos *use_pos;
3021
0
  ir_regset available, tmp_regset;
3022
3023
0
  if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3024
0
    use_pos = ival->use_pos;
3025
0
    while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) {
3026
0
      use_pos = use_pos->next;
3027
0
    }
3028
0
    if (!use_pos) {
3029
      /* spill */
3030
0
      IR_LOG_LSRA("    ---- Spill", ival, " (no use pos that must be in reg)");
3031
0
      ctx->flags2 |= IR_RA_HAVE_SPILLS;
3032
0
      return IR_REG_NONE;
3033
0
    }
3034
0
    next_use_pos = use_pos->pos;
3035
0
  } else {
3036
0
    next_use_pos = ival->range.end;
3037
0
  }
3038
3039
0
  if (IR_IS_TYPE_FP(ival->type)) {
3040
0
    available = IR_REGSET_FP;
3041
    /* set nextUsePos of all physical registers to maxInt */
3042
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
3043
0
      nextUsePos[i] = 0x7fffffff;
3044
0
      blockPos[i] = 0x7fffffff;
3045
0
    }
3046
0
  } else {
3047
0
    available = IR_REGSET_GP;
3048
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
3049
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
3050
0
    }
3051
#if defined(IR_TARGET_X86)
3052
    if (ir_type_size[ival->type] == 1) {
3053
      /* TODO: if no registers are available, we may use one of these registers for an already allocated interval ??? */
3054
      IR_REGSET_EXCL(available, IR_REG_RBP);
3055
      IR_REGSET_EXCL(available, IR_REG_RSI);
3056
      IR_REGSET_EXCL(available, IR_REG_RDI);
3057
    }
3058
#endif
3059
    /* set nextUsePos of all physical registers to maxInt */
3060
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
3061
0
      nextUsePos[i] = 0x7fffffff;
3062
0
      blockPos[i] = 0x7fffffff;
3063
0
    }
3064
0
  }
3065
3066
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
3067
3068
0
  if (IR_REGSET_IS_EMPTY(available)) {
3069
0
    fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");
3070
0
    IR_ASSERT(0);
3071
0
    exit(-1);
3072
0
  }
3073
3074
  /* for each interval it in active */
3075
0
  other = *active;
3076
0
  while (other) {
3077
    /* nextUsePos[it.reg] = next use of it after start of current */
3078
0
    reg = other->reg;
3079
0
    IR_ASSERT(reg >= 0);
3080
0
    if (reg >= IR_REG_SCRATCH) {
3081
0
      ir_regset regset;
3082
3083
0
      if (reg == IR_REG_SCRATCH) {
3084
0
        regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3085
0
      } else {
3086
0
        IR_ASSERT(reg == IR_REG_ALL);
3087
0
        regset = available;
3088
0
      }
3089
0
      IR_REGSET_FOREACH(regset, reg) {
3090
0
        blockPos[reg] = nextUsePos[reg] = 0;
3091
0
      } IR_REGSET_FOREACH_END();
3092
0
    } else if (IR_REGSET_IN(available, reg)) {
3093
0
      if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3094
0
        blockPos[reg] = nextUsePos[reg] = 0;
3095
0
      } else {
3096
0
        pos = ir_first_use_pos_after(other, ival->range.start,
3097
0
          IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3098
0
        if (pos < nextUsePos[reg]) {
3099
0
          nextUsePos[reg] = pos;
3100
0
        }
3101
0
      }
3102
0
    }
3103
0
    other = other->list_next;
3104
0
  }
3105
3106
  /* for each interval it in inactive intersecting with current */
3107
0
  other = *inactive;
3108
0
  while (other) {
3109
    /* nextUsePos[it.reg] / blockPos[it.reg] = next intersection of it with current */
3110
0
    reg = other->reg;
3111
0
    IR_ASSERT(reg >= 0);
3112
0
    if (reg >= IR_REG_SCRATCH) {
3113
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3114
3115
0
      if (overlap) {
3116
0
        ir_regset regset;
3117
3118
0
        if (reg == IR_REG_SCRATCH) {
3119
0
          regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3120
0
        } else {
3121
0
          IR_ASSERT(reg == IR_REG_ALL);
3122
0
          regset = available;
3123
0
        }
3124
0
        IR_REGSET_FOREACH(regset, reg) {
3125
0
          if (overlap < nextUsePos[reg]) {
3126
0
            nextUsePos[reg] = overlap;
3127
0
          }
3128
0
          if (overlap < blockPos[reg]) {
3129
0
            blockPos[reg] = overlap;
3130
0
          }
3131
0
        } IR_REGSET_FOREACH_END();
3132
0
      }
3133
0
    } else if (IR_REGSET_IN(available, reg)) {
3134
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3135
3136
0
      if (overlap) {
3137
0
        if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3138
0
          if (overlap < nextUsePos[reg]) {
3139
0
            nextUsePos[reg] = overlap;
3140
0
          }
3141
0
          if (overlap < blockPos[reg]) {
3142
0
            blockPos[reg] = overlap;
3143
0
          }
3144
0
        } else {
3145
0
          pos = ir_first_use_pos_after(other, ival->range.start,
3146
0
            IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3147
0
          if (pos < nextUsePos[reg]) {
3148
0
            nextUsePos[reg] = pos;
3149
0
          }
3150
0
        }
3151
0
      }
3152
0
    }
3153
0
    other = other->list_next;
3154
0
  }
3155
3156
  /* register hinting */
3157
0
  reg = IR_REG_NONE;
3158
0
  if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3159
0
    reg = ir_get_preferred_reg(ctx, ival, available);
3160
0
  }
3161
0
  if (reg == IR_REG_NONE) {
3162
0
select_register:
3163
0
    reg = IR_REGSET_FIRST(available);
3164
0
  }
3165
3166
  /* reg = register with highest nextUsePos */
3167
0
  pos = nextUsePos[reg];
3168
0
  tmp_regset = available;
3169
0
  IR_REGSET_EXCL(tmp_regset, reg);
3170
0
  IR_REGSET_FOREACH(tmp_regset, i) {
3171
0
    if (nextUsePos[i] > pos) {
3172
0
      pos = nextUsePos[i];
3173
0
      reg = i;
3174
0
    }
3175
0
  } IR_REGSET_FOREACH_END();
3176
3177
  /* if first usage of current is after nextUsePos[reg] then */
3178
0
  if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3179
    /* all other intervals are used before current, so it is best to spill current itself */
3180
    /* assign spill slot to current */
3181
    /* split current before its first use position that requires a register */
3182
0
    ir_live_pos split_pos;
3183
3184
0
    if (next_use_pos == ival->range.start) {
3185
0
      IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3186
      /* split right after definition */
3187
0
      split_pos = next_use_pos + 1;
3188
0
    } else {
3189
0
      split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3190
0
    }
3191
3192
0
    if (split_pos > ival->range.start) {
3193
0
      IR_LOG_LSRA("    ---- Conflict with others", ival, " (all others are used before)");
3194
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3195
0
      IR_LOG_LSRA("    ---- Spill", ival, "");
3196
0
      ir_add_to_unhandled(unhandled, other);
3197
0
      IR_LOG_LSRA("    ---- Queue", other, "");
3198
0
      return IR_REG_NONE;
3199
0
    }
3200
0
  }
3201
3202
0
  if (ival->end > blockPos[reg]) {
3203
    /* spilling make a register free only for the first part of current */
3204
0
    IR_LOG_LSRA("    ---- Conflict with others", ival, " (spilling make a register free only for the first part)");
3205
    /* split current at optimal position before block_pos[reg] */
3206
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival,  blockPos[reg] + 1,
3207
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3208
0
    if (split_pos == 0) {
3209
0
      split_pos = ir_first_use_pos_after(ival, blockPos[reg],
3210
0
        IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1;
3211
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3212
0
      ir_add_to_unhandled(unhandled, other);
3213
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3214
0
      return IR_REG_NONE;
3215
0
    }
3216
0
    if (split_pos >= blockPos[reg]) {
3217
0
      IR_REGSET_EXCL(available, reg);
3218
0
      if (IR_REGSET_IS_EMPTY(available)) {
3219
0
        fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
3220
0
        IR_ASSERT(0);
3221
0
        exit(-1);
3222
0
      }
3223
0
      IR_LOG_LSRA("      ---- Restart", ival, "");
3224
0
      goto select_register;
3225
0
    }
3226
0
    split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
3227
0
    other = ir_split_interval_at(ctx, ival, split_pos);
3228
0
    ir_add_to_unhandled(unhandled, other);
3229
0
    IR_LOG_LSRA("      ---- Queue", other, "");
3230
0
  }
3231
3232
  /* spill intervals that currently block reg */
3233
0
  prev = NULL;
3234
0
  other = *active;
3235
0
  while (other) {
3236
0
    ir_live_pos split_pos;
3237
3238
0
    if (reg == other->reg) {
3239
      /* split active interval for reg at position */
3240
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3241
3242
0
      if (overlap) {
3243
0
        ir_live_interval *child, *child2;
3244
3245
0
        IR_ASSERT(other->type != IR_VOID);
3246
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with active", other, overlap);
3247
3248
0
        split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3249
0
        if (split_pos == 0) {
3250
0
          split_pos = ival->range.start;
3251
0
        }
3252
0
        split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);
3253
0
        if (split_pos > other->range.start) {
3254
0
          child = ir_split_interval_at(ctx, other, split_pos);
3255
0
          if (prev) {
3256
0
            prev->list_next = other->list_next;
3257
0
          } else {
3258
0
            *active = other->list_next;
3259
0
          }
3260
0
          IR_LOG_LSRA("      ---- Finish", other, "");
3261
0
        } else {
3262
0
          child = other;
3263
0
          other->reg = IR_REG_NONE;
3264
0
          if (prev) {
3265
0
            prev->list_next = other->list_next;
3266
0
          } else {
3267
0
            *active = other->list_next;
3268
0
          }
3269
0
          IR_LOG_LSRA("      ---- Spill and Finish", other, " (it must not be in reg)");
3270
0
        }
3271
3272
0
        split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
3273
0
        if (split_pos > child->range.start && split_pos < child->end) {
3274
0
          ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);
3275
0
          if (opt_split_pos > child->range.start) {
3276
0
            split_pos = opt_split_pos;
3277
0
          }
3278
0
          child2 = ir_split_interval_at(ctx, child, split_pos);
3279
0
          IR_LOG_LSRA("      ---- Spill", child, "");
3280
0
          ir_add_to_unhandled(unhandled, child2);
3281
0
          IR_LOG_LSRA("      ---- Queue", child2, "");
3282
0
        } else if (child != other) {
3283
          // TODO: this may cause endless loop
3284
0
          ir_add_to_unhandled(unhandled, child);
3285
0
          IR_LOG_LSRA("      ---- Queue", child, "");
3286
0
        }
3287
0
      }
3288
0
      break;
3289
0
    }
3290
0
    prev = other;
3291
0
    other = other->list_next;
3292
0
  }
3293
3294
  /* split any inactive interval for reg at the end of its lifetime hole */
3295
0
  other = *inactive;
3296
0
  while (other) {
3297
    /* freeUntilPos[it.reg] = next intersection of it with current */
3298
0
    if (reg == other->reg) {
3299
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3300
3301
0
      if (overlap) {
3302
0
        ir_live_interval *child;
3303
3304
0
        IR_ASSERT(other->type != IR_VOID);
3305
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with inactive", other, overlap);
3306
        // TODO: optimal split position (this case is not tested)
3307
0
        child = ir_split_interval_at(ctx, other, overlap);
3308
        /* reset range cache */
3309
0
        other->current_range = &other->range;
3310
0
        ir_add_to_unhandled(unhandled, child);
3311
0
        IR_LOG_LSRA("      ---- Queue", child, "");
3312
0
      }
3313
0
    }
3314
0
    other = other->list_next;
3315
0
  }
3316
3317
  /* current.reg = reg */
3318
0
  ival->reg = reg;
3319
0
  IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (after splitting others)");
3320
3321
0
  if (*unhandled && ival->end > (*unhandled)->range.start) {
3322
0
    ival->list_next = *active;
3323
0
    *active = ival;
3324
0
  }
3325
0
  return reg;
3326
0
}
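
/*
 * Editor's note: a minimal standalone sketch (not part of ir_ra.c) of the
 * heuristic ir_allocate_blocked_reg() applies above -- when every register
 * is occupied, evict the one whose owner is needed furthest in the future,
 * mirroring the nextUsePos scan over tmp_regset.  The register count and
 * positions below are hypothetical.
 */
#include <stdio.h>

#define N_REGS 4

static int pick_spill_victim(const int next_use_pos[N_REGS])
{
  int reg = 0;
  int i;

  for (i = 1; i < N_REGS; i++) {
    if (next_use_pos[i] > next_use_pos[reg]) {
      reg = i; /* furthest next use => cheapest to spill */
    }
  }
  return reg;
}

int main(void)
{
  int next_use_pos[N_REGS] = {12, 40, 7, 31};

  printf("spill r%d\n", pick_spill_victim(next_use_pos)); /* prints "spill r1" */
  return 0;
}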

static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
  ir_block *bb = ctx->data;
  ir_tmp_reg tmp_reg;

  if (to == 0) {
    if (IR_IS_TYPE_INT(type)) {
      tmp_reg.num = 0;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      tmp_reg.num = 1;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    }
  } else if (from != 0) {
    if (IR_IS_TYPE_INT(type)) {
      tmp_reg.num = 0;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      tmp_reg.num = 1;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    }
  } else {
    return 1;
  }
  if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
    ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);
  }
  return 1;
}
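
/*
 * Editor's note: why ir_fix_dessa_tmps() reserves a temporary per block
 * (sketch, not part of ir_ra.c).  SSA-deconstruction moves form a parallel
 * copy; a cycle such as (a, b) <- (b, a) cannot be serialized without a
 * scratch location, which is exactly what tmp_reg.num 0 (GP) and 1 (FP)
 * provide.  The helper below is a hypothetical illustration.
 */
static void parallel_swap(int *a, int *b)
{
  int tmp = *a; /* the reserved temporary register */

  *a = *b;
  *b = tmp;
}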

static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
{
  ir_use_pos *use_pos = ival->use_pos;

  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
    IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
#if IR_DEBUG
    ir_insn *insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
    IR_ASSERT(insn->op == IR_PARAM);
#endif
    use_pos = use_pos->next;
    if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
      return 0;
    }

    if (use_pos) {
      ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
      if (bb->loop_depth) {
        return 0;
      }
    }

    return 1;
  }
  return 0;
}
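
/*
 * Editor's note: the effect of ir_ival_spill_for_fuse_load() in a sketch
 * (not part of ir_ra.c).  A stack-passed PARAM whose remaining uses can
 * tolerate memory, and which is not read inside a loop, never gets a
 * register of its own; each use is folded into a load from the incoming
 * argument slot.  The function below is a hypothetical illustration.
 */
static long use_mem_param_once(const long *arg_slot)
{
  /* fused spill load: the use reads the PARAM's home location directly */
  return *arg_slot + 1;
}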

static void ir_assign_bound_spill_slots(ir_ctx *ctx)
{
  ir_hashtab_bucket *b = ctx->binding->data;
  uint32_t n = ctx->binding->count;
  uint32_t v;
  ir_live_interval *ival;

  while (n > 0) {
    v = ctx->vregs[b->key];
    if (v) {
      ival = ctx->live_intervals[v];
      if (ival
       && ival->stack_spill_pos == -1
       && (ival->next || ival->reg == IR_REG_NONE)) {
        IR_ASSERT(b->val < 0);
        /* special spill slot */
        ival->stack_spill_pos = -b->val;
        ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
      }
    }
    b++;
    n--;
  }
}
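
/*
 * Editor's note: a sketch (not part of ir_ra.c) of the binding convention
 * that ir_assign_bound_spill_slots() relies on -- a negative bound value
 * encodes a caller-provided "special" spill location, claimed before the
 * generic spill-slot allocator runs.  The bucket layout below is a
 * hypothetical simplification of ir_hashtab_bucket.
 */
typedef struct { int key; int val; } binding_bucket;

static int special_spill_pos(const binding_bucket *b)
{
  return (b->val < 0) ? -b->val : -1; /* -1: no special slot bound */
}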

static int ir_linear_scan(ir_ctx *ctx)
{
  uint32_t b;
  ir_block *bb;
  ir_live_interval *unhandled = NULL;
  ir_live_interval *active = NULL;
  ir_live_interval *inactive = NULL;
  ir_live_interval *ival, *other, *prev;
  int j;
  ir_live_pos position;
  ir_reg reg;
  ir_reg_alloc_data data;
  ir_ref vars = ctx->vars;

  if (!ctx->live_intervals) {
    return 0;
  }

  if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) {
    /* Add fixed intervals for temporary registers used for DESSA moves */
    for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
      IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
      if (bb->flags & IR_BB_DESSA_MOVES) {
        ctx->data = bb;
        ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
      }
    }
  }

  ctx->data = &data;
  ctx->stack_frame_size = 0;
  data.unused_slot_4 = 0;
  data.unused_slot_2 = 0;
  data.unused_slot_1 = 0;
  data.handled = NULL;

  while (vars) {
    ir_ref var = vars;
    ir_insn *insn = &ctx->ir_base[var];

    IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA);
    vars = insn->op3; /* list next */

    if (insn->op == IR_VAR) {
      ir_ref slot = ir_allocate_spill_slot(ctx, insn->type, &data);
      ir_use_list *use_list;
      ir_ref n, *p;

      insn->op3 = slot;
      use_list = &ctx->use_lists[var];
      n = use_list->count;
      p = &ctx->use_edges[use_list->refs];
      for (; n > 0; p++, n--) {
        insn = &ctx->ir_base[*p];
        if (insn->op == IR_VADDR) {
          insn->op3 = slot;
        }
      }
    } else {
      ir_insn *val = &ctx->ir_base[insn->op2];

      IR_ASSERT(IR_IS_CONST_REF(insn->op2));
      IR_ASSERT(IR_IS_TYPE_INT(val->type));
      IR_ASSERT(!IR_IS_SYM_CONST(val->op));
      IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
      IR_ASSERT(val->val.i64 < 0x7fffffff);

      insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data);
    }
  }

  for (j = ctx->vregs_count; j != 0; j--) {
    ival = ctx->live_intervals[j];
    if (ival) {
      if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
          || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
        ir_add_to_unhandled(&unhandled, ival);
      }
    }
  }

  ival = ctx->live_intervals[0];
  if (ival) {
    ir_merge_to_unhandled(&unhandled, ival);
  }

  /* vregs + tmp + fixed + SCRATCH + ALL */
  for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
    ival = ctx->live_intervals[j];
    if (ival) {
      ival->current_range = &ival->range;
      ival->list_next = inactive;
      inactive = ival;
    }
  }

  ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS);

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_RA) {
    fprintf(stderr, "----\n");
    ir_dump_live_ranges(ctx, stderr);
    fprintf(stderr, "---- Start LSRA\n");
  }
#endif

  while (unhandled) {
    ival = unhandled;
    ival->current_range = &ival->range;
    unhandled = ival->list_next;
    position = ival->range.start;

    IR_LOG_LSRA("  ---- Processing", ival, "...");

    /* for each interval i in active */
    other = active;
    prev = NULL;
    while (other) {
      ir_live_range *r = other->current_range;

      IR_ASSERT(r);
      if (r->end <= position) {
        do {
          r = r->next;
        } while (r && r->end <= position);
        if (!r) {
          /* move i from active to handled */
          other = other->list_next;
          if (prev) {
            prev->list_next = other;
          } else {
            active = other;
          }
          continue;
        }
        other->current_range = r;
      }
      if (position < r->start) {
        /* move i from active to inactive */
        if (prev) {
          prev->list_next = other->list_next;
        } else {
          active = other->list_next;
        }
        other->list_next = inactive;
        inactive = other;
      } else {
        prev = other;
      }
      other = prev ? prev->list_next : active;
    }

    /* for each interval i in inactive */
    other = inactive;
    prev = NULL;
    while (other) {
      ir_live_range *r = other->current_range;

      IR_ASSERT(r);
      if (r->end <= position) {
        do {
          r = r->next;
        } while (r && r->end <= position);
        if (!r) {
          /* move i from inactive to handled */
          other = other->list_next;
          if (prev) {
            prev->list_next = other;
          } else {
            inactive = other;
          }
          continue;
        }
        other->current_range = r;
      }
      if (position >= r->start) {
        /* move i from inactive to active */
        if (prev) {
          prev->list_next = other->list_next;
        } else {
          inactive = other->list_next;
        }
        other->list_next = active;
        active = other;
      } else {
        prev = other;
      }
      other = prev ? prev->list_next : inactive;
    }

    reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);
    if (reg == IR_REG_NONE) {
      reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);
    }
  }

#if 0 //def IR_DEBUG
  /* all intervals must be processed */
  ival = active;
  while (ival) {
    IR_ASSERT(!ival->next);
    ival = ival->list_next;
  }
  ival = inactive;
  while (ival) {
    IR_ASSERT(!ival->next);
    ival = ival->list_next;
  }
#endif

  if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) {

    if (ctx->binding) {
      ir_assign_bound_spill_slots(ctx);
    }

    /* Use simple linear-scan (without holes) to allocate and reuse spill slots */
    unhandled = NULL;
    for (j = ctx->vregs_count; j != 0; j--) {
      ival = ctx->live_intervals[j];
      if (ival
       && (ival->next || ival->reg == IR_REG_NONE)
       && ival->stack_spill_pos == -1) {
        ival->flags |= IR_LIVE_INTERVAL_SPILLED;
        if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
          ir_live_range *r;

          other = ival;
          while (other->next) {
            other = other->next;
          }
          r = &other->range;
          while (r->next) {
            r = r->next;
          }
          ival->end = r->end;
          ir_add_to_unhandled_spill(&unhandled, ival);
        }
      }
    }

    if (unhandled) {
      uint8_t size;
      ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
      ir_live_interval *old;

      data.handled = handled;
      active = NULL;
      while (unhandled) {
        ival = unhandled;
        ival->current_range = &ival->range;
        unhandled = ival->list_next;
        position = ival->range.start;

        /* for each interval i in active */
        other = active;
        prev = NULL;
        while (other) {
          if (other->end <= position) {
            /* move i from active to handled */
            if (prev) {
              prev->list_next = other->list_next;
            } else {
              active = other->list_next;
            }
            size = ir_type_size[other->type];
            IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
            old = handled[size];
            while (old) {
              if (old->stack_spill_pos == other->stack_spill_pos) {
                break;
              }
              old = old->list_next;
            }
            if (!old) {
              other->list_next = handled[size];
              handled[size] = other;
            }
          } else {
            prev = other;
          }
          other = prev ? prev->list_next : active;
        }

        ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data);
        if (unhandled && ival->end > unhandled->range.start) {
          ival->list_next = active;
          active = ival;
        } else {
          size = ir_type_size[ival->type];
          IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
          old = handled[size];
          while (old) {
            if (old->stack_spill_pos == ival->stack_spill_pos) {
              break;
            }
            old = old->list_next;
          }
          if (!old) {
            ival->list_next = handled[size];
            handled[size] = ival;
          }
        }
      }
      data.handled = NULL;
    }
  }

#ifdef IR_TARGET_X86
  if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
    ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
  } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
    ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
  } else {
    ctx->ret_slot = -1;
  }
#endif

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_RA) {
    fprintf(stderr, "---- Finish LSRA\n");
    ir_dump_live_ranges(ctx, stderr);
    fprintf(stderr, "----\n");
  }
#endif

  return 1;
}
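
/*
 * Editor's note: the spill-slot pass inside ir_linear_scan() is itself a
 * tiny linear scan without lifetime holes.  The sketch below (not part of
 * ir_ra.c) shows the reuse idea: a slot released by an interval that has
 * already ended is taken from a per-size free list (the analogue of
 * data.handled[size]) before the stack frame grows.  Types and ownership
 * are hypothetical simplifications.
 */
typedef struct free_slot { int pos; struct free_slot *next; } free_slot;

static int take_spill_slot(free_slot **free_list, int *frame_size, int size)
{
  free_slot *s = *free_list;

  if (s) {                 /* reuse a slot of the same size class */
    *free_list = s->next;
    return s->pos;
  }
  *frame_size += size;     /* otherwise extend the stack frame */
  return *frame_size - size;
}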

static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available)
{
  ir_worklist worklist;
  ir_block *bb;
  uint32_t b, *p, n;

  ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1);
  ir_worklist_push(&worklist, b0);
  while (ir_worklist_len(&worklist) != 0) {
    b = ir_worklist_pop(&worklist);
    bb = &ctx->cfg_blocks[b];
    if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) {
      ir_worklist_free(&worklist);
      return 1;
    }
    n = bb->predecessors_count;
    for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
      b = *p;
      bb = &ctx->cfg_blocks[b];

      if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) {
        ir_worklist_free(&worklist);
        return 1;
      } else if (!ir_bitset_in(available, b)) {
        ir_worklist_push(&worklist, b);
      }
    }
  }
  ir_worklist_free(&worklist);
  return 0;
}
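
/*
 * Editor's note: needs_spill_reload() is a backward reachability test over
 * predecessor edges.  The recursive sketch below (not part of ir_ra.c)
 * captures the rule: a reload is needed if, walking backwards from the use,
 * we can reach the function entry or leave the interval's coverage without
 * crossing a block whose end is already "available" (dominated by a store).
 * The block layout is hypothetical, and the real code uses an ir_worklist
 * rather than recursion.
 */
#include <stdbool.h>

#define MAX_PREDS 4

typedef struct cfg_block {
  struct cfg_block *preds[MAX_PREDS];
  int               npreds;
  bool              store_reaches_end; /* analogue of the "available" bitset */
  bool              is_entry;
  bool              visited;
} cfg_block;

static bool reload_needed(cfg_block *b)
{
  int i;

  if (b->is_entry) {
    return true; /* the value may arrive unstored from the entry */
  }
  b->visited = true;
  for (i = 0; i < b->npreds; i++) {
    cfg_block *p = b->preds[i];

    if (!p->store_reaches_end && !p->visited && reload_needed(p)) {
      return true;
    }
  }
  return false;
}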

static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
{
  if (use_pos->next
   && use_pos->op_num == 1
   && use_pos->next->pos == use_pos->pos
   && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
    /* Support for R2 = ADD(R1, R1) */
    use_pos = use_pos->next;
  }
  return use_pos->next && use_pos->next->op_num != 0;
}
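
/*
 * Editor's note: the shape recognized by needs_spill_load() above, shown as
 * plain C (sketch, not part of ir_ra.c).  When both operands of a single
 * instruction read the same spilled value, the two uses share one position
 * and one reload suffices; a use at a later position would still force its
 * own reload.
 */
static int add_same_spilled_operand(const int *spill_slot)
{
  int r1 = *spill_slot; /* single reload */

  return r1 + r1;       /* R2 = ADD(R1, R1): both uses consume it */
}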

static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t reg)
{
  char key[10];

  IR_ASSERT(reg != IR_REG_NONE);
  if (!ctx->fused_regs) {
    ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
    ir_strtab_init(ctx->fused_regs, 8, 128);
  }
  memcpy(key, &root, sizeof(ir_ref));
  memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
  ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
}
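
/*
 * Editor's note: ir_set_fused_reg() keys its string table with the byte
 * concatenation of two 32-bit refs; the sketch below (not part of ir_ra.c)
 * rebuilds just that packing.  The helper name is hypothetical.
 */
#include <stdint.h>
#include <string.h>

static void make_fused_key(char key[8], int32_t root, int32_t ref_and_op)
{
  memcpy(key, &root, sizeof(root));                 /* bytes 0..3: root ref */
  memcpy(key + 4, &ref_and_op, sizeof(ref_and_op)); /* bytes 4..7: ref * sizeof(ir_ref) + op_num */
}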

static void assign_regs(ir_ctx *ctx)
{
  ir_ref i;
  ir_live_interval *ival, *top_ival;
  ir_use_pos *use_pos;
  int8_t reg, old_reg;
  ir_ref ref;
  ir_regset used_regs = 0;

  if (!ctx->regs) {
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
  }

  if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) {
    for (i = 1; i <= ctx->vregs_count; i++) {
      ival = ctx->live_intervals[i];
      if (ival) {
        do {
          if (ival->reg != IR_REG_NONE) {
            reg = ival->reg;
            IR_REGSET_INCL(used_regs, reg);
            use_pos = ival->use_pos;
            while (use_pos) {
              ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
              ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
              use_pos = use_pos->next;
            }
          }
          ival = ival->next;
        } while (ival);
      }
    }
  } else {
    ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1);

    for (i = 1; i <= ctx->vregs_count; i++) {
      top_ival = ival = ctx->live_intervals[i];
      if (ival) {
        if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) {
          do {
            if (ival->reg != IR_REG_NONE) {
              IR_REGSET_INCL(used_regs, ival->reg);
              use_pos = ival->use_pos;
              while (use_pos) {
                reg = ival->reg;
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                if (use_pos->hint_ref < 0) {
                  ref = -use_pos->hint_ref;
                }
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

                use_pos = use_pos->next;
              }
            }
            ival = ival->next;
          } while (ival);
        } else {
          do {
            if (ival->reg != IR_REG_NONE) {
              ir_ref prev_use_ref = IR_UNUSED;

              ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
              IR_REGSET_INCL(used_regs, ival->reg);
              use_pos = ival->use_pos;
              while (use_pos) {
                reg = ival->reg;
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                // TODO: Insert spill loads and stores in optimal positions (resolution)
                if (use_pos->op_num == 0) {
                  if ((ctx->ir_base[ref].op == IR_COPY
                    || ctx->ir_base[ref].op == IR_BITCAST
                    || ctx->ir_base[ref].op == IR_TRUNC)
                   && !IR_IS_CONST_REF(ctx->ir_base[ref].op1)
                   && ctx->vregs[ctx->ir_base[ref].op1] == (uint32_t)i) {
                    /* register reuse */
                    ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                    prev_use_ref = ref;
                    use_pos = use_pos->next;
                    continue;
                  }
                  ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
                  if (ctx->ir_base[ref].op == IR_PHI) {
                    /* Spilled PHI var is passed through memory */
                    reg = IR_REG_NONE;
                    prev_use_ref = IR_UNUSED;
                  } else if (ctx->ir_base[ref].op == IR_PARAM
                   && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
                    /* Stack PARAM var is passed through memory */
                    reg = IR_REG_NONE;
                  } else {
                    uint32_t use_b = ctx->cfg_map[ref];

                    if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
                      ir_bitset_incl(available, use_b);
                    }
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_STORE;
                    }
                    prev_use_ref = ref;
                  }
                } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
                 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
                  if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
                   && use_pos->hint != reg
//                   && ctx->ir_base[ref].op != IR_CALL
//                   && ctx->ir_base[ref].op != IR_TAILCALL) {
                   && ctx->ir_base[ref].op != IR_SNAPSHOT
                   && !needs_spill_load(ctx, ival, use_pos)) {
                    /* fuse spill load (valid only when register is not reused) */
                    reg = IR_REG_NONE;
                    if (use_pos->next
                     && use_pos->op_num == 1
                     && use_pos->next->pos == use_pos->pos
                     && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
                      /* Support for R2 = BINOP(R1, R1) */
                      if (use_pos->hint_ref < 0) {
                        ref = -use_pos->hint_ref;
                      }
                      ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                      use_pos = use_pos->next;
                    }
                  } else {
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_LOAD;
                    }
                    if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
                      uint32_t use_b = ctx->cfg_map[ref];

                      if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
                        ir_bitset_incl(available, use_b);
                      }
                      prev_use_ref = ref;
                    }
                  }
                  if (use_pos->hint_ref < 0
                   && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_LOAD;
                    }
                    if (reg != old_reg) {
                      IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
                      ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
                      ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
                      use_pos = use_pos->next;
                      continue;
                    }
                  }
                } else if (use_pos->flags & IR_PHI_USE) {
                  IR_ASSERT(use_pos->hint_ref < 0);
                  IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
                  IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
                  if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
                    /* Spilled PHI var is passed through memory */
                    reg = IR_REG_NONE;
                  }
                } else if (use_pos->hint_ref < 0
                    && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
                  if (reg != old_reg) {
                    IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
                    ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
                    ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
                    use_pos = use_pos->next;
                    continue;
                  }
                } else {
                  /* reuse register without spill load */
                }
                if (use_pos->hint_ref < 0) {
                  ref = -use_pos->hint_ref;
                }
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

                use_pos = use_pos->next;
              }
            } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
              use_pos = ival->use_pos;
              while (use_pos) {
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
                  IR_ASSERT(use_pos->hint_ref >= 0);
                  /* A reference to a CPU spill slot */
                  reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
                  ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                }
                use_pos = use_pos->next;
              }
            }
            ival = ival->next;
          } while (ival);
        }
      }
    }
    ir_mem_free(available);
  }

  /* Temporary registers */
  ival = ctx->live_intervals[0];
  if (ival) {
    do {
      IR_ASSERT(ival->reg != IR_REG_NONE);
      IR_REGSET_INCL(used_regs, ival->reg);
      reg = ival->reg;
      if (ival->tmp_op_num > 0) {
        ir_insn *insn = &ctx->ir_base[ival->tmp_ref];

        if (ival->tmp_op_num <= insn->inputs_count) {
          ir_ref *ops = insn->ops;
          if (IR_IS_CONST_REF(ops[ival->tmp_op_num])) {
            /* constant rematerialization */
            reg |= IR_REG_SPILL_LOAD;
          } else if (ctx->ir_base[ops[ival->tmp_op_num]].op == IR_ALLOCA
              || ctx->ir_base[ops[ival->tmp_op_num]].op == IR_VADDR) {
            /* local address rematerialization */
            reg |= IR_REG_SPILL_LOAD;
          }
        }
      }
      ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, reg);
      ival = ival->next;
    } while (ival);
  }

  if (ctx->fixed_stack_frame_size != -1) {
    ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
    if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
      ctx->used_preserved_regs)) {
      // TODO: Preserved reg and fixed frame conflict ???
      // IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
    }
  } else {
    ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
      IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
        (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED));
  }

  ir_fix_stack_frame(ctx);
}
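
/*
 * Editor's note: assign_regs() communicates spill decisions by or'ing flag
 * bits into the per-operand register byte (IR_REG_SPILL_LOAD,
 * IR_REG_SPILL_STORE, IR_REG_SPILL_SPECIAL).  The sketch below (not part of
 * ir_ra.c) shows the packing idea only; the bit values are hypothetical,
 * not the real IR constants.
 */
#include <stdint.h>
#include <stdio.h>

#define REG_MASK    0x3f
#define SPILL_LOAD  0x40 /* reload from the spill slot before the use */
#define SPILL_STORE 0x80 /* store to the spill slot after the definition */

static void describe_packed_reg(uint8_t packed)
{
  printf("reg=%d load=%d store=%d\n",
         packed & REG_MASK,
         (packed & SPILL_LOAD) != 0,
         (packed & SPILL_STORE) != 0);
}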

int ir_reg_alloc(ir_ctx *ctx)
{
  if (ir_linear_scan(ctx)) {
    assign_regs(ctx);
    return 1;
  }
  return 0;
}
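
/*
 * Editor's note: a hypothetical driver (not part of ir_ra.c) showing where
 * ir_reg_alloc() sits -- it is the pass entry point, run after live ranges
 * are built and before code emission.  A zero return means no live
 * intervals were constructed, so there was nothing to allocate.
 */
#include <stdio.h>
#include "ir.h" /* assumes the IR headers used by the sources above */

static int run_register_allocation(ir_ctx *ctx)
{
  if (!ir_reg_alloc(ctx)) {
    fprintf(stderr, "register allocation skipped: no live intervals\n");
    return 0;
  }
  return 1;
}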