Coverage Report

Created: 2025-09-27 06:26

/src/php-src/ext/opcache/jit/ir/ir_ra.c
Line
Count
Source
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 *
7
 * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
8
 * Michael Franz, CGO'10 (2010)
9
 * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
10
 * Christian Wimmer, VEE'05 (2005)
11
 */
12
13
#ifndef _GNU_SOURCE
14
# define _GNU_SOURCE
15
#endif
16
17
#include <stdlib.h>
18
#include "ir.h"
19
20
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
21
# include "ir_x86.h"
22
#elif defined(IR_TARGET_AARCH64)
23
# include "ir_aarch64.h"
24
#else
25
# error "Unknown IR target"
26
#endif
27
28
#include "ir_private.h"
29
30
int ir_regs_number(void)
31
0
{
32
0
  return IR_REG_NUM;
33
0
}
34
35
bool ir_reg_is_int(int32_t reg)
36
0
{
37
0
  IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
38
0
  return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
39
0
}
40
41
static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
42
0
{
43
0
  uint32_t *vregs;
44
0
  uint32_t vregs_count = 0;
45
0
  uint32_t b;
46
0
  ir_ref i, n;
47
0
  ir_block *bb;
48
0
  ir_insn *insn;
49
0
  uint32_t flags;
50
51
  /* Assign unique virtual register to each data node */
52
0
  vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
53
0
  n = 1;
54
0
  for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
55
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
56
0
    i = bb->start;
57
58
    /* skip first instruction */
59
0
    insn = ctx->ir_base + i;
60
0
    n = ir_insn_len(insn);
61
0
    i += n;
62
0
    insn += n;
63
0
    while (i < bb->end) {
64
0
      flags = ir_op_flags[insn->op];
65
0
      if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
66
0
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
67
0
        if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
68
0
          vregs[i] = ++vregs_count;
69
0
        }
70
0
      }
71
0
      n = ir_insn_len(insn);
72
0
      i += n;
73
0
      insn += n;
74
0
    }
75
0
  }
76
0
  ctx->vregs_count = vregs_count;
77
0
  ctx->vregs = vregs;
78
79
0
  return 1;
80
0
}
81
82
int ir_assign_virtual_registers(ir_ctx *ctx)
83
0
{
84
0
  uint32_t *vregs;
85
0
  uint32_t vregs_count = 0;
86
0
  ir_ref i;
87
0
  ir_insn *insn;
88
89
0
  if (!ctx->rules) {
90
0
    return ir_assign_virtual_registers_slow(ctx);
91
0
  }
92
93
  /* Assign unique virtual register to each rule that needs it */
94
0
  vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
95
96
0
  for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
97
0
    uint32_t v = 0;
98
99
0
    if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
100
0
      uint32_t flags = ir_op_flags[insn->op];
101
102
0
      if ((flags & IR_OP_FLAG_DATA)
103
0
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
104
0
        v = ++vregs_count;
105
0
      }
106
0
    }
107
0
    vregs[i] = v;
108
0
  }
109
110
0
  ctx->vregs_count = vregs_count;
111
0
  ctx->vregs = vregs;
112
113
0
  return 1;
114
0
}
115
116
/* Lifetime intervals construction */
117
118
static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
119
0
{
120
0
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
121
122
0
  ival->type = IR_VOID;
123
0
  ival->reg = IR_REG_NONE;
124
0
  ival->flags = 0;
125
0
  ival->vreg = v;
126
0
  ival->stack_spill_pos = -1; // not allocated
127
0
  ival->range.start = start;
128
0
  ival->range.end = ival->end = end;
129
0
  ival->range.next = NULL;
130
0
  ival->use_pos = NULL;
131
0
  ival->next = NULL;
132
133
0
  ctx->live_intervals[v] = ival;
134
0
  return ival;
135
0
}
136
137
static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
138
0
{
139
0
  ir_live_interval *ival = ctx->live_intervals[v];
140
0
  ir_live_range *p, *q;
141
142
0
  if (!ival) {
143
0
    return ir_new_live_range(ctx, v, start, end);
144
0
  }
145
146
0
  p = &ival->range;
147
0
  if (end >= p->start) {
148
0
    ir_live_range *prev = NULL;
149
150
0
    do {
151
0
      if (p->end >= start) {
152
0
        if (start < p->start) {
153
0
          p->start = start;
154
0
        }
155
0
        if (end > p->end) {
156
          /* merge with next */
157
0
          ir_live_range *next = p->next;
158
159
0
          p->end = end;
160
0
          while (next && p->end >= next->start) {
161
0
            if (next->end > p->end) {
162
0
              p->end = next->end;
163
0
            }
164
0
            p->next = next->next;
165
            /* remember in the "unused_ranges" list */
166
0
            next->next = ctx->unused_ranges;
167
0
            ctx->unused_ranges = next;
168
0
            next = p->next;
169
0
          }
170
0
          if (!p->next) {
171
0
            ival->end = p->end;
172
0
          }
173
0
        }
174
0
        return ival;
175
0
      }
176
0
      prev = p;
177
0
      p = prev->next;
178
0
    } while (p && end >= p->start);
179
0
    if (!p) {
180
0
      ival->end = end;
181
0
    }
182
0
    if (prev) {
183
0
      if (ctx->unused_ranges) {
184
        /* reuse */
185
0
        q = ctx->unused_ranges;
186
0
        ctx->unused_ranges = q->next;
187
0
      } else {
188
0
        q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
189
0
      }
190
0
      prev->next = q;
191
0
      q->start = start;
192
0
      q->end = end;
193
0
      q->next = p;
194
0
      return ival;
195
0
    }
196
0
  }
197
198
0
  if (ctx->unused_ranges) {
199
    /* reuse */
200
0
    q = ctx->unused_ranges;
201
0
    ctx->unused_ranges = q->next;
202
0
  } else {
203
0
    q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
204
0
  }
205
0
  q->start = p->start;
206
0
  q->end = p->end;
207
0
  q->next = p->next;
208
0
  p->start = start;
209
0
  p->end = end;
210
0
  p->next = q;
211
0
  return ival;
212
0
}
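/*
 * A minimal sketch, not part of ir_ra.c: the merge semantics of
 * ir_add_live_range() on a simplified, self-contained range list. The struct
 * and helper below are hypothetical stand-ins for ir_live_range and the arena
 * machinery; they only show how an incoming [start, end) range is folded into
 * a sorted list, absorbing every existing range it touches.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct range { int start, end; struct range *next; } range;

static range *add_range(range *head, int start, int end)
{
  range **pp = &head;

  /* skip ranges that end strictly before the new one starts */
  while (*pp && (*pp)->end < start) {
    pp = &(*pp)->next;
  }
  if (*pp && (*pp)->start <= end) {
    /* overlap or adjacency: widen this range, then absorb followers */
    range *p = *pp;
    if (start < p->start) p->start = start;
    if (end > p->end) p->end = end;
    while (p->next && p->next->start <= p->end) {
      range *dead = p->next;
      if (dead->end > p->end) p->end = dead->end;
      p->next = dead->next;
      free(dead);
    }
  } else {
    /* disjoint: insert a fresh range before *pp */
    range *q = malloc(sizeof(range));
    q->start = start; q->end = end; q->next = *pp;
    *pp = q;
  }
  return head;
}

int main(void)
{
  range *list = NULL, *p;

  list = add_range(list, 40, 50);
  list = add_range(list, 10, 20);
  list = add_range(list, 18, 42);          /* bridges both into [10, 50) */
  for (p = list; p; p = p->next) {
    printf("[%d, %d)\n", p->start, p->end);
  }
  return 0;
}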
213
214
IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
215
0
{
216
0
  ir_live_interval *ival = ctx->live_intervals[v];
217
218
0
  if (ival && ival->range.start == end) {
219
0
    ival->range.start = start;
220
0
    return ival;
221
0
  }
222
0
  return ir_add_live_range(ctx, v, start, end);
223
0
}
224
225
static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end)
226
0
{
227
0
  int v = ctx->vregs_count + 1 + reg;
228
0
  ir_live_interval *ival = ctx->live_intervals[v];
229
0
  ir_live_range *q;
230
231
0
  if (!ival) {
232
0
    ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
233
0
    ival->type = IR_VOID;
234
0
    ival->reg = reg;
235
0
    ival->flags = IR_LIVE_INTERVAL_FIXED;
236
0
    ival->vreg = v;
237
0
    ival->stack_spill_pos = -1; // not allocated
238
0
    ival->range.start = start;
239
0
    ival->range.end = ival->end = end;
240
0
    ival->range.next = NULL;
241
0
    ival->use_pos = NULL;
242
0
    ival->next = NULL;
243
244
0
    ctx->live_intervals[v] = ival;
245
0
  } else if (EXPECTED(end < ival->range.start)) {
246
0
    if (ctx->unused_ranges) {
247
      /* reuse */
248
0
      q = ctx->unused_ranges;
249
0
      ctx->unused_ranges = q->next;
250
0
    } else {
251
0
      q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
252
0
    }
253
254
0
    q->start = ival->range.start;
255
0
    q->end = ival->range.end;
256
0
    q->next = ival->range.next;
257
0
    ival->range.start = start;
258
0
    ival->range.end = end;
259
0
    ival->range.next = q;
260
0
  } else if (end == ival->range.start) {
261
0
    ival->range.start = start;
262
0
  } else {
263
0
    ir_add_live_range(ctx, v, start, end);
264
0
  }
265
0
}
266
267
static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg)
268
0
{
269
0
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
270
271
0
  ival->type = tmp_reg.type;
272
0
  ival->reg = IR_REG_NONE;
273
0
  ival->flags = IR_LIVE_INTERVAL_TEMP;
274
0
  ival->tmp_ref = tmp_ref;
275
0
  ival->tmp_op_num = tmp_op_num;
276
0
  ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start;
277
0
  ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end;
278
0
  ival->range.next = NULL;
279
0
  ival->use_pos = NULL;
280
281
0
  if (!ctx->live_intervals[0]) {
282
0
    ival->next = NULL;
283
0
    ctx->live_intervals[0] = ival;
284
0
  } else if (ival->range.start >= ctx->live_intervals[0]->range.start) {
285
0
    ir_live_interval *prev = ctx->live_intervals[0];
286
287
0
    while (prev->next && ival->range.start >= prev->next->range.start) {
288
0
      prev = prev->next;
289
0
    }
290
0
    ival->next = prev->next;
291
0
    prev->next = ival;
292
0
  } else {
293
0
    ir_live_interval *next = ctx->live_intervals[0];
294
295
0
    ival->next = next;
296
0
    ctx->live_intervals[0] = ival;
297
0
  }
298
0
  return;
299
0
}
300
301
static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)
302
0
{
303
0
  ir_live_interval *ival = ctx->live_intervals[0];
304
305
0
  if (ival) {
306
0
    while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) {
307
0
      if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) {
308
0
        return 1;
309
0
      }
310
0
      ival = ival->next;
311
0
    }
312
0
  }
313
0
  return 0;
314
0
}
315
316
static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start)
317
0
{
318
0
  ir_live_interval *ival = ctx->live_intervals[v];
319
0
  ir_live_range *p = &ival->range;
320
321
#if 0
322
  while (p && p->start < old_start) {
323
    p = p->next;
324
  }
325
#endif
326
0
  IR_ASSERT(ival && p->start == old_start);
327
0
  p->start = new_start;
328
0
  return ival;
329
0
}
330
331
static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
332
0
{
333
0
  ir_use_pos *p = ival->use_pos;
334
335
0
  if (EXPECTED(!p || p->pos > use_pos->pos)) {
336
0
    use_pos->next = p;
337
0
    ival->use_pos = use_pos;
338
0
  } else {
339
0
    ir_use_pos *prev;
340
341
0
    do {
342
0
      prev = p;
343
0
      p = p->next;
344
0
    } while (p && p->pos < use_pos->pos);
345
346
0
    use_pos->next = prev->next;
347
0
    prev->next = use_pos;
348
0
  }
349
0
}
350
351
352
IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref)
353
0
{
354
0
  ir_use_pos *use_pos;
355
356
0
  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
357
0
  use_pos->op_num = op_num;
358
0
  use_pos->hint = hint;
359
0
  use_pos->flags = use_flags;
360
0
  use_pos->hint_ref = hint_ref;
361
0
  use_pos->pos = pos;
362
363
0
  if (hint != IR_REG_NONE) {
364
0
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
365
0
  }
366
0
  if (hint_ref > 0) {
367
0
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
368
0
  }
369
370
0
  ir_add_use_pos(ctx, ival, use_pos);
371
0
}
372
373
static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref)
374
0
{
375
0
  ir_use_pos *use_pos;
376
377
0
  IR_ASSERT(phi_ref > 0);
378
0
  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
379
0
  use_pos->op_num = op_num;
380
0
  use_pos->hint = IR_REG_NONE;
381
0
  use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ???
382
0
  use_pos->hint_ref = -phi_ref;
383
0
  use_pos->pos = pos;
384
385
0
  ir_add_use_pos(ctx, ival, use_pos);
386
0
}
387
388
static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
389
0
{
390
0
  ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];
391
392
0
  if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) {
393
0
    ir_use_pos *use_pos = ival->use_pos;
394
395
0
    while (use_pos) {
396
0
      if (use_pos->pos == pos) {
397
0
        if (use_pos->hint == IR_REG_NONE) {
398
0
          use_pos->hint = hint;
399
0
          ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
400
0
        }
401
0
      }
402
0
      use_pos = use_pos->next;
403
0
    }
404
0
  }
405
0
}
406
407
static void ir_hint_propagation(ir_ctx *ctx)
408
0
{
409
0
  int i;
410
0
  ir_live_interval *ival;
411
0
  ir_use_pos *use_pos;
412
0
  ir_use_pos *hint_use_pos;
413
414
0
  for (i = ctx->vregs_count; i > 0; i--) {
415
0
    ival = ctx->live_intervals[i];
416
0
    if (ival
417
0
     && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
418
0
      use_pos = ival->use_pos;
419
0
      hint_use_pos = NULL;
420
0
      while (use_pos) {
421
0
        if (use_pos->op_num == 0) {
422
0
          if (use_pos->hint_ref > 0) {
423
0
            hint_use_pos = use_pos;
424
0
          }
425
0
        } else if (use_pos->hint != IR_REG_NONE) {
426
0
          if (hint_use_pos) {
427
0
            ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
428
0
            hint_use_pos = NULL;
429
0
          }
430
0
        }
431
0
        use_pos = use_pos->next;
432
0
      }
433
0
    }
434
0
  }
435
0
}
436
437
#ifdef IR_BITSET_LIVENESS
438
/* DFS + Loop-Forest liveness for SSA using bitset(s) */
439
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
440
{
441
  bool ok = 1;
442
  int count = 0;
443
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
444
  ir_ref i;
445
446
  IR_BITSET_FOREACH(live, len, i) {
447
    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
448
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
449
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
450
451
    if (use_pos->op_num) {
452
      ir_ref *ops = ctx->ir_base[ref].ops;
453
      ref = ops[use_pos->op_num];
454
    }
455
456
    if (ctx->ir_base[ref].op == IR_PARAM) {
457
      continue;
458
    }
459
    if (ctx->binding) {
460
      ir_ref var = ir_binding_find(ctx, ref);
461
      if (var < 0) {
462
        /* We may load the value at OSR entry-point */
463
        if (!count) {
464
          bb->flags &= ~IR_BB_EMPTY;
465
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
466
          if (!ctx->osr_entry_loads) {
467
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
468
            ir_list_init(list, 16);
469
          }
470
          ir_list_push(list, b);
471
          ir_list_push(list, 0);
472
        }
473
        ir_list_push(list, ref);
474
        count++;
475
        continue;
476
      }
477
    }
478
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
479
    ok = 0;
480
  } IR_BITSET_FOREACH_END();
481
482
  if (!ok) {
483
    IR_ASSERT(0);
484
  }
485
  if (count) {
486
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
487
488
#if 0
489
    /* ENTRY "clobbers" all registers */
490
    ir_ref ref = ctx->ir_base[bb->start].op1;
491
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
492
      IR_DEF_LIVE_POS_FROM_REF(ref),
493
      IR_SAVE_LIVE_POS_FROM_REF(ref));
494
#endif
495
  }
496
}
497
498
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live)
499
{
500
  ir_ref stack[4];
501
  int stack_pos = 0;
502
  ir_target_constraints constraints;
503
  ir_insn *insn;
504
  uint32_t j, n, flags, def_flags;
505
  ir_ref *p, child;
506
  uint8_t use_flags;
507
  ir_reg reg;
508
  ir_live_pos use_pos;
509
  ir_live_interval *ival;
510
511
  while (1) {
512
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
513
514
    if (!(ctx->rules[input] & IR_SIMPLE)) {
515
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
516
      n = constraints.tmps_count;
517
      while (n > 0) {
518
        n--;
519
        if (constraints.tmp_regs[n].type) {
520
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
521
        } else {
522
          /* CPU specific constraints */
523
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
524
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
525
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
526
        }
527
      }
528
    } else {
529
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
530
      constraints.hints_count = 0;
531
    }
532
533
    insn = &ctx->ir_base[input];
534
    flags = ir_op_flags[insn->op];
535
    n = IR_INPUT_EDGES_COUNT(flags);
536
    j = 1;
537
    p = insn->ops + j;
538
    if (flags & IR_OP_FLAG_CONTROL) {
539
      j++;
540
      p++;
541
    }
542
    for (; j <= n; j++, p++) {
543
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
544
      child = *p;
545
      if (child > 0) {
546
        uint32_t v = ctx->vregs[child];
547
548
        if (v) {
549
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
550
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
551
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
552
          if (EXPECTED(reg == IR_REG_NONE)) {
553
            use_pos += IR_USE_SUB_REF;
554
          }
555
556
          if (!ir_bitset_in(live, v)) {
557
            /* live.add(opd) */
558
            ir_bitset_incl(live, v);
559
            /* intervals[opd].addRange(b.from, op.id) */
560
            ival = ir_add_live_range(ctx, v,
561
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
562
          } else {
563
            ival = ctx->live_intervals[v];
564
          }
565
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
566
        } else if (ctx->rules[child] & IR_FUSED) {
567
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
568
          stack[stack_pos++] = child;
569
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
570
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
571
        }
572
      }
573
    }
574
    if (!stack_pos) {
575
      break;
576
    }
577
    input = stack[--stack_pos];
578
  }
579
}
580
581
int ir_compute_live_ranges(ir_ctx *ctx)
582
{
583
  uint32_t b, i, j, k, n, succ, *p;
584
  ir_ref ref;
585
  uint32_t len;
586
  ir_insn *insn;
587
  ir_block *bb, *succ_bb;
588
#ifdef IR_DEBUG
589
  ir_bitset visited;
590
#endif
591
  ir_bitset live, bb_live;
592
  ir_bitset loops = NULL;
593
  ir_bitqueue queue;
594
  ir_live_interval *ival;
595
596
  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
597
    return 0;
598
  }
599
600
  if (ctx->rules) {
601
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
602
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
603
  }
604
605
  /* Root of the list of IR_VARs */
606
  ctx->vars = IR_UNUSED;
607
608
  /* Compute Live Ranges */
609
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
610
  len = ir_bitset_len(ctx->vregs_count + 1);
611
  bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
612
613
  /* vregs + tmp + fixed + SCRATCH + ALL */
614
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
615
616
#ifdef IR_DEBUG
617
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
618
#endif
619
620
  if (!ctx->arena) {
621
    ctx->arena = ir_arena_create(16 * 1024);
622
  }
623
624
  /* for each basic block in reverse order */
625
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
626
    bb = &ctx->cfg_blocks[b];
627
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
628
    /* for each successor of b */
629
630
#ifdef IR_DEBUG
631
    ir_bitset_incl(visited, b);
632
#endif
633
    live = bb_live + (len * b);
634
    n = bb->successors_count;
635
    if (n == 0) {
636
      ir_bitset_clear(live, len);
637
    } else {
638
      p = &ctx->cfg_edges[bb->successors];
639
      succ = *p;
640
641
#ifdef IR_DEBUG
642
      /* blocks must be ordered so that all dominators of a block come before it */
643
      IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
644
#endif
645
646
      /* live = union of successors.liveIn */
647
      if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
648
        ir_bitset_copy(live, bb_live + (len * succ), len);
649
      } else {
650
        IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
651
        ir_bitset_clear(live, len);
652
      }
653
      if (n > 1) {
654
        for (p++, n--; n > 0; p++, n--) {
655
          succ = *p;
656
          if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
657
            ir_bitset_union(live, bb_live + (len * succ), len);
658
          } else {
659
            IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
660
          }
661
        }
662
      }
663
664
      /* for each opd in live */
665
      IR_BITSET_FOREACH(live, len, i) {
666
        /* intervals[opd].addRange(b.from, b.to) */
667
        ir_add_prev_live_range(ctx, i,
668
          IR_START_LIVE_POS_FROM_REF(bb->start),
669
          IR_END_LIVE_POS_FROM_REF(bb->end));
670
      } IR_BITSET_FOREACH_END();
671
    }
672
673
    if (bb->successors_count == 1) {
674
      /* for each phi function phi of successor */
675
      succ = ctx->cfg_edges[bb->successors];
676
      succ_bb = &ctx->cfg_blocks[succ];
677
      if (succ_bb->flags & IR_BB_HAS_PHI) {
678
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
679
680
        k = ir_phi_input_number(ctx, succ_bb, b);
681
        IR_ASSERT(k != 0);
682
        for (ref = 0; ref < use_list->count; ref++) {
683
          ir_ref use = ctx->use_edges[use_list->refs + ref];
684
          insn = &ctx->ir_base[use];
685
          if (insn->op == IR_PHI) {
686
            ir_ref input = ir_insn_op(insn, k);
687
            if (input > 0) {
688
              uint32_t v = ctx->vregs[input];
689
690
              /* live.add(phi.inputOf(b)) */
691
              IR_ASSERT(v);
692
              ir_bitset_incl(live, v);
693
              /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */
694
              ival = ir_add_prev_live_range(ctx, v,
695
                IR_START_LIVE_POS_FROM_REF(bb->start),
696
                IR_END_LIVE_POS_FROM_REF(bb->end));
697
              ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
698
            }
699
          }
700
        }
701
      }
702
    }
703
704
    /* for each operation op of b in reverse order */
705
    ref = bb->end;
706
    insn = &ctx->ir_base[ref];
707
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
708
      ref = ctx->prev_ref[ref];
709
    }
710
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
711
      uint32_t def_flags;
712
      uint32_t flags;
713
      ir_ref *p;
714
      ir_target_constraints constraints;
715
      uint32_t v;
716
717
      if (ctx->rules) {
718
        int n;
719
720
        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
721
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
722
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
723
           && ctx->use_lists[ref].count > 0) {
724
            insn = &ctx->ir_base[ref];
725
            if (insn->op != IR_VADDR) {
726
              insn->op3 = ctx->vars;
727
              ctx->vars = ref;
728
            }
729
          }
730
          continue;
731
        }
732
733
        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
734
        n = constraints.tmps_count;
735
        while (n > 0) {
736
          n--;
737
          if (constraints.tmp_regs[n].type) {
738
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
739
          } else {
740
            /* CPU specific constraints */
741
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
742
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
743
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
744
          }
745
        }
746
      } else {
747
        def_flags = 0;
748
        constraints.def_reg = IR_REG_NONE;
749
        constraints.hints_count = 0;
750
      }
751
752
      insn = &ctx->ir_base[ref];
753
      v = ctx->vregs[ref];
754
      if (v) {
755
        IR_ASSERT(ir_bitset_in(live, v));
756
757
        if (insn->op != IR_PHI) {
758
          ir_live_pos def_pos;
759
          ir_ref hint_ref = 0;
760
          ir_reg reg = constraints.def_reg;
761
762
          if (reg != IR_REG_NONE) {
763
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
764
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
765
              /* parameter register must be kept live until it's copied */
766
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
767
            }
768
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
769
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
770
              hint_ref = insn->op1;
771
            }
772
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
773
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
774
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
775
          } else {
776
            if (insn->op == IR_PARAM) {
777
              /* We may reuse parameter stack slot for spilling */
778
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
779
            } else if (insn->op == IR_VLOAD) {
780
              /* Load may be fused into the usage instruction */
781
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
782
            }
783
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
784
          }
785
          /* live.remove(opd) */
786
          ir_bitset_excl(live, v);
787
          /* intervals[opd].setFrom(op.id) */
788
          ival = ir_fix_live_range(ctx, v,
789
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
790
          ival->type = insn->type;
791
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
792
        } else {
793
          /* live.remove(opd) */
794
          ir_bitset_excl(live, v);
795
          /* PHIs inputs must not be processed */
796
          ival = ctx->live_intervals[v];
797
          if (UNEXPECTED(!ival)) {
798
            /* Dead PHI */
799
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
800
          }
801
          ival->type = insn->type;
802
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
803
          continue;
804
        }
805
      }
806
807
      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
808
      flags = ir_op_flags[insn->op];
809
      j = 1;
810
      p = insn->ops + 1;
811
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
812
        j++;
813
        p++;
814
      }
815
      for (; j <= insn->inputs_count; j++, p++) {
816
        ir_ref input = *p;
817
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
818
        ir_live_pos use_pos;
819
        ir_ref hint_ref = 0;
820
        uint32_t v;
821
822
        if (input > 0) {
823
          v = ctx->vregs[input];
824
          if (v) {
825
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
826
            if (reg != IR_REG_NONE) {
827
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
828
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
829
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
830
              if (j == 1) {
831
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
832
                IR_ASSERT(ctx->vregs[ref]);
833
                hint_ref = ref;
834
              } else if (input == insn->op1) {
835
                /* Input is the same as "op1" */
836
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
837
              }
838
            }
839
            if (!ir_bitset_in(live, v)) {
840
              /* live.add(opd) */
841
              ir_bitset_incl(live, v);
842
              /* intervals[opd].addRange(b.from, op.id) */
843
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
844
            } else {
845
              ival = ctx->live_intervals[v];
846
            }
847
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
848
          } else if (ctx->rules) {
849
            if (ctx->rules[input] & IR_FUSED) {
850
                ir_add_fusion_ranges(ctx, ref, input, bb, live);
851
            } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
852
              ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
853
            }
854
          }
855
        } else if (reg != IR_REG_NONE) {
856
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
857
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
858
        }
859
      }
860
    }
861
862
    /* if b is loop header */
863
    if ((bb->flags & IR_BB_LOOP_HEADER)
864
     && !ir_bitset_empty(live, len)) {
865
      /* variables live at the loop header stay alive throughout the whole loop body */
866
      uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
867
      uint32_t child;
868
      ir_block *child_bb;
869
      ir_bitset child_live_in;
870
871
      if (!loops) {
872
        loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
873
        ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
874
      } else {
875
        ir_bitset_clear(loops, bb_set_len);
876
        ir_bitqueue_clear(&queue);
877
      }
878
      ir_bitset_incl(loops, b);
879
      child = b;
880
      do {
881
        child_bb = &ctx->cfg_blocks[child];
882
        child_live_in = bb_live + (len * child);
883
884
        IR_BITSET_FOREACH(live, len, i) {
885
          ir_bitset_incl(child_live_in, i);
886
          ir_add_live_range(ctx, i,
887
            IR_START_LIVE_POS_FROM_REF(child_bb->start),
888
            IR_END_LIVE_POS_FROM_REF(child_bb->end));
889
        } IR_BITSET_FOREACH_END();
890
891
        child = child_bb->dom_child;
892
        while (child) {
893
          child_bb = &ctx->cfg_blocks[child];
894
          if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
895
            ir_bitqueue_add(&queue, child);
896
            if (child_bb->flags & IR_BB_LOOP_HEADER) {
897
              ir_bitset_incl(loops, child);
898
            }
899
          }
900
          child = child_bb->dom_next_child;
901
        }
902
      } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
903
    }
904
  }
905
906
  if (ctx->entries) {
907
    for (i = 0; i < ctx->entries_count; i++) {
908
      b = ctx->entries[i];
909
      bb = &ctx->cfg_blocks[b];
910
      live = bb_live + (len * b);
911
      ir_add_osr_entry_loads(ctx, bb, live, len, b);
912
    }
913
    if (ctx->osr_entry_loads) {
914
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
915
    }
916
  }
917
918
  if (loops) {
919
    ir_mem_free(loops);
920
    ir_bitqueue_free(&queue);
921
  }
922
923
  ir_mem_free(bb_live);
924
#ifdef IR_DEBUG
925
  ir_mem_free(visited);
926
#endif
927
928
  return 1;
929
}
930
931
#else
932
/* Path exploration by definition liveness for SSA using sets represented by linked lists */
933
934
#define IS_LIVE_IN_BLOCK(v, b) \
935
0
  (live_in_block[v] == b)
936
0
#define SET_LIVE_IN_BLOCK(v, b) do { \
937
0
    live_in_block[v] = b; \
938
0
  } while (0)
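/*
 * A minimal sketch of the trick behind these macros (stand-alone, with a
 * plain array in place of the real live_in_block buffer): instead of one
 * bitset per block, each virtual register remembers the id of the last block
 * that marked it live. Since blocks are processed one at a time, equality
 * with the current block id is a valid membership test and nothing ever has
 * to be cleared between blocks.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint32_t live_in_block[8] = {0};  /* indexed by vreg; block ids start at 1 */

  live_in_block[5] = 3;                  /* SET_LIVE_IN_BLOCK(5, 3) */
  printf("%d\n", live_in_block[5] == 3); /* 1: IS_LIVE_IN_BLOCK(5, 3) */
  printf("%d\n", live_in_block[5] == 2); /* 0: the stale mark is invisible in block 2 */
  return 0;
}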
939
940
/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
941
IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
942
0
{
943
#if 0
944
  return live_outs[b];
945
#else
946
0
  if (!live_outs[b]) {
947
0
    return -1;
948
0
  }
949
0
  return ir_list_at(live_lists, live_outs[b]);
950
0
#endif
951
0
}
952
953
/* Remember a virtual register alive at the end of the block */
954
IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
955
0
{
956
#if 0
957
  ir_block *bb = &ctx->cfg_blocks[b];
958
  live_outs[b] = v;
959
  ir_add_prev_live_range(ctx, v,
960
    IR_START_LIVE_POS_FROM_REF(bb->start),
961
    IR_END_LIVE_POS_FROM_REF(bb->end));
962
#else
963
0
  if (live_lists->len >= live_lists->a.size) {
964
0
    ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
965
0
  }
966
  /* Form a linked list of virtual registers live at the end of the block */
967
0
  ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
968
0
  live_outs[b] = ir_list_len(live_lists);           /* remember the new root */
969
0
  ir_list_push_unchecked(live_lists, v);            /* push a virtual register */
970
0
#endif
971
0
}
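/*
 * A minimal sketch of the list encoding used above (assumed layout, with a
 * plain array standing in for ir_list): every per-block live-out set shares
 * one growable array. live_outs[b] holds the position of the newest entry
 * for block b; each entry is a (previous-position, vreg) pair, so a set is
 * walked by alternating between the two slots until the 0 terminator.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t lists[64];
static uint32_t lists_len = 0;
static uint32_t live_outs[8] = {0};   /* 0 means "empty set" */

static void live_out_push(uint32_t b, uint32_t v)
{
  lists[lists_len++] = live_outs[b];  /* link to the previous entry (0 = none) */
  live_outs[b] = lists_len;           /* new head: position of the vreg */
  lists[lists_len++] = v;
}

int main(void)
{
  uint32_t n;

  live_out_push(2, 7);
  live_out_push(2, 9);                /* block 2 live-out = {9, 7} */
  for (n = live_outs[2]; n; n = lists[n - 1]) {
    printf("vreg %u\n", lists[n]);    /* prints 9, then 7 */
  }
  return 0;
}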
972
973
/*
974
 * Computes live-out sets for each basic-block per variable using def-use chains.
975
 *
976
 * The implementation is based on algorithms 6 and 7 described in
977
 * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot.
978
 * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. TR Inria RR-7503, 2011
979
 */
980
static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
981
0
{
982
0
  ir_list block_queue, fuse_queue;
983
0
  ir_ref i;
984
985
0
  ir_list_init(&fuse_queue, 16);
986
0
  ir_list_init(&block_queue, 256);
987
988
  /* For each virtual register explore paths from all uses to definition */
989
0
  for (i = ctx->insns_count - 1; i > 0; i--) {
990
0
    uint32_t v = ctx->vregs[i];
991
992
0
    if (v) {
993
0
      uint32_t def_block = ctx->cfg_map[i];
994
0
      ir_use_list *use_list = &ctx->use_lists[i];
995
0
      ir_ref *p, n = use_list->count;
996
997
      /* Collect all blocks where 'v' is used into a 'block_queue' */
998
0
      for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
999
0
        ir_ref use = *p;
1000
0
        ir_insn *insn = &ctx->ir_base[use];
1001
1002
0
        if (UNEXPECTED(insn->op == IR_PHI)) {
1003
0
          ir_ref n = insn->inputs_count - 1;
1004
0
          ir_ref *p = insn->ops + 2; /* PHI data inputs */
1005
0
          ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
1006
1007
0
          for (;n > 0; p++, q++, n--) {
1008
0
            if (*p == i) {
1009
0
              uint32_t pred_block = ctx->cfg_map[*q];
1010
1011
0
              if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1012
0
                ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1013
0
                if (pred_block != def_block) {
1014
0
                  ir_list_push(&block_queue, pred_block);
1015
0
                }
1016
0
              }
1017
0
            }
1018
0
          }
1019
0
        } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) {
1020
0
          while (1) {
1021
0
            ir_use_list *use_list = &ctx->use_lists[use];
1022
0
            ir_ref *p, n = use_list->count;
1023
1024
0
            for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1025
0
              ir_ref use = *p;
1026
1027
0
              if (ctx->rules[use] & IR_FUSED) {
1028
0
                ir_list_push(&fuse_queue, use);
1029
0
              } else {
1030
0
                uint32_t use_block = ctx->cfg_map[use];
1031
1032
0
                if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1033
0
                  ir_list_push(&block_queue, use_block);
1034
0
                }
1035
0
              }
1036
0
            }
1037
0
            if (!ir_list_len(&fuse_queue)) {
1038
0
              break;
1039
0
            }
1040
0
            use = ir_list_pop(&fuse_queue);
1041
0
          }
1042
0
        } else {
1043
0
          uint32_t use_block = ctx->cfg_map[use];
1044
1045
          /* Check if the virtual register is alive at the start of 'use_block' */
1046
0
          if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1047
0
            ir_list_push(&block_queue, use_block);
1048
0
          }
1049
0
        }
1050
0
      }
1051
1052
      /* UP_AND_MARK: traverse predecessor blocks until we reach the block where 'v' is defined */
1053
0
      while (ir_list_len(&block_queue)) {
1054
0
        uint32_t b = ir_list_pop(&block_queue);
1055
0
        ir_block *bb = &ctx->cfg_blocks[b];
1056
0
        uint32_t *p, n = bb->predecessors_count;
1057
1058
0
        if (bb->flags & IR_BB_ENTRY) {
1059
          /* live_in_push(ENTRY, v) */
1060
0
          ir_insn *insn = &ctx->ir_base[bb->start];
1061
1062
0
          IR_ASSERT(insn->op == IR_ENTRY);
1063
0
          IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count);
1064
0
          if (live_lists->len >= live_lists->a.size) {
1065
0
            ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
1066
0
          }
1067
0
          ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]);
1068
0
          ir_list_push_unchecked(live_lists, v);
1069
0
          live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1;
1070
0
          continue;
1071
0
        }
1072
0
        for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
1073
0
          uint32_t pred_block = *p;
1074
1075
          /* Check if 'pred_block' wasn't traversed before */
1076
0
          if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1077
            /* Mark a virtual register 'v' alive at the end of 'pred_block' */
1078
0
            ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1079
0
            if (pred_block != def_block) {
1080
0
              ir_list_push(&block_queue, pred_block);
1081
0
            }
1082
0
          }
1083
0
        }
1084
0
      }
1085
0
    }
1086
0
  }
1087
1088
0
  ir_list_free(&block_queue);
1089
0
  ir_list_free(&fuse_queue);
1090
0
}
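/*
 * A minimal sketch of the path-exploration idea above (a toy diamond CFG and
 * one virtual register; all names are hypothetical). Starting from the use,
 * predecessors are visited and marked live-out until the walk reaches the
 * defining block or a block that was already marked, mirroring the
 * Up-and-Mark scheme of Brandner et al.
 */
#include <stdio.h>
#include <stdint.h>

#define NBLOCKS 5

int main(void)
{
  /* predecessor lists for a diamond: 1 -> {2, 3} -> 4 (0 terminates) */
  static const int preds[NBLOCKS][2] = { {0}, {0}, {1, 0}, {1, 0}, {2, 3} };
  int def_block = 1;                /* 'v' is defined in block 1 ... */
  int queue[NBLOCKS], qlen = 0;
  uint8_t live_out[NBLOCKS] = {0};  /* doubles as the visited marker */
  int b, i;

  queue[qlen++] = 4;                /* ... and used in block 4 */
  while (qlen) {
    b = queue[--qlen];
    for (i = 0; i < 2 && preds[b][i]; i++) {
      int p = preds[b][i];
      if (!live_out[p]) {           /* not traversed before */
        live_out[p] = 1;            /* 'v' is live at the end of p */
        if (p != def_block) {
          queue[qlen++] = p;        /* keep walking upwards */
        }
      }
    }
  }
  for (b = 1; b < NBLOCKS; b++) {
    printf("block %d live-out: %d\n", b, live_out[b]);  /* 1 1 1 0 */
  }
  return 0;
}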
1091
1092
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, uint32_t b)
1093
0
{
1094
0
  bool ok = 1;
1095
0
  int count = 0;
1096
0
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
1097
0
  ir_ref i;
1098
1099
0
  while (pos) {
1100
0
    i = ir_list_at(live_lists, pos);
1101
0
    pos = ir_list_at(live_lists, pos - 1);
1102
1103
    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
1104
0
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
1105
0
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
1106
1107
0
    if (use_pos->op_num) {
1108
0
      ir_ref *ops = ctx->ir_base[ref].ops;
1109
0
      ref = ops[use_pos->op_num];
1110
0
    }
1111
1112
0
    if (ctx->ir_base[ref].op == IR_PARAM) {
1113
0
      continue;
1114
0
    }
1115
0
    if (ctx->binding) {
1116
0
      ir_ref var = ir_binding_find(ctx, ref);
1117
0
      if (var < 0) {
1118
        /* We may load the value at OSR entry-point */
1119
0
        if (!count) {
1120
0
          bb->flags &= ~IR_BB_EMPTY;
1121
0
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
1122
0
          if (!ctx->osr_entry_loads) {
1123
0
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
1124
0
            ir_list_init(list, 16);
1125
0
          }
1126
0
          ir_list_push(list, b);
1127
0
          ir_list_push(list, 0);
1128
0
        }
1129
0
        ir_list_push(list, ref);
1130
0
        count++;
1131
0
        continue;
1132
0
      }
1133
0
    }
1134
0
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
1135
0
    ok = 0;
1136
0
  }
1137
1138
0
  if (!ok) {
1139
0
    IR_ASSERT(0);
1140
0
  }
1141
0
  if (count) {
1142
0
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
1143
1144
#if 0
1145
    /* ENTRY "clobbers" all registers */
1146
    ir_ref ref = ctx->ir_base[bb->start].op1;
1147
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
1148
      IR_DEF_LIVE_POS_FROM_REF(ref),
1149
      IR_SAVE_LIVE_POS_FROM_REF(ref));
1150
#endif
1151
0
  }
1152
0
}
1153
1154
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b)
1155
0
{
1156
0
  ir_ref stack[4];
1157
0
  int stack_pos = 0;
1158
0
  ir_target_constraints constraints;
1159
0
  ir_insn *insn;
1160
0
  uint32_t j, n, flags, def_flags;
1161
0
  ir_ref *p, child;
1162
0
  uint8_t use_flags;
1163
0
  ir_reg reg;
1164
0
  ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref);
1165
0
  ir_live_pos use_pos;
1166
0
  ir_live_interval *ival;
1167
1168
0
  while (1) {
1169
0
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
1170
1171
0
    if (!(ctx->rules[input] & IR_SIMPLE)) {
1172
0
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
1173
0
      n = constraints.tmps_count;
1174
0
      while (n > 0) {
1175
0
        n--;
1176
0
        if (constraints.tmp_regs[n].type) {
1177
0
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1178
0
        } else {
1179
          /* CPU specific constraints */
1180
0
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1181
0
            pos + constraints.tmp_regs[n].start,
1182
0
            pos + constraints.tmp_regs[n].end);
1183
0
        }
1184
0
      }
1185
0
    } else {
1186
0
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
1187
0
      constraints.hints_count = 0;
1188
0
    }
1189
1190
0
    insn = &ctx->ir_base[input];
1191
0
    flags = ir_op_flags[insn->op];
1192
0
    IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
1193
0
    n = IR_INPUT_EDGES_COUNT(flags);
1194
0
    j = 1;
1195
0
    p = insn->ops + j;
1196
0
    if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_PINNED)) {
1197
0
      j++;
1198
0
      p++;
1199
0
    }
1200
0
    for (; j <= n; j++, p++) {
1201
0
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
1202
0
      child = *p;
1203
0
      if (child > 0) {
1204
0
        uint32_t v = ctx->vregs[child];
1205
1206
0
        if (v) {
1207
0
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1208
0
          use_pos = pos;
1209
0
          if (EXPECTED(reg == IR_REG_NONE)) {
1210
0
            use_pos += IR_USE_SUB_REF;
1211
0
          }
1212
1213
0
          if (!IS_LIVE_IN_BLOCK(v, b)) {
1214
            /* live.add(opd) */
1215
0
            SET_LIVE_IN_BLOCK(v, b);
1216
            /* intervals[opd].addRange(b.from, op.id) */
1217
0
            ival = ir_add_live_range(ctx, v,
1218
0
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1219
0
          } else {
1220
0
            ival = ctx->live_intervals[v];
1221
0
          }
1222
0
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
1223
0
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
1224
0
        } else if (ctx->rules[child] & IR_FUSED) {
1225
0
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
1226
0
          stack[stack_pos++] = child;
1227
0
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
1228
0
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
1229
0
        }
1230
0
      }
1231
0
    }
1232
0
    if (!stack_pos) {
1233
0
      break;
1234
0
    }
1235
0
    input = stack[--stack_pos];
1236
0
  }
1237
0
}
1238
1239
int ir_compute_live_ranges(ir_ctx *ctx)
1240
0
{
1241
0
  uint32_t b, i, j, k, n, succ;
1242
0
  ir_ref ref;
1243
0
  ir_insn *insn;
1244
0
  ir_block *bb, *succ_bb;
1245
0
  uint32_t *live_outs;
1246
0
  uint32_t *live_in_block;
1247
0
  ir_list live_lists;
1248
0
  ir_live_interval *ival;
1249
1250
0
  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
1251
0
    return 0;
1252
0
  }
1253
1254
0
  if (ctx->rules) {
1255
0
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
1256
0
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
1257
0
  }
1258
1259
  /* Root of the list of IR_VARs */
1260
0
  ctx->vars = IR_UNUSED;
1261
1262
  /* Compute Live Ranges */
1263
0
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
1264
1265
  /* vregs + tmp + fixed + SCRATCH + ALL */
1266
0
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
1267
1268
0
  if (!ctx->arena) {
1269
0
    ctx->arena = ir_arena_create(16 * 1024);
1270
0
  }
1271
1272
0
  live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
1273
0
  ir_list_init(&live_lists, 1024);
1274
0
  ir_compute_live_sets(ctx, live_outs, &live_lists);
1275
0
  live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));
1276
1277
  /* for each basic block in reverse order */
1278
0
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
1279
0
    bb = &ctx->cfg_blocks[b];
1280
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1281
1282
    /* For all virtual registers alive at the end of the block */
1283
0
    n = live_outs[b];
1284
0
    while (n != 0) {
1285
0
      i = ir_list_at(&live_lists, n);
1286
0
      SET_LIVE_IN_BLOCK(i, b);
1287
0
      ir_add_prev_live_range(ctx, i,
1288
0
        IR_START_LIVE_POS_FROM_REF(bb->start),
1289
0
        IR_END_LIVE_POS_FROM_REF(bb->end));
1290
0
      n = ir_list_at(&live_lists, n - 1);
1291
0
    }
1292
1293
0
    if (bb->successors_count == 1) {
1294
      /* for each phi function of the successor */
1295
0
      succ = ctx->cfg_edges[bb->successors];
1296
0
      succ_bb = &ctx->cfg_blocks[succ];
1297
0
      if (succ_bb->flags & IR_BB_HAS_PHI) {
1298
0
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
1299
0
        ir_ref n, *p;
1300
1301
0
        k = ir_phi_input_number(ctx, succ_bb, b);
1302
0
        IR_ASSERT(k != 0);
1303
0
        n = use_list->count;
1304
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1305
0
          ir_ref use = *p;
1306
0
          insn = &ctx->ir_base[use];
1307
0
          if (insn->op == IR_PHI) {
1308
0
            ir_ref input = ir_insn_op(insn, k);
1309
0
            if (input > 0) {
1310
0
              uint32_t v = ctx->vregs[input];
1311
1312
0
              if (v) {
1313
0
                ival = ctx->live_intervals[v];
1314
0
                ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
1315
0
              }
1316
0
            }
1317
0
          }
1318
0
        }
1319
0
      }
1320
0
    }
1321
1322
    /* for each operation of the block in reverse order */
1323
0
    ref = bb->end;
1324
0
    insn = &ctx->ir_base[ref];
1325
0
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
1326
0
      ref = ctx->prev_ref[ref];
1327
0
    }
1328
0
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
1329
0
      uint32_t def_flags;
1330
0
      uint32_t flags;
1331
0
      ir_ref *p;
1332
0
      ir_target_constraints constraints;
1333
0
      uint32_t v;
1334
1335
0
      if (ctx->rules) {
1336
0
        int n;
1337
1338
0
        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
1339
0
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
1340
0
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
1341
0
           && ctx->use_lists[ref].count > 0) {
1342
0
            insn = &ctx->ir_base[ref];
1343
0
            if (insn->op != IR_VADDR && insn->op != IR_PARAM) {
1344
0
              insn->op3 = ctx->vars;
1345
0
              ctx->vars = ref;
1346
0
            }
1347
0
          }
1348
0
          continue;
1349
0
        }
1350
1351
0
        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
1352
0
        n = constraints.tmps_count;
1353
0
        while (n > 0) {
1354
0
          n--;
1355
0
          if (constraints.tmp_regs[n].type) {
1356
0
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1357
0
          } else {
1358
            /* CPU specific constraints */
1359
0
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1360
0
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
1361
0
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
1362
0
          }
1363
0
        }
1364
0
      } else {
1365
0
        def_flags = 0;
1366
0
        constraints.def_reg = IR_REG_NONE;
1367
0
        constraints.hints_count = 0;
1368
0
      }
1369
1370
0
      insn = &ctx->ir_base[ref];
1371
0
      v = ctx->vregs[ref];
1372
0
      if (v) {
1373
0
        if (insn->op != IR_PHI) {
1374
0
          ir_live_pos def_pos;
1375
0
          ir_ref hint_ref = 0;
1376
0
          ir_reg reg = constraints.def_reg;
1377
1378
0
          if (reg != IR_REG_NONE) {
1379
0
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
1380
0
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
1381
              /* parameter register must be kept live until it's copied */
1382
0
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1383
0
            }
1384
0
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1385
0
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
1386
0
              hint_ref = insn->op1;
1387
0
            }
1388
0
            if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1389
0
              def_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1390
0
            } else {
1391
0
              def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1392
0
            }
1393
0
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1394
0
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1395
0
          } else {
1396
0
            if (insn->op == IR_PARAM) {
1397
              /* We may reuse parameter stack slot for spilling */
1398
0
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1399
0
            } else if (insn->op == IR_VLOAD) {
1400
              /* Load may be fused into the usage instruction */
1401
0
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
1402
0
            }
1403
0
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
1404
0
          }
1405
          /* intervals[opd].setFrom(op.id) */
1406
0
          ival = ir_fix_live_range(ctx, v,
1407
0
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1408
0
          ival->type = insn->type;
1409
0
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
1410
0
        } else {
1411
          /* PHIs inputs must not be processed */
1412
0
          ival = ctx->live_intervals[v];
1413
0
          if (UNEXPECTED(!ival)) {
1414
            /* Dead PHI */
1415
0
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
1416
0
          }
1417
0
          ival->type = insn->type;
1418
0
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
1419
0
          continue;
1420
0
        }
1421
0
      }
1422
1423
0
      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
1424
0
      flags = ir_op_flags[insn->op];
1425
0
      j = 1;
1426
0
      p = insn->ops + 1;
1427
0
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
1428
0
        j++;
1429
0
        p++;
1430
0
      }
1431
0
      for (; j <= insn->inputs_count; j++, p++) {
1432
0
        ir_ref input = *p;
1433
0
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1434
0
        ir_live_pos use_pos;
1435
0
        ir_ref hint_ref = 0;
1436
0
        uint32_t v;
1437
1438
0
        if (input > 0) {
1439
0
          v = ctx->vregs[input];
1440
0
          if (v) {
1441
0
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1442
0
            if (reg != IR_REG_NONE) {
1443
0
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1444
0
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1445
0
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1446
0
              if (j == 1) {
1447
0
                if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1448
0
                  use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1449
0
                } else {
1450
0
                  use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1451
0
                }
1452
0
                IR_ASSERT(ctx->vregs[ref]);
1453
0
                hint_ref = ref;
1454
0
              } else if (input == insn->op1) {
1455
                /* Input is the same as "op1" */
1456
0
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1457
0
              }
1458
0
            }
1459
0
            if (!IS_LIVE_IN_BLOCK(v, b)) {
1460
              /* live.add(opd) */
1461
0
              SET_LIVE_IN_BLOCK(v, b);
1462
              /* intervals[opd].addRange(b.from, op.id) */
1463
0
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1464
0
            } else {
1465
0
              ival = ctx->live_intervals[v];
1466
0
            }
1467
0
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
1468
0
          } else if (ctx->rules) {
1469
0
            if (ctx->rules[input] & IR_FUSED) {
1470
0
                ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
1471
0
            } else {
1472
0
              if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
1473
0
                ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
1474
0
              }
1475
0
              if (reg != IR_REG_NONE) {
1476
0
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1477
0
                ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1478
0
              }
1479
0
            }
1480
0
          }
1481
0
        } else if (reg != IR_REG_NONE) {
1482
0
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1483
0
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1484
0
        }
1485
0
      }
1486
0
    }
1487
0
  }
1488
1489
0
  if (ctx->entries) {
1490
0
    for (i = 0; i < ctx->entries_count; i++) {
1491
0
      b = ctx->entries[i];
1492
0
      bb = &ctx->cfg_blocks[b];
1493
0
      IR_ASSERT(bb->predecessors_count == 1);
1494
0
      ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
1495
0
    }
1496
0
    if (ctx->osr_entry_loads) {
1497
0
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
1498
0
    }
1499
0
  }
1500
1501
0
  ir_list_free(&live_lists);
1502
0
  ir_mem_free(live_outs);
1503
0
  ir_mem_free(live_in_block);
1504
1505
0
  return 1;
1506
0
}
1507
1508
#endif
1509
1510
/* Live Ranges coalescing */
1511
1512
static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2)
1513
0
{
1514
0
  while (1) {
1515
0
    if (lrg2->start < lrg1->end) {
1516
0
      if (lrg1->start < lrg2->end) {
1517
0
        return IR_MAX(lrg1->start, lrg2->start);
1518
0
      } else {
1519
0
        lrg2 = lrg2->next;
1520
0
        if (!lrg2) {
1521
0
          return 0;
1522
0
        }
1523
0
      }
1524
0
    } else {
1525
0
      lrg1 = lrg1->next;
1526
0
      if (!lrg1) {
1527
0
        return 0;
1528
0
      }
1529
0
    }
1530
0
  }
1531
0
}
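/*
 * A minimal sketch of the two-pointer walk above (hypothetical stand-ins for
 * ir_live_range, half-open integer ranges instead of live positions): both
 * lists are sorted and internally disjoint, so whichever list is behind can
 * be advanced, and the first intersecting pair yields the overlap position.
 */
#include <stdio.h>

typedef struct rng { int start, end; const struct rng *next; } rng;

static int ranges_overlap(const rng *a, const rng *b)
{
  while (a && b) {
    if (b->start < a->end && a->start < b->end) {
      return a->start > b->start ? a->start : b->start;  /* IR_MAX */
    }
    if (a->end <= b->start) {
      a = a->next;  /* 'a' is entirely behind 'b': advance it */
    } else {
      b = b->next;  /* otherwise 'b' is behind */
    }
  }
  return 0;  /* one list ran out: no overlap */
}

int main(void)
{
  const rng a2 = {40, 50, NULL}, a1 = {10, 20, &a2};  /* {[10,20), [40,50)} */
  const rng b1 = {15, 45, NULL};                      /* {[15,45)} */

  printf("overlap at %d\n", ranges_overlap(&a1, &b1));  /* 15 */
  return 0;
}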
1532
1533
static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1534
0
{
1535
0
  ir_live_interval *ival1 = ctx->live_intervals[r1];
1536
0
  ir_live_interval *ival2 = ctx->live_intervals[r2];
1537
1538
#if 0
1539
  if (ival2->range.start >= ival1->end
1540
   || ival1->range.start >= ival2->end) {
1541
    return 0;
1542
  }
1543
#endif
1544
0
  return ir_ivals_overlap(&ival1->range, &ival2->range);
1545
0
}
1546
1547
static bool ir_ivals_inside(ir_live_range *parent, ir_live_range *child)
1548
0
{
1549
0
  do {
1550
0
    while (parent && parent->end < child->start) {
1551
0
      parent = parent->next;
1552
0
    }
1553
0
    if (!parent || parent->start > child->start || parent->end < child->end) {
1554
0
      return 0;
1555
0
    }
1556
0
    child = child->next;
1557
0
  } while (child);
1558
0
  return 1;
1559
0
}
1560
1561
static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
1562
0
{
1563
0
  ir_live_interval *child_ival = ctx->live_intervals[child];
1564
0
  ir_live_interval *parent_ival = ctx->live_intervals[parent];
1565
1566
0
  if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
1567
    // TODO: Support valid cases with already coalesced "parent_ival"
1568
0
    return 0;
1569
0
  }
1570
#if 0
1571
  if (child_ival->end >= parent_ival->end) {
1572
    return 0;
1573
  }
1574
#endif
1575
0
  return ir_ivals_inside(&parent_ival->range, &child_ival->range);
1576
0
}
1577
1578
static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1579
0
{
1580
0
  ir_live_interval *ival = ctx->live_intervals[r2];
1581
0
  ir_live_range *live_range = &ival->range;
1582
0
  ir_live_range *next;
1583
0
  ir_use_pos *use_pos, *next_pos, **prev;
1584
1585
#if 0
1586
  fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);
1587
#endif
1588
1589
0
  ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1590
0
  live_range = live_range->next;
1591
0
  while (live_range) {
1592
0
    next = live_range->next;
1593
0
    live_range->next = ctx->unused_ranges;
1594
0
    ctx->unused_ranges = live_range;
1595
0
    ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1596
0
    live_range = next;
1597
0
  }
1598
1599
  /* merge sorted use_pos lists */
1600
0
  prev = &ctx->live_intervals[r1]->use_pos;
1601
0
  use_pos = ival->use_pos;
1602
0
  while (use_pos) {
1603
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) {
1604
0
      use_pos->hint_ref = 0;
1605
0
    }
1606
0
    while (*prev && ((*prev)->pos < use_pos->pos ||
1607
0
      ((*prev)->pos == use_pos->pos &&
1608
0
        (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) {
1609
0
      if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
1610
0
        (*prev)->hint_ref = 0;
1611
0
      }
1612
0
      prev = &(*prev)->next;
1613
0
    }
1614
0
    next_pos = use_pos->next;
1615
0
    use_pos->next = *prev;
1616
0
    *prev = use_pos;
1617
0
    prev = &use_pos->next;
1618
0
    use_pos = next_pos;
1619
0
  }
1620
0
  use_pos = *prev;
1621
0
  while (use_pos) {
1622
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) {
1623
0
      use_pos->hint_ref = 0;
1624
0
    }
1625
0
    use_pos = use_pos->next;
1626
0
  }
1627
1628
0
  ctx->live_intervals[r1]->flags |=
1629
0
    IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
1630
0
  if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) {
1631
0
    ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD;
1632
0
  }
1633
0
  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
1634
0
    IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM));
1635
0
    ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1636
0
  }
1637
0
  ctx->live_intervals[r2] = NULL;
1638
1639
  // TODO: remember to reuse ???
1640
  //ir_mem_free(ival);
1641
0
}
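
/* The use_pos merge in ir_vregs_join() above splices one sorted singly-linked
 * list into another in place, using a pointer-to-pointer cursor ("prev") so
 * the list head needs no special case.  A minimal hedged sketch of that idiom
 * on plain int nodes; all names are illustrative, not part of the IR API. */
#include <stdio.h>

typedef struct node { int key; struct node *next; } node;

/* Stably merge sorted list "src" into sorted list "*dst" in place. */
static void merge_into(node **dst, node *src)
{
    node **prev = dst;
    node *next;

    while (src) {
        next = src->next;
        while (*prev && (*prev)->key <= src->key) {
            prev = &(*prev)->next;  /* advance; no head special case needed */
        }
        src->next = *prev;          /* splice src before the larger element */
        *prev = src;
        prev = &src->next;
        src = next;
    }
}

int main(void)
{
    node a3 = {9, NULL}, a2 = {5, &a3}, a1 = {1, &a2};
    node b2 = {7, NULL}, b1 = {2, &b2};
    node *list = &a1, *p;

    merge_into(&list, &b1);
    for (p = list; p; p = p->next) {
        printf("%d ", p->key);      /* prints: 1 2 5 7 9 */
    }
    printf("\n");
    return 0;
}
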
1642
1643
static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)
1644
0
{
1645
0
  ir_ref i;
1646
0
  uint16_t f1 = ctx->live_intervals[v1]->flags;
1647
0
  uint16_t f2 = ctx->live_intervals[v2]->flags;
1648
1649
#if 0
1650
  if (ctx->binding) {
1651
    ir_ref b1 = ir_binding_find(ctx, from);
1652
    ir_ref b2 = ir_binding_find(ctx, to);
1653
    IR_ASSERT(b1 == b2);
1654
  }
1655
#endif
1656
0
  if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
1657
0
    ir_vregs_join(ctx, v1, v2);
1658
0
    ctx->vregs[to] = v1;
1659
0
  } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) {
1660
0
    ir_vregs_join(ctx, v2, v1);
1661
0
    ctx->vregs[from] = v2;
1662
0
  } else if (from < to) {
1663
0
    ir_vregs_join(ctx, v1, v2);
1664
0
    if (f2 & IR_LIVE_INTERVAL_COALESCED) {
1665
0
      for (i = 1; i < ctx->insns_count; i++) {
1666
0
        if (ctx->vregs[i] == v2) {
1667
0
          ctx->vregs[i] = v1;
1668
0
        }
1669
0
      }
1670
0
    } else {
1671
0
      ctx->vregs[to] = v1;
1672
0
    }
1673
0
  } else {
1674
0
    ir_vregs_join(ctx, v2, v1);
1675
0
    if (f1 & IR_LIVE_INTERVAL_COALESCED) {
1676
0
      for (i = 1; i < ctx->insns_count; i++) {
1677
0
        if (ctx->vregs[i] == v1) {
1678
0
          ctx->vregs[i] = v2;
1679
0
        }
1680
0
      }
1681
0
    } else {
1682
0
      ctx->vregs[from] = v2;
1683
0
    }
1684
0
  }
1685
0
}
1686
1687
static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
1688
0
{
1689
0
  if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
1690
0
    ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
1691
0
    ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
1692
0
    ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
1693
#if 0
1694
    fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
1695
#endif
1696
0
  }
1697
0
}
1698
1699
typedef struct _ir_coalesce_block {
1700
  uint32_t b;
1701
  uint32_t loop_depth;
1702
} ir_coalesce_block;
1703
1704
static int ir_block_cmp(const void *b1, const void *b2)
1705
0
{
1706
0
  ir_coalesce_block *d1 = (ir_coalesce_block*)b1;
1707
0
  ir_coalesce_block *d2 = (ir_coalesce_block*)b2;
1708
1709
0
  if (d1->loop_depth > d2->loop_depth) {
1710
0
    return -1;
1711
0
  } else if (d1->loop_depth == d2->loop_depth) {
1712
0
    if (d1->b < d2->b) {
1713
0
      return -1;
1714
0
    } else {
1715
0
      return 1;
1716
0
    }
1717
0
  } else {
1718
0
    return 1;
1719
0
  }
1720
0
}
1721
1722
static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1723
0
{
1724
0
  ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1725
0
  ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1726
0
  ir_live_interval *ival;
1727
0
  ir_live_range *r;
1728
0
  ir_use_pos *p, *p1 = NULL, *p2 = NULL;
1729
0
  ir_ref tmp;
1730
1731
0
  tmp = insn->op1;
1732
0
  insn->op1 = insn->op2;
1733
0
  insn->op2 = tmp;
1734
1735
0
  ival = ctx->live_intervals[ctx->vregs[insn->op1]];
1736
0
  p = ival->use_pos;
1737
0
  while (p) {
1738
0
    if (p->pos == pos) {
1739
0
      p->pos = load_pos;
1740
0
      p->op_num = 1;
1741
0
      p1 = p;
1742
0
      break;
1743
0
    }
1744
0
    p = p->next;
1745
0
  }
1746
1747
0
  ival = ctx->live_intervals[ctx->vregs[i]];
1748
0
  p = ival->use_pos;
1749
0
  while (p) {
1750
0
    if (p->pos == load_pos) {
1751
0
      p->hint_ref = insn->op1;
1752
0
      break;
1753
0
    }
1754
0
    p = p->next;
1755
0
  }
1756
1757
0
  if (insn->op2 > 0 && ctx->vregs[insn->op2]) {
1758
0
    ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1759
0
    r = &ival->range;
1760
0
    while (r) {
1761
0
      if (r->end == load_pos) {
1762
0
        r->end = pos;
1763
0
        if (!r->next) {
1764
0
          ival->end = pos;
1765
0
        }
1766
0
        break;
1767
0
      }
1768
0
      r = r->next;
1769
0
    }
1770
0
    p = ival->use_pos;
1771
0
    while (p) {
1772
0
      if (p->pos == load_pos) {
1773
0
        p->pos = pos;
1774
0
        p->op_num = 2;
1775
0
        p2 = p;
1776
0
        break;
1777
0
      }
1778
0
      p = p->next;
1779
0
    }
1780
0
  }
1781
0
  if (p1 && p2) {
1782
0
    uint8_t tmp = p1->flags;
1783
0
    p1->flags = p2->flags;
1784
0
    p2->flags = tmp;
1785
0
  }
1786
0
}
1787
1788
static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)
1789
0
{
1790
0
  ir_use_pos *p;
1791
0
  ir_reg r1 = IR_REG_NONE;
1792
0
  ir_reg r2 = IR_REG_NONE;
1793
1794
0
  p = ctx->live_intervals[use]->use_pos;
1795
0
  while (p) {
1796
0
    if (IR_LIVE_POS_TO_REF(p->pos) == ref) {
1797
0
      break;
1798
0
    }
1799
0
    if (p->hint != IR_REG_NONE) {
1800
0
      r1 = p->hint;
1801
0
    }
1802
0
    p = p->next;
1803
0
  }
1804
1805
0
  p = ctx->live_intervals[def]->use_pos;
1806
0
  while (p) {
1807
0
    if (IR_LIVE_POS_TO_REF(p->pos) > ref) {
1808
0
      if (p->hint != IR_REG_NONE) {
1809
0
        r2 = p->hint;
1810
0
        break;
1811
0
      }
1812
0
    }
1813
0
    p = p->next;
1814
0
  }
1815
0
  return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE;
1816
0
}
1817
1818
static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1819
0
{
1820
0
  if (ctx->vregs[insn->op1]
1821
0
   && ctx->vregs[insn->op1] != ctx->vregs[i]
1822
0
   && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
1823
0
   && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {
1824
    /* pass */
1825
0
  } else {
1826
0
    if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {
1827
0
      ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1828
0
      ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1829
0
      ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1830
0
      ir_live_range *r = &ival->range;
1831
1832
0
      if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) {
1833
0
        return 0;
1834
0
      }
1835
0
      while (r) {
1836
0
        if (r->end == pos) {
1837
0
          r->end = load_pos;
1838
0
          if (!r->next) {
1839
0
            ival->end = load_pos;
1840
0
          }
1841
0
          if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
1842
0
           && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
1843
0
            ir_swap_operands(ctx, i, insn);
1844
0
            return 1;
1845
0
          } else {
1846
0
            r->end = pos;
1847
0
            if (!r->next) {
1848
0
              ival->end = pos;
1849
0
            }
1850
0
          }
1851
0
          break;
1852
0
        }
1853
0
        r = r->next;
1854
0
      }
1855
0
    }
1856
0
  }
1857
0
  return 0;
1858
0
}
1859
1860
int ir_coalesce(ir_ctx *ctx)
1861
0
{
1862
0
  uint32_t b, n, succ, pred_b, count = 0;
1863
0
  ir_ref *p, use, input, k, j;
1864
0
  ir_block *bb, *succ_bb;
1865
0
  ir_use_list *use_list;
1866
0
  ir_insn *insn;
1867
0
  ir_bitset visited;
1868
0
  ir_coalesce_block *list;
1869
0
  bool compact = 0;
1870
1871
  /* Collect a list of blocks which are predecessors of blocks with phi functions */
1872
0
  list = ir_mem_malloc(sizeof(ir_coalesce_block) * ctx->cfg_blocks_count);
1873
0
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
1874
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
1875
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1876
0
    if (bb->flags & IR_BB_HAS_PHI) {
1877
0
      k = bb->predecessors_count;
1878
0
      if (k > 1) {
1879
0
        use_list = &ctx->use_lists[bb->start];
1880
0
        n = use_list->count;
1881
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
1882
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1883
0
          use = *p;
1884
0
          insn = &ctx->ir_base[use];
1885
0
          if (insn->op == IR_PHI) {
1886
0
            do {
1887
0
              k--;
1888
0
              pred_b = ctx->cfg_edges[bb->predecessors + k];
1889
0
              if (!ir_bitset_in(visited, pred_b)) {
1890
0
                ir_bitset_incl(visited, pred_b);
1891
0
                list[count].b = pred_b;
1892
0
                list[count].loop_depth = ctx->cfg_blocks[pred_b].loop_depth;
1893
0
                count++;
1894
0
              }
1895
0
            } while (k > 0);
1896
0
            break;
1897
0
          }
1898
0
        }
1899
0
      }
1900
0
    }
1901
0
  }
1902
0
  ir_mem_free(visited);
1903
1904
  /* Sort blocks according to their loop depth */
1905
0
  qsort(list, count, sizeof(ir_coalesce_block), ir_block_cmp);
1906
1907
0
  while (count > 0) {
1908
1909
0
    count--;
1910
0
    b = list[count].b;
1911
0
    bb = &ctx->cfg_blocks[b];
1912
0
    IR_ASSERT(bb->successors_count == 1);
1913
0
    succ = ctx->cfg_edges[bb->successors];
1914
0
    succ_bb = &ctx->cfg_blocks[succ];
1915
0
    IR_ASSERT(succ_bb->predecessors_count > 1);
1916
0
    k = ir_phi_input_number(ctx, succ_bb, b);
1917
0
    use_list = &ctx->use_lists[succ_bb->start];
1918
0
    n = use_list->count;
1919
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1920
0
      use = *p;
1921
0
      insn = &ctx->ir_base[use];
1922
0
      if (insn->op == IR_PHI) {
1923
0
        input = ir_insn_op(insn, k);
1924
0
        if (input > 0 && ctx->vregs[input]) {
1925
0
          uint32_t v1 = ctx->vregs[input];
1926
0
          uint32_t v2 = ctx->vregs[use];
1927
1928
0
          if (v1 == v2) {
1929
            /* already coalesced */
1930
0
          } else {
1931
0
            if (!ir_vregs_overlap(ctx, v1, v2)) {
1932
0
              ir_vregs_coalesce(ctx, v1, v2, input, use);
1933
0
              compact = 1;
1934
0
            } else {
1935
0
#if 1
1936
0
              if (ctx->rules && (ctx->rules[input] & IR_MAY_SWAP)) {
1937
0
                ir_insn *input_insn = &ctx->ir_base[input];
1938
1939
0
                IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE);
1940
0
                if (input_insn->op2 == use
1941
0
                 && input_insn->op1 != use
1942
0
                 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)) {
1943
0
                  ir_live_range *r = &ctx->live_intervals[v2]->range;
1944
1945
0
                  do {
1946
0
                    if (r->end == IR_USE_LIVE_POS_FROM_REF(input)) {
1947
0
                      break;
1948
0
                    }
1949
0
                    r = r->next;
1950
0
                  } while (r);
1951
0
                  if (r) {
1952
0
                    r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1953
0
                    if (!r->next) {
1954
0
                      ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1955
0
                    }
1956
0
                    if (ir_vregs_overlap(ctx, v1, v2)) {
1957
0
                      r->end = IR_USE_LIVE_POS_FROM_REF(input);
1958
0
                      if (!r->next) {
1959
0
                        ctx->live_intervals[v2]->end = IR_USE_LIVE_POS_FROM_REF(input);
1960
0
                      }
1961
0
                    } else {
1962
0
                      ir_swap_operands(ctx, input, input_insn);
1963
0
                      IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
1964
0
                      ir_vregs_coalesce(ctx, v1, v2, input, use);
1965
0
                      compact = 1;
1966
0
                      continue;
1967
0
                    }
1968
0
                  }
1969
0
                }
1970
0
              }
1971
0
#endif
1972
0
              ir_add_phi_move(ctx, b, input, use);
1973
0
            }
1974
0
          }
1975
0
        } else {
1976
          /* Move for constant input */
1977
0
          ir_add_phi_move(ctx, b, input, use);
1978
0
        }
1979
0
      }
1980
0
    }
1981
0
  }
1982
0
  ir_mem_free(list);
1983
1984
0
  ir_hint_propagation(ctx);
1985
1986
0
  if (ctx->rules) {
1987
    /* try to swap operands of commutative instructions for better register allocation */
1988
0
    uint32_t *rule = ctx->rules + 1;
1989
0
    ir_ref i;
1990
1991
0
    for (i = 1; i < ctx->insns_count; rule++, i++) {
1992
0
      if ((*rule) & (IR_MAY_SWAP|IR_MAY_REUSE)) {
1993
0
        insn = &ctx->ir_base[i];
1994
0
        IR_ASSERT(ctx->vregs[i]);
1995
0
        if ((*rule) & IR_MAY_SWAP) {
1996
0
          IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE);
1997
0
          if (ctx->live_intervals[ctx->vregs[i]]->use_pos
1998
0
           && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
1999
0
           && insn->op2 > 0
2000
0
           && insn->op1 > 0
2001
0
           && insn->op1 != insn->op2) {
2002
0
            ir_try_swap_operands(ctx, i, insn);
2003
0
          }
2004
0
        } else {
2005
0
          IR_ASSERT((*rule) & IR_MAY_REUSE);
2006
0
          if (insn->op1 > 0
2007
0
           && ctx->vregs[insn->op1]
2008
0
           && ctx->vregs[i] != ctx->vregs[insn->op1]) {
2009
0
            if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
2010
0
              if (ctx->binding) {
2011
0
                ir_ref b1 = ir_binding_find(ctx, i);
2012
0
                ir_ref b2 = ir_binding_find(ctx, insn->op1);
2013
0
                if (b1 && b1 != b2) {
2014
0
                  continue;
2015
0
                }
2016
0
              }
2017
0
              ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
2018
0
              compact = 1;
2019
0
            }
2020
0
          }
2021
0
        }
2022
0
      }
2023
0
    }
2024
0
  }
2025
2026
0
  if (compact) {
2027
0
    ir_ref i, n;
2028
0
    uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t));
2029
2030
0
    for (i = 1, n = 1; i <= ctx->vregs_count; i++) {
2031
0
      if (ctx->live_intervals[i]) {
2032
0
        xlat[i] = n;
2033
0
        if (i != n) {
2034
0
          ctx->live_intervals[n] = ctx->live_intervals[i];
2035
0
          ctx->live_intervals[n]->vreg = n;
2036
0
        }
2037
0
        n++;
2038
0
      }
2039
0
    }
2040
0
    n--;
2041
0
    if (n != ctx->vregs_count) {
2042
0
      j = ctx->vregs_count - n;
2043
      /* vregs + tmp + fixed + SCRATCH + ALL */
2044
0
      for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
2045
0
        ctx->live_intervals[i] = ctx->live_intervals[i + j];
2046
0
        if (ctx->live_intervals[i]) {
2047
0
          ctx->live_intervals[i]->vreg = i;
2048
0
        }
2049
0
      }
2050
0
      for (j = 1; j < ctx->insns_count; j++) {
2051
0
        if (ctx->vregs[j]) {
2052
0
          ctx->vregs[j] = xlat[ctx->vregs[j]];
2053
0
        }
2054
0
      }
2055
0
      ctx->vregs_count = n;
2056
0
    }
2057
0
    ir_mem_free(xlat);
2058
0
  }
2059
2060
0
  return 1;
2061
0
}
2062
2063
/* SSA Deconstruction */
2064
2065
int ir_compute_dessa_moves(ir_ctx *ctx)
2066
0
{
2067
0
  uint32_t b, n;
2068
0
  ir_ref j, k, *p, use;
2069
0
  ir_block *bb;
2070
0
  ir_use_list *use_list;
2071
0
  ir_insn *insn;
2072
2073
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
2074
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
2075
0
    k = bb->predecessors_count;
2076
0
    if (k > 1) {
2077
0
      use_list = &ctx->use_lists[bb->start];
2078
0
      n = use_list->count;
2079
0
      if (n > 1) {
2080
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
2081
0
        k++;
2082
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2083
0
          use = *p;
2084
0
          insn = &ctx->ir_base[use];
2085
0
          if (insn->op == IR_PHI) {
2086
0
            for (j = 2; j <= k; j++) {
2087
0
              if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
2088
0
                int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
2089
0
                ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
2090
0
                ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
2091
0
                ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
2092
0
              }
2093
0
            }
2094
0
          }
2095
0
        }
2096
0
      }
2097
0
    }
2098
0
  }
2099
0
  return 1;
2100
0
}
2101
2102
/*
2103
 * Parallel copy sequentialization algorithm
2104
 *
2105
 * The implementation is based on Algorithm 1 described in
2106
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
2107
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
2108
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
2109
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
2110
 */
2111
int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
2112
0
{
2113
0
  uint32_t succ, k, n = 0;
2114
0
  ir_block *bb, *succ_bb;
2115
0
  ir_use_list *use_list;
2116
0
  ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
2117
0
  ir_ref s, d;
2118
0
  ir_insn *insn;
2119
0
  uint32_t len;
2120
0
  ir_bitset todo, ready;
2121
0
  bool have_constants_or_addresses = 0;
2122
2123
0
  bb = &ctx->cfg_blocks[b];
2124
0
  if (!(bb->flags & IR_BB_DESSA_MOVES)) {
2125
0
    return 0;
2126
0
  }
2127
0
  IR_ASSERT(bb->successors_count == 1);
2128
0
  succ = ctx->cfg_edges[bb->successors];
2129
0
  succ_bb = &ctx->cfg_blocks[succ];
2130
0
  IR_ASSERT(succ_bb->predecessors_count > 1);
2131
0
  use_list = &ctx->use_lists[succ_bb->start];
2132
2133
0
  k = ir_phi_input_number(ctx, succ_bb, b);
2134
2135
0
  loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
2136
0
  pred = loc + ctx->vregs_count + 1;
2137
0
  src = pred + ctx->vregs_count + 1;
2138
0
  dst = src + ctx->vregs_count + 1;
2139
0
  len = ir_bitset_len(ctx->vregs_count + 1);
2140
0
  todo = ir_bitset_malloc(ctx->vregs_count + 1);
2141
2142
0
  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2143
0
    ref = *p;
2144
0
    insn = &ctx->ir_base[ref];
2145
0
    if (insn->op == IR_PHI) {
2146
0
      input = ir_insn_op(insn, k);
2147
0
      if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2148
0
        have_constants_or_addresses = 1;
2149
0
      } else if (ctx->vregs[input] != ctx->vregs[ref]) {
2150
0
        s = ctx->vregs[input];
2151
0
        d = ctx->vregs[ref];
2152
0
        src[s] = input;
2153
0
        dst[d] = ref;
2154
0
        loc[d] = pred[s] = 0;
2155
0
        ir_bitset_incl(todo, d);
2156
0
        n++;
2157
0
      }
2158
0
    }
2159
0
  }
2160
2161
0
  if (n > 0) {
2162
0
    src[0] = dst[0] = 0;
2163
0
    ready = ir_bitset_malloc(ctx->vregs_count + 1);
2164
0
    IR_BITSET_FOREACH(todo, len, d) {
2165
0
      ref = dst[d];
2166
0
      insn = &ctx->ir_base[ref];
2167
0
      IR_ASSERT(insn->op == IR_PHI);
2168
0
      input = ir_insn_op(insn, k);
2169
0
      s = ctx->vregs[input];
2170
0
      loc[s] = s;
2171
0
      pred[d] = s;
2172
0
    } IR_BITSET_FOREACH_END();
2173
2174
0
    IR_BITSET_FOREACH(todo, len, i) {
2175
0
      if (!loc[i]) {
2176
0
        ir_bitset_incl(ready, i);
2177
0
      }
2178
0
    } IR_BITSET_FOREACH_END();
2179
2180
0
    while (1) {
2181
0
      ir_ref a, b, c;
2182
2183
0
      while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
2184
0
        a = pred[b];
2185
0
        c = loc[a];
2186
0
        emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
2187
0
        ir_bitset_excl(todo, b);
2188
0
        loc[a] = b;
2189
0
        src[b] = dst[b];
2190
0
        if (a == c && pred[a]) {
2191
0
          ir_bitset_incl(ready, a);
2192
0
        }
2193
0
      }
2194
0
      b = ir_bitset_pop_first(todo, len);
2195
0
      if (b < 0) {
2196
0
        break;
2197
0
      }
2198
0
      IR_ASSERT(b != loc[pred[b]]);
2199
0
      emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
2200
0
      loc[b] = 0;
2201
0
      ir_bitset_incl(ready, b);
2202
0
    }
2203
2204
0
    ir_mem_free(ready);
2205
0
  }
2206
2207
0
  ir_mem_free(todo);
2208
0
  ir_mem_free(loc);
2209
2210
0
  if (have_constants_or_addresses) {
2211
0
    for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2212
0
      ref = *p;
2213
0
      insn = &ctx->ir_base[ref];
2214
0
      if (insn->op == IR_PHI) {
2215
0
        input = ir_insn_op(insn, k);
2216
0
        if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2217
0
          emit_copy(ctx, insn->type, input, ref);
2218
0
        }
2219
0
      }
2220
0
    }
2221
0
  }
2222
2223
0
  return 1;
2224
0
}
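
/* A standalone, hedged sketch of the sequentialization scheme used by
 * ir_gen_dessa_moves() above, operating on small integer "locations".
 * Location 0 plays the role of the spare temporary, mirroring the
 * emit_copy(..., src[b], 0) cycle-breaking step; all names below are
 * illustrative, not part of the IR API. */
#include <stdio.h>
#include <string.h>

#define MAX_LOC 16

static void emit(int from, int to)
{
    if (to == 0) {
        printf("mov loc%d -> tmp\n", from);
    } else if (from == 0) {
        printf("mov tmp -> loc%d\n", to);
    } else {
        printf("mov loc%d -> loc%d\n", from, to);
    }
}

/* Sequentialize the parallel copy {d[i] <- s[i]} (distinct d[i], s[i] != d[i]). */
static void seq_parallel_copy(int n, const int *s, const int *d)
{
    int loc[MAX_LOC], pred[MAX_LOC], todo[MAX_LOC], ready[MAX_LOC];
    int n_todo = 0, n_ready = 0, i, a, b, c;

    memset(loc, -1, sizeof(loc));
    memset(pred, -1, sizeof(pred));
    for (i = 0; i < n; i++) {
        loc[s[i]] = s[i];            /* each source value starts "at home" */
        pred[d[i]] = s[i];
        todo[n_todo++] = d[i];
    }
    for (i = 0; i < n_todo; i++) {
        if (loc[todo[i]] == -1) {    /* a destination that is not a source */
            ready[n_ready++] = todo[i];
        }
    }
    while (n_todo > 0) {
        while (n_ready > 0) {
            b = ready[--n_ready];    /* safe to overwrite b now */
            a = pred[b];
            c = loc[a];              /* where the value wanted by b lives */
            emit(c, b);
            loc[a] = b;
            for (i = 0; i < n_todo; i++) {
                if (todo[i] == b) { todo[i] = todo[--n_todo]; break; }
            }
            if (a == c && pred[a] != -1) {
                ready[n_ready++] = a;
            }
        }
        if (n_todo == 0) break;
        b = todo[n_todo - 1];        /* only cycles remain: break one via tmp */
        emit(b, 0);
        loc[b] = 0;
        ready[n_ready++] = b;
    }
}

int main(void)
{
    int s[] = {2, 1}, d[] = {1, 2}; /* swap loc1 and loc2 "in parallel" */
    seq_parallel_copy(2, s, d);     /* mov loc2 -> tmp; mov loc1 -> loc2; mov tmp -> loc1 */
    return 0;
}
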
2225
2226
/* Linear Scan Register Allocation */
2227
2228
#ifdef IR_DEBUG
2229
0
# define IR_LOG_LSRA(action, ival, comment) do { \
2230
0
    if (ctx->flags & IR_DEBUG_RA) { \
2231
0
      ir_live_interval *_ival = (ival); \
2232
0
      ir_live_pos _start = _ival->range.start; \
2233
0
      ir_live_pos _end = _ival->end; \
2234
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
2235
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2236
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2237
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
2238
0
    } \
2239
0
  } while (0)
2240
0
# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
2241
0
    if (ctx->flags & IR_DEBUG_RA) { \
2242
0
      ir_live_interval *_ival = (ival); \
2243
0
      ir_live_pos _start = _ival->range.start; \
2244
0
      ir_live_pos _end = _ival->end; \
2245
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
2246
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2247
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2248
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2249
0
        ir_reg_name(_ival->reg, _ival->type)); \
2250
0
    } \
2251
0
  } while (0)
2252
0
# define IR_LOG_LSRA_SPLIT(ival, pos) do { \
2253
0
    if (ctx->flags & IR_DEBUG_RA) { \
2254
0
      ir_live_interval *_ival = (ival); \
2255
0
      ir_live_pos _start = _ival->range.start; \
2256
0
      ir_live_pos _end = _ival->end; \
2257
0
      ir_live_pos _pos = (pos); \
2258
0
      fprintf(stderr, "      ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
2259
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2260
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2261
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2262
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2263
0
    } \
2264
0
  } while (0)
2265
0
# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
2266
0
    if (ctx->flags & IR_DEBUG_RA) { \
2267
0
      ir_live_interval *_ival = (ival); \
2268
0
      ir_live_pos _start = _ival->range.start; \
2269
0
      ir_live_pos _end = _ival->end; \
2270
0
      ir_live_pos _pos = (pos); \
2271
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
2272
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2273
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2274
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2275
0
        ir_reg_name(_ival->reg, _ival->type), \
2276
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2277
0
    } \
2278
0
  } while (0)
2279
#else
2280
# define IR_LOG_LSRA(action, ival, comment)
2281
# define IR_LOG_LSRA_ASSIGN(action, ival, comment)
2282
# define IR_LOG_LSRA_SPLIT(ival, pos)
2283
# define IR_LOG_LSRA_CONFLICT(action, ival, pos)
2284
#endif
2285
2286
static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position)
2287
0
{
2288
0
  ir_live_range *live_range = &ival->range;
2289
2290
0
  do {
2291
0
    if (position < live_range->end) {
2292
0
      return position >= live_range->start;
2293
0
    }
2294
0
    live_range = live_range->next;
2295
0
  } while (live_range);
2296
2297
0
  return 0;
2298
0
}
2299
2300
static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to)
2301
0
{
2302
0
  ir_live_range *r = &ival->range;
2303
2304
0
  while (r) {
2305
0
    if (from < r->start) {
2306
0
      return 1;
2307
0
    } else if (to <= r->end) {
2308
0
      return 0;
2309
0
    }
2310
0
    r = r->next;
2311
0
  }
2312
0
  return 0;
2313
0
}
2314
2315
2316
static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2317
0
{
2318
0
  ir_live_pos ret = 0;
2319
0
  ir_use_pos *p = ival->use_pos;
2320
2321
0
  while (p && p->pos <= pos) {
2322
0
    if (p->flags & flags) {
2323
0
      ret = p->pos;
2324
0
    }
2325
0
    p = p->next;
2326
0
  }
2327
0
  return ret;
2328
0
}
2329
2330
static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2331
0
{
2332
0
  ir_use_pos *p = ival->use_pos;
2333
2334
0
  while (p && p->pos < pos) {
2335
0
    p = p->next;
2336
0
  }
2337
0
  if (p && p->pos == pos && p->op_num != 0) {
2338
0
    p = p->next;
2339
0
  }
2340
0
  while (p && !(p->flags & flags)) {
2341
0
    p = p->next;
2342
0
  }
2343
0
  return p ? p->pos : 0x7fffffff;
2344
0
}
2345
2346
static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags)
2347
0
{
2348
0
  ir_use_pos *p = ival->use_pos;
2349
2350
0
  while (p && !(p->flags & flags)) {
2351
0
    p = p->next;
2352
0
  }
2353
0
  return p ? p->pos : 0x7fffffff;
2354
0
}
2355
2356
static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
2357
0
{
2358
0
  ir_ref ref = IR_LIVE_POS_TO_REF(pos);
2359
0
  uint32_t b = ctx->cfg_map[ref];
2360
2361
0
  while (!b) {
2362
0
    ref--;
2363
0
    IR_ASSERT(ref > 0);
2364
0
    b = ctx->cfg_map[ref];
2365
0
  }
2366
0
  IR_ASSERT(b <= ctx->cfg_blocks_count);
2367
0
  return &ctx->cfg_blocks[b];
2368
0
}
2369
2370
static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max)
2371
0
{
2372
0
  ir_block *min_bb, *max_bb;
2373
2374
0
  if (min_pos == max_pos) {
2375
0
    return max_pos;
2376
0
  }
2377
2378
0
  IR_ASSERT(min_pos < max_pos);
2379
0
  IR_ASSERT(min_pos >= ival->range.start);
2380
0
  IR_ASSERT(max_pos < ival->end);
2381
2382
0
  min_bb = ir_block_from_live_pos(ctx, min_pos);
2383
0
  max_bb = ir_block_from_live_pos(ctx, max_pos);
2384
2385
0
  if (min_bb == max_bb
2386
0
   || ir_ival_has_hole_between(ival, min_pos, max_pos)) {  // TODO: ???
2387
0
    return (prefer_max) ? max_pos : min_pos;
2388
0
  }
2389
2390
0
  if (max_bb->loop_depth > 0) {
2391
    /* Split at the end of the loop entry */
2392
0
    do {
2393
0
      ir_block *bb;
2394
2395
0
      if (max_bb->flags & IR_BB_LOOP_HEADER) {
2396
0
        bb = max_bb;
2397
0
      } else {
2398
0
        IR_ASSERT(max_bb->loop_header);
2399
0
        bb = &ctx->cfg_blocks[max_bb->loop_header];
2400
0
      }
2401
0
      bb = &ctx->cfg_blocks[bb->idom];
2402
0
      if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) {
2403
0
        break;
2404
0
      }
2405
0
      max_bb = bb;
2406
0
    } while (max_bb->loop_depth > 0);
2407
2408
0
    if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) {
2409
0
      return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
2410
0
    }
2411
0
  }
2412
2413
0
  if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) {
2414
0
    return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
2415
0
  } else {
2416
    // TODO: "min_bb" is in a deeper loop than "max_bb" ???
2417
0
    return max_pos;
2418
0
  }
2419
0
}
2420
2421
static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos)
2422
0
{
2423
0
  ir_live_interval *child;
2424
0
  ir_live_range *p, *prev;
2425
0
  ir_use_pos *use_pos, *prev_use_pos;
2426
2427
0
  IR_LOG_LSRA_SPLIT(ival, pos);
2428
0
  IR_ASSERT(pos > ival->range.start);
2429
0
  ctx->flags2 |= IR_RA_HAVE_SPLITS;
2430
2431
0
  p = &ival->range;
2432
0
  prev = NULL;
2433
0
  while (p && pos >= p->end) {
2434
0
    prev = p;
2435
0
    p = prev->next;
2436
0
  }
2437
0
  IR_ASSERT(p);
2438
2439
0
  if (pos < p->start) {
2440
    /* split between ranges */
2441
0
    pos = p->start;
2442
0
  }
2443
2444
0
  use_pos = ival->use_pos;
2445
0
  prev_use_pos = NULL;
2446
2447
0
  ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS);
2448
0
  if (p->start == pos) {
2449
0
    while (use_pos && pos > use_pos->pos) {
2450
0
      if (use_pos->hint != IR_REG_NONE) {
2451
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2452
0
      }
2453
0
      if (use_pos->hint_ref > 0) {
2454
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2455
0
      }
2456
0
      prev_use_pos = use_pos;
2457
0
      use_pos = use_pos->next;
2458
0
    }
2459
0
  } else {
2460
0
    while (use_pos && pos >= use_pos->pos) {
2461
0
      if (use_pos->hint != IR_REG_NONE) {
2462
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2463
0
      }
2464
0
      if (use_pos->hint_ref > 0) {
2465
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2466
0
      }
2467
0
      prev_use_pos = use_pos;
2468
0
      use_pos = use_pos->next;
2469
0
    }
2470
0
  }
2471
2472
0
  child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
2473
0
  child->type = ival->type;
2474
0
  child->reg = IR_REG_NONE;
2475
0
  child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD;
2476
0
  child->vreg = ival->vreg;
2477
0
  child->stack_spill_pos = -1; // not allocated
2478
0
  child->range.start = pos;
2479
0
  child->range.end = p->end;
2480
0
  child->range.next = p->next;
2481
0
  child->end = ival->end;
2482
0
  child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;
2483
2484
0
  child->next = ival->next;
2485
0
  ival->next = child;
2486
2487
0
  if (pos == p->start) {
2488
0
    prev->next = NULL;
2489
0
    ival->end = prev->end;
2490
    /* Cache to reuse */
2491
0
    p->next = ctx->unused_ranges;
2492
0
    ctx->unused_ranges = p;
2493
0
  } else {
2494
0
    p->end = ival->end = pos;
2495
0
    p->next = NULL;
2496
0
  }
2497
0
  if (prev_use_pos) {
2498
0
    prev_use_pos->next = NULL;
2499
0
  } else {
2500
0
    ival->use_pos = NULL;
2501
0
  }
2502
2503
0
  use_pos = child->use_pos;
2504
0
  while (use_pos) {
2505
0
    if (use_pos->hint != IR_REG_NONE) {
2506
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2507
0
    }
2508
0
    if (use_pos->hint_ref > 0) {
2509
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2510
0
    }
2511
0
    use_pos = use_pos->next;
2512
0
  }
2513
2514
0
  return child;
2515
0
}
2516
2517
static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_alloc_data *data)
2518
0
{
2519
0
  int32_t ret;
2520
2521
0
  IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
2522
0
  if (data->handled && data->handled[size]) {
2523
0
    ret = data->handled[size]->stack_spill_pos;
2524
0
    data->handled[size] = data->handled[size]->list_next;
2525
0
  } else if (size == 8) {
2526
0
    ret = ctx->stack_frame_size;
2527
0
    ctx->stack_frame_size += 8;
2528
0
  } else if (size == 4) {
2529
0
    if (data->unused_slot_4) {
2530
0
      ret = data->unused_slot_4;
2531
0
      data->unused_slot_4 = 0;
2532
0
    } else if (data->handled && data->handled[8]) {
2533
0
      ret = data->handled[8]->stack_spill_pos;
2534
0
      data->handled[8] = data->handled[8]->list_next;
2535
0
      data->unused_slot_4 = ret + 4;
2536
0
    } else {
2537
0
      ret = ctx->stack_frame_size;
2538
0
      if (sizeof(void*) == 8) {
2539
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2540
0
        ctx->stack_frame_size += 8;
2541
0
      } else {
2542
0
        ctx->stack_frame_size += 4;
2543
0
      }
2544
0
    }
2545
0
  } else if (size == 2) {
2546
0
    if (data->unused_slot_2) {
2547
0
      ret = data->unused_slot_2;
2548
0
      data->unused_slot_2 = 0;
2549
0
    } else if (data->unused_slot_4) {
2550
0
      ret = data->unused_slot_4;
2551
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2552
0
      data->unused_slot_4 = 0;
2553
0
    } else if (data->handled && data->handled[4]) {
2554
0
      ret = data->handled[4]->stack_spill_pos;
2555
0
      data->handled[4] = data->handled[4]->list_next;
2556
0
      data->unused_slot_2 = ret + 2;
2557
0
    } else if (data->handled && data->handled[8]) {
2558
0
      ret = data->handled[8]->stack_spill_pos;
2559
0
      data->handled[8] = data->handled[8]->list_next;
2560
0
      data->unused_slot_2 = ret + 2;
2561
0
      data->unused_slot_4 = ret + 4;
2562
0
    } else {
2563
0
      ret = ctx->stack_frame_size;
2564
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2565
0
      if (sizeof(void*) == 8) {
2566
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2567
0
        ctx->stack_frame_size += 8;
2568
0
      } else {
2569
0
        ctx->stack_frame_size += 4;
2570
0
      }
2571
0
    }
2572
0
  } else if (size == 1) {
2573
0
    if (data->unused_slot_1) {
2574
0
      ret = data->unused_slot_1;
2575
0
      data->unused_slot_1 = 0;
2576
0
    } else if (data->unused_slot_2) {
2577
0
      ret = data->unused_slot_2;
2578
0
      data->unused_slot_1 = data->unused_slot_2 + 1;
2579
0
      data->unused_slot_2 = 0;
2580
0
    } else if (data->unused_slot_4) {
2581
0
      ret = data->unused_slot_4;
2582
0
      data->unused_slot_1 = data->unused_slot_4 + 1;
2583
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2584
0
      data->unused_slot_4 = 0;
2585
0
    } else if (data->handled && data->handled[2]) {
2586
0
      ret = data->handled[2]->stack_spill_pos;
2587
0
      data->handled[2] = data->handled[2]->list_next;
2588
0
      data->unused_slot_1 = ret + 1;
2589
0
    } else if (data->handled && data->handled[4]) {
2590
0
      ret = data->handled[4]->stack_spill_pos;
2591
0
      data->handled[4] = data->handled[4]->list_next;
2592
0
      data->unused_slot_1 = ret + 1;
2593
0
      data->unused_slot_2 = ret + 2;
2594
0
    } else if (data->handled && data->handled[8]) {
2595
0
      ret = data->handled[8]->stack_spill_pos;
2596
0
      data->handled[8] = data->handled[8]->list_next;
2597
0
      data->unused_slot_1 = ret + 1;
2598
0
      data->unused_slot_2 = ret + 2;
2599
0
      data->unused_slot_4 = ret + 4;
2600
0
    } else {
2601
0
      ret = ctx->stack_frame_size;
2602
0
      data->unused_slot_1 = ctx->stack_frame_size + 1;
2603
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2604
0
      if (sizeof(void*) == 8) {
2605
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2606
0
        ctx->stack_frame_size += 8;
2607
0
      } else {
2608
0
        ctx->stack_frame_size += 4;
2609
0
      }
2610
0
    }
2611
0
  } else {
2612
0
    ret = IR_NULL;
2613
0
  }
2614
0
  return ret;
2615
0
}
2616
2617
int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data)
2618
0
{
2619
0
  return ir_allocate_small_spill_slot(ctx, ir_type_size[type], data);
2620
0
}
2621
2622
static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_alloc_data *data)
2623
0
{
2624
0
  int32_t ret;
2625
2626
0
  if (size <= 8) {
2627
0
    if (size == 3) {
2628
0
      size = 4;
2629
0
    } else if (size > 4 && size < 8) {
2630
0
      size = 8;
2631
0
    }
2632
0
    return ir_allocate_small_spill_slot(ctx, size, data);
2633
0
  }
2634
2635
  /* Align stack-allocated data to a 16-byte boundary */
2636
0
  ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
2637
0
  ret = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
2638
0
  size = IR_ALIGNED_SIZE(size, 8);
2639
0
  ctx->stack_frame_size = ret + size;
2640
2641
0
  return ret;
2642
0
}
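
/* The big-slot path above rounds the slot offset up to a 16-byte boundary and
 * the slot size up to 8 bytes.  A hedged sketch of that round-up arithmetic;
 * the helper name is illustrative (the real code uses the IR_ALIGNED_SIZE
 * macro for the same power-of-two rounding). */
#include <stdio.h>
#include <stdint.h>

/* Round "size" up to the next multiple of "align" (align: a power of two). */
static int32_t aligned_size(int32_t size, int32_t align)
{
    return (size + (align - 1)) & ~(align - 1);
}

int main(void)
{
    int32_t frame = 20, size = 24;
    int32_t slot = aligned_size(frame, 16);   /* 32: slot offset, 16-byte aligned */

    frame = slot + aligned_size(size, 8);     /* 32 + 24 = 56: new frame size */
    printf("slot=%d frame=%d\n", slot, frame);
    return 0;
}
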
2643
2644
static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2645
0
{
2646
0
  ir_use_pos *use_pos;
2647
0
  ir_reg reg;
2648
2649
0
  use_pos = ival->use_pos;
2650
0
  while (use_pos) {
2651
0
    reg = use_pos->hint;
2652
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2653
0
      return reg;
2654
0
    }
2655
0
    use_pos = use_pos->next;
2656
0
  }
2657
2658
0
  return IR_REG_NONE;
2659
0
}
2660
2661
static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos)
2662
0
{
2663
0
  ir_use_pos *use_pos;
2664
0
  ir_reg reg;
2665
2666
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2667
0
    use_pos = ival->use_pos;
2668
0
    while (use_pos) {
2669
0
      reg = use_pos->hint;
2670
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2671
0
        if (ival->end <= freeUntilPos[reg]) {
2672
          /* register available for the whole interval */
2673
0
          return reg;
2674
0
        }
2675
0
      }
2676
0
      use_pos = use_pos->next;
2677
0
    }
2678
0
  }
2679
2680
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) {
2681
0
    use_pos = ival->use_pos;
2682
0
    while (use_pos) {
2683
0
      if (use_pos->hint_ref > 0) {
2684
0
        reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2685
0
        if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2686
0
          if (ival->end <= freeUntilPos[reg]) {
2687
            /* register available for the whole interval */
2688
0
            return reg;
2689
0
          }
2690
0
        }
2691
0
      }
2692
0
      use_pos = use_pos->next;
2693
0
    }
2694
0
  }
2695
2696
0
  return IR_REG_NONE;
2697
0
}
2698
2699
static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2700
0
{
2701
0
  ir_use_pos *use_pos;
2702
0
  ir_reg reg;
2703
2704
0
  use_pos = ival->use_pos;
2705
0
  while (use_pos) {
2706
0
    reg = use_pos->hint;
2707
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2708
0
      return reg;
2709
0
    } else if (use_pos->hint_ref > 0) {
2710
0
      reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2711
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2712
0
        return reg;
2713
0
      }
2714
0
    }
2715
0
    use_pos = use_pos->next;
2716
0
  }
2717
2718
0
  return IR_REG_NONE;
2719
0
}
2720
2721
static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2722
0
{
2723
0
  ir_live_pos pos = ival->range.start;
2724
2725
0
  if (*unhandled == NULL
2726
0
   || pos < (*unhandled)->range.start
2727
0
   || (pos == (*unhandled)->range.start
2728
0
    && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2729
0
    && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2730
0
   || (pos == (*unhandled)->range.start
2731
0
    && ival->vreg > (*unhandled)->vreg)) {
2732
0
    ival->list_next = *unhandled;
2733
0
    *unhandled = ival;
2734
0
  } else {
2735
0
    ir_live_interval *prev = *unhandled;
2736
2737
0
    while (prev->list_next) {
2738
0
      if (pos < prev->list_next->range.start
2739
0
       || (pos == prev->list_next->range.start
2740
0
        && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2741
0
        && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2742
0
       || (pos == prev->list_next->range.start
2743
0
        && ival->vreg > prev->list_next->vreg)) {
2744
0
        break;
2745
0
      }
2746
0
      prev = prev->list_next;
2747
0
    }
2748
0
    ival->list_next = prev->list_next;
2749
0
    prev->list_next = ival;
2750
0
  }
2751
0
}
2752
2753
/* merge sorted lists */
2754
static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2755
0
{
2756
0
  ir_live_interval **prev;
2757
0
  ir_live_pos pos;
2758
2759
0
  if (*unhandled == NULL) {
2760
0
    *unhandled = ival;
2761
0
    while (ival) {
2762
0
      ival = ival->list_next = ival->next;
2763
0
    }
2764
0
  } else {
2765
0
    prev = unhandled;
2766
0
    while (ival) {
2767
0
      pos = ival->range.start;
2768
0
      while (*prev && pos >= (*prev)->range.start) {
2769
0
        prev = &(*prev)->list_next;
2770
0
      }
2771
0
      ival->list_next = *prev;
2772
0
      *prev = ival;
2773
0
      prev = &ival->list_next;
2774
0
      ival = ival->next;
2775
0
    }
2776
0
  }
2777
0
#ifdef IR_DEBUG
2778
0
  ival = *unhandled;
2779
0
  pos = 0;
2780
2781
0
  while (ival) {
2782
0
    IR_ASSERT(ival->range.start >= pos);
2783
0
    pos = ival->range.start;
2784
0
    ival = ival->list_next;
2785
0
  }
2786
0
#endif
2787
0
}
2788
2789
static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival)
2790
0
{
2791
0
  ir_live_pos pos = ival->range.start;
2792
2793
0
  if (*unhandled == NULL
2794
0
   || pos <= (*unhandled)->range.start) {
2795
0
    ival->list_next = *unhandled;
2796
0
    *unhandled = ival;
2797
0
  } else {
2798
0
    ir_live_interval *prev = *unhandled;
2799
2800
0
    while (prev->list_next) {
2801
0
      if (pos <= prev->list_next->range.start) {
2802
0
        break;
2803
0
      }
2804
0
      prev = prev->list_next;
2805
0
    }
2806
0
    ival->list_next = prev->list_next;
2807
0
    prev->list_next = ival;
2808
0
  }
2809
0
}
2810
2811
static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
2812
0
{
2813
0
  ir_live_pos freeUntilPos[IR_REG_NUM];
2814
0
  int i, reg;
2815
0
  ir_live_pos pos, next;
2816
0
  ir_live_interval *other;
2817
0
  ir_regset available, overlapped, scratch;
2818
2819
0
  if (IR_IS_TYPE_FP(ival->type)) {
2820
0
    available = IR_REGSET_FP;
2821
    /* set freeUntilPos of all physical registers to maxInt */
2822
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
2823
0
      freeUntilPos[i] = 0x7fffffff;
2824
0
    }
2825
0
  } else {
2826
0
    available = IR_REGSET_GP;
2827
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
2828
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
2829
0
    }
2830
#if defined(IR_TARGET_X86)
2831
    if (ir_type_size[ival->type] == 1) {
2832
      /* TODO: if no registers are available, we may reuse one of these registers for an already allocated interval ??? */
2833
      IR_REGSET_EXCL(available, IR_REG_RBP);
2834
      IR_REGSET_EXCL(available, IR_REG_RSI);
2835
      IR_REGSET_EXCL(available, IR_REG_RDI);
2836
    }
2837
#endif
2838
    /* set freeUntilPos of all physical registers to maxInt */
2839
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
2840
0
      freeUntilPos[i] = 0x7fffffff;
2841
0
    }
2842
0
  }
2843
2844
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
2845
2846
  /* for each interval it in active */
2847
0
  other = *active;
2848
0
  while (other) {
2849
    /* freeUntilPos[it.reg] = 0 */
2850
0
    reg = other->reg;
2851
0
    IR_ASSERT(reg >= 0);
2852
0
    if (reg >= IR_REG_SCRATCH) {
2853
0
      if (reg == IR_REG_SCRATCH) {
2854
0
        available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
2855
0
      } else {
2856
0
        IR_ASSERT(reg == IR_REG_ALL);
2857
0
        available = IR_REGSET_EMPTY;
2858
0
      }
2859
0
    } else {
2860
0
      IR_REGSET_EXCL(available, reg);
2861
0
    }
2862
0
    other = other->list_next;
2863
0
  }
2864
2865
  /* for each interval it in inactive intersecting with current
2866
   *
2867
   * This loop is not necessary for a program in SSA form (see LSRA on SSA, fig. 6),
2868
   * but it is still necessary after coalescing and splitting
2869
   */
2870
0
  overlapped = IR_REGSET_EMPTY;
2871
0
  other = inactive;
2872
0
  pos = ival->end;
2873
0
  while (other) {
2874
    /* freeUntilPos[it.reg] = next intersection of it with current */
2875
0
    if (other->current_range->start < pos) {
2876
0
      next = ir_ivals_overlap(&ival->range, other->current_range);
2877
0
      if (next) {
2878
0
        reg = other->reg;
2879
0
        IR_ASSERT(reg >= 0);
2880
0
        if (reg >= IR_REG_SCRATCH) {
2881
0
          ir_regset regset;
2882
2883
0
          if (reg == IR_REG_SCRATCH) {
2884
0
            regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2885
0
          } else {
2886
0
            IR_ASSERT(reg == IR_REG_ALL);
2887
0
            regset = available;
2888
0
          }
2889
0
          overlapped = IR_REGSET_UNION(overlapped, regset);
2890
0
          IR_REGSET_FOREACH(regset, reg) {
2891
0
            if (next < freeUntilPos[reg]) {
2892
0
              freeUntilPos[reg] = next;
2893
0
            }
2894
0
          } IR_REGSET_FOREACH_END();
2895
0
        } else if (IR_REGSET_IN(available, reg)) {
2896
0
          IR_REGSET_INCL(overlapped, reg);
2897
0
          if (next < freeUntilPos[reg]) {
2898
0
            freeUntilPos[reg] = next;
2899
0
          }
2900
0
        }
2901
0
      }
2902
0
    }
2903
0
    other = other->list_next;
2904
0
  }
2905
2906
0
  available = IR_REGSET_DIFFERENCE(available, overlapped);
2907
0
  if (available != IR_REGSET_EMPTY) {
2908
2909
0
    if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
2910
      /* Try to use hint */
2911
0
      reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);
2912
0
      if (reg != IR_REG_NONE) {
2913
0
        ival->reg = reg;
2914
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (hint available without spilling)");
2915
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2916
0
          ival->list_next = *active;
2917
0
          *active = ival;
2918
0
        }
2919
0
        return reg;
2920
0
      }
2921
0
    }
2922
2923
0
    if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) {
2924
      /* Try to reuse the register previously allocated for the split interval */
2925
0
      reg = ctx->live_intervals[ival->vreg]->reg;
2926
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2927
0
        ival->reg = reg;
2928
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
2929
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2930
0
          ival->list_next = *active;
2931
0
          *active = ival;
2932
0
        }
2933
0
        return reg;
2934
0
      }
2935
0
    }
2936
2937
    /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2938
0
    scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2939
0
    if (scratch != IR_REGSET_EMPTY) {
2940
      /* prefer registers that don't conflict with the hints for the following unhandled intervals */
2941
0
      if (1) {
2942
0
        ir_regset non_conflicting = scratch;
2943
2944
0
        other = *unhandled;
2945
0
        while (other && other->range.start < ival->range.end) {
2946
0
          if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2947
0
            reg = ir_get_first_reg_hint(ctx, other, non_conflicting);
2948
2949
0
            if (reg >= 0) {
2950
0
              IR_REGSET_EXCL(non_conflicting, reg);
2951
0
              if (non_conflicting == IR_REGSET_EMPTY) {
2952
0
                break;
2953
0
              }
2954
0
            }
2955
0
          }
2956
0
          other = other->list_next;
2957
0
        }
2958
0
        if (non_conflicting != IR_REGSET_EMPTY) {
2959
0
          reg = IR_REGSET_FIRST(non_conflicting);
2960
0
        } else {
2961
0
          reg = IR_REGSET_FIRST(scratch);
2962
0
        }
2963
0
      } else {
2964
0
        reg = IR_REGSET_FIRST(scratch);
2965
0
      }
2966
0
    } else {
2967
0
      reg = IR_REGSET_FIRST(available);
2968
0
    }
2969
0
    ival->reg = reg;
2970
0
    IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
2971
0
    if (*unhandled && ival->end > (*unhandled)->range.start) {
2972
0
      ival->list_next = *active;
2973
0
      *active = ival;
2974
0
    }
2975
0
    return reg;
2976
0
  }
2977
2978
  /* reg = register with highest freeUntilPos */
2979
0
  reg = IR_REG_NONE;
2980
0
  pos = 0;
2981
0
  IR_REGSET_FOREACH(overlapped, i) {
2982
0
    if (freeUntilPos[i] > pos) {
2983
0
      pos = freeUntilPos[i];
2984
0
      reg = i;
2985
0
    } else if (freeUntilPos[i] == pos
2986
0
        && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg)
2987
0
        && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) {
2988
      /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2989
0
      pos = freeUntilPos[i];
2990
0
      reg = i;
2991
0
    }
2992
0
  } IR_REGSET_FOREACH_END();
2993
2994
0
  if (pos > ival->range.start) {
2995
    /* register available for the first part of the interval */
2996
    /* split current before freeUntilPos[reg] */
2997
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival, pos,
2998
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
2999
0
    if (split_pos > ival->range.start) {
3000
0
      split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
3001
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3002
0
      if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3003
0
        ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);
3004
3005
0
        if (pref_reg != IR_REG_NONE) {
3006
0
          ival->reg = pref_reg;
3007
0
        } else {
3008
0
          ival->reg = reg;
3009
0
        }
3010
0
      } else {
3011
0
        ival->reg = reg;
3012
0
      }
3013
0
      IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling for the first part)");
3014
0
      if (*unhandled && ival->end > (*unhandled)->range.start) {
3015
0
        ival->list_next = *active;
3016
0
        *active = ival;
3017
0
      }
3018
0
      ir_add_to_unhandled(unhandled, other);
3019
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3020
0
      return reg;
3021
0
    }
3022
0
  }
3023
0
  return IR_REG_NONE;
3024
0
}
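
/* Stripped of register sets, hints and interval lists, the decision made by
 * ir_try_allocate_free_reg() above reduces to: pick the register whose
 * freeUntilPos is highest; if it stays free past the interval's end, take it
 * outright, otherwise take it only up to a split point.  A hedged sketch of
 * that decision; N_REGS and all names are illustrative, not the IR API. */
#include <stdio.h>

#define N_REGS 4

typedef struct { int start, end; } interval_t;

/* Returns the chosen register, or -1 if no register is free at all.
 * *split_pos is set to -1 when the register is free for the whole interval,
 * otherwise to the position where the interval must be split. */
static int pick_free_reg(const interval_t *cur, const int *free_until, int *split_pos)
{
    int r, best = 0;

    for (r = 1; r < N_REGS; r++) {
        if (free_until[r] > free_until[best]) {
            best = r;
        }
    }
    if (free_until[best] <= cur->start) {
        return -1;                      /* caller must spill or evict */
    }
    *split_pos = (cur->end <= free_until[best]) ? -1 : free_until[best];
    return best;
}

int main(void)
{
    interval_t cur = {10, 40};
    int free_until[N_REGS] = {12, 30, 25, 8};
    int split_pos;
    int reg = pick_free_reg(&cur, free_until, &split_pos);

    printf("reg=%d split_pos=%d\n", reg, split_pos); /* reg=1 split_pos=30 */
    return 0;
}
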
3025
3026
static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled)
3027
0
{
3028
0
  ir_live_pos nextUsePos[IR_REG_NUM];
3029
0
  ir_live_pos blockPos[IR_REG_NUM];
3030
0
  int i, reg;
3031
0
  ir_live_pos pos, next_use_pos;
3032
0
  ir_live_interval *other, *prev;
3033
0
  ir_use_pos *use_pos;
3034
0
  ir_regset available, tmp_regset;
3035
3036
0
  if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3037
0
    use_pos = ival->use_pos;
3038
0
    while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) {
3039
0
      use_pos = use_pos->next;
3040
0
    }
3041
0
    if (!use_pos) {
3042
      /* spill */
3043
0
      IR_LOG_LSRA("    ---- Spill", ival, " (no use pos that must be in reg)");
3044
0
      ctx->flags2 |= IR_RA_HAVE_SPILLS;
3045
0
      return IR_REG_NONE;
3046
0
    }
3047
0
    next_use_pos = use_pos->pos;
3048
0
  } else {
3049
0
    next_use_pos = ival->range.end;
3050
0
  }
3051
3052
0
  if (IR_IS_TYPE_FP(ival->type)) {
3053
0
    available = IR_REGSET_FP;
3054
    /* set nextUsePos of all physical registers to maxInt */
3055
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
3056
0
      nextUsePos[i] = 0x7fffffff;
3057
0
      blockPos[i] = 0x7fffffff;
3058
0
    }
3059
0
  } else {
3060
0
    available = IR_REGSET_GP;
3061
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
3062
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
3063
0
    }
3064
#if defined(IR_TARGET_X86)
3065
    if (ir_type_size[ival->type] == 1) {
3066
      /* TODO: if no registers are available, we may reuse one of these registers for an already allocated interval ??? */
3067
      IR_REGSET_EXCL(available, IR_REG_RBP);
3068
      IR_REGSET_EXCL(available, IR_REG_RSI);
3069
      IR_REGSET_EXCL(available, IR_REG_RDI);
3070
    }
3071
#endif
3072
    /* set nextUsePos of all physical registers to maxInt */
3073
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
3074
0
      nextUsePos[i] = 0x7fffffff;
3075
0
      blockPos[i] = 0x7fffffff;
3076
0
    }
3077
0
  }
3078
3079
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
3080
3081
0
  if (IR_REGSET_IS_EMPTY(available)) {
3082
0
    fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");
3083
0
    IR_ASSERT(0);
3084
0
    exit(-1);
3085
0
  }
3086
3087
  /* for each interval it in active */
3088
0
  other = *active;
3089
0
  while (other) {
3090
    /* nextUsePos[it.reg] = next use of it after start of current */
3091
0
    reg = other->reg;
3092
0
    IR_ASSERT(reg >= 0);
3093
0
    if (reg >= IR_REG_SCRATCH) {
3094
0
      ir_regset regset;
3095
3096
0
      if (reg == IR_REG_SCRATCH) {
3097
0
        regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3098
0
      } else {
3099
0
        IR_ASSERT(reg == IR_REG_ALL);
3100
0
        regset = available;
3101
0
      }
3102
0
      IR_REGSET_FOREACH(regset, reg) {
3103
0
        blockPos[reg] = nextUsePos[reg] = 0;
3104
0
      } IR_REGSET_FOREACH_END();
3105
0
    } else if (IR_REGSET_IN(available, reg)) {
3106
0
      if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3107
0
        blockPos[reg] = nextUsePos[reg] = 0;
3108
0
      } else {
3109
0
        pos = ir_first_use_pos_after(other, ival->range.start,
3110
0
          IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3111
0
        if (pos < nextUsePos[reg]) {
3112
0
          nextUsePos[reg] = pos;
3113
0
        }
3114
0
      }
3115
0
    }
3116
0
    other = other->list_next;
3117
0
  }
3118
3119
  /* for each interval it in inactive intersecting with current */
3120
0
  other = *inactive;
3121
0
  while (other) {
3122
    /* nextUsePos[it.reg] = next intersection of it with current */
3123
0
    reg = other->reg;
3124
0
    IR_ASSERT(reg >= 0);
3125
0
    if (reg >= IR_REG_SCRATCH) {
3126
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3127
3128
0
      if (overlap) {
3129
0
        ir_regset regset;
3130
3131
0
        if (reg == IR_REG_SCRATCH) {
3132
0
          regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3133
0
        } else {
3134
0
          IR_ASSERT(reg == IR_REG_ALL);
3135
0
          regset = available;
3136
0
        }
3137
0
        IR_REGSET_FOREACH(regset, reg) {
3138
0
          if (overlap < nextUsePos[reg]) {
3139
0
            nextUsePos[reg] = overlap;
3140
0
          }
3141
0
          if (overlap < blockPos[reg]) {
3142
0
            blockPos[reg] = overlap;
3143
0
          }
3144
0
        } IR_REGSET_FOREACH_END();
3145
0
      }
3146
0
    } else if (IR_REGSET_IN(available, reg)) {
3147
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3148
3149
0
      if (overlap) {
3150
0
        if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3151
0
          if (overlap < nextUsePos[reg]) {
3152
0
            nextUsePos[reg] = overlap;
3153
0
          }
3154
0
          if (overlap < blockPos[reg]) {
3155
0
            blockPos[reg] = overlap;
3156
0
          }
3157
0
        } else {
3158
0
          pos = ir_first_use_pos_after(other, ival->range.start,
3159
0
            IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3160
0
          if (pos < nextUsePos[reg]) {
3161
0
            nextUsePos[reg] = pos;
3162
0
          }
3163
0
        }
3164
0
      }
3165
0
    }
3166
0
    other = other->list_next;
3167
0
  }
3168
3169
  /* register hinting */
3170
0
  reg = IR_REG_NONE;
3171
0
  if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3172
0
    reg = ir_get_preferred_reg(ctx, ival, available);
3173
0
  }
3174
0
  if (reg == IR_REG_NONE) {
3175
0
select_register:
3176
0
    reg = IR_REGSET_FIRST(available);
3177
0
  }
3178
3179
  /* reg = register with highest nextUsePos */
3180
0
  pos = nextUsePos[reg];
3181
0
  tmp_regset = available;
3182
0
  IR_REGSET_EXCL(tmp_regset, reg);
3183
0
  IR_REGSET_FOREACH(tmp_regset, i) {
3184
0
    if (nextUsePos[i] > pos) {
3185
0
      pos = nextUsePos[i];
3186
0
      reg = i;
3187
0
    }
3188
0
  } IR_REGSET_FOREACH_END();
3189
3190
  /* if first usage of current is after nextUsePos[reg] then */
3191
0
  if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3192
    /* all other intervals are used before current, so it is best to spill current itself */
3193
    /* assign spill slot to current */
3194
    /* split current before its first use position that requires a register */
3195
0
    ir_live_pos split_pos;
3196
3197
0
spill_current:
3198
0
    if (next_use_pos == ival->range.start) {
3199
0
      IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3200
      /* split right after definition */
3201
0
      split_pos = next_use_pos + 1;
3202
0
    } else {
3203
0
      split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3204
0
    }
3205
3206
0
    if (split_pos > ival->range.start) {
3207
0
      IR_LOG_LSRA("    ---- Conflict with others", ival, " (all others are used before)");
3208
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3209
0
      IR_LOG_LSRA("    ---- Spill", ival, "");
3210
0
      ir_add_to_unhandled(unhandled, other);
3211
0
      IR_LOG_LSRA("    ---- Queue", other, "");
3212
0
      return IR_REG_NONE;
3213
0
    }
3214
0
  }
3215
3216
0
  if (ival->end > blockPos[reg]) {
3217
    /* spilling make a register free only for the first part of current */
3218
0
    IR_LOG_LSRA("    ---- Conflict with others", ival, " (spilling make a register free only for the first part)");
3219
    /* split current at optimal position before block_pos[reg] */
3220
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival,  blockPos[reg] + 1,
3221
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3222
0
    if (split_pos == 0) {
3223
0
      split_pos = ir_first_use_pos_after(ival, blockPos[reg],
3224
0
        IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1;
3225
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3226
0
      ir_add_to_unhandled(unhandled, other);
3227
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3228
0
      return IR_REG_NONE;
3229
0
    }
3230
0
    if (split_pos >= blockPos[reg]) {
3231
0
try_next_available_register:
3232
0
      IR_REGSET_EXCL(available, reg);
3233
0
      if (IR_REGSET_IS_EMPTY(available)) {
3234
0
        fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
3235
0
        IR_ASSERT(0);
3236
0
        exit(-1);
3237
0
      }
3238
0
      IR_LOG_LSRA("      ---- Restart", ival, "");
3239
0
      goto select_register;
3240
0
    }
3241
0
    split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
3242
0
    other = ir_split_interval_at(ctx, ival, split_pos);
3243
0
    ir_add_to_unhandled(unhandled, other);
3244
0
    IR_LOG_LSRA("      ---- Queue", other, "");
3245
0
  }
3246
3247
  /* spill intervals that currently block reg */
3248
0
  prev = NULL;
3249
0
  other = *active;
3250
0
  while (other) {
3251
0
    ir_live_pos split_pos;
3252
3253
0
    if (reg == other->reg) {
3254
      /* split active interval for reg at position */
3255
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3256
3257
0
      if (overlap) {
3258
0
        ir_live_interval *child, *child2;
3259
3260
0
        IR_ASSERT(other->type != IR_VOID);
3261
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with active", other, overlap);
3262
3263
0
        split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3264
0
        if (split_pos == 0) {
3265
0
          split_pos = ival->range.start;
3266
0
        }
3267
0
        split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);
3268
0
        if (split_pos > other->range.start) {
3269
0
          child = ir_split_interval_at(ctx, other, split_pos);
3270
0
          if (prev) {
3271
0
            prev->list_next = other->list_next;
3272
0
          } else {
3273
0
            *active = other->list_next;
3274
0
          }
3275
0
          IR_LOG_LSRA("      ---- Finish", other, "");
3276
0
        } else {
3277
0
          if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) {
3278
0
            if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3279
0
              next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG);
3280
0
              if (next_use_pos == ival->range.start) {
3281
0
                IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3282
                /* split right after definition */
3283
0
                split_pos = next_use_pos + 1;
3284
0
              } else {
3285
0
                split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3286
0
              }
3287
3288
0
              if (split_pos > ival->range.start) {
3289
0
                goto spill_current;
3290
0
              }
3291
0
            }
3292
0
            goto try_next_available_register;
3293
0
          }
3294
0
          child = other;
3295
0
          other->reg = IR_REG_NONE;
3296
0
          if (prev) {
3297
0
            prev->list_next = other->list_next;
3298
0
          } else {
3299
0
            *active = other->list_next;
3300
0
          }
3301
0
          IR_LOG_LSRA("      ---- Spill and Finish", other, " (it must not be in reg)");
3302
0
        }
3303
3304
0
        split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
3305
0
        if (split_pos > child->range.start && split_pos < child->end) {
3306
0
          ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);
3307
0
          if (opt_split_pos > child->range.start) {
3308
0
            split_pos = opt_split_pos;
3309
0
          }
3310
0
          child2 = ir_split_interval_at(ctx, child, split_pos);
3311
0
          IR_LOG_LSRA("      ---- Spill", child, "");
3312
0
          ir_add_to_unhandled(unhandled, child2);
3313
0
          IR_LOG_LSRA("      ---- Queue", child2, "");
3314
0
        } else if (child != other) {
3315
          // TODO: this may cause endless loop
3316
0
          ir_add_to_unhandled(unhandled, child);
3317
0
          IR_LOG_LSRA("      ---- Queue", child, "");
3318
0
        }
3319
0
      }
3320
0
      break;
3321
0
    }
3322
0
    prev = other;
3323
0
    other = other->list_next;
3324
0
  }
3325
3326
  /* split any inactive interval for reg at the end of its lifetime hole */
3327
0
  other = *inactive;
3328
0
  while (other) {
3329
    /* freeUntilPos[it.reg] = next intersection of it with current */
3330
0
    if (reg == other->reg) {
3331
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3332
3333
0
      if (overlap) {
3334
0
        ir_live_interval *child;
3335
3336
0
        IR_ASSERT(other->type != IR_VOID);
3337
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with inactive", other, overlap);
3338
        // TODO: optimal split position (this case is not tested)
3339
0
        child = ir_split_interval_at(ctx, other, overlap);
3340
        /* reset range cache */
3341
0
        other->current_range = &other->range;
3342
0
        ir_add_to_unhandled(unhandled, child);
3343
0
        IR_LOG_LSRA("      ---- Queue", child, "");
3344
0
      }
3345
0
    }
3346
0
    other = other->list_next;
3347
0
  }
3348
3349
  /* current.reg = reg */
3350
0
  ival->reg = reg;
3351
0
  IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (after splitting others)");
3352
3353
0
  if (*unhandled && ival->end > (*unhandled)->range.start) {
3354
0
    ival->list_next = *active;
3355
0
    *active = ival;
3356
0
  }
3357
0
  return reg;
3358
0
}
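
The code above is the AllocateBlockedReg step of Wimmer's linear scan: every candidate register is ranked by nextUsePos, the next position at which some other interval actually needs it, and the register whose next use is furthest away is taken. A minimal stand-alone sketch of that ranking, with a plain array in place of ir_regset (all names below are illustrative, not part of ir_ra.c):

    #include <stdint.h>

    #define NUM_REGS 16
    #define MAX_POS  0x7fffffff  /* "never needed again" */

    /* Return the register whose next forced use is furthest in the future. */
    static int pick_blocked_reg(const int32_t next_use_pos[NUM_REGS])
    {
        int best = 0;
        for (int r = 1; r < NUM_REGS; r++) {
            if (next_use_pos[r] > next_use_pos[best]) {
                best = r;
            }
        }
        return best;
    }

If the current interval's own first use lies beyond the chosen register's nextUsePos, spilling the current interval itself is cheaper, which is the spill_current path above; and if the interval outlives blockPos of the chosen register (a position blocked by a fixed interval), it must be split before that position first.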

static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
  ir_block *bb = ctx->data;
  ir_tmp_reg tmp_reg;

  /* Reserve one temporary per register class for this block's DESSA moves:
   * num 0 for integer moves, num 1 for FP moves, live across the block end
   * (from IR_USE_SUB_REF to IR_SAVE_SUB_REF). */
  if (to == 0) {
    if (IR_IS_TYPE_INT(type)) {
      tmp_reg.num = 0;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      tmp_reg.num = 1;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    }
  } else if (from != 0) {
    if (IR_IS_TYPE_INT(type)) {
      tmp_reg.num = 0;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    } else {
      IR_ASSERT(IR_IS_TYPE_FP(type));
      tmp_reg.num = 1;
      tmp_reg.type = type;
      tmp_reg.start = IR_USE_SUB_REF;
      tmp_reg.end = IR_SAVE_SUB_REF;
    }
  } else {
    return 1;
  }
  if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
    ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);
  }
  return 1;
}
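
ir_fix_dessa_tmps reserves the scratch registers that the generated DESSA (SSA deconstruction) moves may need. The classic case is a parallel copy that forms a cycle, for example two PHI-bound values swapping places: it cannot be serialized with plain moves, and one spare register per class is enough to break it. Sketched in plain C (illustrative only, not the move generator itself):

    /* Parallel copy (a, b) = (b, a) serialized with one temporary. */
    static void swap_with_tmp(int *a, int *b)
    {
        int tmp = *a;  /* save one end of the cycle */
        *a = *b;       /* copy along the chain      */
        *b = tmp;      /* close the cycle           */
    }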

static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
{
  ir_use_pos *use_pos = ival->use_pos;
  ir_insn *insn;

  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
    IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
    insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
    IR_ASSERT(insn->op == IR_PARAM);
    use_pos = use_pos->next;
    if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
      return 0;
    }

    if (use_pos) {
      ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
      if (bb->loop_depth) {
        return 0;
      }
    }

    return 1;
  } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) {
    insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
    IR_ASSERT(insn->op == IR_VLOAD);
    IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR);
    use_pos = use_pos->next;
    if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
      return 0;
    }

    if (use_pos) {
      ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
      if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) {
        return 0;
      }
      /* check if VAR may be clobbered between VLOAD and use */
      ir_use_list *use_list = &ctx->use_lists[insn->op2];
      ir_ref n = use_list->count;
      ir_ref *p = &ctx->use_edges[use_list->refs];
      for (; n > 0; p++, n--) {
        ir_ref use = *p;
        if (ctx->ir_base[use].op == IR_VSTORE) {
          if (use > IR_LIVE_POS_TO_REF(ival->use_pos->pos) && use < IR_LIVE_POS_TO_REF(use_pos->pos)) {
            return 0;
          }
        } else if (ctx->ir_base[use].op == IR_VADDR) {
          return 0;
        }
      }
    }
    ival->stack_spill_pos = ctx->ir_base[insn->op2].op3;

    return 1;
  }
  return 0;
}
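
For IR_LIVE_INTERVAL_MEM_LOAD the interval may reuse the VAR's existing stack slot only if no IR_VSTORE to that VAR sits between the VLOAD and the remaining use, and the VAR's address is never taken (IR_VADDR). Because instruction references grow in schedule order, "between" reduces to a range test over the VAR's use list, as in this stand-alone sketch (illustrative types and names, not the ir_use_list API):

    #include <stdbool.h>

    /* uses[]: references of instructions touching the variable,
     * is_store[]: whether the corresponding use writes it. */
    static bool slot_clobbered_between(const int *uses, const bool *is_store,
                                       int n, int def_ref, int use_ref)
    {
        for (int k = 0; k < n; k++) {
            if (is_store[k] && uses[k] > def_ref && uses[k] < use_ref) {
                return true;   /* a store lands between load and use */
            }
        }
        return false;
    }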

static void ir_assign_bound_spill_slots(ir_ctx *ctx)
{
  ir_hashtab_bucket *b = ctx->binding->data;
  uint32_t n = ctx->binding->count;
  uint32_t v;
  ir_live_interval *ival;

  while (n > 0) {
    v = ctx->vregs[b->key];
    if (v) {
      ival = ctx->live_intervals[v];
      if (ival
       && ival->stack_spill_pos == -1
       && (ival->next || ival->reg == IR_REG_NONE)) {
        IR_ASSERT(b->val < 0);
        /* special spill slot */
        ival->stack_spill_pos = -b->val;
        ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
      }
    }
    b++;
    n--;
  }
}

static int ir_linear_scan(ir_ctx *ctx)
{
  uint32_t b;
  ir_block *bb;
  ir_live_interval *unhandled = NULL;
  ir_live_interval *active = NULL;
  ir_live_interval *inactive = NULL;
  ir_live_interval *ival, *other, *prev;
  int j;
  ir_live_pos position;
  ir_reg reg;
  ir_reg_alloc_data data;
  ir_ref vars = ctx->vars;

  if (!ctx->live_intervals) {
    return 0;
  }

  if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) {
    /* Add fixed intervals for temporary registers used for DESSA moves */
    for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
      IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
      if (bb->flags & IR_BB_DESSA_MOVES) {
        ctx->data = bb;
        ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
      }
    }
  }

  ctx->data = &data;
  ctx->stack_frame_size = 0;
  data.unused_slot_4 = 0;
  data.unused_slot_2 = 0;
  data.unused_slot_1 = 0;
  data.handled = NULL;

  while (vars) {
    ir_ref var = vars;
    ir_insn *insn = &ctx->ir_base[var];

    IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA);
    vars = insn->op3; /* list next */

    if (insn->op == IR_VAR) {
      ir_ref slot = ir_allocate_spill_slot(ctx, insn->type, &data);
      ir_use_list *use_list;
      ir_ref n, *p;

      insn->op3 = slot;
      use_list = &ctx->use_lists[var];
      n = use_list->count;
      p = &ctx->use_edges[use_list->refs];
      for (; n > 0; p++, n--) {
        insn = &ctx->ir_base[*p];
        if (insn->op == IR_VADDR) {
          insn->op3 = slot;
        }
      }
    } else {
      ir_insn *val = &ctx->ir_base[insn->op2];

      IR_ASSERT(IR_IS_CONST_REF(insn->op2));
      IR_ASSERT(IR_IS_TYPE_INT(val->type));
      IR_ASSERT(!IR_IS_SYM_CONST(val->op));
      IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
      IR_ASSERT(val->val.i64 < 0x7fffffff);

      insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data);
    }
  }

  for (j = ctx->vregs_count; j != 0; j--) {
    ival = ctx->live_intervals[j];
    if (ival) {
      if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD))
          || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
        ir_add_to_unhandled(&unhandled, ival);
      }
    }
  }

  ival = ctx->live_intervals[0];
  if (ival) {
    ir_merge_to_unhandled(&unhandled, ival);
  }

  /* vregs + tmp + fixed + SCRATCH + ALL */
  for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
    ival = ctx->live_intervals[j];
    if (ival) {
      ival->current_range = &ival->range;
      ival->list_next = inactive;
      inactive = ival;
    }
  }

  ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS);

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_RA) {
    fprintf(stderr, "----\n");
    ir_dump_live_ranges(ctx, stderr);
    fprintf(stderr, "---- Start LSRA\n");
  }
#endif

  while (unhandled) {
    ival = unhandled;
    ival->current_range = &ival->range;
    unhandled = ival->list_next;
    position = ival->range.start;

    IR_LOG_LSRA("  ---- Processing", ival, "...");

    /* for each interval i in active */
    other = active;
    prev = NULL;
    while (other) {
      ir_live_range *r = other->current_range;

      IR_ASSERT(r);
      if (r->end <= position) {
        do {
          r = r->next;
        } while (r && r->end <= position);
        if (!r) {
          /* move i from active to handled */
          other = other->list_next;
          if (prev) {
            prev->list_next = other;
          } else {
            active = other;
          }
          continue;
        }
        other->current_range = r;
      }
      if (position < r->start) {
        /* move i from active to inactive */
        if (prev) {
          prev->list_next = other->list_next;
        } else {
          active = other->list_next;
        }
        other->list_next = inactive;
        inactive = other;
      } else {
        prev = other;
      }
      other = prev ? prev->list_next : active;
    }

    /* for each interval i in inactive */
    other = inactive;
    prev = NULL;
    while (other) {
      ir_live_range *r = other->current_range;

      IR_ASSERT(r);
      if (r->end <= position) {
        do {
          r = r->next;
        } while (r && r->end <= position);
        if (!r) {
          /* move i from inactive to handled */
          other = other->list_next;
          if (prev) {
            prev->list_next = other;
          } else {
            inactive = other;
          }
          continue;
        }
        other->current_range = r;
      }
      if (position >= r->start) {
        /* move i from inactive to active */
        if (prev) {
          prev->list_next = other->list_next;
        } else {
          inactive = other->list_next;
        }
        other->list_next = active;
        active = other;
      } else {
        prev = other;
      }
      other = prev ? prev->list_next : inactive;
    }

    reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);
    if (reg == IR_REG_NONE) {
      reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);
    }
  }

#if 0 //def IR_DEBUG
  /* all intervals must be processed */
  ival = active;
  while (ival) {
    IR_ASSERT(!ival->next);
    ival = ival->list_next;
  }
  ival = inactive;
  while (ival) {
    IR_ASSERT(!ival->next);
    ival = ival->list_next;
  }
#endif

  if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) {

    if (ctx->binding) {
      ir_assign_bound_spill_slots(ctx);
    }

    /* Use simple linear-scan (without holes) to allocate and reuse spill slots */
    unhandled = NULL;
    for (j = ctx->vregs_count; j != 0; j--) {
      ival = ctx->live_intervals[j];
      if (ival
       && (ival->next || ival->reg == IR_REG_NONE)
       && ival->stack_spill_pos == -1) {
        ival->flags |= IR_LIVE_INTERVAL_SPILLED;
        if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
          ir_live_range *r;

          other = ival;
          while (other->next) {
            other = other->next;
          }
          r = &other->range;
          while (r->next) {
            r = r->next;
          }
          ival->end = r->end;
          ir_add_to_unhandled_spill(&unhandled, ival);
        }
      }
    }

    if (unhandled) {
      uint8_t size;
      ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
      ir_live_interval *old;

      data.handled = handled;
      active = NULL;
      while (unhandled) {
        ival = unhandled;
        ival->current_range = &ival->range;
        unhandled = ival->list_next;
        position = ival->range.start;

        /* for each interval i in active */
        other = active;
        prev = NULL;
        while (other) {
          if (other->end <= position) {
            /* move i from active to handled */
            if (prev) {
              prev->list_next = other->list_next;
            } else {
              active = other->list_next;
            }
            size = ir_type_size[other->type];
            IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
            old = handled[size];
            while (old) {
              if (old->stack_spill_pos == other->stack_spill_pos) {
                break;
              }
              old = old->list_next;
            }
            if (!old) {
              other->list_next = handled[size];
              handled[size] = other;
            }
          } else {
            prev = other;
          }
          other = prev ? prev->list_next : active;
        }

        ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data);
        if (unhandled && ival->end > unhandled->range.start) {
          ival->list_next = active;
          active = ival;
        } else {
          size = ir_type_size[ival->type];
          IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
          old = handled[size];
          while (old) {
            if (old->stack_spill_pos == ival->stack_spill_pos) {
              break;
            }
            old = old->list_next;
          }
          if (!old) {
            ival->list_next = handled[size];
            handled[size] = ival;
          }
        }
      }
      data.handled = NULL;
    }
  }

#ifdef IR_TARGET_X86
  if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
    ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
  } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
    ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
  } else {
    ctx->ret_slot = -1;
  }
#endif

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_RA) {
    fprintf(stderr, "---- Finish LSRA\n");
    ir_dump_live_ranges(ctx, stderr);
    fprintf(stderr, "----\n");
  }
#endif

  return 1;
}
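
The main loop above is the textbook linear-scan driver: intervals leave the unhandled list in order of increasing start position, and before each allocation the active and inactive lists are re-partitioned against the current position (expired intervals become handled, intervals inside a lifetime hole become inactive, and vice versa). A stand-alone sketch of the "expire" step on an intrusive singly linked list (illustrative struct; the real lists also advance per-interval range chains):

    #include <stddef.h>

    typedef struct interval {
        int start, end;          /* whole-interval bounds */
        struct interval *next;   /* intrusive list link   */
    } interval;

    /* Unlink every interval on *active that ends at or before pos. */
    static void expire_active(interval **active, int pos)
    {
        interval **p = active;
        while (*p) {
            if ((*p)->end <= pos) {
                *p = (*p)->next;   /* handled: drop from active */
            } else {
                p = &(*p)->next;
            }
        }
    }

The second, simpler scan that allocates spill slots works on the same principle but deliberately ignores lifetime holes, which is why one handled[size] free list per slot size is enough to reuse slots.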

static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available)
{
  ir_worklist worklist;
  ir_block *bb;
  uint32_t b, *p, n;

  ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1);
  ir_worklist_push(&worklist, b0);
  while (ir_worklist_len(&worklist) != 0) {
    b = ir_worklist_pop(&worklist);
    bb = &ctx->cfg_blocks[b];
    if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) {
      ir_worklist_free(&worklist);
      return 1;
    }
    n = bb->predecessors_count;
    for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
      b = *p;
      bb = &ctx->cfg_blocks[b];

      if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) {
        ir_worklist_free(&worklist);
        return 1;
      } else if (!ir_bitset_in(available, b)) {
        ir_worklist_push(&worklist, b);
      }
    }
  }
  ir_worklist_free(&worklist);
  return 0;
}
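
needs_spill_reload answers "can the spilled value still be valid on every path into block b0?" by walking predecessor edges backwards from b0: reaching the function start or an entry block, or crossing a block edge the interval does not cover, means a reload is required, while blocks already marked in `available` are trusted. The traversal is a plain worklist reachability check, as in this sketch (illustrative fixed-size graph encoding, not the ir_worklist API):

    #include <stdbool.h>

    #define MAX_BLOCKS 64

    /* covered[b]: the interval covers the end of block b;
     * avail[b]:   the value is already known-good at the end of block b. */
    static bool needs_reload(int b0, const int preds[][4], const int *npreds,
                             const bool *covered, const bool *avail)
    {
        int stack[MAX_BLOCKS], sp = 0;
        bool seen[MAX_BLOCKS] = {false};

        stack[sp++] = b0;
        seen[b0] = true;
        while (sp > 0) {
            int b = stack[--sp];
            if (npreds[b] == 0) {
                return true;               /* entry reached: must reload */
            }
            for (int k = 0; k < npreds[b]; k++) {
                int p = preds[b][k];
                if (!covered[p]) {
                    return true;           /* value not live through here */
                }
                if (!avail[p] && !seen[p]) {
                    seen[p] = true;
                    stack[sp++] = p;       /* keep walking backwards */
                }
            }
        }
        return false;
    }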

static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
{
  if (use_pos->next
   && use_pos->op_num == 1
   && use_pos->next->pos == use_pos->pos
   && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
    /* Support for R2 = ADD(R1, R1) */
    use_pos = use_pos->next;
  }
  return use_pos->next && use_pos->next->op_num != 0;
}

static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t reg)
{
  char key[10];

  IR_ASSERT(reg != IR_REG_NONE);
  if (!ctx->fused_regs) {
    ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
    ir_strtab_init(ctx->fused_regs, 8, 128);
  }
  /* 8-byte key: the root instruction ref followed by the caller's
   * encoded (fused ref, operand number) pair */
  memcpy(key, &root, sizeof(ir_ref));
  memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
  ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
}
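
The fused-register table keys on an 8-byte string: the root instruction reference in the first four bytes and the encoded (fused ref, operand number) pair in the last four, matching the callers' `-hint_ref * sizeof(ir_ref) + op_num` encoding seen below in assign_regs. Packing such a composite key is plain byte copying, as in this stand-alone sketch (not the ir_strtab API):

    #include <stdint.h>
    #include <string.h>

    /* Build an 8-byte key from two 32-bit components. */
    static void pack_key(char key[8], int32_t root, int32_t ref_and_op)
    {
        memcpy(key, &root, sizeof(int32_t));
        memcpy(key + 4, &ref_and_op, sizeof(int32_t));
    }

    /* Encode (fused ref, op_num) the way callers of ir_set_fused_reg do,
     * assuming 4-byte refs. */
    static int32_t encode_ref_and_op(int32_t fused_ref, int32_t op_num)
    {
        return fused_ref * (int32_t)sizeof(int32_t) + op_num;
    }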

static void assign_regs(ir_ctx *ctx)
{
  ir_ref i;
  ir_live_interval *ival, *top_ival;
  ir_use_pos *use_pos;
  int8_t reg, old_reg;
  ir_ref ref;
  ir_regset used_regs = 0;

  if (!ctx->regs) {
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
  }

  if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) {
    for (i = 1; i <= ctx->vregs_count; i++) {
      ival = ctx->live_intervals[i];
      if (ival) {
        do {
          if (ival->reg != IR_REG_NONE) {
            reg = ival->reg;
            IR_REGSET_INCL(used_regs, reg);
            use_pos = ival->use_pos;
            while (use_pos) {
              ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
              ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
              use_pos = use_pos->next;
            }
          }
          ival = ival->next;
        } while (ival);
      }
    }
  } else {
    ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1);

    for (i = 1; i <= ctx->vregs_count; i++) {
      top_ival = ival = ctx->live_intervals[i];
      if (ival) {
        if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) {
          do {
            if (ival->reg != IR_REG_NONE) {
              IR_REGSET_INCL(used_regs, ival->reg);
              use_pos = ival->use_pos;
              while (use_pos) {
                reg = ival->reg;
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                if (use_pos->hint_ref < 0) {
                  ref = -use_pos->hint_ref;
                }
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

                use_pos = use_pos->next;
              }
            }
            ival = ival->next;
          } while (ival);
        } else {
          do {
            if (ival->reg != IR_REG_NONE) {
              ir_ref prev_use_ref = IR_UNUSED;

              ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
              IR_REGSET_INCL(used_regs, ival->reg);
              use_pos = ival->use_pos;
              while (use_pos) {
                reg = ival->reg;
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                // TODO: Insert spill loads and stores in optimal positions (resolution)
                if (use_pos->op_num == 0) {
                  if ((ctx->ir_base[ref].op == IR_COPY
                    || ctx->ir_base[ref].op == IR_BITCAST
                    || ctx->ir_base[ref].op == IR_TRUNC)
                   && !IR_IS_CONST_REF(ctx->ir_base[ref].op1)
                   && ctx->vregs[ctx->ir_base[ref].op1] == (uint32_t)i) {
                    /* register reuse */
                    ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                    prev_use_ref = ref;
                    use_pos = use_pos->next;
                    continue;
                  }
                  ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
                  if (ctx->ir_base[ref].op == IR_PHI) {
                    /* Spilled PHI var is passed through memory */
                    reg = IR_REG_NONE;
                    prev_use_ref = IR_UNUSED;
                  } else if (ctx->ir_base[ref].op == IR_PARAM
                   && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
                    /* Stack PARAM var is passed through memory */
                    reg = IR_REG_NONE;
                  } else {
                    uint32_t use_b = ctx->cfg_map[ref];

                    if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
                      ir_bitset_incl(available, use_b);
                    }
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_STORE;
                    }
                    prev_use_ref = ref;
                  }
                } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
                 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
                  if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
                   && use_pos->hint != reg
//                   && ctx->ir_base[ref].op != IR_CALL
//                   && ctx->ir_base[ref].op != IR_TAILCALL) {
                   && ctx->ir_base[ref].op != IR_SNAPSHOT
                   && !needs_spill_load(ctx, ival, use_pos)) {
                    /* fuse spill load (valid only when register is not reused) */
                    reg = IR_REG_NONE;
                    if (use_pos->next
                     && use_pos->op_num == 1
                     && use_pos->next->pos == use_pos->pos
                     && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
                      /* Support for R2 = BINOP(R1, R1) */
                      if (use_pos->hint_ref < 0) {
                        ref = -use_pos->hint_ref;
                      }
                      ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                      use_pos = use_pos->next;
                    }
                  } else {
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_LOAD;
                    }
                    if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
                      uint32_t use_b = ctx->cfg_map[ref];

                      if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
                        ir_bitset_incl(available, use_b);
                      }
                      prev_use_ref = ref;
                    }
                  }
                  if (use_pos->hint_ref < 0
                   && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
                      reg |= IR_REG_SPILL_SPECIAL;
                    } else {
                      reg |= IR_REG_SPILL_LOAD;
                    }
                    if (reg != old_reg) {
                      IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
                      ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
                      ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
                      use_pos = use_pos->next;
                      continue;
                    }
                  }
                } else if (use_pos->flags & IR_PHI_USE) {
                  IR_ASSERT(use_pos->hint_ref < 0);
                  IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
                  IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
                  if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
                    /* Spilled PHI var is passed through memory */
                    reg = IR_REG_NONE;
                  }
                } else if (use_pos->hint_ref < 0
                    && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
                  if (reg != old_reg) {
                    IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
                    ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
                    ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
                    use_pos = use_pos->next;
                    continue;
                  }
                } else {
                  /* reuse register without spill load */
                }
                if (use_pos->hint_ref < 0) {
                  ref = -use_pos->hint_ref;
                }
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

                use_pos = use_pos->next;
              }
            } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
              use_pos = ival->use_pos;
              while (use_pos) {
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
                if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
                  IR_ASSERT(use_pos->hint_ref >= 0);
                  /* A reference to a CPU spill slot */
                  reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
                  ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
                }
                use_pos = use_pos->next;
              }
            }
            ival = ival->next;
          } while (ival);
        }
      }
    }
    ir_mem_free(available);
  }

  /* Temporary registers */
  ival = ctx->live_intervals[0];
  if (ival) {
    do {
      IR_ASSERT(ival->reg != IR_REG_NONE);
      IR_REGSET_INCL(used_regs, ival->reg);
      reg = ival->reg;
      if (ival->tmp_op_num > 0) {
        ir_insn *insn = &ctx->ir_base[ival->tmp_ref];

        if (ival->tmp_op_num <= insn->inputs_count) {
          ir_ref *ops = insn->ops;
          if (IR_IS_CONST_REF(ops[ival->tmp_op_num])) {
            /* constant rematerialization */
            reg |= IR_REG_SPILL_LOAD;
          } else if (ctx->ir_base[ops[ival->tmp_op_num]].op == IR_ALLOCA
              || ctx->ir_base[ops[ival->tmp_op_num]].op == IR_VADDR) {
            /* local address rematerialization */
            reg |= IR_REG_SPILL_LOAD;
          }
        }
      }
      ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, reg);
      ival = ival->next;
    } while (ival);
  }

  if (ctx->fixed_stack_frame_size != -1) {
    ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
    if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
      ctx->used_preserved_regs)) {
      // TODO: Preserved reg and fixed frame conflict ???
      // IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
    }
  } else {
    ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
      IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
        (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED));
  }

  ir_fix_stack_frame(ctx);
}
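
Throughout assign_regs the per-operand register value is more than a register number: IR_REG_SPILL_LOAD, IR_REG_SPILL_STORE, and IR_REG_SPILL_SPECIAL bits are OR-ed into it so that the emitter knows to wrap the use or the definition with a stack access. A stand-alone sketch of this kind of tagged byte (the flag values here are made up for illustration; the real masks live in the target headers):

    #include <stdint.h>
    #include <stdbool.h>

    enum {                      /* illustrative layout only */
        REG_NUM_MASK = 0x3f,    /* low bits: physical register id   */
        SPILL_LOAD   = 0x40,    /* reload the value before this use */
        SPILL_STORE  = 0x80     /* store the value after definition */
    };

    static uint8_t tag_reload(uint8_t reg)    { return reg | SPILL_LOAD; }
    static bool    has_reload_flag(uint8_t r) { return (r & SPILL_LOAD) != 0; }
    static uint8_t reg_number(uint8_t reg)    { return reg & REG_NUM_MASK; }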

int ir_reg_alloc(ir_ctx *ctx)
{
  if (ir_linear_scan(ctx)) {
    assign_regs(ctx);
    return 1;
  }
  return 0;
}
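
ir_reg_alloc is the public entry point of this file: ir_linear_scan() assigns registers and spill slots to the intervals, and assign_regs() writes the results back onto individual instruction operands. It runs after virtual registers and live ranges have been computed; a hypothetical driver fragment (the exact pass ordering is an assumption, not taken from this file):

    /* Sketch of where ir_reg_alloc sits in a compilation pipeline. */
    static int compile_with_ra(ir_ctx *ctx)
    {
        if (!ir_match(ctx)                      /* instruction selection */
         || !ir_assign_virtual_registers(ctx)
         || !ir_compute_live_ranges(ctx)        /* build intervals       */
         || !ir_coalesce(ctx)                   /* copy coalescing       */
         || !ir_reg_alloc(ctx)) {               /* this file             */
            return 0;
        }
        return 1;
    }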