Coverage Report

Created: 2026-06-02 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/opcache/jit/ir/ir_ra.c
Line
Count
Source
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 *
7
 * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
8
 * Michael Franz, CGO'10 (2010)
9
 * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
10
 * Christian Wimmer VEE'10 (2005)
11
 */
12
13
#ifndef _GNU_SOURCE
14
# define _GNU_SOURCE
15
#endif
16
17
#include <stdlib.h>
18
#include "ir.h"
19
20
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
21
# include "ir_x86.h"
22
#elif defined(IR_TARGET_AARCH64)
23
# include "ir_aarch64.h"
24
#else
25
# error "Unknown IR target"
26
#endif
27
28
#include "ir_private.h"
29
30
int ir_regs_number(void)
31
0
{
32
0
  return IR_REG_NUM;
33
0
}
34
35
bool ir_reg_is_int(int32_t reg)
36
0
{
37
0
  IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
38
0
  return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
39
0
}
40
41
static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
42
0
{
43
0
  uint32_t *vregs;
44
0
  uint32_t vregs_count = 0;
45
0
  uint32_t b;
46
0
  ir_ref i, n;
47
0
  ir_block *bb;
48
0
  ir_insn *insn;
49
0
  uint32_t flags;
50
51
  /* Assign unique virtual register to each data node */
52
0
  vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
53
0
  n = 1;
54
0
  for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
55
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
56
0
    i = bb->start;
57
58
    /* skip first instruction */
59
0
    insn = ctx->ir_base + i;
60
0
    n = ir_insn_len(insn);
61
0
    i += n;
62
0
    insn += n;
63
0
    while (i < bb->end) {
64
0
      flags = ir_op_flags[insn->op];
65
0
      if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
66
0
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
67
0
        if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
68
0
          vregs[i] = ++vregs_count;
69
0
        }
70
0
      }
71
0
      n = ir_insn_len(insn);
72
0
      i += n;
73
0
      insn += n;
74
0
    }
75
0
  }
76
0
  ctx->vregs_count = vregs_count;
77
0
  ctx->vregs = vregs;
78
79
0
  return 1;
80
0
}
81
82
int ir_assign_virtual_registers(ir_ctx *ctx)
83
0
{
84
0
  uint32_t *vregs;
85
0
  uint32_t vregs_count = 0;
86
0
  ir_ref i;
87
0
  ir_insn *insn;
88
89
0
  if (!ctx->rules) {
90
0
    return ir_assign_virtual_registers_slow(ctx);
91
0
  }
92
93
  /* Assign unique virtual register to each rule that needs it */
94
0
  vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
95
96
0
  for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
97
0
    uint32_t v = 0;
98
99
0
    if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
100
0
      uint32_t flags = ir_op_flags[insn->op];
101
102
0
      if ((flags & IR_OP_FLAG_DATA)
103
0
       || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
104
0
        v = ++vregs_count;
105
0
      }
106
0
    }
107
0
    vregs[i] = v;
108
0
  }
109
110
0
  ctx->vregs_count = vregs_count;
111
0
  ctx->vregs = vregs;
112
113
0
  return 1;
114
0
}
115
116
/* Lifetime intervals construction */
117
118
static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
119
0
{
120
0
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
121
122
0
  ival->type = IR_VOID;
123
0
  ival->reg = IR_REG_NONE;
124
0
  ival->flags = 0;
125
0
  ival->vreg = v;
126
0
  ival->stack_spill_pos = -1; // not allocated
127
0
  ival->range.start = start;
128
0
  ival->range.end = ival->end = end;
129
0
  ival->range.next = NULL;
130
0
  ival->use_pos = NULL;
131
0
  ival->next = NULL;
132
133
0
  ctx->live_intervals[v] = ival;
134
0
  return ival;
135
0
}
136
137
static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
138
0
{
139
0
  ir_live_interval *ival = ctx->live_intervals[v];
140
0
  ir_live_range *p, *q;
141
142
0
  if (!ival) {
143
0
    return ir_new_live_range(ctx, v, start, end);
144
0
  }
145
146
0
  p = &ival->range;
147
0
  if (end >= p->start) {
148
0
    ir_live_range *prev = NULL;
149
150
0
    do {
151
0
      if (p->end >= start) {
152
0
        if (start < p->start) {
153
0
          p->start = start;
154
0
        }
155
0
        if (end > p->end) {
156
          /* merge with next */
157
0
          ir_live_range *next = p->next;
158
159
0
          p->end = end;
160
0
          while (next && p->end >= next->start) {
161
0
            if (next->end > p->end) {
162
0
              p->end = next->end;
163
0
            }
164
0
            p->next = next->next;
165
            /* remember in the "unused_ranges" list */
166
0
            next->next = ctx->unused_ranges;
167
0
            ctx->unused_ranges = next;
168
0
            next = p->next;
169
0
          }
170
0
          if (!p->next) {
171
0
            ival->end = p->end;
172
0
          }
173
0
        }
174
0
        return ival;
175
0
      }
176
0
      prev = p;
177
0
      p = prev->next;
178
0
    } while (p && end >= p->start);
179
0
    if (!p) {
180
0
      ival->end = end;
181
0
    }
182
0
    if (prev) {
183
0
      if (ctx->unused_ranges) {
184
        /* reuse */
185
0
        q = ctx->unused_ranges;
186
0
        ctx->unused_ranges = q->next;
187
0
      } else {
188
0
        q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
189
0
      }
190
0
      prev->next = q;
191
0
      q->start = start;
192
0
      q->end = end;
193
0
      q->next = p;
194
0
      return ival;
195
0
    }
196
0
  }
197
198
0
  if (ctx->unused_ranges) {
199
    /* reuse */
200
0
    q = ctx->unused_ranges;
201
0
    ctx->unused_ranges = q->next;
202
0
  } else {
203
0
    q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
204
0
  }
205
0
  q->start = p->start;
206
0
  q->end = p->end;
207
0
  q->next = p->next;
208
0
  p->start = start;
209
0
  p->end = end;
210
0
  p->next = q;
211
0
  return ival;
212
0
}
213
214
IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
215
0
{
216
0
  ir_live_interval *ival = ctx->live_intervals[v];
217
218
0
  if (ival && ival->range.start == end) {
219
0
    ival->range.start = start;
220
0
    return ival;
221
0
  }
222
0
  return ir_add_live_range(ctx, v, start, end);
223
0
}
224
225
static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end)
226
0
{
227
0
  int v = ctx->vregs_count + 1 + reg;
228
0
  ir_live_interval *ival = ctx->live_intervals[v];
229
0
  ir_live_range *q;
230
231
0
  if (!ival) {
232
0
    ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
233
0
    ival->type = IR_VOID;
234
0
    ival->reg = reg;
235
0
    ival->flags = IR_LIVE_INTERVAL_FIXED;
236
0
    ival->vreg = v;
237
0
    ival->stack_spill_pos = -1; // not allocated
238
0
    ival->range.start = start;
239
0
    ival->range.end = ival->end = end;
240
0
    ival->range.next = NULL;
241
0
    ival->use_pos = NULL;
242
0
    ival->next = NULL;
243
244
0
    ctx->live_intervals[v] = ival;
245
0
  } else if (EXPECTED(end < ival->range.start)) {
246
0
    if (ctx->unused_ranges) {
247
      /* reuse */
248
0
      q = ctx->unused_ranges;
249
0
      ctx->unused_ranges = q->next;
250
0
    } else {
251
0
      q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
252
0
    }
253
254
0
    q->start = ival->range.start;
255
0
    q->end = ival->range.end;
256
0
    q->next = ival->range.next;
257
0
    ival->range.start = start;
258
0
    ival->range.end = end;
259
0
    ival->range.next = q;
260
0
  } else if (end == ival->range.start) {
261
0
    ival->range.start = start;
262
0
  } else {
263
0
    ir_add_live_range(ctx, v, start, end);
264
0
  }
265
0
}
266
267
static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg)
268
0
{
269
0
  ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
270
271
0
  ival->type = tmp_reg.type;
272
0
  ival->reg = IR_REG_NONE;
273
0
  ival->flags = IR_LIVE_INTERVAL_TEMP;
274
0
  ival->tmp_ref = tmp_ref;
275
0
  ival->tmp_op_num = tmp_op_num;
276
0
  ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start;
277
0
  ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end;
278
0
  ival->range.next = NULL;
279
0
  ival->use_pos = NULL;
280
281
0
  if (!ctx->live_intervals[0]) {
282
0
    ival->next = NULL;
283
0
    ctx->live_intervals[0] = ival;
284
0
  } else if (ival->range.start >= ctx->live_intervals[0]->range.start) {
285
0
    ir_live_interval *prev = ctx->live_intervals[0];
286
287
0
    while (prev->next && ival->range.start >= prev->next->range.start) {
288
0
      prev = prev->next;
289
0
    }
290
0
    ival->next = prev->next;
291
0
    prev->next = ival;
292
0
  } else {
293
0
    ir_live_interval *next = ctx->live_intervals[0];
294
295
0
    ival->next = next;
296
0
    ctx->live_intervals[0] = ival;
297
0
  }
298
0
  return;
299
0
}
300
301
static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)
302
0
{
303
0
  ir_live_interval *ival = ctx->live_intervals[0];
304
305
0
  if (ival) {
306
0
    while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) {
307
0
      if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) {
308
0
        return 1;
309
0
      }
310
0
      ival = ival->next;
311
0
    }
312
0
  }
313
0
  return 0;
314
0
}
315
316
static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start)
317
0
{
318
0
  ir_live_interval *ival = ctx->live_intervals[v];
319
0
  ir_live_range *p = &ival->range;
320
321
#if 0
322
  while (p && p->start < old_start) {
323
    p = p->next;
324
  }
325
#endif
326
0
  IR_ASSERT(ival && p->start == old_start);
327
0
  p->start = new_start;
328
0
  return ival;
329
0
}
330
331
static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
332
0
{
333
0
  ir_use_pos *p = ival->use_pos;
334
335
0
  if (EXPECTED(!p || p->pos > use_pos->pos)) {
336
0
    use_pos->next = p;
337
0
    ival->use_pos = use_pos;
338
0
  } else {
339
0
    ir_use_pos *prev;
340
341
0
    do {
342
0
      prev = p;
343
0
      p = p->next;
344
0
    } while (p && p->pos < use_pos->pos);
345
346
0
    use_pos->next = prev->next;
347
0
    prev->next = use_pos;
348
0
  }
349
0
}
350
351
352
IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref)
353
0
{
354
0
  ir_use_pos *use_pos;
355
356
0
  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
357
0
  use_pos->op_num = op_num;
358
0
  use_pos->hint = hint;
359
0
  use_pos->flags = use_flags;
360
0
  use_pos->hint_ref = hint_ref;
361
0
  use_pos->pos = pos;
362
363
0
  if (hint != IR_REG_NONE) {
364
0
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
365
0
  }
366
0
  if (hint_ref > 0) {
367
0
    ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
368
0
  }
369
370
0
  ir_add_use_pos(ctx, ival, use_pos);
371
0
}
372
373
static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref)
374
0
{
375
0
  ir_use_pos *use_pos;
376
377
0
  IR_ASSERT(phi_ref > 0);
378
0
  use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
379
0
  use_pos->op_num = op_num;
380
0
  use_pos->hint = IR_REG_NONE;
381
0
  use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ???
382
0
  use_pos->hint_ref = -phi_ref;
383
0
  use_pos->pos = pos;
384
385
0
  ir_add_use_pos(ctx, ival, use_pos);
386
0
}
387
388
static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
389
0
{
390
0
  ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];
391
392
0
  if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) {
393
0
    ir_use_pos *use_pos = ival->use_pos;
394
395
0
    while (use_pos) {
396
0
      if (use_pos->pos == pos) {
397
0
        if (use_pos->hint == IR_REG_NONE) {
398
0
          use_pos->hint = hint;
399
0
          ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
400
0
        }
401
0
      }
402
0
      use_pos = use_pos->next;
403
0
    }
404
0
  }
405
0
}
406
407
static void ir_hint_propagation(ir_ctx *ctx)
408
0
{
409
0
  int i;
410
0
  ir_live_interval *ival;
411
0
  ir_use_pos *use_pos;
412
0
  ir_use_pos *hint_use_pos;
413
414
0
  for (i = ctx->vregs_count; i > 0; i--) {
415
0
    ival = ctx->live_intervals[i];
416
0
    if (ival
417
0
     && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
418
0
      use_pos = ival->use_pos;
419
0
      hint_use_pos = NULL;
420
0
      while (use_pos) {
421
0
        if (use_pos->op_num == 0) {
422
0
          if (use_pos->hint_ref > 0) {
423
0
            hint_use_pos = use_pos;
424
0
          }
425
0
        } else if (use_pos->hint != IR_REG_NONE) {
426
0
          if (hint_use_pos) {
427
0
            ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
428
0
            hint_use_pos = NULL;
429
0
          }
430
0
        }
431
0
        use_pos = use_pos->next;
432
0
      }
433
0
    }
434
0
  }
435
0
}
436
437
#ifdef IR_BITSET_LIVENESS
438
/* DFS + Loop-Forest livness for SSA using bitset(s) */
439
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
440
{
441
  bool ok = 1;
442
  int count = 0;
443
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
444
  ir_ref i;
445
446
  IR_BITSET_FOREACH(live, len, i) {
447
    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
448
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
449
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
450
451
    if (use_pos->op_num) {
452
      ir_ref *ops = ctx->ir_base[ref].ops;
453
      ref = ops[use_pos->op_num];
454
    }
455
456
    if (ctx->ir_base[ref].op == IR_PARAM) {
457
      continue;
458
    }
459
    if (ctx->binding) {
460
      ir_ref var = ir_binding_find(ctx, ref);
461
      if (var < 0) {
462
        /* We may load the value at OSR entry-point */
463
        if (!count) {
464
          bb->flags &= ~IR_BB_EMPTY;
465
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
466
          if (!ctx->osr_entry_loads) {
467
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
468
            ir_list_init(list, 16);
469
          }
470
          ir_list_push(list, b);
471
          ir_list_push(list, 0);
472
        }
473
        ir_list_push(list, ref);
474
        count++;
475
        continue;
476
      }
477
    }
478
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
479
    ok = 0;
480
  } IR_BITSET_FOREACH_END();
481
482
  if (!ok) {
483
    IR_ASSERT(0);
484
  }
485
  if (count) {
486
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
487
488
#if 0
489
    /* ENTRY "clobbers" all registers */
490
    ir_ref ref = ctx->ir_base[bb->start].op1;
491
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
492
      IR_DEF_LIVE_POS_FROM_REF(ref),
493
      IR_SAVE_LIVE_POS_FROM_REF(ref));
494
#endif
495
  }
496
}
497
498
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live)
499
{
500
  ir_ref stack[4];
501
  int stack_pos = 0;
502
  ir_target_constraints constraints;
503
  ir_insn *insn;
504
  uint32_t j, n, flags, def_flags;
505
  ir_ref *p, child;
506
  uint8_t use_flags;
507
  ir_reg reg;
508
  ir_live_pos use_pos;
509
  ir_live_interval *ival;
510
511
  while (1) {
512
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
513
514
    if (!(ctx->rules[input] & IR_SIMPLE)) {
515
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
516
      n = constraints.tmps_count;
517
      while (n > 0) {
518
        n--;
519
        if (constraints.tmp_regs[n].type) {
520
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
521
        } else {
522
          /* CPU specific constraints */
523
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
524
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
525
            IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
526
        }
527
      }
528
    } else {
529
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
530
      constraints.hints_count = 0;
531
    }
532
533
    insn = &ctx->ir_base[input];
534
    flags = ir_op_flags[insn->op];
535
    n = IR_INPUT_EDGES_COUNT(flags);
536
    j = 1;
537
    p = insn->ops + j;
538
    if (flags & IR_OP_FLAG_CONTROL) {
539
      j++;
540
      p++;
541
    }
542
    for (; j <= n; j++, p++) {
543
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
544
      child = *p;
545
      if (child > 0) {
546
        uint32_t v = ctx->vregs[child];
547
548
        if (v) {
549
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
550
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
551
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
552
          if (EXPECTED(reg == IR_REG_NONE)) {
553
            use_pos += IR_USE_SUB_REF;
554
          }
555
556
          if (!ir_bitset_in(live, v)) {
557
            /* live.add(opd) */
558
            ir_bitset_incl(live, v);
559
            /* intervals[opd].addRange(b.from, op.id) */
560
            ival = ir_add_live_range(ctx, v,
561
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
562
          } else {
563
            ival = ctx->live_intervals[v];
564
          }
565
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
566
        } else if (ctx->rules[child] & IR_FUSED) {
567
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
568
          stack[stack_pos++] = child;
569
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
570
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
571
        }
572
      }
573
    }
574
    if (!stack_pos) {
575
      break;
576
    }
577
    input = stack[--stack_pos];
578
  }
579
}
580
581
int ir_compute_live_ranges(ir_ctx *ctx)
582
{
583
  uint32_t b, i, j, k, n, succ, *p;
584
  ir_ref ref;
585
  uint32_t len;
586
  ir_insn *insn;
587
  ir_block *bb, *succ_bb;
588
#ifdef IR_DEBUG
589
  ir_bitset visited;
590
#endif
591
  ir_bitset live, bb_live;
592
  ir_bitset loops = NULL;
593
  ir_bitqueue queue;
594
  ir_live_interval *ival;
595
596
  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
597
    return 0;
598
  }
599
600
  if (ctx->rules) {
601
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
602
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
603
  }
604
605
  /* Root of the list of IR_VARs */
606
  ctx->vars = IR_UNUSED;
607
608
  /* Compute Live Ranges */
609
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
610
  len = ir_bitset_len(ctx->vregs_count + 1);
611
  bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
612
613
  /* vregs + tmp + fixed + ALL + SCRATCH_N */
614
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*));
615
616
#ifdef IR_DEBUG
617
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
618
#endif
619
620
    if (!ctx->arena) {
621
    ctx->arena = ir_arena_create(16 * 1024);
622
  }
623
624
  /* for each basic block in reverse order */
625
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
626
    bb = &ctx->cfg_blocks[b];
627
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
628
    /* for each successor of b */
629
630
#ifdef IR_DEBUG
631
    ir_bitset_incl(visited, b);
632
#endif
633
    live = bb_live + (len * b);
634
    n = bb->successors_count;
635
    if (n == 0) {
636
      ir_bitset_clear(live, len);
637
    } else {
638
      p = &ctx->cfg_edges[bb->successors];
639
      succ = *p;
640
641
#ifdef IR_DEBUG
642
      /* blocks must be ordered where all dominators of a block are before this block */
643
      IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
644
#endif
645
646
      /* live = union of successors.liveIn */
647
      if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
648
        ir_bitset_copy(live, bb_live + (len * succ), len);
649
      } else {
650
        IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
651
        ir_bitset_clear(live, len);
652
      }
653
      if (n > 1) {
654
        for (p++, n--; n > 0; p++, n--) {
655
          succ = *p;
656
          if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
657
            ir_bitset_union(live, bb_live + (len * succ), len);
658
          } else {
659
            IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
660
          }
661
        }
662
      }
663
664
      /* for each opd in live */
665
      IR_BITSET_FOREACH(live, len, i) {
666
        /* intervals[opd].addRange(b.from, b.to) */
667
        ir_add_prev_live_range(ctx, i,
668
          IR_START_LIVE_POS_FROM_REF(bb->start),
669
          IR_END_LIVE_POS_FROM_REF(bb->end));
670
      } IR_BITSET_FOREACH_END();
671
    }
672
673
    if (bb->successors_count == 1) {
674
      /* for each phi function phi of successor */
675
      succ = ctx->cfg_edges[bb->successors];
676
      succ_bb = &ctx->cfg_blocks[succ];
677
      if (succ_bb->flags & IR_BB_HAS_PHI) {
678
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
679
680
        k = ir_phi_input_number(ctx, succ_bb, b);
681
        IR_ASSERT(k != 0);
682
        for (ref = 0; ref < use_list->count; ref++) {
683
          ir_ref use = ctx->use_edges[use_list->refs + ref];
684
          insn = &ctx->ir_base[use];
685
          if (insn->op == IR_PHI) {
686
            ir_ref input = ir_insn_op(insn, k);
687
            if (input > 0) {
688
              uint32_t v = ctx->vregs[input];
689
690
              /* live.add(phi.inputOf(b)) */
691
              IR_ASSERT(v);
692
              ir_bitset_incl(live, v);
693
              /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */
694
              ival = ir_add_prev_live_range(ctx, v,
695
                IR_START_LIVE_POS_FROM_REF(bb->start),
696
                IR_END_LIVE_POS_FROM_REF(bb->end));
697
              ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
698
            }
699
          }
700
        }
701
      }
702
    }
703
704
    /* for each operation op of b in reverse order */
705
    ref = bb->end;
706
    insn = &ctx->ir_base[ref];
707
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
708
      ref = ctx->prev_ref[ref];
709
    }
710
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
711
      uint32_t def_flags;
712
      uint32_t flags;
713
      ir_ref *p;
714
      ir_target_constraints constraints;
715
      uint32_t v;
716
717
      if (ctx->rules) {
718
        int n;
719
720
        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
721
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
722
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
723
           && ctx->use_lists[ref].count > 0) {
724
            insn = &ctx->ir_base[ref];
725
            if (insn->op != IR_VADDR) {
726
              insn->op3 = ctx->vars;
727
              ctx->vars = ref;
728
            }
729
          }
730
          continue;
731
        }
732
733
        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
734
        n = constraints.tmps_count;
735
        while (n > 0) {
736
          n--;
737
          if (constraints.tmp_regs[n].type) {
738
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
739
          } else {
740
            /* CPU specific constraints */
741
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
742
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
743
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
744
          }
745
        }
746
      } else {
747
        def_flags = 0;
748
        constraints.def_reg = IR_REG_NONE;
749
        constraints.hints_count = 0;
750
      }
751
752
      insn = &ctx->ir_base[ref];
753
      v = ctx->vregs[ref];
754
      if (v) {
755
        IR_ASSERT(ir_bitset_in(live, v));
756
757
        if (insn->op != IR_PHI) {
758
          ir_live_pos def_pos;
759
          ir_ref hint_ref = 0;
760
          ir_reg reg = constraints.def_reg;
761
762
          if (reg != IR_REG_NONE) {
763
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
764
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
765
              /* parameter register must be kept before it's copied */
766
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
767
            }
768
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
769
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
770
              hint_ref = insn->op1;
771
            }
772
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
773
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
774
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
775
          } else {
776
            if (insn->op == IR_PARAM) {
777
              /* We may reuse parameter stack slot for spilling */
778
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
779
            }
780
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
781
          }
782
          /* live.remove(opd) */
783
          ir_bitset_excl(live, v);
784
          /* intervals[opd].setFrom(op.id) */
785
          ival = ir_fix_live_range(ctx, v,
786
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
787
          ival->type = insn->type;
788
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
789
        } else {
790
          /* live.remove(opd) */
791
          ir_bitset_excl(live, v);
792
          /* PHIs inputs must not be processed */
793
          ival = ctx->live_intervals[v];
794
          if (UNEXPECTED(!ival)) {
795
            /* Dead PHI */
796
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
797
          }
798
          ival->type = insn->type;
799
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
800
          continue;
801
        }
802
      } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
803
        ir_ref next = ir_next_control(ctx, ref);
804
        ir_live_pos use_pos;
805
806
        IR_ASSERT(insn->op == IR_SNAPSHOT);
807
        j = 2;
808
        p = insn->ops + 2;
809
        for (; j <= insn->inputs_count; j++, p++) {
810
          ir_ref input = *p;
811
          uint32_t v;
812
813
          if (input > 0) {
814
            v = ctx->vregs[input];
815
            IR_ASSERT(v);
816
            use_pos = IR_USE_LIVE_POS_FROM_REF(next);
817
            if (!ir_bitset_in(live, v)) {
818
              /* live.add(opd) */
819
              ir_bitset_incl(live, v);
820
              /* intervals[opd].addRange(b.from, op.id) */
821
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
822
            } else {
823
              ival = ctx->live_intervals[v];
824
            }
825
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
826
            ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
827
          }
828
        }
829
        continue;
830
      }
831
832
      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
833
      flags = ir_op_flags[insn->op];
834
      j = 1;
835
      p = insn->ops + 1;
836
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
837
        j++;
838
        p++;
839
      }
840
      for (; j <= insn->inputs_count; j++, p++) {
841
        ir_ref input = *p;
842
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
843
        ir_live_pos use_pos;
844
        ir_ref hint_ref = 0;
845
        uint32_t v;
846
847
        if (input > 0) {
848
          v = ctx->vregs[input];
849
          if (v) {
850
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
851
            if (reg != IR_REG_NONE) {
852
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
853
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
854
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
855
              if (j == 1) {
856
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
857
                IR_ASSERT(ctx->vregs[ref]);
858
                hint_ref = ref;
859
              } else if (input == insn->op1) {
860
                /* Input is the same as "op1" */
861
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
862
              }
863
            }
864
            if (!ir_bitset_in(live, v)) {
865
              /* live.add(opd) */
866
              ir_bitset_incl(live, v);
867
              /* intervals[opd].addRange(b.from, op.id) */
868
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
869
            } else {
870
              ival = ctx->live_intervals[v];
871
            }
872
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
873
          } else {
874
            if (ctx->rules) {
875
              if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
876
                ir_add_fusion_ranges(ctx, ref, input, bb, live);
877
              } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
878
                ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
879
              }
880
            }
881
            if (reg != IR_REG_NONE) {
882
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
883
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
884
            }
885
          }
886
        } else if (reg != IR_REG_NONE) {
887
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
888
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
889
        }
890
      }
891
    }
892
893
    /* if b is loop header */
894
    if ((bb->flags & IR_BB_LOOP_HEADER)
895
     && !ir_bitset_empty(live, len)) {
896
      /* variables live at loop header are alive at the whole loop body */
897
      uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
898
      uint32_t child;
899
      ir_block *child_bb;
900
      ir_bitset child_live_in;
901
902
      if (!loops) {
903
        loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
904
        ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
905
      } else {
906
        ir_bitset_clear(loops, bb_set_len);
907
        ir_bitqueue_clear(&queue);
908
      }
909
      ir_bitset_incl(loops, b);
910
      child = b;
911
      do {
912
        child_bb = &ctx->cfg_blocks[child];
913
        child_live_in = bb_live + (len * child);
914
915
        IR_BITSET_FOREACH(live, len, i) {
916
          ir_bitset_incl(child_live_in, i);
917
          ir_add_live_range(ctx, i,
918
            IR_START_LIVE_POS_FROM_REF(child_bb->start),
919
            IR_END_LIVE_POS_FROM_REF(child_bb->end));
920
        } IR_BITSET_FOREACH_END();
921
922
        child = child_bb->dom_child;
923
        while (child) {
924
          child_bb = &ctx->cfg_blocks[child];
925
          if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
926
            ir_bitqueue_add(&queue, child);
927
            if (child_bb->flags & IR_BB_LOOP_HEADER) {
928
              ir_bitset_incl(loops, child);
929
            }
930
          }
931
          child = child_bb->dom_next_child;
932
        }
933
      } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
934
    }
935
  }
936
937
  if (ctx->entries) {
938
    for (i = 0; i < ctx->entries_count; i++) {
939
      b = ctx->entries[i];
940
      bb = &ctx->cfg_blocks[b];
941
      live = bb_live + (len * b);
942
      ir_add_osr_entry_loads(ctx, bb, live, len, b);
943
    }
944
    if (ctx->osr_entry_loads) {
945
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
946
    }
947
  }
948
949
  if (loops) {
950
    ir_mem_free(loops);
951
    ir_bitqueue_free(&queue);
952
  }
953
954
  ir_mem_free(bb_live);
955
#ifdef IR_DEBUG
956
  ir_mem_free(visited);
957
#endif
958
959
  return 1;
960
}
961
962
#else
963
/* Path exploration by definition liveness for SSA using sets represented by linked lists */
964
965
#define IS_LIVE_IN_BLOCK(v, b) \
966
0
  (live_in_block[v] == b)
967
0
#define SET_LIVE_IN_BLOCK(v, b) do { \
968
0
    live_in_block[v] = b; \
969
0
  } while (0)
970
971
/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
972
IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
973
0
{
974
#if 0
975
  return live_outs[b];
976
#else
977
0
  if (!live_outs[b]) {
978
0
    return -1;
979
0
  }
980
0
  return ir_list_at(live_lists, live_outs[b]);
981
0
#endif
982
0
}
983
984
/* Remember a virtual register alive at the end of the block */
985
IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
986
0
{
987
#if 0
988
  ir_block *bb = &ctx->cfg_blocks[b];
989
  live_outs[b] = v;
990
  ir_add_prev_live_range(ctx, v,
991
    IR_START_LIVE_POS_FROM_REF(bb->start),
992
    IR_END_LIVE_POS_FROM_REF(bb->end));
993
#else
994
0
  if (live_lists->len >= live_lists->a.size) {
995
0
    ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
996
0
  }
997
  /* Form a linked list of virtual register live at the end of the block */
998
0
  ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
999
0
  live_outs[b] = ir_list_len(live_lists);           /* remember the new root */
1000
0
  ir_list_push_unchecked(live_lists, v);            /* push a virtual register */
1001
0
#endif
1002
0
}
1003
1004
/*
1005
 * Computes live-out sets for each basic-block per variable using def-use chains.
1006
 *
1007
 * The implementation is based on algorithms 6 and 7 desriebed in
1008
 * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot.
1009
 * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. TR Inria RR-7503, 2011
1010
 */
1011
static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
1012
0
{
1013
0
  ir_list block_queue, fuse_queue;
1014
0
  ir_ref i;
1015
1016
0
  ir_list_init(&fuse_queue, 16);
1017
0
  ir_list_init(&block_queue, 256);
1018
1019
  /* For each virtual register explore paths from all uses to definition */
1020
0
  for (i = ctx->insns_count - 1; i > 0; i--) {
1021
0
    uint32_t v = ctx->vregs[i];
1022
1023
0
    if (v) {
1024
0
      uint32_t def_block = ctx->cfg_map[i];
1025
0
      ir_use_list *use_list = &ctx->use_lists[i];
1026
0
      ir_ref *p, n = use_list->count;
1027
1028
      /* Collect all blocks where 'v' is used into a 'block_queue' */
1029
0
      for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1030
0
        ir_ref use = *p;
1031
0
        ir_insn *insn = &ctx->ir_base[use];
1032
1033
0
        if (UNEXPECTED(insn->op == IR_PHI)) {
1034
0
          ir_ref n = insn->inputs_count - 1;
1035
0
          ir_ref *p = insn->ops + 2; /* PHI data inputs */
1036
0
          ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
1037
1038
0
          for (;n > 0; p++, q++, n--) {
1039
0
            if (*p == i) {
1040
0
              uint32_t pred_block = ctx->cfg_map[*q];
1041
1042
0
              if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1043
0
                ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1044
0
                if (pred_block != def_block) {
1045
0
                  ir_list_push(&block_queue, pred_block);
1046
0
                }
1047
0
              }
1048
0
            }
1049
0
          }
1050
0
        } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) {
1051
0
          while (1) {
1052
0
            ir_use_list *use_list = &ctx->use_lists[use];
1053
0
            ir_ref *p, n = use_list->count;
1054
1055
0
            for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1056
0
              ir_ref use = *p;
1057
1058
0
              if (ctx->rules[use] & IR_FUSED) {
1059
0
                ir_list_push(&fuse_queue, use);
1060
0
              } else {
1061
0
                uint32_t use_block = ctx->cfg_map[use];
1062
1063
0
                if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1064
0
                  ir_list_push(&block_queue, use_block);
1065
0
                }
1066
0
              }
1067
0
            }
1068
0
            if (!ir_list_len(&fuse_queue)) {
1069
0
              break;
1070
0
            }
1071
0
            use = ir_list_pop(&fuse_queue);
1072
0
          }
1073
0
        } else {
1074
0
          uint32_t use_block = ctx->cfg_map[use];
1075
1076
          /* Check if the virtual register is alive at the start of 'use_block' */
1077
0
          if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1078
0
            ir_list_push(&block_queue, use_block);
1079
0
          }
1080
0
        }
1081
0
      }
1082
1083
      /* UP_AND_MARK: Traverse through predecessor blocks until we reach the block where 'v' is defined*/
1084
0
      while (ir_list_len(&block_queue)) {
1085
0
        uint32_t b = ir_list_pop(&block_queue);
1086
0
        ir_block *bb = &ctx->cfg_blocks[b];
1087
0
        uint32_t *p, n = bb->predecessors_count;
1088
1089
0
        if (bb->flags & IR_BB_ENTRY) {
1090
          /* live_in_push(ENTRY, v) */
1091
0
          ir_insn *insn = &ctx->ir_base[bb->start];
1092
1093
0
          IR_ASSERT(insn->op == IR_ENTRY);
1094
0
          IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count);
1095
0
          if (live_lists->len >= live_lists->a.size) {
1096
0
            ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
1097
0
          }
1098
0
          ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]);
1099
0
          ir_list_push_unchecked(live_lists, v);
1100
0
          live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1;
1101
0
          continue;
1102
0
        }
1103
0
        for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
1104
0
          uint32_t pred_block = *p;
1105
1106
          /* Check if 'pred_block' wasn't traversed before */
1107
0
          if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1108
            /* Mark a virtual register 'v' alive at the end of 'pred_block' */
1109
0
            ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1110
0
            if (pred_block != def_block) {
1111
0
              ir_list_push(&block_queue, pred_block);
1112
0
            }
1113
0
          }
1114
0
        }
1115
0
      }
1116
0
    }
1117
0
  }
1118
1119
0
  ir_list_free(&block_queue);
1120
0
  ir_list_free(&fuse_queue);
1121
0
}
1122
1123
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, uint32_t b)
1124
0
{
1125
0
  bool ok = 1;
1126
0
  int count = 0;
1127
0
  ir_list *list = (ir_list*)ctx->osr_entry_loads;
1128
0
  ir_ref i;
1129
1130
0
  while (pos) {
1131
0
    i = ir_list_at(live_lists, pos);
1132
0
    pos = ir_list_at(live_lists, pos - 1);
1133
1134
    /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
1135
0
    ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
1136
0
    ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
1137
1138
0
    if (use_pos->op_num) {
1139
0
      ir_ref *ops = ctx->ir_base[ref].ops;
1140
0
      ref = ops[use_pos->op_num];
1141
0
    }
1142
1143
0
    if (ctx->ir_base[ref].op == IR_PARAM) {
1144
0
      continue;
1145
0
    }
1146
0
    if (ctx->binding) {
1147
0
      ir_ref var = ir_binding_find(ctx, ref);
1148
0
      if (var < 0) {
1149
        /* We may load the value at OSR entry-point */
1150
0
        if (!count) {
1151
0
          bb->flags &= ~IR_BB_EMPTY;
1152
0
          bb->flags |= IR_BB_OSR_ENTRY_LOADS;
1153
0
          if (!ctx->osr_entry_loads) {
1154
0
            list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
1155
0
            ir_list_init(list, 16);
1156
0
          }
1157
0
          ir_list_push(list, b);
1158
0
          ir_list_push(list, 0);
1159
0
        }
1160
0
        ir_list_push(list, ref);
1161
0
        count++;
1162
0
        continue;
1163
0
      }
1164
0
    }
1165
0
    fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
1166
0
    ok = 0;
1167
0
  }
1168
1169
0
  if (!ok) {
1170
0
    IR_ASSERT(0);
1171
0
  }
1172
0
  if (count) {
1173
0
    ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
1174
1175
#if 0
1176
    /* ENTRY "clobbers" all registers */
1177
    ir_ref ref = ctx->ir_base[bb->start].op1;
1178
    ir_add_fixed_live_range(ctx, IR_REG_ALL,
1179
      IR_DEF_LIVE_POS_FROM_REF(ref),
1180
      IR_SAVE_LIVE_POS_FROM_REF(ref));
1181
#endif
1182
0
  }
1183
0
}
1184
1185
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b)
1186
0
{
1187
0
  ir_ref stack[4];
1188
0
  int stack_pos = 0;
1189
0
  ir_target_constraints constraints;
1190
0
  ir_insn *insn;
1191
0
  uint32_t j, n, flags, def_flags;
1192
0
  ir_ref *p, child;
1193
0
  uint8_t use_flags;
1194
0
  ir_reg reg;
1195
0
  ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref);
1196
0
  ir_live_pos use_pos;
1197
0
  ir_live_interval *ival;
1198
1199
0
  while (1) {
1200
0
    IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
1201
1202
0
    if (!(ctx->rules[input] & IR_SIMPLE)) {
1203
0
      def_flags = ir_get_target_constraints(ctx, input, &constraints);
1204
0
      n = constraints.tmps_count;
1205
0
      while (n > 0) {
1206
0
        n--;
1207
0
        if (constraints.tmp_regs[n].type) {
1208
0
          ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1209
0
        } else {
1210
          /* CPU specific constraints */
1211
0
          ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1212
0
            pos + constraints.tmp_regs[n].start,
1213
0
            pos + constraints.tmp_regs[n].end);
1214
0
        }
1215
0
      }
1216
0
    } else {
1217
0
      def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
1218
0
      constraints.hints_count = 0;
1219
0
    }
1220
1221
0
    insn = &ctx->ir_base[input];
1222
0
    flags = ir_op_flags[insn->op];
1223
0
    IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
1224
0
    n = IR_INPUT_EDGES_COUNT(flags);
1225
0
    j = 1;
1226
0
    p = insn->ops + j;
1227
0
    if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_PINNED)) {
1228
0
      j++;
1229
0
      p++;
1230
0
    }
1231
0
    for (; j <= n; j++, p++) {
1232
0
      IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
1233
0
      child = *p;
1234
0
      if (child > 0) {
1235
0
        uint32_t v = ctx->vregs[child];
1236
1237
0
        if (v) {
1238
0
          reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1239
0
          use_pos = pos;
1240
0
          if (EXPECTED(reg == IR_REG_NONE)) {
1241
0
            use_pos += IR_USE_SUB_REF;
1242
0
          }
1243
1244
0
          if (!IS_LIVE_IN_BLOCK(v, b)) {
1245
            /* live.add(opd) */
1246
0
            SET_LIVE_IN_BLOCK(v, b);
1247
            /* intervals[opd].addRange(b.from, op.id) */
1248
0
            ival = ir_add_live_range(ctx, v,
1249
0
              IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1250
0
          } else {
1251
0
            ival = ctx->live_intervals[v];
1252
0
          }
1253
0
          use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
1254
0
          ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
1255
0
        } else if (ctx->rules[child] & IR_FUSED) {
1256
0
          IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack_pos)));
1257
0
          stack[stack_pos++] = child;
1258
0
        } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
1259
0
          ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
1260
0
        }
1261
0
      }
1262
0
    }
1263
0
    if (!stack_pos) {
1264
0
      break;
1265
0
    }
1266
0
    input = stack[--stack_pos];
1267
0
  }
1268
0
}
1269
1270
int ir_compute_live_ranges(ir_ctx *ctx)
1271
0
{
1272
0
  uint32_t b, i, j, k, n, succ;
1273
0
  ir_ref ref;
1274
0
  ir_insn *insn;
1275
0
  ir_block *bb, *succ_bb;
1276
0
  uint32_t *live_outs;
1277
0
  uint32_t *live_in_block;
1278
0
  ir_list live_lists;
1279
0
  ir_live_interval *ival;
1280
1281
0
  if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
1282
0
    return 0;
1283
0
  }
1284
1285
0
  if (ctx->rules) {
1286
0
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
1287
0
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
1288
0
  }
1289
1290
  /* Root of the list of IR_VARs */
1291
0
  ctx->vars = IR_UNUSED;
1292
1293
  /* Compute Live Ranges */
1294
0
  ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
1295
1296
  /* vregs + tmp + fixed + ALL + SCRATCH_N */
1297
0
  ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*));
1298
1299
0
    if (!ctx->arena) {
1300
0
    ctx->arena = ir_arena_create(16 * 1024);
1301
0
  }
1302
1303
0
  live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
1304
0
  ir_list_init(&live_lists, 1024);
1305
0
  ir_compute_live_sets(ctx, live_outs, &live_lists);
1306
0
  live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));
1307
1308
  /* for each basic block in reverse order */
1309
0
  for (b = ctx->cfg_blocks_count; b > 0; b--) {
1310
0
    bb = &ctx->cfg_blocks[b];
1311
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1312
1313
    /* For all virtual register alive at the end of the block */
1314
0
    n = live_outs[b];
1315
0
    while (n != 0) {
1316
0
      i = ir_list_at(&live_lists, n);
1317
0
      SET_LIVE_IN_BLOCK(i, b);
1318
0
      ir_add_prev_live_range(ctx, i,
1319
0
        IR_START_LIVE_POS_FROM_REF(bb->start),
1320
0
        IR_END_LIVE_POS_FROM_REF(bb->end));
1321
0
      n = ir_list_at(&live_lists, n - 1);
1322
0
    }
1323
1324
0
    if (bb->successors_count == 1) {
1325
      /* for each phi function of the successor */
1326
0
      succ = ctx->cfg_edges[bb->successors];
1327
0
      succ_bb = &ctx->cfg_blocks[succ];
1328
0
      if (succ_bb->flags & IR_BB_HAS_PHI) {
1329
0
        ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
1330
0
        ir_ref n, *p;
1331
1332
0
        k = ir_phi_input_number(ctx, succ_bb, b);
1333
0
        IR_ASSERT(k != 0);
1334
0
        n = use_list->count;
1335
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1336
0
          ir_ref use = *p;
1337
0
          insn = &ctx->ir_base[use];
1338
0
          if (insn->op == IR_PHI) {
1339
0
            ir_ref input = ir_insn_op(insn, k);
1340
0
            if (input > 0) {
1341
0
              uint32_t v = ctx->vregs[input];
1342
1343
0
              if (v) {
1344
0
                ival = ctx->live_intervals[v];
1345
0
                ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
1346
0
              }
1347
0
            }
1348
0
          }
1349
0
        }
1350
0
      }
1351
0
    }
1352
1353
    /* for each operation of the block in reverse order */
1354
0
    ref = bb->end;
1355
0
    insn = &ctx->ir_base[ref];
1356
0
    if (insn->op == IR_END || insn->op == IR_LOOP_END) {
1357
0
      ref = ctx->prev_ref[ref];
1358
0
    }
1359
0
    for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
1360
0
      uint32_t def_flags;
1361
0
      uint32_t flags;
1362
0
      ir_ref *p;
1363
0
      ir_target_constraints constraints;
1364
0
      uint32_t v;
1365
1366
0
      if (ctx->rules) {
1367
0
        int n;
1368
1369
0
        if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
1370
0
          if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
1371
0
            || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
1372
0
           && ctx->use_lists[ref].count > 0) {
1373
0
            insn = &ctx->ir_base[ref];
1374
0
            if (insn->op != IR_VADDR && insn->op != IR_PARAM) {
1375
0
              insn->op3 = ctx->vars;
1376
0
              ctx->vars = ref;
1377
0
            }
1378
0
          }
1379
0
          continue;
1380
0
        }
1381
1382
0
        def_flags = ir_get_target_constraints(ctx, ref, &constraints);
1383
0
        n = constraints.tmps_count;
1384
0
        while (n > 0) {
1385
0
          n--;
1386
0
          if (constraints.tmp_regs[n].type) {
1387
0
            ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1388
0
          } else {
1389
            /* CPU specific constraints */
1390
0
            ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1391
0
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
1392
0
              IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
1393
0
          }
1394
0
        }
1395
0
      } else {
1396
0
        def_flags = 0;
1397
0
        constraints.def_reg = IR_REG_NONE;
1398
0
        constraints.hints_count = 0;
1399
0
      }
1400
1401
0
      insn = &ctx->ir_base[ref];
1402
0
      v = ctx->vregs[ref];
1403
0
      if (v) {
1404
0
        if (insn->op != IR_PHI) {
1405
0
          ir_live_pos def_pos;
1406
0
          ir_ref hint_ref = 0;
1407
0
          ir_reg reg = constraints.def_reg;
1408
1409
0
          if (reg != IR_REG_NONE) {
1410
0
            def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
1411
0
            if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
1412
              /* parameter register must be kept before it's copied */
1413
0
              ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1414
0
            }
1415
0
          } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1416
0
            if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
1417
0
              hint_ref = insn->op1;
1418
0
            }
1419
0
            if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1420
0
              def_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1421
0
            } else {
1422
0
              def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1423
0
            }
1424
0
          } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1425
0
            def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1426
0
          } else {
1427
0
            if (insn->op == IR_PARAM) {
1428
              /* We may reuse parameter stack slot for spilling */
1429
0
              ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1430
0
            }
1431
0
            def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
1432
0
          }
1433
          /* intervals[opd].setFrom(op.id) */
1434
0
          ival = ir_fix_live_range(ctx, v,
1435
0
            IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1436
0
          ival->type = insn->type;
1437
0
          ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
1438
0
        } else {
1439
          /* PHIs inputs must not be processed */
1440
0
          ival = ctx->live_intervals[v];
1441
0
          if (UNEXPECTED(!ival)) {
1442
            /* Dead PHI */
1443
0
            ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
1444
0
          }
1445
0
          ival->type = insn->type;
1446
0
          ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
1447
0
          continue;
1448
0
        }
1449
0
      } else if (def_flags & IR_EXTEND_INPUTS_TO_NEXT) {
1450
0
        ir_ref next = ir_next_control(ctx, ref);
1451
0
        ir_live_pos use_pos;
1452
1453
0
        IR_ASSERT(insn->op == IR_SNAPSHOT);
1454
0
        j = 2;
1455
0
        p = insn->ops + 2;
1456
0
        for (; j <= insn->inputs_count; j++, p++) {
1457
0
          ir_ref input = *p;
1458
0
          uint32_t v;
1459
1460
0
          if (input > 0) {
1461
0
            v = ctx->vregs[input];
1462
0
            IR_ASSERT(v);
1463
0
            use_pos = IR_USE_LIVE_POS_FROM_REF(next);
1464
0
            if (!IS_LIVE_IN_BLOCK(v, b)) {
1465
              /* live.add(opd) */
1466
0
              SET_LIVE_IN_BLOCK(v, b);
1467
              /* intervals[opd].addRange(b.from, op.id) */
1468
0
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1469
0
            } else {
1470
0
              ival = ctx->live_intervals[v];
1471
0
            }
1472
0
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1473
0
            ir_add_use(ctx, ival, j, use_pos, IR_REG_NONE, 0, IR_UNUSED);
1474
0
          }
1475
0
        }
1476
0
        continue;
1477
0
      }
1478
1479
0
      IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
1480
0
      flags = ir_op_flags[insn->op];
1481
0
      j = 1;
1482
0
      p = insn->ops + 1;
1483
0
      if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
1484
0
        j++;
1485
0
        p++;
1486
0
      }
1487
0
      for (; j <= insn->inputs_count; j++, p++) {
1488
0
        ir_ref input = *p;
1489
0
        ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1490
0
        ir_live_pos use_pos;
1491
0
        ir_ref hint_ref = 0;
1492
0
        uint32_t v;
1493
1494
0
        if (input > 0) {
1495
0
          v = ctx->vregs[input];
1496
0
          if (v) {
1497
0
            use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1498
0
            if (reg != IR_REG_NONE) {
1499
0
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1500
0
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1501
0
            } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1502
0
              if (j == 1) {
1503
0
                if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1504
0
                  use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1505
0
                } else {
1506
0
                  use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1507
0
                }
1508
0
                IR_ASSERT(ctx->vregs[ref]);
1509
0
                hint_ref = ref;
1510
0
              } else if (input == insn->op1) {
1511
                /* Input is the same as "op1" */
1512
0
                use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1513
0
              }
1514
0
            }
1515
0
            if (!IS_LIVE_IN_BLOCK(v, b)) {
1516
              /* live.add(opd) */
1517
0
              SET_LIVE_IN_BLOCK(v, b);
1518
              /* intervals[opd].addRange(b.from, op.id) */
1519
0
              ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1520
0
            } else {
1521
0
              ival = ctx->live_intervals[v];
1522
0
            }
1523
0
            ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
1524
0
          } else {
1525
0
            if (ctx->rules) {
1526
0
              if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) {
1527
0
                ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
1528
0
              } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
1529
0
                ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
1530
0
              }
1531
0
            }
1532
0
            if (reg != IR_REG_NONE) {
1533
0
              use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1534
0
              ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1535
0
            }
1536
0
          }
1537
0
        } else if (reg != IR_REG_NONE) {
1538
0
          use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1539
0
          ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1540
0
        }
1541
0
      }
1542
0
    }
1543
0
  }
1544
1545
0
  if (ctx->entries) {
1546
0
    for (i = 0; i < ctx->entries_count; i++) {
1547
0
      b = ctx->entries[i];
1548
0
      bb = &ctx->cfg_blocks[b];
1549
0
      IR_ASSERT(bb->predecessors_count == 1);
1550
0
      ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
1551
0
    }
1552
0
    if (ctx->osr_entry_loads) {
1553
0
      ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
1554
0
    }
1555
0
  }
1556
1557
0
  ir_list_free(&live_lists);
1558
0
  ir_mem_free(live_outs);
1559
0
  ir_mem_free(live_in_block);
1560
1561
0
  return 1;
1562
0
}
1563
1564
#endif
1565
1566
/* Live Ranges coalescing */
1567
1568
static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2)
1569
0
{
1570
0
  while (1) {
1571
0
    if (lrg2->start < lrg1->end) {
1572
0
      if (lrg1->start < lrg2->end) {
1573
0
        return IR_MAX(lrg1->start, lrg2->start);
1574
0
      } else {
1575
0
        lrg2 = lrg2->next;
1576
0
        if (!lrg2) {
1577
0
          return 0;
1578
0
        }
1579
0
      }
1580
0
    } else {
1581
0
      lrg1 = lrg1->next;
1582
0
      if (!lrg1) {
1583
0
        return 0;
1584
0
      }
1585
0
    }
1586
0
  }
1587
0
}
1588
1589
static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1590
0
{
1591
0
  ir_live_interval *ival1 = ctx->live_intervals[r1];
1592
0
  ir_live_interval *ival2 = ctx->live_intervals[r2];
1593
1594
#if 0
1595
  if (ival2->range.start >= ival1->end
1596
   || ival1->range.start >= ival2->end) {
1597
    return 0;
1598
  }
1599
#endif
1600
0
  return ir_ivals_overlap(&ival1->range, &ival2->range);
1601
0
}
1602
1603
static bool ir_ivals_inside(ir_live_range *parent, ir_live_range *child)
1604
0
{
1605
0
  do {
1606
0
    while (parent && parent->end < child->start) {
1607
0
      parent = parent->next;
1608
0
    }
1609
0
    if (!parent || parent->start > child->start || parent->end < child->end) {
1610
0
      return 0;
1611
0
    }
1612
0
    child = child->next;
1613
0
  } while (child);
1614
0
  return 1;
1615
0
}
1616
1617
static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
1618
0
{
1619
0
  ir_live_interval *child_ival = ctx->live_intervals[child];
1620
0
  ir_live_interval *parent_ival = ctx->live_intervals[parent];
1621
1622
0
  if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
1623
    // TODO: Support valid cases with already coalesced "parent_ival
1624
0
    return 0;
1625
0
  }
1626
#if 0
1627
  if (child_ival->end >= parent_ival->end) {
1628
    return 0;
1629
  }
1630
#endif
1631
0
  return ir_ivals_inside(&parent_ival->range, &child_ival->range);
1632
0
}
1633
1634
static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1635
0
{
1636
0
  ir_live_interval *ival = ctx->live_intervals[r2];
1637
0
  ir_live_range *live_range = &ival->range;
1638
0
  ir_live_range *next;
1639
0
  ir_use_pos *use_pos, *next_pos, **prev;
1640
1641
#if 0
1642
  fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);
1643
#endif
1644
1645
0
  ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1646
0
  live_range = live_range->next;
1647
0
  while (live_range) {
1648
0
    next = live_range->next;
1649
0
    live_range->next = ctx->unused_ranges;
1650
0
    ctx->unused_ranges = live_range;
1651
0
    ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1652
0
    live_range = next;
1653
0
  }
1654
1655
  /* merge sorted use_pos lists */
1656
0
  prev = &ctx->live_intervals[r1]->use_pos;
1657
0
  use_pos = ival->use_pos;
1658
0
  while (use_pos) {
1659
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) {
1660
0
      use_pos->hint_ref = 0;
1661
0
    }
1662
0
    while (*prev && ((*prev)->pos < use_pos->pos ||
1663
0
      ((*prev)->pos == use_pos->pos &&
1664
0
        (use_pos->op_num == 0 || ((*prev)->op_num != 0 && (*prev)->op_num < use_pos->op_num))))) {
1665
0
      if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
1666
0
        (*prev)->hint_ref = 0;
1667
0
      }
1668
0
      prev = &(*prev)->next;
1669
0
    }
1670
0
    next_pos = use_pos->next;
1671
0
    use_pos->next = *prev;
1672
0
    *prev = use_pos;
1673
0
    prev = &use_pos->next;
1674
0
      use_pos = next_pos;
1675
0
  }
1676
0
  use_pos = *prev;
1677
0
  while (use_pos) {
1678
0
    if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) {
1679
0
      use_pos->hint_ref = 0;
1680
0
    }
1681
0
    use_pos = use_pos->next;
1682
0
  }
1683
1684
0
  ctx->live_intervals[r1]->flags |=
1685
0
    IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
1686
0
  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
1687
0
    IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM));
1688
0
    ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1689
0
  }
1690
0
  ctx->live_intervals[r2] = NULL;
1691
1692
  // TODO: remember to reuse ???
1693
  //ir_mem_free(ival);
1694
0
}
1695
1696
static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)
1697
0
{
1698
0
  ir_ref i;
1699
0
  uint16_t f1 = ctx->live_intervals[v1]->flags;
1700
0
  uint16_t f2 = ctx->live_intervals[v2]->flags;
1701
1702
#if 0
1703
  if (ctx->binding) {
1704
    ir_ref b1 = ir_binding_find(ctx, from);
1705
    ir_ref b2 = ir_binding_find(ctx, to);
1706
    IR_ASSERT(b1 == b2);
1707
  }
1708
#endif
1709
0
  if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
1710
0
    ir_vregs_join(ctx, v1, v2);
1711
0
    ctx->vregs[to] = v1;
1712
0
  } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) {
1713
0
    ir_vregs_join(ctx, v2, v1);
1714
0
    ctx->vregs[from] = v2;
1715
0
  } else if (from < to) {
1716
0
    ir_vregs_join(ctx, v1, v2);
1717
0
    if (f2 & IR_LIVE_INTERVAL_COALESCED) {
1718
0
      for (i = 1; i < ctx->insns_count; i++) {
1719
0
        if (ctx->vregs[i] == v2) {
1720
0
          ctx->vregs[i] = v1;
1721
0
        }
1722
0
      }
1723
0
    } else {
1724
0
      ctx->vregs[to] = v1;
1725
0
    }
1726
0
  } else {
1727
0
    ir_vregs_join(ctx, v2, v1);
1728
0
    if (f1 & IR_LIVE_INTERVAL_COALESCED) {
1729
0
      for (i = 1; i < ctx->insns_count; i++) {
1730
0
        if (ctx->vregs[i] == v1) {
1731
0
          ctx->vregs[i] = v2;
1732
0
        }
1733
0
      }
1734
0
    } else {
1735
0
      ctx->vregs[from] = v2;
1736
0
    }
1737
0
  }
1738
0
}
1739
1740
static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
1741
0
{
1742
0
  if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
1743
0
    ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
1744
0
    ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
1745
0
    ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
1746
#if 0
1747
    fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
1748
#endif
1749
0
  }
1750
0
}
1751
1752
typedef struct _ir_coalesce_block {
1753
  uint32_t b;
1754
  uint32_t loop_depth;
1755
} ir_coalesce_block;
1756
1757
static int ir_block_cmp(const void *b1, const void *b2)
1758
0
{
1759
0
  ir_coalesce_block *d1 = (ir_coalesce_block*)b1;
1760
0
  ir_coalesce_block *d2 = (ir_coalesce_block*)b2;
1761
1762
0
  if (d1->loop_depth > d2->loop_depth) {
1763
0
    return -1;
1764
0
  } else if (d1->loop_depth == d2->loop_depth) {
1765
0
    if (d1->b < d2->b) {
1766
0
      return -1;
1767
0
    } else {
1768
0
      return 1;
1769
0
    }
1770
0
  } else {
1771
0
    return 1;
1772
0
  }
1773
0
}
1774
1775
static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1776
0
{
1777
0
  ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1778
0
  ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1779
0
  ir_live_interval *ival;
1780
0
  ir_live_range *r;
1781
0
  ir_use_pos *p, *p1 = NULL, *p2 = NULL;
1782
0
  ir_ref tmp;
1783
1784
0
  tmp = insn->op1;
1785
0
  insn->op1 = insn->op2;
1786
0
  insn->op2 = tmp;
1787
1788
0
  ival = ctx->live_intervals[ctx->vregs[insn->op1]];
1789
0
  p = ival->use_pos;
1790
0
  while (p) {
1791
0
    if (p->pos == pos) {
1792
0
      p->pos = load_pos;
1793
0
      p->op_num = 1;
1794
0
      p1 = p;
1795
0
      break;
1796
0
    }
1797
0
    p = p->next;
1798
0
  }
1799
1800
0
  ival = ctx->live_intervals[ctx->vregs[i]];
1801
0
  p = ival->use_pos;
1802
0
  while (p) {
1803
0
    if (p->pos == load_pos) {
1804
0
      p->hint_ref = insn->op1;
1805
0
      break;
1806
0
    }
1807
0
    p = p->next;
1808
0
  }
1809
1810
0
  if (insn->op2 > 0 && ctx->vregs[insn->op2]) {
1811
0
    ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1812
0
    r = &ival->range;
1813
0
    while (r) {
1814
0
      if (r->end == load_pos) {
1815
0
        r->end = pos;
1816
0
        if (!r->next) {
1817
0
          ival->end = pos;
1818
0
        }
1819
0
        break;
1820
0
      }
1821
0
      r = r->next;
1822
0
    }
1823
0
    p = ival->use_pos;
1824
0
    while (p) {
1825
0
      if (p->pos == load_pos) {
1826
0
        p->pos = pos;
1827
0
        p->op_num = 2;
1828
0
        p2 = p;
1829
0
        break;
1830
0
      }
1831
0
      p = p->next;
1832
0
    }
1833
0
  }
1834
0
  if (p1 && p2) {
1835
0
    uint8_t tmp = p1->flags;
1836
0
    p1->flags = p2->flags;
1837
0
    p2->flags = tmp;
1838
0
  }
1839
0
}
1840
1841
static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)
1842
0
{
1843
0
  ir_use_pos *p;
1844
0
  ir_reg r1 = IR_REG_NONE;
1845
0
  ir_reg r2 = IR_REG_NONE;
1846
1847
0
  p = ctx->live_intervals[use]->use_pos;
1848
0
  while (p) {
1849
0
    if (IR_LIVE_POS_TO_REF(p->pos) == ref) {
1850
0
      break;
1851
0
    }
1852
0
    if (p->hint != IR_REG_NONE) {
1853
0
      r1 = p->hint;
1854
0
    }
1855
0
    p = p->next;
1856
0
  }
1857
1858
0
  p = ctx->live_intervals[def]->use_pos;
1859
0
  while (p) {
1860
0
    if (IR_LIVE_POS_TO_REF(p->pos) > ref) {
1861
0
      if (p->hint != IR_REG_NONE) {
1862
0
        r2 = p->hint;
1863
0
        break;
1864
0
      }
1865
0
    }
1866
0
    p = p->next;
1867
0
  }
1868
0
  return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE;
1869
0
}
1870
1871
static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1872
0
{
1873
0
  if (ctx->vregs[insn->op1]
1874
0
   && ctx->vregs[insn->op1] != ctx->vregs[i]
1875
0
   && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
1876
0
   && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {
1877
    /* pass */
1878
0
  } else {
1879
0
    if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {
1880
0
      ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1881
0
      ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1882
0
      ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1883
0
      ir_live_range *r = &ival->range;
1884
1885
0
      if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) {
1886
0
        return 0;
1887
0
      }
1888
0
      while (r) {
1889
0
        if (r->end == pos) {
1890
0
          r->end = load_pos;
1891
0
          if (!r->next) {
1892
0
            ival->end = load_pos;
1893
0
          }
1894
0
          if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
1895
0
           && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
1896
0
            ir_swap_operands(ctx, i, insn);
1897
0
            return 1;
1898
0
          } else {
1899
0
            r->end = pos;
1900
0
            if (!r->next) {
1901
0
              ival->end = pos;
1902
0
            }
1903
0
          }
1904
0
          break;
1905
0
        }
1906
0
        r = r->next;
1907
0
      }
1908
0
    }
1909
0
  }
1910
0
  return 0;
1911
0
}
1912
1913
int ir_coalesce(ir_ctx *ctx)
1914
0
{
1915
0
  uint32_t b, n, succ, pred_b, count = 0;
1916
0
  ir_ref *p, use, input, k, j;
1917
0
  ir_block *bb, *succ_bb;
1918
0
  ir_use_list *use_list;
1919
0
  ir_insn *insn;
1920
0
  ir_bitset visited;
1921
0
  ir_coalesce_block *list;
1922
0
  bool compact = 0;
1923
1924
  /* Collect a list of blocks which are predecossors to block with phi functions */
1925
0
  list = ir_mem_malloc(sizeof(ir_coalesce_block) * ctx->cfg_blocks_count);
1926
0
  visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
1927
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
1928
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1929
0
    if (bb->flags & IR_BB_HAS_PHI) {
1930
0
      k = bb->predecessors_count;
1931
0
      if (k > 1) {
1932
0
        use_list = &ctx->use_lists[bb->start];
1933
0
        n = use_list->count;
1934
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
1935
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1936
0
          use = *p;
1937
0
          insn = &ctx->ir_base[use];
1938
0
          if (insn->op == IR_PHI) {
1939
0
            do {
1940
0
              k--;
1941
0
              pred_b = ctx->cfg_edges[bb->predecessors + k];
1942
0
              if (!ir_bitset_in(visited, pred_b)) {
1943
0
                ir_bitset_incl(visited, pred_b);
1944
0
                list[count].b = pred_b;
1945
0
                list[count].loop_depth = ctx->cfg_blocks[pred_b].loop_depth;
1946
0
                count++;
1947
0
              }
1948
0
            } while (k > 0);
1949
0
            break;
1950
0
          }
1951
0
        }
1952
0
      }
1953
0
    }
1954
0
  }
1955
0
  ir_mem_free(visited);
1956
1957
  /* Sort blocks according to their loop depth */
1958
0
  qsort(list, count, sizeof(ir_coalesce_block), ir_block_cmp);
1959
1960
0
  while (count > 0) {
1961
1962
0
    count--;
1963
0
    b = list[count].b;
1964
0
    bb = &ctx->cfg_blocks[b];
1965
0
    IR_ASSERT(bb->successors_count == 1);
1966
0
    succ = ctx->cfg_edges[bb->successors];
1967
0
    succ_bb = &ctx->cfg_blocks[succ];
1968
0
    IR_ASSERT(succ_bb->predecessors_count > 1);
1969
0
    k = ir_phi_input_number(ctx, succ_bb, b);
1970
0
    use_list = &ctx->use_lists[succ_bb->start];
1971
0
    n = use_list->count;
1972
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1973
0
      use = *p;
1974
0
      insn = &ctx->ir_base[use];
1975
0
      if (insn->op == IR_PHI) {
1976
0
        input = ir_insn_op(insn, k);
1977
0
        if (input > 0 && ctx->vregs[input]) {
1978
0
          uint32_t v1 = ctx->vregs[input];
1979
0
          uint32_t v2 = ctx->vregs[use];
1980
1981
0
          if (v1 == v2) {
1982
            /* already coalesced */
1983
0
          } else {
1984
0
            if (!ir_vregs_overlap(ctx, v1, v2)) {
1985
0
              ir_vregs_coalesce(ctx, v1, v2, input, use);
1986
0
              compact = 1;
1987
0
            } else {
1988
0
#if 1
1989
0
              if (ctx->rules && (ctx->rules[input] & IR_MAY_SWAP)) {
1990
0
                ir_insn *input_insn = &ctx->ir_base[input];
1991
1992
0
                IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE);
1993
0
                if (input_insn->op2 == use
1994
0
                 && input_insn->op1 != use
1995
0
                 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)) {
1996
0
                  ir_live_range *r = &ctx->live_intervals[v2]->range;
1997
1998
0
                  do {
1999
0
                    if (r->end == IR_USE_LIVE_POS_FROM_REF(input)) {
2000
0
                      break;
2001
0
                    }
2002
0
                    r = r->next;
2003
0
                  } while (r);
2004
0
                  if (r) {
2005
0
                    r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
2006
0
                    if (!r->next) {
2007
0
                      ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
2008
0
                    }
2009
0
                    if (ir_vregs_overlap(ctx, v1, v2)) {
2010
0
                      r->end = IR_USE_LIVE_POS_FROM_REF(input);
2011
0
                      if (!r->next) {
2012
0
                        ctx->live_intervals[v2]->end = IR_USE_LIVE_POS_FROM_REF(input);
2013
0
                      }
2014
0
                    } else {
2015
0
                      ir_swap_operands(ctx, input, input_insn);
2016
0
                      IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
2017
0
                      ir_vregs_coalesce(ctx, v1, v2, input, use);
2018
0
                      compact = 1;
2019
0
                      continue;
2020
0
                    }
2021
0
                  }
2022
0
                }
2023
0
              }
2024
0
#endif
2025
0
              ir_add_phi_move(ctx, b, input, use);
2026
0
            }
2027
0
          }
2028
0
        } else {
2029
          /* Move for constant input */
2030
0
          ir_add_phi_move(ctx, b, input, use);
2031
0
        }
2032
0
      }
2033
0
    }
2034
0
  }
2035
0
  ir_mem_free(list);
2036
2037
0
  ir_hint_propagation(ctx);
2038
2039
0
  if (ctx->rules) {
2040
    /* try to swap operands of commutative instructions for better register allocation */
2041
0
    uint32_t *rule = ctx->rules + 1;
2042
0
    ir_ref i;
2043
2044
0
    for (i = 1; i < ctx->insns_count; rule++, i++) {
2045
0
      if ((*rule) & (IR_MAY_SWAP|IR_MAY_REUSE)) {
2046
0
        insn = &ctx->ir_base[i];
2047
0
        IR_ASSERT(ctx->vregs[i]);
2048
0
        if ((*rule) & IR_MAY_SWAP) {
2049
0
          IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE);
2050
0
          if (ctx->live_intervals[ctx->vregs[i]]->use_pos
2051
0
           && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
2052
0
           && insn->op2 > 0
2053
0
           && insn->op1 > 0
2054
0
           && insn->op1 != insn->op2) {
2055
0
            ir_try_swap_operands(ctx, i, insn);
2056
0
          }
2057
0
        } else {
2058
0
          IR_ASSERT((*rule) & IR_MAY_REUSE);
2059
0
          if (insn->op1 > 0
2060
0
           && ctx->vregs[insn->op1]
2061
0
           && ctx->vregs[i] != ctx->vregs[insn->op1]) {
2062
0
            if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
2063
0
              if (ctx->binding) {
2064
0
                ir_ref b1 = ir_binding_find(ctx, i);
2065
0
                ir_ref b2 = ir_binding_find(ctx, insn->op1);
2066
0
                if (b1 && b1 != b2) {
2067
0
                  continue;
2068
0
                }
2069
0
              }
2070
0
              ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
2071
0
              compact = 1;
2072
0
            }
2073
0
          }
2074
0
        }
2075
0
      }
2076
0
    }
2077
0
  }
2078
2079
0
  if (compact) {
2080
0
    ir_ref i, n;
2081
0
    uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t));
2082
2083
0
    for (i = 1, n = 1; i <= ctx->vregs_count; i++) {
2084
0
      if (ctx->live_intervals[i]) {
2085
0
        xlat[i] = n;
2086
0
        if (i != n) {
2087
0
          ctx->live_intervals[n] = ctx->live_intervals[i];
2088
0
          ctx->live_intervals[n]->vreg = n;
2089
0
        }
2090
0
        n++;
2091
0
      }
2092
0
    }
2093
0
    n--;
2094
0
    if (n != ctx->vregs_count) {
2095
0
      j = ctx->vregs_count - n;
2096
      /* vregs + tmp + fixed + ALL + SCRATCH_N */
2097
0
      for (i = n + 1; i <= n + IR_REG_SET_NUM; i++) {
2098
0
        ctx->live_intervals[i] = ctx->live_intervals[i + j];
2099
0
        if (ctx->live_intervals[i]) {
2100
0
          ctx->live_intervals[i]->vreg = i;
2101
0
        }
2102
0
      }
2103
0
      for (j = 1; j < ctx->insns_count; j++) {
2104
0
        if (ctx->vregs[j]) {
2105
0
          ctx->vregs[j] = xlat[ctx->vregs[j]];
2106
0
        }
2107
0
      }
2108
0
      ctx->vregs_count = n;
2109
0
    }
2110
0
    ir_mem_free(xlat);
2111
0
  }
2112
2113
0
  return 1;
2114
0
}
2115
2116
/* SSA Deconstruction */
2117
2118
int ir_compute_dessa_moves(ir_ctx *ctx)
2119
0
{
2120
0
  uint32_t b, n;
2121
0
  ir_ref j, k, *p, use;
2122
0
  ir_block *bb;
2123
0
  ir_use_list *use_list;
2124
0
  ir_insn *insn;
2125
2126
0
  for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
2127
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
2128
0
    k = bb->predecessors_count;
2129
0
    if (k > 1) {
2130
0
      use_list = &ctx->use_lists[bb->start];
2131
0
      n = use_list->count;
2132
0
      if (n > 1) {
2133
0
        IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
2134
0
        k++;
2135
0
        for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2136
0
          use = *p;
2137
0
          insn = &ctx->ir_base[use];
2138
0
          if (insn->op == IR_PHI) {
2139
0
            for (j = 2; j <= k; j++) {
2140
0
              if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
2141
0
                int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
2142
0
                ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
2143
0
                ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
2144
0
                ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
2145
0
              }
2146
0
            }
2147
0
          }
2148
0
        }
2149
0
      }
2150
0
    }
2151
0
  }
2152
0
  return 1;
2153
0
}
2154
2155
/*
2156
 * Parallel copy sequentialization algorithm
2157
 *
2158
 * The implementation is based on algorithm 1 desriebed in
2159
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
2160
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
2161
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
2162
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
2163
 */
2164
int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy, void *data)
2165
0
{
2166
0
  uint32_t succ, k, n = 0;
2167
0
  ir_block *bb, *succ_bb;
2168
0
  ir_use_list *use_list;
2169
0
  ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
2170
0
  ir_ref s, d;
2171
0
  ir_insn *insn;
2172
0
  uint32_t len;
2173
0
  ir_bitset todo, ready;
2174
0
  bool have_constants_or_addresses = 0;
2175
2176
0
  bb = &ctx->cfg_blocks[b];
2177
0
  if (!(bb->flags & IR_BB_DESSA_MOVES)) {
2178
0
    return 0;
2179
0
  }
2180
0
  IR_ASSERT(bb->successors_count == 1);
2181
0
  succ = ctx->cfg_edges[bb->successors];
2182
0
  succ_bb = &ctx->cfg_blocks[succ];
2183
0
  IR_ASSERT(succ_bb->predecessors_count > 1);
2184
0
  use_list = &ctx->use_lists[succ_bb->start];
2185
2186
0
  k = ir_phi_input_number(ctx, succ_bb, b);
2187
2188
0
  loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
2189
0
  pred = loc + ctx->vregs_count + 1;
2190
0
  src = pred + ctx->vregs_count + 1;
2191
0
  dst = src + ctx->vregs_count + 1;
2192
0
  len = ir_bitset_len(ctx->vregs_count + 1);
2193
0
  todo = ir_bitset_malloc(ctx->vregs_count + 1);
2194
2195
0
  for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2196
0
    ref = *p;
2197
0
    insn = &ctx->ir_base[ref];
2198
0
    if (insn->op == IR_PHI) {
2199
0
      input = ir_insn_op(insn, k);
2200
0
      if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2201
0
        have_constants_or_addresses = 1;
2202
0
      } else if (ctx->vregs[input] != ctx->vregs[ref]) {
2203
0
        s = ctx->vregs[input];
2204
0
        d = ctx->vregs[ref];
2205
0
        src[s] = input;
2206
0
        dst[d] = ref;
2207
0
        loc[d] = pred[s] = 0;
2208
0
        ir_bitset_incl(todo, d);
2209
0
        n++;
2210
0
      }
2211
0
    }
2212
0
  }
2213
2214
0
  if (n > 0) {
2215
0
    src[0] = dst[0] = 0;
2216
0
    ready = ir_bitset_malloc(ctx->vregs_count + 1);
2217
0
    IR_BITSET_FOREACH(todo, len, d) {
2218
0
      ref = dst[d];
2219
0
      insn = &ctx->ir_base[ref];
2220
0
      IR_ASSERT(insn->op == IR_PHI);
2221
0
      input = ir_insn_op(insn, k);
2222
0
      s = ctx->vregs[input];
2223
0
      loc[s] = s;
2224
0
      pred[d] = s;
2225
0
    } IR_BITSET_FOREACH_END();
2226
2227
0
    IR_BITSET_FOREACH(todo, len, i) {
2228
0
      if (!loc[i]) {
2229
0
        ir_bitset_incl(ready, i);
2230
0
      }
2231
0
    } IR_BITSET_FOREACH_END();
2232
2233
0
    while (1) {
2234
0
      ir_ref a, b, c;
2235
2236
0
      while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
2237
0
        a = pred[b];
2238
0
        c = loc[a];
2239
0
        emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b], data);
2240
0
        ir_bitset_excl(todo, b);
2241
0
        loc[a] = b;
2242
0
        src[b] = dst[b];
2243
0
        if (a == c && pred[a]) {
2244
0
          ir_bitset_incl(ready, a);
2245
0
        }
2246
0
      }
2247
0
      b = ir_bitset_pop_first(todo, len);
2248
0
      if (b < 0) {
2249
0
        break;
2250
0
      }
2251
0
      IR_ASSERT(b != loc[pred[b]]);
2252
0
      emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0, data);
2253
0
      loc[b] = 0;
2254
0
      ir_bitset_incl(ready, b);
2255
0
    }
2256
2257
0
    ir_mem_free(ready);
2258
0
  }
2259
2260
0
  ir_mem_free(todo);
2261
0
  ir_mem_free(loc);
2262
2263
0
  if (have_constants_or_addresses) {
2264
0
    for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
2265
0
      ref = *p;
2266
0
      insn = &ctx->ir_base[ref];
2267
0
      if (insn->op == IR_PHI) {
2268
0
        input = ir_insn_op(insn, k);
2269
0
        if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2270
0
          emit_copy(ctx, insn->type, input, ref, data);
2271
0
        }
2272
0
      }
2273
0
    }
2274
0
  }
2275
2276
0
  return 1;
2277
0
}
2278
2279
/* Linear Scan Register Allocation */
2280
2281
#ifdef IR_DEBUG
2282
0
# define IR_LOG_LSRA(action, ival, comment) do { \
2283
0
    if (ctx->flags & IR_DEBUG_RA) { \
2284
0
      ir_live_interval *_ival = (ival); \
2285
0
      ir_live_pos _start = _ival->range.start; \
2286
0
      ir_live_pos _end = _ival->end; \
2287
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
2288
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2289
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2290
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
2291
0
    } \
2292
0
  } while (0)
2293
0
# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
2294
0
    if (ctx->flags & IR_DEBUG_RA) { \
2295
0
      ir_live_interval *_ival = (ival); \
2296
0
      ir_live_pos _start = _ival->range.start; \
2297
0
      ir_live_pos _end = _ival->end; \
2298
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
2299
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2300
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2301
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2302
0
        ir_reg_name(_ival->reg, _ival->type)); \
2303
0
    } \
2304
0
  } while (0)
2305
0
# define IR_LOG_LSRA_SPLIT(ival, pos) do { \
2306
0
    if (ctx->flags & IR_DEBUG_RA) { \
2307
0
      ir_live_interval *_ival = (ival); \
2308
0
      ir_live_pos _start = _ival->range.start; \
2309
0
      ir_live_pos _end = _ival->end; \
2310
0
      ir_live_pos _pos = (pos); \
2311
0
      fprintf(stderr, "      ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
2312
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2313
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2314
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2315
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2316
0
    } \
2317
0
  } while (0)
2318
0
# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
2319
0
    if (ctx->flags & IR_DEBUG_RA) { \
2320
0
      ir_live_interval *_ival = (ival); \
2321
0
      ir_live_pos _start = _ival->range.start; \
2322
0
      ir_live_pos _end = _ival->end; \
2323
0
      ir_live_pos _pos = (pos); \
2324
0
      fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
2325
0
        (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2326
0
        IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2327
0
        IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2328
0
        ir_reg_name(_ival->reg, _ival->type), \
2329
0
        IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2330
0
    } \
2331
0
  } while (0)
2332
#else
2333
# define IR_LOG_LSRA(action, ival, comment)
2334
# define IR_LOG_LSRA_ASSIGN(action, ival, comment)
2335
# define IR_LOG_LSRA_SPLIT(ival, pos)
2336
# define IR_LOG_LSRA_CONFLICT(action, ival, pos);
2337
#endif
2338
2339
static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position)
2340
0
{
2341
0
  ir_live_range *live_range = &ival->range;
2342
2343
0
  do {
2344
0
    if (position < live_range->end) {
2345
0
      return position >= live_range->start;
2346
0
    }
2347
0
    live_range = live_range->next;
2348
0
  } while (live_range);
2349
2350
0
  return 0;
2351
0
}
2352
2353
static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to)
2354
0
{
2355
0
  ir_live_range *r = &ival->range;
2356
2357
0
  while (r) {
2358
0
    if (from < r->start) {
2359
0
      return 1;
2360
0
    } else if (to <= r->end) {
2361
0
      return 0;
2362
0
    }
2363
0
    r = r->next;
2364
0
  }
2365
0
  return 0;
2366
0
}
2367
2368
2369
static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2370
0
{
2371
0
  ir_live_pos ret = 0;
2372
0
  ir_use_pos *p = ival->use_pos;
2373
2374
0
  while (p && p->pos <= pos) {
2375
0
    if (p->flags & flags) {
2376
0
      ret = p->pos;
2377
0
    }
2378
0
    p = p->next;
2379
0
  }
2380
0
  return ret;
2381
0
}
2382
2383
static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2384
0
{
2385
0
  ir_use_pos *p = ival->use_pos;
2386
2387
0
  while (p && p->pos < pos) {
2388
0
    p = p->next;
2389
0
  }
2390
0
  if (p && p->pos == pos && p->op_num != 0) {
2391
0
    p = p->next;
2392
0
  }
2393
0
  while (p && !(p->flags & flags)) {
2394
0
    p = p->next;
2395
0
  }
2396
0
  return p ? p->pos : 0x7fffffff;
2397
0
}
2398
2399
static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
2400
0
{
2401
0
  ir_ref ref = IR_LIVE_POS_TO_REF(pos);
2402
0
  uint32_t b = ctx->cfg_map[ref];
2403
2404
0
  while (!b) {
2405
0
    ref--;
2406
0
    IR_ASSERT(ref > 0);
2407
0
    b = ctx->cfg_map[ref];
2408
0
  }
2409
0
  IR_ASSERT(b <= ctx->cfg_blocks_count);
2410
0
  return &ctx->cfg_blocks[b];
2411
0
}
2412
2413
static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max)
2414
0
{
2415
0
  ir_block *min_bb, *max_bb;
2416
2417
0
  if (min_pos == max_pos) {
2418
0
    return max_pos;
2419
0
  }
2420
2421
0
  IR_ASSERT(min_pos < max_pos);
2422
0
  IR_ASSERT(min_pos >= ival->range.start);
2423
0
  IR_ASSERT(max_pos < ival->end);
2424
2425
0
  min_bb = ir_block_from_live_pos(ctx, min_pos);
2426
0
  max_bb = ir_block_from_live_pos(ctx, max_pos);
2427
2428
0
  if (min_bb == max_bb
2429
0
   || ir_ival_has_hole_between(ival, min_pos, max_pos)) {  // TODO: ???
2430
0
    return (prefer_max) ? max_pos : min_pos;
2431
0
  }
2432
2433
0
  if (max_bb->loop_depth > 0) {
2434
    /* Split at the end of the loop entry */
2435
0
    do {
2436
0
      ir_block *bb;
2437
2438
0
      if (max_bb->flags & IR_BB_LOOP_HEADER) {
2439
0
        bb = max_bb;
2440
0
      } else {
2441
0
        IR_ASSERT(max_bb->loop_header);
2442
0
        bb = &ctx->cfg_blocks[max_bb->loop_header];
2443
0
      }
2444
0
      bb = &ctx->cfg_blocks[bb->idom];
2445
0
      if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) {
2446
0
        break;
2447
0
      }
2448
0
      max_bb = bb;
2449
0
    } while (max_bb->loop_depth > 0);
2450
2451
0
    if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) {
2452
0
      return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
2453
0
    }
2454
0
  }
2455
2456
0
  if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) {
2457
0
    return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
2458
0
  } else {
2459
    // TODO: "min_bb" is in a deeper loop than "max_bb" ???
2460
0
    return max_pos;
2461
0
  }
2462
0
}
2463
2464
static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos)
2465
0
{
2466
0
  ir_live_interval *child;
2467
0
  ir_live_range *p, *prev;
2468
0
  ir_use_pos *use_pos, *prev_use_pos;
2469
2470
0
  IR_LOG_LSRA_SPLIT(ival, pos);
2471
0
  IR_ASSERT(pos > ival->range.start);
2472
0
  ctx->flags2 |= IR_RA_HAVE_SPLITS;
2473
2474
0
  p = &ival->range;
2475
0
  prev = NULL;
2476
0
  while (p && pos >= p->end) {
2477
0
    prev = p;
2478
0
    p = prev->next;
2479
0
  }
2480
0
  IR_ASSERT(p);
2481
2482
0
  if (pos < p->start) {
2483
    /* split between ranges */
2484
0
    pos = p->start;
2485
0
  }
2486
2487
0
  use_pos = ival->use_pos;
2488
0
  prev_use_pos = NULL;
2489
2490
0
  ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS);
2491
0
  if (p->start == pos) {
2492
0
    while (use_pos && pos > use_pos->pos) {
2493
0
      if (use_pos->hint != IR_REG_NONE) {
2494
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2495
0
      }
2496
0
      if (use_pos->hint_ref > 0) {
2497
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2498
0
      }
2499
0
      prev_use_pos = use_pos;
2500
0
      use_pos = use_pos->next;
2501
0
    }
2502
0
  } else {
2503
0
    while (use_pos && pos >= use_pos->pos) {
2504
0
      if (use_pos->hint != IR_REG_NONE) {
2505
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2506
0
      }
2507
0
      if (use_pos->hint_ref > 0) {
2508
0
        ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2509
0
      }
2510
0
      prev_use_pos = use_pos;
2511
0
      use_pos = use_pos->next;
2512
0
    }
2513
0
  }
2514
2515
0
  child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
2516
0
  child->type = ival->type;
2517
0
  child->reg = IR_REG_NONE;
2518
0
  child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD;
2519
0
  child->vreg = ival->vreg;
2520
0
  child->stack_spill_pos = -1; // not allocated
2521
0
  child->range.start = pos;
2522
0
  child->range.end = p->end;
2523
0
  child->range.next = p->next;
2524
0
  child->end = ival->end;
2525
0
  child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;
2526
2527
0
  child->next = ival->next;
2528
0
  ival->next = child;
2529
2530
0
  if (pos == p->start) {
2531
0
    prev->next = NULL;
2532
0
    ival->end = prev->end;
2533
    /* Cache to reuse */
2534
0
    p->next = ctx->unused_ranges;
2535
0
    ctx->unused_ranges = p;
2536
0
  } else {
2537
0
    p->end = ival->end = pos;
2538
0
    p->next = NULL;
2539
0
  }
2540
0
  if (prev_use_pos) {
2541
0
    prev_use_pos->next = NULL;
2542
0
  } else {
2543
0
    ival->use_pos = NULL;
2544
0
  }
2545
2546
0
  use_pos = child->use_pos;
2547
0
  while (use_pos) {
2548
0
    if (use_pos->hint != IR_REG_NONE) {
2549
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2550
0
    }
2551
0
    if (use_pos->hint_ref > 0) {
2552
0
      child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2553
0
    }
2554
0
    use_pos = use_pos->next;
2555
0
  }
2556
2557
0
  return child;
2558
0
}
2559
2560
static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size)
2561
0
{
2562
0
  ir_reg_alloc_data *data = ctx->data;
2563
0
  int32_t ret;
2564
2565
0
  IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
2566
0
  if (data->handled && data->handled[size]) {
2567
0
    ret = data->handled[size]->stack_spill_pos;
2568
0
    data->handled[size] = data->handled[size]->list_next;
2569
0
  } else if (size == 8) {
2570
0
    ret = ctx->stack_frame_size;
2571
0
    ctx->stack_frame_size += 8;
2572
0
  } else if (size == 4) {
2573
0
    if (data->unused_slot_4) {
2574
0
      ret = data->unused_slot_4;
2575
0
      data->unused_slot_4 = 0;
2576
0
      } else if (data->handled && data->handled[8]) {
2577
0
      ret = data->handled[8]->stack_spill_pos;
2578
0
      data->handled[8] = data->handled[8]->list_next;
2579
0
      data->unused_slot_4 = ret + 4;
2580
0
    } else {
2581
0
      ret = ctx->stack_frame_size;
2582
0
      if (sizeof(void*) == 8) {
2583
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2584
0
        ctx->stack_frame_size += 8;
2585
0
      } else {
2586
0
        ctx->stack_frame_size += 4;
2587
0
      }
2588
0
    }
2589
0
  } else if (size == 2) {
2590
0
    if (data->unused_slot_2) {
2591
0
      ret = data->unused_slot_2;
2592
0
      data->unused_slot_2 = 0;
2593
0
    } else if (data->unused_slot_4) {
2594
0
      ret = data->unused_slot_4;
2595
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2596
0
      data->unused_slot_4 = 0;
2597
0
      } else if (data->handled && data->handled[4]) {
2598
0
      ret = data->handled[4]->stack_spill_pos;
2599
0
      data->handled[4] = data->handled[4]->list_next;
2600
0
      data->unused_slot_2 = ret + 2;
2601
0
      } else if (data->handled && data->handled[8]) {
2602
0
      ret = data->handled[8]->stack_spill_pos;
2603
0
      data->handled[8] = data->handled[8]->list_next;
2604
0
      data->unused_slot_2 = ret + 2;
2605
0
      data->unused_slot_4 = ret + 4;
2606
0
    } else {
2607
0
      ret = ctx->stack_frame_size;
2608
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2609
0
      if (sizeof(void*) == 8) {
2610
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2611
0
        ctx->stack_frame_size += 8;
2612
0
      } else {
2613
0
        ctx->stack_frame_size += 4;
2614
0
      }
2615
0
    }
2616
0
  } else if (size == 1) {
2617
0
    if (data->unused_slot_1) {
2618
0
      ret = data->unused_slot_1;
2619
0
      data->unused_slot_1 = 0;
2620
0
    } else if (data->unused_slot_2) {
2621
0
      ret = data->unused_slot_2;
2622
0
      data->unused_slot_1 = data->unused_slot_2 + 1;
2623
0
      data->unused_slot_2 = 0;
2624
0
    } else if (data->unused_slot_4) {
2625
0
      ret = data->unused_slot_4;
2626
0
      data->unused_slot_1 = data->unused_slot_4 + 1;
2627
0
      data->unused_slot_2 = data->unused_slot_4 + 2;
2628
0
      data->unused_slot_4 = 0;
2629
0
      } else if (data->handled && data->handled[2]) {
2630
0
      ret = data->handled[2]->stack_spill_pos;
2631
0
      data->handled[2] = data->handled[2]->list_next;
2632
0
      data->unused_slot_1 = ret + 1;
2633
0
      } else if (data->handled && data->handled[4]) {
2634
0
      ret = data->handled[4]->stack_spill_pos;
2635
0
      data->handled[4] = data->handled[4]->list_next;
2636
0
      data->unused_slot_1 = ret + 1;
2637
0
      data->unused_slot_2 = ret + 2;
2638
0
      } else if (data->handled && data->handled[8]) {
2639
0
      ret = data->handled[8]->stack_spill_pos;
2640
0
      data->handled[8] = data->handled[8]->list_next;
2641
0
      data->unused_slot_1 = ret + 1;
2642
0
      data->unused_slot_2 = ret + 2;
2643
0
      data->unused_slot_4 = ret + 4;
2644
0
    } else {
2645
0
      ret = ctx->stack_frame_size;
2646
0
      data->unused_slot_1 = ctx->stack_frame_size + 1;
2647
0
      data->unused_slot_2 = ctx->stack_frame_size + 2;
2648
0
      if (sizeof(void*) == 8) {
2649
0
        data->unused_slot_4 = ctx->stack_frame_size + 4;
2650
0
        ctx->stack_frame_size += 8;
2651
0
      } else {
2652
0
        ctx->stack_frame_size += 4;
2653
0
      }
2654
0
    }
2655
0
  } else {
2656
0
    ret = IR_NULL;
2657
0
  }
2658
0
  return ret;
2659
0
}
2660
2661
int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type)
2662
0
{
2663
0
  return ir_allocate_small_spill_slot(ctx, ir_type_size[type]);
2664
0
}
2665
2666
static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size)
2667
0
{
2668
0
  int32_t ret;
2669
2670
0
  if (size <= 8) {
2671
0
    if (size == 3) {
2672
0
      size = 4;
2673
0
    } else if (size > 4 && size < 8) {
2674
0
      size = 8;
2675
0
    }
2676
0
    return ir_allocate_small_spill_slot(ctx, size);
2677
0
  }
2678
2679
  /* Align stack allocated data to 16 byte */
2680
0
  ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
2681
0
  ret = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
2682
0
  size = IR_ALIGNED_SIZE(size, 8);
2683
0
  ctx->stack_frame_size = ret + size;
2684
2685
0
  return ret;
2686
0
}
2687
2688
static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2689
0
{
2690
0
  ir_use_pos *use_pos;
2691
0
  ir_reg reg;
2692
2693
0
  use_pos = ival->use_pos;
2694
0
  while (use_pos) {
2695
0
    reg = use_pos->hint;
2696
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2697
0
      return reg;
2698
0
    }
2699
0
    use_pos = use_pos->next;
2700
0
  }
2701
2702
0
  return IR_REG_NONE;
2703
0
}
2704
2705
static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos)
2706
0
{
2707
0
  ir_use_pos *use_pos;
2708
0
  ir_reg reg;
2709
2710
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2711
0
    use_pos = ival->use_pos;
2712
0
    while (use_pos) {
2713
0
      reg = use_pos->hint;
2714
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2715
0
        if (ival->end <= freeUntilPos[reg]) {
2716
          /* register available for the whole interval */
2717
0
          return reg;
2718
0
        }
2719
0
      }
2720
0
      use_pos = use_pos->next;
2721
0
    }
2722
0
  }
2723
2724
0
  if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) {
2725
0
    use_pos = ival->use_pos;
2726
0
    while (use_pos) {
2727
0
      if (use_pos->hint_ref > 0) {
2728
0
        reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2729
0
        if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2730
0
          if (ival->end <= freeUntilPos[reg]) {
2731
            /* register available for the whole interval */
2732
0
            return reg;
2733
0
          }
2734
0
        }
2735
0
      }
2736
0
      use_pos = use_pos->next;
2737
0
    }
2738
0
  }
2739
2740
0
  return IR_REG_NONE;
2741
0
}
2742
2743
static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2744
0
{
2745
0
  ir_use_pos *use_pos;
2746
0
  ir_reg reg;
2747
2748
0
  use_pos = ival->use_pos;
2749
0
  while (use_pos) {
2750
0
    reg = use_pos->hint;
2751
0
    if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2752
0
      return reg;
2753
0
    } else if (use_pos->hint_ref > 0) {
2754
0
      reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2755
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2756
0
        return reg;
2757
0
      }
2758
0
    }
2759
0
    use_pos = use_pos->next;
2760
0
  }
2761
2762
0
  return IR_REG_NONE;
2763
0
}
2764
2765
static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2766
0
{
2767
0
  ir_live_pos pos = ival->range.start;
2768
2769
0
  if (*unhandled == NULL
2770
0
   || pos < (*unhandled)->range.start
2771
0
   || (pos == (*unhandled)->range.start
2772
0
    && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2773
0
    && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2774
0
   || (pos == (*unhandled)->range.start
2775
0
    && ival->vreg > (*unhandled)->vreg)) {
2776
0
    ival->list_next = *unhandled;
2777
0
    *unhandled = ival;
2778
0
  } else {
2779
0
    ir_live_interval *prev = *unhandled;
2780
2781
0
    while (prev->list_next) {
2782
0
      if (pos < prev->list_next->range.start
2783
0
       || (pos == prev->list_next->range.start
2784
0
        && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2785
0
        && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2786
0
       || (pos == prev->list_next->range.start
2787
0
        && ival->vreg > prev->list_next->vreg)) {
2788
0
        break;
2789
0
      }
2790
0
      prev = prev->list_next;
2791
0
    }
2792
0
    ival->list_next = prev->list_next;
2793
0
    prev->list_next = ival;
2794
0
  }
2795
0
}
2796
2797
/* merge sorted lists */
2798
static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2799
0
{
2800
0
  ir_live_interval **prev;
2801
0
  ir_live_pos pos;
2802
2803
0
  if (*unhandled == NULL) {
2804
0
    *unhandled = ival;
2805
0
    while (ival) {
2806
0
      ival = ival->list_next = ival->next;
2807
0
    }
2808
0
  } else {
2809
0
    prev = unhandled;
2810
0
    while (ival) {
2811
0
      pos = ival->range.start;
2812
0
      while (*prev && pos >= (*prev)->range.start) {
2813
0
        prev = &(*prev)->list_next;
2814
0
      }
2815
0
      ival->list_next = *prev;
2816
0
      *prev = ival;
2817
0
      prev = &ival->list_next;
2818
0
      ival = ival->next;
2819
0
    }
2820
0
  }
2821
0
#ifdef IR_DEBUG
2822
0
  ival = *unhandled;
2823
0
  pos = 0;
2824
2825
0
  while (ival) {
2826
0
    IR_ASSERT(ival->range.start >= pos);
2827
0
    pos = ival->range.start;
2828
0
    ival = ival->list_next;
2829
0
  }
2830
0
#endif
2831
0
}
2832
2833
static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival)
2834
0
{
2835
0
  ir_live_pos pos = ival->range.start;
2836
2837
0
  if (*unhandled == NULL
2838
0
   || pos <= (*unhandled)->range.start) {
2839
0
    ival->list_next = *unhandled;
2840
0
    *unhandled = ival;
2841
0
  } else {
2842
0
    ir_live_interval *prev = *unhandled;
2843
2844
0
    while (prev->list_next) {
2845
0
      if (pos <= prev->list_next->range.start) {
2846
0
        break;
2847
0
      }
2848
0
      prev = prev->list_next;
2849
0
    }
2850
0
    ival->list_next = prev->list_next;
2851
0
    prev->list_next = ival;
2852
0
  }
2853
0
}
2854
2855
static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
2856
0
{
2857
0
  ir_live_pos freeUntilPos[IR_REG_NUM];
2858
0
  int i, reg;
2859
0
  ir_live_pos pos, next;
2860
0
  ir_live_interval *other;
2861
0
  ir_regset available, overlapped, scratch;
2862
2863
0
  if (IR_IS_TYPE_FP(ival->type)) {
2864
0
    available = IR_REGSET_FP;
2865
    /* set freeUntilPos of all physical registers to maxInt */
2866
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
2867
0
      freeUntilPos[i] = 0x7fffffff;
2868
0
    }
2869
0
  } else {
2870
0
    available = IR_REGSET_GP;
2871
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
2872
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
2873
0
    }
2874
#if defined(IR_TARGET_X86)
2875
    if (ir_type_size[ival->type] == 1) {
2876
      /* TODO: if no registers avialivle, we may use of one this register for already allocated interval ??? */
2877
      IR_REGSET_EXCL(available, IR_REG_RBP);
2878
      IR_REGSET_EXCL(available, IR_REG_RSI);
2879
      IR_REGSET_EXCL(available, IR_REG_RDI);
2880
    }
2881
#endif
2882
    /* set freeUntilPos of all physical registers to maxInt */
2883
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
2884
0
      freeUntilPos[i] = 0x7fffffff;
2885
0
    }
2886
0
  }
2887
2888
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
2889
2890
  /* for each interval it in active */
2891
0
  other = *active;
2892
0
  while (other) {
2893
    /* freeUntilPos[it.reg] = 0 */
2894
0
    reg = other->reg;
2895
0
    IR_ASSERT(reg >= 0);
2896
0
    if (reg >= IR_REG_NUM) {
2897
0
      available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]);
2898
0
    } else {
2899
0
      IR_REGSET_EXCL(available, reg);
2900
0
    }
2901
0
    other = other->list_next;
2902
0
  }
2903
2904
  /* for each interval it in inactive intersecting with current
2905
   *
2906
   * This loop is not necessary for program in SSA form (see LSRA on SSA fig. 6),
2907
   * but it is still necessary after coalescing and splitting
2908
   */
2909
0
  overlapped = IR_REGSET_EMPTY;
2910
0
  other = inactive;
2911
0
  pos = ival->end;
2912
0
  while (other) {
2913
    /* freeUntilPos[it.reg] = next intersection of it with current */
2914
0
    if (other->current_range->start < pos) {
2915
0
      next = ir_ivals_overlap(&ival->range, other->current_range);
2916
0
      if (next) {
2917
0
        reg = other->reg;
2918
0
        IR_ASSERT(reg >= 0);
2919
0
        if (reg >= IR_REG_NUM) {
2920
0
          ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
2921
0
          overlapped = IR_REGSET_UNION(overlapped, regset);
2922
0
          IR_REGSET_FOREACH(regset, reg) {
2923
0
            if (next < freeUntilPos[reg]) {
2924
0
              freeUntilPos[reg] = next;
2925
0
            }
2926
0
          } IR_REGSET_FOREACH_END();
2927
0
        } else if (IR_REGSET_IN(available, reg)) {
2928
0
          IR_REGSET_INCL(overlapped, reg);
2929
0
          if (next < freeUntilPos[reg]) {
2930
0
            freeUntilPos[reg] = next;
2931
0
          }
2932
0
        }
2933
0
      }
2934
0
    }
2935
0
    other = other->list_next;
2936
0
  }
2937
2938
0
  available = IR_REGSET_DIFFERENCE(available, overlapped);
2939
0
  if (available != IR_REGSET_EMPTY) {
2940
2941
0
    if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
2942
      /* Try to use hint */
2943
0
      reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);
2944
0
      if (reg != IR_REG_NONE) {
2945
0
        ival->reg = reg;
2946
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (hint available without spilling)");
2947
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2948
0
          ival->list_next = *active;
2949
0
          *active = ival;
2950
0
        }
2951
0
        return reg;
2952
0
      }
2953
0
    }
2954
2955
0
    if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) {
2956
      /* Try to reuse the register previously allocated for splited interval */
2957
0
      reg = ctx->live_intervals[ival->vreg]->reg;
2958
0
      if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2959
0
        ival->reg = reg;
2960
0
        IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
2961
0
        if (*unhandled && ival->end > (*unhandled)->range.start) {
2962
0
          ival->list_next = *active;
2963
0
          *active = ival;
2964
0
        }
2965
0
        return reg;
2966
0
      }
2967
0
    }
2968
2969
    /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2970
0
    scratch = IR_REGSET_INTERSECTION(available,
2971
0
      ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM]);
2972
0
    if (scratch != IR_REGSET_EMPTY) {
2973
      /* prefer registers that don't conflict with the hints for the following unhandled intervals */
2974
0
      if (1) {
2975
0
        ir_regset non_conflicting = scratch;
2976
2977
0
        other = *unhandled;
2978
0
        while (other && other->range.start < ival->range.end) {
2979
0
          if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2980
0
            reg = ir_get_first_reg_hint(ctx, other, non_conflicting);
2981
2982
0
            if (reg >= 0) {
2983
0
              IR_REGSET_EXCL(non_conflicting, reg);
2984
0
              if (non_conflicting == IR_REGSET_EMPTY) {
2985
0
                break;
2986
0
              }
2987
0
            }
2988
0
          }
2989
0
          other = other->list_next;
2990
0
        }
2991
0
        if (non_conflicting != IR_REGSET_EMPTY) {
2992
0
          reg = IR_REGSET_FIRST(non_conflicting);
2993
0
        } else {
2994
0
          reg = IR_REGSET_FIRST(scratch);
2995
0
        }
2996
0
      } else {
2997
0
        reg = IR_REGSET_FIRST(scratch);
2998
0
      }
2999
0
    } else {
3000
0
      reg = IR_REGSET_FIRST(available);
3001
0
    }
3002
0
    ival->reg = reg;
3003
0
    IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
3004
0
    if (*unhandled && ival->end > (*unhandled)->range.start) {
3005
0
      ival->list_next = *active;
3006
0
      *active = ival;
3007
0
    }
3008
0
    return reg;
3009
0
  }
3010
3011
  /* reg = register with highest freeUntilPos */
3012
0
  reg = IR_REG_NONE;
3013
0
  pos = 0;
3014
0
  IR_REGSET_FOREACH(overlapped, i) {
3015
0
    if (freeUntilPos[i] > pos) {
3016
0
      pos = freeUntilPos[i];
3017
0
      reg = i;
3018
0
    } else if (freeUntilPos[i] == pos
3019
0
        && !IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], reg)
3020
0
        && IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], i)) {
3021
      /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
3022
0
      pos = freeUntilPos[i];
3023
0
      reg = i;
3024
0
    }
3025
0
  } IR_REGSET_FOREACH_END();
3026
3027
0
  if (pos > ival->range.start) {
3028
    /* register available for the first part of the interval */
3029
    /* split current before freeUntilPos[reg] */
3030
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival, pos,
3031
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3032
0
    if (split_pos > ival->range.start) {
3033
0
      split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
3034
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3035
0
      if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3036
0
        ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);
3037
3038
0
        if (pref_reg != IR_REG_NONE) {
3039
0
          ival->reg = pref_reg;
3040
0
        } else {
3041
0
          ival->reg = reg;
3042
0
        }
3043
0
      } else {
3044
0
        ival->reg = reg;
3045
0
      }
3046
0
      IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling for the first part)");
3047
0
      if (*unhandled && ival->end > (*unhandled)->range.start) {
3048
0
        ival->list_next = *active;
3049
0
        *active = ival;
3050
0
      }
3051
0
      ir_add_to_unhandled(unhandled, other);
3052
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3053
0
      return reg;
3054
0
    }
3055
0
  }
3056
0
  return IR_REG_NONE;
3057
0
}
3058
3059
static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled)
3060
0
{
3061
0
  ir_live_pos nextUsePos[IR_REG_NUM];
3062
0
  ir_live_pos blockPos[IR_REG_NUM];
3063
0
  int score, best_score, scores[IR_REG_NUM];
3064
0
  int i, reg;
3065
0
  ir_live_pos pos, next_use_pos;
3066
0
  ir_live_interval *other, *prev;
3067
0
  ir_use_pos *use_pos;
3068
0
  ir_regset available, tmp_regset;
3069
3070
0
  if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3071
0
    use_pos = ival->use_pos;
3072
0
    while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) {
3073
0
      use_pos = use_pos->next;
3074
0
    }
3075
0
    if (!use_pos) {
3076
      /* spill */
3077
0
      IR_LOG_LSRA("    ---- Spill", ival, " (no use pos that must be in reg)");
3078
0
      ctx->flags2 |= IR_RA_HAVE_SPILLS;
3079
0
      return IR_REG_NONE;
3080
0
    }
3081
0
    next_use_pos = use_pos->pos;
3082
0
  } else {
3083
0
    next_use_pos = ival->range.end;
3084
0
  }
3085
3086
0
  if (IR_IS_TYPE_FP(ival->type)) {
3087
0
    available = IR_REGSET_FP;
3088
    /* set nextUsePos of all physical registers to maxInt */
3089
0
    for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
3090
0
      nextUsePos[i] = 0x7fffffff;
3091
0
      blockPos[i] = 0x7fffffff;
3092
0
      scores[i] = 0;
3093
0
    }
3094
0
  } else {
3095
0
    available = IR_REGSET_GP;
3096
0
    if (ctx->flags & IR_USE_FRAME_POINTER) {
3097
0
      IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
3098
0
    }
3099
#if defined(IR_TARGET_X86)
3100
    if (ir_type_size[ival->type] == 1) {
3101
      /* TODO: if no registers avialivle, we may use of one this register for already allocated interval ??? */
3102
      IR_REGSET_EXCL(available, IR_REG_RBP);
3103
      IR_REGSET_EXCL(available, IR_REG_RSI);
3104
      IR_REGSET_EXCL(available, IR_REG_RDI);
3105
    }
3106
#endif
3107
    /* set nextUsePos of all physical registers to maxInt */
3108
0
    for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
3109
0
      nextUsePos[i] = 0x7fffffff;
3110
0
      blockPos[i] = 0x7fffffff;
3111
0
      scores[i] = 0;
3112
0
    }
3113
0
  }
3114
3115
0
  available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
3116
3117
0
  if (IR_REGSET_IS_EMPTY(available)) {
3118
0
    fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");
3119
0
    IR_ASSERT(0);
3120
0
    exit(-1);
3121
0
  }
3122
3123
  /* for each interval it in active */
3124
0
  other = *active;
3125
0
  while (other) {
3126
    /* nextUsePos[it.reg] = next use of it after start of current */
3127
0
    reg = other->reg;
3128
0
    IR_ASSERT(reg >= 0);
3129
0
    if (reg >= IR_REG_NUM) {
3130
0
      ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
3131
0
      IR_REGSET_FOREACH(regset, reg) {
3132
0
        blockPos[reg] = nextUsePos[reg] = 0;
3133
0
      } IR_REGSET_FOREACH_END();
3134
0
    } else if (IR_REGSET_IN(available, reg)) {
3135
0
      if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3136
0
        blockPos[reg] = nextUsePos[reg] = 0;
3137
0
      } else {
3138
0
        pos = ir_first_use_pos_after(other, ival->range.start,
3139
0
          IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3140
0
        if (pos < nextUsePos[reg]) {
3141
0
          nextUsePos[reg] = pos;
3142
            /* Prefer splitting interval that was already splitted before */
3143
0
          scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
3144
0
        }
3145
0
      }
3146
0
    }
3147
0
    other = other->list_next;
3148
0
  }
3149
3150
  /* for each interval it in inactive intersecting with current */
3151
0
  other = *inactive;
3152
0
  while (other) {
3153
    /* freeUntilPos[it.reg] = next intersection of it with current */
3154
0
    reg = other->reg;
3155
0
    IR_ASSERT(reg >= 0);
3156
0
    if (reg >= IR_REG_NUM) {
3157
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3158
3159
0
      if (overlap) {
3160
0
        ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]);
3161
0
        IR_REGSET_FOREACH(regset, reg) {
3162
0
          if (overlap < nextUsePos[reg]) {
3163
0
            nextUsePos[reg] = overlap;
3164
0
            scores[reg] = 0;
3165
0
          }
3166
0
          if (overlap < blockPos[reg]) {
3167
0
            blockPos[reg] = overlap;
3168
0
          }
3169
0
        } IR_REGSET_FOREACH_END();
3170
0
      }
3171
0
    } else if (IR_REGSET_IN(available, reg)) {
3172
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3173
3174
0
      if (overlap) {
3175
0
        if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3176
0
          if (overlap < nextUsePos[reg]) {
3177
0
            nextUsePos[reg] = overlap;
3178
0
            scores[reg] = 0;
3179
0
          }
3180
0
          if (overlap < blockPos[reg]) {
3181
0
            blockPos[reg] = overlap;
3182
0
          }
3183
0
        } else {
3184
0
          pos = ir_first_use_pos_after(other, ival->range.start,
3185
0
            IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3186
0
          if (pos < nextUsePos[reg]) {
3187
0
            nextUsePos[reg] = pos;
3188
            /* Prefer splitting interval that was already splitted before */
3189
0
            scores[reg] = (other->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) ? 1 : 0;
3190
0
          }
3191
0
        }
3192
0
      }
3193
0
    }
3194
0
    other = other->list_next;
3195
0
  }
3196
3197
  /* register hinting */
3198
0
  reg = IR_REG_NONE;
3199
0
  if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3200
0
    reg = ir_get_preferred_reg(ctx, ival, available);
3201
0
  }
3202
0
  if (reg == IR_REG_NONE) {
3203
0
select_register:
3204
0
    reg = IR_REGSET_FIRST(available);
3205
0
  }
3206
3207
  /* reg = register with highest nextUsePos */
3208
0
  pos = nextUsePos[reg];
3209
0
  best_score = (scores[reg] << 28) + nextUsePos[reg];
3210
0
  tmp_regset = available;
3211
0
  IR_REGSET_EXCL(tmp_regset, reg);
3212
0
  IR_REGSET_FOREACH(tmp_regset, i) {
3213
0
    if (nextUsePos[i] > pos) {
3214
0
      pos = nextUsePos[i];
3215
0
    }
3216
0
    score = (scores[i] << 28) + nextUsePos[i];
3217
0
    if (score > best_score) {
3218
0
      reg = i;
3219
0
      best_score = score;
3220
0
    }
3221
0
  } IR_REGSET_FOREACH_END();
3222
3223
  /* if first usage of current is after nextUsePos[reg] then */
3224
0
  if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3225
    /* all other intervals are used before current, so it is best to spill current itself */
3226
    /* assign spill slot to current */
3227
    /* split current before its first use position that requires a register */
3228
0
    ir_live_pos split_pos;
3229
3230
0
    if (next_use_pos == ival->range.start) {
3231
0
      IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3232
      /* split right after definition */
3233
0
      split_pos = next_use_pos + 1;
3234
0
    } else {
3235
0
      split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3236
0
    }
3237
3238
0
    if (split_pos > ival->range.start) {
3239
0
      IR_LOG_LSRA("    ---- Conflict with others", ival, " (all others are used before)");
3240
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3241
0
      IR_LOG_LSRA("    ---- Spill", ival, "");
3242
0
      ir_add_to_unhandled(unhandled, other);
3243
0
      IR_LOG_LSRA("    ---- Queue", other, "");
3244
0
      return IR_REG_NONE;
3245
0
    }
3246
0
  }
3247
3248
0
  if (ival->end > blockPos[reg]) {
3249
    /* spilling make a register free only for the first part of current */
3250
0
    IR_LOG_LSRA("    ---- Conflict with others", ival, " (spilling make a register free only for the first part)");
3251
    /* split current at optimal position before block_pos[reg] */
3252
0
    ir_live_pos split_pos = ir_last_use_pos_before(ival,  blockPos[reg] + 1,
3253
0
      IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3254
0
    if (split_pos == 0) {
3255
0
      split_pos = ir_first_use_pos_after(ival, blockPos[reg],
3256
0
        IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1;
3257
0
      other = ir_split_interval_at(ctx, ival, split_pos);
3258
0
      ir_add_to_unhandled(unhandled, other);
3259
0
      IR_LOG_LSRA("      ---- Queue", other, "");
3260
0
      return IR_REG_NONE;
3261
0
    }
3262
0
    if (split_pos >= blockPos[reg]) {
3263
0
try_next_available_register:
3264
0
      IR_REGSET_EXCL(available, reg);
3265
0
      if (IR_REGSET_IS_EMPTY(available)) {
3266
0
        fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
3267
0
        IR_ASSERT(0);
3268
0
        exit(-1);
3269
0
      }
3270
0
      IR_LOG_LSRA("      ---- Restart", ival, "");
3271
0
      goto select_register;
3272
0
    }
3273
0
    split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
3274
0
    other = ir_split_interval_at(ctx, ival, split_pos);
3275
0
    ir_add_to_unhandled(unhandled, other);
3276
0
    IR_LOG_LSRA("      ---- Queue", other, "");
3277
0
  }
3278
3279
  /* spill intervals that currently block reg */
3280
0
  prev = NULL;
3281
0
  other = *active;
3282
0
  while (other) {
3283
0
    ir_live_pos split_pos;
3284
3285
0
    if (reg == other->reg) {
3286
      /* split active interval for reg at position */
3287
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3288
3289
0
      if (overlap) {
3290
0
        ir_live_interval *child, *child2;
3291
3292
0
        IR_ASSERT(other->type != IR_VOID);
3293
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with active", other, overlap);
3294
3295
0
        split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3296
0
        if (split_pos) {
3297
0
          split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);
3298
0
          if (split_pos > other->range.start) {
3299
0
            child = ir_split_interval_at(ctx, other, split_pos);
3300
0
            if (prev) {
3301
0
              prev->list_next = other->list_next;
3302
0
            } else {
3303
0
              *active = other->list_next;
3304
0
            }
3305
0
            IR_LOG_LSRA("      ---- Finish", other, "");
3306
0
          } else {
3307
0
            goto try_next_available_register;
3308
0
          }
3309
0
        } else {
3310
0
          child = other;
3311
0
        }
3312
3313
0
        split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
3314
0
        if (split_pos > child->range.start && split_pos < child->end) {
3315
0
          if (child == other) {
3316
0
            other->reg = IR_REG_NONE;
3317
0
            if (prev) {
3318
0
              prev->list_next = other->list_next;
3319
0
            } else {
3320
0
              *active = other->list_next;
3321
0
            }
3322
0
          }
3323
0
          ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);
3324
0
          if (opt_split_pos > child->range.start) {
3325
0
            split_pos = opt_split_pos;
3326
0
          }
3327
0
          child2 = ir_split_interval_at(ctx, child, split_pos);
3328
0
          IR_LOG_LSRA("      ---- Spill", child, "");
3329
0
          ir_add_to_unhandled(unhandled, child2);
3330
0
          IR_LOG_LSRA("      ---- Queue", child2, "");
3331
0
        } else if (child != other) {
3332
          // TODO: this may cause endless loop
3333
0
          ir_add_to_unhandled(unhandled, child);
3334
0
          IR_LOG_LSRA("      ---- Queue", child, "");
3335
0
        } else {
3336
0
          goto try_next_available_register;
3337
0
        }
3338
0
      }
3339
0
      break;
3340
0
    }
3341
0
    prev = other;
3342
0
    other = other->list_next;
3343
0
  }
3344
3345
  /* split any inactive interval for reg at the end of its lifetime hole */
3346
0
  other = *inactive;
3347
0
  while (other) {
3348
    /* freeUntilPos[it.reg] = next intersection of it with current */
3349
0
    if (reg == other->reg) {
3350
0
      ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3351
3352
0
      if (overlap) {
3353
0
        ir_live_interval *child;
3354
3355
0
        IR_ASSERT(other->type != IR_VOID);
3356
0
        IR_LOG_LSRA_CONFLICT("      ---- Conflict with inactive", other, overlap);
3357
        // TODO: optimal split position (this case is not tested)
3358
0
        child = ir_split_interval_at(ctx, other, overlap);
3359
        /* reset range cache */
3360
0
        other->current_range = &other->range;
3361
0
        ir_add_to_unhandled(unhandled, child);
3362
0
        IR_LOG_LSRA("      ---- Queue", child, "");
3363
0
      }
3364
0
    }
3365
0
    other = other->list_next;
3366
0
  }
3367
3368
  /* current.reg = reg */
3369
0
  ival->reg = reg;
3370
0
  IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (after splitting others)");
3371
3372
0
  if (*unhandled && ival->end > (*unhandled)->range.start) {
3373
0
    ival->list_next = *active;
3374
0
    *active = ival;
3375
0
  }
3376
0
  return reg;
3377
0
}
3378
3379
static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data)
3380
0
{
3381
0
  ir_block *bb = data;
3382
0
  ir_tmp_reg tmp_reg;
3383
3384
0
  if (to == 0) {
3385
0
    if (IR_IS_TYPE_INT(type)) {
3386
0
      tmp_reg.num = 0;
3387
0
      tmp_reg.type = type;
3388
0
      tmp_reg.start = IR_USE_SUB_REF;
3389
0
      tmp_reg.end = IR_SAVE_SUB_REF;
3390
0
    } else {
3391
0
      IR_ASSERT(IR_IS_TYPE_FP(type));
3392
0
      tmp_reg.num = 1;
3393
0
      tmp_reg.type = type;
3394
0
      tmp_reg.start = IR_USE_SUB_REF;
3395
0
      tmp_reg.end = IR_SAVE_SUB_REF;
3396
0
    }
3397
0
  } else if (from != 0) {
3398
0
    if (IR_IS_TYPE_INT(type)) {
3399
0
      tmp_reg.num = 0;
3400
0
      tmp_reg.type = type;
3401
0
      tmp_reg.start = IR_USE_SUB_REF;
3402
0
      tmp_reg.end = IR_SAVE_SUB_REF;
3403
0
    } else {
3404
0
      IR_ASSERT(IR_IS_TYPE_FP(type));
3405
0
      tmp_reg.num = 1;
3406
0
      tmp_reg.type = type;
3407
0
      tmp_reg.start = IR_USE_SUB_REF;
3408
0
      tmp_reg.end = IR_SAVE_SUB_REF;
3409
0
    }
3410
0
  } else {
3411
0
    return 1;
3412
0
  }
3413
0
  if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
3414
0
    ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);
3415
0
  }
3416
0
  return 1;
3417
0
}
3418
3419
static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival)
3420
0
{
3421
0
  ir_use_pos *use_pos = ival->use_pos;
3422
3423
0
  if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
3424
0
    IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
3425
0
#if IR_DEBUG
3426
0
    ir_insn *insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
3427
0
    IR_ASSERT(insn->op == IR_PARAM);
3428
0
#endif
3429
0
    use_pos = use_pos->next;
3430
0
    if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
3431
0
      return 0;
3432
0
    }
3433
3434
0
    if (use_pos) {
3435
0
      ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
3436
0
      if (bb->loop_depth) {
3437
0
        return 0;
3438
0
      }
3439
0
    }
3440
3441
0
    return 1;
3442
0
  }
3443
0
  return 0;
3444
0
}
3445
3446
static void ir_assign_bound_spill_slots(ir_ctx *ctx)
3447
0
{
3448
0
  ir_hashtab_bucket *b = ctx->binding->data;
3449
0
  uint32_t n = ctx->binding->count;
3450
0
  uint32_t v;
3451
0
  ir_live_interval *ival;
3452
3453
0
  while (n > 0) {
3454
0
    v = ctx->vregs[b->key];
3455
0
    if (v) {
3456
0
      ival = ctx->live_intervals[v];
3457
0
      if (ival
3458
0
       && ival->stack_spill_pos == -1
3459
0
       && (ival->next || ival->reg == IR_REG_NONE)) {
3460
0
        IR_ASSERT(b->val < 0);
3461
        /* special spill slot */
3462
0
        ival->stack_spill_pos = -b->val;
3463
0
        ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
3464
0
      }
3465
0
    }
3466
0
    b++;
3467
0
    n--;
3468
0
  }
3469
0
}
3470
3471
static int ir_linear_scan(ir_ctx *ctx, ir_ref vars)
3472
0
{
3473
0
  uint32_t b;
3474
0
  ir_block *bb;
3475
0
  ir_live_interval *unhandled = NULL;
3476
0
  ir_live_interval *active = NULL;
3477
0
  ir_live_interval *inactive = NULL;
3478
0
  ir_live_interval *ival, *other, *prev;
3479
0
  int j;
3480
0
  ir_live_pos position;
3481
0
  ir_reg reg;
3482
3483
0
  if (!ctx->live_intervals) {
3484
0
    return 0;
3485
0
  }
3486
3487
0
  if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) {
3488
    /* Add fixed intervals for temporary registers used for DESSA moves */
3489
0
    for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
3490
0
      IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
3491
0
      if (bb->flags & IR_BB_DESSA_MOVES) {
3492
0
        ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, bb);
3493
0
      }
3494
0
    }
3495
0
  }
3496
3497
0
  while (vars) {
3498
0
    ir_ref var = vars;
3499
0
    ir_insn *insn = &ctx->ir_base[var];
3500
3501
0
    IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA);
3502
0
    vars = insn->op3; /* list next */
3503
3504
0
    if (insn->op == IR_VAR) {
3505
0
      ir_ref slot = ir_allocate_spill_slot(ctx, insn->type);
3506
0
      ir_use_list *use_list;
3507
0
      ir_ref n, *p;
3508
3509
0
      insn->op3 = slot;
3510
0
      use_list = &ctx->use_lists[var];
3511
0
      n = use_list->count;
3512
0
      p = &ctx->use_edges[use_list->refs];
3513
0
      for (; n > 0; p++, n--) {
3514
0
        insn = &ctx->ir_base[*p];
3515
0
        if (insn->op == IR_VADDR) {
3516
0
          insn->op3 = slot;
3517
0
        }
3518
0
      }
3519
0
    } else {
3520
0
      ir_insn *val = &ctx->ir_base[insn->op2];
3521
3522
0
      IR_ASSERT(IR_IS_CONST_REF(insn->op2));
3523
0
      IR_ASSERT(IR_IS_TYPE_INT(val->type));
3524
0
      IR_ASSERT(!IR_IS_SYM_CONST(val->op));
3525
0
      IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
3526
0
      IR_ASSERT(val->val.i64 < 0x7fffffff);
3527
3528
0
      insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32);
3529
0
    }
3530
0
  }
3531
3532
0
  for (j = ctx->vregs_count; j != 0; j--) {
3533
0
    ival = ctx->live_intervals[j];
3534
0
    if (ival) {
3535
0
      if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
3536
0
          || !ir_ival_spill_for_fuse_load(ctx, ival)) {
3537
0
        ir_add_to_unhandled(&unhandled, ival);
3538
0
      }
3539
0
    }
3540
0
  }
3541
3542
0
  ival = ctx->live_intervals[0];
3543
0
  if (ival) {
3544
0
    ir_merge_to_unhandled(&unhandled, ival);
3545
0
  }
3546
3547
  /* vregs + tmp + fixed + ALL + SCRATCH_N */
3548
0
  for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_SET_NUM; j++) {
3549
0
    ival = ctx->live_intervals[j];
3550
0
    if (ival) {
3551
0
      ival->current_range = &ival->range;
3552
0
      ival->list_next = inactive;
3553
0
      inactive = ival;
3554
0
    }
3555
0
  }
3556
3557
0
  ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS);
3558
3559
0
#ifdef IR_DEBUG
3560
0
  if (ctx->flags & IR_DEBUG_RA) {
3561
0
    fprintf(stderr, "----\n");
3562
0
    ir_dump_live_ranges(ctx, stderr);
3563
0
    fprintf(stderr, "---- Start LSRA\n");
3564
0
  }
3565
0
#endif
3566
3567
0
  while (unhandled) {
3568
0
    ival = unhandled;
3569
0
    ival->current_range = &ival->range;
3570
0
    unhandled = ival->list_next;
3571
0
    position = ival->range.start;
3572
3573
0
    IR_LOG_LSRA("  ---- Processing", ival, "...");
3574
3575
    /* for each interval i in active */
3576
0
    other = active;
3577
0
    prev = NULL;
3578
0
    while (other) {
3579
0
      ir_live_range *r = other->current_range;
3580
3581
0
      IR_ASSERT(r);
3582
0
      if (r->end <= position) {
3583
0
        do {
3584
0
          r = r->next;
3585
0
        } while (r && r->end <= position);
3586
0
        if (!r) {
3587
          /* move i from active to handled */
3588
0
          other = other->list_next;
3589
0
          if (prev) {
3590
0
            prev->list_next = other;
3591
0
          } else {
3592
0
            active = other;
3593
0
          }
3594
0
          continue;
3595
0
        }
3596
0
        other->current_range = r;
3597
0
      }
3598
0
      if (position < r->start) {
3599
        /* move i from active to inactive */
3600
0
        if (prev) {
3601
0
          prev->list_next = other->list_next;
3602
0
        } else {
3603
0
          active = other->list_next;
3604
0
        }
3605
0
        other->list_next = inactive;
3606
0
        inactive = other;
3607
0
      } else {
3608
0
        prev = other;
3609
0
      }
3610
0
      other = prev ? prev->list_next : active;
3611
0
    }
3612
3613
    /* for each interval i in inactive */
3614
0
    other = inactive;
3615
0
    prev = NULL;
3616
0
    while (other) {
3617
0
      ir_live_range *r = other->current_range;
3618
3619
0
      IR_ASSERT(r);
3620
0
      if (r->end <= position) {
3621
0
        do {
3622
0
          r = r->next;
3623
0
        } while (r && r->end <= position);
3624
0
        if (!r) {
3625
          /* move i from inactive to handled */
3626
0
          other = other->list_next;
3627
0
          if (prev) {
3628
0
            prev->list_next = other;
3629
0
          } else {
3630
0
            inactive = other;
3631
0
          }
3632
0
          continue;
3633
0
        }
3634
0
        other->current_range = r;
3635
0
      }
3636
0
      if (position >= r->start) {
3637
        /* move i from inactive to active */
3638
0
        if (prev) {
3639
0
          prev->list_next = other->list_next;
3640
0
        } else {
3641
0
          inactive = other->list_next;
3642
0
        }
3643
0
        other->list_next = active;
3644
0
        active = other;
3645
0
      } else {
3646
0
        prev = other;
3647
0
      }
3648
0
      other = prev ? prev->list_next : inactive;
3649
0
    }
3650
3651
0
    reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);
3652
0
    if (reg == IR_REG_NONE) {
3653
0
      reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);
3654
0
    }
3655
0
  }
3656
3657
#if 0 //def IR_DEBUG
3658
  /* all intervals must be processed */
3659
  ival = active;
3660
  while (ival) {
3661
    IR_ASSERT(!ival->next);
3662
    ival = ival->list_next;
3663
  }
3664
  ival = inactive;
3665
  while (ival) {
3666
    IR_ASSERT(!ival->next);
3667
    ival = ival->list_next;
3668
  }
3669
#endif
3670
3671
0
  if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) {
3672
3673
0
    if (ctx->binding) {
3674
0
      ir_assign_bound_spill_slots(ctx);
3675
0
    }
3676
3677
    /* Use simple linear-scan (without holes) to allocate and reuse spill slots */
3678
0
    unhandled = NULL;
3679
0
    for (j = ctx->vregs_count; j != 0; j--) {
3680
0
      ival = ctx->live_intervals[j];
3681
0
      if (ival
3682
0
       && (ival->next || ival->reg == IR_REG_NONE)
3683
0
       && ival->stack_spill_pos == -1) {
3684
0
        ival->flags |= IR_LIVE_INTERVAL_SPILLED;
3685
0
        if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
3686
0
          ir_live_range *r;
3687
3688
0
          other = ival;
3689
0
          while (other->next) {
3690
0
            other = other->next;
3691
0
          }
3692
0
          r = &other->range;
3693
0
          while (r->next) {
3694
0
            r = r->next;
3695
0
          }
3696
0
          ival->end = r->end;
3697
0
          ir_add_to_unhandled_spill(&unhandled, ival);
3698
0
        }
3699
0
      }
3700
0
    }
3701
3702
0
    if (unhandled) {
3703
0
      uint8_t size;
3704
0
      ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
3705
0
      ir_live_interval *old;
3706
3707
0
      ((ir_reg_alloc_data*)(ctx->data))->handled = handled;
3708
0
      active = NULL;
3709
0
      while (unhandled) {
3710
0
        ival = unhandled;
3711
0
        ival->current_range = &ival->range;
3712
0
        unhandled = ival->list_next;
3713
0
        position = ival->range.start;
3714
3715
        /* for each interval i in active */
3716
0
        other = active;
3717
0
        prev = NULL;
3718
0
        while (other) {
3719
0
          if (other->end <= position) {
3720
            /* move i from active to handled */
3721
0
            if (prev) {
3722
0
              prev->list_next = other->list_next;
3723
0
            } else {
3724
0
              active = other->list_next;
3725
0
            }
3726
0
            size = ir_type_size[other->type];
3727
0
            IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
3728
0
            old = handled[size];
3729
0
            while (old) {
3730
0
              if (old->stack_spill_pos == other->stack_spill_pos) {
3731
0
                break;
3732
0
              }
3733
0
              old = old->list_next;
3734
0
            }
3735
0
            if (!old) {
3736
0
              other->list_next = handled[size];
3737
0
              handled[size] = other;
3738
0
            }
3739
0
          } else {
3740
0
            prev = other;
3741
0
          }
3742
0
          other = prev ? prev->list_next : active;
3743
0
        }
3744
3745
0
        ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type);
3746
0
        if (unhandled && ival->end > unhandled->range.start) {
3747
0
          ival->list_next = active;
3748
0
          active = ival;
3749
0
        } else {
3750
0
          size = ir_type_size[ival->type];
3751
0
          IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
3752
0
          old = handled[size];
3753
0
          while (old) {
3754
0
            if (old->stack_spill_pos == ival->stack_spill_pos) {
3755
0
              break;
3756
0
            }
3757
0
            old = old->list_next;
3758
0
          }
3759
0
          if (!old) {
3760
0
            ival->list_next = handled[size];
3761
0
            handled[size] = ival;
3762
0
          }
3763
0
        }
3764
0
      }
3765
0
      ((ir_reg_alloc_data*)(ctx->data))->handled = NULL;
3766
0
    }
3767
0
  }
3768
3769
#ifdef IR_TARGET_X86
3770
  if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
3771
    ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE);
3772
  } else if ((ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE)
3773
      && ((ir_reg_alloc_data*)(ctx->data))->cc->fp_ret_reg == IR_REG_NONE) {
3774
    ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type);
3775
  } else {
3776
    ctx->ret_slot = -1;
3777
  }
3778
#endif
3779
3780
0
#ifdef IR_DEBUG
3781
0
  if (ctx->flags & IR_DEBUG_RA) {
3782
0
    fprintf(stderr, "---- Finish LSRA\n");
3783
0
    ir_dump_live_ranges(ctx, stderr);
3784
0
    fprintf(stderr, "----\n");
3785
0
  }
3786
0
#endif
3787
3788
0
  return 1;
3789
0
}
3790
3791
static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available)
3792
0
{
3793
0
    ir_worklist worklist;
3794
0
  ir_block *bb;
3795
0
  uint32_t b, *p, n;
3796
3797
0
  ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1);
3798
0
  ir_worklist_push(&worklist, b0);
3799
0
  while (ir_worklist_len(&worklist) != 0) {
3800
0
    b = ir_worklist_pop(&worklist);
3801
0
        bb = &ctx->cfg_blocks[b];
3802
0
    if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) {
3803
0
      ir_worklist_free(&worklist);
3804
0
      return 1;
3805
0
    }
3806
0
    n = bb->predecessors_count;
3807
0
    for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
3808
0
      b = *p;
3809
0
      bb = &ctx->cfg_blocks[b];
3810
3811
0
      if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) {
3812
0
        ir_worklist_free(&worklist);
3813
0
        return 1;
3814
0
      } else if (!ir_bitset_in(available, b)) {
3815
0
        ir_worklist_push(&worklist, b);
3816
0
      }
3817
0
    }
3818
0
  }
3819
0
  ir_worklist_free(&worklist);
3820
0
  return 0;
3821
0
}
3822
3823
static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
3824
0
{
3825
0
  if (use_pos->next
3826
0
   && use_pos->op_num == 1
3827
0
   && use_pos->next->pos == use_pos->pos
3828
0
   && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
3829
    /* Support for R2 = ADD(R1, R1) */
3830
0
    use_pos = use_pos->next;
3831
0
  }
3832
0
  return use_pos->next && use_pos->next->op_num != 0;
3833
0
}
3834
3835
static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t reg)
3836
0
{
3837
0
  char key[10];
3838
3839
0
  if (!ctx->fused_regs) {
3840
0
    ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
3841
0
    ir_strtab_init(ctx->fused_regs, 8, 128);
3842
0
  }
3843
0
  memcpy(key, &root, sizeof(ir_ref));
3844
0
  memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
3845
0
  ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | (uint8_t)reg);
3846
0
}
3847
3848
static void assign_regs(ir_ctx *ctx)
3849
0
{
3850
0
  ir_ref i;
3851
0
  ir_live_interval *ival, *top_ival;
3852
0
  ir_use_pos *use_pos;
3853
0
  int8_t reg, old_reg;
3854
0
  ir_ref ref;
3855
0
  ir_regset used_regs = 0;
3856
3857
0
  if (!ctx->regs) {
3858
0
    ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
3859
0
    memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
3860
0
  }
3861
3862
0
  if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) {
3863
0
    for (i = 1; i <= ctx->vregs_count; i++) {
3864
0
      ival = ctx->live_intervals[i];
3865
0
      if (ival) {
3866
0
        do {
3867
0
          if (ival->reg != IR_REG_NONE) {
3868
0
            reg = ival->reg;
3869
0
            IR_REGSET_INCL(used_regs, reg);
3870
0
            use_pos = ival->use_pos;
3871
0
            while (use_pos) {
3872
0
              ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
3873
0
              ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3874
0
              use_pos = use_pos->next;
3875
0
            }
3876
0
          }
3877
0
          ival = ival->next;
3878
0
        } while (ival);
3879
0
      }
3880
0
    }
3881
0
  } else {
3882
0
    ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
3883
3884
0
    for (i = 1; i <= ctx->vregs_count; i++) {
3885
0
      top_ival = ival = ctx->live_intervals[i];
3886
0
      if (ival) {
3887
0
        if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) {
3888
0
          do {
3889
0
            if (ival->reg != IR_REG_NONE) {
3890
0
              IR_REGSET_INCL(used_regs, ival->reg);
3891
0
              use_pos = ival->use_pos;
3892
0
              while (use_pos) {
3893
0
                reg = ival->reg;
3894
0
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
3895
0
                if (use_pos->hint_ref < 0) {
3896
0
                  ref = -use_pos->hint_ref;
3897
0
                }
3898
0
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3899
3900
0
                use_pos = use_pos->next;
3901
0
              }
3902
0
            }
3903
0
            ival = ival->next;
3904
0
          } while (ival);
3905
0
        } else {
3906
0
          do {
3907
0
            if (ival->reg != IR_REG_NONE) {
3908
0
              ir_ref prev_use_ref = IR_UNUSED;
3909
3910
0
              ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
3911
0
              IR_REGSET_INCL(used_regs, ival->reg);
3912
0
              use_pos = ival->use_pos;
3913
0
              while (use_pos) {
3914
0
                reg = ival->reg;
3915
0
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
3916
                // TODO: Insert spill loads and stores in optimal positions (resolution)
3917
0
                if (use_pos->op_num == 0) {
3918
0
                  if ((ctx->ir_base[ref].op == IR_COPY
3919
0
                    || ctx->ir_base[ref].op == IR_BITCAST
3920
0
                    || ctx->ir_base[ref].op == IR_TRUNC)
3921
0
                   && !IR_IS_CONST_REF(ctx->ir_base[ref].op1)
3922
0
                   && ctx->vregs[ctx->ir_base[ref].op1] == (uint32_t)i) {
3923
                    /* register reuse */
3924
0
                    ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3925
0
                    prev_use_ref = ref;
3926
0
                    use_pos = use_pos->next;
3927
0
                    continue;
3928
0
                  }
3929
0
                  ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
3930
0
                  if (ctx->ir_base[ref].op == IR_PHI) {
3931
                    /* Spilled PHI var is passed through memory */
3932
0
                    reg = IR_REG_NONE;
3933
0
                    prev_use_ref = IR_UNUSED;
3934
0
                  } else if (ctx->ir_base[ref].op == IR_PARAM
3935
0
                   && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
3936
                    /* Stack PARAM var is passed through memory */
3937
0
                    reg = IR_REG_NONE;
3938
0
                  } else {
3939
0
                    uint32_t use_b = ctx->cfg_map[ref];
3940
3941
0
                    if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
3942
0
                      ir_bitset_incl(available, use_b);
3943
0
                    }
3944
0
                    if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
3945
0
                      reg |= IR_REG_SPILL_SPECIAL;
3946
0
                    } else {
3947
0
                      reg |= IR_REG_SPILL_STORE;
3948
0
                    }
3949
0
                    prev_use_ref = ref;
3950
0
                  }
3951
0
                } else {
3952
0
                  if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
3953
0
                   && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
3954
0
                    if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
3955
0
                     && use_pos->hint != reg
3956
//                     && ctx->ir_base[ref].op != IR_CALL
3957
//                     && ctx->ir_base[ref].op != IR_TAILCALL) {
3958
0
                     && ctx->ir_base[ref].op != IR_SNAPSHOT
3959
0
                     && !needs_spill_load(ctx, ival, use_pos)) {
3960
                      /* fuse spill load (valid only when register is not reused) */
3961
0
                      reg = IR_REG_NONE;
3962
0
                      if (use_pos->next
3963
0
                       && use_pos->op_num == 1
3964
0
                       && use_pos->next->pos == use_pos->pos
3965
0
                       && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
3966
                        /* Support for R2 = BINOP(R1, R1) */
3967
0
                        if (use_pos->hint_ref < 0) {
3968
0
                          ref = -use_pos->hint_ref;
3969
0
                        }
3970
0
                        ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3971
0
                        use_pos = use_pos->next;
3972
0
                      }
3973
0
                    } else {
3974
0
                      if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
3975
0
                        reg |= IR_REG_SPILL_SPECIAL;
3976
0
                      } else {
3977
0
                        reg |= IR_REG_SPILL_LOAD;
3978
0
                      }
3979
0
                      if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
3980
0
                        uint32_t use_b = ctx->cfg_map[ref];
3981
3982
0
                        if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
3983
0
                          ir_bitset_incl(available, use_b);
3984
0
                        }
3985
0
                        prev_use_ref = ref;
3986
0
                      }
3987
0
                    }
3988
0
                  } else {
3989
                    /* reuse register without spill load */
3990
0
                  }
3991
3992
0
                  if (use_pos->hint_ref < 0) {
3993
0
                    if (use_pos->flags & IR_PHI_USE) {
3994
0
                      IR_ASSERT(use_pos->hint_ref < 0);
3995
0
                      IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
3996
0
                      IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
3997
0
                      if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
3998
                        /* Spilled PHI var is passed through memory */
3999
0
                        reg = IR_REG_NONE;
4000
0
                      }
4001
0
                    } else {
4002
0
                      IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
4003
0
                      old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num);
4004
0
                      if ((old_reg != IR_REG_NONE && reg != old_reg) || reg == IR_REG_NONE) {
4005
0
                        ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
4006
0
                        ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
4007
0
                        use_pos = use_pos->next;
4008
0
                        continue;
4009
0
                      }
4010
0
                    }
4011
0
                    ref = -use_pos->hint_ref;
4012
0
                  }
4013
0
                }
4014
4015
0
                ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
4016
4017
0
                use_pos = use_pos->next;
4018
0
              }
4019
0
            } else {
4020
0
              use_pos = ival->use_pos;
4021
0
              while (use_pos) {
4022
0
                ref = IR_LIVE_POS_TO_REF(use_pos->pos);
4023
0
                if (ctx->ir_base[ref].op == IR_SNAPSHOT
4024
0
                 && !(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
4025
0
                  IR_ASSERT(use_pos->hint_ref >= 0);
4026
                  /* A reference to a CPU spill slot */
4027
0
                  reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
4028
0
                  ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
4029
0
                } else if (use_pos->hint_ref < 0 && !(use_pos->flags & IR_PHI_USE)) {
4030
0
                  IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
4031
0
                  ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
4032
0
                  ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, IR_REG_NONE);
4033
0
                }
4034
0
                use_pos = use_pos->next;
4035
0
              }
4036
0
            }
4037
0
            ival = ival->next;
4038
0
          } while (ival);
4039
0
        }
4040
0
      }
4041
0
    }
4042
0
    ir_mem_free(available);
4043
0
  }
4044
4045
  /* Temporary registers */
4046
0
  ival = ctx->live_intervals[0];
4047
0
  if (ival) {
4048
0
    do {
4049
0
      IR_ASSERT(ival->reg != IR_REG_NONE);
4050
0
      IR_REGSET_INCL(used_regs, ival->reg);
4051
0
      reg = ival->reg;
4052
0
      if (ival->tmp_op_num > 0) {
4053
0
        ir_insn *insn = &ctx->ir_base[ival->tmp_ref];
4054
4055
0
        if (ival->tmp_op_num <= insn->inputs_count) {
4056
0
          ir_ref *ops = insn->ops;
4057
0
          if (IR_IS_CONST_REF(ops[ival->tmp_op_num])) {
4058
            /* constant rematerialization */
4059
0
            reg |= IR_REG_SPILL_LOAD;
4060
0
          } else if (ctx->ir_base[ops[ival->tmp_op_num]].op == IR_ALLOCA
4061
0
              || ctx->ir_base[ops[ival->tmp_op_num]].op == IR_VADDR) {
4062
            /* local address rematerialization */
4063
0
            reg |= IR_REG_SPILL_LOAD;
4064
0
          }
4065
0
        }
4066
0
      }
4067
0
      ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, reg);
4068
0
      ival = ival->next;
4069
0
    } while (ival);
4070
0
  }
4071
4072
0
  const ir_call_conv_dsc *cc = ((ir_reg_alloc_data*)(ctx->data))->cc;
4073
0
  if (ctx->fixed_stack_frame_size != -1) {
4074
0
    ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
4075
0
    if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs),
4076
0
      ctx->used_preserved_regs)) {
4077
      // TODO: Preserved reg and fixed frame conflict ???
4078
      // IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
4079
0
    }
4080
0
  } else {
4081
0
    ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
4082
0
      IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs),
4083
0
        (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : cc->preserved_regs));
4084
0
  }
4085
4086
0
  ir_fix_stack_frame(ctx);
4087
0
}
4088
4089
int ir_reg_alloc(ir_ctx *ctx)
4090
0
{
4091
0
  ir_reg_alloc_data data;
4092
0
  ir_ref vars = ctx->vars;
4093
4094
0
  data.cc = ir_get_call_conv_dsc(ctx->flags);
4095
0
  data.unused_slot_4 = 0;
4096
0
  data.unused_slot_2 = 0;
4097
0
  data.unused_slot_1 = 0;
4098
0
  data.handled = NULL;
4099
4100
0
  ctx->data = &data;
4101
0
  ctx->stack_frame_size = 0;
4102
4103
0
  if (ir_linear_scan(ctx, vars)) {
4104
0
    assign_regs(ctx);
4105
0
    ctx->data = NULL;
4106
0
    return 1;
4107
0
  }
4108
4109
0
  ctx->data = NULL;
4110
0
  return 0;
4111
0
}