Coverage Report

Created: 2026-01-18 06:49

/src/php-src/ext/opcache/jit/ir/ir_gcm.c
Line | Count | Source (each entry below gives the source line number, its execution count, and the source text; counts are blank for non-executable lines, and every executable line in this file has a count of 0)
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (GCM - Global Code Motion and Scheduler)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 *
7
 * The GCM algorithm is based on Cliff Click's publication
8
 * See: C. Click. "Global Code Motion / Global Value Numbering", PLDI'95.
9
 */
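For orientation, the scheduling idea from Click's paper that the code below implements can be reduced to a small model: every node gets an "early" block (just below its inputs in the dominator tree) and a "late" block (the least common ancestor of its uses), and the final block is picked on the dominator path between them, preferring the smallest loop depth. The following stand-alone sketch uses hypothetical dom_parent/loop_depth arrays, not the IR API:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical dominator tree and loop nesting for blocks 1..5 (index 0 unused). */
    static const uint32_t dom_parent[6] = {0, 0, 1, 2, 3, 3};
    static const uint32_t loop_depth[6] = {0, 0, 1, 1, 2, 1};

    /* Walk the dominator path from "late" up to "early" and keep the least
     * deeply nested block seen, which is the core GCM placement heuristic. */
    static uint32_t pick_block(uint32_t early, uint32_t late)
    {
        uint32_t best = late, b = late;

        while (b != early) {
            b = dom_parent[b];
            if (loop_depth[b] < loop_depth[best]) {
                best = b;
            }
        }
        return best;
    }

    int main(void)
    {
        /* The late block (4) sits in a depth-2 loop, the early block (1) is
         * outside all loops, so the node is hoisted to block 1. */
        printf("chosen block: %u\n", pick_block(1, 4));
        return 0;
    }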
10
11
#include "ir.h"
12
#include "ir_private.h"
13
14
0
#define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
15
0
#define IR_GCM_EARLY_BLOCK(b)        ((uint32_t)-((int32_t)(b)))
16
17
#define IR_GCM_SPLIT 1
18
#define IR_SCHEDULE_SWAP_OPS 1
19
20
static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late)
21
0
{
22
0
  ir_ref n, *p, input;
23
0
  ir_insn *insn;
24
0
  uint32_t dom_depth;
25
0
  uint32_t b, result;
26
27
0
  insn = &ctx->ir_base[ref];
28
29
0
  IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
30
0
  IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
31
32
0
  result = 1;
33
0
  dom_depth = 0;
34
35
0
  n = insn->inputs_count;
36
0
  for (p = insn->ops + 1; n > 0; p++, n--) {
37
0
    input = *p;
38
0
    if (input > 0) {
39
0
      b = ctx->cfg_map[input];
40
0
      if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
41
0
        b = IR_GCM_EARLY_BLOCK(b);
42
0
      } else if (!b) {
43
0
        b = ir_gcm_schedule_early(ctx, input, queue_late);
44
0
      }
45
0
      if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
46
0
        dom_depth = ctx->cfg_blocks[b].dom_depth;
47
0
        result = b;
48
0
      }
49
0
    }
50
0
  }
51
52
0
  ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result);
53
0
  ir_list_push_unchecked(queue_late, ref);
54
0
  return result;
55
0
}
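ir_gcm_schedule_early() above recurses over the data inputs and records the deepest input block in the dominator tree (negated via IR_GCM_EARLY_BLOCK until the late pass runs). A minimal stand-alone sketch of just that selection, with hypothetical arrays instead of the ir_ctx structures:

    #include <stdint.h>

    /* Toy "early" choice: the deepest block (by dominator-tree depth) among
     * the blocks of the node's inputs; block 1 (the entry) is always legal.
     * For inputs placed in blocks {2, 5} with dom_depth {0,0,1,1,2,3} this
     * returns 5. */
    static uint32_t schedule_early_toy(const uint32_t *input_block, int n,
                                       const uint32_t *dom_depth)
    {
        uint32_t best = 1;

        for (int i = 0; i < n; i++) {
            if (dom_depth[input_block[i]] > dom_depth[best]) {
                best = input_block[i];
            }
        }
        return best;
    }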
56
57
/* Least Common Ancestor (LCA) in the dominator tree */
58
static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
59
0
{
60
0
  uint32_t dom_depth;
61
62
0
  dom_depth = ctx->cfg_blocks[b2].dom_depth;
63
0
  while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
64
0
    b1 = ctx->cfg_blocks[b1].dom_parent;
65
0
  }
66
0
  dom_depth = ctx->cfg_blocks[b1].dom_depth;
67
0
  while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
68
0
    b2 = ctx->cfg_blocks[b2].dom_parent;
69
0
  }
70
0
  while (b1 != b2) {
71
0
    b1 = ctx->cfg_blocks[b1].dom_parent;
72
0
    b2 = ctx->cfg_blocks[b2].dom_parent;
73
0
  }
74
0
  return b2;
75
0
}
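A self-contained usage sketch of the same two-phase walk (equalize depths, then climb in lock-step) on a hypothetical dominator tree; the arrays below are made up for illustration and are not the IR data structures:

    #include <stdint.h>
    #include <assert.h>

    /* Hypothetical dominator tree: 1 is the root, 2 and 3 are its children,
     * 4 and 5 are children of 2 (index 0 unused). */
    static const uint32_t parent[6] = {0, 0, 1, 1, 2, 2};
    static const uint32_t depth[6]  = {0, 0, 1, 1, 2, 2};

    static uint32_t lca(uint32_t b1, uint32_t b2)
    {
        while (depth[b1] > depth[b2]) b1 = parent[b1]; /* equalize depths */
        while (depth[b2] > depth[b1]) b2 = parent[b2];
        while (b1 != b2) { b1 = parent[b1]; b2 = parent[b2]; } /* climb together */
        return b1;
    }

    int main(void)
    {
        assert(lca(4, 5) == 2); /* siblings meet at their parent */
        assert(lca(4, 3) == 1); /* different subtrees meet at the root */
        return 0;
    }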
76
77
static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
78
0
{
79
0
  ir_block *bb = &ctx->cfg_blocks[lca];
80
0
  uint32_t loop_depth = bb->loop_depth;
81
0
  uint32_t flags, best, b;
82
83
0
  if (!loop_depth) {
84
0
    return lca;
85
0
  }
86
87
#if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */
88
  if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
89
    ir_use_list *use_list = &ctx->use_lists[ref];
90
91
    if (use_list->count == 1) {
92
      ir_ref use = ctx->use_edges[use_list->refs];
93
      ir_insn *insn = &ctx->ir_base[use];
94
      if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
95
        /* Don't hoist invariant comparison */
96
        return lca;
97
      }
98
    }
99
  }
100
#endif
101
102
0
  flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
103
0
  if ((flags & IR_BB_LOOP_WITH_ENTRY)
104
0
   && !(ctx->binding && ir_binding_find(ctx, ref))) {
105
    /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
106
0
    return lca;
107
0
  }
108
109
0
  best = b = lca;
110
0
  do {
111
0
    b = bb->dom_parent;
112
0
    bb = &ctx->cfg_blocks[b];
113
0
    if (bb->loop_depth < loop_depth) {
114
0
      if (!bb->loop_depth) {
115
0
#if 1
116
        /* Avoid LICM if LOOP doesn't have a pre-header block */
117
0
        ir_block *loop_bb = &ctx->cfg_blocks[best];
118
119
0
        if (!(loop_bb->flags & IR_BB_LOOP_HEADER)) {
120
0
          loop_bb = &ctx->cfg_blocks[loop_bb->loop_header];
121
0
        }
122
0
        if (loop_bb->predecessors_count > 2) {
123
0
          int n = loop_bb->predecessors_count;
124
0
          uint32_t *p = ctx->cfg_edges + loop_bb->predecessors;
125
126
0
          while (n && *p != b) {
127
0
            n--; p++;
128
0
          }
129
0
          if (!n) {
130
0
            break;
131
0
          }
132
0
        }
133
0
#endif
134
0
        best = b;
135
0
        break;
136
0
      }
137
0
      flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
138
0
      if ((flags & IR_BB_LOOP_WITH_ENTRY)
139
0
       && !(ctx->binding && ir_binding_find(ctx, ref))) {
140
0
        break;
141
0
      }
142
0
      loop_depth = bb->loop_depth;
143
0
      best = b;
144
0
    }
145
0
  } while (b != ctx->cfg_map[ref]);
146
147
0
  return best;
148
0
}
149
150
#if IR_GCM_SPLIT
151
/* Partially Dead Code Elimination through splitting the node and sunking the clones
152
 *
153
 * This code is based on the Benedikt Meurer's idea first implemented in V8.
154
 * See: https://codereview.chromium.org/899433005
155
 */
156
157
typedef struct _ir_gcm_split_data {
158
  ir_sparse_set totally_useful;
159
  ir_list       worklist;
160
} ir_gcm_split_data;
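At the source level, the PDCE-by-splitting approach described in the comment above corresponds to the following before/after picture. The snippet is purely illustrative C with hypothetical helpers, unrelated to the IR node representation:

    #include <stdio.h>

    static int expensive(int x) { return x * x; } /* stands in for a costly computation */

    static int before(int cond, int x)
    {
        int t = expensive(x);   /* computed on every path ... */
        return cond ? t : 0;    /* ... but needed only when cond is true: partially dead */
    }

    static int after(int cond, int x)
    {
        return cond ? expensive(x) : 0; /* the clone is sunk into the only using path */
    }

    int main(void)
    {
        printf("%d %d\n", before(1, 3), after(1, 3)); /* both print 9 */
        return 0;
    }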
161
162
static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
163
0
{
164
0
  uint32_t *p, i, n = bb->predecessors_count;
165
166
0
  IR_ASSERT(n > 0);
167
0
  p = ctx->cfg_edges + bb->predecessors;
168
0
  do {
169
0
    i = *p;
170
0
    if (!ir_sparse_set_in(&data->totally_useful, i)) {
171
0
      ir_list_push(&data->worklist, i);
172
0
    }
173
0
    p++;
174
0
    n--;
175
0
  } while (n > 0);
176
0
}
177
178
static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
179
0
{
180
0
  uint32_t *p, i, n = bb->successors_count;
181
182
0
  if (n <= 1) {
183
0
    IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors]));
184
0
    return 1;
185
0
  }
186
187
0
  p = ctx->cfg_edges + bb->successors;
188
0
  do {
189
0
    i = *p;
190
0
    if (!ir_sparse_set_in(&data->totally_useful, i)) {
191
0
      return 0;
192
0
    }
193
0
    p++;
194
0
    n--;
195
0
  } while (n > 0);
196
197
0
  return 1;
198
0
}
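_check_successors() feeds the fixed-point loop in ir_split_partially_dead_node(): a block joins the TOTALLY_USEFUL set only when every successor is already in it. A toy run of that rule on a hypothetical diamond CFG (1 -> {2,3} -> 4) with a real use only in block 2:

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        bool totally_useful[5] = {false, false, true, false, false}; /* seed: the use in BB2 */
        const int succ_of_1[2] = {2, 3};                             /* successors of BB1 */
        bool all = true;

        for (int i = 0; i < 2; i++) {
            all = all && totally_useful[succ_of_1[i]];
        }
        if (all) {
            totally_useful[1] = true;
        }

        /* BB3 is not in the set, so BB1 stays out: a value computed in BB1 but
         * used only in BB2 is partially dead there and is worth splitting. */
        printf("BB1 totally useful: %s\n", totally_useful[1] ? "yes" : "no");
        return 0;
    }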
199
200
static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
201
0
{
202
0
  ir_use_list *use_list;
203
0
  ir_insn *insn;
204
0
  ir_ref n, *p, use;
205
0
  uint32_t i;
206
0
  ir_gcm_split_data *data = ctx->data;
207
208
0
  IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);
209
210
  /* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD)
211
   * 1.1. Collect the blocks where the node is really USED.
212
   */
213
0
  ir_sparse_set_clear(&data->totally_useful);
214
215
0
  use_list = &ctx->use_lists[ref];
216
0
  n = use_list->count;
217
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
218
0
    use = *p;
219
0
    insn = &ctx->ir_base[use];
220
0
    if (insn->op == IR_PHI) {
221
0
      ir_ref *p = insn->ops + 2; /* PHI data inputs */
222
0
      ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
223
0
      ir_ref n = insn->inputs_count - 1;
224
225
0
      for (;n > 0; p++, q++, n--) {
226
0
        if (*p == ref) {
227
0
          i = ctx->cfg_map[*q];
228
0
          IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
229
0
          if (!ir_sparse_set_in(&data->totally_useful, i)) {
230
0
            if (i == b) return 0; /* node is totally-useful in the scheduled block */
231
0
            ir_sparse_set_add(&data->totally_useful, i);
232
0
          }
233
0
        }
234
0
      }
235
0
    } else {
236
0
      i = ctx->cfg_map[use];
237
0
      if (!i) {
238
0
        continue;
239
0
      }
240
0
      IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
241
0
      if (!ir_sparse_set_in(&data->totally_useful, i)) {
242
0
        if (i == b) return 0; /* node is totally-useful in the scheduled block */
243
0
        ir_sparse_set_add(&data->totally_useful, i);
244
0
      }
245
0
    }
246
0
  }
247
248
0
#ifdef IR_DEBUG
249
0
  if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
250
0
    bool first = 1;
251
0
    fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
252
0
    IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
253
0
      if (first) {
254
0
        fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
255
0
        first = 0;
256
0
      } else {
257
0
        fprintf(stderr, ", BB%d", i);
258
0
      }
259
0
    } IR_SPARSE_SET_FOREACH_END();
260
0
    fprintf(stderr, "]\n");
261
0
  }
262
0
#endif
263
264
  /* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
265
   *      add them to the TOTALLY_USEFUL set if all of their successors are already there.
266
   */
267
0
  IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
268
0
    _push_predecessors(ctx, &ctx->cfg_blocks[i], data);
269
0
  } IR_SPARSE_SET_FOREACH_END();
270
271
0
  while (ir_list_len(&data->worklist)) {
272
0
    i = ir_list_pop(&data->worklist);
273
0
    if (!ir_sparse_set_in(&data->totally_useful, i)) {
274
0
      ir_block *bb = &ctx->cfg_blocks[i];
275
276
0
      if (_check_successors(ctx, bb, data)) {
277
0
        if (i == b) {
278
          /* node is TOTALLY_USEFUL in the scheduled block */
279
0
          ir_list_clear(&data->worklist);
280
0
          return 0;
281
0
        }
282
0
        ir_sparse_set_add(&data->totally_useful, i);
283
0
        _push_predecessors(ctx, bb, data);
284
0
      }
285
0
    }
286
0
  }
287
288
0
  IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b));
289
290
0
#ifdef IR_DEBUG
291
0
  if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
292
0
    bool first = 1;
293
0
    IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
294
0
      if (first) {
295
0
        fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
296
0
        first = 0;
297
0
      } else {
298
0
        fprintf(stderr, ", BB%d", i);
299
0
      }
300
0
    } IR_SPARSE_SET_FOREACH_END();
301
0
    fprintf(stderr, "]\n");
302
0
  }
303
0
#endif
304
305
  /* 2. Split the USEs into partitions */
306
0
  use_list = &ctx->use_lists[ref];
307
0
  ir_hashtab hash;
308
0
  uint32_t j, clone, clones_count = 0, uses_count = 0;
309
0
  struct {
310
0
    ir_ref   ref;
311
0
    uint32_t block;
312
0
    uint32_t use_count;
313
0
    uint32_t use;
314
0
  } *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
315
0
  struct {
316
0
    ir_ref   ref;
317
0
    uint32_t block;
318
0
    uint32_t next;
319
0
  } *uses = ir_mem_malloc(sizeof(*uses) * use_list->count);
320
321
0
  ir_hashtab_init(&hash, use_list->count);
322
0
  n = use_list->count;
323
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
324
0
    use = *p;
325
0
    insn = &ctx->ir_base[use];
326
0
    if (insn->op == IR_PHI) {
327
0
      ir_ref *p = insn->ops + 2; /* PHI data inputs */
328
0
      ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
329
0
      ir_ref n = insn->inputs_count - 1;
330
331
      /* PHIs must be processed once */
332
0
      if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) {
333
0
        continue;
334
0
      }
335
0
      ir_hashtab_add(&hash, -use, IR_NULL);
336
0
      for (;n > 0; p++, q++, n--) {
337
0
        if (*p == ref) {
338
0
          j = i = ctx->cfg_map[*q];
339
0
          while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
340
0
            j = ctx->cfg_blocks[j].idom;
341
0
          }
342
0
          clone = ir_hashtab_find(&hash, j);
343
0
          if (clone == IR_INVALID_VAL) {
344
0
            clone = clones_count++;
345
0
            ir_hashtab_add(&hash, j, clone);
346
0
            clones[clone].block = j;
347
0
            clones[clone].use_count = 0;
348
0
            clones[clone].use = (uint32_t)-1;
349
0
          }
350
0
          uses[uses_count].ref = use;
351
0
          uses[uses_count].block = i;
352
0
          uses[uses_count].next = clones[clone].use;
353
0
          clones[clone].use_count++;
354
0
          clones[clone].use = uses_count++;
355
0
        }
356
0
      }
357
0
    } else {
358
0
      j = i = ctx->cfg_map[use];
359
0
      if (i) {
360
0
        IR_ASSERT(i > 0);
361
0
        while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
362
0
          j = ctx->cfg_blocks[j].idom;
363
0
        }
364
0
      }
365
0
      clone = ir_hashtab_find(&hash, j);
366
0
      if (clone == IR_INVALID_VAL) {
367
0
        clone = clones_count++;
368
0
        ir_hashtab_add(&hash, j, clone);
369
0
        clones[clone].block = j;
370
0
        clones[clone].use_count = 0;
371
0
        clones[clone].use = -1;
372
0
      }
373
0
      uses[uses_count].ref = use;
374
0
      uses[uses_count].block = i;
375
0
      uses[uses_count].next = clones[clone].use;
376
0
      clones[clone].use_count++;
377
0
      clones[clone].use = uses_count++;
378
0
    }
379
0
  }
380
381
0
#ifdef IR_DEBUG
382
0
  if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
383
0
    for (i = 0; i < clones_count; i++) {
384
0
      uint32_t u = clones[i].use;
385
386
0
      fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
387
0
        i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
388
0
      u = uses[u].next;
389
0
      while (u != (uint32_t)-1) {
390
0
        fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
391
0
        u = uses[u].next;
392
0
      }
393
0
      fprintf(stderr, "]\n");
394
0
    }
395
0
  }
396
0
#endif
397
398
  /* Create Clones */
399
0
  insn = &ctx->ir_base[ref];
400
0
  clones[0].ref = ref;
401
0
  for (i = 1; i < clones_count; i++) {
402
0
    clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
403
0
    insn = &ctx->ir_base[ref];
404
    /* Depending on the flags in IR_OPS, these can be references or data. */
405
0
    if (insn->op1 > 0 && insn->inputs_count >= 1) ir_use_list_add(ctx, insn->op1, clone);
406
0
    if (insn->op2 > 0 && insn->inputs_count >= 2) ir_use_list_add(ctx, insn->op2, clone);
407
0
    if (insn->op3 > 0 && insn->inputs_count >= 3) ir_use_list_add(ctx, insn->op3, clone);
408
0
  }
409
410
  /* Reconstruct IR: update DEF->USE lists, CFG mapping, etc. */
411
0
  ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
412
0
  ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t));
413
0
  n = ctx->use_lists[ref].refs;
414
0
  for (i = 0; i < clones_count; i++) {
415
0
    clone = clones[i].ref;
416
0
    if (clones[i].block
417
0
     && clones[i].use_count == 1
418
0
     && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) {
419
      /* TOTALLY_USEFUL block may be a head of a diamond above the real usage.
420
       * Sink it down to the real usage block.
421
       * Clones with few uses will be sunk into the LCA block.
422
       */
423
0
      clones[i].block = uses[clones[i].use].block;
424
0
    }
425
0
    ctx->cfg_map[clone] = clones[i].block;
426
0
    ctx->use_lists[clone].count = clones[i].use_count;
427
0
    ctx->use_lists[clone].refs = n;
428
429
0
    uint32_t u = clones[i].use;
430
0
    while (u != (uint32_t)-1) {
431
0
      use = uses[u].ref;
432
0
      ctx->use_edges[n++] = use;
433
0
      u = uses[u].next;
434
0
      if (i > 0) {
435
        /* replace inputs */
436
0
        ir_insn *insn = &ctx->ir_base[use];
437
0
        ir_ref k, l = insn->inputs_count;
438
439
0
        if (insn->op == IR_PHI) {
440
0
          for (k = 1; k <= l; k++) {
441
0
            if (ir_insn_op(insn, k) == ref) {
442
0
              j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)];
443
0
              if (j != clones[i].block) {
444
0
                uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth;
445
0
                while (ctx->cfg_blocks[j].dom_depth > dom_depth) {
446
0
                  j = ctx->cfg_blocks[j].dom_parent;
447
0
                }
448
0
                if (j != clones[i].block) {
449
0
                  continue;
450
0
                }
451
0
              }
452
0
              ir_insn_set_op(insn, k, clone);
453
0
              break;
454
0
            }
455
0
          }
456
0
        } else {
457
0
          for (k = 1; k <= l; k++) {
458
0
            if (ir_insn_op(insn, k) == ref) {
459
0
              ir_insn_set_op(insn, k, clone);
460
0
              break;
461
0
            }
462
0
          }
463
0
        }
464
0
      }
465
0
    }
466
0
  }
467
468
0
  ir_mem_free(uses);
469
0
  ir_mem_free(clones);
470
0
  ir_hashtab_free(&hash);
471
472
0
#ifdef IR_DEBUG
473
0
  if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
474
0
    ir_check(ctx);
475
0
  }
476
0
#endif
477
478
0
  return 1;
479
0
}
480
#endif
481
482
#ifdef IR_DEBUG
483
static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2)
484
0
{
485
0
  uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth;
486
0
  const ir_block *bb2 = &ctx->cfg_blocks[b2];
487
488
0
  while (bb2->dom_depth > b1_depth) {
489
0
    b2 = bb2->dom_parent;
490
0
    bb2 = &ctx->cfg_blocks[b2];
491
0
  }
492
0
  return b1 == b2;
493
0
}
494
#endif
495
496
static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
497
0
{
498
0
  ir_ref n, use;
499
0
  uint32_t lca = 0;
500
501
0
  IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR);
502
0
  IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI);
503
504
0
  IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b));
505
0
  b = IR_GCM_EARLY_BLOCK(b);
506
0
  ctx->cfg_map[ref] = b;
507
508
0
  for (n = 0; n < ctx->use_lists[ref].count; n++) {
509
0
    use = ctx->use_edges[ctx->use_lists[ref].refs + n];
510
0
    b = ctx->cfg_map[use];
511
0
    if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
512
0
      ir_gcm_schedule_late(ctx, use, b);
513
0
      b = ctx->cfg_map[use];
514
0
      IR_ASSERT(b != 0);
515
0
    } else if (!b) {
516
0
      continue;
517
0
    } else if (ctx->ir_base[use].op == IR_PHI) {
518
0
      ir_insn *insn = &ctx->ir_base[use];
519
0
      ir_ref *p = insn->ops + 2; /* PHI data inputs */
520
0
      ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
521
0
      ir_ref n = insn->inputs_count - 1;
522
523
0
      for (;n > 0; p++, q++, n--) {
524
0
        if (*p == ref) {
525
0
          b = ctx->cfg_map[*q];
526
0
          lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
527
0
        }
528
0
      }
529
0
      continue;
530
0
    }
531
0
    lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
532
0
  }
533
534
0
  IR_ASSERT(lca != 0 && "No Common Ancestor");
535
0
  IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late");
536
537
0
#if IR_GCM_SPLIT
538
0
  if (ctx->use_lists[ref].count > 1
539
0
   && ir_split_partially_dead_node(ctx, ref, lca)) {
540
0
    return;
541
0
  }
542
0
#endif
543
544
0
  if (lca != ctx->cfg_map[ref]) {
545
0
    b = ir_gcm_select_best_block(ctx, ref, lca);
546
547
0
    ctx->cfg_map[ref] = b;
548
549
    /* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */
550
0
    if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
551
0
      ir_use_list *use_list = &ctx->use_lists[ref];
552
0
      ir_ref n, *p, use;
553
554
0
      for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
555
0
        use = *p;
556
0
        if (ctx->ir_base[use].op == IR_OVERFLOW) {
557
0
          ctx->cfg_map[use] = b;
558
0
          break;
559
0
        }
560
0
      }
561
0
    }
562
0
  }
563
0
}
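The late placement computed above is the least common ancestor of the blocks that consume the value, with the twist that a PHI consumes its operand in the corresponding MERGE predecessor rather than in its own block. Reusing the toy lca() helper sketched after ir_gcm_find_lca(), the fold over the uses looks like this (hypothetical arrays, not the IR API):

    /* use_block[i] is the block where use i consumes the value; for a PHI use
     * the caller is assumed to have substituted the feeding predecessor block. */
    static uint32_t schedule_late_toy(const uint32_t *use_block, int n)
    {
        uint32_t b = use_block[0];

        for (int i = 1; i < n; i++) {
            b = lca(b, use_block[i]); /* lca() from the earlier toy sketch */
        }
        return b;
    }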
564
565
int ir_gcm(ir_ctx *ctx)
566
0
{
567
0
  ir_ref k, n, *p, ref;
568
0
  ir_block *bb;
569
0
  ir_list queue_early;
570
0
  ir_list queue_late;
571
0
  uint32_t *_blocks, b;
572
0
  ir_insn *insn, *use_insn;
573
0
  ir_use_list *use_list;
574
575
0
  IR_ASSERT(ctx->cfg_map);
576
0
  _blocks = ctx->cfg_map;
577
578
0
  ir_list_init(&queue_early, ctx->insns_count);
579
580
0
  if (ctx->cfg_blocks_count == 1) {
581
0
    ref = ctx->cfg_blocks[1].end;
582
0
    do {
583
0
      insn = &ctx->ir_base[ref];
584
0
      _blocks[ref] = 1; /* pin to block */
585
0
      if (insn->inputs_count > 1) {
586
        /* insn has input data edges */
587
0
        ir_list_push_unchecked(&queue_early, ref);
588
0
      }
589
0
      ref = insn->op1; /* control predecessor */
590
0
    } while (ref != 1); /* IR_START */
591
0
    _blocks[1] = 1; /* pin to block */
592
593
0
    use_list = &ctx->use_lists[1];
594
0
    n = use_list->count;
595
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
596
0
      ref = *p;
597
0
      use_insn = &ctx->ir_base[ref];
598
0
      if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
599
0
        ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
600
0
        _blocks[ref] = 1; /* pin to block */
601
0
      }
602
0
    }
603
604
    /* Place all live nodes into the first block */
605
0
    while (ir_list_len(&queue_early)) {
606
0
      ref = ir_list_pop(&queue_early);
607
0
      insn = &ctx->ir_base[ref];
608
0
      n = insn->inputs_count;
609
0
      for (p = insn->ops + 1; n > 0; p++, n--) {
610
0
        ref = *p;
611
0
        if (ref > 0 && _blocks[ref] == 0) {
612
0
          _blocks[ref] = 1;
613
0
          ir_list_push_unchecked(&queue_early, ref);
614
0
        }
615
0
      }
616
0
    }
617
618
0
    ir_list_free(&queue_early);
619
620
0
    return 1;
621
0
  }
622
623
0
  ir_list_init(&queue_late, ctx->insns_count);
624
625
  /* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
626
0
  b = ctx->cfg_blocks_count;
627
0
  for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
628
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
629
0
    ref = bb->end;
630
631
    /* process the last instruction of the block */
632
0
    insn = &ctx->ir_base[ref];
633
0
    _blocks[ref] = b; /* pin to block */
634
0
    if (insn->inputs_count > 1) {
635
      /* insn has input data edges */
636
0
      ir_list_push_unchecked(&queue_early, ref);
637
0
    }
638
0
    ref = insn->op1; /* control predecessor */
639
640
0
    while (ref != bb->start) {
641
0
      insn = &ctx->ir_base[ref];
642
0
      _blocks[ref] = b; /* pin to block */
643
0
      if (insn->inputs_count > 1) {
644
        /* insn has input data edges */
645
0
        ir_list_push_unchecked(&queue_early, ref);
646
0
      }
647
0
      if (insn->type != IR_VOID) {
648
0
        IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
649
0
      }
650
0
      ref = insn->op1; /* control predecessor */
651
0
    }
652
653
    /* process the first instruction of the block */
654
0
    _blocks[ref] = b; /* pin to block */
655
656
0
    use_list = &ctx->use_lists[ref];
657
0
    n = use_list->count;
658
0
    if (n > 1) {
659
0
      for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
660
0
        ref = *p;
661
0
        use_insn = &ctx->ir_base[ref];
662
0
        if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
663
0
          bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
664
0
          if (EXPECTED(ctx->use_lists[ref].count != 0)) {
665
0
            _blocks[ref] = b; /* pin to block */
666
0
            ir_list_push_unchecked(&queue_early, ref);
667
0
          }
668
0
        } else if (use_insn->op == IR_PARAM) {
669
0
          bb->flags |= IR_BB_HAS_PARAM;
670
0
          _blocks[ref] = b; /* pin to block */
671
0
        } else if (use_insn->op == IR_VAR) {
672
0
          bb->flags |= IR_BB_HAS_VAR;
673
0
          _blocks[ref] = b; /* pin to block */
674
0
        }
675
0
      }
676
0
    }
677
0
  }
678
679
0
  n = ir_list_len(&queue_early);
680
0
  while (n > 0) {
681
0
    n--;
682
0
    ref = ir_list_at(&queue_early, n);
683
0
    insn = &ctx->ir_base[ref];
684
0
    k = insn->inputs_count - 1;
685
0
    for (p = insn->ops + 2; k > 0; p++, k--) {
686
0
      ref = *p;
687
0
      if (ref > 0 && _blocks[ref] == 0) {
688
0
        ir_gcm_schedule_early(ctx, ref, &queue_late);
689
0
      }
690
0
    }
691
0
  }
692
693
0
#ifdef IR_DEBUG
694
0
  if (ctx->flags & IR_DEBUG_GCM) {
695
0
    fprintf(stderr, "GCM Schedule Early\n");
696
0
    for (n = 1; n < ctx->insns_count; n++) {
697
0
      fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
698
0
    }
699
0
  }
700
0
#endif
701
702
0
#if IR_GCM_SPLIT
703
0
  ir_gcm_split_data data;
704
705
0
  ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1);
706
0
  ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1);
707
0
  ctx->data = &data;
708
0
#endif
709
710
0
  n = ir_list_len(&queue_late);
711
0
  while (n > 0) {
712
0
    n--;
713
0
    ref = ir_list_at(&queue_late, n);
714
0
    b = ctx->cfg_map[ref];
715
0
    if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
716
0
      ir_gcm_schedule_late(ctx, ref, b);
717
0
    }
718
0
  }
719
720
0
#if IR_GCM_SPLIT
721
0
  ir_list_free(&data.worklist);
722
0
  ir_sparse_set_free(&data.totally_useful);
723
0
  ctx->data = NULL;
724
0
#endif
725
726
0
  ir_list_free(&queue_early);
727
0
  ir_list_free(&queue_late);
728
729
0
#ifdef IR_DEBUG
730
0
  if (ctx->flags & IR_DEBUG_GCM) {
731
0
    fprintf(stderr, "GCM Schedule Late\n");
732
0
    for (n = 1; n < ctx->insns_count; n++) {
733
0
      fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
734
0
    }
735
0
  }
736
0
#endif
737
738
0
  return 1;
739
0
}
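For context, ir_gcm() is normally followed by ir_schedule() (defined further down in this file); both currently always return 1. A hedged caller sketch, assuming a hypothetical driver function and the declarations from ir.h:

    /* Hypothetical driver (the real call sites live outside this file). */
    static int gcm_and_schedule(ir_ctx *ctx)
    {
        if (!ir_gcm(ctx)) {
            return 0; /* the int return type leaves room for failure reporting */
        }
        return ir_schedule(ctx);
    }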
740
741
static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
742
0
{
743
0
  uint32_t n1, n2, pos;
744
0
  ir_ref key;
745
0
  ir_hashtab_bucket *b1, *b2;
746
0
  ir_hashtab *binding = ctx->binding;
747
0
  uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
748
749
0
  memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
750
0
  n1 = binding->count;
751
0
  n2 = 0;
752
0
  pos = 0;
753
0
  b1 = binding->data;
754
0
  b2 = binding->data;
755
0
  while (n1 > 0) {
756
0
    key = b1->key;
757
0
    IR_ASSERT(key < ctx->insns_count);
758
0
    if (_xlat[key]) {
759
0
      key = _xlat[key];
760
0
      b2->key = key;
761
0
      if (b1->val > 0) {
762
0
        IR_ASSERT(_xlat[b1->val]);
763
0
        b2->val = _xlat[b1->val];
764
0
      } else {
765
0
        b2->val = b1->val;
766
0
      }
767
0
      key |= binding->mask;
768
0
      b2->next = ((uint32_t*)binding->data)[key];
769
0
      ((uint32_t*)binding->data)[key] = pos;
770
0
      pos += sizeof(ir_hashtab_bucket);
771
0
      b2++;
772
0
      n2++;
773
0
    }
774
0
    b1++;
775
0
    n1--;
776
0
  }
777
0
  binding->count = n2;
778
0
}
779
780
IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
781
0
{
782
0
  if (!_xlat[ref]) {
783
0
    _xlat[ref] = ref; /* this is only a "used constant" marker */
784
0
    return 1;
785
0
  }
786
0
  return 0;
787
0
}
788
789
IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start)
790
0
{
791
0
  ir_insn *insn = &ctx->ir_base[start];
792
0
  uint32_t n = insn->inputs_count;
793
0
  ir_ref *p = insn->ops + 1;
794
795
0
  if (n == 1) {
796
0
    return ctx->cfg_map[*p] < b;
797
0
  } else {
798
0
    IR_ASSERT(n > 1);
799
0
    for (; n > 0; p++, n--) {
800
0
      ir_ref input = *p;
801
802
0
      if (!IR_IS_CONST_REF(input)) {
803
0
        uint32_t input_b = ctx->cfg_map[input];
804
805
0
        if (input_b < b) {
806
          /* ordered */
807
0
        } else if ((bb->flags & IR_BB_LOOP_HEADER)
808
0
          && (input_b == b || ctx->cfg_blocks[input_b].loop_header == b)) {
809
          /* back-edge of reducible loop */
810
0
        } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP)
811
0
          && (ctx->cfg_blocks[input_b].loop_header == bb->loop_header)) {
812
          /* closing edge of irreducible loop */
813
0
        } else {
814
0
          return 0;
815
0
        }
816
0
      }
817
0
    }
818
0
    return 1;
819
0
  }
820
0
}
821
822
static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next)
823
0
{
824
0
  uint32_t b, succ, count, *q, *xlat;
825
0
  ir_block *bb;
826
0
  ir_ref ref, n, prev;
827
0
  ir_worklist worklist;
828
0
  ir_block *new_blocks;
829
830
#if 0
831
  for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
832
    if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
833
      goto fix;
834
    }
835
  }
836
  return;
837
838
fix:
839
#endif
840
0
  count = ctx->cfg_blocks_count + 1;
841
0
  new_blocks = ir_mem_malloc(count * sizeof(ir_block));
842
0
  xlat = ir_mem_malloc(count * sizeof(uint32_t));
843
0
  ir_worklist_init(&worklist, count);
844
0
  ir_worklist_push(&worklist, 1);
845
0
  while (ir_worklist_len(&worklist) != 0) {
846
0
next:
847
0
    b = ir_worklist_peek(&worklist);
848
0
    bb = &ctx->cfg_blocks[b];
849
0
    n = bb->successors_count;
850
0
    if (n == 1) {
851
0
      succ = ctx->cfg_edges[bb->successors];
852
0
      if (ir_worklist_push(&worklist, succ)) {
853
0
        goto next;
854
0
      }
855
0
    } else if (n > 1) {
856
0
      uint32_t best = 0;
857
0
      uint32_t best_loop_depth = 0;
858
859
0
      q = ctx->cfg_edges + bb->successors + n;
860
0
      do {
861
0
        q--;
862
0
        succ = *q;
863
0
        if (ir_bitset_in(worklist.visited, succ)) {
864
          /* already processed */
865
0
        } else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER)
866
0
          && (succ == b || ctx->cfg_blocks[b].loop_header == succ)) {
867
          /* back-edge of reducible loop */
868
0
        } else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP)
869
0
          && (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) {
870
          /* closing edge of irreducible loop */
871
0
        } else if (!best) {
872
0
          best = succ;
873
0
          best_loop_depth = ctx->cfg_blocks[best].loop_depth;
874
0
        } else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) {
875
          /* prefer deeper loop */
876
0
          best = succ;
877
0
          best_loop_depth = ctx->cfg_blocks[best].loop_depth;
878
0
        }
879
0
        n--;
880
0
      } while (n > 0);
881
0
      if (best) {
882
0
        ir_worklist_push(&worklist, best);
883
0
        goto next;
884
0
      }
885
0
    }
886
0
    ir_worklist_pop(&worklist);
887
0
    count--;
888
0
    new_blocks[count] = *bb;
889
0
    xlat[b] = count;
890
0
  }
891
0
  IR_ASSERT(count == 1);
892
0
  xlat[0] = 0;
893
0
  ir_worklist_free(&worklist);
894
895
0
  prev = 0;
896
0
  for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
897
0
    bb->idom = xlat[bb->idom];
898
0
    bb->loop_header = xlat[bb->loop_header];
899
0
    n = bb->successors_count;
900
0
    if (n > 0) {
901
0
      for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) {
902
0
        *q = xlat[*q];
903
0
      }
904
0
    }
905
0
    n = bb->predecessors_count;
906
0
    if (n > 0) {
907
0
      for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) {
908
0
        *q = xlat[*q];
909
0
      }
910
0
    }
911
0
    _next[prev] = bb->start;
912
0
    _prev[bb->start] = prev;
913
0
    prev = bb->end;
914
0
  }
915
0
  _next[0] = 0;
916
0
  _next[prev] = 0;
917
918
0
  for (ref = 2; ref < ctx->insns_count; ref++) {
919
0
    ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]];
920
0
  }
921
0
  ir_mem_free(xlat);
922
923
0
  ir_mem_free(ctx->cfg_blocks);
924
0
  ctx->cfg_blocks = new_blocks;
925
0
}
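The tail of ir_fix_bb_order() above is pure renumbering: every stored block index (idom, loop_header, the CFG edge lists, cfg_map) is pushed through the xlat table. A minimal stand-alone illustration of that remap with a hypothetical permutation:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical permutation produced by a reordering pass:
         * old BB2 becomes new BB3 and vice versa. */
        const uint32_t xlat[4] = {0, 1, 3, 2};
        uint32_t cfg_map_toy[3] = {2, 1, 2}; /* block numbers of three toy instructions */

        for (int i = 0; i < 3; i++) {
            cfg_map_toy[i] = xlat[cfg_map_toy[i]];
        }
        assert(cfg_map_toy[0] == 3 && cfg_map_toy[1] == 1 && cfg_map_toy[2] == 3);
        return 0;
    }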
926
927
int ir_schedule(ir_ctx *ctx)
928
0
{
929
0
  ir_ctx new_ctx;
930
0
  ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
931
0
  ir_ref *_xlat;
932
0
  ir_ref *edges;
933
0
  ir_ref prev_b_end;
934
0
  uint32_t b;
935
0
  uint32_t *_blocks = ctx->cfg_map;
936
0
  ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
937
0
  ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
938
0
  ir_block *bb;
939
0
  ir_insn *insn, *new_insn;
940
0
  ir_use_list *lists, *use_list, *new_list;
941
0
  bool bad_bb_order = 0;
942
943
944
  /* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */
945
0
  IR_ASSERT(_blocks[1] == 1);
946
947
  /* link BB boundaries */
948
0
  _prev[1] = 0;
949
0
  prev_b_end = ctx->cfg_blocks[1].end;
950
0
  _next[1] = prev_b_end;
951
0
  _prev[prev_b_end] = 1;
952
0
  for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) {
953
0
    _next[prev_b_end] = bb->start;
954
0
    _prev[bb->start] = prev_b_end;
955
0
    _next[bb->start] = bb->end;
956
0
    _prev[bb->end] = bb->start;
957
0
    prev_b_end = bb->end;
958
0
    if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) {
959
0
      bad_bb_order = 1;
960
0
    }
961
0
  }
962
0
  _next[prev_b_end] = 0;
963
964
  /* insert intermediate BB nodes */
965
0
  for (i = 2, j = 1; i < ctx->insns_count; i++) {
966
0
    b = _blocks[i];
967
0
    if (!b) continue;
968
0
    bb = &ctx->cfg_blocks[b];
969
0
    if (i != bb->start && i != bb->end) {
970
      /* insert before "end" */
971
0
      ir_ref n = bb->end;
972
0
      ir_ref p = _prev[n];
973
0
      _prev[i] = p;
974
0
      _next[i] = n;
975
0
      _next[p] = i;
976
0
      _prev[n] = i;
977
0
    }
978
0
  }
979
980
0
  if (bad_bb_order) {
981
0
    ir_fix_bb_order(ctx, _prev, _next);
982
0
  }
983
984
0
#ifdef IR_DEBUG
985
0
  if (ctx->flags & IR_DEBUG_SCHEDULE) {
986
0
    fprintf(stderr, "Before Schedule\n");
987
0
    for (i = 1; i != 0; i = _next[i]) {
988
0
      fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
989
0
    }
990
0
  }
991
0
#endif
992
993
0
  _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
994
0
  _xlat += ctx->consts_count;
995
0
  _xlat[IR_TRUE] = IR_TRUE;
996
0
  _xlat[IR_FALSE] = IR_FALSE;
997
0
  _xlat[IR_NULL] = IR_NULL;
998
0
  _xlat[IR_UNUSED] = IR_UNUSED;
999
0
  insns_count = 1;
1000
0
  consts_count = -(IR_TRUE - 1);
1001
1002
  /* Topological sort according to dependencies inside each basic block */
1003
0
  for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
1004
0
    ir_ref start;
1005
1006
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1007
    /* Schedule BB start */
1008
0
    start = i = bb->start;
1009
0
    _xlat[i] = bb->start = insns_count;
1010
0
    insn = &ctx->ir_base[i];
1011
0
    if (insn->op == IR_BEGIN) {
1012
0
      if (insn->op2) {
1013
0
        consts_count += ir_count_constant(_xlat, insn->op2);
1014
0
      }
1015
0
    } else if (insn->op == IR_CASE_VAL) {
1016
0
      IR_ASSERT(insn->op2 < IR_TRUE);
1017
0
      consts_count += ir_count_constant(_xlat, insn->op2);
1018
0
    } else if (insn->op == IR_CASE_RANGE) {
1019
0
      IR_ASSERT(insn->op2 < IR_TRUE);
1020
0
      consts_count += ir_count_constant(_xlat, insn->op2);
1021
0
      IR_ASSERT(insn->op3 < IR_TRUE);
1022
0
      consts_count += ir_count_constant(_xlat, insn->op3);
1023
0
    }
1024
0
    n = insn->inputs_count;
1025
0
    insns_count += ir_insn_inputs_to_len(n);
1026
0
    i = _next[i];
1027
0
    insn = &ctx->ir_base[i];
1028
0
    if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
1029
0
      int count = 0;
1030
1031
      /* Schedule PARAM, VAR, PI */
1032
0
      while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
1033
0
        _xlat[i] = insns_count;
1034
0
        insns_count += 1;
1035
0
        i = _next[i];
1036
0
        insn = &ctx->ir_base[i];
1037
0
        count++;
1038
0
      }
1039
      /* Schedule PHIs */
1040
0
      while (insn->op == IR_PHI) {
1041
0
        ir_ref j, *p, input;
1042
1043
0
        _xlat[i] = insns_count;
1044
        /* Reuse "n" from MERGE and skip first input */
1045
0
        insns_count += ir_insn_inputs_to_len(n + 1);
1046
0
        for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
1047
0
          input = *p;
1048
0
          if (input < IR_TRUE) {
1049
0
            consts_count += ir_count_constant(_xlat, input);
1050
0
          }
1051
0
        }
1052
0
        i = _next[i];
1053
0
        insn = &ctx->ir_base[i];
1054
0
        count++;
1055
0
      }
1056
      /* Schedule remaining PHIs */
1057
0
      if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) {
1058
0
        ir_use_list *use_list = &ctx->use_lists[start];
1059
0
        ir_ref *p, count = use_list->count;
1060
0
        ir_ref phis = _prev[i];
1061
1062
0
        for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
1063
0
          ir_ref use = *p;
1064
0
          ir_insn *use_insn = &ctx->ir_base[use];
1065
0
          if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) {
1066
0
            IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM);
1067
0
            if (use_insn->op == IR_PARAM
1068
0
             || use_insn->op == IR_VAR
1069
0
             || use_insn->op == IR_PI
1070
0
             || use_insn->op == IR_PHI) {
1071
0
              if (_prev[use] != phis) {
1072
                /* remove "use" */
1073
0
                _prev[_next[use]] = _prev[use];
1074
0
                _next[_prev[use]] = _next[use];
1075
                /* insert "use" after "phis" */
1076
0
                _prev[use] = phis;
1077
0
                _next[use] = _next[phis];
1078
0
                _prev[_next[phis]] = use;
1079
0
                _next[phis] = use;
1080
0
              }
1081
0
              phis = use;
1082
0
              _xlat[use] = insns_count;
1083
0
              if (use_insn->op == IR_PHI) {
1084
0
                ir_ref *q;
1085
                /* Reuse "n" from MERGE and skip first input */
1086
0
                insns_count += ir_insn_inputs_to_len(n + 1);
1087
0
                for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) {
1088
0
                  ir_ref input = *q;
1089
0
                  if (input < IR_TRUE) {
1090
0
                    consts_count += ir_count_constant(_xlat, input);
1091
0
                  }
1092
0
                }
1093
0
              } else {
1094
0
                insns_count += 1;
1095
0
              }
1096
0
            }
1097
0
          }
1098
0
        }
1099
0
        i = _next[phis];
1100
0
        insn = &ctx->ir_base[i];
1101
0
      }
1102
0
    }
1103
0
    if (bb->successors_count > 1) {
1104
0
      ir_ref input, j = bb->end;
1105
0
      ir_insn *end = &ctx->ir_base[j];
1106
1107
0
      if (end->op == IR_IF) {
1108
        /* Move condition closer to IF */
1109
0
        input = end->op2;
1110
0
        if (input > 0
1111
0
         && _blocks[input] == b
1112
0
         && !_xlat[input]
1113
0
         && _prev[j] != input
1114
0
         && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) {
1115
0
          if (input == i) {
1116
0
            i = _next[i];
1117
0
            insn = &ctx->ir_base[i];
1118
0
          }
1119
          /* remove "input" */
1120
0
          _prev[_next[input]] = _prev[input];
1121
0
          _next[_prev[input]] = _next[input];
1122
          /* insert before "j" */
1123
0
          _prev[input] = _prev[j];
1124
0
          _next[input] = j;
1125
0
          _next[_prev[j]] = input;
1126
0
          _prev[j] = input;
1127
0
        }
1128
0
      }
1129
0
    }
1130
0
    while (i != bb->end) {
1131
0
      ir_ref n, j, *p, input;
1132
1133
0
restart:
1134
0
      IR_ASSERT(_blocks[i] == b);
1135
0
      n = insn->inputs_count;
1136
0
      for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
1137
0
        input = *p;
1138
0
        if (!_xlat[input]) {
1139
          /* input is not scheduled yet */
1140
0
          if (input > 0) {
1141
0
            if (_blocks[input] == b) {
1142
              /* "input" should be before "i" to satisfy dependency */
1143
0
#ifdef IR_DEBUG
1144
0
              if (ctx->flags & IR_DEBUG_SCHEDULE) {
1145
0
                fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
1146
0
              }
1147
0
#endif
1148
              /* remove "input" */
1149
0
              _prev[_next[input]] = _prev[input];
1150
0
              _next[_prev[input]] = _next[input];
1151
              /* insert before "i" */
1152
0
              _prev[input] = _prev[i];
1153
0
              _next[input] = i;
1154
0
              _next[_prev[i]] = input;
1155
0
              _prev[i] = input;
1156
              /* restart from "input" */
1157
0
              i = input;
1158
0
              insn = &ctx->ir_base[i];
1159
0
              goto restart;
1160
0
            }
1161
0
          } else if (input < IR_TRUE) {
1162
0
            consts_count += ir_count_constant(_xlat, input);
1163
0
          }
1164
0
        }
1165
0
      }
1166
0
      _xlat[i] = insns_count;
1167
0
      insns_count += ir_insn_inputs_to_len(n);
1168
0
      IR_ASSERT(_next[i] != IR_UNUSED);
1169
0
      i = _next[i];
1170
0
      insn = &ctx->ir_base[i];
1171
0
    }
1172
    /* Schedule BB end */
1173
0
    _xlat[i] = bb->end = insns_count;
1174
0
    insns_count++;
1175
0
    if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
1176
0
      if (insn->op2 < IR_TRUE) {
1177
0
        consts_count += ir_count_constant(_xlat, insn->op2);
1178
0
      }
1179
0
    }
1180
0
  }
1181
1182
0
#ifdef IR_DEBUG
1183
0
  if (ctx->flags & IR_DEBUG_SCHEDULE) {
1184
0
    fprintf(stderr, "After Schedule\n");
1185
0
    for (i = 1; i != 0; i = _next[i]) {
1186
0
      fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]);
1187
0
    }
1188
0
  }
1189
0
#endif
1190
1191
0
#if 1
1192
  /* Check whether scheduling left the IR unchanged; if so, skip the rebuild */
1193
0
  if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
1194
0
    bool changed = 0;
1195
1196
0
    for (i = 1; i != 0; i = _next[i]) {
1197
0
      if (_xlat[i] != i) {
1198
0
        changed = 1;
1199
0
        break;
1200
0
      }
1201
0
    }
1202
0
    if (!changed) {
1203
0
      _xlat -= ctx->consts_count;
1204
0
      ir_mem_free(_xlat);
1205
0
      ir_mem_free(_next);
1206
1207
0
      ctx->prev_ref = _prev;
1208
0
      ctx->flags2 |= IR_LINEAR;
1209
0
      ir_truncate(ctx);
1210
1211
0
      return 1;
1212
0
    }
1213
0
  }
1214
0
#endif
1215
1216
0
  ir_mem_free(_prev);
1217
1218
0
  ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
1219
0
  new_ctx.insns_count = insns_count;
1220
0
  new_ctx.flags2 = ctx->flags2;
1221
0
  new_ctx.ret_type = ctx->ret_type;
1222
0
  new_ctx.value_params = ctx->value_params;
1223
0
  new_ctx.mflags = ctx->mflags;
1224
0
  new_ctx.spill_base = ctx->spill_base;
1225
0
  new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
1226
0
  new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
1227
0
  new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
1228
0
  new_ctx.fixed_regset = ctx->fixed_regset;
1229
0
  new_ctx.fixed_save_regset = ctx->fixed_save_regset;
1230
0
  new_ctx.entries_count = ctx->entries_count;
1231
#if defined(IR_TARGET_AARCH64)
1232
  new_ctx.deoptimization_exits = ctx->deoptimization_exits;
1233
  new_ctx.get_exit_addr = ctx->get_exit_addr;
1234
  new_ctx.get_veneer = ctx->get_veneer;
1235
  new_ctx.set_veneer = ctx->set_veneer;
1236
#endif
1237
0
  new_ctx.loader = ctx->loader;
1238
1239
  /* Copy constants */
1240
0
  if (consts_count == ctx->consts_count) {
1241
0
    new_ctx.consts_count = consts_count;
1242
0
    ref = 1 - consts_count;
1243
0
    insn = &ctx->ir_base[ref];
1244
0
    new_insn = &new_ctx.ir_base[ref];
1245
1246
0
    memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
1247
0
    if (ctx->strtab.data) {
1248
0
      while (ref != IR_TRUE) {
1249
0
        if (new_insn->op == IR_FUNC_ADDR) {
1250
0
          if (new_insn->proto) {
1251
0
            size_t len;
1252
0
            const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1253
0
            new_insn->proto = ir_strl(&new_ctx, proto, len);
1254
0
          }
1255
0
        } else if (new_insn->op == IR_FUNC) {
1256
0
          size_t len;
1257
0
          const char *name = ir_get_strl(ctx, new_insn->val.name, &len);
1258
0
          new_insn->val.u64 = ir_strl(&new_ctx, name, len);
1259
0
          if (new_insn->proto) {
1260
0
            const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1261
0
            new_insn->proto = ir_strl(&new_ctx, proto, len);
1262
0
          }
1263
0
        } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) {
1264
0
          size_t len;
1265
0
          const char *str = ir_get_strl(ctx, new_insn->val.name, &len);
1266
0
          new_insn->val.u64 = ir_strl(&new_ctx, str, len);
1267
0
        }
1268
0
        new_insn++;
1269
0
        ref++;
1270
0
      }
1271
0
    }
1272
0
  } else {
1273
0
    new_ref = -new_ctx.consts_count;
1274
0
    new_insn = &new_ctx.ir_base[new_ref];
1275
0
    for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
1276
0
      if (!_xlat[ref]) {
1277
0
        continue;
1278
0
      }
1279
0
      new_insn->optx = insn->optx;
1280
0
      new_insn->prev_const = 0;
1281
0
      if (insn->op == IR_FUNC_ADDR) {
1282
0
        new_insn->val.u64 = insn->val.u64;
1283
0
        if (insn->proto) {
1284
0
          size_t len;
1285
0
          const char *proto = ir_get_strl(ctx, insn->proto, &len);
1286
0
          new_insn->proto = ir_strl(&new_ctx, proto, len);
1287
0
        } else {
1288
0
          new_insn->proto = 0;
1289
0
        }
1290
0
      } else if (insn->op == IR_FUNC) {
1291
0
        size_t len;
1292
0
        const char *name = ir_get_strl(ctx, insn->val.name, &len);
1293
0
        new_insn->val.u64 = ir_strl(&new_ctx, name, len);
1294
0
        if (insn->proto) {
1295
0
          const char *proto = ir_get_strl(ctx, insn->proto, &len);
1296
0
          new_insn->proto = ir_strl(&new_ctx, proto, len);
1297
0
        } else {
1298
0
          new_insn->proto = 0;
1299
0
        }
1300
0
      } else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) {
1301
0
        size_t len;
1302
0
        const char *str = ir_get_strl(ctx, insn->val.name, &len);
1303
0
        new_insn->val.u64 = ir_strl(&new_ctx, str, len);
1304
0
      } else {
1305
0
        new_insn->val.u64 = insn->val.u64;
1306
0
      }
1307
0
      _xlat[ref] = new_ref;
1308
0
      new_ref--;
1309
0
      new_insn--;
1310
0
    }
1311
0
    new_ctx.consts_count = -new_ref;
1312
0
  }
1313
1314
0
  new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
1315
0
  new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
1316
0
  new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
1317
0
  new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
1318
1319
  /* Copy instructions, use lists and use edges */
1320
0
  prev_ref = 0;
1321
0
  use_edges_count = 0;
1322
0
  for (i = 1; i != 0; i = _next[i]) {
1323
0
    new_ref = _xlat[i];
1324
0
    new_ctx.cfg_map[new_ref] = _blocks[i];
1325
0
    _prev[new_ref] = prev_ref;
1326
0
    prev_ref = new_ref;
1327
1328
0
    use_list = &ctx->use_lists[i];
1329
0
    n = use_list->count;
1330
0
    k = 0;
1331
0
    if (n == 1) {
1332
0
      ref = ctx->use_edges[use_list->refs];
1333
0
      if (_xlat[ref]) {
1334
0
        *edges = _xlat[ref];
1335
0
        edges++;
1336
0
        k = 1;
1337
0
      }
1338
0
    } else {
1339
0
      p = &ctx->use_edges[use_list->refs];
1340
0
      while (n--) {
1341
0
        ref = *p;
1342
0
        if (_xlat[ref]) {
1343
0
          *edges = _xlat[ref];
1344
0
          edges++;
1345
0
          k++;
1346
0
        }
1347
0
        p++;
1348
0
      }
1349
0
    }
1350
0
    new_list = &lists[new_ref];
1351
0
    new_list->refs = use_edges_count;
1352
0
    use_edges_count += k;
1353
0
    new_list->count = k;
1354
1355
0
    insn = &ctx->ir_base[i];
1356
0
    new_insn = &new_ctx.ir_base[new_ref];
1357
1358
0
    new_insn->optx = insn->optx;
1359
0
    n = new_insn->inputs_count;
1360
0
    switch (n) {
1361
0
      case 0:
1362
0
        new_insn->op1 = insn->op1;
1363
0
        new_insn->op2 = insn->op2;
1364
0
        new_insn->op3 = insn->op3;
1365
0
        break;
1366
0
      case 1:
1367
0
        new_insn->op1 = _xlat[insn->op1];
1368
0
        if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR || new_insn->op == IR_PROTO) {
1369
0
          size_t len;
1370
0
          const char *str = ir_get_strl(ctx, insn->op2, &len);
1371
0
          new_insn->op2 = ir_strl(&new_ctx, str, len);
1372
0
        } else if (new_insn->op == IR_BEGIN && insn->op2) {
1373
0
          new_insn->op2 = _xlat[insn->op2];
1374
0
        } else {
1375
0
          new_insn->op2 = insn->op2;
1376
0
        }
1377
0
        new_insn->op3 = insn->op3;
1378
0
        break;
1379
0
      case 2:
1380
0
        new_insn->op1 = _xlat[insn->op1];
1381
0
        new_insn->op2 = _xlat[insn->op2];
1382
0
        new_insn->op3 = insn->op3;
1383
0
#if IR_SCHEDULE_SWAP_OPS
1384
        /* Swap operands according to folding rules */
1385
0
        if (new_insn->op1 < new_insn->op2) {
1386
0
          switch (new_insn->op) {
1387
0
            case IR_EQ:
1388
0
            case IR_NE:
1389
0
            case IR_ORDERED:
1390
0
            case IR_UNORDERED:
1391
0
            case IR_ADD:
1392
0
            case IR_MUL:
1393
0
            case IR_ADD_OV:
1394
0
            case IR_MUL_OV:
1395
0
            case IR_OR:
1396
0
            case IR_AND:
1397
0
            case IR_XOR:
1398
0
            case IR_MIN:
1399
0
            case IR_MAX:
1400
0
              SWAP_REFS(new_insn->op1, new_insn->op2);
1401
0
              break;
1402
0
            case IR_LT:
1403
0
            case IR_GE:
1404
0
            case IR_LE:
1405
0
            case IR_GT:
1406
0
            case IR_ULT:
1407
0
            case IR_UGE:
1408
0
            case IR_ULE:
1409
0
            case IR_UGT:
1410
0
              SWAP_REFS(new_insn->op1, new_insn->op2);
1411
0
              new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
1412
0
              break;
1413
0
          }
1414
0
        }
1415
0
#endif
1416
0
        break;
1417
0
      case 3:
1418
0
        new_insn->op1 = _xlat[insn->op1];
1419
0
        new_insn->op2 = _xlat[insn->op2];
1420
0
        new_insn->op3 = _xlat[insn->op3];
1421
0
        break;
1422
0
      default:
1423
0
        for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
1424
0
          *q = _xlat[*p];
1425
0
        }
1426
0
        break;
1427
0
    }
1428
0
  }
1429
1430
  /* Update list of terminators (IR_OPND_CONTROL_REF) */
1431
0
  insn = &new_ctx.ir_base[1];
1432
0
  ref = insn->op1;
1433
0
  if (ref) {
1434
0
    insn->op1 = ref = _xlat[ref];
1435
0
    while (1) {
1436
0
      insn = &new_ctx.ir_base[ref];
1437
0
      ref = insn->op3;
1438
0
      if (!ref) {
1439
0
        break;
1440
0
      }
1441
0
      insn->op3 = ref = _xlat[ref];
1442
0
    }
1443
0
  }
1444
1445
0
  IR_ASSERT(ctx->use_edges_count >= use_edges_count);
1446
0
  new_ctx.use_edges_count = use_edges_count;
1447
0
  new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
1448
1449
0
  if (ctx->binding) {
1450
0
    ir_xlat_binding(ctx, _xlat);
1451
0
    new_ctx.binding = ctx->binding;
1452
0
    ctx->binding = NULL;
1453
0
  }
1454
1455
0
  _xlat -= ctx->consts_count;
1456
0
  ir_mem_free(_xlat);
1457
1458
0
  new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
1459
0
  new_ctx.cfg_edges_count = ctx->cfg_edges_count;
1460
0
  new_ctx.cfg_blocks = ctx->cfg_blocks;
1461
0
  new_ctx.cfg_edges = ctx->cfg_edges;
1462
0
  ctx->cfg_blocks = NULL;
1463
0
  ctx->cfg_edges = NULL;
1464
0
  ctx->value_params = NULL;
1465
0
  ir_code_buffer *saved_code_buffer = ctx->code_buffer;
1466
1467
0
  ir_free(ctx);
1468
0
  IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
1469
0
  IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
1470
0
  memcpy(ctx, &new_ctx, sizeof(ir_ctx));
1471
0
  ctx->code_buffer = saved_code_buffer;
1472
0
  ctx->flags2 |= IR_LINEAR;
1473
1474
0
  ir_mem_free(_next);
1475
1476
0
  return 1;
1477
0
}
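One detail of the operand canonicalization above (under IR_SCHEDULE_SWAP_OPS) is the new_insn->op ^= 3 line: it relies on the comparison opcodes being laid out so that XOR with 3 swaps LT with GT and LE with GE. A toy enum with that assumed layout shows the trick in isolation:

    #include <assert.h>

    /* Toy layout mirroring the assumed opcode order (LT, GE, LE, GT). */
    enum cmp { LT, GE, LE, GT };

    static enum cmp swapped(enum cmp op)
    {
        return (enum cmp)(op ^ 3); /* operands flipped: LT<->GT, LE<->GE */
    }

    int main(void)
    {
        assert(swapped(LT) == GT && swapped(GT) == LT);
        assert(swapped(LE) == GE && swapped(GE) == LE);
        return 0;
    }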
1478
1479
void ir_build_prev_refs(ir_ctx *ctx)
1480
0
{
1481
0
  uint32_t b;
1482
0
  ir_block *bb;
1483
0
  ir_ref i, n, prev;
1484
0
  ir_insn *insn;
1485
1486
0
  ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
1487
0
  prev = 0;
1488
0
  for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
1489
0
    IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1490
0
    for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
1491
0
      ctx->prev_ref[i] = prev;
1492
0
      n = ir_insn_len(insn);
1493
0
      prev = i;
1494
0
      i += n;
1495
0
      insn += n;
1496
0
    }
1497
0
    ctx->prev_ref[i] = prev;
1498
0
  }
1499
0
}