Coverage Report

Created: 2025-09-27 06:26

/src/php-src/ext/opcache/jit/ir/ir_sccp.c
Line
Count
Source
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (SCCP - Sparse Conditional Constant Propagation)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 *
7
 * The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication
8
 * See:  M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
9
 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
10
 */
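/*
 * Orientation (an illustrative sketch, not part of ir_sccp.c): SCCP tracks
 * one of three lattice states per SSA value -- TOP (undefined), CONST (a
 * single known constant) or BOTTOM (varying).  The code below encodes the
 * state in ir_insn.optx; this self-contained model shows the "meet" rule
 * that ir_sccp_meet_const()/ir_sccp_meet() implement.
 */
typedef enum { LAT_TOP, LAT_CONST, LAT_BOTTOM } lat_kind;
typedef struct { lat_kind kind; long long val; } lat_val;

static lat_val lat_meet(lat_val a, lat_val b)
{
  if (a.kind == LAT_TOP) return b;                /* TOP meet x => x */
  if (b.kind == LAT_TOP) return a;
  if (a.kind == LAT_CONST && b.kind == LAT_CONST && a.val == b.val) {
    return a;                                     /* same constant */
  }
  return (lat_val){LAT_BOTTOM, 0};                /* disagreement => BOTTOM */
}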
11
12
#include "ir.h"
13
#include "ir_private.h"
14
15
#define IR_COMBO_COPY_PROPAGATION 1
16
17
0
#define IR_TOP                  IR_UNUSED
18
0
#define IR_BOTTOM               IR_LAST_OP
19
20
#define IR_MAKE_TOP(ref)        do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
21
0
#define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
22
23
0
#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
24
0
#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].op == IR_BOTTOM)
25
0
#define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
26
0
#define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
27
28
IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
29
0
{
30
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
31
0
  IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
32
0
  return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
33
0
}
34
35
IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
36
0
{
37
0
  ir_use_list *use_list;
38
0
  ir_ref n, *p, use;
39
40
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
41
0
  use_list = &ctx->use_lists[ref];
42
0
  n = use_list->count;
43
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
44
0
    use = *p;
45
0
    if (_values[use].op != IR_BOTTOM) {
46
0
      ir_bitqueue_add(worklist, use);
47
0
    }
48
0
  }
49
0
}
50
51
IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
52
0
{
53
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
54
0
  IR_ASSERT(_values[ref].op == IR_TOP);
55
  /* do backward propagation only once */
56
0
  if (!_values[ref].op1) {
57
0
    _values[ref].op1 = 1;
58
0
    ir_bitqueue_add(worklist, ref);
59
0
  }
60
0
}
61
62
#if IR_COMBO_COPY_PROPAGATION
63
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
64
0
{
65
0
  if (a > 0 && _values[a].op == IR_COPY) {
66
0
    do {
67
0
      a = _values[a].op1;
68
0
      IR_ASSERT(a > 0);
69
0
    } while (_values[a].op == IR_COPY);
70
0
    IR_ASSERT(_values[a].op == IR_BOTTOM);
71
0
  }
72
0
  return a;
73
0
}
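/* Note: a COPY chain always ends at a BOTTOM node, the canonical
 * representative of its copy-equivalence class (asserted above).  E.g. if
 * node 5 is a COPY of 4 and 4 is a COPY of 3 (BOTTOM), ir_sccp_identity()
 * maps both 4 and 5 to 3. */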
74
75
#if 0
76
static void CHECK_LIST(ir_insn *_values, ir_ref ref)
77
{
78
  ir_ref member = _values[ref].op2;
79
  while (member != ref) {
80
    IR_ASSERT(_values[_values[member].op2].op3 == member);
81
    member = _values[member].op2;
82
  }
83
  IR_ASSERT(_values[_values[ref].op2].op3 == ref);
84
}
85
#else
86
# define CHECK_LIST(_values, ref)
87
#endif
88
89
static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
90
0
{
91
0
  IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
92
0
  IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
93
0
  IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
94
95
0
  _values[dst].optx = IR_COPY;
96
0
  _values[dst].op1 = src;
97
98
0
  if (_values[src].op == IR_BOTTOM) {
99
    /* initialize an empty doubly-linked list */
100
0
    if (_values[src].op1 != src) {
101
0
      _values[src].op1 = src;
102
0
      _values[src].op2 = src;
103
0
      _values[src].op3 = src;
104
0
    }
105
0
  } else {
106
0
    src = ir_sccp_identity(ctx, _values, src);
107
0
  }
108
109
  /* insert into the circular doubly-linked list */
110
0
  ir_ref prev = _values[src].op3;
111
0
  _values[dst].op2 = src;
112
0
  _values[dst].op3 = prev;
113
0
  _values[src].op3 = dst;
114
0
  _values[prev].op2 = dst;
115
0
  CHECK_LIST(_values, dst);
116
0
}
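/* Illustrative sketch (not from the original source): ir_sccp_add_identity()
 * threads copy-equivalent nodes into a circular doubly-linked list, reusing
 * op2 as "next" and op3 as "prev".  With ordinary pointers, the insertion of
 * "dst" just before "src" performed above looks like this: */
struct node { struct node *next, *prev; };

static void list_insert_before(struct node *src, struct node *dst)
{
  struct node *prev = src->prev;
  dst->next = src;   /* mirrors _values[dst].op2 = src  */
  dst->prev = prev;  /* mirrors _values[dst].op3 = prev */
  src->prev = dst;   /* mirrors _values[src].op3 = dst  */
  prev->next = dst;  /* mirrors _values[prev].op2 = dst */
}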
117
118
static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
119
0
{
120
0
  ir_ref member, head, tail, next, prev;
121
122
0
  CHECK_LIST(_values, ref);
123
0
  IR_MAKE_BOTTOM(ref);
124
0
  _values[ref].op1 = ref;
125
126
0
  member = _values[ref].op2;
127
0
  head = tail = IR_UNUSED;
128
0
  while (member != ref) {
129
0
    if (_values[member].op != IR_BOTTOM) {
130
0
      ir_bitqueue_add(worklist, member);
131
0
    }
132
0
    ir_sccp_add_uses(ctx, _values, worklist, member);
133
134
0
    next = _values[member].op2;
135
0
    if (ir_sccp_identity(ctx, _values, member) == ref) {
136
      /* remove "member" from the old circular doubly-linked list */
137
0
      prev = _values[member].op3;
138
0
      _values[prev].op2 = next;
139
0
      _values[next].op3 = prev;
140
141
      /* insert "member" into the new doubly-linked list */
142
0
      if (!head) {
143
0
        head = tail = member;
144
0
      } else {
145
0
        _values[tail].op2 = member;
146
0
        _values[member].op3 = tail;
147
0
        tail = member;
148
0
      }
149
0
    }
150
0
    member = next;
151
0
  }
152
153
  /* remove "ref" from the old circular doubly-linked list */
154
0
  next = _values[ref].op2;
155
0
  prev = _values[ref].op3;
156
0
  _values[prev].op2 = next;
157
0
  _values[next].op3 = prev;
158
0
  CHECK_LIST(_values, next);
159
160
  /* close the new circle */
161
0
  if (head) {
162
0
    _values[ref].op2 = head;
163
0
    _values[ref].op3 = tail;
164
0
    _values[tail].op2 = ref;
165
0
    _values[head].op3 = ref;
166
0
  } else {
167
0
    _values[ref].op2 = ref;
168
0
    _values[ref].op3 = ref;
169
0
  }
170
0
  CHECK_LIST(_values, ref);
171
0
}
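/* Illustrative sketch (not from the original source): when "ref" stops
 * being a copy, ir_sccp_split_partition() divides one copy class into two
 * circular lists.  Members whose identity chain reaches "ref" move into a
 * new circle rooted at "ref", which is now BOTTOM and its own representative:
 *
 *   before:  B <-> c1 <-> ref <-> c2 <-> (back to B),  identity(c2) == ref
 *   after:   B <-> c1 <-> (back to B)   and   ref <-> c2 <-> (back to ref)
 */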
172
173
IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
174
0
{
175
0
  if (_values[ref].op == IR_COPY) {
176
0
    ir_sccp_split_partition(ctx, _values, worklist, ref);
177
0
  } else {
178
0
    IR_MAKE_BOTTOM(ref);
179
0
  }
180
0
}
181
182
0
# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
183
#else
184
# define ir_sccp_identity(_ctx, _values, ref) (ref)
185
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
186
#endif
187
188
IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
189
0
{
190
0
  IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
191
192
0
  if (_values[ref].op == IR_TOP) {
193
    /* TOP meet NEW_CONST => NEW_CONST */
194
0
    _values[ref].optx = val_insn->opt;
195
0
    _values[ref].val.u64 = val_insn->val.u64;
196
0
    return 1;
197
0
  } else if (_values[ref].opt == val_insn->opt) {
198
    /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
199
0
    if (_values[ref].val.u64 == val_insn->val.u64) {
200
0
      return 0;
201
0
    }
202
0
  }
203
204
0
  IR_MAKE_BOTTOM_EX(ref);
205
0
  return 1;
206
0
}
207
208
IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
209
0
{
210
0
  ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
211
0
  ir_insn *val_insn;
212
213
0
  if (IR_IS_CONST_REF(val_identity)) {
214
0
    val_insn = &ctx->ir_base[val_identity];
215
0
  } else {
216
0
    val_insn = &_values[val_identity];
217
218
0
    if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
219
0
#if IR_COMBO_COPY_PROPAGATION
220
0
      if (_values[ref].op == IR_COPY) {
221
        /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
222
         *   (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
223
0
        if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
224
0
          return 0; /* not changed */
225
0
        }
226
0
        ir_sccp_split_partition(ctx, _values, worklist, ref);
227
0
        return 1;
228
0
      } else {
229
0
        IR_ASSERT(_values[ref].op != IR_BOTTOM);
230
        /* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
231
        /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
232
0
        ir_sccp_add_identity(ctx, _values, val, ref);
233
0
        return 1;
234
0
      }
235
0
#endif
236
237
0
      IR_MAKE_BOTTOM(ref);
238
0
      return 1;
239
0
    }
240
0
  }
241
242
0
  return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
243
0
}
244
245
static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
246
0
{
247
0
  ir_insn *op1_insn, *op2_insn, *op3_insn;
248
0
  ir_ref op1, op2, op3, copy;
249
0
  uint32_t opt = insn->opt;
250
251
0
  op1 = ir_sccp_identity(ctx, _values, insn->op1);
252
0
  op2 = ir_sccp_identity(ctx, _values, insn->op2);
253
0
  op3 = ir_sccp_identity(ctx, _values, insn->op3);
254
255
0
restart:
256
0
  op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
257
0
  op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
258
0
  op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
259
260
0
  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
261
0
    case IR_FOLD_DO_RESTART:
262
0
      opt = ctx->fold_insn.optx;
263
0
      op1 = ctx->fold_insn.op1;
264
0
      op2 = ctx->fold_insn.op2;
265
0
      op3 = ctx->fold_insn.op3;
266
0
      goto restart;
267
0
    case IR_FOLD_DO_CSE:
268
0
    case IR_FOLD_DO_EMIT:
269
0
      IR_MAKE_BOTTOM_EX(ref);
270
0
      return 1;
271
0
    case IR_FOLD_DO_COPY:
272
0
      copy = ctx->fold_insn.op1;
273
0
      return ir_sccp_meet(ctx, _values, worklist, ref, copy);
274
0
    case IR_FOLD_DO_CONST:
275
0
      return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
276
0
    default:
277
0
      IR_ASSERT(0);
278
0
      return 0;
279
0
  }
280
0
}
281
282
static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
283
0
{
284
0
  ir_ref j, n, input, *merge_input, *p;
285
0
  ir_insn *v, *new_const = NULL;
286
0
#if IR_COMBO_COPY_PROPAGATION
287
0
  ir_ref new_copy = IR_UNUSED;
288
0
  ir_ref new_copy_identity = IR_UNUSED;
289
0
  ir_ref phi_identity = ir_sccp_identity(ctx, _values, i);
290
0
#endif
291
292
0
  if (!IR_IS_REACHABLE(insn->op1)) {
293
0
    return 0;
294
0
  }
295
0
  n = insn->inputs_count;
296
0
  if (n > 3 && _values[i].op == IR_TOP) {
297
0
    for (j = 0; j < (n>>2); j++) {
298
0
      _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
299
0
    }
300
0
  }
301
302
0
  p = insn->ops + 2;
303
0
  merge_input = ctx->ir_base[insn->op1].ops + 1;
304
0
  for (; --n > 0; p++, merge_input++) {
305
0
    IR_ASSERT(*merge_input > 0);
306
0
    if (!IR_IS_REACHABLE(*merge_input)) {
307
0
      continue;
308
0
    }
309
310
0
    input = *p;
311
0
    if (IR_IS_CONST_REF(input)) {
312
0
      v = &ctx->ir_base[input];
313
0
    } else if (input == i) {
314
0
      continue;
315
0
    } else {
316
0
      v = &_values[input];
317
0
      if (v->op == IR_TOP) {
318
0
        ir_sccp_add_input(ctx, _values, worklist, input);
319
0
        continue;
320
0
#if IR_COMBO_COPY_PROPAGATION
321
0
      } else if (v->op == IR_COPY) {
322
0
        input = v->op1;
323
0
        new_copy_identity = ir_sccp_identity(ctx, _values, input);
324
0
        if (new_copy_identity == phi_identity) {
325
0
          new_copy_identity = IR_UNUSED;
326
0
          continue;
327
0
        }
328
0
        new_copy = input;
329
0
        goto next;
330
0
#endif
331
0
      } else if (v->op == IR_BOTTOM) {
332
0
#if IR_COMBO_COPY_PROPAGATION
333
0
        if (input == phi_identity) {
334
0
          continue;
335
0
        }
336
0
        new_copy = new_copy_identity = input;
337
0
        goto next;
338
#else
339
        goto make_bottom;
340
#endif
341
0
      }
342
0
    }
343
0
    new_const = v;
344
0
    goto next;
345
0
  }
346
347
0
  return 0;
348
349
0
next:
350
0
  p++;
351
0
  merge_input++;
352
  /* for all live merge inputs */
353
0
  for (; --n > 0; p++, merge_input++) {
354
0
    IR_ASSERT(*merge_input > 0);
355
0
    if (!IR_IS_REACHABLE(*merge_input)) {
356
0
      continue;
357
0
    }
358
359
0
    input = *p;
360
0
    if (IR_IS_CONST_REF(input)) {
361
0
#if IR_COMBO_COPY_PROPAGATION
362
0
      if (new_copy) {
363
0
        goto make_bottom;
364
0
      }
365
0
#endif
366
0
      v = &ctx->ir_base[input];
367
0
    } else if (input == i) {
368
0
      continue;
369
0
    } else {
370
0
      v = &_values[input];
371
0
      if (v->op == IR_TOP) {
372
0
        ir_sccp_add_input(ctx, _values, worklist, input);
373
0
        continue;
374
0
#if IR_COMBO_COPY_PROPAGATION
375
0
      } else if (v->op == IR_COPY) {
376
0
        ir_ref identity = ir_sccp_identity(ctx, _values, v->op1);
377
378
0
        if (identity == phi_identity || identity == new_copy_identity) {
379
0
          continue;
380
0
        }
381
0
        goto make_bottom;
382
0
#endif
383
0
      } else if (v->op == IR_BOTTOM) {
384
0
#if IR_COMBO_COPY_PROPAGATION
385
0
        if (input == phi_identity || input == new_copy_identity) {
386
0
          continue;
387
0
        }
388
0
#endif
389
0
        goto make_bottom;
390
0
      }
391
0
    }
392
0
    if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
393
0
      goto make_bottom;
394
0
    }
395
0
  }
396
397
0
#if IR_COMBO_COPY_PROPAGATION
398
0
  if (new_copy) {
399
0
    return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
400
0
  }
401
0
#endif
402
403
0
  return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);
404
405
0
make_bottom:
406
0
  IR_MAKE_BOTTOM_EX(i);
407
0
  return 1;
408
0
}
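/* Illustrative examples of the meet-over-inputs computed above (see the
 * lattice sketch near the top of the file).  Only inputs arriving over
 * reachable MERGE edges participate; self-references are skipped:
 *
 *   PHI(C1, C1, <over unreachable edge>)  => C1        (constant)
 *   PHI(C1, C2) with C1 != C2             => BOTTOM
 *   PHI(x, x, <the PHI itself>)           => COPY(x)   (copy propagation)
 */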
409
410
static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
411
0
{
412
0
  if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
413
0
    return ctx->use_lists[ref].count == 1;
414
0
  } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
415
0
    return ctx->use_lists[ref].count == 1;
416
0
  }
417
0
  return 0;
418
0
}
419
420
static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
421
0
{
422
0
  if (ctx->use_lists[ref].count == 1) {
423
0
    uint32_t flags = ir_op_flags[ctx->ir_base[ref].op];
424
425
0
    if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
426
0
      return 1;
427
0
    } else if (ctx->ir_base[ref].op == IR_ALLOCA) {
428
0
      return 1;
429
0
    }
430
0
  }
431
0
  return 0;
432
0
}
433
434
static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
435
0
{
436
0
  if (ctx->use_lists[ref].count == 0) {
437
0
    return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
438
0
  } else {
439
0
    return ir_is_dead_load(ctx, ref);
440
0
  }
441
0
  return 0;
442
0
}
443
444
static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
445
0
{
446
0
  ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
447
448
0
  return ir_const_is_true(v);
449
0
}
450
451
static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
452
0
{
453
0
  ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
454
0
  ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
455
456
0
  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
457
0
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
458
0
  return v1->val.u64 == v2->val.u64;
459
0
}
460
461
static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
462
0
{
463
0
  ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
464
0
  ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
465
0
  ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];
466
467
0
  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
468
0
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
469
0
  IR_ASSERT(!IR_IS_SYM_CONST(v3->op));
470
0
  if (IR_IS_TYPE_SIGNED(v1->type)) {
471
0
    return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64;
472
0
  } else {
473
0
    return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64;
474
0
  }
475
0
}
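/* Illustrative example: the signed/unsigned split above matters because the
 * same bit pattern orders differently.  With v = -1 and the range [-2, 10]:
 *
 *   signed (i64):   -1 >= -2 && -1 <= 10                      -> true
 *   unsigned (u64): 0xFF..FF >= 0xFF..FE, but 0xFF..FF <= 10  -> false
 */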
476
477
#ifdef IR_SCCP_TRACE
478
static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
479
{
480
  if (IR_IS_BOTTOM(i)) {
481
    fprintf(stderr, "BOTTOM");
482
  } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
483
    fprintf(stderr, "CONST(");
484
    ir_print_const(ctx, &_values[i], stderr, true);
485
    fprintf(stderr, ")");
486
#if IR_COMBO_COPY_PROPAGATION
487
  } else if (_values[i].op == IR_COPY) {
488
    fprintf(stderr, "COPY(%d)", _values[i].op1);
489
#endif
490
  } else if (IR_IS_TOP(i)) {
491
    fprintf(stderr, "TOP");
492
  } else if (_values[i].op == IR_IF) {
493
    fprintf(stderr, "IF(%d)", _values[i].op1);
494
  } else if (_values[i].op == IR_MERGE) {
495
    fprintf(stderr, "MERGE(%d)", _values[i].op1);
496
  } else {
497
    fprintf(stderr, "%d", _values[i].op);
498
  }
499
}
500
501
static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
502
{
503
  fprintf(stderr, "%d. ", i);
504
  ir_sccp_trace_val(ctx, _values, i);
505
}
506
507
static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
508
{
509
  fprintf(stderr, " -> ");
510
  ir_sccp_trace_val(ctx, _values, i);
511
  fprintf(stderr, "\n");
512
}
513
#else
514
# define ir_sccp_trace_start(c, v, i)
515
# define ir_sccp_trace_end(c, v, i)
516
#endif
517
518
static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
519
0
{
520
0
  ir_ref i, j, n, *p, use;
521
0
  ir_use_list *use_list;
522
0
  ir_insn *insn, *use_insn;
523
0
  uint32_t flags;
524
525
  /* A slightly modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
526
0
  worklist->pos = 0;
527
0
  ir_bitset_incl(worklist->set, 1);
528
0
  for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
529
0
    IR_ASSERT(_values[i].op != IR_BOTTOM);
530
0
    ir_sccp_trace_start(ctx, _values, i);
531
0
    insn = &ctx->ir_base[i];
532
0
    flags = ir_op_flags[insn->op];
533
0
    if (flags & IR_OP_FLAG_DATA) {
534
0
      if (ctx->use_lists[i].count == 0) {
535
        /* dead code */
536
0
        continue;
537
0
      } else if (insn->op == IR_PHI) {
538
0
        if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
539
0
          continue;
540
0
        }
541
0
      } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
542
0
        bool may_benefit = 0;
543
0
        bool has_top = 0;
544
545
0
        if (_values[i].op != IR_TOP) {
546
0
          may_benefit = 1;
547
0
        }
548
549
0
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
550
0
        n = IR_INPUT_EDGES_COUNT(flags);
551
0
        for (p = insn->ops + 1; n > 0; p++, n--) {
552
0
          ir_ref input = *p;
553
0
          if (input > 0) {
554
0
            if (_values[input].op == IR_TOP) {
555
0
              has_top = 1;
556
0
              ir_sccp_add_input(ctx, _values, worklist, input);
557
0
            } else if (_values[input].op != IR_BOTTOM) {
558
              /* Perform folding only if some of the direct inputs
559
               * are going to be replaced by a constant or copy.
560
               * This approach may miss some folding optimizations
561
               * dependent on indirect inputs, e.g. reassociation.
562
               */
563
0
              may_benefit = 1;
564
0
            }
565
0
          }
566
0
        }
567
0
        if (has_top) {
568
0
          continue;
569
0
        }
570
0
        if (!may_benefit) {
571
0
          IR_MAKE_BOTTOM_EX(i);
572
0
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
573
0
            ir_bitqueue_add(iter_worklist, i);
574
0
          }
575
0
        } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
576
          /* not changed */
577
0
          continue;
578
0
        } else if (_values[i].op == IR_BOTTOM) {
579
0
          insn = &ctx->ir_base[i];
580
0
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
581
0
            ir_bitqueue_add(iter_worklist, i);
582
0
          }
583
0
        }
584
0
      } else {
585
0
        IR_MAKE_BOTTOM_EX(i);
586
0
      }
587
0
    } else if (flags & IR_OP_FLAG_BB_START) {
588
0
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
589
0
        ir_bitqueue_add(iter_worklist, i);
590
0
      }
591
0
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
592
0
        ir_ref unfeasible_inputs = 0;
593
594
0
        n = insn->inputs_count;
595
0
        if (n > 3 && _values[i].op == IR_TOP) {
596
0
          for (j = 0; j < (n>>2); j++) {
597
0
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
598
0
          }
599
0
        }
600
0
        for (p = insn->ops + 1; n > 0; p++, n--) {
601
0
          ir_ref input = *p;
602
0
          IR_ASSERT(input > 0);
603
0
          if (!IR_IS_REACHABLE(input)) {
604
0
            unfeasible_inputs++;
605
0
          }
606
0
        }
607
0
        if (unfeasible_inputs == 0) {
608
0
          IR_MAKE_BOTTOM(i);
609
0
        } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
610
0
          _values[i].optx = IR_MERGE;
611
0
          _values[i].op1 = unfeasible_inputs;
612
0
        } else {
613
0
          continue;
614
0
        }
615
0
        if (ctx->flags2 & IR_MEM2SSA_VARS) {
616
          /* MEM2SSA puts new PHIs at the bottom, but we would like to process them now */
617
0
          use_list = &ctx->use_lists[i];
618
0
          n = use_list->count;
619
0
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
620
0
            use = *p;
621
0
            if (_values[use].op != IR_BOTTOM) {
622
0
              if (ctx->ir_base[use].op == IR_PHI) {
623
0
                ir_bitqueue_del(worklist, use);
624
0
                if (ctx->use_lists[use].count != 0) {
625
0
                  if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) {
626
0
                    ir_sccp_add_uses(ctx, _values, worklist, use);
627
0
                  }
628
0
                }
629
0
              } else {
630
0
                ir_bitqueue_add(worklist, use);
631
0
              }
632
0
            }
633
0
          }
634
0
          continue;
635
0
        }
636
0
      } else {
637
0
        IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1));
638
0
        IR_MAKE_BOTTOM(i);
639
0
      }
640
0
    } else {
641
0
      IR_ASSERT(insn->op1 > 0);
642
0
      if (!IR_IS_REACHABLE(insn->op1)) {
643
        /* control input is not feasible */
644
0
        continue;
645
0
      }
646
0
      if (insn->op == IR_IF) {
647
0
        if (IR_IS_TOP(insn->op2)) {
648
0
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
649
0
          continue;
650
0
        }
651
0
        if (IR_IS_CONST(insn->op2)) {
652
0
          bool b = ir_sccp_is_true(ctx, _values, insn->op2);
653
0
          use_list = &ctx->use_lists[i];
654
0
          IR_ASSERT(use_list->count == 2);
655
0
          p = &ctx->use_edges[use_list->refs];
656
0
          use = *p;
657
0
          use_insn = &ctx->ir_base[use];
658
0
          IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
659
0
          if ((use_insn->op == IR_IF_TRUE) != b) {
660
0
            use = *(p+1);
661
0
            IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
662
0
          }
663
0
          if (_values[i].op == IR_TOP) {
664
0
            _values[i].optx = IR_IF;
665
0
            _values[i].op1 = use;
666
0
            ir_bitqueue_add(worklist, use);
667
0
            continue;
668
0
          } else if (_values[i].op == IR_IF && _values[i].op1 == use) {
669
0
            continue;
670
0
          }
671
0
        }
672
0
        IR_MAKE_BOTTOM(i);
673
0
        ir_bitqueue_add(iter_worklist, i);
674
0
      } else if (insn->op == IR_SWITCH) {
675
0
        if (IR_IS_TOP(insn->op2)) {
676
0
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
677
0
          continue;
678
0
        }
679
0
        if (IR_IS_CONST(insn->op2)) {
680
0
          ir_ref use_case = IR_UNUSED;
681
682
0
          use_list = &ctx->use_lists[i];
683
0
          n = use_list->count;
684
0
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
685
0
            use = *p;
686
0
            IR_ASSERT(use > 0);
687
0
            use_insn = &ctx->ir_base[use];
688
0
            if (use_insn->op == IR_CASE_VAL) {
689
0
              if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
690
0
                use_case = use;
691
0
                break;
692
0
              }
693
0
            } else if (use_insn->op == IR_CASE_DEFAULT) {
694
0
              use_case = use;
695
0
            } else if (use_insn->op == IR_CASE_RANGE) {
696
0
              if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) {
697
0
                use_case = use;
698
0
                break;
699
0
              }
700
0
            }
701
0
          }
702
0
          if (use_case) {
703
0
            use_insn = &ctx->ir_base[use_case];
704
0
            if (_values[i].op == IR_TOP) {
705
0
              _values[i].optx = IR_IF;
706
0
              _values[i].op1 = use_case;
707
0
              ir_bitqueue_add(worklist, use_case);
708
0
              continue;
709
0
            } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) {
710
0
              continue;
711
0
            }
712
0
          }
713
0
        }
714
0
        IR_MAKE_BOTTOM(i);
715
0
      } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
716
        /* schedule dead load elimination */
717
0
        ir_bitqueue_add(iter_worklist, i);
718
0
        IR_MAKE_BOTTOM(i);
719
0
      } else {
720
0
        if (_values[i].op == IR_TOP) {
721
0
          bool has_top = 0;
722
723
          /* control, call, load and store instructions may have unprocessed inputs */
724
0
          n = IR_INPUT_EDGES_COUNT(flags);
725
0
          if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
726
0
            for (j = 0; j < (n>>2); j++) {
727
0
              _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
728
0
            }
729
0
            for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
730
0
              IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
731
0
              use = *p;
732
0
              if (use > 0 && _values[use].op == IR_TOP) {
733
0
                has_top = 1;
734
0
                ir_sccp_add_input(ctx, _values, worklist, use);
735
0
              }
736
0
            }
737
0
          } else if (n >= 2) {
738
0
            IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
739
0
            use = insn->op2;
740
0
            if (use > 0 && _values[use].op == IR_TOP) {
741
0
              has_top = 1;
742
0
              ir_sccp_add_input(ctx, _values, worklist, use);
743
0
            }
744
0
            if (n > 2) {
745
0
              IR_ASSERT(n == 3);
746
0
              IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
747
0
              use = insn->op3;
748
0
              if (use > 0 && _values[use].op == IR_TOP) {
749
0
                has_top = 1;
750
0
                ir_sccp_add_input(ctx, _values, worklist, use);
751
0
              }
752
0
            }
753
0
          }
754
755
0
          if (has_top && !(flags & IR_OP_FLAG_BB_END)) {
756
0
            use = ir_next_control(ctx, i);
757
0
            if (_values[use].op == IR_TOP) {
758
0
              has_top = 1;
759
              /* do forward control propagation only once */
760
0
              if (!_values[use].op1) {
761
0
                _values[use].op1 = 1;
762
0
                ir_bitqueue_add(worklist, use);
763
0
              }
764
0
            }
765
0
            continue;
766
0
          }
767
0
        }
768
0
        IR_MAKE_BOTTOM(i);
769
0
      }
770
0
    }
771
0
    ir_sccp_add_uses(ctx, _values, worklist, i);
772
0
  }
773
774
0
#ifdef IR_DEBUG
775
0
  if (ctx->flags & IR_DEBUG_SCCP) {
776
0
    for (i = 1; i < ctx->insns_count; i++) {
777
0
      if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
778
0
        fprintf(stderr, "%d. CONST(", i);
779
0
        ir_print_const(ctx, &_values[i], stderr, true);
780
0
        fprintf(stderr, ")\n");
781
0
#if IR_COMBO_COPY_PROPAGATION
782
0
      } else if (_values[i].op == IR_COPY) {
783
0
        fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
784
0
#endif
785
0
      } else if (IR_IS_TOP(i)) {
786
0
        fprintf(stderr, "%d. TOP\n", i);
787
0
      } else if (_values[i].op == IR_IF) {
788
0
        fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
789
0
      } else if (_values[i].op == IR_MERGE) {
790
0
        fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
791
0
      } else if (!IR_IS_BOTTOM(i)) {
792
0
        fprintf(stderr, "%d. %d\n", i, _values[i].op);
793
0
      }
794
0
    }
795
0
  }
796
0
#endif
797
0
}
798
799
/***********************/
800
/* SCCP transformation */
801
/***********************/
802
803
static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
804
0
{
805
0
  ir_ref j, n, *p;
806
0
  ir_insn *insn;
807
808
0
  CLEAR_USES(ref);
809
0
  insn = &ctx->ir_base[ref];
810
0
  n = insn->inputs_count;
811
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
812
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
813
0
    *p = IR_UNUSED;
814
0
  }
815
0
}
816
817
static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
818
0
{
819
0
  ir_ref j, n, *p;
820
0
  ir_insn *insn;
821
822
0
  CLEAR_USES(ref);
823
0
  insn = &ctx->ir_base[ref];
824
0
  n = insn->inputs_count;
825
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
826
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
827
0
    ir_ref input = *p;
828
0
    *p = IR_UNUSED;
829
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
830
0
    if (input > 0 && _values[input].op > IR_COPY) {
831
0
      ir_use_list_remove_all(ctx, input, ref);
832
0
      if (ir_is_dead(ctx, input)) {
833
        /* schedule DCE */
834
0
        ir_bitqueue_add(worklist, input);
835
0
      }
836
0
    }
837
0
  }
838
0
}
839
840
static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
841
0
{
842
0
  ir_ref j, n, *p, use, i;
843
0
  ir_insn *insn;
844
0
  ir_use_list *use_list;
845
846
0
  IR_ASSERT(ref != new_ref);
847
848
0
  insn = &ctx->ir_base[ref];
849
850
0
#if IR_COMBO_COPY_PROPAGATION
851
0
  if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) {
852
    /* remove from control list */
853
0
    ir_ref prev = insn->op1;
854
0
    ir_ref next = ir_next_control(ctx, ref);
855
0
    ctx->ir_base[next].op1 = prev;
856
0
    ir_use_list_remove_one(ctx, ref, next);
857
0
    ir_use_list_replace_one(ctx, prev, ref, next);
858
0
    insn->op1 = IR_UNUSED;
859
0
  }
860
0
#endif
861
862
0
  n = insn->inputs_count;
863
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
864
0
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
865
0
    ir_ref input = *p;
866
0
    *p = IR_UNUSED;
867
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
868
0
    if (input > 0 && _values[input].op > IR_COPY) {
869
0
      ir_use_list_remove_all(ctx, input, ref);
870
0
      if (ir_is_dead(ctx, input)) {
871
        /* schedule DCE */
872
0
        ir_bitqueue_add(worklist, input);
873
0
      }
874
0
    }
875
0
  }
876
877
0
  use_list = &ctx->use_lists[ref];
878
0
  n = use_list->count;
879
0
  p = &ctx->use_edges[use_list->refs];
880
0
  if (new_ref <= 0) {
881
    /* constant or IR_UNUSED */
882
0
    for (; n; p++, n--) {
883
0
      use = *p;
884
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
885
0
      if (_values[use].op > IR_COPY) {
886
0
        insn = &ctx->ir_base[use];
887
0
        i = ir_insn_find_op(insn, ref);
888
0
        if (!i) continue;
889
0
        IR_ASSERT(i > 0);
890
0
        ir_insn_set_op(insn, i, new_ref);
891
        /* schedule folding */
892
0
        ir_bitqueue_add(worklist, use);
893
0
      }
894
0
    }
895
0
  } else {
896
0
    for (j = 0; j < n; j++, p++) {
897
0
      use = *p;
898
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
899
0
      if (_values[use].op == IR_BOTTOM) {
900
0
        insn = &ctx->ir_base[use];
901
0
        i = ir_insn_find_op(insn, ref);
902
0
        IR_ASSERT(i > 0);
903
0
        ir_insn_set_op(insn, i, new_ref);
904
0
        if (ir_use_list_add(ctx, new_ref, use)) {
905
          /* restore after reallocation */
906
0
          use_list = &ctx->use_lists[ref];
907
0
          n = use_list->count;
908
0
          p = &ctx->use_edges[use_list->refs + j];
909
0
        }
910
        /* schedule folding */
911
0
        ir_bitqueue_add(worklist, use);
912
0
      }
913
0
    }
914
0
  }
915
0
  CLEAR_USES(ref);
916
0
}
917
918
static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
919
0
{
920
0
  ir_ref next;
921
0
  ir_insn *insn, *next_insn;
922
923
0
  insn = &ctx->ir_base[ref];
924
0
  if (ctx->use_lists[dst].count == 1) {
925
0
    next = ctx->use_edges[ctx->use_lists[dst].refs];
926
0
    next_insn = &ctx->ir_base[next];
927
    /* remove IF and IF_TRUE/FALSE from the doubly-linked control list */
928
0
    next_insn->op1 = insn->op1;
929
0
    ir_use_list_replace_one(ctx, insn->op1, ref, next);
930
    /* remove IF and IF_TRUE/FALSE instructions */
931
0
    ir_sccp_make_nop(ctx, ref);
932
0
    ir_sccp_make_nop(ctx, dst);
933
0
  } else {
934
0
    insn->op2 = IR_UNUSED;
935
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
936
0
    next_insn = &ctx->ir_base[dst];
937
0
    next_insn->op = IR_BEGIN;
938
0
  }
939
0
}
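/* Illustrative sketch (not from the original source) of the CFG effect of
 * ir_sccp_remove_if() once the condition is known to be constant:
 *
 *        IF(const)                       END
 *        /       \            =>          |
 *   IF_TRUE    IF_FALSE                 BEGIN     (taken successor only)
 *
 * When the taken IF_TRUE/IF_FALSE has a single use, both it and the IF are
 * turned into NOPs and the successor links directly to the IF's predecessor;
 * otherwise the IF degenerates into END and the taken target into BEGIN.
 */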
940
941
static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
942
0
{
943
0
  ir_ref old_merge_inputs, new_merge_inputs, i, *p;
944
0
  ir_use_list *use_list;
945
0
  ir_bitset life_inputs;
946
0
  ir_bitset_base_t holder = 0;
947
948
0
  IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
949
0
  old_merge_inputs = insn->inputs_count;
950
0
  new_merge_inputs = 0;
951
0
  life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);
952
953
0
  for (i = 1; i <= old_merge_inputs; i++) {
954
0
    ir_ref input = ir_insn_op(insn, i);
955
956
0
    if (input) {
957
0
      new_merge_inputs++;
958
0
      if (new_merge_inputs != i) {
959
0
        ir_insn_set_op(insn, new_merge_inputs, input);
960
0
      }
961
0
      ir_bitset_incl(life_inputs, i);
962
0
    }
963
0
  }
964
965
0
  if (new_merge_inputs == old_merge_inputs) {
966
    /* All inputs are feasible */
967
0
    if (life_inputs != &holder) {
968
0
      ir_mem_free(life_inputs);
969
0
    }
970
0
    return 0;
971
0
  }
972
973
0
  for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
974
0
    ir_insn_set_op(insn, i, IR_UNUSED);
975
0
  }
976
977
0
  if (new_merge_inputs <= 1) {
978
#if 0
979
    if (new_merge_inputs == 1
980
     && insn->op == IR_LOOP_BEGIN
981
     && insn->op1 > ref) { // TODO: check dominance instead of order
982
      /* dead loop */
983
      ir_use_list_remove_one(ctx, insn->op1, ref);
984
      insn->op1 = IR_UNUSED;
985
      new_merge_inputs = 0;
986
    }
987
#endif
988
0
    insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
989
0
    ir_bitqueue_add(worklist, ref);
990
0
  } else {
991
0
    insn->inputs_count = new_merge_inputs;
992
0
  }
993
994
  /* Update PHIs */
995
0
  use_list = &ctx->use_lists[ref];
996
0
  if (use_list->count > 1) {
997
0
    ir_ref use_count = 0;
998
0
    ir_ref *q;
999
1000
0
    for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
1001
0
      ir_ref use = *p;
1002
0
      ir_insn *use_insn = &ctx->ir_base[use];
1003
1004
0
      if (use_insn->op == IR_PHI) {
1005
0
        ir_ref j, k;
1006
1007
        /* compress PHI */
1008
0
        for (j = k = 1; j <= old_merge_inputs; j++) {
1009
0
          ir_ref input = ir_insn_op(use_insn, j + 1);
1010
1011
0
          if (ir_bitset_in(life_inputs, j)) {
1012
0
            IR_ASSERT(input);
1013
0
            if (k != j) {
1014
0
              ir_insn_set_op(use_insn, k + 1, input);
1015
0
            }
1016
0
            k++;
1017
0
          } else if (input > 0) {
1018
0
            ir_use_list_remove_one(ctx, input, use);
1019
0
          }
1020
0
        }
1021
0
        while (k <= old_merge_inputs) {
1022
0
          k++;
1023
0
          ir_insn_set_op(use_insn, k, IR_UNUSED);
1024
0
        }
1025
1026
0
        if (new_merge_inputs == 0) {
1027
          /* remove PHI */
1028
#if 0
1029
          use_insn->op1 = IR_UNUSED;
1030
          ir_iter_remove_insn(ctx, use, worklist);
1031
#else
1032
0
          IR_ASSERT(0);
1033
0
#endif
1034
0
          continue;
1035
0
        } else if (new_merge_inputs == 1) {
1036
          /* replace PHI by COPY */
1037
0
          use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
1038
0
          use_insn->op1 = use_insn->op2;
1039
0
          use_insn->op2 = IR_UNUSED;
1040
0
          ir_bitqueue_add(worklist, use);
1041
0
          continue;
1042
0
        } else {
1043
0
          use_insn->inputs_count = new_merge_inputs + 1;
1044
0
        }
1045
0
      }
1046
0
      if (p != q) {
1047
0
        *q = use;
1048
0
      }
1049
0
      q++;
1050
0
      use_count++;
1051
0
    }
1052
0
    for (i = use_count; i < use_list->count; q++, i++) {
1053
0
      *q = IR_UNUSED;
1054
0
    }
1055
0
    use_list->count = use_count;
1056
0
  }
1057
1058
0
  if (life_inputs != &holder) {
1059
0
    ir_mem_free(life_inputs);
1060
0
  }
1061
1062
0
  return 1;
1063
0
}
1064
1065
static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
1066
0
{
1067
0
  ir_ref i, j;
1068
0
  ir_insn *value;
1069
1070
0
  for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
1071
0
    if (value->op == IR_BOTTOM) {
1072
0
      continue;
1073
0
    } else if (IR_IS_CONST_OP(value->op)) {
1074
      /* replace instruction by constant */
1075
0
      j = ir_const(ctx, value->val, value->type);
1076
0
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1077
0
    } else if (IR_IS_SYM_CONST(value->op)) {
1078
      /* replace instruction by constant */
1079
0
      j = ir_const_ex(ctx, value->val, value->type, value->optx);
1080
0
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1081
0
#if IR_COMBO_COPY_PROPAGATION
1082
0
    } else if (value->op == IR_COPY) {
1083
0
      ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
1084
0
#endif
1085
0
    } else if (value->op == IR_TOP) {
1086
      /* remove unreachable instruction */
1087
0
      ir_insn *insn = &ctx->ir_base[i];
1088
1089
0
      if (insn->op == IR_NOP) {
1090
        /* already removed */
1091
0
      } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
1092
0
        if (insn->op != IR_PARAM) {
1093
0
          ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
1094
0
        }
1095
0
      } else {
1096
0
        if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
1097
          /* remove from terminators list */
1098
0
          ir_ref prev = ctx->ir_base[1].op1;
1099
0
          if (prev == i) {
1100
0
            ctx->ir_base[1].op1 = insn->op3;
1101
0
          } else {
1102
0
            while (prev) {
1103
0
              if (ctx->ir_base[prev].op3 == i) {
1104
0
                ctx->ir_base[prev].op3 = insn->op3;
1105
0
                break;
1106
0
              }
1107
0
              prev = ctx->ir_base[prev].op3;
1108
0
            }
1109
0
          }
1110
0
        }
1111
0
        ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist);
1112
0
      }
1113
0
    } else if (value->op == IR_IF) {
1114
      /* remove one way IF/SWITCH */
1115
0
      ir_sccp_remove_if(ctx, _values, i, value->op1);
1116
0
    } else if (value->op == IR_MERGE) {
1117
      /* schedule merge to remove unfeasible MERGE inputs */
1118
0
      ir_bitqueue_add(worklist, i);
1119
0
    }
1120
0
  }
1121
1122
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
1123
0
    IR_ASSERT(_values[i].op == IR_MERGE);
1124
0
    ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
1125
0
  }
1126
0
}
1127
1128
/***************************/
1129
/* Iterative Optimizations */
1130
/***************************/
1131
1132
/* Modifying an instruction may open new optimization opportunities for other
1133
 * instructions that use this one.
1134
 *
1135
 * For example, suppose "a = ADD(x, y)" becomes "a = ADD(x, C1)". If we also have
1136
 * "b = ADD(a, C2)" we may optimize it into "b = ADD(x, C1 + C2)" and then might
1137
 * also remove "a".
1138
 *
1139
 * This implementation supports only a few of the optimization combinations from ir_fold.h.
1140
 *
1141
 * TODO: Think about a more general solution ???
1142
 */
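/* A concrete instance of the example above, written as plain C for
 * illustration only:
 *
 *   a = x + 3;     // a = ADD(x, C1) with C1 = 3
 *   b = a + 4;     // b = ADD(a, C2) with C2 = 4
 *     =>
 *   b = x + 7;     // b = ADD(x, C1 + C2); "a" becomes dead if unused
 */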
1143
static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1144
0
{
1145
0
  ir_insn *insn = &ctx->ir_base[ref];
1146
1147
0
  if (insn->op == IR_ADD || insn->op == IR_SUB) {
1148
0
    ir_use_list *use_list = &ctx->use_lists[ref];
1149
1150
0
    if (use_list->count == 1) {
1151
0
      ir_ref use = ctx->use_edges[use_list->refs];
1152
0
      ir_insn *use_insn = &ctx->ir_base[use];
1153
1154
0
      if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
1155
0
        ir_bitqueue_add(worklist, use);
1156
0
      }
1157
0
    }
1158
0
  }
1159
0
}
1160
1161
static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1162
0
{
1163
0
  ir_ref j, n, *p;
1164
0
  ir_insn *insn;
1165
1166
0
  CLEAR_USES(ref);
1167
0
  insn = &ctx->ir_base[ref];
1168
0
  n = insn->inputs_count;
1169
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
1170
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
1171
0
    ir_ref input = *p;
1172
0
    *p = IR_UNUSED;
1173
0
    if (input > 0) {
1174
0
      ir_use_list_remove_all(ctx, input, ref);
1175
0
      if (ir_is_dead(ctx, input)) {
1176
        /* schedule DCE */
1177
0
        ir_bitqueue_add(worklist, input);
1178
0
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1179
        /* try to optimize PHI into ABS/MIN/MAX/COND */
1180
0
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1181
0
      }
1182
0
    }
1183
0
  }
1184
0
}
1185
1186
void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1187
0
{
1188
0
  ir_ref i, j, n, *p, use;
1189
0
  ir_insn *insn;
1190
0
  ir_use_list *use_list;
1191
1192
0
  IR_ASSERT(ref != new_ref);
1193
1194
0
  use_list = &ctx->use_lists[ref];
1195
0
  n = use_list->count;
1196
0
  p = &ctx->use_edges[use_list->refs];
1197
0
  if (new_ref <= 0) {
1198
    /* constant or IR_UNUSED */
1199
0
    for (; n; p++, n--) {
1200
0
      use = *p;
1201
0
      IR_ASSERT(use != ref);
1202
0
      insn = &ctx->ir_base[use];
1203
0
      i = ir_insn_find_op(insn, ref);
1204
0
      IR_ASSERT(i > 0);
1205
0
      ir_insn_set_op(insn, i, new_ref);
1206
      /* schedule folding */
1207
0
      ir_bitqueue_add(worklist, use);
1208
0
      ir_iter_add_related_uses(ctx, use, worklist);
1209
0
    }
1210
0
  } else {
1211
0
    for (j = 0; j < n; j++, p++) {
1212
0
      use = *p;
1213
0
      IR_ASSERT(use != ref);
1214
0
      insn = &ctx->ir_base[use];
1215
0
      i = ir_insn_find_op(insn, ref);
1216
0
      IR_ASSERT(i > 0);
1217
0
      ir_insn_set_op(insn, i, new_ref);
1218
0
      if (ir_use_list_add(ctx, new_ref, use)) {
1219
        /* restore after reallocation */
1220
0
        use_list = &ctx->use_lists[ref];
1221
0
        n = use_list->count;
1222
0
        p = &ctx->use_edges[use_list->refs + j];
1223
0
      }
1224
      /* schedule folding */
1225
0
      ir_bitqueue_add(worklist, use);
1226
0
    }
1227
0
  }
1228
0
}
1229
1230
static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1231
0
{
1232
0
  ir_ref j, n, *p;
1233
0
  ir_insn *insn;
1234
1235
0
  insn = &ctx->ir_base[ref];
1236
0
  n = insn->inputs_count;
1237
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
1238
0
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
1239
0
    ir_ref input = *p;
1240
0
    *p = IR_UNUSED;
1241
0
    if (input > 0) {
1242
0
      ir_use_list_remove_all(ctx, input, ref);
1243
0
      if (ir_is_dead(ctx, input)) {
1244
        /* schedule DCE */
1245
0
        ir_bitqueue_add(worklist, input);
1246
0
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1247
        /* try to optimize PHI into ABS/MIN/MAX/COND */
1248
0
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1249
0
      }
1250
0
    }
1251
0
  }
1252
1253
0
  ir_iter_replace(ctx, ref, new_ref, worklist);
1254
1255
0
  CLEAR_USES(ref);
1256
0
}
1257
1258
void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist)
1259
0
{
1260
0
  ir_insn *insn = &ctx->ir_base[ref];
1261
0
  ir_ref old_val = ir_insn_op(insn, idx);
1262
1263
0
  IR_ASSERT(old_val != new_val);
1264
0
  if (!IR_IS_CONST_REF(new_val)) {
1265
0
    ir_use_list_add(ctx, new_val, ref);
1266
0
  }
1267
0
  ir_insn_set_op(insn, idx, new_val);
1268
0
  if (!IR_IS_CONST_REF(old_val)) {
1269
0
    ir_use_list_remove_one(ctx, old_val, ref);
1270
0
    if (ir_is_dead(ctx, old_val)) {
1271
      /* schedule DCE */
1272
0
      ir_bitqueue_add(worklist, old_val);
1273
0
    }
1274
0
  }
1275
0
}
1276
1277
static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
1278
0
{
1279
0
  IR_ASSERT(!IR_IS_CONST_REF(op1));
1280
1281
0
  ir_use_list *use_list = &ctx->use_lists[op1];
1282
0
  ir_ref *p, n = use_list->count;
1283
1284
0
  for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1285
0
    ir_ref use = *p;
1286
0
    ir_insn *use_insn = &ctx->ir_base[use];
1287
1288
0
    if (use_insn->optx == optx) {
1289
0
      IR_ASSERT(use_insn->op1 == op1);
1290
0
      return use;
1291
0
    }
1292
0
  }
1293
0
  return IR_UNUSED;
1294
0
}
1295
1296
static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
1297
0
{
1298
0
  uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1299
0
  ir_use_list *use_list = NULL;
1300
0
  ir_ref *p, use;
1301
0
  ir_insn *use_insn;
1302
1303
0
  if (n == 2) {
1304
0
    if (!IR_IS_CONST_REF(op1)) {
1305
0
      use_list = &ctx->use_lists[op1];
1306
0
    }
1307
0
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1308
0
      use_list = &ctx->use_lists[op2];
1309
0
    }
1310
0
    if (use_list) {
1311
0
      n = use_list->count;
1312
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1313
0
        use = *p;
1314
0
        if (use != ref) {
1315
0
          use_insn = &ctx->ir_base[use];
1316
0
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) {
1317
0
            IR_ASSERT(use_insn->op3 == op3);
1318
0
            if (use < ref) {
1319
0
              return use;
1320
0
            } else {
1321
0
              ir_bitqueue_add(worklist, use);
1322
0
            }
1323
0
          }
1324
0
        }
1325
0
      }
1326
0
    }
1327
0
  } else if (n < 2) {
1328
0
    IR_ASSERT(n == 1);
1329
0
    if (!IR_IS_CONST_REF(op1)) {
1330
0
      use_list = &ctx->use_lists[op1];
1331
0
      n = use_list->count;
1332
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1333
0
        use = *p;
1334
0
        if (use != ref) {
1335
0
          use_insn = &ctx->ir_base[use];
1336
0
          if (use_insn->opt == opt) {
1337
0
            IR_ASSERT(use_insn->op1 == op1);
1338
0
            IR_ASSERT(use_insn->op2 == op2);
1339
0
            IR_ASSERT(use_insn->op3 == op3);
1340
0
            if (use < ref) {
1341
0
              return use;
1342
0
            } else {
1343
0
              ir_bitqueue_add(worklist, use);
1344
0
            }
1345
0
          }
1346
0
        }
1347
0
      }
1348
0
    }
1349
0
  } else {
1350
0
    IR_ASSERT(n == 3);
1351
0
    if (!IR_IS_CONST_REF(op1)) {
1352
0
      use_list = &ctx->use_lists[op1];
1353
0
    }
1354
0
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1355
0
      use_list = &ctx->use_lists[op2];
1356
0
    }
1357
0
    if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
1358
0
      use_list = &ctx->use_lists[op3];
1359
0
    }
1360
0
    if (use_list) {
1361
0
      n = use_list->count;
1362
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1363
0
        use = *p;
1364
0
        if (use != ref) {
1365
0
          use_insn = &ctx->ir_base[use];
1366
0
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) {
1367
0
            if (use < ref) {
1368
0
              return use;
1369
0
            } else {
1370
0
              ir_bitqueue_add(worklist, use);
1371
0
            }
1372
0
          }
1373
0
        }
1374
0
      }
1375
0
    }
1376
0
  }
1377
0
  return IR_UNUSED;
1378
0
}
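/* Note on the lookup above: a duplicate of (opt, op1, op2, op3) must appear
 * in the use list of every non-constant operand, so scanning the shortest
 * such list suffices.  Only an earlier node (use < ref) is returned as the
 * CSE target, preserving def-before-use order; a later duplicate is
 * re-queued so that it gets folded into this node instead. */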
1379
1380
static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1381
0
{
1382
0
  uint32_t opt;
1383
0
  ir_ref op1, op2, op3, copy;
1384
0
  ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
1385
1386
0
  insn = &ctx->ir_base[ref];
1387
0
  opt = insn->opt;
1388
0
  op1 = insn->op1;
1389
0
  op2 = insn->op2;
1390
0
  op3 = insn->op3;
1391
1392
0
restart:
1393
0
  op1_insn = ctx->ir_base + op1;
1394
0
  op2_insn = ctx->ir_base + op2;
1395
0
  op3_insn = ctx->ir_base + op3;
1396
1397
0
  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
1398
0
    case IR_FOLD_DO_RESTART:
1399
0
      opt = ctx->fold_insn.optx;
1400
0
      op1 = ctx->fold_insn.op1;
1401
0
      op2 = ctx->fold_insn.op2;
1402
0
      op3 = ctx->fold_insn.op3;
1403
0
      goto restart;
1404
0
    case IR_FOLD_DO_CSE:
1405
0
      copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt,
1406
0
        ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist);
1407
0
      if (copy) {
1408
0
        ir_iter_replace_insn(ctx, ref, copy, worklist);
1409
0
        break;
1410
0
      }
1411
0
      IR_FALLTHROUGH;
1412
0
    case IR_FOLD_DO_EMIT:
1413
0
      insn = &ctx->ir_base[ref];
1414
0
      if (insn->opt != ctx->fold_insn.opt
1415
0
       || insn->op1 != ctx->fold_insn.op1
1416
0
       || insn->op2 != ctx->fold_insn.op2
1417
0
       || insn->op3 != ctx->fold_insn.op3) {
1418
1419
0
        ir_use_list *use_list;
1420
0
        ir_ref n, j, *p, use;
1421
1422
0
        insn->optx = ctx->fold_insn.opt;
1423
0
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
1424
0
        insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1425
0
        if (insn->op1 != ctx->fold_insn.op1) {
1426
0
          if (insn->op1 > 0) {
1427
0
            ir_use_list_remove_one(ctx, insn->op1, ref);
1428
0
          }
1429
0
          if (ctx->fold_insn.op1 > 0) {
1430
0
            ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
1431
0
          }
1432
0
        }
1433
0
        if (insn->op2 != ctx->fold_insn.op2) {
1434
0
          if (insn->op2 > 0) {
1435
0
            ir_use_list_remove_one(ctx, insn->op2, ref);
1436
0
          }
1437
0
          if (ctx->fold_insn.op2 > 0) {
1438
0
            ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
1439
0
          }
1440
0
        }
1441
0
        if (insn->op3 != ctx->fold_insn.op3) {
1442
0
          if (insn->op3 > 0) {
1443
0
            ir_use_list_remove_one(ctx, insn->op3, ref);
1444
0
          }
1445
0
          if (ctx->fold_insn.op3 > 0) {
1446
0
            ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
1447
0
          }
1448
0
        }
1449
0
        insn->op1 = ctx->fold_insn.op1;
1450
0
        insn->op2 = ctx->fold_insn.op2;
1451
0
        insn->op3 = ctx->fold_insn.op3;
1452
1453
0
        use_list = &ctx->use_lists[ref];
1454
0
        n = use_list->count;
1455
0
        for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
1456
0
          use = *p;
1457
0
          ir_bitqueue_add(worklist, use);
1458
0
        }
1459
0
      }
1460
0
      break;
1461
0
    case IR_FOLD_DO_COPY:
1462
0
      op1 = ctx->fold_insn.op1;
1463
0
      ir_iter_replace_insn(ctx, ref, op1, worklist);
1464
0
      break;
1465
0
    case IR_FOLD_DO_CONST:
1466
0
      op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
1467
0
      ir_iter_replace_insn(ctx, ref, op1, worklist);
1468
0
      break;
1469
0
    default:
1470
0
      IR_ASSERT(0);
1471
0
      break;
1472
0
  }
1473
0
}
1474
1475
static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
1476
0
{
1477
0
  ir_insn *insn = &ctx->ir_base[ref];
1478
1479
0
  IR_ASSERT(insn->type == IR_DOUBLE);
1480
0
  if (IR_IS_CONST_REF(ref)) {
1481
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
1482
0
  } else {
1483
0
    switch (insn->op) {
1484
0
      case IR_FP2FP:
1485
0
        return 1;
1486
//      case IR_INT2FP:
1487
//        return ctx->use_lists[ref].count == 1;
1488
0
      case IR_NEG:
1489
0
      case IR_ABS:
1490
0
        return ctx->use_lists[ref].count == 1 &&
1491
0
          ir_may_promote_d2f(ctx, insn->op1);
1492
0
      case IR_ADD:
1493
0
      case IR_SUB:
1494
0
      case IR_MUL:
1495
0
      case IR_DIV:
1496
0
      case IR_MIN:
1497
0
      case IR_MAX:
1498
0
        return ctx->use_lists[ref].count == 1 &&
1499
0
          ir_may_promote_d2f(ctx, insn->op1) &&
1500
0
          ir_may_promote_d2f(ctx, insn->op2);
1501
0
      default:
1502
0
        break;
1503
0
    }
1504
0
  }
1505
0
  return 0;
1506
0
}
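/* Illustrative example: the constant check above accepts a double only if
 * it survives a float round-trip unchanged:
 *
 *   0.5: (double)(float)0.5 == 0.5  -> promotable to float
 *   0.1: (double)(float)0.1 != 0.1  -> not promotable (0.1f is rounded)
 */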
1507
1508
static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
1509
0
{
1510
0
  ir_insn *insn = &ctx->ir_base[ref];
1511
1512
0
  IR_ASSERT(insn->type == IR_FLOAT);
1513
0
  if (IR_IS_CONST_REF(ref)) {
1514
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
1515
0
  } else {
1516
0
    switch (insn->op) {
1517
0
      case IR_FP2FP:
1518
0
        return 1;
1519
0
      case IR_INT2FP:
1520
0
        return ctx->use_lists[ref].count == 1;
1521
0
      case IR_NEG:
1522
0
      case IR_ABS:
1523
0
        return ctx->use_lists[ref].count == 1 &&
1524
0
          ir_may_promote_f2d(ctx, insn->op1);
1525
0
      case IR_ADD:
1526
0
      case IR_SUB:
1527
0
      case IR_MUL:
1528
//      case IR_DIV:
1529
0
      case IR_MIN:
1530
0
      case IR_MAX:
1531
0
        return ctx->use_lists[ref].count == 1 &&
1532
0
          ir_may_promote_f2d(ctx, insn->op1) &&
1533
0
          ir_may_promote_f2d(ctx, insn->op2);
1534
0
      default:
1535
0
        break;
1536
0
    }
1537
0
  }
1538
0
  return 0;
1539
0
}
1540
1541
static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];
  uint32_t count;

  IR_ASSERT(insn->type == IR_DOUBLE);
  if (IR_IS_CONST_REF(ref)) {
    return ir_const_float(ctx, (float)insn->val.d);
  } else {
    ir_bitqueue_add(worklist, ref);
    switch (insn->op) {
      case IR_FP2FP:
        count = ctx->use_lists[ref].count;
        ir_use_list_remove_all(ctx, ref, use);
        if (ctx->use_lists[ref].count == 0) {
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
          ref = insn->op1;
          MAKE_NOP(insn);
          return ref;
        } else {
          ir_use_list_add(ctx, insn->op1, use);
          count -= ctx->use_lists[ref].count;
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
        }
        return insn->op1;
//      case IR_INT2FP:
//        insn->type = IR_FLOAT;
//        return ref;
      case IR_NEG:
      case IR_ABS:
        insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
        insn->type = IR_FLOAT;
        return ref;
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
      case IR_DIV:
      case IR_MIN:
      case IR_MAX:
        if (insn->op1 == insn->op2) {
          insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
        } else {
          insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
          insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist);
        }
        insn->type = IR_FLOAT;
        return ref;
      default:
        break;
    }
  }
  IR_ASSERT(0);
  return ref;
}

static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];
  uint32_t count;
  ir_ref old_ref;

  IR_ASSERT(insn->type == IR_FLOAT);
  if (IR_IS_CONST_REF(ref)) {
    return ir_const_double(ctx, (double)insn->val.f);
  } else {
    ir_bitqueue_add(worklist, ref);
    switch (insn->op) {
      case IR_FP2FP:
        count = ctx->use_lists[ref].count;
        ir_use_list_remove_all(ctx, ref, use);
        if (ctx->use_lists[ref].count == 0) {
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
          ref = insn->op1;
          MAKE_NOP(insn);
          return ref;
        } else {
          ir_use_list_add(ctx, insn->op1, use);
          count -= ctx->use_lists[ref].count;
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
        }
        return insn->op1;
      case IR_INT2FP:
        old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
        if (old_ref) {
          IR_ASSERT(ctx->use_lists[ref].count == 1);
          ir_use_list_remove_one(ctx, insn->op1, ref);
          CLEAR_USES(ref);
          MAKE_NOP(insn);
          ir_use_list_add(ctx, old_ref, use);
          return old_ref;
        }
        insn->type = IR_DOUBLE;
        return ref;
      case IR_NEG:
      case IR_ABS:
        insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
        insn->type = IR_DOUBLE;
        return ref;
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
//      case IR_DIV:
      case IR_MIN:
      case IR_MAX:
        if (insn->op1 == insn->op2) {
          insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
        } else {
          insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
          insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist);
        }
        insn->type = IR_DOUBLE;
        return ref;
      default:
        break;
    }
  }
  IR_ASSERT(0);
  return ref;
}

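Unlike the d2f direction, widening needs no value check: every float is exactly representable as a double, which is why the FP2FP case above can simply peel the conversion once the consumer is retyped. A small hand-written illustration of that exactness (not taken from the pass itself):

#include <stdio.h>

int main(void)
{
  float x = 1.1f;
  double widened = (double)x;          /* FP2FP widening is always exact */
  printf("%d\n", (float)widened == x); /* 1: narrowing back recovers x   */
  return 0;
}
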
static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
{
  ir_insn *insn = &ctx->ir_base[ref];
  ir_ref *p, n, input;

  if (IR_IS_CONST_REF(ref)) {
    return !IR_IS_SYM_CONST(insn->op);
  } else {
    switch (insn->op) {
      case IR_ZEXT:
      case IR_SEXT:
      case IR_TRUNC:
        return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
      case IR_NEG:
      case IR_ABS:
      case IR_NOT:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_trunc(ctx, type, insn->op1);
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
      case IR_MIN:
      case IR_MAX:
      case IR_OR:
      case IR_AND:
      case IR_XOR:
      case IR_SHL:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_trunc(ctx, type, insn->op1) &&
          ir_may_promote_trunc(ctx, type, insn->op2);
//      case IR_SHR:
//      case IR_SAR:
//      case IR_DIV:
//      case IR_MOD:
//      case IR_FP2INT:
//        TODO: ???
      case IR_COND:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_trunc(ctx, type, insn->op2) &&
          ir_may_promote_trunc(ctx, type, insn->op3);
      case IR_PHI:
        if (ctx->use_lists[ref].count != 1) {
          ir_use_list *use_list = &ctx->use_lists[ref];
          ir_ref count = 0;

          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
            if (*p != ref) {
              if (count) {
                return 0;
              }
              count = 1;
            }
          }
        }
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
          input = *p;
          if (input != ref) {
            if (!ir_may_promote_trunc(ctx, type, input)) {
              return 0;
            }
          }
        }
        return 1;
      default:
        break;
    }
  }
  return 0;
}

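The op list above is exactly the set for which truncation distributes over the operation: for ADD/SUB/MUL and the bitwise ops, the low bits of the result depend only on the low bits of the inputs, so TRUNC(OP(a, b)) == OP(TRUNC(a), TRUNC(b)). A quick standalone confirmation for ADD:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int32_t a = 0x12345678, b = 0x0FEDCBA9;
  uint8_t wide_then_trunc = (uint8_t)(a + b);                  /* TRUNC(ADD(a, b))            */
  uint8_t trunc_then_add  = (uint8_t)((uint8_t)a + (uint8_t)b); /* ADD(TRUNC(a), TRUNC(b))    */
  printf("%d\n", wide_then_trunc == trunc_then_add);            /* 1: identical low bytes     */
  return 0;
}
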
static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];
  uint32_t count;
  ir_ref *p, n, input;

  if (IR_IS_CONST_REF(ref)) {
    ir_val val;

    switch (type) {
      case IR_I8:  val.i64 = insn->val.i8; break;
      case IR_U8:  val.u64 = insn->val.u8; break;
      case IR_I16: val.i64 = insn->val.i16; break;
      case IR_U16: val.u64 = insn->val.u16; break;
      case IR_I32: val.i64 = insn->val.i32; break;
      case IR_U32: val.u64 = insn->val.u32; break;
      case IR_CHAR:val.i64 = insn->val.i8; break;
      case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
      default: IR_ASSERT(0); val.u64 = 0;
    }
    return ir_const(ctx, val, type);
  } else {
    ir_bitqueue_add(worklist, ref);
    switch (insn->op) {
      case IR_ZEXT:
      case IR_SEXT:
      case IR_TRUNC:
        if (ctx->ir_base[insn->op1].type != type) {
          ir_type src_type = ctx->ir_base[insn->op1].type;
          if (ir_type_size[src_type] == ir_type_size[type]) {
            insn->op = IR_BITCAST;
          } else if (ir_type_size[src_type] > ir_type_size[type]) {
            insn->op = IR_TRUNC;
          } else {
            if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
              insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
            }
          }
          insn->type = type;
          return ref;
        }

        count = ctx->use_lists[ref].count;
        ir_use_list_remove_all(ctx, ref, use);
        if (ctx->use_lists[ref].count == 0) {
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
          ref = insn->op1;
          MAKE_NOP(insn);
          return ref;
        } else {
          ir_use_list_add(ctx, insn->op1, use);
          count -= ctx->use_lists[ref].count;
          if (count > 1) {
            do {
              ir_use_list_add(ctx, insn->op1, use);
            } while (--count > 1);
          }
        }
        return insn->op1;
      case IR_NEG:
      case IR_ABS:
      case IR_NOT:
        insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
        insn->type = type;
        return ref;
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
      case IR_MIN:
      case IR_MAX:
      case IR_OR:
      case IR_AND:
      case IR_XOR:
      case IR_SHL:
        if (insn->op1 == insn->op2) {
          insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
        } else {
          insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
        }
        insn->type = type;
        return ref;
//      case IR_DIV:
//      case IR_MOD:
//      case IR_SHR:
//      case IR_SAR:
//      case IR_FP2INT:
//        TODO: ???
      case IR_COND:
        if (insn->op2 == insn->op3) {
          insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
        } else {
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
          insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
        }
        insn->type = type;
        return ref;
      case IR_PHI:
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
          input = *p;
          if (input != ref) {
            *p = ir_promote_i2i(ctx, type, input, ref, worklist);
          }
        }
        insn->type = type;
        return ref;
      default:
        break;
    }
  }
  IR_ASSERT(0);
  return ref;
}

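The retyping rule in the ZEXT/SEXT/TRUNC case compares operand widths to pick the cheapest remaining conversion. A simplified sketch of that decision (hypothetical pick_conversion helper; src_size/dst_size stand in for ir_type_size[] entries, and the real code additionally keeps an existing SEXT/ZEXT op when widening):

typedef enum { OP_BITCAST, OP_TRUNC, OP_SEXT, OP_ZEXT } conv_op;

static conv_op pick_conversion(int src_size, int dst_size, int dst_is_signed)
{
  if (src_size == dst_size) {
    return OP_BITCAST;                        /* same width: reinterpret only */
  } else if (src_size > dst_size) {
    return OP_TRUNC;                          /* narrowing: keep the low bits */
  } else {
    return dst_is_signed ? OP_SEXT : OP_ZEXT; /* widening: extension by signedness */
  }
}
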
static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
{
  ir_val new_val;

  switch (val_insn->type) {
    default:
      IR_ASSERT(0);
    case IR_I8:
    case IR_U8:
    case IR_BOOL:
    case IR_CHAR:
      if (op == IR_SEXT) {
        new_val.i64 = (int64_t)val_insn->val.i8;
      } else {
        new_val.u64 = (uint64_t)val_insn->val.u8;
      }
      break;
    case IR_I16:
    case IR_U16:
      if (op == IR_SEXT) {
        new_val.i64 = (int64_t)val_insn->val.i16;
      } else {
        new_val.u64 = (uint64_t)val_insn->val.u16;
      }
      break;
    case IR_I32:
    case IR_U32:
      if (op == IR_SEXT) {
        new_val.i64 = (int64_t)val_insn->val.i32;
      } else {
        new_val.u64 = (uint64_t)val_insn->val.u32;
      }
      break;
  }
  return ir_const(ctx, new_val, type);
}

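The SEXT/ZEXT split above mirrors the usual C conversion semantics: sign extension replicates the sign bit, zero extension pads with zeros. A two-line demonstration on the byte 0xFF:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int8_t c = -1;                        /* bit pattern 0xFF            */
  int64_t  sext = (int64_t)c;           /* SEXT: -1 (all bits set)     */
  uint64_t zext = (uint64_t)(uint8_t)c; /* ZEXT: 255 (high bits clear) */
  printf("%lld %llu\n", (long long)sext, (unsigned long long)zext);
  return 0;
}
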
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
{
  uint32_t optx = IR_OPTX(op, type, 1);
  ir_ref ref;

  if (!IR_IS_CONST_REF(src_ref)) {
    ref = ir_iter_find_cse1(ctx, optx, src_ref);
    if (ref) {
      ir_use_list_add(ctx, ref, var_ref);
      if (!IR_IS_CONST_REF(src_ref)) {
        ir_use_list_remove_one(ctx, src_ref, var_ref);
      }
      ir_bitqueue_add(worklist, ref);
      return ref;
    }
  }

  ref = ir_emit1(ctx, optx, src_ref);
  ir_use_list_add(ctx, ref, var_ref);
  if (!IR_IS_CONST_REF(src_ref)) {
    ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
  }
  ir_bitqueue_grow(worklist, ref + 1);
  ir_bitqueue_add(worklist, ref);
  return ref;
}

static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
{
  ir_insn *insn;
  ir_ref n, *p, input, result, ctrl;

  if (IR_IS_CONST_REF(val)) {
    return 1; /* IR_START */
  }

  insn = &ctx->ir_base[val];
  if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
    return val;
  }

  IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA);
  if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) {
    return insn->op1;
  }

  n = insn->inputs_count;
  p = insn->ops + 1;

  result = 1;
  for (; n > 0; p++, n--) {
    input = *p;
    ctrl = _ir_estimated_control(ctx, input, loop);
    if (ctrl >= loop) return ctrl;
    if (ctrl > result) { // TODO: check dominance depth instead of order
      result = ctrl;
    }
  }
  return result;
}

static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
{
  ref = _ir_estimated_control(ctx, ref, loop);
  return ref < loop; // TODO: check dominance instead of order
}

static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
{
  if (IR_IS_CONST_REF(ref)) {
    return 1;
  } else {
    ir_insn *insn = &ctx->ir_base[ref];

    if (insn->op == IR_LOAD) {
      if (ir_is_loop_invariant(ctx, ref, loop)) {
        return 1;
      } else {
        /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
        if (ctx->use_lists[ref].count == 2) {
          return 1;
        } else if (ctx->use_lists[ref].count == 3) {
          ir_use_list *use_list = &ctx->use_lists[ref];
          ir_ref *p, n, use;

          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
            use = *p;
            if (use != ext_ref) {
              ir_insn *use_insn = &ctx->ir_base[use];

              if (use_insn->op != op
               && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
                || use_insn->op1 != ref)) {
                return 0;
              }
            }
          }
          return 1;
        }
      }
      return 0;
    } else {
      return ir_is_loop_invariant(ctx, ref, loop);
    }
  }
}

static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
{
  ir_op op = ctx->ir_base[ext_ref].op;
  ir_type type = ctx->ir_base[ext_ref].type;
  ir_insn *phi_insn;
  ir_use_list *use_list;
  ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;

  /* Check if we may change the type of the induction variable */
  use_list = &ctx->use_lists[phi_ref];
  n = use_list->count;
  if (n > 1) {
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
      use = *p;
      if (use == op_ref || use == ext_ref) {
        continue;
      } else {
        ir_insn *use_insn = &ctx->ir_base[use];

        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
          if (use_insn->op1 == phi_ref) {
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
              return 0;
            }
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
              continue;
            }
          } else if (use_insn->op2 == phi_ref) {
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
              return 0;
            }
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
              continue;
            }
          }
          return 0;
        } else if (use_insn->op == IR_IF) {
          continue;
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
          ext_ref_2 = use;
          continue;
        } else {
          return 0;
        }
      }
    }
  }

  use_list = &ctx->use_lists[op_ref];
  n = use_list->count;
  if (n > 1) {
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
      use = *p;
      if (use == phi_ref || use == ext_ref) {
        continue;
      } else {
        ir_insn *use_insn = &ctx->ir_base[use];

        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
          if (use_insn->op1 == phi_ref) {
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
              return 0;
            }
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
              continue;
            }
          } else if (use_insn->op2 == phi_ref) {
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
              return 0;
            }
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
              continue;
            }
          }
          return 0;
        } else if (use_insn->op == IR_IF) {
          continue;
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
          ext_ref_2 = use;
          continue;
        } else {
          return 0;
        }
      }
    }
  }

  for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
    /* "use_lists" may be reallocated by ir_ext_ref() */
    use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
    if (use == ext_ref) {
      continue;
    } else {
      ir_insn *use_insn = &ctx->ir_base[use];

      if (use_insn->op == IR_IF) {
        continue;
      } else if (use_insn->op == op) {
        IR_ASSERT(ext_ref_2 == use);
        continue;
      }
      IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
        || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
       && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
      if (use_insn->op1 != phi_ref) {
        if (IR_IS_CONST_REF(use_insn->op1)
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
          ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
        } else {
          ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
        }
        ir_bitqueue_add(worklist, use);
      }
      if (use_insn->op2 != phi_ref) {
        if (IR_IS_CONST_REF(use_insn->op2)
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
          ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
        } else {
          ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
        }
        ir_bitqueue_add(worklist, use);
      }
    }
  }

  if (ctx->use_lists[op_ref].count > 1) {
    for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
      /* "use_lists" may be reallocated by ir_ext_ref() */
      use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
      if (use == ext_ref || use == phi_ref) {
        continue;
      } else {
        ir_insn *use_insn = &ctx->ir_base[use];

        if (use_insn->op == IR_IF) {
          continue;
        } else if (use_insn->op == op) {
          IR_ASSERT(ext_ref_2 == use);
          continue;
        }
        IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
        if (use_insn->op1 != op_ref) {
          if (IR_IS_CONST_REF(use_insn->op1)
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
            ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
          } else {
            ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
          }
          ir_bitqueue_add(worklist, use);
        }
        if (use_insn->op2 != op_ref) {
          if (IR_IS_CONST_REF(use_insn->op2)
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
            ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
          } else {
            ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
          }
          ir_bitqueue_add(worklist, use);
        }
      }
    }
  }

  ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);

  if (ext_ref_2) {
    ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
  }

  ctx->ir_base[op_ref].type = type;

  phi_insn = &ctx->ir_base[phi_ref];
  phi_insn->type = type;
  if (IR_IS_CONST_REF(phi_insn->op2)
   && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
    ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
  } else {
    ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
  }

  return 1;
}

static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
{
  ir_ref ref = insn->op1;

  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
  insn = &ctx->ir_base[ref];
  if (insn->op == IR_PHI
   && insn->inputs_count == 3 /* (2 values) */
   && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
    ir_ref op_ref = insn->op3;
    ir_insn *op_insn = &ctx->ir_base[op_ref];

    if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
      if (op_insn->op1 == ref) {
        if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) {
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
        }
      } else if (op_insn->op2 == ref) {
        if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) {
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
        }
      }
    }
  } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) {
    if (!IR_IS_CONST_REF(insn->op1)
     && ctx->ir_base[insn->op1].op == IR_PHI
     && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */
     && ctx->ir_base[insn->op1].op3 == ref
     && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN
     && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) {
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist);
    } else if (!IR_IS_CONST_REF(insn->op2)
     && ctx->ir_base[insn->op2].op == IR_PHI
     && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */
     && ctx->ir_base[insn->op2].op3 == ref
     && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN
     && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) {
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist);
    }
  }

  return 0;
}

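The payoff of this pattern match is easiest to see at the C level: widening an i32 counter that is only consumed through an extension moves the SEXT/ZEXT out of the loop entirely. A hand-written before/after sketch of the shape being recognized (not generated by the pass):

/* Before: the 32-bit counter is sign-extended on every iteration. */
long sum_before(const long *a, int n)
{
  long s = 0;
  for (int i = 0; i < n; i++) {  /* i = PHI(loop, 0, i'); i' = ADD(i, 1) */
    s += a[(long)i];             /* SEXT(i) executed once per iteration  */
  }
  return s;
}

/* After: the induction variable itself is widened; the loop-invariant
 * bound is extended once and the per-iteration SEXT disappears. */
long sum_after(const long *a, int n)
{
  long s = 0;
  long bound = (long)n;          /* extended outside the loop */
  for (long i = 0; i < bound; i++) {
    s += a[i];
  }
  return s;
}
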
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
{
  ir_use_list *use_list = &ctx->use_lists[if_ref];
  ir_ref *p = &ctx->use_edges[use_list->refs];

  IR_ASSERT(use_list->count == 2);
  if (ctx->ir_base[*p].op == IR_IF_TRUE) {
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
    *if_true_ref = *p;
    *if_false_ref = *(p + 1);
  } else {
    IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
    *if_false_ref = *p;
    *if_true_ref = *(p + 1);
  }
}

static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
{
  ir_ref prev, next;
  ir_use_list *use_list;

  if (ctx->use_lists[begin].count > 1) {
    ir_ref *p, n, i, use;
    ir_insn *use_insn;
    ir_ref region = end;
    ir_ref next = IR_UNUSED;

    while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
      region = ctx->ir_base[region].op1;
    }

    use_list = &ctx->use_lists[begin];
    n = use_list->count;
    for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) {
      use = *p;
      use_insn = &ctx->ir_base[use];
      if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
        IR_ASSERT(!next);
        next = use;
      } else {
        IR_ASSERT(use_insn->op == IR_VAR);
        IR_ASSERT(use_insn->op1 == begin);
        use_insn->op1 = region;
        if (ir_use_list_add(ctx, region, use)) {
          /* restore after reallocation */
          use_list = &ctx->use_lists[begin];
          n = use_list->count;
          p = &ctx->use_edges[use_list->refs + i];
        }
      }
    }

    IR_ASSERT(next);
    ctx->use_edges[use_list->refs] = next;
    use_list->count = 1;
  }

  IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
  IR_ASSERT(ctx->ir_base[end].op == IR_END);
  IR_ASSERT(ctx->ir_base[begin].op1 == end);
  IR_ASSERT(ctx->use_lists[end].count == 1);

  prev = ctx->ir_base[end].op1;

  use_list = &ctx->use_lists[begin];
  IR_ASSERT(use_list->count == 1);
  next = ctx->use_edges[use_list->refs];

  /* remove BEGIN and END */
  MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
  MAKE_NOP(&ctx->ir_base[end]);   CLEAR_USES(end);

  /* connect their predecessor and successor */
  ctx->ir_base[next].op1 = prev;
  ir_use_list_replace_one(ctx, prev, end, next);

  if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
    ir_bitqueue_add(worklist, prev);
  }
}

static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
{
  ir_use_list *use_list = &ctx->use_lists[start];
  ir_ref *p, use, n = use_list->count;

  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
    use = *p;
    if (use != end) {
      ir_insn *use_insn = &ctx->ir_base[use];
      IR_ASSERT(use_insn->op == IR_VAR);
      IR_ASSERT(ctx->use_lists[use].count == 0);
      MAKE_NOP(use_insn);
    }
  }
}

static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
  if (insn->inputs_count == 2) {
    ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
    ir_insn *end1 = &ctx->ir_base[end1_ref];
    ir_insn *end2 = &ctx->ir_base[end2_ref];

    if (end1->op != IR_END || end2->op != IR_END) {
      return 0;
    }

    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
    ir_insn *start1 = &ctx->ir_base[start1_ref];
    ir_insn *start2 = &ctx->ir_base[start2_ref];

    if (start1->op1 != start2->op1) {
      return 0;
    }

    ir_ref root_ref = start1->op1;
    ir_insn *root = &ctx->ir_base[root_ref];

    if (root->op != IR_IF
     && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
      return 0;
    }

    /* Empty Diamond
     *
     *    prev                     prev
     *    |  condition             |  condition
     *    | /                      |
     *    IF                       |
     *    | \                      |
     *    |  +-----+               |
     *    |        IF_FALSE        |
     *    IF_TRUE  |           =>  |
     *    |        END             |
     *    END     /                |
     *    |  +---+                 |
     *    | /                      |
     *    MERGE                    |
     *    |                        |
     *    next                     next
     */

    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
    ir_insn *next = &ctx->ir_base[next_ref];

    if (ctx->use_lists[start1_ref].count != 1) {
      ir_remove_unused_vars(ctx, start1_ref, end1_ref);
    }
    if (ctx->use_lists[start2_ref].count != 1) {
      ir_remove_unused_vars(ctx, start2_ref, end2_ref);
    }

    next->op1 = root->op1;
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
    if (!IR_IS_CONST_REF(root->op2)) {
      ir_use_list_remove_all(ctx, root->op2, root_ref);
      if (ir_is_dead(ctx, root->op2)) {
        ir_bitqueue_add(worklist, root->op2);
      }
    }

    MAKE_NOP(root);   CLEAR_USES(root_ref);
    MAKE_NOP(start1); CLEAR_USES(start1_ref);
    MAKE_NOP(start2); CLEAR_USES(start2_ref);
    MAKE_NOP(end1);   CLEAR_USES(end1_ref);
    MAKE_NOP(end2);   CLEAR_USES(end2_ref);
    MAKE_NOP(insn);   CLEAR_USES(ref);

    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
      ir_bitqueue_add(worklist, next->op1);
    }

    return 1;
  } else {
    ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
    ir_ref root_ref = IR_UNUSED;

    for (i = 0; i < count; i++) {
      ir_ref end_ref, start_ref;
      ir_insn *end, *start;

      end_ref = ops[i];
      end = &ctx->ir_base[end_ref];
      if (end->op != IR_END) {
        return 0;
      }
      start_ref = end->op1;
      start = &ctx->ir_base[start_ref];
      if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) {
        return 0;
      }
      if (ctx->use_lists[start_ref].count != 1) {
        ir_remove_unused_vars(ctx, start_ref, end_ref);
      }
      if (!root_ref) {
        root_ref = start->op1;
        if (ctx->use_lists[root_ref].count != count) {
          return 0;
        }
      } else if (start->op1 != root_ref) {
        return 0;
      }
    }

    /* Empty N-Diamond */
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
    ir_insn *next = &ctx->ir_base[next_ref];
    ir_insn *root = &ctx->ir_base[root_ref];

    next->op1 = root->op1;
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);

    if (!IR_IS_CONST_REF(root->op2)) {
      ir_use_list_remove_all(ctx, root->op2, root_ref);
      if (ir_is_dead(ctx, root->op2)) {
        ir_bitqueue_add(worklist, root->op2);
      }
    }

    MAKE_NOP(root);   CLEAR_USES(root_ref);

    for (i = 0; i < count; i++) {
      ir_ref end_ref = ops[i];
      ir_insn *end = &ctx->ir_base[end_ref];
      ir_ref start_ref = end->op1;
      ir_insn *start = &ctx->ir_base[start_ref];

      MAKE_NOP(start); CLEAR_USES(start_ref);
      MAKE_NOP(end);   CLEAR_USES(end_ref);
    }

    MAKE_NOP(insn);   CLEAR_USES(ref);

    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
      ir_bitqueue_add(worklist, next->op1);
    }

    return 1;
  }
}

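At the C level, the removed shape is a conditional whose arms are both empty: control re-converges immediately, so the IF/MERGE pair (and the condition, when otherwise unused) contributes nothing. A trivial illustration:

void empty_diamond(int cond)
{
  if (cond) {
    /* IF_TRUE: nothing */
  } else {
    /* IF_FALSE: nothing */
  }
  /* "next" re-attaches directly to "prev"; cond becomes dead if unused */
}
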
static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
{
  return IR_IS_CONST_REF(ref)
    && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
    && ctx->ir_base[ref].val.u32 == 0;
}

static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
  IR_ASSERT(insn->inputs_count == 3);
  IR_ASSERT(ctx->use_lists[merge_ref].count == 2);

  ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
  ir_insn *end1 = &ctx->ir_base[end1_ref];
  ir_insn *end2 = &ctx->ir_base[end2_ref];

  if (end1->op == IR_END && end2->op == IR_END) {
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
    ir_insn *start1 = &ctx->ir_base[start1_ref];
    ir_insn *start2 = &ctx->ir_base[start2_ref];

    if (start1->op1 == start2->op1) {
      ir_ref root_ref = start1->op1;
      ir_insn *root = &ctx->ir_base[root_ref];

      if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
        ir_ref cond_ref = root->op2;
        ir_insn *cond = &ctx->ir_base[cond_ref];
        ir_type type = insn->type;
        bool is_cmp, is_less;

        if (IR_IS_TYPE_FP(type)) {
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
            cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
          is_less = (cond->op == IR_LT || cond->op == IR_LE ||
            cond->op == IR_ULT || cond->op == IR_ULE);
        } else if (IR_IS_TYPE_SIGNED(type)) {
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
          is_less = (cond->op == IR_LT || cond->op == IR_LE);
        } else {
          IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
          is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
          is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
        }

        if (is_cmp
         && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
           || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
          /* MAX/MIN
           *
           *    prev                     prev
           *    |  LT(A, B)              |
           *    | /                      |
           *    IF                       |
           *    | \                      |
           *    |  +-----+               |
           *    |        IF_FALSE        |
           *    IF_TRUE  |           =>  |
           *    |        END             |
           *    END     /                |
           *    |  +---+                 |
           *    | /                      |
           *    MERGE                    |
           *    |    \                   |
           *    |     PHI(A, B)          |    MIN(A, B)
           *    next                     next
           */
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
          ir_insn *next;

          if (next_ref == ref) {
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
          }
          next = &ctx->ir_base[next_ref];

          if (ctx->use_lists[start1_ref].count != 1) {
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
          }
          if (ctx->use_lists[start2_ref].count != 1) {
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
          }

          insn->op = (
            (is_less ? cond->op1 : cond->op2)
            ==
            ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
            ) ? IR_MIN : IR_MAX;
          insn->inputs_count = 2;
          if (insn->op2 > insn->op3) {
            insn->op1 = insn->op2;
            insn->op2 = insn->op3;
          } else {
            insn->op1 = insn->op3;
          }
          insn->op3 = IR_UNUSED;

          next->op1 = root->op1;
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
          if (!IR_IS_CONST_REF(insn->op1)) {
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
          }
          if (!IR_IS_CONST_REF(insn->op2)) {
            ir_use_list_remove_all(ctx, insn->op2, cond_ref);
          }

          MAKE_NOP(cond);   CLEAR_USES(cond_ref);
          MAKE_NOP(root);   CLEAR_USES(root_ref);
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);

          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
            ir_bitqueue_add(worklist, next->op1);
          }

          return 1;
        } else if (is_cmp
            && ((ctx->ir_base[insn->op2].op == IR_NEG
              && ctx->use_lists[insn->op2].count == 1
              && ctx->ir_base[insn->op2].op1 == insn->op3
              && ((cond->op1 == insn->op3
                && ir_is_zero(ctx, cond->op2)
                && is_less == (start1->op == IR_IF_TRUE))
               || (cond->op2 == insn->op3
                && ir_is_zero(ctx, cond->op1)
                && is_less != (start1->op == IR_IF_TRUE))))
             || (ctx->ir_base[insn->op3].op == IR_NEG
              && ctx->use_lists[insn->op3].count == 1
              && ctx->ir_base[insn->op3].op1 == insn->op2
              && ((cond->op1 == insn->op2
                && ir_is_zero(ctx, cond->op2)
                && is_less != (start1->op == IR_IF_TRUE))
               || (cond->op2 == insn->op2
                && ir_is_zero(ctx, cond->op1)
                && is_less == (start1->op == IR_IF_TRUE)))))) {
          /* ABS
           *
           *    prev                     prev
           *    |  LT(A, 0)              |
           *    | /                      |
           *    IF                       |
           *    | \                      |
           *    |  +-----+               |
           *    |        IF_FALSE        |
           *    IF_TRUE  |           =>  |
           *    |        END             |
           *    END     /                |
           *    |  +---+                 |
           *    | /                      |
           *    MERGE                    |
           *    |    \                   |
           *    |     PHI(A, NEG(A))     |    ABS(A)
           *    next                     next
           */
          ir_ref neg_ref;
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
          ir_insn *next;

          if (next_ref == ref) {
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
          }
          next = &ctx->ir_base[next_ref];

          if (ctx->use_lists[start1_ref].count != 1) {
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
          }
          if (ctx->use_lists[start2_ref].count != 1) {
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
          }

          insn->op = IR_ABS;
          insn->inputs_count = 1;
          if (ctx->ir_base[insn->op2].op == IR_NEG) {
            neg_ref = insn->op2;
            insn->op1 = insn->op3;
          } else {
            neg_ref = insn->op3;
            insn->op1 = insn->op2;
          }
          insn->op2 = IR_UNUSED;
          insn->op3 = IR_UNUSED;

          next->op1 = root->op1;
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
          ir_use_list_remove_one(ctx, insn->op1, neg_ref);
          if (!IR_IS_CONST_REF(insn->op1)) {
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
          }

          MAKE_NOP(cond);   CLEAR_USES(cond_ref);
          MAKE_NOP(root);   CLEAR_USES(root_ref);
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
          MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);

          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
            ir_bitqueue_add(worklist, next->op1);
          }

          return 1;
#if 0
        } else {
          /* COND
           *
           *    prev                     prev
           *    |  cond                  |
           *    | /                      |
           *    IF                       |
           *    | \                      |
           *    |  +-----+               |
           *    |        IF_FALSE        |
           *    IF_TRUE  |           =>  |
           *    |        END             |
           *    END     /                |
           *    |  +---+                 |
           *    | /                      |
           *    MERGE                    |
           *    |    \                   |
           *    |     PHI(A, B)          |    COND(cond, A, B)
           *    next                     next
           */
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
          ir_insn *next;

          if (next_ref == ref) {
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
          }
          next = &ctx->ir_base[next_ref];

          if (ctx->use_lists[start1_ref].count != 1) {
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
          }
          if (ctx->use_lists[start2_ref].count != 1) {
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
          }

          insn->op = IR_COND;
          insn->inputs_count = 3;
          insn->op1 = cond_ref;
          if (start1->op == IR_IF_FALSE) {
            SWAP_REFS(insn->op2, insn->op3);
          }

          next->op1 = root->op1;
          ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
          ir_use_list_remove_all(ctx, root->op2, root_ref);

          MAKE_NOP(root);   CLEAR_USES(root_ref);
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);

          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
            ir_bitqueue_add(worklist, next->op1);
          }

          return 1;
#endif
        }
      }
    }
  }

  return 0;
}

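The two diamonds this routine collapses correspond to the familiar branchy minimum and absolute value. A hand-written illustration of the shapes being recognized (C source, not output of the pass):

int min_diamond(int a, int b)
{
  int r;
  if (a < b) {   /* IF(LT(A, B))  */
    r = a;       /* IF_TRUE  path */
  } else {
    r = b;       /* IF_FALSE path */
  }
  return r;      /* MERGE + PHI(A, B) -> MIN(A, B) */
}

int abs_diamond(int a)
{
  int r;
  if (a < 0) {   /* IF(LT(A, 0))          */
    r = -a;      /* NEG(A) with a single use */
  } else {
    r = a;
  }
  return r;      /* PHI(A, NEG(A)) -> ABS(A) */
}
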
static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
{
  IR_ASSERT(op1->type == op2->type);
  if (IR_IS_TYPE_INT(op1->type)) {
    if (op == IR_EQ) {
      return op1->val.u64 == op2->val.u64;
    } else if (op == IR_NE) {
      return op1->val.u64 != op2->val.u64;
    } else if (op == IR_LT) {
      if (IR_IS_TYPE_SIGNED(op1->type)) {
        return op1->val.i64 < op2->val.i64;
      } else {
        return op1->val.u64 < op2->val.u64;
      }
    } else if (op == IR_GE) {
      if (IR_IS_TYPE_SIGNED(op1->type)) {
        return op1->val.i64 >= op2->val.i64;
      } else {
        return op1->val.u64 >= op2->val.u64;
      }
    } else if (op == IR_LE) {
      if (IR_IS_TYPE_SIGNED(op1->type)) {
        return op1->val.i64 <= op2->val.i64;
      } else {
        return op1->val.u64 <= op2->val.u64;
      }
    } else if (op == IR_GT) {
      if (IR_IS_TYPE_SIGNED(op1->type)) {
        return op1->val.i64 > op2->val.i64;
      } else {
        return op1->val.u64 > op2->val.u64;
      }
    } else if (op == IR_ULT) {
      return op1->val.u64 < op2->val.u64;
    } else if (op == IR_UGE) {
      return op1->val.u64 >= op2->val.u64;
    } else if (op == IR_ULE) {
      return op1->val.u64 <= op2->val.u64;
    } else if (op == IR_UGT) {
      return op1->val.u64 > op2->val.u64;
    } else {
      IR_ASSERT(0);
      return 0;
    }
  } else if (op1->type == IR_DOUBLE) {
    if (op == IR_EQ) {
      return op1->val.d == op2->val.d;
    } else if (op == IR_NE) {
      return op1->val.d != op2->val.d;
    } else if (op == IR_LT) {
      return op1->val.d < op2->val.d;
    } else if (op == IR_GE) {
      return op1->val.d >= op2->val.d;
    } else if (op == IR_LE) {
      return op1->val.d <= op2->val.d;
    } else if (op == IR_GT) {
      return op1->val.d > op2->val.d;
    } else if (op == IR_ULT) {
      return !(op1->val.d >= op2->val.d);
    } else if (op == IR_UGE) {
      return !(op1->val.d < op2->val.d);
    } else if (op == IR_ULE) {
      return !(op1->val.d > op2->val.d);
    } else if (op == IR_UGT) {
      return !(op1->val.d <= op2->val.d);
    } else {
      IR_ASSERT(0);
      return 0;
    }
  } else {
    IR_ASSERT(op1->type == IR_FLOAT);
    if (op == IR_EQ) {
      return op1->val.f == op2->val.f;
    } else if (op == IR_NE) {
      return op1->val.f != op2->val.f;
    } else if (op == IR_LT) {
      return op1->val.f < op2->val.f;
    } else if (op == IR_GE) {
      return op1->val.f >= op2->val.f;
    } else if (op == IR_LE) {
      return op1->val.f <= op2->val.f;
    } else if (op == IR_GT) {
      return op1->val.f > op2->val.f;
    } else if (op == IR_ULT) {
      return !(op1->val.f >= op2->val.f);
    } else if (op == IR_UGE) {
      return !(op1->val.f < op2->val.f);
    } else if (op == IR_ULE) {
      return !(op1->val.f > op2->val.f);
    } else if (op == IR_UGT) {
      return !(op1->val.f <= op2->val.f);
    } else {
      IR_ASSERT(0);
      return 0;
    }
  }
}

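Note the negated forms for the "unsigned" FP comparisons above: IR_ULT is evaluated as !(a >= b), which is true for unordered operands, while the plain ordered compare is false whenever a NaN is involved. A quick standalone check of that distinction:

#include <math.h>
#include <stdio.h>

int main(void)
{
  double x = NAN, y = 1.0;
  printf("LT:  %d\n", x < y);      /* 0: ordered compare fails on NaN     */
  printf("ULT: %d\n", !(x >= y));  /* 1: unordered-or-less-than holds     */
  return 0;
}
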
static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2856
0
{
2857
0
  ir_ref cond_ref = insn->op2;
2858
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
2859
2860
0
  if (cond->op == IR_PHI
2861
0
   && cond->inputs_count == 3
2862
0
   && cond->op1 == insn->op1
2863
0
   && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
2864
0
    || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
2865
0
    ir_ref merge_ref = insn->op1;
2866
0
    ir_insn *merge = &ctx->ir_base[merge_ref];
2867
2868
0
    if (ctx->use_lists[merge_ref].count == 2) {
2869
0
      ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2870
0
      ir_insn *end1 = &ctx->ir_base[end1_ref];
2871
0
      ir_insn *end2 = &ctx->ir_base[end2_ref];
2872
2873
0
      if (end1->op == IR_END && end2->op == IR_END) {
2874
0
        ir_ref if_true_ref, if_false_ref;
2875
0
        ir_insn *if_true, *if_false;
2876
0
        ir_op op = IR_IF_FALSE;
2877
2878
0
        ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
2879
2880
0
        if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
2881
0
          IR_ASSERT(IR_IS_CONST_REF(cond->op3));
2882
0
          SWAP_REFS(cond->op2, cond->op3);
2883
0
          SWAP_REFS(merge->op1, merge->op2);
2884
0
          SWAP_REFS(end1_ref, end2_ref);
2885
0
          SWAP_INSNS(end1, end2);
2886
0
        }
2887
0
        if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
2888
0
          SWAP_REFS(if_true_ref, if_false_ref);
2889
0
          op = IR_IF_TRUE;
2890
0
        }
2891
0
        if_true = &ctx->ir_base[if_true_ref];
2892
0
        if_false = &ctx->ir_base[if_false_ref];
2893
2894
0
        if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
2895
0
          if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
2896
            /* Simple IF Split
2897
             *
2898
             *    |        |               |        |
2899
             *    |        END             |        END
2900
             *    END     /                END       \
2901
             *    |  +---+                 |          +
2902
             *    | /                      |          |
2903
             *    MERGE                    |          |
2904
             *    | \                      |          |
2905
             *    |  PHI(false, true)      |          |
2906
             *    | /                      |          |
2907
             *    IF                   =>  |          |
2908
             *    | \                      |          |
2909
             *    |  +------+              |          |
2910
             *    |         IF_TRUE        |          BEGIN
2911
             *    IF_FALSE  |              BEGIN
2912
             *    |                        |
2913
             */
2914
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2915
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
2916
2917
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
2918
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
2919
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
2920
2921
0
            if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2922
0
            if_false->op1 = end1_ref;
2923
2924
0
            if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2925
0
            if_true->op1 = end2_ref;
2926
2927
0
            ir_bitqueue_add(worklist, if_false_ref);
2928
0
            ir_bitqueue_add(worklist, if_true_ref);
2929
2930
0
            return 1;
2931
0
          } else {
2932
            /* Simple IF Split
2933
             *
2934
             *    |        |               |        |
2935
             *    |        END             |        END
2936
             *    END     /                END      |
2937
             *    |  +---+                 |        |
2938
             *    | /                      |        |
2939
             *    MERGE                    |        +
2940
             *    | \                      |       /
2941
             *    |  PHI(false, false)     |      /
2942
             *    | /                      |     /
2943
             *    IF                   =>  |    /
2944
             *    | \                      |   /
2945
             *    |  +------+              |  /
2946
             *    |         IF_TRUE        | /        BEGIN(unreachable)
2947
             *    IF_FALSE  |              MERGE
2948
             *    |                        |
2949
             */
2950
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2951
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
2952
2953
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
2954
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
2955
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
2956
2957
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2958
0
            if_false->op1 = end1_ref;
2959
0
            if_false->op2 = end2_ref;
2960
2961
0
            if_true->optx = IR_BEGIN;
2962
0
            if_true->op1 = IR_UNUSED;
2963
2964
0
            ctx->flags2 &= ~IR_CFG_REACHABLE;
2965
2966
0
            ir_bitqueue_add(worklist, if_false_ref);
2967
2968
0
            return 1;
2969
0
          }
2970
0
        }
2971
2972
        /* Simple IF Split
2973
         *
2974
         *    |        |               |        |
2975
         *    |        END             |        IF(X)
2976
         *    END     /                END     / \
2977
         *    |  +---+                 |   +--+   +
2978
         *    | /                      |  /       |
2979
         *    MERGE                    | IF_FALSE |
2980
         *    | \                      | |        |
2981
         *    |  PHI(false, X)         | |        |
2982
         *    | /                      | |        |
2983
         *    IF                   =>  | END      |
2984
         *    | \                      | |        |
2985
         *    |  +------+              | |        |
2986
         *    |         IF_TRUE        | |        IF_TRUE
2987
         *    IF_FALSE  |              MERGE
2988
         *    |                        |
2989
         */
2990
0
        ir_use_list_remove_all(ctx, merge_ref, cond_ref);
2991
0
        ir_use_list_remove_all(ctx, ref, if_true_ref);
2992
0
        if (!IR_IS_CONST_REF(cond->op3)) {
2993
0
          ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
2994
0
        }
2995
0
        ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2996
0
        ir_use_list_add(ctx, end2_ref, if_true_ref);
2997
2998
0
        end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
2999
0
        end2->op2 = cond->op3;
3000
0
        ir_bitqueue_add(worklist, end2_ref);
3001
3002
0
        merge->optx = IR_OPTX(op, IR_VOID, 1);
3003
0
        merge->op1 = end2_ref;
3004
0
        merge->op2 = IR_UNUSED;
3005
3006
0
        MAKE_NOP(cond);
3007
0
        CLEAR_USES(cond_ref);
3008
3009
0
        insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3010
0
        insn->op1 = merge_ref;
3011
0
        insn->op2 = IR_UNUSED;
3012
3013
0
        if_true->op1 = end2_ref;
3014
3015
0
        if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3016
0
        if_false->op1 = end1_ref;
3017
0
        if_false->op2 = ref;
3018
3019
0
        ir_bitqueue_add(worklist, if_false_ref);
3020
0
        if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3021
0
          ir_bitqueue_add(worklist, end2->op1);
3022
0
        }
3023
3024
0
        return 1;
3025
0
      }
3026
0
    }
3027
0
  }
3028
3029
0
  return 0;
3030
0
}
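
For intuition, here is a source-level sketch of the shape these two splits rewrite. The names p, x and the return values are hypothetical and only model the control flow, not the IR API: the PHI merges a known false with an unknown x, and the IF re-tests the merged value.

/* Before: both predecessors funnel into one test of the PHI value.
 * After:  the constant-false arm jumps straight to the false
 * successor; only the other arm still tests x. (If both PHI inputs
 * were false, the IF would vanish and IF_TRUE become unreachable,
 * as in the first diagram above.)
 */
int split_before(int p, int x) { int t = p ? 0 : x; return t ? 1 : 2; }
int split_after (int p, int x) { return p ? 2 : (x ? 1 : 2); }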
3031
3032
static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3033
0
{
3034
0
  ir_ref cond_ref = insn->op2;
3035
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
3036
3037
0
  if (cond->op >= IR_EQ && cond->op <= IR_UGT
3038
0
   && IR_IS_CONST_REF(cond->op2)
3039
0
   && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
3040
0
   && ctx->use_lists[insn->op2].count == 1) {
3041
0
    ir_ref phi_ref = cond->op1;
3042
0
    ir_insn *phi = &ctx->ir_base[phi_ref];
3043
3044
0
    if (phi->op == IR_PHI
3045
0
     && phi->inputs_count == 3
3046
0
     && phi->op1 == insn->op1
3047
0
     && ctx->use_lists[phi_ref].count == 1
3048
0
     && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
3049
0
      || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
3050
0
      ir_ref merge_ref = insn->op1;
3051
0
      ir_insn *merge = &ctx->ir_base[merge_ref];
3052
3053
0
      if (ctx->use_lists[merge_ref].count == 2) {
3054
0
        ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3055
0
        ir_insn *end1 = &ctx->ir_base[end1_ref];
3056
0
        ir_insn *end2 = &ctx->ir_base[end2_ref];
3057
3058
0
        if (end1->op == IR_END && end2->op == IR_END) {
3059
0
          ir_ref if_true_ref, if_false_ref;
3060
0
          ir_insn *if_true, *if_false;
3061
0
          ir_op op = IR_IF_FALSE;
3062
3063
0
          ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3064
3065
0
          if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
3066
0
            IR_ASSERT(IR_IS_CONST_REF(phi->op3));
3067
0
            SWAP_REFS(phi->op2, phi->op3);
3068
0
            SWAP_REFS(merge->op1, merge->op2);
3069
0
            SWAP_REFS(end1_ref, end2_ref);
3070
0
            SWAP_INSNS(end1, end2);
3071
0
          }
3072
0
          if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
3073
0
            SWAP_REFS(if_true_ref, if_false_ref);
3074
0
            op = IR_IF_TRUE;
3075
0
          }
3076
0
          if_true = &ctx->ir_base[if_true_ref];
3077
0
          if_false = &ctx->ir_base[if_false_ref];
3078
3079
0
          if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
3080
0
            if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
3081
              /* IF Split
3082
               *
3083
               *    |        |               |        |
3084
               *    |        END             |        END
3085
               *    END     /                END      |
3086
               *    |  +---+                 |        |
3087
               *    | /                      |        |
3088
               *    MERGE                    |        |
3089
               *    | \                      |        |
3090
               *    |  PHI(C1, X)            |        |
3091
               *    |  |                     |        |
3092
               *    |  CMP(_, C2)            |        |
3093
               *    | /                      |        |
3094
               *    IF                   =>  |        |
3095
               *    | \                      |        |
3096
               *    |  +------+              |        |
3097
               *    |         IF_TRUE        |        BEGIN
3098
               *    IF_FALSE  |              BEGIN    |
3099
               *    |                        |
3100
               */
3101
3102
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3103
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3104
3105
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3106
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3107
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3108
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3109
3110
0
              if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3111
0
              if_false->op1 = end1_ref;
3112
3113
0
              if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3114
0
              if_true->op1 = end2_ref;
3115
3116
0
              ir_bitqueue_add(worklist, if_false_ref);
3117
0
              ir_bitqueue_add(worklist, if_true_ref);
3118
3119
0
              return 1;
3120
0
            } else {
3121
              /* IF Split
3122
               *
3123
               *    |        |               |        |
3124
               *    |        END             |        END
3125
               *    END     /                END      |
3126
               *    |  +---+                 |        |
3127
               *    | /                      |        |
3128
               *    MERGE                    |        |
3129
               *    | \                      |        |
3130
               *    |  PHI(C1, X)            |        |
3131
               *    |  |                     |        +
3132
               *    |  CMP(_, C2)            |       /
3133
               *    | /                      |      /
3134
               *    IF                   =>  |     /
3135
               *    | \                      |    /
3136
               *    |  +------+              |   /
3137
               *    |         IF_TRUE        |  /      BEGIN(unreachable)
3138
               *    IF_FALSE  |              MERGE     |
3139
               *    |                        |
3140
               */
3141
3142
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3143
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3144
3145
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3146
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3147
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3148
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3149
3150
0
              if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3151
0
              if_false->op1 = end1_ref;
3152
0
              if_false->op2 = end2_ref;
3153
3154
0
              if_true->optx = IR_BEGIN;
3155
0
              if_true->op1 = IR_UNUSED;
3156
3157
0
              ctx->flags2 &= ~IR_CFG_REACHABLE;
3158
3159
0
              ir_bitqueue_add(worklist, if_false_ref);
3160
3161
0
              return 1;
3162
0
            }
3163
0
          } else {
3164
            /* IF Split
3165
             *
3166
             *    |        |               |        |
3167
             *    |        END             |        IF<----+
3168
             *    END     /                END     / \     |
3169
             *    |  +---+                 |   +--+   +    |
3170
             *    | /                      |  /       |    |
3171
             *    MERGE                    | IF_FALSE |    |
3172
             *    | \                      | |        |    |
3173
             *    |  PHI(C1, X)            | |        |    |
3174
             *    |  |                     | |        |    |
3175
             *    |  CMP(_, C2)            | |        |    CMP(X, C2)
3176
             *    | /                      | |        |
3177
             *    IF                   =>  | END      |
3178
             *    | \                      | |        |
3179
             *    |  +------+              | |        |
3180
             *    |         IF_TRUE        | |        IF_TRUE
3181
             *    IF_FALSE  |              MERGE
3182
             *    |                        |
3183
             */
3184
3185
0
            ir_use_list_remove_all(ctx, merge_ref, phi_ref);
3186
0
            ir_use_list_remove_all(ctx, ref, if_true_ref);
3187
0
            if (!IR_IS_CONST_REF(phi->op3)) {
3188
0
              ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
3189
0
            }
3190
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3191
0
            ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
3192
0
            ir_use_list_add(ctx, end2_ref, if_true_ref);
3193
3194
0
            end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3195
0
            end2->op2 = insn->op2;
3196
0
            ir_bitqueue_add(worklist, end2_ref);
3197
3198
0
            merge->optx = IR_OPTX(op, IR_VOID, 1);
3199
0
            merge->op1 = end2_ref;
3200
0
            merge->op2 = IR_UNUSED;
3201
3202
0
            cond->op1 = phi->op3;
3203
0
            MAKE_NOP(phi);
3204
0
            CLEAR_USES(phi_ref);
3205
3206
0
            insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3207
0
            insn->op1 = merge_ref;
3208
0
            insn->op2 = IR_UNUSED;
3209
3210
0
            if_true->op1 = end2_ref;
3211
3212
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3213
0
            if_false->op1 = end1_ref;
3214
0
            if_false->op2 = ref;
3215
3216
0
            ir_bitqueue_add(worklist, if_false_ref);
3217
0
            if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3218
0
              ir_bitqueue_add(worklist, end2->op1);
3219
0
            }
3220
3221
0
            return 1;
3222
0
          }
3223
0
        }
3224
0
      }
3225
0
    }
3226
0
  }
3227
3228
0
  return 0;
3229
0
}
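
The same idea, one step removed: here the IF tests CMP(PHI(C1, X), C2), and C1 vs. C2 folds at compile time. When the other PHI input is also a comparable constant, the IF disappears outright (the two BEGIN / unreachable variants above); otherwise the compare is re-pointed at X and moved onto the second predecessor, as the third diagram shows. A minimal sketch, with 10 and 42 as hypothetical constants:

/* 10 < 42 folds to true, so the p arm goes straight to the true
 * successor; only the x arm keeps a compare (now directly on x). */
int cmp_split_before(int p, int x) { int t = p ? 10 : x; return (t < 42) ? 1 : 2; }
int cmp_split_after (int p, int x) { return p ? 1 : ((x < 42) ? 1 : 2); }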
3230
3231
static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
3232
0
{
3233
0
  ir_use_list *use_list = &ctx->use_lists[merge_ref];
3234
3235
0
  if (use_list->count == 1) {
3236
0
    ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
3237
0
  } else if (use_list->count == 2) {
3238
0
    if (merge->inputs_count == 2) {
3239
0
      ir_ref phi_ref = ctx->use_edges[use_list->refs];
3240
0
      ir_insn *phi = &ctx->ir_base[phi_ref];
3241
3242
0
      ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
3243
0
      ir_insn *next = &ctx->ir_base[next_ref];
3244
3245
0
      if (next->op == IR_PHI) {
3246
0
        SWAP_REFS(phi_ref, next_ref);
3247
0
        SWAP_INSNS(phi, next);
3248
0
      }
3249
3250
0
      if (phi->op == IR_PHI && next->op != IR_PHI) {
3251
0
        if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
3252
0
          if (next->op2 == phi_ref) {
3253
0
            if (ir_try_split_if(ctx, next_ref, next, worklist)) {
3254
0
              return;
3255
0
            }
3256
0
          } else {
3257
0
            ir_insn *cmp = &ctx->ir_base[next->op2];
3258
3259
0
            if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
3260
0
             && cmp->op1 == phi_ref
3261
0
             && IR_IS_CONST_REF(cmp->op2)
3262
0
             && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
3263
0
             && ctx->use_lists[next->op2].count == 1) {
3264
0
              if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
3265
0
                return;
3266
0
              }
3267
0
            }
3268
0
          }
3269
0
        }
3270
0
        ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
3271
0
      }
3272
0
    }
3273
0
  }
3274
0
}
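
ir_iter_optimize_merge dispatches on the merge's use count: a single (control) use means no PHI consumes the merge, so an empty diamond may collapse; exactly two uses with two inputs is the PHI-plus-IF shape handed to the split helpers above, and failing that to ir_optimize_phi. The empty-diamond case in source-level terms (hypothetical name):

/* Nothing happens on either arm, so the branch, both arms, and the
 * MERGE all fold away and control runs straight through.           */
void empty_diamond(int c) { if (c) { /* empty */ } else { /* empty */ } }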
3275
3276
static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
3277
0
{
3278
0
  ir_use_list *use_list = &ctx->use_lists[ref];
3279
0
  ir_ref *p, n, use;
3280
0
  ir_insn *use_insn;
3281
3282
0
  for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
3283
0
    use = *p;
3284
0
    use_insn = &ctx->ir_base[use];
3285
0
    if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
3286
0
      return use;
3287
0
    }
3288
0
  }
3289
0
  return IR_UNUSED;
3290
0
}
3291
3292
static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
3293
0
{
3294
0
  ir_ref ext_ref;
3295
3296
0
  ext_ref = ir_find_ext_use(ctx, phi_ref);
3297
0
  if (!ext_ref) {
3298
0
    ext_ref = ir_find_ext_use(ctx, op_ref);
3299
0
  }
3300
0
  if (ext_ref) {
3301
0
    ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
3302
0
  }
3303
0
}
3304
3305
static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
3306
0
{
3307
0
  ir_ref n;
3308
3309
0
  if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
3310
0
    return;
3311
0
  }
3312
3313
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
3314
0
  for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
3315
    /* "use_lists" may be reallocated by ir_ext_ref() */
3316
0
    ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
3317
0
    ir_insn *use_insn = &ctx->ir_base[use];
3318
3319
0
    if (use_insn->op == IR_PHI) {
3320
0
      ir_ref op_ref = use_insn->op3;
3321
0
      ir_insn *op_insn = &ctx->ir_base[op_ref];
3322
3323
0
      if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
3324
0
        if (op_insn->op1 == use) {
3325
0
          if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) {
3326
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3327
0
          }
3328
0
        } else if (op_insn->op2 == use) {
3329
0
          if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) {
3330
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3331
0
          }
3332
0
        }
3333
0
      }
3334
0
    }
3335
0
  }
3336
0
}
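
The pattern matched here is x2 = PHI(loop, x1, x3); x3 = ADD/SUB/MUL(x2, invariant), with a SEXT/ZEXT use located by ir_find_ext_use. A sketch of the payoff, assuming a 32-bit counter whose only extension can be hoisted by widening the induction variable itself (function names are illustrative):

#include <stdint.h>

/* Before: every iteration re-extends the 32-bit counter at its use. */
uint64_t iv_before(uint32_t n, const uint64_t *a) {
    uint64_t s = 0;
    for (uint32_t i = 0; i < n; i++)
        s += a[(uint64_t)i];          /* ZEXT of the PHI on each use */
    return s;
}

/* After promotion: the induction variable is 64-bit from the start,
 * so the per-iteration extension disappears.                        */
uint64_t iv_after(uint32_t n, const uint64_t *a) {
    uint64_t s = 0;
    for (uint64_t i = 0; i < (uint64_t)n; i++)
        s += a[i];
    return s;
}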
3337
3338
static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
3339
0
{
3340
0
  ir_insn *condition_insn = &ctx->ir_base[condition];
3341
3342
0
  while ((condition_insn->op == IR_BITCAST
3343
0
    || condition_insn->op == IR_ZEXT
3344
0
    || condition_insn->op == IR_SEXT)
3345
0
   && ctx->use_lists[condition].count == 1) {
3346
0
    condition = condition_insn->op1;
3347
0
    condition_insn = &ctx->ir_base[condition];
3348
0
  }
3349
3350
0
  if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) {
3351
0
    *swap = 1;
3352
0
    condition = condition_insn->op1;
3353
0
    condition_insn = &ctx->ir_base[condition];
3354
0
  }
3355
3356
0
  if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) {
3357
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3358
3359
0
    if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3360
0
      condition = condition_insn->op1;
3361
0
      condition_insn = &ctx->ir_base[condition];
3362
0
    }
3363
0
  } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) {
3364
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3365
3366
0
    if (condition_insn->op2 == IR_TRUE) {
3367
0
      condition = condition_insn->op1;
3368
0
      condition_insn = &ctx->ir_base[condition];
3369
0
    } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3370
0
      condition = condition_insn->op1;
3371
0
      condition_insn = &ctx->ir_base[condition];
3372
0
      *swap = !*swap;
3373
0
    }
3374
0
  }
3375
3376
0
  while ((condition_insn->op == IR_BITCAST
3377
0
    || condition_insn->op == IR_ZEXT
3378
0
    || condition_insn->op == IR_SEXT)
3379
0
   && ctx->use_lists[condition].count == 1) {
3380
0
    condition = condition_insn->op1;
3381
0
    condition_insn = &ctx->ir_base[condition];
3382
0
  }
3383
3384
0
  if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) {
3385
0
    return IR_TRUE;
3386
0
  }
3387
3388
0
  if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) {
3389
0
    condition = ir_check_dominating_predicates(ctx, control, condition);
3390
0
  }
3391
3392
0
  return condition;
3393
0
}
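
The canonicalizations above, written as C equivalences (swap = the caller exchanges the IF_TRUE/IF_FALSE successors; the BITCAST/ZEXT/SEXT wrappers are only peeled off single-use conditions, ALLOCA/VADDR results are known non-null, and a multi-use condition is finally checked against dominating predicates):

int cond_not(int x) { return !x     ? 1 : 2; }  /* == x ? 2 : 1  (swap) */
int cond_ne (int x) { return x != 0 ? 1 : 2; }  /* == x ? 1 : 2         */
int cond_eq (int x) { return x == 0 ? 1 : 2; }  /* == x ? 2 : 1  (swap) */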
3394
3395
static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3396
0
{
3397
0
  bool swap = 0;
3398
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3399
3400
0
  if (swap) {
3401
0
    ir_use_list *use_list = &ctx->use_lists[ref];
3402
0
    ir_ref *p, use;
3403
3404
0
    IR_ASSERT(use_list->count == 2);
3405
0
    p = ctx->use_edges + use_list->refs;
3406
0
    use = *p;
3407
0
    if (ctx->ir_base[use].op == IR_IF_TRUE) {
3408
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3409
0
      use = *(p+1);
3410
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3411
0
    } else {
3412
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3413
0
      use = *(p+1);
3414
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3415
0
    }
3416
0
  }
3417
3418
0
  if (IR_IS_CONST_REF(condition)) {
3419
    /*
3420
     *    |                        |
3421
     *    IF(TRUE)             =>  END
3422
     *    | \                      |
3423
     *    |  +------+              |
3424
     *    |         IF_TRUE        |        BEGIN(unreachable)
3425
     *    IF_FALSE  |              BEGIN
3426
     *    |                        |
3427
     */
3428
0
    ir_ref if_true_ref, if_false_ref;
3429
0
    ir_insn *if_true, *if_false;
3430
3431
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3432
0
    if (!IR_IS_CONST_REF(insn->op2)) {
3433
0
      ir_use_list_remove_one(ctx, insn->op2, ref);
3434
0
      ir_bitqueue_add(worklist, insn->op2);
3435
0
    }
3436
0
    insn->op2 = IR_UNUSED;
3437
3438
0
    ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3439
0
    if_true = &ctx->ir_base[if_true_ref];
3440
0
    if_false = &ctx->ir_base[if_false_ref];
3441
0
    if_true->op = IR_BEGIN;
3442
0
    if_false->op = IR_BEGIN;
3443
0
    if (ir_ref_is_true(ctx, condition)) {
3444
0
      if_false->op1 = IR_UNUSED;
3445
0
      ir_use_list_remove_one(ctx, ref, if_false_ref);
3446
0
      ir_bitqueue_add(worklist, if_true_ref);
3447
0
    } else {
3448
0
      if_true->op1 = IR_UNUSED;
3449
0
      ir_use_list_remove_one(ctx, ref, if_true_ref);
3450
0
      ir_bitqueue_add(worklist, if_false_ref);
3451
0
    }
3452
0
    ctx->flags2 &= ~IR_CFG_REACHABLE;
3453
0
  } else if (insn->op2 != condition) {
3454
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3455
0
  }
3456
0
}
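
Once the condition folds to a constant, the IF itself degenerates: it becomes an END into the taken successor, the untaken successor turns into a detached BEGIN, and IR_CFG_REACHABLE is cleared so the CFG is recomputed later. Source-level analogy (hypothetical function):

int const_if(void) { if (1) return 1; else return 2; }  /* folds to: return 1; */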
3457
3458
static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3459
0
{
3460
0
  bool swap = 0;
3461
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3462
3463
0
  if (swap) {
3464
0
    if (insn->op == IR_GUARD) {
3465
0
      insn->op = IR_GUARD_NOT;
3466
0
    } else {
3467
0
      insn->op = IR_GUARD;
3468
0
    }
3469
0
  }
3470
3471
0
  if (IR_IS_CONST_REF(condition)) {
3472
0
    if (insn->op == IR_GUARD) {
3473
0
      if (ir_ref_is_true(ctx, condition)) {
3474
0
        ir_ref prev, next;
3475
3476
0
remove_guard:
3477
0
        prev = insn->op1;
3478
0
        next = ir_next_control(ctx, ref);
3479
0
        ctx->ir_base[next].op1 = prev;
3480
0
        ir_use_list_remove_one(ctx, ref, next);
3481
0
        ir_use_list_replace_one(ctx, prev, ref, next);
3482
0
        insn->op1 = IR_UNUSED;
3483
3484
0
        if (!IR_IS_CONST_REF(insn->op2)) {
3485
0
          ir_use_list_remove_one(ctx, insn->op2, ref);
3486
0
          if (ir_is_dead(ctx, insn->op2)) {
3487
            /* schedule DCE */
3488
0
            ir_bitqueue_add(worklist, insn->op2);
3489
0
          }
3490
0
        }
3491
3492
0
        if (insn->op3) {
3493
          /* SNAPSHOT */
3494
0
          ir_iter_remove_insn(ctx, insn->op3, worklist);
3495
0
        }
3496
3497
0
        MAKE_NOP(insn);
3498
0
        return;
3499
0
      } else {
3500
0
        condition = IR_FALSE;
3501
0
      }
3502
0
    } else {
3503
0
      if (ir_ref_is_true(ctx, condition)) {
3504
0
        condition = IR_TRUE;
3505
0
      } else {
3506
0
        goto remove_guard;
3507
0
      }
3508
0
    }
3509
0
  }
3510
3511
0
  if (insn->op2 != condition) {
3512
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3513
0
  }
3514
0
}
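
The remove_guard path is a plain splice out of the control chain: the guard's unique control successor is re-pointed at the guard's predecessor, then the operands and the optional SNAPSHOT are released. A minimal sketch with a hypothetical node type (in the real code the use lists are patched alongside):

#include <stddef.h>

typedef struct node { struct node *op1; /* control predecessor */ } node;

/* Splice 'guard' out of the chain: 'next' now skips it. */
static void splice_out(node *guard, node *next) {
    next->op1 = guard->op1;
    guard->op1 = NULL;  /* detached, ready for MAKE_NOP */
}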
3515
3516
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
3517
0
{
3518
0
  ir_ref i, val;
3519
0
  ir_insn *insn;
3520
3521
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
3522
0
    insn = &ctx->ir_base[i];
3523
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3524
0
      if (ctx->use_lists[i].count == 0) {
3525
0
        if (insn->op == IR_PHI) {
3526
0
          ir_bitqueue_add(worklist, insn->op1);
3527
0
        }
3528
0
        ir_iter_remove_insn(ctx, i, worklist);
3529
0
      } else {
3530
0
        insn = &ctx->ir_base[i];
3531
0
        switch (insn->op) {
3532
0
          case IR_FP2FP:
3533
0
            if (insn->type == IR_FLOAT) {
3534
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3535
0
                ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist);
3536
0
                insn->op1 = ref;
3537
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3538
0
                break;
3539
0
              }
3540
0
            } else {
3541
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3542
0
                ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist);
3543
0
                insn->op1 = ref;
3544
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3545
0
                break;
3546
0
              }
3547
0
            }
3548
0
            goto folding;
3549
0
          case IR_FP2INT:
3550
0
            if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
3551
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3552
0
                insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist);
3553
0
              }
3554
0
            } else {
3555
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3556
0
                insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist);
3557
0
              }
3558
0
            }
3559
0
            goto folding;
3560
0
          case IR_TRUNC:
3561
0
            if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
3562
0
              ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist);
3563
0
              insn->op1 = ref;
3564
0
              ir_iter_replace_insn(ctx, i, ref, worklist);
3565
0
              break;
3566
0
            }
3567
0
            goto folding;
3568
0
          case IR_SEXT:
3569
0
          case IR_ZEXT:
3570
0
            if (ir_try_promote_ext(ctx, i, insn, worklist)) {
3571
0
              break;
3572
0
            }
3573
0
            goto folding;
3574
0
          case IR_PHI:
3575
0
            break;
3576
0
          default:
3577
0
folding:
3578
0
            ir_iter_fold(ctx, i, worklist);
3579
0
            break;
3580
0
        }
3581
0
      }
3582
0
    } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
3583
0
      if (!(ctx->flags & IR_OPT_CFG)) {
3584
        /* pass */
3585
0
      } else if (insn->op == IR_BEGIN) {
3586
0
        if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) {
3587
0
          ir_merge_blocks(ctx, insn->op1, i, worklist);
3588
0
        }
3589
0
      } else if (insn->op == IR_MERGE) {
3590
0
        ir_iter_optimize_merge(ctx, i, insn, worklist);
3591
0
      } else if (insn->op == IR_LOOP_BEGIN) {
3592
0
        ir_iter_optimize_loop(ctx, i, insn, worklist);
3593
0
      }
3594
0
    } else if (ir_is_dead_load(ctx, i)) {
3595
0
      ir_ref next;
3596
3597
      /* remove LOAD from the doubly-linked control list */
3598
0
remove_mem_insn:
3599
0
      next = ctx->use_edges[ctx->use_lists[i].refs];
3600
0
      IR_ASSERT(ctx->use_lists[i].count == 1);
3601
0
      ctx->ir_base[next].op1 = insn->op1;
3602
0
      ir_use_list_replace_one(ctx, insn->op1, i, next);
3603
0
      insn->op1 = IR_UNUSED;
3604
0
      ir_iter_remove_insn(ctx, i, worklist);
3605
0
    } else if (insn->op == IR_LOAD) {
3606
0
      val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2);
3607
0
      if (val) {
3608
0
        ir_insn *val_insn;
3609
0
        ir_ref prev, next;
3610
3611
0
remove_aliased_load:
3612
0
        prev = insn->op1;
3613
0
        next = ir_next_control(ctx, i);
3614
0
        ctx->ir_base[next].op1 = prev;
3615
0
        ir_use_list_remove_one(ctx, i, next);
3616
0
        ir_use_list_replace_one(ctx, prev, i, next);
3617
0
        insn->op1 = IR_UNUSED;
3618
3619
0
        val_insn = &ctx->ir_base[val];
3620
0
        if (val_insn->type == insn->type) {
3621
0
          ir_iter_replace_insn(ctx, i, val, worklist);
3622
0
        } else {
3623
0
          if (!IR_IS_CONST_REF(insn->op2)) {
3624
0
            ir_use_list_remove_one(ctx, insn->op2, i);
3625
0
            if (ir_is_dead(ctx, insn->op2)) {
3626
              /* schedule DCE */
3627
0
              ir_bitqueue_add(worklist, insn->op2);
3628
0
            }
3629
0
          }
3630
0
          if (!IR_IS_CONST_REF(val)) {
3631
0
            ir_use_list_add(ctx, val, i);
3632
0
          }
3633
0
          if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) {
3634
            /* load forwarding with bitcast (L2L) */
3635
0
            insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1);
3636
0
          } else {
3637
            /* partial load forwarding (L2L) */
3638
0
            insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1);
3639
0
          }
3640
0
          insn->op1 = val;
3641
0
          insn->op2 = IR_UNUSED;
3642
0
          ir_bitqueue_add(worklist, i);
3643
0
        }
3644
0
      }
3645
0
    } else if (insn->op == IR_STORE) {
3646
0
      if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) {
3647
0
        goto remove_mem_insn;
3648
0
      } else {
3649
0
        ir_insn *val_insn;
3650
3651
0
remove_bitcast:
3652
0
        val = insn->op3;
3653
0
        val_insn = &ctx->ir_base[val];
3654
0
        if (val_insn->op == IR_BITCAST
3655
0
         && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) {
3656
0
          insn->op3 = val_insn->op1;
3657
0
          ir_use_list_remove_one(ctx, val, i);
3658
0
          if (ctx->use_lists[val].count == 0) {
3659
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3660
0
              ir_use_list_replace_one(ctx, val_insn->op1, val, i);
3661
0
            }
3662
0
            ir_iter_remove_insn(ctx, val, worklist);
3663
0
          } else {
3664
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3665
0
              ir_use_list_add(ctx, val_insn->op1, i);
3666
0
            }
3667
0
          }
3668
0
        }
3669
0
      }
3670
0
    } else if (insn->op == IR_VLOAD) {
3671
0
      val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2);
3672
0
      if (val) {
3673
0
        goto remove_aliased_load;
3674
0
      }
3675
0
    } else if (insn->op == IR_VSTORE) {
3676
0
      if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) {
3677
0
        goto remove_mem_insn;
3678
0
      } else {
3679
0
        goto remove_bitcast;
3680
0
      }
3681
0
    } else if (insn->op == IR_IF) {
3682
0
      ir_iter_optimize_if(ctx, i, insn, worklist);
3683
0
    } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
3684
0
      ir_iter_optimize_guard(ctx, i, insn, worklist);
3685
0
    }
3686
0
  }
3687
0
}
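
ir_iter_opt is a classic worklist-to-fixed-point driver: every rewrite re-queues the nodes it may have affected, and the loop runs until the queue drains. A generic, self-contained sketch of that shape (the fixed capacity and names are illustrative, not the ir_bitqueue API; zero-initialize before use):

#include <stdbool.h>

typedef struct { int items[256]; int top; bool queued[256]; } worklist;

static void wl_add(worklist *w, int i) {
    if (!w->queued[i]) { w->queued[i] = true; w->items[w->top++] = i; }
}

static int wl_pop(worklist *w) {
    if (w->top == 0) return -1;
    int i = w->items[--w->top];
    w->queued[i] = false;
    return i;
}

/* Pop, transform, and re-queue whatever the transform invalidated. */
static void run_to_fixed_point(worklist *w, void (*rewrite)(int ref, worklist *w)) {
    int i;
    while ((i = wl_pop(w)) >= 0) {
        rewrite(i, w);
    }
}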
3688
3689
int ir_sccp(ir_ctx *ctx)
3690
0
{
3691
0
  ir_bitqueue sccp_worklist, iter_worklist;
3692
0
  ir_insn *_values;
3693
3694
0
  ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3695
0
  ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
3696
0
  _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
3697
3698
0
  ctx->flags2 |= IR_OPT_IN_SCCP;
3699
0
  ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
3700
0
  ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
3701
0
  ctx->flags2 &= ~IR_OPT_IN_SCCP;
3702
3703
0
  ir_mem_free(_values);
3704
0
  ir_bitqueue_free(&sccp_worklist);
3705
3706
0
  ctx->flags2 |= IR_CFG_REACHABLE;
3707
3708
0
  ir_iter_opt(ctx, &iter_worklist);
3709
3710
0
  ir_bitqueue_free(&iter_worklist);
3711
3712
0
  return 1;
3713
0
}
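
Note the division of labor: _values and sccp_worklist exist only for the analysis/transform phase and are freed as soon as it ends, while iter_worklist is seeded during SCCP with the nodes it could not finish and is then drained to a fixed point by ir_iter_opt. IR_CFG_REACHABLE is set before iteration and cleared again by any rewrite that makes a block unreachable.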