Coverage Report

Created: 2025-11-16 06:23

/src/php-src/ext/opcache/jit/ir/ir_sccp.c
(Source listing follows; the report's Line/Count columns are omitted — every instrumented line of this file reports an execution count of 0.)

/*
 * IR - Lightweight JIT Compilation Framework
 * (SCCP - Sparse Conditional Constant Propagation)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 *
 * The SCCP algorithm is based on the publication by M. N. Wegman and F. K. Zadeck.
 * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches",
 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
 */
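
/*
 * A minimal sketch of the lattice this pass works over (illustrative note,
 * inferred from the macros and meet functions below): every ref starts at
 * TOP ("no information yet"), may be lowered to a single constant, and
 * finally to BOTTOM ("may hold different values at runtime"):
 *
 *   TOP    meet X   => X
 *   C1     meet C1  => C1
 *   C1     meet C2  => BOTTOM   (C1 != C2)
 *   BOTTOM meet X   => BOTTOM
 *
 * Values only ever move down the lattice, which bounds how many times each
 * ref can be re-queued on the worklist and guarantees termination.
 */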

#include "ir.h"
#include "ir_private.h"

#include <math.h>

#define IR_COMBO_COPY_PROPAGATION 1

#define IR_TOP                  IR_UNUSED
#define IR_BOTTOM               IR_LAST_OP

#define IR_MAKE_TOP(ref)        do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
#define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)

#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].op == IR_BOTTOM)
#define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
#define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))

IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
  return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
}

IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
{
  ir_use_list *use_list;
  ir_ref n, *p, use;

  IR_ASSERT(!IR_IS_CONST_REF(ref));
  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
    use = *p;
    if (_values[use].op != IR_BOTTOM) {
      ir_bitqueue_add(worklist, use);
    }
  }
}

IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  IR_ASSERT(_values[ref].op == IR_TOP);
  /* do backward propagation only once */
  if (!_values[ref].op1) {
    _values[ref].op1 = 1;
    ir_bitqueue_add(worklist, ref);
  }
}

#if IR_COMBO_COPY_PROPAGATION
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a)
{
  if (a > 0 && _values[a].op == IR_COPY) {
    do {
      a = _values[a].op1;
      IR_ASSERT(a > 0);
    } while (_values[a].op == IR_COPY);
    IR_ASSERT(_values[a].op == IR_BOTTOM);
  }
  return a;
}

#if 0
static void CHECK_LIST(ir_insn *_values, ir_ref ref)
{
  ir_ref member = _values[ref].op2;
  while (member != ref) {
    IR_ASSERT(_values[_values[member].op2].op3 == member);
    member = _values[member].op2;
  }
  IR_ASSERT(_values[_values[ref].op2].op3 == ref);
}
#else
# define CHECK_LIST(_values, ref)
#endif

static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst)
{
  IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
  IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
  IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);

  _values[dst].optx = IR_COPY;
  _values[dst].op1 = src;

  if (_values[src].op == IR_BOTTOM) {
    /* initialize empty doubly-linked list */
    if (_values[src].op1 != src) {
      _values[src].op1 = src;
      _values[src].op2 = src;
      _values[src].op3 = src;
    }
  } else {
    src = ir_sccp_identity(ctx, _values, src);
  }

  /* insert into circular doubly-linked list */
  ir_ref prev = _values[src].op3;
  _values[dst].op2 = src;
  _values[dst].op3 = prev;
  _values[src].op3 = dst;
  _values[prev].op2 = dst;
  CHECK_LIST(_values, dst);
}
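
/*
 * An illustrative note on the encoding used above (a sketch inferred from
 * the code, not original documentation): for a BOTTOM "representative" and
 * the COPY nodes that resolve to it, _values[x].op1 holds the identity
 * source, while op2/op3 act as the next/prev links of a circular
 * doubly-linked list threaded through the whole partition. E.g. after
 *
 *   ir_sccp_add_identity(ctx, _values, src, d1);
 *   ir_sccp_add_identity(ctx, _values, src, d2);
 *
 * the op2 chain is src -> d1 -> d2 -> src, so ir_sccp_split_partition()
 * below can visit every member of a partition without extra allocation.
 */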

static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
{
  ir_ref member, head, tail, next, prev;

  CHECK_LIST(_values, ref);
  IR_MAKE_BOTTOM(ref);
  _values[ref].op1 = ref;

  member = _values[ref].op2;
  head = tail = IR_UNUSED;
  while (member != ref) {
    if (_values[member].op != IR_BOTTOM) {
      ir_bitqueue_add(worklist, member);
    }
    ir_sccp_add_uses(ctx, _values, worklist, member);

    next = _values[member].op2;
    if (ir_sccp_identity(ctx, _values, member) == ref) {
      /* remove "member" from the old circular doubly-linked list */
      prev = _values[member].op3;
      _values[prev].op2 = next;
      _values[next].op3 = prev;

      /* insert "member" into the new doubly-linked list */
      if (!head) {
        head = tail = member;
      } else {
        _values[tail].op2 = member;
        _values[member].op3 = tail;
        tail = member;
      }
    }
    member = next;
  }

  /* remove "ref" from the old circular doubly-linked list */
  next = _values[ref].op2;
  prev = _values[ref].op3;
  _values[prev].op2 = next;
  _values[next].op3 = prev;
  CHECK_LIST(_values, next);

  /* close the new circle */
  if (head) {
    _values[ref].op2 = head;
    _values[ref].op3 = tail;
    _values[tail].op2 = ref;
    _values[head].op3 = ref;
  } else {
    _values[ref].op2 = ref;
    _values[ref].op3 = ref;
  }
  CHECK_LIST(_values, ref);
}

IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref)
{
  if (_values[ref].op == IR_COPY) {
    ir_sccp_split_partition(ctx, _values, worklist, ref);
  } else {
    IR_MAKE_BOTTOM(ref);
  }
}

# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
#else
# define ir_sccp_identity(_ctx, _values, ref) (ref)
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
#endif

IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn)
{
  IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));

  if (_values[ref].op == IR_TOP) {
    /* TOP meet NEW_CONST => NEW_CONST */
    _values[ref].optx = val_insn->opt;
    _values[ref].val.u64 = val_insn->val.u64;
    return 1;
  } else if (_values[ref].opt == val_insn->opt) {
    /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
    if (_values[ref].val.u64 == val_insn->val.u64) {
      return 0;
    }
  }

  IR_MAKE_BOTTOM_EX(ref);
  return 1;
}

IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
  ir_ref val_identity = ir_sccp_identity(ctx, _values, val);
  ir_insn *val_insn;

  if (IR_IS_CONST_REF(val_identity)) {
    val_insn = &ctx->ir_base[val_identity];
  } else {
    val_insn = &_values[val_identity];

    if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
#if IR_COMBO_COPY_PROPAGATION
      if (_values[ref].op == IR_COPY) {
        /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
         *   (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
        if (ir_sccp_identity(ctx, _values, ref) == val_identity) {
          return 0; /* not changed */
        }
        ir_sccp_split_partition(ctx, _values, worklist, ref);
        return 1;
      } else {
        IR_ASSERT(_values[ref].op != IR_BOTTOM);
        /* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
        /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
        ir_sccp_add_identity(ctx, _values, val, ref);
        return 1;
      }
#endif

      IR_MAKE_BOTTOM(ref);
      return 1;
    }
  }

  return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
}

static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn)
{
  ir_insn *op1_insn, *op2_insn, *op3_insn;
  ir_ref op1, op2, op3, copy;
  uint32_t opt = insn->opt;

  op1 = ir_sccp_identity(ctx, _values, insn->op1);
  op2 = ir_sccp_identity(ctx, _values, insn->op2);
  op3 = ir_sccp_identity(ctx, _values, insn->op3);

restart:
  op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
  op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
  op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;

  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
    case IR_FOLD_DO_RESTART:
      opt = ctx->fold_insn.optx;
      op1 = ctx->fold_insn.op1;
      op2 = ctx->fold_insn.op2;
      op3 = ctx->fold_insn.op3;
      goto restart;
    case IR_FOLD_DO_CSE:
    case IR_FOLD_DO_EMIT:
      IR_MAKE_BOTTOM_EX(ref);
      return 1;
    case IR_FOLD_DO_COPY:
      copy = ctx->fold_insn.op1;
      return ir_sccp_meet(ctx, _values, worklist, ref, copy);
    case IR_FOLD_DO_CONST:
      return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn);
    default:
      IR_ASSERT(0);
      return 0;
  }
}
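
/*
 * An illustrative mapping of ir_folding() outcomes to lattice moves (a
 * sketch, assuming the usual ir_fold.h rules):
 *
 *   ADD(3, 4) -> IR_FOLD_DO_CONST : meet the new constant 7 into ref
 *   ADD(x, 0) -> IR_FOLD_DO_COPY  : meet COPY(x) into ref
 *   ADD(x, y) -> IR_FOLD_DO_CSE /
 *                IR_FOLD_DO_EMIT  : nothing to propagate, ref goes BOTTOM
 *
 * During the analysis no instruction is rewritten yet; only the shadow
 * _values[] lattice moves, and rewriting happens in the transform phase.
 */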

static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn)
{
  ir_ref j, n, input, *merge_input, *p;
  ir_insn *v, *new_const = NULL;
#if IR_COMBO_COPY_PROPAGATION
  ir_ref new_copy = IR_UNUSED;
  ir_ref new_copy_identity = IR_UNUSED;
  ir_ref phi_identity = ir_sccp_identity(ctx, _values, i);
#endif

  if (!IR_IS_REACHABLE(insn->op1)) {
    return 0;
  }
  n = insn->inputs_count;
  if (n > 3 && _values[i].op == IR_TOP) {
    for (j = 0; j < (n>>2); j++) {
      _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
    }
  }

  p = insn->ops + 2;
  merge_input = ctx->ir_base[insn->op1].ops + 1;
  for (; --n > 0; p++, merge_input++) {
    IR_ASSERT(*merge_input > 0);
    if (!IR_IS_REACHABLE(*merge_input)) {
      continue;
    }

    input = *p;
    if (IR_IS_CONST_REF(input)) {
      v = &ctx->ir_base[input];
    } else if (input == i) {
      continue;
    } else {
      v = &_values[input];
      if (v->op == IR_TOP) {
        ir_sccp_add_input(ctx, _values, worklist, input);
        continue;
#if IR_COMBO_COPY_PROPAGATION
      } else if (v->op == IR_COPY) {
        input = v->op1;
        new_copy_identity = ir_sccp_identity(ctx, _values, input);
        if (new_copy_identity == phi_identity) {
          new_copy_identity = IR_UNUSED;
          continue;
        }
        new_copy = input;
        goto next;
#endif
      } else if (v->op == IR_BOTTOM) {
#if IR_COMBO_COPY_PROPAGATION
        if (input == phi_identity) {
          continue;
        }
        new_copy = new_copy_identity = input;
        goto next;
#else
        goto make_bottom;
#endif
      }
    }
    new_const = v;
    goto next;
  }

  return 0;

next:
  p++;
  merge_input++;
  /* for all live merge inputs */
  for (; --n > 0; p++, merge_input++) {
    IR_ASSERT(*merge_input > 0);
    if (!IR_IS_REACHABLE(*merge_input)) {
      continue;
    }

    input = *p;
    if (IR_IS_CONST_REF(input)) {
#if IR_COMBO_COPY_PROPAGATION
      if (new_copy) {
        goto make_bottom;
      }
#endif
      v = &ctx->ir_base[input];
    } else if (input == i) {
      continue;
    } else {
      v = &_values[input];
      if (v->op == IR_TOP) {
        ir_sccp_add_input(ctx, _values, worklist, input);
        continue;
#if IR_COMBO_COPY_PROPAGATION
      } else if (v->op == IR_COPY) {
        ir_ref identity = ir_sccp_identity(ctx, _values, v->op1);

        if (identity == phi_identity || identity == new_copy_identity) {
          continue;
        }
        goto make_bottom;
#endif
      } else if (v->op == IR_BOTTOM) {
#if IR_COMBO_COPY_PROPAGATION
        if (input == phi_identity || input == new_copy_identity) {
          continue;
        }
#endif
        goto make_bottom;
      }
    }
    if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
      goto make_bottom;
    }
  }

#if IR_COMBO_COPY_PROPAGATION
  if (new_copy) {
    return ir_sccp_meet(ctx, _values, worklist, i, new_copy);
  }
#endif

  return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);

make_bottom:
  IR_MAKE_BOTTOM_EX(i);
  return 1;
}
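
/*
 * A worked example for the PHI meet above (illustrative): given
 *
 *   m   = MERGE(in1, in2, in3)    (in2 currently unreachable)
 *   phi = PHI(m, 5, x, 5)
 *
 * the input "x" on the unreachable edge is ignored and the remaining live
 * inputs agree on the constant 5, so phi meets CONST(5). If in2 later
 * becomes reachable, the meet is repeated and phi falls to BOTTOM (5 vs x).
 */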

static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
{
  if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
    return ctx->use_lists[ref].count == 1;
  } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
    return ctx->use_lists[ref].count == 1;
  }
  return 0;
}

static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
{
  if (ctx->use_lists[ref].count == 1) {
    ir_insn *insn = &ctx->ir_base[ref];
    uint32_t flags = ir_op_flags[insn->op];

    if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
      return 1;
    } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
      return 1;
    }
  }
  return 0;
}

static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
{
  if (ctx->use_lists[ref].count == 0) {
    return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
  } else {
    return ir_is_dead_load(ctx, ref);
  }
}

static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
{
  ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];

  return ir_const_is_true(v);
}

static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
{
  ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
  ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];

  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
  return v1->val.u64 == v2->val.u64;
}

static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c)
{
  ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
  ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
  ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c];

  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v3->op));
  if (IR_IS_TYPE_SIGNED(v1->type)) {
    return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64;
  } else {
    return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64;
  }
}

#ifdef IR_SCCP_TRACE
static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i)
{
  if (IR_IS_BOTTOM(i)) {
    fprintf(stderr, "BOTTOM");
  } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
    fprintf(stderr, "CONST(");
    ir_print_const(ctx, &_values[i], stderr, true);
    fprintf(stderr, ")");
#if IR_COMBO_COPY_PROPAGATION
  } else if (_values[i].op == IR_COPY) {
    fprintf(stderr, "COPY(%d)", _values[i].op1);
#endif
  } else if (IR_IS_TOP(i)) {
    fprintf(stderr, "TOP");
  } else if (_values[i].op == IR_IF) {
    fprintf(stderr, "IF(%d)", _values[i].op1);
  } else if (_values[i].op == IR_MERGE) {
    fprintf(stderr, "MERGE(%d)", _values[i].op1);
  } else {
    fprintf(stderr, "%d", _values[i].op);
  }
}

static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i)
{
  fprintf(stderr, "%d. ", i);
  ir_sccp_trace_val(ctx, _values, i);
}

static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i)
{
  fprintf(stderr, " -> ");
  ir_sccp_trace_val(ctx, _values, i);
  fprintf(stderr, "\n");
}
#else
# define ir_sccp_trace_start(c, v, i)
# define ir_sccp_trace_end(c, v, i)
#endif

static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
  ir_ref i, j, n, *p, use;
  ir_use_list *use_list;
  ir_insn *insn, *use_insn;
  uint32_t flags;

  /* A slightly modified version of the SCCP algorithm of M. N. Wegman and F. K. Zadeck */
  worklist->pos = 0;
  ir_bitset_incl(worklist->set, 1);
  for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
    IR_ASSERT(_values[i].op != IR_BOTTOM);
    ir_sccp_trace_start(ctx, _values, i);
    insn = &ctx->ir_base[i];
    flags = ir_op_flags[insn->op];
    if (flags & IR_OP_FLAG_DATA) {
      if (ctx->use_lists[i].count == 0) {
        /* dead code */
        continue;
      } else if (insn->op == IR_PHI) {
        if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
          continue;
        }
      } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
        bool may_benefit = 0;
        bool has_top = 0;

        if (_values[i].op != IR_TOP) {
          may_benefit = 1;
        }

        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
        n = IR_INPUT_EDGES_COUNT(flags);
        for (p = insn->ops + 1; n > 0; p++, n--) {
          ir_ref input = *p;
          if (input > 0) {
            if (_values[input].op == IR_TOP) {
              has_top = 1;
              ir_sccp_add_input(ctx, _values, worklist, input);
            } else if (_values[input].op != IR_BOTTOM) {
              /* Perform folding only if some of the direct inputs
               * are going to be replaced by a constant or copy.
               * This approach may miss some folding optimizations
               * that depend on indirect inputs, e.g. reassociation.
               */
              may_benefit = 1;
            }
          }
        }
        if (has_top) {
          continue;
        }
        if (!may_benefit) {
          IR_MAKE_BOTTOM_EX(i);
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
            ir_bitqueue_add(iter_worklist, i);
          }
        } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
          /* not changed */
          continue;
        } else if (_values[i].op == IR_BOTTOM) {
          insn = &ctx->ir_base[i];
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
            ir_bitqueue_add(iter_worklist, i);
          }
        }
      } else {
        IR_MAKE_BOTTOM_EX(i);
        n = IR_INPUT_EDGES_COUNT(flags);
        for (p = insn->ops + 1; n > 0; p++, n--) {
          ir_ref input = *p;
          if (input > 0) {
            if (_values[input].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, input);
            }
          }
        }
      }
    } else if (flags & IR_OP_FLAG_BB_START) {
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
        ir_bitqueue_add(iter_worklist, i);
      }
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
        ir_ref unfeasible_inputs = 0;

        n = insn->inputs_count;
        if (n > 3 && _values[i].op == IR_TOP) {
          for (j = 0; j < (n>>2); j++) {
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
          }
        }
        for (p = insn->ops + 1; n > 0; p++, n--) {
          ir_ref input = *p;
          IR_ASSERT(input > 0);
          if (!IR_IS_REACHABLE(input)) {
            unfeasible_inputs++;
          }
        }
        if (unfeasible_inputs == 0) {
          IR_MAKE_BOTTOM(i);
        } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) {
          _values[i].optx = IR_MERGE;
          _values[i].op1 = unfeasible_inputs;
        } else {
          continue;
        }
        if (ctx->flags2 & IR_MEM2SSA_VARS) {
          /* MEM2SSA puts new PHIs at the bottom, but we'd like to process them now */
          use_list = &ctx->use_lists[i];
          n = use_list->count;
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
            use = *p;
            if (_values[use].op != IR_BOTTOM) {
              if (ctx->ir_base[use].op == IR_PHI) {
                ir_bitqueue_del(worklist, use);
                if (ctx->use_lists[use].count != 0) {
                  if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) {
                    ir_sccp_add_uses(ctx, _values, worklist, use);
                  }
                }
              } else {
                ir_bitqueue_add(worklist, use);
              }
            }
          }
          continue;
        }
      } else {
        IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1));
        IR_MAKE_BOTTOM(i);
      }
    } else {
      IR_ASSERT(insn->op1 > 0);
      if (!IR_IS_REACHABLE(insn->op1)) {
        /* control input is not feasible */
        continue;
      }
      if (insn->op == IR_IF) {
        if (IR_IS_TOP(insn->op2)) {
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
          continue;
        }
        if (IR_IS_CONST(insn->op2)) {
          bool b = ir_sccp_is_true(ctx, _values, insn->op2);
          use_list = &ctx->use_lists[i];
          IR_ASSERT(use_list->count == 2);
          p = &ctx->use_edges[use_list->refs];
          use = *p;
          use_insn = &ctx->ir_base[use];
          IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
          if ((use_insn->op == IR_IF_TRUE) != b) {
            use = *(p+1);
            IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
          }
          if (_values[i].op == IR_TOP) {
            _values[i].optx = IR_IF;
            _values[i].op1 = use;
            ir_bitqueue_add(worklist, use);
            continue;
          } else if (_values[i].op == IR_IF && _values[i].op1 == use) {
            continue;
          }
        }
        IR_MAKE_BOTTOM(i);
        ir_bitqueue_add(iter_worklist, i);
      } else if (insn->op == IR_SWITCH) {
        if (IR_IS_TOP(insn->op2)) {
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
          continue;
        }
        if (IR_IS_CONST(insn->op2)) {
          ir_ref use_case = IR_UNUSED;

          use_list = &ctx->use_lists[i];
          n = use_list->count;
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
            use = *p;
            IR_ASSERT(use > 0);
            use_insn = &ctx->ir_base[use];
            if (use_insn->op == IR_CASE_VAL) {
              if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
                use_case = use;
                break;
              }
            } else if (use_insn->op == IR_CASE_DEFAULT) {
              use_case = use;
            } else if (use_insn->op == IR_CASE_RANGE) {
              if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) {
                use_case = use;
                break;
              }
            }
          }
          if (use_case) {
            use_insn = &ctx->ir_base[use_case];
            if (_values[i].op == IR_TOP) {
              _values[i].optx = IR_IF;
              _values[i].op1 = use_case;
              ir_bitqueue_add(worklist, use_case);
              continue;
            } else if (_values[i].op == IR_IF && _values[i].op1 == use_case) {
              continue;
            }
          }
        }
        IR_MAKE_BOTTOM(i);
      } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
        /* schedule dead load elimination */
        ir_bitqueue_add(iter_worklist, i);
        IR_MAKE_BOTTOM(i);
      } else {
        /* control, call, load and store instructions may have unprocessed inputs */
        n = IR_INPUT_EDGES_COUNT(flags);
        if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
          for (j = 0; j < (n>>2); j++) {
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
          }
          for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
            IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
            use = *p;
            if (use > 0 && _values[use].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, use);
            }
          }
        } else if (n >= 2) {
          IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
          use = insn->op2;
          if (use > 0 && _values[use].op == IR_TOP) {
            ir_sccp_add_input(ctx, _values, worklist, use);
          }
          if (n > 2) {
            IR_ASSERT(n == 3);
            IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
            use = insn->op3;
            if (use > 0 && _values[use].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, use);
            }
          }
        }
        IR_MAKE_BOTTOM(i);
      }
    }
    ir_sccp_add_uses(ctx, _values, worklist, i);
  }

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_SCCP) {
    for (i = 1; i < ctx->insns_count; i++) {
      if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
        fprintf(stderr, "%d. CONST(", i);
        ir_print_const(ctx, &_values[i], stderr, true);
        fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
      } else if (_values[i].op == IR_COPY) {
        fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
#endif
      } else if (IR_IS_TOP(i)) {
        fprintf(stderr, "%d. TOP\n", i);
      } else if (_values[i].op == IR_IF) {
        fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
      } else if (_values[i].op == IR_MERGE) {
        fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
      } else if (!IR_IS_BOTTOM(i)) {
        fprintf(stderr, "%d. %d\n", i, _values[i].op);
      }
    }
  }
#endif
}
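
/*
 * A small illustration of the conditional part of the analysis (sketch
 * only): for an IF whose condition folds to the constant "false", only the
 * IF_FALSE successor is queued, so the IF_TRUE block stays at TOP and is
 * treated as unreachable. _values[if_ref] becomes IF with op1 pointing at
 * the single feasible target, which is exactly the shape that
 * ir_sccp_remove_if() consumes during the transformation phase below.
 */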

/***********************/
/* SCCP transformation */
/***********************/

static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    *p = IR_UNUSED;
  }
}

static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
    if (input > 0 && _values[input].op > IR_COPY) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      }
    }
  }
}

static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p, use, i;
  ir_insn *insn;
  ir_use_list *use_list;

  IR_ASSERT(ref != new_ref);

  insn = &ctx->ir_base[ref];

#if IR_COMBO_COPY_PROPAGATION
  if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) {
    /* remove from control list */
    ir_ref prev = insn->op1;
    ir_ref next = ir_next_control(ctx, ref);
    ctx->ir_base[next].op1 = prev;
    ir_use_list_remove_one(ctx, ref, next);
    ir_use_list_replace_one(ctx, prev, ref, next);
    insn->op1 = IR_UNUSED;
  }
#endif

  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
    if (input > 0 && _values[input].op > IR_COPY) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      }
    }
  }

  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  p = &ctx->use_edges[use_list->refs];
  if (new_ref <= 0) {
    /* constant or IR_UNUSED */
    for (; n; p++, n--) {
      use = *p;
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
      if (_values[use].op > IR_COPY) {
        insn = &ctx->ir_base[use];
        i = ir_insn_find_op(insn, ref);
        if (!i) continue;
        IR_ASSERT(i > 0);
        ir_insn_set_op(insn, i, new_ref);
        /* schedule folding */
        ir_bitqueue_add(worklist, use);
      }
    }
  } else {
    for (j = 0; j < n; j++, p++) {
      use = *p;
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
      if (_values[use].op == IR_BOTTOM) {
        insn = &ctx->ir_base[use];
        i = ir_insn_find_op(insn, ref);
        IR_ASSERT(i > 0);
        ir_insn_set_op(insn, i, new_ref);
        if (ir_use_list_add(ctx, new_ref, use)) {
          /* restore after reallocation */
          use_list = &ctx->use_lists[ref];
          n = use_list->count;
          p = &ctx->use_edges[use_list->refs + j];
        }
        /* schedule folding */
        ir_bitqueue_add(worklist, use);
      }
    }
  }
  CLEAR_USES(ref);
}

static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
{
  ir_ref next;
  ir_insn *insn, *next_insn;

  insn = &ctx->ir_base[ref];
  if (ctx->use_lists[dst].count == 1) {
    next = ctx->use_edges[ctx->use_lists[dst].refs];
    next_insn = &ctx->ir_base[next];
    /* remove IF and IF_TRUE/FALSE from the doubly-linked control list */
    next_insn->op1 = insn->op1;
    ir_use_list_replace_one(ctx, insn->op1, ref, next);
    /* remove IF and IF_TRUE/FALSE instructions */
    ir_sccp_make_nop(ctx, ref);
    ir_sccp_make_nop(ctx, dst);
  } else {
    insn->op2 = IR_UNUSED;
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
    next_insn = &ctx->ir_base[dst];
    next_insn->op = IR_BEGIN;
  }
}

static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
  ir_ref old_merge_inputs, new_merge_inputs, i, *p;
  ir_use_list *use_list;
  ir_bitset life_inputs;
  ir_bitset_base_t holder = 0;

  IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
  old_merge_inputs = insn->inputs_count;
  new_merge_inputs = 0;
  life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);

  for (i = 1; i <= old_merge_inputs; i++) {
    ir_ref input = ir_insn_op(insn, i);

    if (input) {
      new_merge_inputs++;
      if (new_merge_inputs != i) {
        ir_insn_set_op(insn, new_merge_inputs, input);
      }
      ir_bitset_incl(life_inputs, i);
    }
  }

  if (new_merge_inputs == old_merge_inputs) {
    /* All inputs are feasible */
    if (life_inputs != &holder) {
      ir_mem_free(life_inputs);
    }
    return 0;
  }

  for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
    ir_insn_set_op(insn, i, IR_UNUSED);
  }

  if (new_merge_inputs <= 1) {
#if 0
    if (new_merge_inputs == 1
     && insn->op == IR_LOOP_BEGIN
     && insn->op1 > ref) { // TODO: check dominance instead of order
      /* dead loop */
      ir_use_list_remove_one(ctx, insn->op1, ref);
      insn->op1 = IR_UNUSED;
      new_merge_inputs = 0;
    }
#endif
    insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
    ir_bitqueue_add(worklist, ref);
  } else {
    insn->inputs_count = new_merge_inputs;
  }

  /* Update PHIs */
  use_list = &ctx->use_lists[ref];
  if (use_list->count > 1) {
    ir_ref use_count = 0;
    ir_ref *q;

    for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
      ir_ref use = *p;
      ir_insn *use_insn = &ctx->ir_base[use];

      if (use_insn->op == IR_PHI) {
        ir_ref j, k;

        /* compress PHI */
        for (j = k = 1; j <= old_merge_inputs; j++) {
          ir_ref input = ir_insn_op(use_insn, j + 1);

          if (ir_bitset_in(life_inputs, j)) {
            IR_ASSERT(input);
            if (k != j) {
              ir_insn_set_op(use_insn, k + 1, input);
            }
            k++;
          } else if (input > 0) {
            ir_use_list_remove_one(ctx, input, use);
          }
        }
        while (k <= old_merge_inputs) {
          k++;
          ir_insn_set_op(use_insn, k, IR_UNUSED);
        }

        if (new_merge_inputs == 0) {
          /* remove PHI */
#if 0
          use_insn->op1 = IR_UNUSED;
          ir_iter_remove_insn(ctx, use, worklist);
#else
          IR_ASSERT(0);
#endif
          continue;
        } else if (new_merge_inputs == 1) {
          /* replace PHI by COPY */
          use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
          use_insn->op1 = use_insn->op2;
          use_insn->op2 = IR_UNUSED;
          ir_bitqueue_add(worklist, use);
          continue;
        } else {
          use_insn->inputs_count = new_merge_inputs + 1;
        }
      }
      if (p != q) {
        *q = use;
      }
      q++;
      use_count++;
    }
    for (i = use_count; i < use_list->count; q++, i++) {
      *q = IR_UNUSED;
    }
    use_list->count = use_count;
  }

  if (life_inputs != &holder) {
    ir_mem_free(life_inputs);
  }

  return 1;
}
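
/*
 * An illustrative before/after for the compression above (sketch only):
 *
 *   before:  m = MERGE(e1, e2, e3)    with e2 unfeasible
 *            p = PHI(m, v1, v2, v3)
 *   after:   m = MERGE(e1, e3)
 *            p = PHI(m, v1, v3)
 *
 * With a single remaining input the MERGE degenerates to BEGIN and each
 * PHI to a COPY of its only surviving value; both are re-queued so the
 * iterative pass can fold them further.
 */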

static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
  ir_ref i, j;
  ir_insn *value;

  for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
    if (value->op == IR_BOTTOM) {
      continue;
    } else if (IR_IS_CONST_OP(value->op)) {
      /* replace instruction by constant */
      j = ir_const(ctx, value->val, value->type);
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
    } else if (IR_IS_SYM_CONST(value->op)) {
      /* replace instruction by constant */
      j = ir_const_ex(ctx, value->val, value->type, value->optx);
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
#if IR_COMBO_COPY_PROPAGATION
    } else if (value->op == IR_COPY) {
      ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist);
#endif
    } else if (value->op == IR_TOP) {
      /* remove unreachable instruction */
      ir_insn *insn = &ctx->ir_base[i];

      if (insn->op == IR_NOP) {
        /* already removed */
      } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
        if (insn->op != IR_PARAM) {
          ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
        }
      } else {
        if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
          /* remove from terminators list */
          ir_ref prev = ctx->ir_base[1].op1;
          if (prev == i) {
            ctx->ir_base[1].op1 = insn->op3;
          } else {
            while (prev) {
              if (ctx->ir_base[prev].op3 == i) {
                ctx->ir_base[prev].op3 = insn->op3;
                break;
              }
              prev = ctx->ir_base[prev].op3;
            }
          }
        }
        ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist);
      }
    } else if (value->op == IR_IF) {
      /* remove one-way IF/SWITCH */
      ir_sccp_remove_if(ctx, _values, i, value->op1);
    } else if (value->op == IR_MERGE) {
      /* schedule merge to remove unfeasible MERGE inputs */
      ir_bitqueue_add(worklist, i);
    }
  }

  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
    IR_ASSERT(_values[i].op == IR_MERGE);
    ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
  }
}

/***************************/
/* Iterative Optimizations */
/***************************/

/* Modification of an instruction may open new optimization opportunities for
 * other instructions that use it.
 *
 * For example, let "a = ADD(x, y)" become "a = ADD(x, C1)". In case we also have
 * "b = ADD(a, C2)" we may optimize it into "b = ADD(x, C1 + C2)" and then might
 * also remove "a".
 *
 * This implementation supports only a few of the optimization combinations from ir_fold.h.
 *
 * TODO: Think about a more general solution ???
 */
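
/*
 * A concrete instance of the chain described above (illustrative, with
 * C1 = 1 and C2 = 2):
 *
 *   a = ADD(x, 1)
 *   b = ADD(a, 2)    =>    b = ADD(x, 3), and "a" becomes dead
 *
 * ir_iter_add_related_uses() below is what re-queues "b" when "a" changes,
 * so the worklist reaches such second-order folds.
 */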

static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];

  if (insn->op == IR_ADD || insn->op == IR_SUB) {
    ir_use_list *use_list = &ctx->use_lists[ref];

    if (use_list->count == 1) {
      ir_ref use = ctx->use_edges[use_list->refs];
      ir_insn *use_insn = &ctx->ir_base[use];

      if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
        ir_bitqueue_add(worklist, use);
      }
    }
  }
}

static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    if (input > 0) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
        /* try to optimize PHI into ABS/MIN/MAX/COND */
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
      }
    }
  }
}

void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref i, j, n, *p, use;
  ir_insn *insn;
  ir_use_list *use_list;

  IR_ASSERT(ref != new_ref);

  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  p = &ctx->use_edges[use_list->refs];
  if (new_ref <= 0) {
    /* constant or IR_UNUSED */
    for (; n; p++, n--) {
      use = *p;
      IR_ASSERT(use != ref);
      insn = &ctx->ir_base[use];
      i = ir_insn_find_op(insn, ref);
      IR_ASSERT(i > 0);
      ir_insn_set_op(insn, i, new_ref);
      /* schedule folding */
      ir_bitqueue_add(worklist, use);
      ir_iter_add_related_uses(ctx, use, worklist);
    }
  } else {
    for (j = 0; j < n; j++, p++) {
      use = *p;
      IR_ASSERT(use != ref);
      insn = &ctx->ir_base[use];
      i = ir_insn_find_op(insn, ref);
      IR_ASSERT(i > 0);
      ir_insn_set_op(insn, i, new_ref);
      if (ir_use_list_add(ctx, new_ref, use)) {
        /* restore after reallocation */
        use_list = &ctx->use_lists[ref];
        n = use_list->count;
        p = &ctx->use_edges[use_list->refs + j];
      }
      /* schedule folding */
      ir_bitqueue_add(worklist, use);
    }
  }
}

static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    if (input > 0) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
        /* try to optimize PHI into ABS/MIN/MAX/COND */
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
      }
    }
  }

  ir_iter_replace(ctx, ref, new_ref, worklist);

  CLEAR_USES(ref);
}

void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];
  ir_ref old_val = ir_insn_op(insn, idx);

  IR_ASSERT(old_val != new_val);
  if (!IR_IS_CONST_REF(new_val)) {
    ir_use_list_add(ctx, new_val, ref);
  }
  ir_insn_set_op(insn, idx, new_val);
  if (!IR_IS_CONST_REF(old_val)) {
    ir_use_list_remove_one(ctx, old_val, ref);
    if (ir_is_dead(ctx, old_val)) {
      /* schedule DCE */
      ir_bitqueue_add(worklist, old_val);
    }
  }
}

static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
{
  IR_ASSERT(!IR_IS_CONST_REF(op1));

  ir_use_list *use_list = &ctx->use_lists[op1];
  ir_ref *p, n = use_list->count;

  for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
    ir_ref use = *p;
    ir_insn *use_insn = &ctx->ir_base[use];

    if (use_insn->optx == optx) {
      IR_ASSERT(use_insn->op1 == op1);
      return use;
    }
  }
  return IR_UNUSED;
}

static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
{
  uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
  ir_use_list *use_list = NULL;
  ir_ref *p, use;
  ir_insn *use_insn;

  if (n == 2) {
    if (!IR_IS_CONST_REF(op1)) {
      use_list = &ctx->use_lists[op1];
    }
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
      use_list = &ctx->use_lists[op2];
    }
    if (use_list) {
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) {
            IR_ASSERT(use_insn->op3 == op3);
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  } else if (n < 2) {
    if (op1 > 0) {
      IR_ASSERT(n == 1);
      use_list = &ctx->use_lists[op1];
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt) {
            IR_ASSERT(use_insn->op1 == op1);
            IR_ASSERT(use_insn->op2 == op2);
            IR_ASSERT(use_insn->op3 == op3);
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  } else {
    IR_ASSERT(n == 3);
    if (!IR_IS_CONST_REF(op1)) {
      use_list = &ctx->use_lists[op1];
    }
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
      use_list = &ctx->use_lists[op2];
    }
    if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
      use_list = &ctx->use_lists[op3];
    }
    if (use_list) {
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) {
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  }
  return IR_UNUSED;
}

static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  uint32_t opt;
  ir_ref op1, op2, op3, copy;
  ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;

  insn = &ctx->ir_base[ref];
  opt = insn->opt;
  op1 = insn->op1;
  op2 = insn->op2;
  op3 = insn->op3;

restart:
  op1_insn = ctx->ir_base + op1;
  op2_insn = ctx->ir_base + op2;
  op3_insn = ctx->ir_base + op3;

  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
    case IR_FOLD_DO_RESTART:
      opt = ctx->fold_insn.optx;
      op1 = ctx->fold_insn.op1;
      op2 = ctx->fold_insn.op2;
      op3 = ctx->fold_insn.op3;
      goto restart;
    case IR_FOLD_DO_CSE:
      copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt,
        ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist);
      if (copy) {
        ir_iter_replace_insn(ctx, ref, copy, worklist);
        break;
      }
      IR_FALLTHROUGH;
    case IR_FOLD_DO_EMIT:
      insn = &ctx->ir_base[ref];
      if (insn->opt != ctx->fold_insn.opt
       || insn->op1 != ctx->fold_insn.op1
       || insn->op2 != ctx->fold_insn.op2
       || insn->op3 != ctx->fold_insn.op3) {

        ir_use_list *use_list;
        ir_ref n, j, *p, use;

        insn->optx = ctx->fold_insn.opt;
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
        insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
        if (insn->op1 != ctx->fold_insn.op1) {
          if (insn->op1 > 0) {
            ir_use_list_remove_one(ctx, insn->op1, ref);
          }
          if (ctx->fold_insn.op1 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
          }
        }
        if (insn->op2 != ctx->fold_insn.op2) {
          if (insn->op2 > 0) {
            ir_use_list_remove_one(ctx, insn->op2, ref);
          }
          if (ctx->fold_insn.op2 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
          }
        }
        if (insn->op3 != ctx->fold_insn.op3) {
          if (insn->op3 > 0) {
            ir_use_list_remove_one(ctx, insn->op3, ref);
          }
          if (ctx->fold_insn.op3 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
          }
        }
        insn->op1 = ctx->fold_insn.op1;
        insn->op2 = ctx->fold_insn.op2;
        insn->op3 = ctx->fold_insn.op3;

        use_list = &ctx->use_lists[ref];
        n = use_list->count;
        for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
          use = *p;
          ir_bitqueue_add(worklist, use);
        }
      }
      break;
    case IR_FOLD_DO_COPY:
      op1 = ctx->fold_insn.op1;
      ir_iter_replace_insn(ctx, ref, op1, worklist);
      break;
    case IR_FOLD_DO_CONST:
      op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
      ir_iter_replace_insn(ctx, ref, op1, worklist);
      break;
    default:
      IR_ASSERT(0);
      break;
  }
}

static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
{
  ir_insn *insn = &ctx->ir_base[ref];

  IR_ASSERT(insn->type == IR_DOUBLE);
  if (IR_IS_CONST_REF(ref)) {
    return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
  } else {
    switch (insn->op) {
      case IR_FP2FP:
        return 1;
//      case IR_INT2FP:
//        return ctx->use_lists[ref].count == 1;
      case IR_NEG:
      case IR_ABS:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_d2f(ctx, insn->op1);
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
      case IR_DIV:
      case IR_MIN:
      case IR_MAX:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_d2f(ctx, insn->op1) &&
          ir_may_promote_d2f(ctx, insn->op2);
      default:
        break;
    }
  }
  return 0;
}
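
/*
 * An illustrative legality check for the double->float demotion above
 * (a sketch of what the code tests): a double constant qualifies only if
 * the float round-trip is exact, e.g.
 *
 *   1.5 : (double)(float)1.5 == 1.5   -> promotable
 *   0.1 : (double)(float)0.1 != 0.1   -> not promotable
 *
 * A non-constant expression qualifies only if it is single-use (its value
 * is observed nowhere else) and all of its leaves are themselves promotable.
 */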
1499
1500
static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
1501
0
{
1502
0
  ir_insn *insn = &ctx->ir_base[ref];
1503
1504
0
  IR_ASSERT(insn->type == IR_FLOAT);
1505
0
  if (IR_IS_CONST_REF(ref)) {
1506
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
1507
0
  } else {
1508
0
    switch (insn->op) {
1509
0
      case IR_FP2FP:
1510
0
        return 1;
1511
0
      case IR_INT2FP:
1512
0
        return ctx->use_lists[ref].count == 1;
1513
0
      case IR_NEG:
1514
0
      case IR_ABS:
1515
0
        return ctx->use_lists[ref].count == 1 &&
1516
0
          ir_may_promote_f2d(ctx, insn->op1);
1517
0
      case IR_ADD:
1518
0
      case IR_SUB:
1519
0
      case IR_MUL:
1520
//      case IR_DIV:
1521
0
      case IR_MIN:
1522
0
      case IR_MAX:
1523
0
        return ctx->use_lists[ref].count == 1 &&
1524
0
          ir_may_promote_f2d(ctx, insn->op1) &&
1525
0
          ir_may_promote_f2d(ctx, insn->op2);
1526
0
      default:
1527
0
        break;
1528
0
    }
1529
0
  }
1530
0
  return 0;
1531
0
}
1532
1533
static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1534
0
{
1535
0
  ir_insn *insn = &ctx->ir_base[ref];
1536
0
  uint32_t count;
1537
1538
0
  IR_ASSERT(insn->type == IR_DOUBLE);
1539
0
  if (IR_IS_CONST_REF(ref)) {
1540
0
    return ir_const_float(ctx, (float)insn->val.d);
1541
0
  } else {
1542
0
    ir_bitqueue_add(worklist, ref);
1543
0
    switch (insn->op) {
1544
0
      case IR_FP2FP:
1545
0
        count = ctx->use_lists[ref].count;
1546
0
        ir_use_list_remove_all(ctx, ref, use);
1547
0
        if (ctx->use_lists[ref].count == 0) {
1548
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1549
0
          if (count > 1) {
1550
0
            do {
1551
0
              ir_use_list_add(ctx, insn->op1, use);
1552
0
            } while (--count > 1);
1553
0
          }
1554
0
          ref = insn->op1;
1555
0
          MAKE_NOP(insn);
1556
0
          return ref;
1557
0
        } else {
1558
0
          ir_use_list_add(ctx, insn->op1, use);
1559
0
          count -= ctx->use_lists[ref].count;
1560
0
          if (count > 1) {
1561
0
            do {
1562
0
              ir_use_list_add(ctx, insn->op1, use);
1563
0
            } while (--count > 1);
1564
0
          }
1565
0
        }
1566
0
        return insn->op1;
1567
//      case IR_INT2FP:
1568
//        insn->type = IR_FLOAT;
1569
//        return ref;
1570
0
      case IR_NEG:
1571
0
      case IR_ABS:
1572
0
        insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1573
0
        insn->type = IR_FLOAT;
1574
0
        return ref;
1575
0
      case IR_ADD:
1576
0
      case IR_SUB:
1577
0
      case IR_MUL:
1578
0
      case IR_DIV:
1579
0
      case IR_MIN:
1580
0
      case IR_MAX:
1581
0
        if (insn->op1 == insn->op2) {
1582
0
          insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1583
0
        } else {
1584
0
          insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1585
0
          insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist);
1586
0
        }
1587
0
        insn->type = IR_FLOAT;
1588
0
        return ref;
1589
0
      default:
1590
0
        break;
1591
0
    }
1592
0
  }
1593
0
  IR_ASSERT(0);
1594
0
  return ref;
1595
0
}
1596
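/* Illustrative sketch (not part of the source): in the FP2FP case above, the
 * FLOAT -> DOUBLE conversion becomes redundant once its consumer is retyped,
 * so the FLOAT input is forwarded directly.  The do/while loops re-add `use`
 * once per removed edge, because a consumer may reference the conversion
 * through several operands and each edge needs its own use-list entry:
 *
 *    t = FP2FP/d(f)                  (FLOAT -> DOUBLE)
 *    ... FP2FP/f(ADD/d(t, t))   =>   ... ADD/f(f, f)
 */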
1597
static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1598
0
{
1599
0
  ir_insn *insn = &ctx->ir_base[ref];
1600
0
  uint32_t count;
1601
0
  ir_ref old_ref;
1602
1603
0
  IR_ASSERT(insn->type == IR_FLOAT);
1604
0
  if (IR_IS_CONST_REF(ref)) {
1605
0
    return ir_const_double(ctx, (double)insn->val.f);
1606
0
  } else {
1607
0
    ir_bitqueue_add(worklist, ref);
1608
0
    switch (insn->op) {
1609
0
      case IR_FP2FP:
1610
0
        count = ctx->use_lists[ref].count;
1611
0
        ir_use_list_remove_all(ctx, ref, use);
1612
0
        if (ctx->use_lists[ref].count == 0) {
1613
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1614
0
          if (count > 1) {
1615
0
            do {
1616
0
              ir_use_list_add(ctx, insn->op1, use);
1617
0
            } while (--count > 1);
1618
0
          }
1619
0
          ref = insn->op1;
1620
0
          MAKE_NOP(insn);
1621
0
          return ref;
1622
0
        } else {
1623
0
          ir_use_list_add(ctx, insn->op1, use);
1624
0
          count -= ctx->use_lists[ref].count;
1625
0
          if (count > 1) {
1626
0
            do {
1627
0
              ir_use_list_add(ctx, insn->op1, use);
1628
0
            } while (--count > 1);
1629
0
          }
1630
0
        }
1631
0
        return insn->op1;
1632
0
      case IR_INT2FP:
1633
0
        old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
1634
0
        if (old_ref) {
1635
0
          IR_ASSERT(ctx->use_lists[ref].count == 1);
1636
0
          ir_use_list_remove_one(ctx, insn->op1, ref);
1637
0
          CLEAR_USES(ref);
1638
0
          MAKE_NOP(insn);
1639
0
          ir_use_list_add(ctx, old_ref, use);
1640
0
          return old_ref;
1641
0
        }
1642
0
        insn->type = IR_DOUBLE;
1643
0
        return ref;
1644
0
      case IR_NEG:
1645
0
      case IR_ABS:
1646
0
        insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1647
0
        insn->type = IR_DOUBLE;
1648
0
        return ref;
1649
0
      case IR_ADD:
1650
0
      case IR_SUB:
1651
0
      case IR_MUL:
1652
//      case IR_DIV:
1653
0
      case IR_MIN:
1654
0
      case IR_MAX:
1655
0
        if (insn->op1 == insn->op2) {
1656
0
          insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1657
0
        } else {
1658
0
          insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1659
0
          insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist);
1660
0
        }
1661
0
        insn->type = IR_DOUBLE;
1662
0
        return ref;
1663
0
      default:
1664
0
        break;
1665
0
    }
1666
0
  }
1667
0
  IR_ASSERT(0);
1668
0
  return ref;
1669
0
}
1670
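/* Illustrative note: widening FLOAT -> DOUBLE preserves the value of INT2FP
 * (every float, and hence every int -> float result, is exactly representable
 * as a double), which is plausibly why the INT2FP case is enabled here but
 * stays commented out in the narrowing direction above.  The
 * ir_iter_find_cse1() probe retypes only when no INT2FP/d of the same operand
 * already exists; otherwise the existing node is reused and this one is
 * removed.
 */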
1671
static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref)
1672
0
{
1673
0
  ir_insn *insn = &ctx->ir_base[ref];
1674
0
  ir_ref *p, n, input;
1675
1676
0
  if (IR_IS_CONST_REF(ref)) {
1677
0
    return !IR_IS_SYM_CONST(insn->op);
1678
0
  } else {
1679
0
    switch (insn->op) {
1680
0
      case IR_ZEXT:
1681
0
      case IR_SEXT:
1682
0
      case IR_TRUNC:
1683
0
        return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
1684
0
      case IR_NEG:
1685
0
      case IR_ABS:
1686
0
      case IR_NOT:
1687
0
        return ctx->use_lists[ref].count == 1 &&
1688
0
          ir_may_promote_trunc(ctx, type, insn->op1);
1689
0
      case IR_ADD:
1690
0
      case IR_SUB:
1691
0
      case IR_MUL:
1692
0
      case IR_MIN:
1693
0
      case IR_MAX:
1694
0
      case IR_OR:
1695
0
      case IR_AND:
1696
0
      case IR_XOR:
1697
0
      case IR_SHL:
1698
0
        return ctx->use_lists[ref].count == 1 &&
1699
0
          ir_may_promote_trunc(ctx, type, insn->op1) &&
1700
0
          ir_may_promote_trunc(ctx, type, insn->op2);
1701
//      case IR_SHR:
1702
//      case IR_SAR:
1703
//      case IR_DIV:
1704
//      case IR_MOD:
1705
//      case IR_FP2INT:
1706
//        TODO: ???
1707
0
      case IR_COND:
1708
0
        return ctx->use_lists[ref].count == 1 &&
1709
0
          ir_may_promote_trunc(ctx, type, insn->op2) &&
1710
0
          ir_may_promote_trunc(ctx, type, insn->op3);
1711
0
      case IR_PHI:
1712
0
        if (ctx->use_lists[ref].count != 1) {
1713
0
          ir_use_list *use_list = &ctx->use_lists[ref];
1714
0
          ir_ref count = 0;
1715
1716
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1717
0
            if (*p != ref) {
1718
0
              if (count) {
1719
0
                return 0;
1720
0
              }
1721
0
              count = 1;
1722
0
            }
1723
0
          }
1724
0
        }
1725
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1726
0
          input = *p;
1727
0
          if (input != ref) {
1728
0
            if (!ir_may_promote_trunc(ctx, type, input)) {
1729
0
              return 0;
1730
0
            }
1731
0
          }
1732
0
        }
1733
0
        return 1;
1734
0
      default:
1735
0
        break;
1736
0
    }
1737
0
  }
1738
0
  return 0;
1739
0
}
1740
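/* Illustrative sketch: truncation can be hoisted through any operation whose
 * low result bits depend only on the low bits of its inputs, e.g.
 *
 *    TRUNC/i32(ADD/i64(a, b))  ==  ADD/i32(TRUNC/i32(a), TRUNC/i32(b))
 *
 * This identity holds for ADD/SUB/MUL/AND/OR/XOR/SHL, but not for SHR, SAR,
 * DIV or MOD, whose low result bits depend on the discarded high bits, which
 * is presumably why those cases remain commented out above.
 */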
1741
static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1742
0
{
1743
0
  ir_insn *insn = &ctx->ir_base[ref];
1744
0
  uint32_t count;
1745
0
  ir_ref *p, n, input;
1746
1747
0
  if (IR_IS_CONST_REF(ref)) {
1748
0
    ir_val val;
1749
1750
0
    switch (type) {
1751
0
      case IR_I8:  val.i64 = insn->val.i8; break;
1752
0
      case IR_U8:  val.u64 = insn->val.u8; break;
1753
0
      case IR_I16: val.i64 = insn->val.i16; break;
1754
0
      case IR_U16: val.u64 = insn->val.u16; break;
1755
0
      case IR_I32: val.i64 = insn->val.i32; break;
1756
0
      case IR_U32: val.u64 = insn->val.u32; break;
1757
0
      case IR_CHAR:val.i64 = insn->val.i8; break;
1758
0
      case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
1759
0
      default: IR_ASSERT(0); val.u64 = 0;
1760
0
    }
1761
0
    return ir_const(ctx, val, type);
1762
0
  } else {
1763
0
    ir_bitqueue_add(worklist, ref);
1764
0
    switch (insn->op) {
1765
0
      case IR_ZEXT:
1766
0
      case IR_SEXT:
1767
0
      case IR_TRUNC:
1768
0
        if (ctx->ir_base[insn->op1].type != type) {
1769
0
          ir_type src_type = ctx->ir_base[insn->op1].type;
1770
0
          if (ir_type_size[src_type] == ir_type_size[type]) {
1771
0
            insn->op = IR_BITCAST;
1772
0
          } else if (ir_type_size[src_type] > ir_type_size[type]) {
1773
0
            insn->op = IR_TRUNC;
1774
0
          } else {
1775
0
            if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
1776
0
              insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
1777
0
            }
1778
0
          }
1779
0
          insn->type = type;
1780
0
          return ref;
1781
0
        }
1782
1783
0
        count = ctx->use_lists[ref].count;
1784
0
        ir_use_list_remove_all(ctx, ref, use);
1785
0
        if (ctx->use_lists[ref].count == 0) {
1786
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1787
0
          if (count > 1) {
1788
0
            do {
1789
0
              ir_use_list_add(ctx, insn->op1, use);
1790
0
            } while (--count > 1);
1791
0
          }
1792
0
          ref = insn->op1;
1793
0
          MAKE_NOP(insn);
1794
0
          return ref;
1795
0
        } else {
1796
0
          ir_use_list_add(ctx, insn->op1, use);
1797
0
          count -= ctx->use_lists[ref].count;
1798
0
          if (count > 1) {
1799
0
            do {
1800
0
              ir_use_list_add(ctx, insn->op1, use);
1801
0
            } while (--count > 1);
1802
0
          }
1803
0
        }
1804
0
        return insn->op1;
1805
0
      case IR_NEG:
1806
0
      case IR_ABS:
1807
0
      case IR_NOT:
1808
0
        insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1809
0
        insn->type = type;
1810
0
        return ref;
1811
0
      case IR_ADD:
1812
0
      case IR_SUB:
1813
0
      case IR_MUL:
1814
0
      case IR_MIN:
1815
0
      case IR_MAX:
1816
0
      case IR_OR:
1817
0
      case IR_AND:
1818
0
      case IR_XOR:
1819
0
      case IR_SHL:
1820
0
        if (insn->op1 == insn->op2) {
1821
0
          insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1822
0
        } else {
1823
0
          insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1824
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1825
0
        }
1826
0
        insn->type = type;
1827
0
        return ref;
1828
//      case IR_DIV:
1829
//      case IR_MOD:
1830
//      case IR_SHR:
1831
//      case IR_SAR:
1832
//      case IR_FP2INT:
1833
//        TODO: ???
1834
0
      case IR_COND:
1835
0
        if (insn->op2 == insn->op3) {
1836
0
          insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1837
0
        } else {
1838
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1839
0
          insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
1840
0
        }
1841
0
        insn->type = type;
1842
0
        return ref;
1843
0
      case IR_PHI:
1844
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1845
0
          input = *p;
1846
0
          if (input != ref) {
1847
0
            *p = ir_promote_i2i(ctx, type, input, ref, worklist);
1848
0
          }
1849
0
        }
1850
0
        insn->type = type;
1851
0
        return ref;
1852
0
      default:
1853
0
        break;
1854
0
    }
1855
0
  }
1856
0
  IR_ASSERT(0);
1857
0
  return ref;
1858
0
}
1859
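/* Illustrative sketch of the conversion retyping above, where src_type is
 * the type of the conversion's input and `type` the requested narrow type:
 *
 *    size(src) == size(type)  ->  BITCAST  (pure reinterpretation)
 *    size(src) >  size(type)  ->  TRUNC    (drop high bits)
 *    size(src) <  size(type)  ->  keep SEXT/ZEXT, or pick one by signedness
 *
 * e.g. promoting SEXT/i64(x:i32) to i16 turns the node into TRUNC/i16(x).
 */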
1860
static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
1861
0
{
1862
0
  ir_val new_val;
1863
1864
0
  switch (val_insn->type) {
1865
0
    default:
1866
0
      IR_ASSERT(0);
1867
0
    case IR_I8:
1868
0
    case IR_U8:
1869
0
    case IR_BOOL:
1870
0
    case IR_CHAR:
1871
0
      if (op == IR_SEXT) {
1872
0
        new_val.i64 = (int64_t)val_insn->val.i8;
1873
0
      } else {
1874
0
        new_val.u64 = (uint64_t)val_insn->val.u8;
1875
0
      }
1876
0
      break;
1877
0
    case IR_I16:
1878
0
    case IR_U16:
1879
0
      if (op == IR_SEXT) {
1880
0
        new_val.i64 = (int64_t)val_insn->val.i16;
1881
0
      } else {
1882
0
        new_val.u64 = (uint64_t)val_insn->val.u16;
1883
0
      }
1884
0
      break;
1885
0
    case IR_I32:
1886
0
    case IR_U32:
1887
0
      if (op == IR_SEXT) {
1888
0
        new_val.i64 = (int64_t)val_insn->val.i32;
1889
0
      } else {
1890
0
        new_val.u64 = (uint64_t)val_insn->val.u32;
1891
0
      }
1892
0
      break;
1893
0
  }
1894
0
  return ir_const(ctx, new_val, type);
1895
0
}
1896
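/* Worked example (illustrative): extending the I8 constant 0xF0 to I32 gives
 * different bit patterns depending on the extension op:
 *
 *    IR_SEXT:  (int64_t)(int8_t)0xF0   == -16  -> 0xFFFFFFF0
 *    IR_ZEXT:  (uint64_t)(uint8_t)0xF0 == 240  -> 0x000000F0
 *
 * The result goes through ir_const(), so repeated extensions of the same
 * constant can share one constant-table entry.
 */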
1897
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
1898
0
{
1899
0
  uint32_t optx = IR_OPTX(op, type, 1);
1900
0
  ir_ref ref;
1901
1902
0
  if (!IR_IS_CONST_REF(src_ref)) {
1903
0
    ref = ir_iter_find_cse1(ctx, optx, src_ref);
1904
0
    if (ref) {
1905
0
      ir_use_list_add(ctx, ref, var_ref);
1906
0
      if (!IR_IS_CONST_REF(src_ref)) {
1907
0
        ir_use_list_remove_one(ctx, src_ref, var_ref);
1908
0
      }
1909
0
      ir_bitqueue_add(worklist, ref);
1910
0
      return ref;
1911
0
    }
1912
0
  }
1913
1914
0
  ref = ir_emit1(ctx, optx, src_ref);
1915
0
  ir_use_list_add(ctx, ref, var_ref);
1916
0
  if (!IR_IS_CONST_REF(src_ref)) {
1917
0
    ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
1918
0
  }
1919
0
  ir_bitqueue_grow(worklist, ref + 1);
1920
0
  ir_bitqueue_add(worklist, ref);
1921
0
  return ref;
1922
0
}
1923
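/* Illustrative note: ir_ext_ref() materializes SEXT/ZEXT(src_ref) for the
 * user `var_ref`, preferring an existing identical node found by
 * ir_iter_find_cse1() over emitting a new one.  ir_emit1() may extend the
 * instruction buffer, so the worklist bitset is grown to cover the fresh
 * reference before it is queued; the callers' reloads of use_lists after
 * each call exist for the same reason.
 */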
1924
static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop)
1925
0
{
1926
0
  ir_insn *insn;
1927
0
  ir_ref n, *p, input, result, ctrl;
1928
1929
0
  if (IR_IS_CONST_REF(val)) {
1930
0
    return 1; /* IR_START */
1931
0
  }
1932
1933
0
  insn = &ctx->ir_base[val];
1934
0
  if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
1935
0
    return val;
1936
0
  }
1937
1938
0
  IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA);
1939
0
  if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) {
1940
0
    return insn->op1;
1941
0
  }
1942
1943
0
  n = insn->inputs_count;
1944
0
  p = insn->ops + 1;
1945
1946
0
  result = 1;
1947
0
  for (; n > 0; p++, n--) {
1948
0
    input = *p;
1949
0
    ctrl = _ir_estimated_control(ctx, input, loop);
1950
0
    if (ctrl >= loop) return ctrl;
1951
0
    if (ctrl > result) { // TODO: check dominance depth instead of order
1952
0
      result = ctrl;
1953
0
    }
1954
0
  }
1955
0
  return result;
1956
0
}
1957
1958
static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop)
1959
0
{
1960
0
  ref = _ir_estimated_control(ctx, ref, loop);
1961
0
  return ref < loop; // TODO: check dominance instead of order
1962
0
}
1963
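/* Illustrative note: both helpers approximate dominance by reference order,
 * as the inline TODOs acknowledge.  _ir_estimated_control() walks a pure
 * data expression down to its control/memory dependencies and returns the
 * highest-numbered one, bailing out early once it reaches `loop`; a value is
 * then treated as loop invariant when that estimated control point is
 * numbered before the loop entry.  This is a heuristic: in freshly built IR,
 * definitions precede uses, so a control point numbered before LOOP_BEGIN is
 * normally outside the loop.
 */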
1964
static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
1965
0
{
1966
0
  if (IR_IS_CONST_REF(ref)) {
1967
0
    return 1;
1968
0
  } else {
1969
0
    ir_insn *insn = &ctx->ir_base[ref];
1970
1971
0
    if (insn->op == IR_LOAD) {
1972
0
      if (ir_is_loop_invariant(ctx, ref, loop)) {
1973
0
        return 1;
1974
0
      } else {
1975
        /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
1976
0
        if (ctx->use_lists[ref].count == 2) {
1977
0
          return 1;
1978
0
        } else if (ctx->use_lists[ref].count == 3) {
1979
0
          ir_use_list *use_list = &ctx->use_lists[ref];
1980
0
          ir_ref *p, n, use;
1981
1982
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1983
0
            use = *p;
1984
0
            if (use != ext_ref) {
1985
0
              ir_insn *use_insn = &ctx->ir_base[use];
1986
1987
0
              if (use_insn->op != op
1988
0
               && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
1989
0
                || use_insn->op1 != ref)) {
1990
0
                return 0;
1991
0
              }
1992
0
            }
1993
0
          }
1994
0
          return 1;
1995
0
        }
1996
0
      }
1997
0
      return 0;
1998
0
    } else {
1999
0
      return ir_is_loop_invariant(ctx, ref, loop);
2000
0
    }
2001
0
  }
2002
0
}
2003
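/* Illustrative sketch of the LOAD heuristic above: on most targets a widening
 * load (e.g. movzx/movsx on x86) costs the same as a plain load, so an
 * extension applied directly to a LOAD is effectively free.  The use list of
 * a LOAD also carries its successor in the load/store chain, so count == 2
 * means the extension is effectively the only data use, and count == 3
 * tolerates one extra use, provided it is the same kind of extension or a
 * control/memory user threaded through op1.
 */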
2004
static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
2005
0
{
2006
0
  ir_op op = ctx->ir_base[ext_ref].op;
2007
0
  ir_type type = ctx->ir_base[ext_ref].type;
2008
0
  ir_insn *phi_insn;
2009
0
  ir_use_list *use_list;
2010
0
  ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;
2011
2012
  /* Check if we may change the type of the induction variable */
2013
0
  use_list = &ctx->use_lists[phi_ref];
2014
0
  n = use_list->count;
2015
0
  if (n > 1) {
2016
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2017
0
      use = *p;
2018
0
      if (use == op_ref || use == ext_ref) {
2019
0
        continue;
2020
0
      } else {
2021
0
        ir_insn *use_insn = &ctx->ir_base[use];
2022
2023
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2024
0
          if (use_insn->op1 == phi_ref) {
2025
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2026
0
              return 0;
2027
0
            }
2028
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2029
0
              continue;
2030
0
            }
2031
0
          } else if (use_insn->op2 == phi_ref) {
2032
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2033
0
              return 0;
2034
0
            }
2035
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2036
0
              continue;
2037
0
            }
2038
0
          }
2039
0
          return 0;
2040
0
        } else if (use_insn->op == IR_IF) {
2041
0
          continue;
2042
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2043
0
          ext_ref_2 = use;
2044
0
          continue;
2045
0
        } else {
2046
0
          return 0;
2047
0
        }
2048
0
      }
2049
0
    }
2050
0
  }
2051
2052
0
  use_list = &ctx->use_lists[op_ref];
2053
0
  n = use_list->count;
2054
0
  if (n > 1) {
2055
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2056
0
      use = *p;
2057
0
      if (use == phi_ref || use == ext_ref) {
2058
0
        continue;
2059
0
      } else {
2060
0
        ir_insn *use_insn = &ctx->ir_base[use];
2061
2062
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2063
0
          if (use_insn->op1 == phi_ref) {
2064
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2065
0
              return 0;
2066
0
            }
2067
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2068
0
              continue;
2069
0
            }
2070
0
          } else if (use_insn->op2 == phi_ref) {
2071
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2072
0
              return 0;
2073
0
            }
2074
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2075
0
              continue;
2076
0
            }
2077
0
          }
2078
0
          return 0;
2079
0
        } else if (use_insn->op == IR_IF) {
2080
0
          continue;
2081
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2082
0
          ext_ref_2 = use;
2083
0
          continue;
2084
0
        } else {
2085
0
          return 0;
2086
0
        }
2087
0
      }
2088
0
    }
2089
0
  }
2090
2091
0
  for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
2092
    /* "use_lists" may be reallocated by ir_ext_ref() */
2093
0
    use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
2094
0
    if (use == ext_ref) {
2095
0
      continue;
2096
0
    } else {
2097
0
      ir_insn *use_insn = &ctx->ir_base[use];
2098
2099
0
      if (use_insn->op == IR_IF) {
2100
0
        continue;
2101
0
      } else if (use_insn->op == op) {
2102
0
        IR_ASSERT(ext_ref_2 == use);
2103
0
        continue;
2104
0
      }
2105
0
      IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
2106
0
        || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
2107
0
       && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
2108
0
      if (use_insn->op1 != phi_ref) {
2109
0
        if (IR_IS_CONST_REF(use_insn->op1)
2110
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2111
0
          ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2112
0
        } else {
2113
0
          ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2114
0
        }
2115
0
        ir_bitqueue_add(worklist, use);
2116
0
      }
2117
0
      if (use_insn->op2 != phi_ref) {
2118
0
        if (IR_IS_CONST_REF(use_insn->op2)
2119
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2120
0
          ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2121
0
        } else {
2122
0
          ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2123
0
        }
2124
0
        ir_bitqueue_add(worklist, use);
2125
0
      }
2126
0
    }
2127
0
  }
2128
2129
0
  if (ctx->use_lists[op_ref].count > 1) {
2130
0
    for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
2131
      /* "use_lists" may be reallocated by ir_ext_ref() */
2132
0
      use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
2133
0
      if (use == ext_ref || use == phi_ref) {
2134
0
        continue;
2135
0
      } else {
2136
0
        ir_insn *use_insn = &ctx->ir_base[use];
2137
2138
0
        if (use_insn->op == IR_IF) {
2139
0
          continue;
2140
0
        } else if (use_insn->op == op) {
2141
0
          IR_ASSERT(ext_ref_2 == use);
2142
0
          continue;
2143
0
        }
2144
0
        IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
2145
0
        if (use_insn->op1 != op_ref) {
2146
0
          if (IR_IS_CONST_REF(use_insn->op1)
2147
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2148
0
            ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2149
0
          } else {
2150
0
            ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2151
0
          }
2152
0
          ir_bitqueue_add(worklist, use);
2153
0
        }
2154
0
        if (use_insn->op2 != op_ref) {
2155
0
          if (IR_IS_CONST_REF(use_insn->op2)
2156
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2157
0
            ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2158
0
          } else {
2159
0
            ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2160
0
          }
2161
0
          ir_bitqueue_add(worklist, use);
2162
0
        }
2163
0
      }
2164
0
    }
2165
0
  }
2166
2167
0
  ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);
2168
2169
0
  if (ext_ref_2) {
2170
0
    ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
2171
0
  }
2172
2173
0
  ctx->ir_base[op_ref].type = type;
2174
2175
0
  phi_insn = &ctx->ir_base[phi_ref];
2176
0
  phi_insn->type = type;
2177
0
  if (IR_IS_CONST_REF(phi_insn->op2)
2178
0
   && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
2179
0
    ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
2180
0
  } else {
2181
0
    ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
2182
0
  }
2183
2184
0
  return 1;
2185
0
}
2186
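/* Illustrative sketch of the full rewrite for a ZEXT over an i32 counter
 * (names hypothetical):
 *
 *    x2 = PHI/i32(loop, x1, x3)          x2 = PHI/i64(loop, ZEXT/i64(x1), x3)
 *    x3 = ADD/i32(x2, 1)             =>  x3 = ADD/i64(x2, 1)
 *    y  = ZEXT/i64(x2)                   y  = x2
 *
 * The preceding use-list scans ensure every other user of the PHI and the
 * ADD is an IF, a comparison whose opposite operand can be extended at least
 * as cheaply, or one more extension of the same kind (ext_ref_2), so the
 * narrow value is not otherwise observed.  Comparison operands and the PHI's
 * entry value are widened via ir_ext_const()/ir_ext_ref().
 */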
2187
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
2188
0
{
2189
0
  ir_ref ref = insn->op1;
2190
2191
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
2192
0
  insn = &ctx->ir_base[ref];
2193
0
  if (insn->op == IR_PHI
2194
0
   && insn->inputs_count == 3 /* (2 values) */
2195
0
   && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
2196
0
    ir_ref op_ref = insn->op3;
2197
0
    ir_insn *op_insn = &ctx->ir_base[op_ref];
2198
2199
0
    if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
2200
0
      if (op_insn->op1 == ref) {
2201
0
        if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) {
2202
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2203
0
        }
2204
0
      } else if (op_insn->op2 == ref) {
2205
0
        if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) {
2206
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2207
0
        }
2208
0
      }
2209
0
    }
2210
0
  } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) {
2211
0
    if (!IR_IS_CONST_REF(insn->op1)
2212
0
     && ctx->ir_base[insn->op1].op == IR_PHI
2213
0
     && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */
2214
0
     && ctx->ir_base[insn->op1].op3 == ref
2215
0
     && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN
2216
0
     && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) {
2217
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist);
2218
0
    } else if (!IR_IS_CONST_REF(insn->op2)
2219
0
     && ctx->ir_base[insn->op2].op == IR_PHI
2220
0
     && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */
2221
0
     && ctx->ir_base[insn->op2].op3 == ref
2222
0
     && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN
2223
0
     && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) {
2224
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist);
2225
0
    }
2226
0
  }
2227
2228
0
  return 0;
2229
0
}
2230
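/* Illustrative note: the two branches above match the same induction pattern
 * from either end.  The first starts from EXT(PHI(...)) and looks for the
 * increment; the second starts from EXT(ADD/SUB/MUL(...)) and looks back for
 * a two-input PHI of a LOOP_BEGIN whose back edge (op3) is this very
 * instruction.  In both cases the non-recurrent operand must be loop
 * invariant before ir_try_promote_induction_var_ext() is attempted.
 */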
2231
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
2232
0
{
2233
0
  ir_use_list *use_list = &ctx->use_lists[if_ref];
2234
0
  ir_ref *p = &ctx->use_edges[use_list->refs];
2235
2236
0
  IR_ASSERT(use_list->count == 2);
2237
0
  if (ctx->ir_base[*p].op == IR_IF_TRUE) {
2238
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
2239
0
    *if_true_ref = *p;
2240
0
    *if_false_ref = *(p + 1);
2241
0
  } else {
2242
0
    IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
2243
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
2244
0
    *if_false_ref = *p;
2245
0
    *if_true_ref = *(p + 1);
2246
0
  }
2247
0
}
2248
2249
static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
2250
0
{
2251
0
  ir_ref prev, next;
2252
0
  ir_use_list *use_list;
2253
2254
0
  if (ctx->use_lists[begin].count > 1) {
2255
0
    ir_ref *p, n, i, use;
2256
0
    ir_insn *use_insn;
2257
0
    ir_ref region = end;
2258
0
    ir_ref next = IR_UNUSED;
2259
2260
0
    while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
2261
0
      region = ctx->ir_base[region].op1;
2262
0
    }
2263
2264
0
    use_list = &ctx->use_lists[begin];
2265
0
    n = use_list->count;
2266
0
    for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) {
2267
0
      use = *p;
2268
0
      use_insn = &ctx->ir_base[use];
2269
0
      if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
2270
0
        IR_ASSERT(!next);
2271
0
        next = use;
2272
0
      } else {
2273
0
        IR_ASSERT(use_insn->op == IR_VAR);
2274
0
        IR_ASSERT(use_insn->op1 == begin);
2275
0
        use_insn->op1 = region;
2276
0
        if (ir_use_list_add(ctx, region, use)) {
2277
          /* restore after reallocation */
2278
0
          use_list = &ctx->use_lists[begin];
2279
0
          n = use_list->count;
2280
0
          p = &ctx->use_edges[use_list->refs + i];
2281
0
        }
2282
0
      }
2283
0
    }
2284
2285
0
    IR_ASSERT(next);
2286
0
    ctx->use_edges[use_list->refs] = next;
2287
0
    use_list->count = 1;
2288
0
  }
2289
2290
0
  IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
2291
0
  IR_ASSERT(ctx->ir_base[end].op == IR_END);
2292
0
  IR_ASSERT(ctx->ir_base[begin].op1 == end);
2293
0
  IR_ASSERT(ctx->use_lists[end].count == 1);
2294
2295
0
  prev = ctx->ir_base[end].op1;
2296
2297
0
  use_list = &ctx->use_lists[begin];
2298
0
  IR_ASSERT(use_list->count == 1);
2299
0
  next = ctx->use_edges[use_list->refs];
2300
2301
  /* remove BEGIN and END */
2302
0
  MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
2303
0
  MAKE_NOP(&ctx->ir_base[end]);   CLEAR_USES(end);
2304
2305
  /* connect their predecessor and successor */
2306
0
  ctx->ir_base[next].op1 = prev;
2307
0
  ir_use_list_replace_one(ctx, prev, end, next);
2308
2309
0
  if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
2310
0
    ir_bitqueue_add(worklist, prev);
2311
0
  }
2312
0
}
2313
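/* Illustrative sketch: the merge rewrites the control chain
 *
 *    prev -> END -> BEGIN -> next      =>      prev -> next
 *
 * Any IR_VAR nodes still attached to the dying BEGIN are first re-parented
 * to the start of the surviving predecessor block, and that predecessor is
 * re-queued when it is itself a BEGIN or MERGE, so chains of empty blocks
 * collapse across worklist iterations.
 */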
2314
static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
2315
0
{
2316
0
  ir_use_list *use_list = &ctx->use_lists[start];
2317
0
  ir_ref *p, use, n = use_list->count;
2318
2319
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2320
0
    use = *p;
2321
0
    if (use != end) {
2322
0
      ir_insn *use_insn = &ctx->ir_base[use];
2323
0
      IR_ASSERT(use_insn->op == IR_VAR);
2324
0
      IR_ASSERT(ctx->use_lists[use].count == 0);
2325
0
      MAKE_NOP(use_insn);
2326
0
    }
2327
0
  }
2328
0
}
2329
2330
static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2331
0
{
2332
0
  if (insn->inputs_count == 2) {
2333
0
    ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
2334
0
    ir_insn *end1 = &ctx->ir_base[end1_ref];
2335
0
    ir_insn *end2 = &ctx->ir_base[end2_ref];
2336
2337
0
    if (end1->op != IR_END || end2->op != IR_END) {
2338
0
      return 0;
2339
0
    }
2340
2341
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2342
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2343
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2344
2345
0
    if (start1->op1 != start2->op1) {
2346
0
      return 0;
2347
0
    }
2348
2349
0
    ir_ref root_ref = start1->op1;
2350
0
    ir_insn *root = &ctx->ir_base[root_ref];
2351
2352
0
    if (root->op != IR_IF
2353
0
     && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
2354
0
      return 0;
2355
0
    }
2356
2357
    /* Empty Diamond
2358
     *
2359
     *    prev                     prev
2360
     *    |  condition             |  condition
2361
     *    | /                      |
2362
     *    IF                       |
2363
     *    | \                      |
2364
     *    |  +-----+               |
2365
     *    |        IF_FALSE        |
2366
     *    IF_TRUE  |           =>  |
2367
     *    |        END             |
2368
     *    END     /                |
2369
     *    |  +---+                 |
2370
     *    | /                      |
2371
     *    MERGE                    |
2372
     *    |                        |
2373
     *    next                     next
2374
     */
2375
2376
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2377
0
    ir_insn *next = &ctx->ir_base[next_ref];
2378
2379
0
    if (ctx->use_lists[start1_ref].count != 1) {
2380
0
      ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2381
0
    }
2382
0
    if (ctx->use_lists[start2_ref].count != 1) {
2383
0
      ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2384
0
    }
2385
2386
0
    next->op1 = root->op1;
2387
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2388
0
    if (!IR_IS_CONST_REF(root->op2)) {
2389
0
      ir_use_list_remove_all(ctx, root->op2, root_ref);
2390
0
      if (ir_is_dead(ctx, root->op2)) {
2391
0
        ir_bitqueue_add(worklist, root->op2);
2392
0
      }
2393
0
    }
2394
2395
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2396
0
    MAKE_NOP(start1); CLEAR_USES(start1_ref);
2397
0
    MAKE_NOP(start2); CLEAR_USES(start2_ref);
2398
0
    MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2399
0
    MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2400
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2401
2402
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2403
0
      ir_bitqueue_add(worklist, next->op1);
2404
0
    }
2405
2406
0
    return 1;
2407
0
  } else {
2408
0
    ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
2409
0
    ir_ref root_ref = IR_UNUSED;
2410
2411
0
    for (i = 0; i < count; i++) {
2412
0
      ir_ref end_ref, start_ref;
2413
0
      ir_insn *end, *start;
2414
2415
0
      end_ref = ops[i];
2416
0
      end = &ctx->ir_base[end_ref];
2417
0
      if (end->op != IR_END) {
2418
0
        return 0;
2419
0
      }
2420
0
      start_ref = end->op1;
2421
0
      start = &ctx->ir_base[start_ref];
2422
0
      if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) {
2423
0
        return 0;
2424
0
      }
2425
0
      if (ctx->use_lists[start_ref].count != 1) {
2426
0
        ir_remove_unused_vars(ctx, start_ref, end_ref);
2427
0
      }
2428
0
      if (!root_ref) {
2429
0
        root_ref = start->op1;
2430
0
        if (ctx->use_lists[root_ref].count != count) {
2431
0
          return 0;
2432
0
        }
2433
0
      } else if (start->op1 != root_ref) {
2434
0
        return 0;
2435
0
      }
2436
0
    }
2437
2438
    /* Empty N-Diamond */
2439
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2440
0
    ir_insn *next = &ctx->ir_base[next_ref];
2441
0
    ir_insn *root = &ctx->ir_base[root_ref];
2442
2443
0
    next->op1 = root->op1;
2444
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2445
2446
0
    if (!IR_IS_CONST_REF(root->op2)) {
2447
0
      ir_use_list_remove_all(ctx, root->op2, root_ref);
2448
0
      if (ir_is_dead(ctx, root->op2)) {
2449
0
        ir_bitqueue_add(worklist, root->op2);
2450
0
      }
2451
0
    }
2452
2453
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2454
2455
0
    for (i = 0; i < count; i++) {
2456
0
      ir_ref end_ref = ops[i];
2457
0
      ir_insn *end = &ctx->ir_base[end_ref];
2458
0
      ir_ref start_ref = end->op1;
2459
0
      ir_insn *start = &ctx->ir_base[start_ref];
2460
2461
0
      MAKE_NOP(start); CLEAR_USES(start_ref);
2462
0
      MAKE_NOP(end);   CLEAR_USES(end_ref);
2463
0
    }
2464
2465
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2466
2467
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2468
0
      ir_bitqueue_add(worklist, next->op1);
2469
0
    }
2470
2471
0
    return 1;
2472
0
  }
2473
0
}
2474
2475
static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
2476
0
{
2477
0
  return IR_IS_CONST_REF(ref)
2478
0
    && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2479
0
    && ctx->ir_base[ref].val.u32 == 0;
2480
0
}
2481
2482
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2483
0
{
2484
0
  IR_ASSERT(insn->inputs_count == 3);
2485
0
  IR_ASSERT(ctx->use_lists[merge_ref].count == 2);
2486
2487
0
  ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2488
0
  ir_insn *end1 = &ctx->ir_base[end1_ref];
2489
0
  ir_insn *end2 = &ctx->ir_base[end2_ref];
2490
2491
0
  if (end1->op == IR_END && end2->op == IR_END) {
2492
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2493
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2494
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2495
2496
0
    if (start1->op1 == start2->op1) {
2497
0
      ir_ref root_ref = start1->op1;
2498
0
      ir_insn *root = &ctx->ir_base[root_ref];
2499
2500
0
      if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) {
2501
0
        ir_ref cond_ref = root->op2;
2502
0
        ir_insn *cond = &ctx->ir_base[cond_ref];
2503
0
        ir_type type = insn->type;
2504
0
        bool is_cmp, is_less;
2505
2506
0
        if (IR_IS_TYPE_FP(type)) {
2507
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
2508
0
            cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2509
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE ||
2510
0
            cond->op == IR_ULT || cond->op == IR_ULE);
2511
0
        } else if (IR_IS_TYPE_SIGNED(type)) {
2512
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
2513
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE);
2514
0
        } else {
2515
0
          IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
2516
0
          is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2517
0
          is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
2518
0
        }
2519
2520
0
        if (is_cmp
2521
0
         && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
2522
0
           || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
2523
          /* MAX/MIN
2524
           *
2525
           *    prev                     prev
2526
           *    |  LT(A, B)              |
2527
           *    | /                      |
2528
           *    IF                       |
2529
           *    | \                      |
2530
           *    |  +-----+               |
2531
           *    |        IF_FALSE        |
2532
           *    IF_TRUE  |           =>  |
2533
           *    |        END             |
2534
           *    END     /                |
2535
           *    |  +---+                 |
2536
           *    | /                      |
2537
           *    MERGE                    |
2538
           *    |    \                   |
2539
           *    |     PHI(A, B)          |    MIN(A, B)
2540
           *    next                     next
2541
           */
2542
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2543
0
          ir_insn *next;
2544
2545
0
          if (next_ref == ref) {
2546
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2547
0
          }
2548
0
          next = &ctx->ir_base[next_ref];
2549
2550
0
          if (ctx->use_lists[start1_ref].count != 1) {
2551
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2552
0
          }
2553
0
          if (ctx->use_lists[start2_ref].count != 1) {
2554
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2555
0
          }
2556
2557
0
          insn->op = (
2558
0
            (is_less ? cond->op1 : cond->op2)
2559
0
            ==
2560
0
            ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
2561
0
            ) ? IR_MIN : IR_MAX;
2562
0
          insn->inputs_count = 2;
2563
0
          if (insn->op2 > insn->op3) {
2564
0
            insn->op1 = insn->op2;
2565
0
            insn->op2 = insn->op3;
2566
0
          } else {
2567
0
            insn->op1 = insn->op3;
2568
0
          }
2569
0
          insn->op3 = IR_UNUSED;
2570
2571
0
          next->op1 = root->op1;
2572
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2573
0
          if (!IR_IS_CONST_REF(insn->op1)) {
2574
0
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2575
0
          }
2576
0
          if (!IR_IS_CONST_REF(insn->op2)) {
2577
0
            ir_use_list_remove_all(ctx, insn->op2, cond_ref);
2578
0
          }
2579
2580
0
          MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2581
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2582
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2583
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2584
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2585
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2586
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2587
2588
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2589
0
            ir_bitqueue_add(worklist, next->op1);
2590
0
          }
2591
2592
0
          return 1;
2593
0
        } else if (is_cmp
2594
0
            && ((ctx->ir_base[insn->op2].op == IR_NEG
2595
0
              && ctx->use_lists[insn->op2].count == 1
2596
0
              && ctx->ir_base[insn->op2].op1 == insn->op3
2597
0
              && ((cond->op1 == insn->op3
2598
0
                && ir_is_zero(ctx, cond->op2)
2599
0
                && is_less == (start1->op == IR_IF_TRUE))
2600
0
               || (cond->op2 == insn->op3
2601
0
                && ir_is_zero(ctx, cond->op1)
2602
0
                && is_less != (start1->op == IR_IF_TRUE))))
2603
0
             || (ctx->ir_base[insn->op3].op == IR_NEG
2604
0
              && ctx->use_lists[insn->op3].count == 1
2605
0
              && ctx->ir_base[insn->op3].op1 == insn->op2
2606
0
              && ((cond->op1 == insn->op2
2607
0
                && ir_is_zero(ctx, cond->op2)
2608
0
                && is_less != (start1->op == IR_IF_TRUE))
2609
0
               || (cond->op2 == insn->op2
2610
0
                && ir_is_zero(ctx, cond->op1)
2611
0
                && is_less == (start1->op == IR_IF_TRUE)))))) {
2612
          /* ABS
2613
           *
2614
           *    prev                     prev
2615
           *    |  LT(A, 0)              |
2616
           *    | /                      |
2617
           *    IF                       |
2618
           *    | \                      |
2619
           *    |  +-----+               |
2620
           *    |        IF_FALSE        |
2621
           *    IF_TRUE  |           =>  |
2622
           *    |        END             |
2623
           *    END     /                |
2624
           *    |  +---+                 |
2625
           *    | /                      |
2626
           *    MERGE                    |
2627
           *    |    \                   |
2628
           *    |     PHI(A, NEG(A))     |    ABS(A)
2629
           *    next                     next
2630
           */
2631
0
          ir_ref neg_ref;
2632
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2633
0
          ir_insn *next;
2634
2635
0
          if (next_ref == ref) {
2636
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2637
0
          }
2638
0
          next = &ctx->ir_base[next_ref];
2639
2640
0
          if (ctx->use_lists[start1_ref].count != 1) {
2641
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2642
0
          }
2643
0
          if (ctx->use_lists[start2_ref].count != 1) {
2644
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2645
0
          }
2646
2647
0
          insn->op = IR_ABS;
2648
0
          insn->inputs_count = 1;
2649
0
          if (ctx->ir_base[insn->op2].op == IR_NEG) {
2650
0
            neg_ref = insn->op2;
2651
0
            insn->op1 = insn->op3;
2652
0
          } else {
2653
0
            neg_ref = insn->op3;
2654
0
            insn->op1 = insn->op2;
2655
0
          }
2656
0
          insn->op2 = IR_UNUSED;
2657
0
          insn->op3 = IR_UNUSED;
2658
2659
0
          next->op1 = root->op1;
2660
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2661
0
          ir_use_list_remove_one(ctx, insn->op1, neg_ref);
2662
0
          if (!IR_IS_CONST_REF(insn->op1)) {
2663
0
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2664
0
          }
2665
2666
0
          MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2667
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2668
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2669
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2670
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2671
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2672
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2673
0
          MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);
2674
2675
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2676
0
            ir_bitqueue_add(worklist, next->op1);
2677
0
          }
2678
2679
0
          return 1;
2680
#if 0
2681
        } else {
2682
          /* COND
2683
           *
2684
           *    prev                     prev
2685
           *    |  cond                  |
2686
           *    | /                      |
2687
           *    IF                       |
2688
           *    | \                      |
2689
           *    |  +-----+               |
2690
           *    |        IF_FALSE        |
2691
           *    IF_TRUE  |           =>  |
2692
           *    |        END             |
2693
           *    END     /                |
2694
           *    |  +---+                 |
2695
           *    | /                      |
2696
           *    MERGE                    |
2697
           *    |    \                   |
2698
           *    |     PHI(A, B)          |    COND(cond, A, B)
2699
           *    next                     next
2700
           */
2701
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2702
          ir_insn *next;
2703
2704
          if (next_ref == ref) {
2705
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2706
          }
2707
          next = &ctx->ir_base[next_ref];
2708
2709
          if (ctx->use_lists[start1_ref].count != 1) {
2710
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2711
          }
2712
          if (ctx->use_lists[start2_ref].count != 1) {
2713
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2714
          }
2715
2716
          insn->op = IR_COND;
2717
          insn->inputs_count = 3;
2718
          insn->op1 = cond_ref;
2719
          if (start1->op == IR_IF_FALSE) {
2720
            SWAP_REFS(insn->op2, insn->op3);
2721
          }
2722
2723
          next->op1 = root->op1;
2724
          ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
2725
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2726
          ir_use_list_remove_all(ctx, root->op2, root_ref);
2727
2728
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2729
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2730
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2731
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2732
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2733
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2734
2735
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2736
            ir_bitqueue_add(worklist, next->op1);
2737
          }
2738
2739
          return 1;
2740
#endif
2741
0
        }
2742
0
      }
2743
0
    }
2744
0
  }
2745
2746
0
  return 0;
2747
0
}
2748
2749
static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
2750
0
{
2751
0
  IR_ASSERT(op1->type == op2->type);
2752
0
  if (IR_IS_TYPE_INT(op1->type)) {
2753
0
    if (op == IR_EQ) {
2754
0
      return op1->val.u64 == op2->val.u64;
2755
0
    } else if (op == IR_NE) {
2756
0
      return op1->val.u64 != op2->val.u64;
2757
0
    } else if (op == IR_LT) {
2758
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2759
0
        return op1->val.i64 < op2->val.i64;
2760
0
      } else {
2761
0
        return op1->val.u64 < op2->val.u64;
2762
0
      }
2763
0
    } else if (op == IR_GE) {
2764
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2765
0
        return op1->val.i64 >= op2->val.i64;
2766
0
      } else {
2767
0
        return op1->val.u64 >= op2->val.u64;
2768
0
      }
2769
0
    } else if (op == IR_LE) {
2770
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2771
0
        return op1->val.i64 <= op2->val.i64;
2772
0
      } else {
2773
0
        return op1->val.u64 <= op2->val.u64;
2774
0
      }
2775
0
    } else if (op == IR_GT) {
2776
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2777
0
        return op1->val.i64 > op2->val.i64;
2778
0
      } else {
2779
0
        return op1->val.u64 > op2->val.u64;
2780
0
      }
2781
0
    } else if (op == IR_ULT) {
2782
0
      return op1->val.u64 < op2->val.u64;
2783
0
    } else if (op == IR_UGE) {
2784
0
      return op1->val.u64 >= op2->val.u64;
2785
0
    } else if (op == IR_ULE) {
2786
0
      return op1->val.u64 <= op2->val.u64;
2787
0
    } else if (op == IR_UGT) {
2788
0
      return op1->val.u64 > op2->val.u64;
2789
0
    } else {
2790
0
      IR_ASSERT(0);
2791
0
      return 0;
2792
0
    }
2793
0
  } else if (op1->type == IR_DOUBLE) {
2794
0
    if (op == IR_EQ) {
2795
0
      return op1->val.d == op2->val.d;
2796
0
    } else if (op == IR_NE) {
2797
0
      return op1->val.d != op2->val.d;
2798
0
    } else if (op == IR_LT) {
2799
0
      return op1->val.d < op2->val.d;
2800
0
    } else if (op == IR_GE) {
2801
0
      return op1->val.d >= op2->val.d;
2802
0
    } else if (op == IR_LE) {
2803
0
      return op1->val.d <= op2->val.d;
2804
0
    } else if (op == IR_GT) {
2805
0
      return op1->val.d > op2->val.d;
2806
0
    } else if (op == IR_ULT) {
2807
0
      return !(op1->val.d >= op2->val.d);
2808
0
    } else if (op == IR_UGE) {
2809
0
      return !(op1->val.d < op2->val.d);
2810
0
    } else if (op == IR_ULE) {
2811
0
      return !(op1->val.d > op2->val.d);
2812
0
    } else if (op == IR_UGT) {
2813
0
      return !(op1->val.d <= op2->val.d);
2814
0
    } else if (op == IR_ORDERED) {
2815
0
      return !isnan(op1->val.d) && !isnan(op2->val.d);
2816
0
    } else if (op == IR_UNORDERED) {
2817
0
      return isnan(op1->val.d) || isnan(op2->val.d);
2818
0
    } else {
2819
0
      IR_ASSERT(0);
2820
0
      return 0;
2821
0
    }
2822
0
  } else {
2823
0
    IR_ASSERT(op1->type == IR_FLOAT);
2824
0
    if (op == IR_EQ) {
2825
0
      return op1->val.f == op2->val.f;
2826
0
    } else if (op == IR_NE) {
2827
0
      return op1->val.f != op2->val.f;
2828
0
    } else if (op == IR_LT) {
2829
0
      return op1->val.f < op2->val.f;
2830
0
    } else if (op == IR_GE) {
2831
0
      return op1->val.f >= op2->val.f;
2832
0
    } else if (op == IR_LE) {
2833
0
      return op1->val.f <= op2->val.f;
2834
0
    } else if (op == IR_GT) {
2835
0
      return op1->val.f > op2->val.f;
2836
0
    } else if (op == IR_ULT) {
2837
0
      return !(op1->val.f >= op2->val.f);
2838
0
    } else if (op == IR_UGE) {
2839
0
      return !(op1->val.f < op2->val.f);
2840
0
    } else if (op == IR_ULE) {
2841
0
      return !(op1->val.f > op2->val.f);
2842
0
    } else if (op == IR_UGT) {
2843
0
      return !(op1->val.f <= op2->val.f);
2844
0
    } else if (op == IR_ORDERED) {
2845
0
      return !isnan(op1->val.f) && !isnan(op2->val.f);
2846
0
    } else if (op == IR_UNORDERED) {
2847
0
      return isnan(op1->val.f) || isnan(op2->val.f);
2848
0
    } else {
2849
0
      IR_ASSERT(0);
2850
0
      return 0;
2851
0
    }
2852
0
  }
2853
0
}
2854
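/* Illustrative note on the floating-point branches above: the "unsigned"
 * comparison ops double as unordered FP predicates, implemented as negated
 * ordered comparisons so that NaN operands make them true, e.g.
 *
 *    IR_ULT:  !(a >= b)  is true when a < b or when either operand is NaN,
 *
 * while the plain IR_LT (a < b) is false for NaN.  IR_ORDERED/IR_UNORDERED
 * test NaN-ness directly via isnan().
 */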
2855
static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2856
0
{
2857
0
  ir_ref cond_ref = insn->op2;
2858
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
2859
2860
0
  if (cond->op == IR_PHI
2861
0
   && cond->inputs_count == 3
2862
0
   && cond->op1 == insn->op1
2863
0
   && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
2864
0
    || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
2865
0
    ir_ref merge_ref = insn->op1;
2866
0
    ir_insn *merge = &ctx->ir_base[merge_ref];
2867
2868
0
    if (ctx->use_lists[merge_ref].count == 2) {
2869
0
      ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2870
0
      ir_insn *end1 = &ctx->ir_base[end1_ref];
2871
0
      ir_insn *end2 = &ctx->ir_base[end2_ref];
2872
2873
0
      if (end1->op == IR_END && end2->op == IR_END) {
2874
0
        ir_ref if_true_ref, if_false_ref;
2875
0
        ir_insn *if_true, *if_false;
2876
0
        ir_op op = IR_IF_FALSE;
2877
2878
0
        ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
2879
2880
0
        if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
2881
0
          IR_ASSERT(IR_IS_CONST_REF(cond->op3));
2882
0
          SWAP_REFS(cond->op2, cond->op3);
2883
0
          SWAP_REFS(merge->op1, merge->op2);
2884
0
          SWAP_REFS(end1_ref, end2_ref);
2885
0
          SWAP_INSNS(end1, end2);
2886
0
        }
2887
0
        if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
2888
0
          SWAP_REFS(if_true_ref, if_false_ref);
2889
0
          op = IR_IF_TRUE;
2890
0
        }
2891
0
        if_true = &ctx->ir_base[if_true_ref];
2892
0
        if_false = &ctx->ir_base[if_false_ref];
2893
2894
0
        if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
2895
0
          if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
2896
            /* Simple IF Split
2897
             *
2898
             *    |        |               |        |
2899
             *    |        END             |        END
2900
             *    END     /                END       \
2901
             *    |  +---+                 |          +
2902
             *    | /                      |          |
2903
             *    MERGE                    |          |
2904
             *    | \                      |          |
2905
             *    |  PHI(false, true)      |          |
2906
             *    | /                      |          |
2907
             *    IF                   =>  |          |
2908
             *    | \                      |          |
2909
             *    |  +------+              |          |
2910
             *    |         IF_TRUE        |          BEGIN
2911
             *    IF_FALSE  |              BEGIN
2912
             *    |                        |
2913
             */
2914
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2915
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
2916
2917
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
2918
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
2919
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
2920
2921
0
            if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2922
0
            if_false->op1 = end1_ref;
2923
2924
0
            if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2925
0
            if_true->op1 = end2_ref;
2926
2927
0
            ir_bitqueue_add(worklist, if_false_ref);
2928
0
            ir_bitqueue_add(worklist, if_true_ref);
2929
2930
0
            return 1;
2931
0
          } else {
2932
            /* Simple IF Split
2933
             *
2934
             *    |        |               |        |
2935
             *    |        END             |        END
2936
             *    END     /                END      |
2937
             *    |  +---+                 |        |
2938
             *    | /                      |        |
2939
             *    MERGE                    |        +
2940
             *    | \                      |       /
2941
             *    |  PHI(false, false)     |      /
2942
             *    | /                      |     /
2943
             *    IF                   =>  |    /
2944
             *    | \                      |   /
2945
             *    |  +------+              |  /
2946
             *    |         IF_TRUE        | /        BEGIN(unreachable)
2947
             *    IF_FALSE  |              MERGE
2948
             *    |                        |
2949
             */
2950
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2951
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
2952
2953
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
2954
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
2955
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
2956
2957
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2958
0
            if_false->op1 = end1_ref;
2959
0
            if_false->op2 = end2_ref;
2960
2961
0
            if_true->optx = IR_BEGIN;
2962
0
            if_true->op1 = IR_UNUSED;
2963
2964
0
            ctx->flags2 &= ~IR_CFG_REACHABLE;
2965
2966
0
            ir_bitqueue_add(worklist, if_false_ref);
2967
2968
0
            return 1;
2969
0
          }
2970
0
        }
2971
2972
        /* Simple IF Split
2973
         *
2974
         *    |        |               |        |
2975
         *    |        END             |        IF(X)
2976
         *    END     /                END     / \
2977
         *    |  +---+                 |   +--+   +
2978
         *    | /                      |  /       |
2979
         *    MERGE                    | IF_FALSE |
2980
         *    | \                      | |        |
2981
         *    |  PHI(false, X)         | |        |
2982
         *    | /                      | |        |
2983
         *    IF                   =>  | END      |
2984
         *    | \                      | |        |
2985
         *    |  +------+              | |        |
2986
         *    |         IF_TRUE        | |        IF_TRUE
2987
         *    IF_FALSE  |              MERGE
2988
         *    |                        |
2989
         */
2990
0
        ir_use_list_remove_all(ctx, merge_ref, cond_ref);
2991
0
        ir_use_list_remove_all(ctx, ref, if_true_ref);
2992
0
        if (!IR_IS_CONST_REF(cond->op3)) {
2993
0
          ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
2994
0
        }
2995
0
        ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2996
0
        ir_use_list_add(ctx, end2_ref, if_true_ref);
2997
2998
0
        end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
2999
0
        end2->op2 = cond->op3;
3000
0
        ir_bitqueue_add(worklist, end2_ref);
3001
3002
0
        merge->optx = IR_OPTX(op, IR_VOID, 1);
3003
0
        merge->op1 = end2_ref;
3004
0
        merge->op2 = IR_UNUSED;
3005
3006
0
        MAKE_NOP(cond);
3007
0
        CLEAR_USES(cond_ref);
3008
3009
0
        insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3010
0
        insn->op1 = merge_ref;
3011
0
        insn->op2 = IR_UNUSED;
3012
3013
0
        if_true->op1 = end2_ref;
3014
3015
0
        if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3016
0
        if_false->op1 = end1_ref;
3017
0
        if_false->op2 = ref;
3018
3019
0
        ir_bitqueue_add(worklist, if_false_ref);
3020
0
        if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3021
0
          ir_bitqueue_add(worklist, end2->op1);
3022
0
        }
3023
3024
0
        return 1;
3025
0
      }
3026
0
    }
3027
0
  }
3028
3029
0
  return 0;
3030
0
}
3031
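/* Like the plain IF split above, but the condition is a comparison of a
 * PHI against a constant: IF(CMP(PHI(C1, X), C2)).  CMP(C1, C2) folds at
 * compile time via ir_cmp_is_true(), so the predecessor contributing C1
 * can branch directly; at most one real test on X remains.
 *
 * A hypothetical source-level shape (illustrative only):
 *
 *     int v = p ? 0 : x;
 *     if (v > 7) { ... }   // decidable statically along the p-path
 */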
3032
static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3033
0
{
3034
0
  ir_ref cond_ref = insn->op2;
3035
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
3036
3037
0
  if (cond->op >= IR_EQ && cond->op <= IR_UGT
3038
0
   && IR_IS_CONST_REF(cond->op2)
3039
0
   && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
3040
0
   && ctx->use_lists[insn->op2].count == 1) {
3041
0
    ir_ref phi_ref = cond->op1;
3042
0
    ir_insn *phi = &ctx->ir_base[phi_ref];
3043
3044
0
    if (phi->op == IR_PHI
3045
0
     && phi->inputs_count == 3
3046
0
     && phi->op1 == insn->op1
3047
0
     && ctx->use_lists[phi_ref].count == 1
3048
0
     && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
3049
0
      || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
3050
0
      ir_ref merge_ref = insn->op1;
3051
0
      ir_insn *merge = &ctx->ir_base[merge_ref];
3052
3053
0
      if (ctx->use_lists[merge_ref].count == 2) {
3054
0
        ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3055
0
        ir_insn *end1 = &ctx->ir_base[end1_ref];
3056
0
        ir_insn *end2 = &ctx->ir_base[end2_ref];
3057
3058
0
        if (end1->op == IR_END && end2->op == IR_END) {
3059
0
          ir_ref if_true_ref, if_false_ref;
3060
0
          ir_insn *if_true, *if_false;
3061
0
          ir_op op = IR_IF_FALSE;
3062
3063
0
          ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3064
3065
0
          if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
3066
0
            IR_ASSERT(IR_IS_CONST_REF(phi->op3));
3067
0
            SWAP_REFS(phi->op2, phi->op3);
3068
0
            SWAP_REFS(merge->op1, merge->op2);
3069
0
            SWAP_REFS(end1_ref, end2_ref);
3070
0
            SWAP_INSNS(end1, end2);
3071
0
          }
3072
0
          if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
3073
0
            SWAP_REFS(if_true_ref, if_false_ref);
3074
0
            op = IR_IF_TRUE;
3075
0
          }
3076
0
          if_true = &ctx->ir_base[if_true_ref];
3077
0
          if_false = &ctx->ir_base[if_false_ref];
3078
3079
0
          if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
3080
0
            if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
3081
              /* IF Split
3082
               *
3083
               *    |        |               |        |
3084
               *    |        END             |        END
3085
               *    END     /                END      |
3086
               *    |  +---+                 |        |
3087
               *    | /                      |        |
3088
               *    MERGE                    |        |
3089
               *    | \                      |        |
3090
               *    |  PHI(C1, X)            |        |
3091
               *    |  |                     |        |
3092
               *    |  CMP(_, C2)            |        |
3093
               *    | /                      |        |
3094
               *    IF                   =>  |        |
3095
               *    | \                      |        |
3096
               *    |  +------+              |        |
3097
               *    |         IF_TRUE        |        BEGIN
3098
               *    IF_FALSE  |              BEGIN    |
3099
               *    |                        |
3100
               */
3101
3102
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3103
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3104
3105
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3106
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3107
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3108
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3109
3110
0
              if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3111
0
              if_false->op1 = end1_ref;
3112
3113
0
              if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3114
0
              if_true->op1 = end2_ref;
3115
3116
0
              ir_bitqueue_add(worklist, if_false_ref);
3117
0
              ir_bitqueue_add(worklist, if_true_ref);
3118
3119
0
              return 1;
3120
0
            } else {
3121
              /* IF Split
3122
               *
3123
               *    |        |               |        |
3124
               *    |        END             |        END
3125
               *    END     /                END      |
3126
               *    |  +---+                 |        |
3127
               *    | /                      |        |
3128
               *    MERGE                    |        |
3129
               *    | \                      |        |
3130
               *    |  PHI(C1, X)            |        |
3131
               *    |  |                     |        +
3132
               *    |  CMP(_, C2)            |       /
3133
               *    | /                      |      /
3134
               *    IF                   =>  |     /
3135
               *    | \                      |    /
3136
               *    |  +------+              |   /
3137
               *    |         IF_TRUE        |  /      BEGIN(unreachable)
3138
               *    IF_FALSE  |              MERGE     |
3139
               *    |                        |
3140
               */
3141
3142
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3143
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3144
3145
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3146
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3147
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3148
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3149
3150
0
              if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3151
0
              if_false->op1 = end1_ref;
3152
0
              if_false->op2 = end2_ref;
3153
3154
0
              if_true->optx = IR_BEGIN;
3155
0
              if_true->op1 = IR_UNUSED;
3156
3157
0
              ctx->flags2 &= ~IR_CFG_REACHABLE;
3158
3159
0
              ir_bitqueue_add(worklist, if_false_ref);
3160
3161
0
              return 1;
3162
0
            }
3163
0
          } else {
3164
            /* IF Split
3165
             *
3166
             *    |        |               |        |
3167
             *    |        END             |        IF<----+
3168
             *    END     /                END     / \     |
3169
             *    |  +---+                 |   +--+   +    |
3170
             *    | /                      |  /       |    |
3171
             *    MERGE                    | IF_FALSE |    |
3172
             *    | \                      | |        |    |
3173
             *    |  PHI(C1, X)            | |        |    |
3174
             *    |  |                     | |        |    |
3175
             *    |  CMP(_, C2)            | |        |    CMP(X, C2)
3176
             *    | /                      | |        |
3177
             *    IF                   =>  | END      |
3178
             *    | \                      | |        |
3179
             *    |  +------+              | |        |
3180
             *    |         IF_TRUE        | |        IF_TRUE
3181
             *    IF_FALSE  |              MERGE
3182
             *    |                        |
3183
             */
3184
3185
0
            ir_use_list_remove_all(ctx, merge_ref, phi_ref);
3186
0
            ir_use_list_remove_all(ctx, ref, if_true_ref);
3187
0
            if (!IR_IS_CONST_REF(phi->op3)) {
3188
0
              ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
3189
0
            }
3190
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3191
0
            ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
3192
0
            ir_use_list_add(ctx, end2_ref, if_true_ref);
3193
3194
0
            end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3195
0
            end2->op2 = insn->op2;
3196
0
            ir_bitqueue_add(worklist, end2_ref);
3197
3198
0
            merge->optx = IR_OPTX(op, IR_VOID, 1);
3199
0
            merge->op1 = end2_ref;
3200
0
            merge->op2 = IR_UNUSED;
3201
3202
0
            cond->op1 = phi->op3;
3203
0
            MAKE_NOP(phi);
3204
0
            CLEAR_USES(phi_ref);
3205
3206
0
            insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3207
0
            insn->op1 = merge_ref;
3208
0
            insn->op2 = IR_UNUSED;
3209
3210
0
            if_true->op1 = end2_ref;
3211
3212
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3213
0
            if_false->op1 = end1_ref;
3214
0
            if_false->op2 = ref;
3215
3216
0
            ir_bitqueue_add(worklist, if_false_ref);
3217
0
            if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3218
0
              ir_bitqueue_add(worklist, end2->op1);
3219
0
            }
3220
3221
0
            return 1;
3222
0
          }
3223
0
        }
3224
0
      }
3225
0
    }
3226
0
  }
3227
3228
0
  return 0;
3229
0
}
3230
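/* Dispatcher for MERGE nodes.  A MERGE with a single user may be the top
 * of an empty diamond (ir_try_remove_empty_diamond).  A two-input MERGE
 * whose only users are one PHI and one IF is a candidate for the IF-split
 * transformations above; failing that, the PHI itself may still be
 * simplified by ir_optimize_phi(). */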
3231
static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
3232
0
{
3233
0
  ir_use_list *use_list = &ctx->use_lists[merge_ref];
3234
3235
0
  if (use_list->count == 1) {
3236
0
    ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
3237
0
  } else if (use_list->count == 2) {
3238
0
    if (merge->inputs_count == 2) {
3239
0
      ir_ref phi_ref = ctx->use_edges[use_list->refs];
3240
0
      ir_insn *phi = &ctx->ir_base[phi_ref];
3241
3242
0
      ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
3243
0
      ir_insn *next = &ctx->ir_base[next_ref];
3244
3245
0
      if (next->op == IR_PHI) {
3246
0
        SWAP_REFS(phi_ref, next_ref);
3247
0
        SWAP_INSNS(phi, next);
3248
0
      }
3249
3250
0
      if (phi->op == IR_PHI && next->op != IR_PHI) {
3251
0
        if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
3252
0
          if (next->op2 == phi_ref) {
3253
0
            if (ir_try_split_if(ctx, next_ref, next, worklist)) {
3254
0
              return;
3255
0
            }
3256
0
          } else {
3257
0
            ir_insn *cmp = &ctx->ir_base[next->op2];
3258
3259
0
            if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
3260
0
             && cmp->op1 == phi_ref
3261
0
             && IR_IS_CONST_REF(cmp->op2)
3262
0
             && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
3263
0
             && ctx->use_lists[next->op2].count == 1) {
3264
0
              if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
3265
0
                return;
3266
0
              }
3267
0
            }
3268
0
          }
3269
0
        }
3270
0
        ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
3271
0
      }
3272
0
    }
3273
0
  }
3274
0
}
3275
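/* Return the first SEXT or ZEXT user of "ref", or IR_UNUSED if there is
 * none. */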
3276
static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref)
3277
0
{
3278
0
  ir_use_list *use_list = &ctx->use_lists[ref];
3279
0
  ir_ref *p, n, use;
3280
0
  ir_insn *use_insn;
3281
3282
0
  for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
3283
0
    use = *p;
3284
0
    use_insn = &ctx->ir_base[use];
3285
0
    if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
3286
0
      return use;
3287
0
    }
3288
0
  }
3289
0
  return IR_UNUSED;
3290
0
}
3291
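/* If the induction variable (or its update instruction) is consumed
 * through SEXT/ZEXT, try to promote the whole PHI/op cycle to the wider
 * type so the per-iteration extension disappears. */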
3292
static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
3293
0
{
3294
0
  ir_ref ext_ref;
3295
3296
0
  ext_ref = ir_find_ext_use(ctx, phi_ref);
3297
0
  if (!ext_ref) {
3298
0
    ext_ref = ir_find_ext_use(ctx, op_ref);
3299
0
  }
3300
0
  if (ext_ref) {
3301
0
    ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
3302
0
  }
3303
0
}
3304
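/* Scan the PHIs of a 2-input LOOP_BEGIN for simple induction variables,
 * i.e. PHIs whose back-edge value is an ADD/SUB/MUL of the PHI itself
 * with a loop-invariant operand, and try to optimize each one. */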
3305
static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
3306
0
{
3307
0
  ir_ref n;
3308
3309
0
  if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
3310
0
    return;
3311
0
  }
3312
3313
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
3314
0
  for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
3315
    /* "use_lists" may be reallocated by ir_ext_ref() */
3316
0
    ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
3317
0
    ir_insn *use_insn = &ctx->ir_base[use];
3318
3319
0
    if (use_insn->op == IR_PHI) {
3320
0
      ir_ref op_ref = use_insn->op3;
3321
0
      ir_insn *op_insn = &ctx->ir_base[op_ref];
3322
3323
0
      if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
3324
0
        if (op_insn->op1 == use) {
3325
0
          if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) {
3326
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3327
0
          }
3328
0
        } else if (op_insn->op2 == use) {
3329
0
          if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) {
3330
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3331
0
          }
3332
0
        }
3333
0
      }
3334
0
    }
3335
0
  }
3336
0
}
3337
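/* Canonicalize a branch/guard condition: look through single-use
 * BITCAST/ZEXT/SEXT wrappers, fold NOT by flipping *swap, and reduce
 * "x != 0", "x == true" and "x == 0" to plain x (the last one negates
 * via *swap).  An ALLOCA/VADDR address is never null, so such a
 * condition folds to IR_TRUE.  Finally, ir_check_dominating_predicates()
 * may prove the remaining condition constant from dominating checks.
 *
 * E.g. IF(ZEXT(NOT(x))) becomes IF(x) with *swap set, and the caller
 * then exchanges the IF_TRUE/IF_FALSE targets. */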
3338
static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
3339
0
{
3340
0
  ir_insn *condition_insn = &ctx->ir_base[condition];
3341
3342
0
  while ((condition_insn->op == IR_BITCAST
3343
0
    || condition_insn->op == IR_ZEXT
3344
0
    || condition_insn->op == IR_SEXT)
3345
0
   && ctx->use_lists[condition].count == 1) {
3346
0
    condition = condition_insn->op1;
3347
0
    condition_insn = &ctx->ir_base[condition];
3348
0
  }
3349
3350
0
  if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) {
3351
0
    *swap = 1;
3352
0
    condition = condition_insn->op1;
3353
0
    condition_insn = &ctx->ir_base[condition];
3354
0
  }
3355
3356
0
  if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) {
3357
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3358
3359
0
    if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3360
0
      condition = condition_insn->op1;
3361
0
      condition_insn = &ctx->ir_base[condition];
3362
0
    }
3363
0
  } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) {
3364
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3365
3366
0
    if (condition_insn->op2 == IR_TRUE) {
3367
0
      condition = condition_insn->op1;
3368
0
      condition_insn = &ctx->ir_base[condition];
3369
0
    } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3370
0
      condition = condition_insn->op1;
3371
0
      condition_insn = &ctx->ir_base[condition];
3372
0
      *swap = !*swap;
3373
0
    }
3374
0
  }
3375
3376
0
  while ((condition_insn->op == IR_BITCAST
3377
0
    || condition_insn->op == IR_ZEXT
3378
0
    || condition_insn->op == IR_SEXT)
3379
0
   && ctx->use_lists[condition].count == 1) {
3380
0
    condition = condition_insn->op1;
3381
0
    condition_insn = &ctx->ir_base[condition];
3382
0
  }
3383
3384
0
  if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) {
3385
0
    return IR_TRUE;
3386
0
  }
3387
3388
0
  if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) {
3389
0
    condition = ir_check_dominating_predicates(ctx, control, condition);
3390
0
  }
3391
3392
0
  return condition;
3393
0
}
3394
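/* Optimize a two-way IF.  If the canonicalized condition was negated,
 * swap the IF_TRUE/IF_FALSE successors.  If it is constant, degrade the
 * IF to an END feeding the taken successor (now a BEGIN), detach the
 * dead successor, and clear IR_CFG_REACHABLE so that reachability is
 * recomputed. */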
3395
static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3396
0
{
3397
0
  bool swap = 0;
3398
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3399
3400
0
  if (swap) {
3401
0
    ir_use_list *use_list = &ctx->use_lists[ref];
3402
0
    ir_ref *p, use;
3403
3404
0
    IR_ASSERT(use_list->count == 2);
3405
0
    p = ctx->use_edges + use_list->refs;
3406
0
    use = *p;
3407
0
    if (ctx->ir_base[use].op == IR_IF_TRUE) {
3408
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3409
0
      use = *(p+1);
3410
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3411
0
    } else {
3412
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3413
0
      use = *(p+1);
3414
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3415
0
    }
3416
0
  }
3417
3418
0
  if (IR_IS_CONST_REF(condition)) {
3419
    /*
3420
     *    |                        |
3421
     *    IF(TRUE)             =>  END
3422
     *    | \                      |
3423
     *    |  +------+              |
3424
     *    |         IF_TRUE        |        BEGIN(unreachable)
3425
     *    IF_FALSE  |              BEGIN
3426
     *    |                        |
3427
     */
3428
0
    ir_ref if_true_ref, if_false_ref;
3429
0
    ir_insn *if_true, *if_false;
3430
3431
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3432
0
    if (!IR_IS_CONST_REF(insn->op2)) {
3433
0
      ir_use_list_remove_one(ctx, insn->op2, ref);
3434
0
      ir_bitqueue_add(worklist, insn->op2);
3435
0
    }
3436
0
    insn->op2 = IR_UNUSED;
3437
3438
0
    ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3439
0
    if_true = &ctx->ir_base[if_true_ref];
3440
0
    if_false = &ctx->ir_base[if_false_ref];
3441
0
    if_true->op = IR_BEGIN;
3442
0
    if_false->op = IR_BEGIN;
3443
0
    if (ir_ref_is_true(ctx, condition)) {
3444
0
      if_false->op1 = IR_UNUSED;
3445
0
      ir_use_list_remove_one(ctx, ref, if_false_ref);
3446
0
      ir_bitqueue_add(worklist, if_true_ref);
3447
0
    } else {
3448
0
      if_true->op1 = IR_UNUSED;
3449
0
      ir_use_list_remove_one(ctx, ref, if_true_ref);
3450
0
      ir_bitqueue_add(worklist, if_false_ref);
3451
0
    }
3452
0
    ctx->flags2 &= ~IR_CFG_REACHABLE;
3453
0
  } else if (insn->op2 != condition) {
3454
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3455
0
  }
3456
0
}
3457
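/* The same canonicalization for GUARD/GUARD_NOT, except that a negated
 * condition flips the guard's polarity instead of swapping successors.
 * A guard proven to always pass is unlinked from the control chain
 * (together with a preceding SNAPSHOT, if any); one proven to always
 * fail is kept with a constant condition. */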
3458
static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3459
0
{
3460
0
  bool swap = 0;
3461
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3462
3463
0
  if (swap) {
3464
0
    if (insn->op == IR_GUARD) {
3465
0
      insn->op = IR_GUARD_NOT;
3466
0
    } else {
3467
0
      insn->op = IR_GUARD;
3468
0
    }
3469
0
  }
3470
3471
0
  if (IR_IS_CONST_REF(condition)) {
3472
0
    if (insn->op == IR_GUARD) {
3473
0
      if (ir_ref_is_true(ctx, condition)) {
3474
0
        ir_ref prev, next;
3475
3476
0
remove_guard:
3477
0
        prev = insn->op1;
3478
0
        next = ir_next_control(ctx, ref);
3479
0
        if (ctx->ir_base[prev].op == IR_SNAPSHOT) {
3480
0
          ir_ref snapshot = prev;
3481
0
          prev = ctx->ir_base[prev].op1;
3482
0
          ir_use_list_remove_one(ctx, snapshot, ref);
3483
0
          ir_use_list_remove_one(ctx, ref, next);
3484
0
          ir_use_list_replace_one(ctx, prev, snapshot, next);
3485
0
          ir_iter_remove_insn(ctx, snapshot, worklist);
3486
0
        } else {
3487
0
          ir_use_list_remove_one(ctx, ref, next);
3488
0
          ir_use_list_replace_one(ctx, prev, ref, next);
3489
0
        }
3490
0
        ctx->ir_base[next].op1 = prev;
3491
0
        insn->op1 = IR_UNUSED;
3492
3493
0
        if (!IR_IS_CONST_REF(insn->op2)) {
3494
0
          ir_use_list_remove_one(ctx, insn->op2, ref);
3495
0
          if (ir_is_dead(ctx, insn->op2)) {
3496
            /* schedule DCE */
3497
0
            ir_bitqueue_add(worklist, insn->op2);
3498
0
          }
3499
0
        }
3500
3501
0
        if (!IR_IS_CONST_REF(insn->op3)) {
3502
0
          ir_use_list_remove_one(ctx, insn->op3, ref);
3503
0
          if (ir_is_dead(ctx, insn->op3)) {
3504
            /* schedule DCE */
3505
0
            ir_bitqueue_add(worklist, insn->op3);
3506
0
          }
3507
0
        }
3508
3509
0
        MAKE_NOP(insn);
3510
0
        return;
3511
0
      } else {
3512
0
        condition = IR_FALSE;
3513
0
      }
3514
0
    } else {
3515
0
      if (ir_ref_is_true(ctx, condition)) {
3516
0
        condition = IR_TRUE;
3517
0
      } else {
3518
0
        goto remove_guard;
3519
0
      }
3520
0
    }
3521
0
  }
3522
3523
0
  if (insn->op2 != condition) {
3524
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3525
0
  }
3526
0
}
3527
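/* The iterative optimization loop: pop instructions off the worklist and
 * re-apply local simplifications until a fixed point is reached.  Dead
 * foldable instructions are removed; FP2FP/FP2INT/TRUNC/SEXT/ZEXT get a
 * chance at type promotion before generic folding; BEGIN/MERGE/LOOP_BEGIN
 * trigger the CFG cleanups above; loads and stores are subject to
 * dead-code removal and aliasing-based forwarding; IF and GUARD nodes are
 * canonicalized and folded. */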
3528
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
3529
0
{
3530
0
  ir_ref i, val;
3531
0
  ir_insn *insn;
3532
3533
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
3534
0
    insn = &ctx->ir_base[i];
3535
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3536
0
      if (ctx->use_lists[i].count == 0) {
3537
0
        if (insn->op == IR_PHI) {
3538
0
          ir_bitqueue_add(worklist, insn->op1);
3539
0
        }
3540
0
        ir_iter_remove_insn(ctx, i, worklist);
3541
0
      } else {
3542
0
        insn = &ctx->ir_base[i];
3543
0
        switch (insn->op) {
3544
0
          case IR_FP2FP:
3545
0
            if (insn->type == IR_FLOAT) {
3546
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3547
0
                ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist);
3548
0
                insn->op1 = ref;
3549
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3550
0
                break;
3551
0
              }
3552
0
            } else {
3553
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3554
0
                ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist);
3555
0
                insn->op1 = ref;
3556
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3557
0
                break;
3558
0
              }
3559
0
            }
3560
0
            goto folding;
3561
0
          case IR_FP2INT:
3562
0
            if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
3563
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3564
0
                insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist);
3565
0
              }
3566
0
            } else {
3567
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3568
0
                insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist);
3569
0
              }
3570
0
            }
3571
0
            goto folding;
3572
0
          case IR_TRUNC:
3573
0
            if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
3574
0
              ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist);
3575
0
              insn->op1 = ref;
3576
0
              ir_iter_replace_insn(ctx, i, ref, worklist);
3577
0
              break;
3578
0
            }
3579
0
            goto folding;
3580
0
          case IR_SEXT:
3581
0
          case IR_ZEXT:
3582
0
            if (ir_try_promote_ext(ctx, i, insn, worklist)) {
3583
0
              break;
3584
0
            }
3585
0
            goto folding;
3586
0
          case IR_PHI:
3587
0
            break;
3588
0
          default:
3589
0
folding:
3590
0
            ir_iter_fold(ctx, i, worklist);
3591
0
            break;
3592
0
        }
3593
0
      }
3594
0
    } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
3595
0
      if (!(ctx->flags & IR_OPT_CFG)) {
3596
        /* pass */
3597
0
      } else if (insn->op == IR_BEGIN) {
3598
0
        if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) {
3599
0
          ir_merge_blocks(ctx, insn->op1, i, worklist);
3600
0
        }
3601
0
      } else if (insn->op == IR_MERGE) {
3602
0
        ir_iter_optimize_merge(ctx, i, insn, worklist);
3603
0
      } else if (insn->op == IR_LOOP_BEGIN) {
3604
0
        ir_iter_optimize_loop(ctx, i, insn, worklist);
3605
0
      }
3606
0
    } else if (ir_is_dead_load(ctx, i)) {
3607
0
      ir_ref next;
3608
3609
      /* remove LOAD from doubly linked control list */
3610
0
remove_mem_insn:
3611
0
      next = ctx->use_edges[ctx->use_lists[i].refs];
3612
0
      IR_ASSERT(ctx->use_lists[i].count == 1);
3613
0
      ctx->ir_base[next].op1 = insn->op1;
3614
0
      ir_use_list_replace_one(ctx, insn->op1, i, next);
3615
0
      insn->op1 = IR_UNUSED;
3616
0
      ir_iter_remove_insn(ctx, i, worklist);
3617
0
    } else if (insn->op == IR_LOAD) {
3618
0
      val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2);
3619
0
      if (val) {
3620
0
        ir_insn *val_insn;
3621
0
        ir_ref prev, next;
3622
3623
0
remove_aliased_load:
3624
0
        prev = insn->op1;
3625
0
        next = ir_next_control(ctx, i);
3626
0
        ctx->ir_base[next].op1 = prev;
3627
0
        ir_use_list_remove_one(ctx, i, next);
3628
0
        ir_use_list_replace_one(ctx, prev, i, next);
3629
0
        insn->op1 = IR_UNUSED;
3630
3631
0
        val_insn = &ctx->ir_base[val];
3632
0
        if (val_insn->type == insn->type) {
3633
0
          ir_iter_replace_insn(ctx, i, val, worklist);
3634
0
        } else {
3635
0
          if (!IR_IS_CONST_REF(insn->op2)) {
3636
0
            ir_use_list_remove_one(ctx, insn->op2, i);
3637
0
            if (ir_is_dead(ctx, insn->op2)) {
3638
              /* schedule DCE */
3639
0
              ir_bitqueue_add(worklist, insn->op2);
3640
0
            }
3641
0
          }
3642
0
          if (!IR_IS_CONST_REF(val)) {
3643
0
            ir_use_list_add(ctx, val, i);
3644
0
          }
3645
0
          if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) {
3646
            /* load forwarding with bitcast (L2L) */
3647
0
            insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1);
3648
0
          } else {
3649
            /* partial load forwarding (L2L) */
3650
0
            insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1);
3651
0
          }
3652
0
          insn->op1 = val;
3653
0
          insn->op2 = IR_UNUSED;
3654
0
          ir_bitqueue_add(worklist, i);
3655
0
        }
3656
0
      }
3657
0
    } else if (insn->op == IR_STORE) {
3658
0
      if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) {
3659
0
        goto remove_mem_insn;
3660
0
      } else {
3661
0
        ir_insn *val_insn;
3662
3663
0
remove_bitcast:
3664
0
        val = insn->op3;
3665
0
        val_insn = &ctx->ir_base[val];
3666
0
        if (val_insn->op == IR_BITCAST
3667
0
         && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) {
3668
0
          insn->op3 = val_insn->op1;
3669
0
          ir_use_list_remove_one(ctx, val, i);
3670
0
          if (ctx->use_lists[val].count == 0) {
3671
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3672
0
              ir_use_list_replace_one(ctx, val_insn->op1, val, i);
3673
0
            }
3674
0
            ir_iter_remove_insn(ctx, val, worklist);
3675
0
          } else {
3676
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3677
0
              ir_use_list_add(ctx, val_insn->op1, i);
3678
0
            }
3679
0
          }
3680
0
        }
3681
0
      }
3682
0
    } else if (insn->op == IR_VLOAD) {
3683
0
      val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2);
3684
0
      if (val) {
3685
0
        goto remove_aliased_load;
3686
0
      }
3687
0
    } else if (insn->op == IR_VSTORE) {
3688
0
      if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) {
3689
0
        goto remove_mem_insn;
3690
0
      } else {
3691
0
        goto remove_bitcast;
3692
0
      }
3693
0
    } else if (insn->op == IR_IF) {
3694
0
      ir_iter_optimize_if(ctx, i, insn, worklist);
3695
0
    } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
3696
0
      ir_iter_optimize_guard(ctx, i, insn, worklist);
3697
0
    }
3698
0
  }
3699
0
}
3700
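/* Driver: run the SCCP analysis and transformation first, then the
 * iterative pass.  Both phases share "iter_worklist", so instructions
 * touched during SCCP are revisited by ir_iter_opt().  IR_CFG_REACHABLE
 * is set once SCCP has removed unreachable code; ir_iter_opt() clears it
 * again whenever one of its transformations may leave an unreachable
 * block behind. */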
3701
int ir_sccp(ir_ctx *ctx)
3702
0
{
3703
0
  ir_bitqueue sccp_worklist, iter_worklist;
3704
0
  ir_insn *_values;
3705
3706
0
  ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3707
0
  ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
3708
0
  _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
3709
3710
0
  ctx->flags2 |= IR_OPT_IN_SCCP;
3711
0
  ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
3712
0
  ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
3713
0
  ctx->flags2 &= ~IR_OPT_IN_SCCP;
3714
3715
0
  ir_mem_free(_values);
3716
0
  ir_bitqueue_free(&sccp_worklist);
3717
3718
0
  ctx->flags2 |= IR_CFG_REACHABLE;
3719
3720
0
  ir_iter_opt(ctx, &iter_worklist);
3721
3722
0
  ir_bitqueue_free(&iter_worklist);
3723
3724
0
  return 1;
3725
0
}