Coverage Report

Created: 2026-02-14 06:52

/src/php-src/ext/opcache/jit/ir/ir_sccp.c
All executable lines shown below have an execution count of 0 (uncovered).
/*
 * IR - Lightweight JIT Compilation Framework
 * (SCCP - Sparse Conditional Constant Propagation)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 *
 * The SCCP algorithm is based on the M. N. Wegman and F. K. Zadeck publication.
 * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches",
 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
 */

#include "ir.h"
#include "ir_private.h"

#include <math.h>

#define IR_COMBO_COPY_PROPAGATION 1

#define IR_TOP                  IR_UNUSED
#define IR_BOTTOM               IR_LAST_OP

#define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)

#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].op == IR_BOTTOM)
#define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
#define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
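
/*
 * [Editorial sketch, not part of the original source] The macros above encode
 * the classic three-level SCCP lattice: TOP (no fact yet) refines to a
 * constant (or copy), and conflicting facts collapse to BOTTOM (overdefined).
 * A minimal stand-alone "meet" over plain integer constants, mirroring the
 * behavior of ir_sccp_meet_const() below, could look like this:
 */
#include <stdint.h>

typedef enum { EX_TOP, EX_CONST, EX_BOTTOM } ex_state;
typedef struct { ex_state st; int64_t c; } ex_val;

static int ex_meet_const(ex_val *v, int64_t c) /* returns 1 if "v" changed */
{
  if (v->st == EX_TOP) {          /* TOP meet C => C */
    v->st = EX_CONST;
    v->c = c;
    return 1;
  }
  if (v->st == EX_CONST && v->c == c) {
    return 0;                     /* C meet C => C (no change) */
  }
  if (v->st == EX_BOTTOM) {
    return 0;                     /* BOTTOM absorbs everything */
  }
  v->st = EX_BOTTOM;              /* C1 meet C2, C1 != C2 => BOTTOM */
  return 1;
}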

typedef struct {
  union {
    struct {
      IR_STRUCT_LOHI(
        union {
          IR_STRUCT_LOHI(
            union {
              IR_STRUCT_LOHI(
                uint8_t        op,   /* {IR_TOP - unreachable, IR_BOTTOM - reachable} for control */
                                     /* {IR_TOP | IR_COPY() | IR_CONST() | IR_BOTTOM} for data */
                                     /* {IR_TOP | IR_MERGE() | IR_BOTTOM} for IR_MERGE */
                                     /* {IR_TOP | IR_IF() | IR_BOTTOM} for IR_IF and IR_SWITCH */
                uint8_t        type
              );
              uint16_t           opt;
            },
            uint16_t               _space_1
          );
          uint32_t                   optx;
        },
        union {
          ir_ref                     copy;              /* identity for IR_COPY */
          ir_ref                     unfeasible_inputs; /* number of unfeasible inputs for IR_MERGE */
          ir_ref                     single_output;     /* reachable output for IR_IF */
          ir_ref                     visited;           /* for IR_TOP */
        }
      );
      union {
        struct {
          ir_ref                     next; /* doubly-linked identities list for IR_COPY */
          ir_ref                     prev; /* doubly-linked identities list for IR_COPY */
        };
        ir_val                         val;  /* constant value for IR_CONST */
      };
    };
    ir_insn                                insn; /* constant insn for IR_CONST */
  };
} ir_sccp_val;
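
/*
 * [Editorial sketch, not part of the original source] Which union member of
 * ir_sccp_val is live depends on "op". A hypothetical dump helper (assuming
 * <stdio.h> and <inttypes.h> are available) makes the overlay explicit:
 */
static void ex_dump_sccp_val(const ir_sccp_val *v)
{
  if (v->op == IR_TOP) {
    printf("TOP (visited=%d)\n", v->visited);
  } else if (v->op == IR_BOTTOM) {
    printf("BOTTOM\n");
  } else if (v->op == IR_COPY) {
    printf("COPY of %d\n", v->copy);                  /* identity class link */
  } else if (v->op == IR_MERGE) {
    printf("MERGE, %d unfeasible inputs\n", v->unfeasible_inputs);
  } else if (v->op == IR_IF) {
    printf("IF, single output %d\n", v->single_output);
  } else {
    printf("CONST 0x%" PRIx64 "\n", v->val.u64);      /* constant lattice value */
  }
}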

IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
  return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
}

IR_ALWAYS_INLINE void ir_sccp_add_uses(const ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
  const ir_use_list *use_list;
  const ir_ref *p;
  ir_ref n, use;

  IR_ASSERT(!IR_IS_CONST_REF(ref));
  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
    use = *p;
    if (_values[use].op != IR_BOTTOM) {
      ir_bitqueue_add(worklist, use);
    }
  }
}

IR_ALWAYS_INLINE void ir_sccp_add_input(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
  IR_ASSERT(!IR_IS_CONST_REF(ref));
  IR_ASSERT(_values[ref].op == IR_TOP);
  /* do backward propagation only once */
  if (!_values[ref].visited) {
    _values[ref].visited = 1;
    ir_bitqueue_add(worklist, ref);
  }
}

#if IR_COMBO_COPY_PROPAGATION
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
  if (a > 0 && _values[a].op == IR_COPY) {
    do {
      a = _values[a].copy;
      IR_ASSERT(a > 0);
    } while (_values[a].op == IR_COPY);
    IR_ASSERT(_values[a].op == IR_BOTTOM);
  }
  return a;
}

#if 0
static void CHECK_LIST(ir_sccp_val *_values, ir_ref ref)
{
  ir_ref member = _values[ref].op2;
  while (member != ref) {
    IR_ASSERT(_values[_values[member].op2].op3 == member);
    member = _values[member].op2;
  }
  IR_ASSERT(_values[_values[ref].op2].op3 == ref);
}
#else
# define CHECK_LIST(_values, ref)
#endif

static void ir_sccp_add_identity(const ir_ctx *ctx, ir_sccp_val *_values, ir_ref src, ir_ref dst)
{
  IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
  IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
  IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);

  _values[dst].optx = IR_COPY;
  _values[dst].copy = src;

  if (_values[src].op == IR_BOTTOM) {
    /* initialize an empty circular doubly-linked list */
    if (_values[src].copy != src) {
      _values[src].copy = src;
      _values[src].next = src;
      _values[src].prev = src;
    }
  } else {
    src = ir_sccp_identity(ctx, _values, src);
  }

  /* insert into the circular doubly-linked list */
  ir_ref prev = _values[src].prev;
  _values[dst].next = src;
  _values[dst].prev = prev;
  _values[src].prev = dst;
  _values[prev].next = dst;
  CHECK_LIST(_values, dst);
}
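
/*
 * [Editorial example] The identity class is threaded through _values[] as a
 * circular doubly-linked list. For a fresh class {src} the list starts as
 *   src.next == src.prev == src,
 * and inserting "dst" above rewires exactly four links:
 *   dst.next == src, dst.prev == old prev, src.prev == dst, old prev.next == dst,
 * so membership walks and splits in ir_sccp_split_partition() below stay
 * linear in the class size.
 */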

static void ir_sccp_split_partition(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
  ir_ref member, head, tail, next, prev;

  CHECK_LIST(_values, ref);
  IR_MAKE_BOTTOM(ref);
  _values[ref].copy = ref;

  member = _values[ref].next;
  head = tail = IR_UNUSED;
  while (member != ref) {
    if (_values[member].op != IR_BOTTOM) {
      ir_bitqueue_add(worklist, member);
    }
    ir_sccp_add_uses(ctx, _values, worklist, member);

    next = _values[member].next;
    if (ir_sccp_identity(ctx, _values, member) == ref) {
      /* remove "member" from the old circular doubly-linked list */
      prev = _values[member].prev;
      _values[prev].next = next;
      _values[next].prev = prev;

      /* insert "member" into the new doubly-linked list */
      if (!head) {
        head = tail = member;
      } else {
        _values[tail].next = member;
        _values[member].prev = tail;
        tail = member;
      }
    }
    member = next;
  }

  /* remove "ref" from the old circular doubly-linked list */
  next = _values[ref].next;
  prev = _values[ref].prev;
  _values[prev].next = next;
  _values[next].prev = prev;
  CHECK_LIST(_values, next);

  /* close the new circle */
  if (head) {
    _values[ref].next = head;
    _values[ref].prev = tail;
    _values[tail].next = ref;
    _values[head].prev = ref;
  } else {
    _values[ref].next = ref;
    _values[ref].prev = ref;
  }
  CHECK_LIST(_values, ref);
}

IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
{
  if (_values[ref].op == IR_COPY) {
    ir_sccp_split_partition(ctx, _values, worklist, ref);
  } else {
    IR_MAKE_BOTTOM(ref);
  }
}

# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
#else
# define ir_sccp_identity(_ctx, _values, ref) (ref)
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
#endif

IR_ALWAYS_INLINE bool ir_sccp_meet_const(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *val_insn)
{
  IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));

  if (_values[ref].op == IR_TOP) {
    /* TOP meet NEW_CONST => NEW_CONST */
    _values[ref].optx = val_insn->opt;
    _values[ref].val.u64 = val_insn->val.u64;
    return 1;
  } else if (_values[ref].opt == val_insn->opt) {
    /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
    if (_values[ref].val.u64 == val_insn->val.u64) {
      return 0;
    }
  }

  IR_MAKE_BOTTOM_EX(ref);
  return 1;
}

IR_ALWAYS_INLINE bool ir_sccp_meet_copy(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
#if IR_COMBO_COPY_PROPAGATION
  if (_values[ref].op == IR_COPY) {
    /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
     *   (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */
    if (ir_sccp_identity(ctx, _values, ref) == ir_sccp_identity(ctx, _values, val)) {
      return 0; /* not changed */
    }
    ir_sccp_split_partition(ctx, _values, worklist, ref);
    return 1;
  } else {
    IR_ASSERT(_values[ref].op != IR_BOTTOM);
    /* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
    /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
    ir_sccp_add_identity(ctx, _values, val, ref);
    return 1;
  }
#endif
  IR_MAKE_BOTTOM(ref);
  return 1;
}

#if 0
IR_ALWAYS_INLINE bool ir_sccp_meet(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
{
  const ir_insn *val_insn;

  if (IR_IS_CONST_REF(val)) {
    val_insn = &ctx->ir_base[val];
  } else {
    val_insn = &_values[val].insn;

    if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
      return ir_sccp_meet_copy(ctx, _values, worklist, ref, val);
    }
  }

  return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
}
#endif

static ir_ref ir_sccp_fold(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *insn)
{
  const ir_insn *op1_insn, *op2_insn, *op3_insn;
  ir_ref op1, op2, op3, copy;
  uint32_t opt = insn->opt;

  op1 = ir_sccp_identity(ctx, _values, insn->op1);
  op2 = ir_sccp_identity(ctx, _values, insn->op2);
  op3 = ir_sccp_identity(ctx, _values, insn->op3);

restart:
  op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? &_values[op1].insn : ctx->ir_base + op1;
  op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? &_values[op2].insn : ctx->ir_base + op2;
  op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? &_values[op3].insn : ctx->ir_base + op3;

  switch (ir_folding((ir_ctx*)ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
    case IR_FOLD_DO_RESTART:
      opt = ctx->fold_insn.optx;
      op1 = ctx->fold_insn.op1;
      op2 = ctx->fold_insn.op2;
      op3 = ctx->fold_insn.op3;
      goto restart;
    case IR_FOLD_DO_CSE:
    case IR_FOLD_DO_EMIT:
      IR_MAKE_BOTTOM_EX(ref);
      return 1;
    case IR_FOLD_DO_COPY:
      copy = ctx->fold_insn.op1;
      if (IR_IS_CONST_REF(copy)) {
        insn = &ctx->ir_base[copy];
      } else {
        insn = &_values[copy].insn;
        if (!IR_IS_CONST_OP(insn->op) && !IR_IS_SYM_CONST(insn->op)) {
          return ir_sccp_meet_copy(ctx, _values, worklist, ref, copy);
        }
      }
      goto meet_const;
    case IR_FOLD_DO_CONST:
      insn = &ctx->fold_insn;
meet_const:
      return ir_sccp_meet_const(ctx, _values, worklist, ref, insn);
    default:
      IR_ASSERT(0);
      return 0;
  }
}

static bool ir_sccp_analyze_phi(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref i, const ir_insn *insn)
{
  ir_ref j, n, input;
  const ir_ref *merge_input, *p;
  const ir_insn *v, *new_const = NULL;
#if IR_COMBO_COPY_PROPAGATION
  ir_ref new_copy = IR_UNUSED;
  ir_ref new_copy_identity = IR_UNUSED;
  ir_ref phi_identity = ir_sccp_identity(ctx, _values, i);
#endif

  if (!IR_IS_REACHABLE(insn->op1)) {
    return 0;
  }
  n = insn->inputs_count;
  if (n > 3 && _values[i].op == IR_TOP) {
    for (j = 0; j < (n>>2); j++) {
      _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
    }
  }

  p = insn->ops + 2;
  merge_input = ctx->ir_base[insn->op1].ops + 1;
  for (; --n > 0; p++, merge_input++) {
    IR_ASSERT(*merge_input > 0);
    if (!IR_IS_REACHABLE(*merge_input)) {
      continue;
    }

    input = *p;
    if (IR_IS_CONST_REF(input)) {
      v = &ctx->ir_base[input];
    } else if (input == i) {
      continue;
    } else {
      v = &_values[input].insn;
      if (v->op == IR_TOP) {
        ir_sccp_add_input(ctx, _values, worklist, input);
        continue;
#if IR_COMBO_COPY_PROPAGATION
      } else if (v->op == IR_COPY) {
        input = v->op1;
        new_copy_identity = ir_sccp_identity(ctx, _values, input);
        if (new_copy_identity == phi_identity) {
          new_copy_identity = IR_UNUSED;
          continue;
        }
        new_copy = input;
        goto next;
#endif
      } else if (v->op == IR_BOTTOM) {
#if IR_COMBO_COPY_PROPAGATION
        if (input == phi_identity) {
          continue;
        }
        new_copy = new_copy_identity = input;
        goto next;
#else
        goto make_bottom;
#endif
      }
    }
    new_const = v;
    goto next;
  }

  return 0;

next:
  p++;
  merge_input++;
  /* for all live merge inputs */
  for (; --n > 0; p++, merge_input++) {
    IR_ASSERT(*merge_input > 0);
    if (!IR_IS_REACHABLE(*merge_input)) {
      continue;
    }

    input = *p;
    if (IR_IS_CONST_REF(input)) {
#if IR_COMBO_COPY_PROPAGATION
      if (new_copy) {
        goto make_bottom;
      }
#endif
      v = &ctx->ir_base[input];
    } else if (input == i) {
      continue;
    } else {
      v = &_values[input].insn;
      if (v->op == IR_TOP) {
        ir_sccp_add_input(ctx, _values, worklist, input);
        continue;
#if IR_COMBO_COPY_PROPAGATION
      } else if (v->op == IR_COPY) {
        ir_ref identity = ir_sccp_identity(ctx, _values, v->op1);

        if (identity == phi_identity || identity == new_copy_identity) {
          continue;
        }
        goto make_bottom;
#endif
      } else if (v->op == IR_BOTTOM) {
#if IR_COMBO_COPY_PROPAGATION
        if (input == phi_identity || input == new_copy_identity) {
          continue;
        }
#endif
        goto make_bottom;
      }
    }
    if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
      goto make_bottom;
    }
  }

#if IR_COMBO_COPY_PROPAGATION
  if (new_copy) {
    IR_ASSERT(!IR_IS_CONST_REF(new_copy));
    IR_ASSERT(!IR_IS_CONST_OP(_values[new_copy].op) && !IR_IS_SYM_CONST(_values[new_copy].op));
    return ir_sccp_meet_copy(ctx, _values, worklist, i, new_copy);
  }
#endif

  return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);

make_bottom:
  IR_MAKE_BOTTOM_EX(i);
  return 1;
}
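
/*
 * [Editorial example] For x = PHI(M, 3, 3, y) where the MERGE edge carrying
 * "y" is still unreachable, the two loops above meet only the constants and
 * x becomes CONST(3). If that edge later turns feasible and "y" is BOTTOM,
 * the meet falls through to make_bottom and x is re-evaluated as BOTTOM.
 */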

static bool ir_is_dead_load_ex(const ir_ctx *ctx, ir_ref ref, uint32_t flags, const ir_insn *insn)
{
  if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
    return ctx->use_lists[ref].count == 1;
  } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
    return ctx->use_lists[ref].count == 1;
  }
  return 0;
}

static bool ir_is_dead_load(const ir_ctx *ctx, ir_ref ref)
{
  if (ctx->use_lists[ref].count == 1) {
    const ir_insn *insn = &ctx->ir_base[ref];
    uint32_t flags = ir_op_flags[insn->op];

    if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
      return 1;
    } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
      return 1;
    }
  }
  return 0;
}

static bool ir_is_dead(const ir_ctx *ctx, ir_ref ref)
{
  if (ctx->use_lists[ref].count == 0) {
    return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
  } else {
    return ir_is_dead_load(ctx, ref);
  }
  return 0;
}

static bool ir_sccp_is_true(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
{
  const ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;

  return ir_const_is_true(v);
}

static bool ir_sccp_is_equal(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b)
{
  const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
  const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;

  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
  return v1->val.u64 == v2->val.u64;
}

static bool ir_sccp_in_range(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b, ir_ref c)
{
  const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
  const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
  const ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c].insn;

  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
  IR_ASSERT(!IR_IS_SYM_CONST(v3->op));
  if (IR_IS_TYPE_SIGNED(v1->type)) {
    return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64;
  } else {
    return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64;
  }
}

#ifdef IR_SCCP_TRACE
static void ir_sccp_trace_val(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
  if (IR_IS_BOTTOM(i)) {
    fprintf(stderr, "BOTTOM");
  } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
    fprintf(stderr, "CONST(");
    ir_print_const(ctx, &_values[i].insn, stderr, true);
    fprintf(stderr, ")");
#if IR_COMBO_COPY_PROPAGATION
  } else if (_values[i].op == IR_COPY) {
    fprintf(stderr, "COPY(%d)", _values[i].op1);
#endif
  } else if (IR_IS_TOP(i)) {
    fprintf(stderr, "TOP");
  } else if (_values[i].op == IR_IF) {
    fprintf(stderr, "IF(%d)", _values[i].op1);
  } else if (_values[i].op == IR_MERGE) {
    fprintf(stderr, "MERGE(%d)", _values[i].op1);
  } else {
    fprintf(stderr, "%d", _values[i].op);
  }
}

static void ir_sccp_trace_start(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
  fprintf(stderr, "%d. ", i);
  ir_sccp_trace_val(ctx, _values, i);
}

static void ir_sccp_trace_end(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
{
  fprintf(stderr, " -> ");
  ir_sccp_trace_val(ctx, _values, i);
  fprintf(stderr, "\n");
}
#else
# define ir_sccp_trace_start(c, v, i)
# define ir_sccp_trace_end(c, v, i)
#endif

static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
  ir_ref i, j, n, use;
  const ir_ref *p;
  const ir_use_list *use_list;
  const ir_insn *insn, *use_insn;
  uint32_t flags;

  /* A slightly modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
  worklist->pos = 0;
  ir_bitset_incl(worklist->set, 1);
  for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
    IR_ASSERT(_values[i].op != IR_BOTTOM);
    ir_sccp_trace_start(ctx, _values, i);
    insn = &ctx->ir_base[i];
    flags = ir_op_flags[insn->op];
    if (flags & IR_OP_FLAG_DATA) {
      if (ctx->use_lists[i].count == 0) {
        /* dead code */
        continue;
      } else if (insn->op == IR_PHI) {
        if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
          continue;
        }
      } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
        bool may_benefit = 0;
        bool has_top = 0;

        if (_values[i].op != IR_TOP) {
          may_benefit = 1;
        }

        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
        n = IR_INPUT_EDGES_COUNT(flags);
        for (p = insn->ops + 1; n > 0; p++, n--) {
          ir_ref input = *p;
          if (input > 0) {
            if (_values[input].op == IR_TOP) {
              has_top = 1;
              ir_sccp_add_input(ctx, _values, worklist, input);
            } else if (_values[input].op != IR_BOTTOM) {
              /* Perform folding only if some of the direct inputs
               * are going to be replaced by a constant or copy.
               * This approach may miss some folding optimizations
               * that depend on indirect inputs, e.g. reassociation.
               */
              may_benefit = 1;
            }
          }
        }
        if (has_top) {
          continue;
        }
        if (!may_benefit) {
          IR_MAKE_BOTTOM_EX(i);
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
            ir_bitqueue_add(iter_worklist, i);
          }
        } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
          /* not changed */
          continue;
        } else if (_values[i].op == IR_BOTTOM) {
          insn = &ctx->ir_base[i];
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
            ir_bitqueue_add(iter_worklist, i);
          }
        }
      } else {
        IR_MAKE_BOTTOM_EX(i);
        n = IR_INPUT_EDGES_COUNT(flags);
        for (p = insn->ops + 1; n > 0; p++, n--) {
          ir_ref input = *p;
          if (input > 0) {
            if (_values[input].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, input);
            }
          }
        }
      }
    } else if (flags & IR_OP_FLAG_BB_START) {
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
        ir_bitqueue_add(iter_worklist, i);
      }
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
        ir_ref unfeasible_inputs = 0;

        n = insn->inputs_count;
        if (n > 3 && _values[i].op == IR_TOP) {
          for (j = 0; j < (n>>2); j++) {
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
          }
        }
        for (p = insn->ops + 1; n > 0; p++, n--) {
          const ir_ref input = *p;
          IR_ASSERT(input > 0);
          if (!IR_IS_REACHABLE(input)) {
            unfeasible_inputs++;
          }
        }
        if (unfeasible_inputs == 0) {
          IR_MAKE_BOTTOM(i);
        } else if (_values[i].op != IR_MERGE || _values[i].unfeasible_inputs != unfeasible_inputs) {
          _values[i].optx = IR_MERGE;
          _values[i].unfeasible_inputs = unfeasible_inputs;
        } else {
          continue;
        }
        if (ctx->flags2 & IR_MEM2SSA_VARS) {
          /* MEM2SSA puts new PHIs at the bottom, but we'd like to process them now */
          use_list = &ctx->use_lists[i];
          n = use_list->count;
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
            use = *p;
            if (_values[use].op != IR_BOTTOM) {
              if (ctx->ir_base[use].op == IR_PHI) {
                ir_bitqueue_del(worklist, use);
                if (ctx->use_lists[use].count != 0) {
                  if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) {
                    ir_sccp_add_uses(ctx, _values, worklist, use);
                  }
                }
              } else {
                ir_bitqueue_add(worklist, use);
              }
            }
          }
          continue;
        }
      } else {
        IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1));
        IR_MAKE_BOTTOM(i);
      }
    } else {
      IR_ASSERT(insn->op1 > 0);
      if (!IR_IS_REACHABLE(insn->op1)) {
        /* control input is not feasible */
        continue;
      }
      if (insn->op == IR_IF) {
        if (IR_IS_TOP(insn->op2)) {
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
          continue;
        }
        if (IR_IS_CONST(insn->op2)) {
          bool b = ir_sccp_is_true(ctx, _values, insn->op2);
          use_list = &ctx->use_lists[i];
          IR_ASSERT(use_list->count == 2);
          p = &ctx->use_edges[use_list->refs];
          use = *p;
          use_insn = &ctx->ir_base[use];
          IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
          if ((use_insn->op == IR_IF_TRUE) != b) {
            use = *(p+1);
            IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
          }
          if (_values[i].op == IR_TOP) {
            _values[i].optx = IR_IF;
            _values[i].single_output = use;
            ir_bitqueue_add(worklist, use);
            continue;
          } else if (_values[i].op == IR_IF && _values[i].single_output == use) {
            continue;
          }
        }
        IR_MAKE_BOTTOM(i);
        ir_bitqueue_add(iter_worklist, i);
      } else if (insn->op == IR_SWITCH) {
        if (IR_IS_TOP(insn->op2)) {
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
          continue;
        }
        if (IR_IS_CONST(insn->op2)) {
          ir_ref use_case = IR_UNUSED;

          use_list = &ctx->use_lists[i];
          n = use_list->count;
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
            use = *p;
            IR_ASSERT(use > 0);
            use_insn = &ctx->ir_base[use];
            if (use_insn->op == IR_CASE_VAL) {
              if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
                use_case = use;
                break;
              }
            } else if (use_insn->op == IR_CASE_DEFAULT) {
              use_case = use;
            } else if (use_insn->op == IR_CASE_RANGE) {
              if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) {
                use_case = use;
                break;
              }
            }
          }
          if (use_case) {
            use_insn = &ctx->ir_base[use_case];
            if (_values[i].op == IR_TOP) {
              _values[i].optx = IR_IF;
              _values[i].single_output = use_case;
              ir_bitqueue_add(worklist, use_case);
              continue;
            } else if (_values[i].op == IR_IF && _values[i].single_output == use_case) {
              continue;
            }
          }
        }
        IR_MAKE_BOTTOM(i);
      } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
        /* schedule dead load elimination */
        ir_bitqueue_add(iter_worklist, i);
        IR_MAKE_BOTTOM(i);
      } else {
        /* control, call, load and store instructions may have unprocessed inputs */
        n = IR_INPUT_EDGES_COUNT(flags);
        if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
          for (j = 0; j < (n>>2); j++) {
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
          }
          for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
            IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
            use = *p;
            if (use > 0 && _values[use].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, use);
            }
          }
        } else if (n >= 2) {
          IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
          use = insn->op2;
          if (use > 0 && _values[use].op == IR_TOP) {
            ir_sccp_add_input(ctx, _values, worklist, use);
          }
          if (n > 2) {
            IR_ASSERT(n == 3);
            IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
            use = insn->op3;
            if (use > 0 && _values[use].op == IR_TOP) {
              ir_sccp_add_input(ctx, _values, worklist, use);
            }
          }
        }
        IR_MAKE_BOTTOM(i);
      }
    }
    ir_sccp_add_uses(ctx, _values, worklist, i);
  }

#ifdef IR_DEBUG
  if (ctx->flags & IR_DEBUG_SCCP) {
    for (i = 1; i < ctx->insns_count; i++) {
      if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
        fprintf(stderr, "%d. CONST(", i);
        ir_print_const(ctx, &_values[i].insn, stderr, true);
        fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
      } else if (_values[i].op == IR_COPY) {
        fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].copy);
#endif
      } else if (IR_IS_TOP(i)) {
        if (ctx->ir_base[i].op != IR_TOP) {
          fprintf(stderr, "%d. TOP\n", i);
        }
      } else if (_values[i].op == IR_IF) {
        fprintf(stderr, "%d. IF(%d)\n", i, _values[i].single_output);
      } else if (_values[i].op == IR_MERGE) {
        fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].unfeasible_inputs);
      } else if (!IR_IS_BOTTOM(i)) {
        fprintf(stderr, "%d. %d\n", i, _values[i].op);
      }
    }
  }
#endif
}

/***********************/
/* SCCP transformation */
/***********************/

static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    *p = IR_UNUSED;
  }
}

static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
    if (input > 0 && _values[input].op > IR_COPY) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      }
    }
  }
}

static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p, use, i;
  ir_insn *insn;
  ir_use_list *use_list;

  IR_ASSERT(ref != new_ref);

  insn = &ctx->ir_base[ref];

#if IR_COMBO_COPY_PROPAGATION
  if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) {
    /* remove from control list */
    ir_ref prev = insn->op1;
    ir_ref next = ir_next_control(ctx, ref);
    ctx->ir_base[next].op1 = prev;
    ir_use_list_remove_one(ctx, ref, next);
    ir_use_list_replace_one(ctx, prev, ref, next);
    insn->op1 = IR_UNUSED;
  }
#endif

  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
    if (input > 0 && _values[input].op > IR_COPY) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      }
    }
  }

  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  p = &ctx->use_edges[use_list->refs];
  if (new_ref <= 0) {
    /* constant or IR_UNUSED */
    for (; n; p++, n--) {
      use = *p;
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
      if (_values[use].op > IR_COPY) {
        insn = &ctx->ir_base[use];
        i = ir_insn_find_op(insn, ref);
        if (!i) continue;
        IR_ASSERT(i > 0);
        ir_insn_set_op(insn, i, new_ref);
        /* schedule folding */
        ir_bitqueue_add(worklist, use);
      }
    }
  } else {
    for (j = 0; j < n; j++, p++) {
      use = *p;
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
      if (_values[use].op == IR_BOTTOM) {
        insn = &ctx->ir_base[use];
        i = ir_insn_find_op(insn, ref);
        IR_ASSERT(i > 0);
        ir_insn_set_op(insn, i, new_ref);
        if (ir_use_list_add(ctx, new_ref, use)) {
          /* restore after reallocation */
          use_list = &ctx->use_lists[ref];
          n = use_list->count;
          p = &ctx->use_edges[use_list->refs + j];
        }
        /* schedule folding */
        ir_bitqueue_add(worklist, use);
      }
    }
  }
  CLEAR_USES(ref);
}

static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref dst)
{
  ir_ref next;
  ir_insn *insn, *next_insn;

  insn = &ctx->ir_base[ref];
  if (ctx->use_lists[dst].count == 1) {
    next = ctx->use_edges[ctx->use_lists[dst].refs];
    next_insn = &ctx->ir_base[next];
    /* remove IF and IF_TRUE/FALSE from the doubly-linked control list */
    next_insn->op1 = insn->op1;
    ir_use_list_replace_one(ctx, insn->op1, ref, next);
    /* remove IF and IF_TRUE/FALSE instructions */
    ir_sccp_make_nop(ctx, ref);
    ir_sccp_make_nop(ctx, dst);
  } else {
    insn->op2 = IR_UNUSED;
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
    next_insn = &ctx->ir_base[dst];
    next_insn->op = IR_BEGIN;
  }
}

static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
  ir_ref old_merge_inputs, new_merge_inputs, i, *p;
  ir_use_list *use_list;
  ir_bitset life_inputs;
  ir_bitset_base_t holder = 0;

  IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
  old_merge_inputs = insn->inputs_count;
  new_merge_inputs = 0;
  life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);

  for (i = 1; i <= old_merge_inputs; i++) {
    ir_ref input = ir_insn_op(insn, i);

    if (input) {
      new_merge_inputs++;
      if (new_merge_inputs != i) {
        ir_insn_set_op(insn, new_merge_inputs, input);
      }
      ir_bitset_incl(life_inputs, i);
    }
  }

  if (new_merge_inputs == old_merge_inputs) {
    /* All inputs are feasible */
    if (life_inputs != &holder) {
      ir_mem_free(life_inputs);
    }
    return 0;
  }

  for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
    ir_insn_set_op(insn, i, IR_UNUSED);
  }

  if (new_merge_inputs <= 1) {
#if 0
    if (new_merge_inputs == 1
     && insn->op == IR_LOOP_BEGIN
     && insn->op1 > ref) { // TODO: check dominance instead of order
      /* dead loop */
      ir_use_list_remove_one(ctx, insn->op1, ref);
      insn->op1 = IR_UNUSED;
      new_merge_inputs = 0;
    }
#endif
    insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
    ir_bitqueue_add(worklist, ref);
  } else {
    insn->inputs_count = new_merge_inputs;
  }

  /* Update PHIs */
  use_list = &ctx->use_lists[ref];
  if (use_list->count > 1) {
    ir_ref use_count = 0;
    ir_ref *q;

    for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
      ir_ref use = *p;
      ir_insn *use_insn = &ctx->ir_base[use];

      if (use_insn->op == IR_PHI) {
        ir_ref j, k;

        /* compress PHI */
        for (j = k = 1; j <= old_merge_inputs; j++) {
          ir_ref input = ir_insn_op(use_insn, j + 1);

          if (ir_bitset_in(life_inputs, j)) {
            IR_ASSERT(input);
            if (k != j) {
              ir_insn_set_op(use_insn, k + 1, input);
            }
            k++;
          } else if (input > 0) {
            ir_use_list_remove_one(ctx, input, use);
          }
        }
        while (k <= old_merge_inputs) {
          k++;
          ir_insn_set_op(use_insn, k, IR_UNUSED);
        }

        if (new_merge_inputs == 0) {
          /* remove PHI */
#if 0
          use_insn->op1 = IR_UNUSED;
          ir_iter_remove_insn(ctx, use, worklist);
#else
          IR_ASSERT(0);
#endif
          continue;
        } else if (new_merge_inputs == 1) {
          /* replace PHI by COPY */
          use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
          use_insn->op1 = use_insn->op2;
          use_insn->op2 = IR_UNUSED;
          ir_bitqueue_add(worklist, use);
          continue;
        } else {
          use_insn->inputs_count = new_merge_inputs + 1;
        }
      }
      if (p != q) {
        *q = use;
      }
      q++;
      use_count++;
    }
    for (i = use_count; i < use_list->count; q++, i++) {
      *q = IR_UNUSED;
    }
    use_list->count = use_count;
  }

  if (life_inputs != &holder) {
    ir_mem_free(life_inputs);
  }

  return 1;
}
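
/*
 * [Editorial example] For MERGE(in1, in2, in3) where in2 proved unfeasible,
 * the inputs compress to MERGE(in1, in3), and each dependent
 * PHI(M, v1, v2, v3) is compressed in lock-step to PHI(M, v1, v3) using the
 * "life_inputs" bitset. With a single surviving input the MERGE degrades to
 * BEGIN and each PHI to a COPY, which the worklist then folds away.
 */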

static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
{
  ir_ref i, j;
  const ir_sccp_val *value;

  for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
    if (value->op == IR_BOTTOM) {
      continue;
    } else if (IR_IS_CONST_OP(value->op)) {
      /* replace instruction by constant */
      j = ir_const(ctx, value->val, value->type);
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
    } else if (IR_IS_SYM_CONST(value->op)) {
      /* replace instruction by constant */
      j = ir_const_ex(ctx, value->val, value->type, value->optx);
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
#if IR_COMBO_COPY_PROPAGATION
    } else if (value->op == IR_COPY) {
      ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->copy), iter_worklist);
#endif
    } else if (value->op == IR_TOP) {
      /* remove unreachable instruction */
      ir_insn *insn = &ctx->ir_base[i];

      if (insn->op == IR_NOP) {
        /* already removed */
      } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
        if (insn->op != IR_PARAM) {
          ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
        }
      } else {
        if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
          /* remove from terminators list */
          ir_ref prev = ctx->ir_base[1].op1;
          if (prev == i) {
            ctx->ir_base[1].op1 = insn->op3;
          } else {
            while (prev) {
              if (ctx->ir_base[prev].op3 == i) {
                ctx->ir_base[prev].op3 = insn->op3;
                break;
              }
              prev = ctx->ir_base[prev].op3;
            }
          }
        }
        ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist);
      }
    } else if (value->op == IR_IF) {
      /* remove one-way IF/SWITCH */
      ir_sccp_remove_if(ctx, _values, i, value->single_output);
    } else if (value->op == IR_MERGE) {
      /* schedule merge to remove unfeasible MERGE inputs */
      ir_bitqueue_add(worklist, i);
    }
  }

  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
    IR_ASSERT(_values[i].op == IR_MERGE);
    ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
  }
}

/***************************/
/* Iterative Optimizations */
/***************************/

void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  ir_use_list *use_list = &ctx->use_lists[ref];
  ir_ref *p, n = use_list->count;

  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
    ir_bitqueue_add(worklist, *p);
  }
}

/* Modification of some instruction may open new optimization opportunities for other
 * instructions that use this one.
 *
 * For example, let "a = ADD(x, y)" become "a = ADD(x, C1)". In case we also have
 * "b = ADD(a, C2)" we may optimize it into "b = ADD(x, C1 + C2)" and then might
 * also remove "a".
 *
 * This implementation supports only a few of the fold combinations from ir_fold.h.
 *
 * TODO: Think about a more general solution ???
 */
static void ir_iter_add_related_uses(const ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  const ir_insn *insn = &ctx->ir_base[ref];

  if (insn->op == IR_ADD || insn->op == IR_SUB) {
    const ir_use_list *use_list = &ctx->use_lists[ref];

    if (use_list->count == 1) {
      ir_ref use = ctx->use_edges[use_list->refs];
      const ir_insn *use_insn = &ctx->ir_base[use];

      if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
        ir_bitqueue_add(worklist, use);
      }
    }
  }
}
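
/*
 * [Editorial example] If "a = ADD(x, y)" becomes "a = ADD(x, 3)" and its only
 * user is "b = ADD(a, 4)", re-queuing "b" here lets the fold engine rewrite
 * it to "b = ADD(x, 7)"; "a" then loses its last use and is collected by DCE.
 */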

static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  CLEAR_USES(ref);
  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    if (input > 0) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
        /* try to optimize PHI into ABS/MIN/MAX/COND */
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
      }
    }
  }
}

void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref i, j, n, *p, use;
  ir_insn *insn;
  ir_use_list *use_list;

  IR_ASSERT(ref != new_ref);

  use_list = &ctx->use_lists[ref];
  n = use_list->count;
  p = &ctx->use_edges[use_list->refs];
  if (new_ref <= 0) {
    /* constant or IR_UNUSED */
    for (; n; p++, n--) {
      use = *p;
      IR_ASSERT(use != ref);
      insn = &ctx->ir_base[use];
      i = ir_insn_find_op(insn, ref);
      IR_ASSERT(i > 0);
      ir_insn_set_op(insn, i, new_ref);
      /* schedule folding */
      ir_bitqueue_add(worklist, use);
      ir_iter_add_related_uses(ctx, use, worklist);
    }
  } else {
    for (j = 0; j < n; j++, p++) {
      use = *p;
      IR_ASSERT(use != ref);
      insn = &ctx->ir_base[use];
      i = ir_insn_find_op(insn, ref);
      IR_ASSERT(i > 0);
      ir_insn_set_op(insn, i, new_ref);
      if (ir_use_list_add(ctx, new_ref, use)) {
        /* restore after reallocation */
        use_list = &ctx->use_lists[ref];
        n = use_list->count;
        p = &ctx->use_edges[use_list->refs + j];
      }
      /* schedule folding */
      ir_bitqueue_add(worklist, use);
    }
  }
}

static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
  ir_ref j, n, *p;
  ir_insn *insn;

  insn = &ctx->ir_base[ref];
  n = insn->inputs_count;
  insn->opt = IR_NOP; /* keep "inputs_count" */
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
    ir_ref input = *p;
    *p = IR_UNUSED;
    if (input > 0) {
      ir_use_list_remove_all(ctx, input, ref);
      if (ir_is_dead(ctx, input)) {
        /* schedule DCE */
        ir_bitqueue_add(worklist, input);
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
        /* try to optimize PHI into ABS/MIN/MAX/COND */
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
      }
    }
  }

  ir_iter_replace(ctx, ref, new_ref, worklist);

  CLEAR_USES(ref);
}

void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist)
{
  ir_insn *insn = &ctx->ir_base[ref];
  ir_ref old_val = ir_insn_op(insn, idx);

  IR_ASSERT(old_val != new_val);
  if (!IR_IS_CONST_REF(new_val)) {
    ir_use_list_add(ctx, new_val, ref);
  }
  ir_insn_set_op(insn, idx, new_val);
  if (!IR_IS_CONST_REF(old_val)) {
    ir_use_list_remove_one(ctx, old_val, ref);
    if (ir_is_dead(ctx, old_val)) {
      /* schedule DCE */
      ir_bitqueue_add(worklist, old_val);
    }
  }
}

static ir_ref ir_iter_find_cse1(const ir_ctx *ctx, uint32_t optx, ir_ref op1)
{
  IR_ASSERT(!IR_IS_CONST_REF(op1));

  const ir_use_list *use_list = &ctx->use_lists[op1];
  const ir_ref *p;
  ir_ref n = use_list->count;

  for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
    ir_ref use = *p;
    const ir_insn *use_insn = &ctx->ir_base[use];

    if (use_insn->optx == optx) {
      IR_ASSERT(use_insn->op1 == op1);
      return use;
    }
  }
  return IR_UNUSED;
}

static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
{
  uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
  const ir_use_list *use_list = NULL;
  const ir_ref *p;
  ir_ref use;
  const ir_insn *use_insn;

  if (n == 2) {
    if (!IR_IS_CONST_REF(op1)) {
      use_list = &ctx->use_lists[op1];
    }
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
      use_list = &ctx->use_lists[op2];
    }
    if (use_list) {
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) {
            IR_ASSERT(use_insn->op3 == op3);
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  } else if (n < 2) {
    if (op1 > 0) {
      IR_ASSERT(n == 1);
      use_list = &ctx->use_lists[op1];
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt) {
            IR_ASSERT(use_insn->op1 == op1);
            IR_ASSERT(use_insn->op2 == op2);
            IR_ASSERT(use_insn->op3 == op3);
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  } else {
    IR_ASSERT(n == 3);
    if (!IR_IS_CONST_REF(op1)) {
      use_list = &ctx->use_lists[op1];
    }
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
      use_list = &ctx->use_lists[op2];
    }
    if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
      use_list = &ctx->use_lists[op3];
    }
    if (use_list) {
      n = use_list->count;
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
        use = *p;
        if (use != ref) {
          use_insn = &ctx->ir_base[use];
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) {
            if (use < ref) {
              return use;
            } else {
              ir_bitqueue_add(worklist, use);
            }
          }
        }
      }
    }
  }
  return IR_UNUSED;
}
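
/*
 * [Editorial note] The CSE lookup above never hashes: an instruction with the
 * same (opt, op1, op2, op3) must sit in the use list of each of its
 * non-constant operands, so scanning the shortest such list is sufficient.
 * Duplicates that appear after "ref" are queued instead, to be folded into
 * "ref" on a later iteration.
 */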

static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
  uint32_t opt;
  ir_ref op1, op2, op3, copy;
  const ir_insn *op1_insn, *op2_insn, *op3_insn;
  ir_insn *insn;

  insn = &ctx->ir_base[ref];
  opt = insn->opt;
  op1 = insn->op1;
  op2 = insn->op2;
  op3 = insn->op3;

restart:
  op1_insn = ctx->ir_base + op1;
  op2_insn = ctx->ir_base + op2;
  op3_insn = ctx->ir_base + op3;

  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
    case IR_FOLD_DO_RESTART:
      opt = ctx->fold_insn.optx;
      op1 = ctx->fold_insn.op1;
      op2 = ctx->fold_insn.op2;
      op3 = ctx->fold_insn.op3;
      goto restart;
    case IR_FOLD_DO_CSE:
      copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt,
        ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist);
      if (copy) {
        ir_iter_replace_insn(ctx, ref, copy, worklist);
        break;
      }
      IR_FALLTHROUGH;
    case IR_FOLD_DO_EMIT:
      insn = &ctx->ir_base[ref];
      if (insn->opt != ctx->fold_insn.opt
       || insn->op1 != ctx->fold_insn.op1
       || insn->op2 != ctx->fold_insn.op2
       || insn->op3 != ctx->fold_insn.op3) {

        insn->optx = ctx->fold_insn.opt;
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
        insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
        if (insn->op1 != ctx->fold_insn.op1) {
          if (insn->op1 > 0) {
            ir_use_list_remove_one(ctx, insn->op1, ref);
          }
          if (ctx->fold_insn.op1 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
          }
        }
        if (insn->op2 != ctx->fold_insn.op2) {
          if (insn->op2 > 0) {
            ir_use_list_remove_one(ctx, insn->op2, ref);
          }
          if (ctx->fold_insn.op2 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
          }
        }
        if (insn->op3 != ctx->fold_insn.op3) {
          if (insn->op3 > 0) {
            ir_use_list_remove_one(ctx, insn->op3, ref);
          }
          if (ctx->fold_insn.op3 > 0) {
            ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
          }
        }
        insn->op1 = ctx->fold_insn.op1;
        insn->op2 = ctx->fold_insn.op2;
        insn->op3 = ctx->fold_insn.op3;

        ir_iter_add_uses(ctx, ref, worklist);
      }
      break;
    case IR_FOLD_DO_COPY:
      op1 = ctx->fold_insn.op1;
      ir_iter_replace_insn(ctx, ref, op1, worklist);
      break;
    case IR_FOLD_DO_CONST:
      op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
      ir_iter_replace_insn(ctx, ref, op1, worklist);
      break;
    default:
      IR_ASSERT(0);
      break;
  }
}

static bool ir_may_promote_d2f(const ir_ctx *ctx, ir_ref ref)
{
  const ir_insn *insn = &ctx->ir_base[ref];

  IR_ASSERT(insn->type == IR_DOUBLE);
  if (IR_IS_CONST_REF(ref)) {
    return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
  } else {
    switch (insn->op) {
      case IR_FP2FP:
        return 1;
//      case IR_INT2FP:
//        return ctx->use_lists[ref].count == 1;
      case IR_NEG:
      case IR_ABS:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_d2f(ctx, insn->op1);
      case IR_ADD:
      case IR_SUB:
      case IR_MUL:
      case IR_DIV:
      case IR_MIN:
      case IR_MAX:
        return ctx->use_lists[ref].count == 1 &&
          ir_may_promote_d2f(ctx, insn->op1) &&
          ir_may_promote_d2f(ctx, insn->op2);
      default:
        break;
    }
  }
  return 0;
}
1564
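The constant case above is a round-trip test: a double constant may be narrowed only if converting it to float and back reproduces it exactly. A self-contained check of that predicate (the values 0.5 and 0.1 are chosen for illustration):

#include <stdio.h>

int main(void)
{
    double exact   = 0.5; /* a power of two: exactly representable in float */
    double inexact = 0.1; /* not representable: rounding to float loses bits */

    printf("%d\n", exact   == (double)(float)exact);   /* prints 1 */
    printf("%d\n", inexact == (double)(float)inexact); /* prints 0 */
    return 0;
}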
1565
static bool ir_may_promote_f2d(const ir_ctx *ctx, ir_ref ref)
1566
0
{
1567
0
  const ir_insn *insn = &ctx->ir_base[ref];
1568
1569
0
  IR_ASSERT(insn->type == IR_FLOAT);
1570
0
  if (IR_IS_CONST_REF(ref)) {
1571
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
1572
0
  } else {
1573
0
    switch (insn->op) {
1574
0
      case IR_FP2FP:
1575
0
        return 1;
1576
//      case IR_INT2FP:
1577
//        return ctx->use_lists[ref].count == 1;
1578
0
      case IR_NEG:
1579
0
      case IR_ABS:
1580
0
        return ctx->use_lists[ref].count == 1 &&
1581
0
          ir_may_promote_f2d(ctx, insn->op1);
1582
0
      case IR_ADD:
1583
0
      case IR_SUB:
1584
0
      case IR_MUL:
1585
//      case IR_DIV:
1586
0
      case IR_MIN:
1587
0
      case IR_MAX:
1588
0
        return ctx->use_lists[ref].count == 1 &&
1589
0
          ir_may_promote_f2d(ctx, insn->op1) &&
1590
0
          ir_may_promote_f2d(ctx, insn->op2);
1591
0
      default:
1592
0
        break;
1593
0
    }
1594
0
  }
1595
0
  return 0;
1596
0
}
1597
1598
static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1599
0
{
1600
0
  ir_insn *insn = &ctx->ir_base[ref];
1601
0
  uint32_t count;
1602
1603
0
  IR_ASSERT(insn->type == IR_DOUBLE);
1604
0
  if (IR_IS_CONST_REF(ref)) {
1605
0
    return ir_const_float(ctx, (float)insn->val.d);
1606
0
  } else {
1607
0
    ir_bitqueue_add(worklist, ref);
1608
0
    switch (insn->op) {
1609
0
      case IR_FP2FP:
1610
0
        count = ctx->use_lists[ref].count;
1611
0
        ir_use_list_remove_all(ctx, ref, use);
1612
0
        if (ctx->use_lists[ref].count == 0) {
1613
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1614
0
          if (count > 1) {
1615
0
            do {
1616
0
              ir_use_list_add(ctx, insn->op1, use);
1617
0
            } while (--count > 1);
1618
0
          }
1619
0
          ref = insn->op1;
1620
0
          MAKE_NOP(insn);
1621
0
          return ref;
1622
0
        } else {
1623
0
          ir_use_list_add(ctx, insn->op1, use);
1624
0
          count -= ctx->use_lists[ref].count;
1625
0
          if (count > 1) {
1626
0
            do {
1627
0
              ir_use_list_add(ctx, insn->op1, use);
1628
0
            } while (--count > 1);
1629
0
          }
1630
0
        }
1631
0
        return insn->op1;
1632
//      case IR_INT2FP:
1633
//        insn->type = IR_FLOAT;
1634
//        return ref;
1635
0
      case IR_NEG:
1636
0
      case IR_ABS:
1637
0
        insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1638
0
        insn->type = IR_FLOAT;
1639
0
        return ref;
1640
0
      case IR_ADD:
1641
0
      case IR_SUB:
1642
0
      case IR_MUL:
1643
0
      case IR_DIV:
1644
0
      case IR_MIN:
1645
0
      case IR_MAX:
1646
0
        if (insn->op1 == insn->op2) {
1647
0
          insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1648
0
        } else {
1649
0
          insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1650
0
          insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist);
1651
0
        }
1652
0
        insn->type = IR_FLOAT;
1653
0
        return ref;
1654
0
      default:
1655
0
        break;
1656
0
    }
1657
0
  }
1658
0
  IR_ASSERT(0);
1659
0
  return ref;
1660
0
}
1661
1662
static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1663
0
{
1664
0
  ir_insn *insn = &ctx->ir_base[ref];
1665
0
  uint32_t count;
1666
0
  ir_ref old_ref;
1667
1668
0
  IR_ASSERT(insn->type == IR_FLOAT);
1669
0
  if (IR_IS_CONST_REF(ref)) {
1670
0
    return ir_const_double(ctx, (double)insn->val.f);
1671
0
  } else {
1672
0
    ir_bitqueue_add(worklist, ref);
1673
0
    switch (insn->op) {
1674
0
      case IR_FP2FP:
1675
0
        count = ctx->use_lists[ref].count;
1676
0
        ir_use_list_remove_all(ctx, ref, use);
1677
0
        if (ctx->use_lists[ref].count == 0) {
1678
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1679
0
          if (count > 1) {
1680
0
            do {
1681
0
              ir_use_list_add(ctx, insn->op1, use);
1682
0
            } while (--count > 1);
1683
0
          }
1684
0
          ref = insn->op1;
1685
0
          MAKE_NOP(insn);
1686
0
          return ref;
1687
0
        } else {
1688
0
          ir_use_list_add(ctx, insn->op1, use);
1689
0
          count -= ctx->use_lists[ref].count;
1690
0
          if (count > 1) {
1691
0
            do {
1692
0
              ir_use_list_add(ctx, insn->op1, use);
1693
0
            } while (--count > 1);
1694
0
          }
1695
0
        }
1696
0
        return insn->op1;
1697
0
      case IR_INT2FP:
1698
0
        old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
1699
0
        if (old_ref) {
1700
0
          IR_ASSERT(ctx->use_lists[ref].count == 1);
1701
0
          ir_use_list_remove_one(ctx, insn->op1, ref);
1702
0
          CLEAR_USES(ref);
1703
0
          MAKE_NOP(insn);
1704
0
          ir_use_list_add(ctx, old_ref, use);
1705
0
          return old_ref;
1706
0
        }
1707
0
        insn->type = IR_DOUBLE;
1708
0
        return ref;
1709
0
      case IR_NEG:
1710
0
      case IR_ABS:
1711
0
        insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1712
0
        insn->type = IR_DOUBLE;
1713
0
        return ref;
1714
0
      case IR_ADD:
1715
0
      case IR_SUB:
1716
0
      case IR_MUL:
1717
//      case IR_DIV:
1718
0
      case IR_MIN:
1719
0
      case IR_MAX:
1720
0
        if (insn->op1 == insn->op2) {
1721
0
          insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1722
0
        } else {
1723
0
          insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1724
0
          insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist);
1725
0
        }
1726
0
        insn->type = IR_DOUBLE;
1727
0
        return ref;
1728
0
      default:
1729
0
        break;
1730
0
    }
1731
0
  }
1732
0
  IR_ASSERT(0);
1733
0
  return ref;
1734
0
}
1735
1736
static bool ir_may_promote_trunc(const ir_ctx *ctx, ir_type type, ir_ref ref)
1737
0
{
1738
0
  const ir_insn *insn = &ctx->ir_base[ref];
1739
0
  const ir_ref *p;
1740
0
  ir_ref n, input;
1741
1742
0
  if (IR_IS_CONST_REF(ref)) {
1743
0
    return !IR_IS_SYM_CONST(insn->op);
1744
0
  } else {
1745
0
    switch (insn->op) {
1746
0
      case IR_ZEXT:
1747
0
      case IR_SEXT:
1748
0
      case IR_TRUNC:
1749
0
        return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
1750
0
      case IR_NEG:
1751
0
      case IR_ABS:
1752
0
      case IR_NOT:
1753
0
        return ctx->use_lists[ref].count == 1 &&
1754
0
          ir_may_promote_trunc(ctx, type, insn->op1);
1755
0
      case IR_ADD:
1756
0
      case IR_SUB:
1757
0
      case IR_MUL:
1758
0
      case IR_MIN:
1759
0
      case IR_MAX:
1760
0
      case IR_OR:
1761
0
      case IR_AND:
1762
0
      case IR_XOR:
1763
0
      case IR_SHL:
1764
0
        return ctx->use_lists[ref].count == 1 &&
1765
0
          ir_may_promote_trunc(ctx, type, insn->op1) &&
1766
0
          ir_may_promote_trunc(ctx, type, insn->op2);
1767
//      case IR_SHR:
1768
//      case IR_SAR:
1769
//      case IR_DIV:
1770
//      case IR_MOD:
1771
//      case IR_FP2INT:
1772
//        TODO: ???
1773
0
      case IR_COND:
1774
0
        return ctx->use_lists[ref].count == 1 &&
1775
0
          ir_may_promote_trunc(ctx, type, insn->op2) &&
1776
0
          ir_may_promote_trunc(ctx, type, insn->op3);
1777
0
      case IR_PHI:
1778
0
        if (ctx->use_lists[ref].count != 1) {
1779
0
          ir_use_list *use_list = &ctx->use_lists[ref];
1780
0
          ir_ref count = 0;
1781
1782
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1783
0
            if (*p != ref) {
1784
0
              if (count) {
1785
0
                return 0;
1786
0
              }
1787
0
              count = 1;
1788
0
            }
1789
0
          }
1790
0
        }
1791
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1792
0
          input = *p;
1793
0
          if (input != ref) {
1794
0
            if (!ir_may_promote_trunc(ctx, type, input)) {
1795
0
              return 0;
1796
0
            }
1797
0
          }
1798
0
        }
1799
0
        return 1;
1800
0
      default:
1801
0
        break;
1802
0
    }
1803
0
  }
1804
0
  return 0;
1805
0
}
1806
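The operations accepted above (ADD/SUB/MUL, the bitwise ops, SHL) are exactly those whose low result bits depend only on the low bits of their operands, so the whole expression can be evaluated in the narrow type before the TRUNC. SHR/SAR/DIV/MOD are excluded because their low bits do depend on the discarded high bits. A self-contained demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t a = 0x12345678, b = 0x0000FFFF;

    /* truncate-after equals truncate-before for ADD and MUL ... */
    printf("%d\n", (uint8_t)(a + b) == (uint8_t)((uint8_t)a + (uint8_t)b)); /* 1 */
    printf("%d\n", (uint8_t)(a * b) == (uint8_t)((uint8_t)a * (uint8_t)b)); /* 1 */

    /* ... but not for right shifts: high bits flow into the low byte */
    printf("%d\n", (uint8_t)(a >> 4) == (uint8_t)((uint8_t)a >> 4));        /* 0 */
    return 0;
}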
1807
static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1808
0
{
1809
0
  ir_insn *insn = &ctx->ir_base[ref];
1810
0
  uint32_t count;
1811
0
  ir_ref *p, n, input;
1812
1813
0
  if (IR_IS_CONST_REF(ref)) {
1814
0
    ir_val val;
1815
1816
0
    switch (type) {
1817
0
      case IR_I8:  val.i64 = insn->val.i8; break;
1818
0
      case IR_U8:  val.u64 = insn->val.u8; break;
1819
0
      case IR_I16: val.i64 = insn->val.i16; break;
1820
0
      case IR_U16: val.u64 = insn->val.u16; break;
1821
0
      case IR_I32: val.i64 = insn->val.i32; break;
1822
0
      case IR_U32: val.u64 = insn->val.u32; break;
1823
0
      case IR_CHAR:val.i64 = insn->val.i8; break;
1824
0
      case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
1825
0
      default: IR_ASSERT(0); val.u64 = 0;
1826
0
    }
1827
0
    return ir_const(ctx, val, type);
1828
0
  } else {
1829
0
    ir_bitqueue_add(worklist, ref);
1830
0
    switch (insn->op) {
1831
0
      case IR_ZEXT:
1832
0
      case IR_SEXT:
1833
0
      case IR_TRUNC:
1834
0
        if (ctx->ir_base[insn->op1].type != type) {
1835
0
          ir_type src_type = ctx->ir_base[insn->op1].type;
1836
0
          if (ir_type_size[src_type] == ir_type_size[type]) {
1837
0
            insn->op = IR_BITCAST;
1838
0
          } else if (ir_type_size[src_type] > ir_type_size[type]) {
1839
0
            insn->op = IR_TRUNC;
1840
0
          } else {
1841
0
            if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
1842
0
              insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
1843
0
            }
1844
0
          }
1845
0
          insn->type = type;
1846
0
          ir_iter_add_uses(ctx, ref, worklist);
1847
0
          return ref;
1848
0
        }
1849
1850
0
        count = ctx->use_lists[ref].count;
1851
0
        ir_use_list_remove_all(ctx, ref, use);
1852
0
        if (ctx->use_lists[ref].count == 0) {
1853
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1854
0
          if (count > 1) {
1855
0
            do {
1856
0
              ir_use_list_add(ctx, insn->op1, use);
1857
0
            } while (--count > 1);
1858
0
          }
1859
0
          ref = insn->op1;
1860
0
          MAKE_NOP(insn);
1861
0
          return ref;
1862
0
        } else {
1863
0
          ir_use_list_add(ctx, insn->op1, use);
1864
0
          count -= ctx->use_lists[ref].count;
1865
0
          if (count > 1) {
1866
0
            do {
1867
0
              ir_use_list_add(ctx, insn->op1, use);
1868
0
            } while (--count > 1);
1869
0
          }
1870
0
        }
1871
0
        return insn->op1;
1872
0
      case IR_NEG:
1873
0
      case IR_ABS:
1874
0
      case IR_NOT:
1875
0
        insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1876
0
        insn->type = type;
1877
0
        return ref;
1878
0
      case IR_ADD:
1879
0
      case IR_SUB:
1880
0
      case IR_MUL:
1881
0
      case IR_MIN:
1882
0
      case IR_MAX:
1883
0
      case IR_OR:
1884
0
      case IR_AND:
1885
0
      case IR_XOR:
1886
0
      case IR_SHL:
1887
0
        if (insn->op1 == insn->op2) {
1888
0
          insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1889
0
        } else {
1890
0
          insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1891
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1892
0
        }
1893
0
        insn->type = type;
1894
0
        return ref;
1895
//      case IR_DIV:
1896
//      case IR_MOD:
1897
//      case IR_SHR:
1898
//      case IR_SAR:
1899
//      case IR_FP2INT:
1900
//        TODO: ???
1901
0
      case IR_COND:
1902
0
        if (insn->op2 == insn->op3) {
1903
0
          insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1904
0
        } else {
1905
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1906
0
          insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
1907
0
        }
1908
0
        insn->type = type;
1909
0
        return ref;
1910
0
      case IR_PHI:
1911
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1912
0
          input = *p;
1913
0
          if (input != ref) {
1914
0
            *p = ir_promote_i2i(ctx, type, input, ref, worklist);
1915
0
          }
1916
0
        }
1917
0
        insn->type = type;
1918
0
        return ref;
1919
0
      default:
1920
0
        break;
1921
0
    }
1922
0
  }
1923
0
  IR_ASSERT(0);
1924
0
  return ref;
1925
0
}
1926
1927
static ir_ref ir_ext_const(ir_ctx *ctx, const ir_insn *val_insn, ir_op op, ir_type type)
1928
0
{
1929
0
  ir_val new_val;
1930
1931
0
  switch (val_insn->type) {
1932
0
    default:
1933
0
      IR_ASSERT(0);
1934
0
    case IR_I8:
1935
0
    case IR_U8:
1936
0
    case IR_BOOL:
1937
0
    case IR_CHAR:
1938
0
      if (op == IR_SEXT) {
1939
0
        new_val.i64 = (int64_t)val_insn->val.i8;
1940
0
      } else {
1941
0
        new_val.u64 = (uint64_t)val_insn->val.u8;
1942
0
      }
1943
0
      break;
1944
0
    case IR_I16:
1945
0
    case IR_U16:
1946
0
      if (op == IR_SEXT) {
1947
0
        new_val.i64 = (int64_t)val_insn->val.i16;
1948
0
      } else {
1949
0
        new_val.u64 = (uint64_t)val_insn->val.u16;
1950
0
      }
1951
0
      break;
1952
0
    case IR_I32:
1953
0
    case IR_U32:
1954
0
      if (op == IR_SEXT) {
1955
0
        new_val.i64 = (int64_t)val_insn->val.i32;
1956
0
      } else {
1957
0
        new_val.u64 = (uint64_t)val_insn->val.u32;
1958
0
      }
1959
0
      break;
1960
0
  }
1961
0
  return ir_const(ctx, new_val, type);
1962
0
}
1963
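The helper above performs the usual sign/zero-extension split: only the source width and the requested opcode matter, and the target type simply receives the 64-bit payload. For example, the byte pattern 0xFF extends two ways:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int8_t v = (int8_t)0xFF; /* bit pattern 11111111 */

    printf("%" PRId64 "\n", (int64_t)v);           /* IR_SEXT:  -1 */
    printf("%" PRIu64 "\n", (uint64_t)(uint8_t)v); /* IR_ZEXT: 255 */
    return 0;
}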
1964
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
1965
0
{
1966
0
  uint32_t optx = IR_OPTX(op, type, 1);
1967
0
  ir_ref ref;
1968
1969
0
  if (!IR_IS_CONST_REF(src_ref)) {
1970
0
    ref = ir_iter_find_cse1(ctx, optx, src_ref);
1971
0
    if (ref) {
1972
0
      ir_use_list_add(ctx, ref, var_ref);
1973
0
      if (!IR_IS_CONST_REF(src_ref)) {
1974
0
        ir_use_list_remove_one(ctx, src_ref, var_ref);
1975
0
      }
1976
0
      ir_bitqueue_add(worklist, ref);
1977
0
      return ref;
1978
0
    }
1979
0
  }
1980
1981
0
  ref = ir_emit1(ctx, optx, src_ref);
1982
0
  ir_use_list_add(ctx, ref, var_ref);
1983
0
  if (!IR_IS_CONST_REF(src_ref)) {
1984
0
    ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
1985
0
  }
1986
0
  ir_bitqueue_grow(worklist, ref + 1);
1987
0
  ir_bitqueue_add(worklist, ref);
1988
0
  return ref;
1989
0
}
1990
1991
static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop)
1992
0
{
1993
0
  const ir_insn *insn;
1994
0
  const ir_ref *p;
1995
0
  ir_ref n, input, result, ctrl;
1996
1997
0
  if (IR_IS_CONST_REF(val)) {
1998
0
    return 1; /* IR_START */
1999
0
  }
2000
2001
0
  insn = &ctx->ir_base[val];
2002
0
  if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
2003
0
    return val;
2004
0
  }
2005
2006
0
  IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA);
2007
0
  if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) {
2008
0
    return insn->op1;
2009
0
  }
2010
2011
0
  n = insn->inputs_count;
2012
0
  p = insn->ops + 1;
2013
2014
0
  result = 1;
2015
0
  for (; n > 0; p++, n--) {
2016
0
    input = *p;
2017
0
    ctrl = _ir_estimated_control(ctx, input, loop);
2018
0
    if (ctrl >= loop) return ctrl;
2019
0
    if (ctrl > result) { // TODO: check dominance depth instead of order
2020
0
      result = ctrl;
2021
0
    }
2022
0
  }
2023
0
  return result;
2024
0
}
2025
2026
static bool ir_is_loop_invariant(const ir_ctx *ctx, ir_ref ref, ir_ref loop)
2027
0
{
2028
0
  ref = _ir_estimated_control(ctx, ref, loop);
2029
0
  return ref < loop; // TODO: check dominance instead of order
2030
0
}
2031
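Both helpers above approximate dominance by reference order, as their TODOs note: a value is treated as loop-invariant when its estimated control dependency is numbered before the loop header. A sketch of the intent (an assumed reading of the order-based test, not the precise dominance check the TODOs ask for):

/* ctrl(ref) < loop  =>  ref's controlling node precedes the loop
 *                       header in the IR numbering
 *                   =>  conservatively treated as loop-invariant;
 * a dominance-depth test would make this exact. */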
2032
static bool ir_is_cheaper_ext(const ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
2033
0
{
2034
0
  if (IR_IS_CONST_REF(ref)) {
2035
0
    return 1;
2036
0
  } else {
2037
0
    const ir_insn *insn = &ctx->ir_base[ref];
2038
2039
0
    if (insn->op == IR_LOAD) {
2040
0
      if (ir_is_loop_invariant(ctx, ref, loop)) {
2041
0
        return 1;
2042
0
      } else {
2043
        /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
2044
0
        if (ctx->use_lists[ref].count == 2) {
2045
0
          return 1;
2046
0
        } else if (ctx->use_lists[ref].count == 3) {
2047
0
          ir_use_list *use_list = &ctx->use_lists[ref];
2048
0
          ir_ref *p, n, use;
2049
2050
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
2051
0
            use = *p;
2052
0
            if (use != ext_ref) {
2053
0
              const ir_insn *use_insn = &ctx->ir_base[use];
2054
2055
0
              if (use_insn->op != op
2056
0
               && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
2057
0
                || use_insn->op1 != ref)) {
2058
0
                return 0;
2059
0
              }
2060
0
            }
2061
0
          }
2062
0
          return 1;
2063
0
        }
2064
0
      }
2065
0
      return 0;
2066
0
    } else {
2067
0
      return ir_is_loop_invariant(ctx, ref, loop);
2068
0
    }
2069
0
  }
2070
0
}
2071
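The LOAD special case above encodes a common cost model: on mainstream targets a widening load is a single instruction, so keeping a separately extended load adds no runtime cost. An x86-64 illustration (a target-dependent assumption, not a statement about the backend):

/*   uint32_t w = *p8;   ->   movzbl (%rdi), %eax   ; ZEXT(LOAD(...))
 *   uint8_t  t = *p8;   ->   movzbl (%rdi), %eax   ; plain LOAD is also
 *                                                    emitted zero-extended
 * so duplicating the load in extended form is effectively free. */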
2072
static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
2073
0
{
2074
0
  ir_op op = ctx->ir_base[ext_ref].op;
2075
0
  ir_type type = ctx->ir_base[ext_ref].type;
2076
0
  ir_insn *phi_insn;
2077
0
  ir_use_list *use_list;
2078
0
  ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;
2079
2080
  /* Check if we may change the type of the induction variable */
2081
0
  use_list = &ctx->use_lists[phi_ref];
2082
0
  n = use_list->count;
2083
0
  if (n > 1) {
2084
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2085
0
      use = *p;
2086
0
      if (use == op_ref || use == ext_ref) {
2087
0
        continue;
2088
0
      } else {
2089
0
        const ir_insn *use_insn = &ctx->ir_base[use];
2090
2091
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2092
0
          if (use_insn->op1 == phi_ref) {
2093
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2094
0
              return 0;
2095
0
            }
2096
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2097
0
              continue;
2098
0
            }
2099
0
          } else if (use_insn->op2 == phi_ref) {
2100
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2101
0
              return 0;
2102
0
            }
2103
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2104
0
              continue;
2105
0
            }
2106
0
          }
2107
0
          return 0;
2108
0
        } else if (use_insn->op == IR_IF) {
2109
0
          continue;
2110
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2111
0
          ext_ref_2 = use;
2112
0
          continue;
2113
0
        } else {
2114
0
          return 0;
2115
0
        }
2116
0
      }
2117
0
    }
2118
0
  }
2119
2120
0
  use_list = &ctx->use_lists[op_ref];
2121
0
  n = use_list->count;
2122
0
  if (n > 1) {
2123
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2124
0
      use = *p;
2125
0
      if (use == phi_ref || use == ext_ref) {
2126
0
        continue;
2127
0
      } else {
2128
0
        const ir_insn *use_insn = &ctx->ir_base[use];
2129
2130
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2131
0
          if (use_insn->op1 == phi_ref) {
2132
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2133
0
              return 0;
2134
0
            }
2135
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2136
0
              continue;
2137
0
            }
2138
0
          } else if (use_insn->op2 == phi_ref) {
2139
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2140
0
              return 0;
2141
0
            }
2142
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2143
0
              continue;
2144
0
            }
2145
0
          }
2146
0
          return 0;
2147
0
        } else if (use_insn->op == IR_IF) {
2148
0
          continue;
2149
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2150
0
          ext_ref_2 = use;
2151
0
          continue;
2152
0
        } else {
2153
0
          return 0;
2154
0
        }
2155
0
      }
2156
0
    }
2157
0
  }
2158
2159
0
  for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
2160
    /* "use_lists" may be reallocated by ir_ext_ref() */
2161
0
    use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
2162
0
    if (use == ext_ref) {
2163
0
      continue;
2164
0
    } else {
2165
0
      ir_insn *use_insn = &ctx->ir_base[use];
2166
2167
0
      if (use_insn->op == IR_IF) {
2168
0
        continue;
2169
0
      } else if (use_insn->op == op) {
2170
0
        IR_ASSERT(ext_ref_2 == use);
2171
0
        continue;
2172
0
      }
2173
0
      IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
2174
0
        || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
2175
0
       && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
2176
0
      if (use_insn->op1 != phi_ref) {
2177
0
        if (IR_IS_CONST_REF(use_insn->op1)
2178
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2179
0
          ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2180
0
        } else {
2181
0
          ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2182
0
          use_insn = &ctx->ir_base[use];
2183
0
          use_insn->op1 = tmp;
2184
0
        }
2185
0
        ir_bitqueue_add(worklist, use);
2186
0
      }
2187
0
      if (use_insn->op2 != phi_ref) {
2188
0
        if (IR_IS_CONST_REF(use_insn->op2)
2189
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2190
0
          ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2191
0
        } else {
2192
0
          ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2193
0
          use_insn = &ctx->ir_base[use];
2194
0
          use_insn->op2 = tmp;
2195
0
        }
2196
0
        ir_bitqueue_add(worklist, use);
2197
0
      }
2198
0
    }
2199
0
  }
2200
2201
0
  if (ctx->use_lists[op_ref].count > 1) {
2202
0
    for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
2203
      /* "use_lists" may be reallocated by ir_ext_ref() */
2204
0
      use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
2205
0
      if (use == ext_ref || use == phi_ref) {
2206
0
        continue;
2207
0
      } else {
2208
0
        ir_insn *use_insn = &ctx->ir_base[use];
2209
2210
0
        if (use_insn->op == IR_IF) {
2211
0
          continue;
2212
0
        } else if (use_insn->op == op) {
2213
0
          IR_ASSERT(ext_ref_2 == use);
2214
0
          continue;
2215
0
        }
2216
0
        IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
2217
0
        if (use_insn->op1 != op_ref) {
2218
0
          if (IR_IS_CONST_REF(use_insn->op1)
2219
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2220
0
            ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2221
0
          } else {
2222
0
            ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2223
0
            use_insn = &ctx->ir_base[use];
2224
0
            use_insn->op1 = tmp;
2225
0
          }
2226
0
          ir_bitqueue_add(worklist, use);
2227
0
        }
2228
0
        if (use_insn->op2 != op_ref) {
2229
0
          if (IR_IS_CONST_REF(use_insn->op2)
2230
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2231
0
            ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2232
0
          } else {
2233
0
            ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2234
0
            use_insn = &ctx->ir_base[use];
2235
0
            use_insn->op2 = tmp;
2236
0
          }
2237
0
          ir_bitqueue_add(worklist, use);
2238
0
        }
2239
0
      }
2240
0
    }
2241
0
  }
2242
2243
0
  ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);
2244
2245
0
  if (ext_ref_2) {
2246
0
    ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
2247
0
  }
2248
2249
0
  ctx->ir_base[op_ref].type = type;
2250
2251
0
  phi_insn = &ctx->ir_base[phi_ref];
2252
0
  phi_insn->type = type;
2253
0
  if (IR_IS_CONST_REF(phi_insn->op2)
2254
0
   && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
2255
0
    ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
2256
0
  } else {
2257
0
    ir_ref tmp = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
2258
0
    ctx->ir_base[phi_ref].op2 = tmp;
2259
0
  }
2260
2261
0
  return 1;
2262
0
}
2263
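In source terms, the rewrite above widens the induction variable itself so the per-iteration extension disappears; loop-invariant operands and constants are extended once, outside the loop. A before/after C sketch (illustrative only):

#include <stdint.h>

/* before: x2 = PHI(i8 ...), x3 = ADD(x2, 1), and the i8 counter is
 * zero-extended on every use inside the loop */
uint32_t sum_before(const uint8_t *a, uint8_t n)
{
    uint32_t s = 0;
    for (uint8_t i = 0; i < n; i++)
        s += a[(uint32_t)i];     /* ZEXT per iteration */
    return s;
}

/* after: the PHI and the ADD are retyped to i32; the bound is
 * extended once and the in-loop ZEXT is gone */
uint32_t sum_after(const uint8_t *a, uint8_t n)
{
    uint32_t s = 0, n32 = (uint32_t)n;
    for (uint32_t i = 0; i < n32; i++)
        s += a[i];
    return s;
}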
2264
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
2265
0
{
2266
0
  ir_ref ref = insn->op1;
2267
2268
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
2269
0
  insn = &ctx->ir_base[ref];
2270
0
  if (insn->op == IR_PHI
2271
0
   && insn->inputs_count == 3 /* (2 values) */
2272
0
   && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
2273
0
    ir_ref op_ref = insn->op3;
2274
0
    ir_insn *op_insn = &ctx->ir_base[op_ref];
2275
2276
0
    if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
2277
0
      if (op_insn->op1 == ref) {
2278
0
        if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) {
2279
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2280
0
        }
2281
0
      } else if (op_insn->op2 == ref) {
2282
0
        if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) {
2283
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2284
0
        }
2285
0
      }
2286
0
    }
2287
0
  } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) {
2288
0
    if (!IR_IS_CONST_REF(insn->op1)
2289
0
     && ctx->ir_base[insn->op1].op == IR_PHI
2290
0
     && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */
2291
0
     && ctx->ir_base[insn->op1].op3 == ref
2292
0
     && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN
2293
0
     && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) {
2294
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist);
2295
0
    } else if (!IR_IS_CONST_REF(insn->op2)
2296
0
     && ctx->ir_base[insn->op2].op == IR_PHI
2297
0
     && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */
2298
0
     && ctx->ir_base[insn->op2].op3 == ref
2299
0
     && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN
2300
0
     && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) {
2301
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist);
2302
0
    }
2303
0
  }
2304
2305
0
  return 0;
2306
0
}
2307
2308
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
2309
0
{
2310
0
  ir_use_list *use_list = &ctx->use_lists[if_ref];
2311
0
  ir_ref *p = &ctx->use_edges[use_list->refs];
2312
2313
0
  IR_ASSERT(use_list->count == 2);
2314
0
  if (ctx->ir_base[*p].op == IR_IF_TRUE) {
2315
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
2316
0
    *if_true_ref = *p;
2317
0
    *if_false_ref = *(p + 1);
2318
0
  } else {
2319
0
    IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
2320
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
2321
0
    *if_false_ref = *p;
2322
0
    *if_true_ref = *(p + 1);
2323
0
  }
2324
0
}
2325
2326
static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
2327
0
{
2328
0
  ir_ref prev, next;
2329
0
  ir_use_list *use_list;
2330
2331
0
  IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
2332
0
  IR_ASSERT(ctx->ir_base[end].op == IR_END);
2333
0
  IR_ASSERT(ctx->ir_base[begin].op1 == end);
2334
0
  IR_ASSERT(ctx->use_lists[end].count == 1);
2335
2336
0
  prev = ctx->ir_base[end].op1;
2337
2338
0
  use_list = &ctx->use_lists[begin];
2339
0
  IR_ASSERT(use_list->count == 1);
2340
0
  next = ctx->use_edges[use_list->refs];
2341
2342
  /* remove BEGIN and END */
2343
0
  MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
2344
0
  MAKE_NOP(&ctx->ir_base[end]);   CLEAR_USES(end);
2345
2346
  /* connect their predecessor and successor */
2347
0
  ctx->ir_base[next].op1 = prev;
2348
0
  ir_use_list_replace_one(ctx, prev, end, next);
2349
2350
0
  if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
2351
0
    ir_bitqueue_add(worklist, prev);
2352
0
  }
2353
0
}
2354
2355
static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
2356
0
{
2357
0
  ir_use_list *use_list = &ctx->use_lists[start];
2358
0
  ir_ref *p, use, n = use_list->count;
2359
2360
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2361
0
    use = *p;
2362
0
    if (use != end) {
2363
0
      ir_insn *use_insn = &ctx->ir_base[use];
2364
0
      IR_ASSERT(use_insn->op == IR_VAR);
2365
0
      IR_ASSERT(ctx->use_lists[use].count == 0);
2366
0
      MAKE_NOP(use_insn);
2367
0
    }
2368
0
  }
2369
0
}
2370
2371
static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2372
0
{
2373
0
  if (insn->inputs_count == 2) {
2374
0
    ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
2375
0
    ir_insn *end1 = &ctx->ir_base[end1_ref];
2376
0
    ir_insn *end2 = &ctx->ir_base[end2_ref];
2377
2378
0
    if (end1->op != IR_END || end2->op != IR_END) {
2379
0
      return 0;
2380
0
    }
2381
2382
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2383
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2384
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2385
2386
0
    if (start1->op1 != start2->op1) {
2387
0
      return 0;
2388
0
    }
2389
2390
0
    ir_ref root_ref = start1->op1;
2391
0
    ir_insn *root = &ctx->ir_base[root_ref];
2392
2393
0
    if (root->op != IR_IF
2394
0
     && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
2395
0
      return 0;
2396
0
    }
2397
2398
    /* Empty Diamond
2399
     *
2400
     *    prev                     prev
2401
     *    |  condition             |  condition
2402
     *    | /                      |
2403
     *    IF                       |
2404
     *    | \                      |
2405
     *    |  +-----+               |
2406
     *    |        IF_FALSE        |
2407
     *    IF_TRUE  |           =>  |
2408
     *    |        END             |
2409
     *    END     /                |
2410
     *    |  +---+                 |
2411
     *    | /                      |
2412
     *    MERGE                    |
2413
     *    |                        |
2414
     *    next                     next
2415
     */
2416
2417
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2418
0
    ir_insn *next = &ctx->ir_base[next_ref];
2419
2420
0
    if (ctx->use_lists[start1_ref].count != 1) {
2421
0
      ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2422
0
    }
2423
0
    if (ctx->use_lists[start2_ref].count != 1) {
2424
0
      ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2425
0
    }
2426
2427
0
    next->op1 = root->op1;
2428
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2429
0
    if (!IR_IS_CONST_REF(root->op2)) {
2430
0
      ir_use_list_remove_all(ctx, root->op2, root_ref);
2431
0
      if (ir_is_dead(ctx, root->op2)) {
2432
0
        ir_bitqueue_add(worklist, root->op2);
2433
0
      }
2434
0
    }
2435
2436
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2437
0
    MAKE_NOP(start1); CLEAR_USES(start1_ref);
2438
0
    MAKE_NOP(start2); CLEAR_USES(start2_ref);
2439
0
    MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2440
0
    MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2441
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2442
2443
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2444
0
      ir_bitqueue_add(worklist, next->op1);
2445
0
    }
2446
2447
0
    return 1;
2448
0
  } else {
2449
0
    ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
2450
0
    ir_ref root_ref = IR_UNUSED;
2451
2452
0
    for (i = 0; i < count; i++) {
2453
0
      ir_ref end_ref, start_ref;
2454
0
      ir_insn *end, *start;
2455
2456
0
      end_ref = ops[i];
2457
0
      end = &ctx->ir_base[end_ref];
2458
0
      if (end->op != IR_END) {
2459
0
        return 0;
2460
0
      }
2461
0
      start_ref = end->op1;
2462
0
      start = &ctx->ir_base[start_ref];
2463
0
      if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) {
2464
0
        return 0;
2465
0
      }
2466
0
      if (ctx->use_lists[start_ref].count != 1) {
2467
0
        ir_remove_unused_vars(ctx, start_ref, end_ref);
2468
0
      }
2469
0
      if (!root_ref) {
2470
0
        root_ref = start->op1;
2471
0
        if (ctx->use_lists[root_ref].count != count) {
2472
0
          return 0;
2473
0
        }
2474
0
      } else if (start->op1 != root_ref) {
2475
0
        return 0;
2476
0
      }
2477
0
    }
2478
2479
    /* Empty N-Diamond */
2480
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2481
0
    ir_insn *next = &ctx->ir_base[next_ref];
2482
0
    ir_insn *root = &ctx->ir_base[root_ref];
2483
2484
0
    next->op1 = root->op1;
2485
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2486
2487
0
    if (!IR_IS_CONST_REF(root->op2)) {
2488
0
      ir_use_list_remove_all(ctx, root->op2, root_ref);
2489
0
      if (ir_is_dead(ctx, root->op2)) {
2490
0
        ir_bitqueue_add(worklist, root->op2);
2491
0
      }
2492
0
    }
2493
2494
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2495
2496
0
    for (i = 0; i < count; i++) {
2497
0
      ir_ref end_ref = ops[i];
2498
0
      ir_insn *end = &ctx->ir_base[end_ref];
2499
0
      ir_ref start_ref = end->op1;
2500
0
      ir_insn *start = &ctx->ir_base[start_ref];
2501
2502
0
      MAKE_NOP(start); CLEAR_USES(start_ref);
2503
0
      MAKE_NOP(end);   CLEAR_USES(end_ref);
2504
0
    }
2505
2506
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2507
2508
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2509
0
      ir_bitqueue_add(worklist, next->op1);
2510
0
    }
2511
2512
0
    return 1;
2513
0
  }
2514
0
}
2515
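In source terms, the pattern removed above is a two-way (or N-way) branch all of whose arms are empty and immediately re-merge; a minimal C analogy (illustrative):

/* before: IF -> {IF_TRUE/END, IF_FALSE/END} -> MERGE */
void diamond_before(int cond)
{
    if (cond) { /* empty */ } else { /* empty */ }
    /* next ... */
}

/* after: the diamond is unlinked, `next` is wired directly to
 * `prev`, and the now-unused condition is queued for DCE */
void diamond_after(int cond)
{
    (void)cond;
    /* next ... */
}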
2516
static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
2517
0
{
2518
0
  return IR_IS_CONST_REF(ref)
2519
0
    && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2520
0
    && ctx->ir_base[ref].val.u32 == 0;
2521
0
}
2522
2523
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2524
0
{
2525
0
  IR_ASSERT(insn->inputs_count == 3);
2526
0
  IR_ASSERT(ctx->use_lists[merge_ref].count == 2);
2527
2528
0
  ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2529
0
  ir_insn *end1 = &ctx->ir_base[end1_ref];
2530
0
  ir_insn *end2 = &ctx->ir_base[end2_ref];
2531
2532
0
  if (end1->op == IR_END && end2->op == IR_END) {
2533
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2534
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2535
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2536
2537
0
    if (start1->op1 == start2->op1) {
2538
0
      ir_ref root_ref = start1->op1;
2539
0
      ir_insn *root = &ctx->ir_base[root_ref];
2540
2541
0
      if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2)) {
2542
0
        ir_ref cond_ref = root->op2;
2543
0
        ir_insn *cond = &ctx->ir_base[cond_ref];
2544
0
        ir_type type = insn->type;
2545
0
        bool is_cmp, is_less;
2546
2547
0
        if (IR_IS_TYPE_FP(type)) {
2548
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
2549
0
            cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2550
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE ||
2551
0
            cond->op == IR_ULT || cond->op == IR_ULE);
2552
0
        } else if (IR_IS_TYPE_SIGNED(type)) {
2553
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
2554
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE);
2555
0
        } else {
2556
0
          IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
2557
0
          is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2558
0
          is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
2559
0
        }
2560
2561
0
        if (is_cmp
2562
0
         && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
2563
0
           || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
2564
          /* MAX/MIN
2565
           *
2566
           *    prev                     prev
2567
           *    |  LT(A, B)              |
2568
           *    | /                      |
2569
           *    IF                       |
2570
           *    | \                      |
2571
           *    |  +-----+               |
2572
           *    |        IF_FALSE        |
2573
           *    IF_TRUE  |           =>  |
2574
           *    |        END             |
2575
           *    END     /                |
2576
           *    |  +---+                 |
2577
           *    | /                      |
2578
           *    MERGE                    |
2579
           *    |    \                   |
2580
           *    |     PHI(A, B)          |    MIN(A, B)
2581
           *    next                     next
2582
           */
2583
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2584
0
          ir_insn *next;
2585
2586
0
          if (next_ref == ref) {
2587
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2588
0
          }
2589
0
          next = &ctx->ir_base[next_ref];
2590
2591
0
          if (ctx->use_lists[start1_ref].count != 1) {
2592
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2593
0
          }
2594
0
          if (ctx->use_lists[start2_ref].count != 1) {
2595
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2596
0
          }
2597
2598
0
          insn->op = (
2599
0
            (is_less ? cond->op1 : cond->op2)
2600
0
            ==
2601
0
            ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
2602
0
            ) ? IR_MIN : IR_MAX;
2603
0
          insn->inputs_count = 2;
2604
0
          if (insn->op2 > insn->op3) {
2605
0
            insn->op1 = insn->op2;
2606
0
            insn->op2 = insn->op3;
2607
0
          } else {
2608
0
            insn->op1 = insn->op3;
2609
0
          }
2610
0
          insn->op3 = IR_UNUSED;
2611
2612
0
          next->op1 = root->op1;
2613
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2614
0
          if (!IR_IS_CONST_REF(insn->op1)) {
2615
0
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2616
0
          }
2617
0
          if (!IR_IS_CONST_REF(insn->op2)) {
2618
0
            ir_use_list_remove_all(ctx, insn->op2, cond_ref);
2619
0
          }
2620
2621
0
          if (ctx->use_lists[cond_ref].count == 1) {
2622
0
            MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2623
0
          } else {
2624
0
            ir_use_list_remove_one(ctx, cond_ref, root_ref);
2625
0
          }
2626
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2627
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2628
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2629
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2630
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2631
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2632
2633
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2634
0
            ir_bitqueue_add(worklist, next->op1);
2635
0
          }
2636
2637
0
          return 1;
2638
0
        } else if (is_cmp
2639
0
            && ((ctx->ir_base[insn->op2].op == IR_NEG
2640
0
              && ctx->use_lists[insn->op2].count == 1
2641
0
              && ctx->ir_base[insn->op2].op1 == insn->op3
2642
0
              && ((cond->op1 == insn->op3
2643
0
                && ir_is_zero(ctx, cond->op2)
2644
0
                && is_less == (start1->op == IR_IF_TRUE))
2645
0
               || (cond->op2 == insn->op3
2646
0
                && ir_is_zero(ctx, cond->op1)
2647
0
                && is_less != (start1->op == IR_IF_TRUE))))
2648
0
             || (ctx->ir_base[insn->op3].op == IR_NEG
2649
0
              && ctx->use_lists[insn->op3].count == 1
2650
0
              && ctx->ir_base[insn->op3].op1 == insn->op2
2651
0
              && ((cond->op1 == insn->op2
2652
0
                && ir_is_zero(ctx, cond->op2)
2653
0
                && is_less != (start1->op == IR_IF_TRUE))
2654
0
               || (cond->op2 == insn->op2
2655
0
                && ir_is_zero(ctx, cond->op1)
2656
0
                && is_less == (start1->op == IR_IF_TRUE)))))) {
2657
          /* ABS
2658
           *
2659
           *    prev                     prev
2660
           *    |  LT(A, 0)              |
2661
           *    | /                      |
2662
           *    IF                       |
2663
           *    | \                      |
2664
           *    |  +-----+               |
2665
           *    |        IF_FALSE        |
2666
           *    IF_TRUE  |           =>  |
2667
           *    |        END             |
2668
           *    END     /                |
2669
           *    |  +---+                 |
2670
           *    | /                      |
2671
           *    MERGE                    |
2672
           *    |    \                   |
2673
           *    |     PHI(A, NEG(A))     |    ABS(A)
2674
           *    next                     next
2675
           */
2676
0
          ir_ref neg_ref;
2677
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2678
0
          ir_insn *next;
2679
2680
0
          if (next_ref == ref) {
2681
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2682
0
          }
2683
0
          next = &ctx->ir_base[next_ref];
2684
2685
0
          if (ctx->use_lists[start1_ref].count != 1) {
2686
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2687
0
          }
2688
0
          if (ctx->use_lists[start2_ref].count != 1) {
2689
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2690
0
          }
2691
2692
0
          insn->op = IR_ABS;
2693
0
          insn->inputs_count = 1;
2694
0
          if (ctx->ir_base[insn->op2].op == IR_NEG) {
2695
0
            neg_ref = insn->op2;
2696
0
            insn->op1 = insn->op3;
2697
0
          } else {
2698
0
            neg_ref = insn->op3;
2699
0
            insn->op1 = insn->op2;
2700
0
          }
2701
0
          insn->op2 = IR_UNUSED;
2702
0
          insn->op3 = IR_UNUSED;
2703
2704
0
          next->op1 = root->op1;
2705
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2706
0
          ir_use_list_remove_one(ctx, insn->op1, neg_ref);
2707
0
          if (!IR_IS_CONST_REF(insn->op1)) {
2708
0
            ir_use_list_remove_all(ctx, insn->op1, cond_ref);
2709
0
          }
2710
2711
0
          if (ctx->use_lists[cond_ref].count == 1) {
2712
0
            MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2713
0
          } else {
2714
0
            ir_use_list_remove_one(ctx, cond_ref, root_ref);
2715
0
          }
2716
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2717
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2718
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2719
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2720
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2721
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2722
0
          MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);
2723
2724
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2725
0
            ir_bitqueue_add(worklist, next->op1);
2726
0
          }
2727
2728
0
          return 1;
2729
0
        } else if (cond->op != IR_OVERFLOW && insn->op2 <= cond_ref && insn->op3 <= cond_ref) {
2730
          /* COND
2731
           *
2732
           *    prev                     prev
2733
           *    |  cond                  |
2734
           *    | /                      |
2735
           *    IF                       |
2736
           *    | \                      |
2737
           *    |  +-----+               |
2738
           *    |        IF_FALSE        |
2739
           *    IF_TRUE  |           =>  |
2740
           *    |        END             |
2741
           *    END     /                |
2742
           *    |  +---+                 |
2743
           *    | /                      |
2744
           *    MERGE                    |
2745
           *    |    \                   |
2746
           *    |     PHI(A, B)          |    COND(cond, A, B)
2747
           *    next                     next
2748
           */
2749
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2750
0
          ir_insn *next;
2751
2752
0
          if (next_ref == ref) {
2753
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2754
0
          }
2755
0
          next = &ctx->ir_base[next_ref];
2756
2757
0
          if (ctx->use_lists[start1_ref].count != 1) {
2758
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2759
0
          }
2760
0
          if (ctx->use_lists[start2_ref].count != 1) {
2761
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2762
0
          }
2763
2764
0
          insn->op = IR_COND;
2765
0
          insn->inputs_count = 3;
2766
0
          insn->op1 = cond_ref;
2767
0
          if (start1->op == IR_IF_FALSE) {
2768
0
            SWAP_REFS(insn->op2, insn->op3);
2769
0
          }
2770
2771
0
          next->op1 = root->op1;
2772
0
          ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
2773
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2774
0
          ir_use_list_remove_all(ctx, root->op2, root_ref);
2775
2776
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2777
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2778
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2779
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2780
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2781
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2782
2783
0
          ir_bitqueue_add(worklist, ref);
2784
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2785
0
            ir_bitqueue_add(worklist, next->op1);
2786
0
          }
2787
2788
0
          return 1;
2789
0
        }
2790
0
      }
2791
0
    }
2792
0
  }
2793
2794
0
  return 0;
2795
0
}
2796
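The three rewrites above correspond to familiar source-level idioms; a compact C analogy for the integer case (illustrative):

/* MIN/MAX: a PHI over IF(LT(a, b)) selecting between a and b */
int min_idiom(int a, int b)  { return a < b ? a : b;  /* -> MIN(a, b) */ }

/* ABS: a PHI(a, NEG(a)) guarded by a comparison against zero */
int abs_idiom(int a)         { return a < 0 ? -a : a; /* -> ABS(a)    */ }

/* COND: any other PHI over an empty diamond, when both values are
 * already available at the IF */
int cond_idiom(int c, int a, int b) { return c ? a : b; /* -> COND(c, a, b) */ }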
2797
static bool ir_cmp_is_true(ir_op op, const ir_insn *op1, const ir_insn *op2)
2798
0
{
2799
0
  IR_ASSERT(op1->type == op2->type);
2800
0
  if (IR_IS_TYPE_INT(op1->type)) {
2801
0
    if (op == IR_EQ) {
2802
0
      return op1->val.u64 == op2->val.u64;
2803
0
    } else if (op == IR_NE) {
2804
0
      return op1->val.u64 != op2->val.u64;
2805
0
    } else if (op == IR_LT) {
2806
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2807
0
        return op1->val.i64 < op2->val.i64;
2808
0
      } else {
2809
0
        return op1->val.u64 < op2->val.u64;
2810
0
      }
2811
0
    } else if (op == IR_GE) {
2812
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2813
0
        return op1->val.i64 >= op2->val.i64;
2814
0
      } else {
2815
0
        return op1->val.u64 >= op2->val.u64;
2816
0
      }
2817
0
    } else if (op == IR_LE) {
2818
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2819
0
        return op1->val.i64 <= op2->val.i64;
2820
0
      } else {
2821
0
        return op1->val.u64 <= op2->val.u64;
2822
0
      }
2823
0
    } else if (op == IR_GT) {
2824
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2825
0
        return op1->val.i64 > op2->val.i64;
2826
0
      } else {
2827
0
        return op1->val.u64 > op2->val.u64;
2828
0
      }
2829
0
    } else if (op == IR_ULT) {
2830
0
      return op1->val.u64 < op2->val.u64;
2831
0
    } else if (op == IR_UGE) {
2832
0
      return op1->val.u64 >= op2->val.u64;
2833
0
    } else if (op == IR_ULE) {
2834
0
      return op1->val.u64 <= op2->val.u64;
2835
0
    } else if (op == IR_UGT) {
2836
0
      return op1->val.u64 > op2->val.u64;
2837
0
    } else {
2838
0
      IR_ASSERT(0);
2839
0
      return 0;
2840
0
    }
2841
0
  } else if (op1->type == IR_DOUBLE) {
2842
0
    if (op == IR_EQ) {
2843
0
      return op1->val.d == op2->val.d;
2844
0
    } else if (op == IR_NE) {
2845
0
      return op1->val.d != op2->val.d;
2846
0
    } else if (op == IR_LT) {
2847
0
      return op1->val.d < op2->val.d;
2848
0
    } else if (op == IR_GE) {
2849
0
      return op1->val.d >= op2->val.d;
2850
0
    } else if (op == IR_LE) {
2851
0
      return op1->val.d <= op2->val.d;
2852
0
    } else if (op == IR_GT) {
2853
0
      return op1->val.d > op2->val.d;
2854
0
    } else if (op == IR_ULT) {
2855
0
      return !(op1->val.d >= op2->val.d);
2856
0
    } else if (op == IR_UGE) {
2857
0
      return !(op1->val.d < op2->val.d);
2858
0
    } else if (op == IR_ULE) {
2859
0
      return !(op1->val.d > op2->val.d);
2860
0
    } else if (op == IR_UGT) {
2861
0
      return !(op1->val.d <= op2->val.d);
2862
0
    } else if (op == IR_ORDERED) {
2863
0
      return !isnan(op1->val.d) && !isnan(op2->val.d);
2864
0
    } else if (op == IR_UNORDERED) {
2865
0
      return isnan(op1->val.d) || isnan(op2->val.d);
2866
0
    } else {
2867
0
      IR_ASSERT(0);
2868
0
      return 0;
2869
0
    }
2870
0
  } else {
2871
0
    IR_ASSERT(op1->type == IR_FLOAT);
2872
0
    if (op == IR_EQ) {
2873
0
      return op1->val.f == op2->val.f;
2874
0
    } else if (op == IR_NE) {
2875
0
      return op1->val.f != op2->val.f;
2876
0
    } else if (op == IR_LT) {
2877
0
      return op1->val.f < op2->val.f;
2878
0
    } else if (op == IR_GE) {
2879
0
      return op1->val.f >= op2->val.f;
2880
0
    } else if (op == IR_LE) {
2881
0
      return op1->val.f <= op2->val.f;
2882
0
    } else if (op == IR_GT) {
2883
0
      return op1->val.f > op2->val.f;
2884
0
    } else if (op == IR_ULT) {
2885
0
      return !(op1->val.f >= op2->val.f);
2886
0
    } else if (op == IR_UGE) {
2887
0
      return !(op1->val.f < op2->val.f);
2888
0
    } else if (op == IR_ULE) {
2889
0
      return !(op1->val.f > op2->val.f);
2890
0
    } else if (op == IR_UGT) {
2891
0
      return !(op1->val.f <= op2->val.f);
2892
0
    } else if (op == IR_ORDERED) {
2893
0
      return !isnan(op1->val.f) && !isnan(op2->val.f);
2894
0
    } else if (op == IR_UNORDERED) {
2895
0
      return isnan(op1->val.f) || isnan(op2->val.f);
2896
0
    } else {
2897
0
      IR_ASSERT(0);
2898
0
      return 0;
2899
0
    }
2900
0
  }
2901
0
}
2902
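The unsigned floating-point comparisons above are the IEEE "unordered" forms: IR_ULT is evaluated as !(a >= b), so it is true whenever either operand is NaN, while the ordered IR_LT is false. A self-contained check:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = NAN, b = 1.0;

    printf("%d\n", a < b);     /* IR_LT:  0 -- ordered, false on NaN  */
    printf("%d\n", !(a >= b)); /* IR_ULT: 1 -- unordered, true on NaN */
    return 0;
}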
2903
static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2904
0
{
2905
0
  ir_ref cond_ref = insn->op2;
2906
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
2907
2908
0
  if (cond->op == IR_PHI
2909
0
   && cond->inputs_count == 3
2910
0
   && cond->op1 == insn->op1
2911
0
   && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
2912
0
    || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
2913
0
    ir_ref merge_ref = insn->op1;
2914
0
    ir_insn *merge = &ctx->ir_base[merge_ref];
2915
2916
0
    if (ctx->use_lists[merge_ref].count == 2) {
2917
0
      ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2918
0
      ir_insn *end1 = &ctx->ir_base[end1_ref];
2919
0
      ir_insn *end2 = &ctx->ir_base[end2_ref];
2920
2921
0
      if (end1->op == IR_END && end2->op == IR_END) {
2922
0
        ir_ref if_true_ref, if_false_ref;
2923
0
        ir_insn *if_true, *if_false;
2924
0
        ir_op op = IR_IF_FALSE;
2925
2926
0
        ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
2927
2928
0
        if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
2929
0
          IR_ASSERT(IR_IS_CONST_REF(cond->op3));
2930
0
          SWAP_REFS(cond->op2, cond->op3);
2931
0
          SWAP_REFS(merge->op1, merge->op2);
2932
0
          SWAP_REFS(end1_ref, end2_ref);
2933
0
          SWAP_INSNS(end1, end2);
2934
0
        }
2935
0
        if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
2936
0
          SWAP_REFS(if_true_ref, if_false_ref);
2937
0
          op = IR_IF_TRUE;
2938
0
        }
2939
0
        if_true = &ctx->ir_base[if_true_ref];
2940
0
        if_false = &ctx->ir_base[if_false_ref];
2941
2942
0
        if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
2943
0
          if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
2944
            /* Simple IF Split
2945
             *
2946
             *    |        |               |        |
2947
             *    |        END             |        END
2948
             *    END     /                END       \
2949
             *    |  +---+                 |          +
2950
             *    | /                      |          |
2951
             *    MERGE                    |          |
2952
             *    | \                      |          |
2953
             *    |  PHI(false, true)      |          |
2954
             *    | /                      |          |
2955
             *    IF                   =>  |          |
2956
             *    | \                      |          |
2957
             *    |  +------+              |          |
2958
             *    |         IF_TRUE        |          BEGIN
2959
             *    IF_FALSE  |              BEGIN
2960
             *    |                        |
2961
             */
2962
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2963
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
2964
2965
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
2966
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
2967
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
2968
2969
0
            if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2970
0
            if_false->op1 = end1_ref;
2971
2972
0
            if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2973
0
            if_true->op1 = end2_ref;
2974
2975
0
            ir_bitqueue_add(worklist, if_false_ref);
2976
0
            ir_bitqueue_add(worklist, if_true_ref);
2977
2978
0
            return 1;
2979
0
          } else {
2980
            /* Simple IF Split
2981
             *
2982
             *    |        |               |        |
2983
             *    |        END             |        END
2984
             *    END     /                END      |
2985
             *    |  +---+                 |        |
2986
             *    | /                      |        |
2987
             *    MERGE                    |        +
2988
             *    | \                      |       /
2989
             *    |  PHI(false, false)     |      /
2990
             *    | /                      |     /
2991
             *    IF                   =>  |    /
2992
             *    | \                      |   /
2993
             *    |  +------+              |  /
2994
             *    |         IF_TRUE        | /        BEGIN(unreachable)
2995
             *    IF_FALSE  |              MERGE
2996
             *    |                        |
2997
             */
2998
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2999
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3000
3001
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
3002
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3003
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
3004
3005
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3006
0
            if_false->op1 = end1_ref;
3007
0
            if_false->op2 = end2_ref;
3008
3009
0
            if_true->optx = IR_BEGIN;
3010
0
            if_true->op1 = IR_UNUSED;
3011
3012
0
            ctx->flags2 &= ~IR_CFG_REACHABLE;
3013
3014
0
            ir_bitqueue_add(worklist, if_false_ref);
3015
3016
0
            return 1;
3017
0
          }
3018
0
        }
3019
3020
        /* Simple IF Split
3021
         *
3022
         *    |        |               |        |
3023
         *    |        END             |        IF(X)
3024
         *    END     /                END     / \
3025
         *    |  +---+                 |   +--+   +
3026
         *    | /                      |  /       |
3027
         *    MERGE                    | IF_FALSE |
3028
         *    | \                      | |        |
3029
         *    |  PHI(false, X)         | |        |
3030
         *    | /                      | |        |
3031
         *    IF                   =>  | END      |
3032
         *    | \                      | |        |
3033
         *    |  +------+              | |        |
3034
         *    |         IF_TRUE        | |        IF_TRUE
3035
         *    IF_FALSE  |              MERGE
3036
         *    |                        |
3037
         */
3038
0
        ir_use_list_remove_all(ctx, merge_ref, cond_ref);
3039
0
        ir_use_list_remove_all(ctx, ref, if_true_ref);
3040
0
        if (!IR_IS_CONST_REF(cond->op3)) {
3041
0
          ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
3042
0
        }
3043
0
        ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3044
0
        ir_use_list_add(ctx, end2_ref, if_true_ref);
3045
3046
0
        end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3047
0
        end2->op2 = cond->op3;
3048
0
        ir_bitqueue_add(worklist, end2_ref);
3049
3050
0
        merge->optx = IR_OPTX(op, IR_VOID, 1);
3051
0
        merge->op1 = end2_ref;
3052
0
        merge->op2 = IR_UNUSED;
3053
3054
0
        MAKE_NOP(cond);
3055
0
        CLEAR_USES(cond_ref);
3056
3057
0
        insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3058
0
        insn->op1 = merge_ref;
3059
0
        insn->op2 = IR_UNUSED;
3060
3061
0
        if_true->op1 = end2_ref;
3062
3063
0
        if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3064
0
        if_false->op1 = end1_ref;
3065
0
        if_false->op2 = ref;
3066
3067
0
        ir_bitqueue_add(worklist, if_false_ref);
3068
0
        if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3069
0
          ir_bitqueue_add(worklist, end2->op1);
3070
0
        }
3071
3072
0
        return 1;
3073
0
      }
3074
0
    }
3075
0
  }
3076
3077
0
  return 0;
3078
0
}
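
The three "Simple IF Split" diagrams above rest on a single observation: when the PHI feeding the IF carries compile-time booleans, the branch outcome is already decided by which END reached the MERGE. Below is a plain-C analogy of the first case, where the two PHI inputs disagree; the before/after functions are illustrative toys, not IR API.

#include <stdio.h>
#include <stdbool.h>

static void before(bool came_from_second_end) {
    bool phi = came_from_second_end ? true : false;  /* PHI(false, true) */
    if (phi) printf("IF_TRUE\n"); else printf("IF_FALSE\n");
}

static void after(bool came_from_second_end) {
    /* MERGE/PHI/IF deleted: each END is wired straight to its successor */
    if (came_from_second_end) printf("IF_TRUE\n"); else printf("IF_FALSE\n");
}

int main(void) {
    before(false); after(false);   /* both print IF_FALSE */
    before(true);  after(true);    /* both print IF_TRUE */
    return 0;
}

When both PHI inputs agree (the second diagram), the two predecessors instead fold into the single reachable arm through a new MERGE and the other arm becomes an unreachable BEGIN; the third diagram handles a non-constant second input by moving the IF onto that input's edge.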
3079
3080
static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3081
0
{
3082
0
  ir_ref cond_ref = insn->op2;
3083
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
3084
3085
0
  if (cond->op >= IR_EQ && cond->op <= IR_UGT
3086
0
   && IR_IS_CONST_REF(cond->op2)
3087
0
   && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
3088
0
   && ctx->use_lists[insn->op2].count == 1) {
3089
0
    ir_ref phi_ref = cond->op1;
3090
0
    ir_insn *phi = &ctx->ir_base[phi_ref];
3091
3092
0
    if (phi->op == IR_PHI
3093
0
     && phi->inputs_count == 3
3094
0
     && phi->op1 == insn->op1
3095
0
     && ctx->use_lists[phi_ref].count == 1
3096
0
     && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
3097
0
      || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
3098
0
      ir_ref merge_ref = insn->op1;
3099
0
      ir_insn *merge = &ctx->ir_base[merge_ref];
3100
3101
0
      if (ctx->use_lists[merge_ref].count == 2) {
3102
0
        ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3103
0
        ir_insn *end1 = &ctx->ir_base[end1_ref];
3104
0
        ir_insn *end2 = &ctx->ir_base[end2_ref];
3105
3106
0
        if (end1->op == IR_END && end2->op == IR_END) {
3107
0
          ir_ref if_true_ref, if_false_ref;
3108
0
          ir_insn *if_true, *if_false;
3109
0
          ir_op op = IR_IF_FALSE;
3110
3111
0
          ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3112
3113
0
          if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
3114
0
            IR_ASSERT(IR_IS_CONST_REF(phi->op3));
3115
0
            SWAP_REFS(phi->op2, phi->op3);
3116
0
            SWAP_REFS(merge->op1, merge->op2);
3117
0
            SWAP_REFS(end1_ref, end2_ref);
3118
0
            SWAP_INSNS(end1, end2);
3119
0
          }
3120
0
          if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
3121
0
            SWAP_REFS(if_true_ref, if_false_ref);
3122
0
            op = IR_IF_TRUE;
3123
0
          }
3124
0
          if_true = &ctx->ir_base[if_true_ref];
3125
0
          if_false = &ctx->ir_base[if_false_ref];
3126
3127
0
          if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
3128
0
            if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
3129
              /* IF Split
3130
               *
3131
               *    |        |               |        |
3132
               *    |        END             |        END
3133
               *    END     /                END      |
3134
               *    |  +---+                 |        |
3135
               *    | /                      |        |
3136
               *    MERGE                    |        |
3137
               *    | \                      |        |
3138
               *    |  PHI(C1, X)            |        |
3139
               *    |  |                     |        |
3140
               *    |  CMP(_, C2)            |        |
3141
               *    | /                      |        |
3142
               *    IF                   =>  |        |
3143
               *    | \                      |        |
3144
               *    |  +------+              |        |
3145
               *    |         IF_TRUE        |        BEGIN
3146
               *    IF_FALSE  |              BEGIN    |
3147
               *    |                        |
3148
               */
3149
3150
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3151
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3152
3153
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3154
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3155
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3156
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3157
3158
0
              if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3159
0
              if_false->op1 = end1_ref;
3160
3161
0
              if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3162
0
              if_true->op1 = end2_ref;
3163
3164
0
              ir_bitqueue_add(worklist, if_false_ref);
3165
0
              ir_bitqueue_add(worklist, if_true_ref);
3166
3167
0
              return 1;
3168
0
            } else {
3169
              /* IF Split
3170
               *
3171
               *    |        |               |        |
3172
               *    |        END             |        END
3173
               *    END     /                END      |
3174
               *    |  +---+                 |        |
3175
               *    | /                      |        |
3176
               *    MERGE                    |        |
3177
               *    | \                      |        |
3178
               *    |  PHI(C1, X)            |        |
3179
               *    |  |                     |        +
3180
               *    |  CMP(_, C2)            |       /
3181
               *    | /                      |      /
3182
               *    IF                   =>  |     /
3183
               *    | \                      |    /
3184
               *    |  +------+              |   /
3185
               *    |         IF_TRUE        |  /      BEGIN(unreachable)
3186
               *    IF_FALSE  |              MERGE     |
3187
               *    |                        |
3188
               */
3189
3190
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3191
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3192
3193
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3194
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3195
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3196
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3197
3198
0
              if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3199
0
              if_false->op1 = end1_ref;
3200
0
              if_false->op2 = end2_ref;
3201
3202
0
              if_true->optx = IR_BEGIN;
3203
0
              if_true->op1 = IR_UNUSED;
3204
3205
0
              ctx->flags2 &= ~IR_CFG_REACHABLE;
3206
3207
0
              ir_bitqueue_add(worklist, if_false_ref);
3208
3209
0
              return 1;
3210
0
            }
3211
0
          } else {
3212
            /* IF Split
3213
             *
3214
             *    |        |               |        |
3215
             *    |        END             |        IF<----+
3216
             *    END     /                END     / \     |
3217
             *    |  +---+                 |   +--+   +    |
3218
             *    | /                      |  /       |    |
3219
             *    MERGE                    | IF_FALSE |    |
3220
             *    | \                      | |        |    |
3221
             *    |  PHI(C1, X)            | |        |    |
3222
             *    |  |                     | |        |    |
3223
             *    |  CMP(_, C2)            | |        |    CMP(X, C2)
3224
             *    | /                      | |        |
3225
             *    IF                   =>  | END      |
3226
             *    | \                      | |        |
3227
             *    |  +------+              | |        |
3228
             *    |         IF_TRUE        | |        IF_TRUE
3229
             *    IF_FALSE  |              MERGE
3230
             *    |                        |
3231
             */
3232
3233
0
            ir_use_list_remove_all(ctx, merge_ref, phi_ref);
3234
0
            ir_use_list_remove_all(ctx, ref, if_true_ref);
3235
0
            if (!IR_IS_CONST_REF(phi->op3)) {
3236
0
              ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
3237
0
            }
3238
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3239
0
            ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
3240
0
            ir_use_list_add(ctx, end2_ref, if_true_ref);
3241
3242
0
            end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3243
0
            end2->op2 = insn->op2;
3244
0
            ir_bitqueue_add(worklist, end2_ref);
3245
3246
0
            merge->optx = IR_OPTX(op, IR_VOID, 1);
3247
0
            merge->op1 = end2_ref;
3248
0
            merge->op2 = IR_UNUSED;
3249
3250
0
            cond->op1 = phi->op3;
3251
0
            MAKE_NOP(phi);
3252
0
            CLEAR_USES(phi_ref);
3253
3254
0
            insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3255
0
            insn->op1 = merge_ref;
3256
0
            insn->op2 = IR_UNUSED;
3257
3258
0
            if_true->op1 = end2_ref;
3259
3260
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3261
0
            if_false->op1 = end1_ref;
3262
0
            if_false->op2 = ref;
3263
3264
0
            ir_bitqueue_add(worklist, if_false_ref);
3265
0
            if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3266
0
              ir_bitqueue_add(worklist, end2->op1);
3267
0
            }
3268
3269
0
            return 1;
3270
0
          }
3271
0
        }
3272
0
      }
3273
0
    }
3274
0
  }
3275
3276
0
  return 0;
3277
0
}
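
ir_try_split_if_cmp() extends the same split to a comparison over the PHI: with PHI(C1, X) feeding CMP(_, C2), the compare folds on the constant edge at optimization time, and on the other edge it is re-evaluated by turning that edge's END into a fresh IF. A hedged plain-C analogy, with C1, C2 and the before/after functions invented for illustration:

#include <stdio.h>

enum { C1 = 5, C2 = 10 };

static void before(int came_from_second_end, int x) {
    int phi = came_from_second_end ? x : C1;   /* PHI(C1, X) */
    if (phi < C2) printf("IF_TRUE\n");         /* CMP(_, C2) feeds the IF */
    else printf("IF_FALSE\n");
}

static void after(int came_from_second_end, int x) {
    if (!came_from_second_end) {
        printf("IF_TRUE\n");       /* C1 < C2 folded: this edge is decided */
    } else if (x < C2) {           /* CMP moved onto the other edge's END */
        printf("IF_TRUE\n");
    } else {
        printf("IF_FALSE\n");
    }
}

int main(void) {
    before(0, 99); after(0, 99);   /* constant edge: IF_TRUE both times */
    before(1, 3);  after(1, 3);    /* x < C2: IF_TRUE both times */
    before(1, 42); after(1, 42);   /* x >= C2: IF_FALSE both times */
    return 0;
}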
3278
3279
static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
3280
0
{
3281
0
  ir_use_list *use_list = &ctx->use_lists[merge_ref];
3282
3283
0
  if (use_list->count == 1) {
3284
0
    ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
3285
0
  } else if (use_list->count == 2) {
3286
0
    if (merge->inputs_count == 2) {
3287
0
      ir_ref phi_ref = ctx->use_edges[use_list->refs];
3288
0
      ir_insn *phi = &ctx->ir_base[phi_ref];
3289
3290
0
      ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
3291
0
      ir_insn *next = &ctx->ir_base[next_ref];
3292
3293
0
      if (next->op == IR_PHI) {
3294
0
        SWAP_REFS(phi_ref, next_ref);
3295
0
        SWAP_INSNS(phi, next);
3296
0
      }
3297
3298
0
      if (phi->op == IR_PHI && next->op != IR_PHI) {
3299
0
        if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
3300
0
          if (next->op2 == phi_ref) {
3301
0
            if (ir_try_split_if(ctx, next_ref, next, worklist)) {
3302
0
              return;
3303
0
            }
3304
0
          } else {
3305
0
            ir_insn *cmp = &ctx->ir_base[next->op2];
3306
3307
0
            if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
3308
0
             && cmp->op1 == phi_ref
3309
0
             && IR_IS_CONST_REF(cmp->op2)
3310
0
             && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
3311
0
             && ctx->use_lists[next->op2].count == 1) {
3312
0
              if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
3313
0
                return;
3314
0
              }
3315
0
            }
3316
0
          }
3317
0
        }
3318
0
        ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
3319
0
      }
3320
0
    }
3321
0
  }
3322
0
}
3323
3324
static ir_ref ir_find_ext_use(const ir_ctx *ctx, ir_ref ref)
3325
0
{
3326
0
  ir_use_list *use_list = &ctx->use_lists[ref];
3327
0
  ir_ref *p, n, use;
3328
0
  ir_insn *use_insn;
3329
3330
0
  for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
3331
0
    use = *p;
3332
0
    use_insn = &ctx->ir_base[use];
3333
0
    if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
3334
0
      return use;
3335
0
    }
3336
0
  }
3337
0
  return IR_UNUSED;
3338
0
}
3339
3340
static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
3341
0
{
3342
0
  ir_ref ext_ref;
3343
3344
0
  ext_ref = ir_find_ext_use(ctx, phi_ref);
3345
0
  if (!ext_ref) {
3346
0
    ext_ref = ir_find_ext_use(ctx, op_ref);
3347
0
  }
3348
0
  if (ext_ref) {
3349
0
    ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
3350
0
  }
3351
0
}
3352
3353
static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
3354
0
{
3355
0
  ir_ref n;
3356
3357
0
  if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
3358
0
    return;
3359
0
  }
3360
3361
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
3362
0
  for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
3363
    /* "use_lists" may be reallocated by ir_ext_ref() */
3364
0
    ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
3365
0
    ir_insn *use_insn = &ctx->ir_base[use];
3366
3367
0
    if (use_insn->op == IR_PHI) {
3368
0
      ir_ref op_ref = use_insn->op3;
3369
0
      ir_insn *op_insn = &ctx->ir_base[op_ref];
3370
3371
0
      if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
3372
0
        if (op_insn->op1 == use) {
3373
0
          if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) {
3374
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3375
0
          }
3376
0
        } else if (op_insn->op2 == use) {
3377
0
          if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) {
3378
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3379
0
          }
3380
0
        }
3381
0
      }
3382
0
    }
3383
0
  }
3384
0
}
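
A sketch of what the induction-variable promotion triggered here (via ir_iter_optimize_induction_var() and ir_try_promote_induction_var_ext()) buys at the source level, assuming the common case of a narrow counter that is sign-extended at every use; the C below is an analogy only, since the real rewrite operates on the PHI/ADD/SEXT nodes themselves.

#include <stdint.h>
#include <stdio.h>

/* i2 = PHI(loop, 0, i3); i3 = ADD(i2, 1); SEXT(i2) used as an index */
static int64_t before(int32_t n, const int64_t *a) {
    int64_t sum = 0;
    for (int32_t i = 0; i < n; i++) {
        sum += a[(int64_t)i];      /* sign-extended on every iteration */
    }
    return sum;
}

/* after promotion the induction variable itself is 64-bit: no SEXT */
static int64_t after(int32_t n, const int64_t *a) {
    int64_t sum = 0;
    for (int64_t i = 0; i < (int64_t)n; i++) {
        sum += a[i];
    }
    return sum;
}

int main(void) {
    int64_t a[4] = {1, 2, 3, 4};
    printf("%lld %lld\n", (long long)before(4, a), (long long)after(4, a));  /* 10 10 */
    return 0;
}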
3385
3386
static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
3387
0
{
3388
0
  ir_insn *condition_insn = &ctx->ir_base[condition];
3389
3390
0
  while ((condition_insn->op == IR_BITCAST
3391
0
    || condition_insn->op == IR_ZEXT
3392
0
    || condition_insn->op == IR_SEXT)
3393
0
   && ctx->use_lists[condition].count == 1) {
3394
0
    condition = condition_insn->op1;
3395
0
    condition_insn = &ctx->ir_base[condition];
3396
0
  }
3397
3398
0
  if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) {
3399
0
    *swap = 1;
3400
0
    condition = condition_insn->op1;
3401
0
    condition_insn = &ctx->ir_base[condition];
3402
0
  }
3403
3404
0
  if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) {
3405
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3406
3407
0
    if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3408
0
      condition = condition_insn->op1;
3409
0
      condition_insn = &ctx->ir_base[condition];
3410
0
    }
3411
0
  } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) {
3412
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3413
3414
0
    if (condition_insn->op2 == IR_TRUE) {
3415
0
      condition = condition_insn->op1;
3416
0
      condition_insn = &ctx->ir_base[condition];
3417
0
    } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3418
0
      condition = condition_insn->op1;
3419
0
      condition_insn = &ctx->ir_base[condition];
3420
0
      *swap = !*swap;
3421
0
    }
3422
0
  }
3423
3424
0
  while ((condition_insn->op == IR_BITCAST
3425
0
    || condition_insn->op == IR_ZEXT
3426
0
    || condition_insn->op == IR_SEXT)
3427
0
   && ctx->use_lists[condition].count == 1) {
3428
0
    condition = condition_insn->op1;
3429
0
    condition_insn = &ctx->ir_base[condition];
3430
0
  }
3431
3432
0
  if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) {
3433
0
    return IR_TRUE;
3434
0
  }
3435
3436
0
  if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) {
3437
0
    condition = ir_check_dominating_predicates(ctx, control, condition);
3438
0
  }
3439
3440
0
  return condition;
3441
0
}
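
The canonicalizations above (strip single-use casts, drop an NE-zero compare, turn an EQ-zero or a boolean NOT into an arm swap) are justified by ordinary boolean identities; the self-contained check below exercises them for a few integers and is not IR code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

int main(void) {
    for (int64_t x = -2; x <= 2; x++) {
        /* NE(x, 0) is x's truthiness, so the compare can be dropped */
        assert((x != 0) == (bool)x);
        /* EQ(x, 0) is the negation, so the IF arms must be swapped */
        assert((x == 0) == !(bool)x);
        /* NOT over a bool swaps the arms the same way */
        assert(!(bool)x == (x == 0));
    }
    return 0;
}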
3442
3443
static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3444
0
{
3445
0
  bool swap = 0;
3446
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3447
3448
0
  if (swap) {
3449
0
    ir_use_list *use_list = &ctx->use_lists[ref];
3450
0
    ir_ref *p, use;
3451
3452
0
    IR_ASSERT(use_list->count == 2);
3453
0
    p = ctx->use_edges + use_list->refs;
3454
0
    use = *p;
3455
0
    if (ctx->ir_base[use].op == IR_IF_TRUE) {
3456
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3457
0
      use = *(p+1);
3458
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3459
0
    } else {
3460
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3461
0
      use = *(p+1);
3462
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3463
0
    }
3464
0
  }
3465
3466
0
  if (IR_IS_CONST_REF(condition)) {
3467
    /*
3468
     *    |                        |
3469
     *    IF(TRUE)             =>  END
3470
     *    | \                      |
3471
     *    |  +------+              |
3472
     *    |         IF_TRUE        |        BEGIN(unreachable)
3473
     *    IF_FALSE  |              BEGIN
3474
     *    |                        |
3475
     */
3476
0
    ir_ref if_true_ref, if_false_ref;
3477
0
    ir_insn *if_true, *if_false;
3478
3479
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3480
0
    if (!IR_IS_CONST_REF(insn->op2)) {
3481
0
      ir_use_list_remove_one(ctx, insn->op2, ref);
3482
0
      ir_bitqueue_add(worklist, insn->op2);
3483
0
    }
3484
0
    insn->op2 = IR_UNUSED;
3485
3486
0
    ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3487
0
    if_true = &ctx->ir_base[if_true_ref];
3488
0
    if_false = &ctx->ir_base[if_false_ref];
3489
0
    if_true->op = IR_BEGIN;
3490
0
    if_false->op = IR_BEGIN;
3491
0
    if (ir_ref_is_true(ctx, condition)) {
3492
0
      if_false->op1 = IR_UNUSED;
3493
0
      ir_use_list_remove_one(ctx, ref, if_false_ref);
3494
0
      ir_bitqueue_add(worklist, if_true_ref);
3495
0
    } else {
3496
0
      if_true->op1 = IR_UNUSED;
3497
0
      ir_use_list_remove_one(ctx, ref, if_true_ref);
3498
0
      ir_bitqueue_add(worklist, if_false_ref);
3499
0
    }
3500
0
    ctx->flags2 &= ~IR_CFG_REACHABLE;
3501
0
  } else if (insn->op2 != condition) {
3502
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3503
0
  }
3504
0
}
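
A minimal source-level picture of the constant-condition rewrite above; before/after are illustrative only. Note the pass also clears IR_CFG_REACHABLE so the detached arm is pruned when the CFG is recomputed.

#include <stdio.h>

static void before(void) {
    if (1) printf("taken\n");      /* IF(TRUE) */
    else   printf("dead\n");
}

static void after(void) {
    printf("taken\n");   /* IF became END; dead arm is a detached BEGIN */
}

int main(void) { before(); after(); return 0; }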
3505
3506
static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3507
0
{
3508
0
  bool swap = 0;
3509
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3510
3511
0
  if (swap) {
3512
0
    if (insn->op == IR_GUARD) {
3513
0
      insn->op = IR_GUARD_NOT;
3514
0
    } else {
3515
0
      insn->op = IR_GUARD;
3516
0
    }
3517
0
  }
3518
3519
0
  if (IR_IS_CONST_REF(condition)) {
3520
0
    if (insn->op == IR_GUARD) {
3521
0
      if (ir_ref_is_true(ctx, condition)) {
3522
0
        ir_ref prev, next;
3523
3524
0
remove_guard:
3525
0
        prev = insn->op1;
3526
0
        next = ir_next_control(ctx, ref);
3527
0
        if (ctx->ir_base[prev].op == IR_SNAPSHOT) {
3528
0
          ir_ref snapshot = prev;
3529
0
          prev = ctx->ir_base[prev].op1;
3530
0
          ir_use_list_remove_one(ctx, snapshot, ref);
3531
0
          ir_use_list_remove_one(ctx, ref, next);
3532
0
          ir_use_list_replace_one(ctx, prev, snapshot, next);
3533
0
          ir_iter_remove_insn(ctx, snapshot, worklist);
3534
0
        } else {
3535
0
          ir_use_list_remove_one(ctx, ref, next);
3536
0
          ir_use_list_replace_one(ctx, prev, ref, next);
3537
0
        }
3538
0
        ctx->ir_base[next].op1 = prev;
3539
0
        insn->op1 = IR_UNUSED;
3540
3541
0
        if (!IR_IS_CONST_REF(insn->op2)) {
3542
0
          ir_use_list_remove_one(ctx, insn->op2, ref);
3543
0
          if (ir_is_dead(ctx, insn->op2)) {
3544
            /* schedule DCE */
3545
0
            ir_bitqueue_add(worklist, insn->op2);
3546
0
          }
3547
0
        }
3548
3549
0
        if (!IR_IS_CONST_REF(insn->op3)) {
3550
0
          ir_use_list_remove_one(ctx, insn->op3, ref);
3551
0
          if (ir_is_dead(ctx, insn->op3)) {
3552
            /* schedule DCE */
3553
0
            ir_bitqueue_add(worklist, insn->op3);
3554
0
          }
3555
0
        }
3556
3557
0
        MAKE_NOP(insn);
3558
0
        return;
3559
0
      } else {
3560
0
        condition = IR_FALSE;
3561
0
      }
3562
0
    } else {
3563
0
      if (ir_ref_is_true(ctx, condition)) {
3564
0
        condition = IR_TRUE;
3565
0
      } else {
3566
0
        goto remove_guard;
3567
0
      }
3568
0
    }
3569
0
  }
3570
3571
0
  if (insn->op2 != condition) {
3572
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3573
0
  }
3574
0
}
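
An analogy for the guard rewrite, assuming the usual deoptimization reading where a guard side-exits when its condition fails: a guard whose condition folds to the non-exiting value is a no-op, so it is unlinked from the control chain (together with a preceding SNAPSHOT, as handled above). Plain C, not IR API.

#include <assert.h>

static int before(int x) {
    assert(1);        /* GUARD(TRUE): the side exit can never fire */
    return x + 1;
}

static int after(int x) {
    return x + 1;     /* guard removed; prev and next control re-linked */
}

int main(void) { return before(1) == after(1) ? 0 : 1; }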
3575
3576
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
3577
0
{
3578
0
  ir_ref i, val;
3579
0
  ir_insn *insn;
3580
3581
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
3582
0
    insn = &ctx->ir_base[i];
3583
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3584
0
      if (ctx->use_lists[i].count == 0) {
3585
0
        if (insn->op == IR_PHI) {
3586
0
          ir_bitqueue_add(worklist, insn->op1);
3587
0
        }
3588
0
        ir_iter_remove_insn(ctx, i, worklist);
3589
0
      } else {
3590
0
        insn = &ctx->ir_base[i];
3591
0
        switch (insn->op) {
3592
0
          case IR_FP2FP:
3593
0
            if (insn->type == IR_FLOAT) {
3594
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3595
0
                ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist);
3596
0
                insn->op1 = ref;
3597
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3598
0
                break;
3599
0
              }
3600
0
            } else {
3601
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3602
0
                ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist);
3603
0
                insn->op1 = ref;
3604
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3605
0
                break;
3606
0
              }
3607
0
            }
3608
0
            goto folding;
3609
0
          case IR_FP2INT:
3610
0
            if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
3611
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3612
0
                insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist);
3613
0
              }
3614
0
            } else {
3615
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3616
0
                insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist);
3617
0
              }
3618
0
            }
3619
0
            goto folding;
3620
0
          case IR_TRUNC:
3621
0
            if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
3622
0
              ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist);
3623
0
              insn->op1 = ref;
3624
0
              ir_iter_replace_insn(ctx, i, ref, worklist);
3625
0
              break;
3626
0
            }
3627
0
            goto folding;
3628
0
          case IR_SEXT:
3629
0
          case IR_ZEXT:
3630
0
            if (ir_try_promote_ext(ctx, i, insn, worklist)) {
3631
0
              break;
3632
0
            }
3633
0
            goto folding;
3634
0
          case IR_PHI:
3635
0
            break;
3636
0
          default:
3637
0
folding:
3638
0
            ir_iter_fold(ctx, i, worklist);
3639
0
            break;
3640
0
        }
3641
0
      }
3642
0
    } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
3643
0
      if (!(ctx->flags & IR_OPT_CFG)) {
3644
        /* pass */
3645
0
      } else if (insn->op == IR_BEGIN) {
3646
0
        if (insn->op1
3647
0
         && !insn->op2 /* no computed goto label */
3648
0
         && ctx->use_lists[i].count == 1
3649
0
         && ctx->ir_base[insn->op1].op == IR_END) {
3650
0
          ir_merge_blocks(ctx, insn->op1, i, worklist);
3651
0
        }
3652
0
      } else if (insn->op == IR_MERGE) {
3653
0
        ir_iter_optimize_merge(ctx, i, insn, worklist);
3654
0
      } else if (insn->op == IR_LOOP_BEGIN) {
3655
0
        ir_iter_optimize_loop(ctx, i, insn, worklist);
3656
0
      }
3657
0
    } else if (ir_is_dead_load(ctx, i)) {
3658
0
      ir_ref next;
3659
3660
      /* remove LOAD from the doubly-linked control list */
3661
0
remove_mem_insn:
3662
0
      next = ctx->use_edges[ctx->use_lists[i].refs];
3663
0
      IR_ASSERT(ctx->use_lists[i].count == 1);
3664
0
      ctx->ir_base[next].op1 = insn->op1;
3665
0
      ir_use_list_replace_one(ctx, insn->op1, i, next);
3666
0
      insn->op1 = IR_UNUSED;
3667
0
      ir_iter_remove_insn(ctx, i, worklist);
3668
0
    } else if (insn->op == IR_LOAD) {
3669
0
      val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2);
3670
0
      if (val) {
3671
0
        ir_insn *val_insn;
3672
0
        ir_ref prev, next;
3673
3674
0
remove_aliased_load:
3675
0
        prev = insn->op1;
3676
0
        next = ir_next_control(ctx, i);
3677
0
        ctx->ir_base[next].op1 = prev;
3678
0
        ir_use_list_remove_one(ctx, i, next);
3679
0
        ir_use_list_replace_one(ctx, prev, i, next);
3680
0
        insn->op1 = IR_UNUSED;
3681
3682
0
        val_insn = &ctx->ir_base[val];
3683
0
        if (val_insn->type == insn->type) {
3684
0
          ir_iter_replace_insn(ctx, i, val, worklist);
3685
0
        } else {
3686
0
          if (!IR_IS_CONST_REF(insn->op2)) {
3687
0
            ir_use_list_remove_one(ctx, insn->op2, i);
3688
0
            if (ir_is_dead(ctx, insn->op2)) {
3689
              /* schedule DCE */
3690
0
              ir_bitqueue_add(worklist, insn->op2);
3691
0
            }
3692
0
          }
3693
0
          if (!IR_IS_CONST_REF(val)) {
3694
0
            ir_use_list_add(ctx, val, i);
3695
0
          }
3696
0
          if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) {
3697
            /* load forwarding with bitcast (L2L) */
3698
0
            insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1);
3699
0
          } else {
3700
            /* partial load forwarding (L2L) */
3701
0
            insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1);
3702
0
          }
3703
0
          insn->op1 = val;
3704
0
          insn->op2 = IR_UNUSED;
3705
0
          ir_bitqueue_add(worklist, i);
3706
0
          ir_iter_add_uses(ctx, i, worklist);
3707
0
        }
3708
0
      }
3709
0
    } else if (insn->op == IR_STORE) {
3710
0
      if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) {
3711
0
        goto remove_mem_insn;
3712
0
      } else {
3713
0
        ir_insn *val_insn;
3714
3715
0
remove_bitcast:
3716
0
        val = insn->op3;
3717
0
        val_insn = &ctx->ir_base[val];
3718
0
        if (val_insn->op == IR_BITCAST
3719
0
         && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) {
3720
0
          insn->op3 = val_insn->op1;
3721
0
          ir_use_list_remove_one(ctx, val, i);
3722
0
          if (ctx->use_lists[val].count == 0) {
3723
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3724
0
              ir_use_list_replace_one(ctx, val_insn->op1, val, i);
3725
0
            }
3726
0
            ir_iter_remove_insn(ctx, val, worklist);
3727
0
          } else {
3728
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3729
0
              ir_use_list_add(ctx, val_insn->op1, i);
3730
0
            }
3731
0
          }
3732
0
        }
3733
0
      }
3734
0
    } else if (insn->op == IR_VLOAD) {
3735
0
      val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2);
3736
0
      if (val) {
3737
0
        goto remove_aliased_load;
3738
0
      }
3739
0
    } else if (insn->op == IR_VSTORE) {
3740
0
      if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) {
3741
0
        goto remove_mem_insn;
3742
0
      } else {
3743
0
        goto remove_bitcast;
3744
0
      }
3745
0
    } else if (insn->op == IR_IF) {
3746
0
      ir_iter_optimize_if(ctx, i, insn, worklist);
3747
0
    } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
3748
0
      ir_iter_optimize_guard(ctx, i, insn, worklist);
3749
0
    }
3750
0
  }
3751
0
}
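
A toy model of the worklist discipline ir_iter_opt() follows: pop a node, try to fold it, and re-queue its users only when something changed, until the queue drains at a fixpoint. The arrays below are a hypothetical stand-in for ir_bitqueue, whose deduplication is mimicked here by the queued[] flags.

#include <stdio.h>
#include <stdbool.h>

#define N 8

int main(void) {
    /* toy "IR": node i is computed from node i-1; folding halves it */
    int  value[N]  = {0, 64, 64, 64, 64, 64, 64, 64};
    bool queued[N] = {false};
    int  queue[64], head = 0, tail = 0;

    for (int i = 1; i < N; i++) { queue[tail++] = i; queued[i] = true; }

    while (head < tail) {
        int i = queue[head++];
        queued[i] = false;
        int folded = value[i - 1] / 2 + 1;        /* the "fold" step */
        if (folded != value[i]) {
            value[i] = folded;
            if (i + 1 < N && !queued[i + 1]) {    /* re-queue the user */
                queue[tail++] = i + 1;
                queued[i + 1] = true;
            }
        }
    }
    for (int i = 0; i < N; i++) printf("%d ", value[i]);  /* 0 1 1 1 1 1 1 1 */
    printf("\n");
    return 0;
}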
3752
3753
int ir_sccp(ir_ctx *ctx)
3754
0
{
3755
0
  ir_bitqueue sccp_worklist, iter_worklist;
3756
0
  ir_sccp_val *_values;
3757
3758
0
  ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3759
0
  ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
3760
0
  _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_sccp_val));
3761
3762
0
  ctx->flags2 |= IR_OPT_IN_SCCP;
3763
0
  ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
3764
0
  ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
3765
0
  ctx->flags2 &= ~IR_OPT_IN_SCCP;
3766
3767
0
  ir_mem_free(_values);
3768
0
  ir_bitqueue_free(&sccp_worklist);
3769
3770
0
  ctx->flags2 |= IR_CFG_REACHABLE;
3771
3772
0
  ir_iter_opt(ctx, &iter_worklist);
3773
3774
0
  ir_bitqueue_free(&iter_worklist);
3775
3776
0
  return 1;
3777
0
}
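
For orientation, a minimal sketch of the three-level value lattice that sparse conditional constant propagation iterates over: undefined (top), a single known constant, and overdefined (bottom). The lat/meet names below are hypothetical, not this pass's representation; the point is that meet() only moves values down the lattice, so each ref can change state at most twice and ir_sccp_analyze() terminates.

#include <stdio.h>
#include <stdint.h>

typedef enum { LAT_TOP, LAT_CONST, LAT_BOTTOM } lat_kind;
typedef struct { lat_kind kind; int64_t val; } lat;

static lat meet(lat a, lat b)
{
    if (a.kind == LAT_TOP) return b;
    if (b.kind == LAT_TOP) return a;
    if (a.kind == LAT_CONST && b.kind == LAT_CONST && a.val == b.val) return a;
    return (lat){LAT_BOTTOM, 0};    /* disagreeing inputs: overdefined */
}

int main(void)
{
    lat x = {LAT_TOP, 0};
    x = meet(x, (lat){LAT_CONST, 42});   /* first feasible input */
    x = meet(x, (lat){LAT_CONST, 42});   /* agreeing input: still CONST 42 */
    printf("kind=%d val=%lld\n", (int)x.kind, (long long)x.val);
    x = meet(x, (lat){LAT_CONST, 7});    /* conflicting input: BOTTOM */
    printf("kind=%d\n", (int)x.kind);
    return 0;
}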