Coverage Report

Created: 2026-06-13 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/opcache/jit/ir/ir_sccp.c
Line
Count
Source
1
/*
2
 * IR - Lightweight JIT Compilation Framework
3
 * (SCCP - Sparse Conditional Constant Propagation)
4
 * Copyright (C) 2022 Zend by Perforce.
5
 * Authors: Dmitry Stogov <dmitry@php.net>
6
 *
7
 * The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication
8
 * See:  M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
9
 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
10
 */
11
12
#include "ir.h"
13
#include "ir_private.h"
14
15
#include <math.h>
16
17
#define IR_COMBO_COPY_PROPAGATION 1
18
19
0
#define IR_TOP                  IR_UNUSED
20
0
#define IR_BOTTOM               IR_LAST_OP
21
22
0
#define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
23
24
0
#define IR_IS_TOP(ref)          (ref >= 0 && _values[ref].op == IR_TOP)
25
0
#define IR_IS_BOTTOM(ref)       (ref >= 0 && _values[ref].op == IR_BOTTOM)
26
0
#define IR_IS_REACHABLE(ref)    _ir_is_reachable_ctrl(ctx, _values, ref)
27
0
#define IR_IS_CONST(ref)        (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op))
28
29
typedef struct {
30
  union {
31
    struct {
32
      IR_STRUCT_LOHI(
33
        union {
34
          IR_STRUCT_LOHI(
35
            union {
36
              IR_STRUCT_LOHI(
37
                uint8_t        op,   /* [IR_TOP - unreachable, IR_BOTTOM - reachable} for control */
38
                                     /* {IR_TOP | IR_COPY() | IR_CONST() | IR_BOTTOM} for data */
39
                                     /* {IR_TOP | IR_MERGE() | IR_BOTTOM} for IR_MERGE */
40
                                     /* {IR_TOP | IR_IF() | IR_BOTTOM} for IR_IF and IR_SWITCH */
41
                uint8_t        type
42
              );
43
              uint16_t           opt;
44
            },
45
            uint16_t               _space_1
46
          );
47
          uint32_t                   optx;
48
        },
49
        union {
50
          ir_ref                     copy;              /* identity for IR_COPY */
51
          ir_ref                     unfeasible_inputs; /* number of unfeasible inputs for IR_MERGE */
52
          ir_ref                     single_output;     /* reachable output for IR_IF */
53
          ir_ref                     visited;           /* for IR_TOP */
54
        }
55
      );
56
      union {
57
        struct {
58
          ir_ref                     next; /* double-linked identities list for IR_COPY */
59
          ir_ref                     prev; /* double-linked identities list for IR_COPY */
60
        };
61
        ir_val                         val;  /* constant value for IR_CONST */
62
      };
63
    };
64
    ir_insn                                insn; /* constant insn for IR_CONST */
65
  };
66
} ir_sccp_val;
67
68
IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref)
69
0
{
70
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
71
0
  IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL);
72
0
  return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */
73
0
}
74
75
IR_ALWAYS_INLINE void ir_sccp_add_uses(const ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
76
0
{
77
0
  const ir_use_list *use_list;
78
0
  const ir_ref *p;
79
0
  ir_ref n, use;
80
81
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
82
0
  use_list = &ctx->use_lists[ref];
83
0
  n = use_list->count;
84
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
85
0
    use = *p;
86
0
    if (_values[use].op != IR_BOTTOM) {
87
0
      ir_bitqueue_add(worklist, use);
88
0
    }
89
0
  }
90
0
}
91
92
IR_ALWAYS_INLINE void ir_sccp_add_input(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
93
0
{
94
0
  IR_ASSERT(!IR_IS_CONST_REF(ref));
95
0
  IR_ASSERT(_values[ref].op == IR_TOP);
96
  /* do backward propagaton only once */
97
0
  if (!_values[ref].visited) {
98
0
    _values[ref].visited = 1;
99
0
    ir_bitqueue_add(worklist, ref);
100
0
  }
101
0
}
102
103
#if IR_COMBO_COPY_PROPAGATION
104
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
105
0
{
106
0
  if (a > 0 && _values[a].op == IR_COPY) {
107
0
    do {
108
0
      a = _values[a].copy;
109
0
      IR_ASSERT(a > 0);
110
0
    } while (_values[a].op == IR_COPY);
111
0
    IR_ASSERT(_values[a].op == IR_BOTTOM);
112
0
  }
113
0
  return a;
114
0
}
115
116
#if 0
117
static void CHECK_LIST(ir_sccp_val *_values, ir_ref ref)
118
{
119
  ir_ref member = _values[ref].op2;
120
  while (member != ref) {
121
    IR_ASSERT(_values[_values[member].op2].op3 == member);
122
    member = _values[member].op2;
123
  }
124
  IR_ASSERT(_values[_values[ref].op2].op3 == ref);
125
}
126
#else
127
# define CHECK_LIST(_values, ref)
128
#endif
129
130
static void ir_sccp_add_identity(const ir_ctx *ctx, ir_sccp_val *_values, ir_ref src, ir_ref dst)
131
0
{
132
0
  IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY);
133
0
  IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY)));
134
0
  IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst);
135
136
0
  _values[dst].optx = IR_COPY;
137
0
  _values[dst].copy = src;
138
139
0
  if (_values[src].op == IR_BOTTOM) {
140
    /* initialize empty double-linked list */
141
0
    if (_values[src].copy != src) {
142
0
      _values[src].copy = src;
143
0
      _values[src].next = src;
144
0
      _values[src].prev = src;
145
0
    }
146
0
  } else {
147
0
    src = ir_sccp_identity(ctx, _values, src);
148
0
  }
149
150
  /* insert into circular double-linked list */
151
0
  ir_ref prev = _values[src].prev;
152
0
  _values[dst].next = src;
153
0
  _values[dst].prev = prev;
154
0
  _values[src].prev = dst;
155
0
  _values[prev].next = dst;
156
0
  CHECK_LIST(_values, dst);
157
0
}
158
159
static void ir_sccp_split_partition(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
160
0
{
161
0
  ir_ref member, head, tail, next, prev;
162
163
0
  CHECK_LIST(_values, ref);
164
0
  IR_MAKE_BOTTOM(ref);
165
0
  _values[ref].copy = ref;
166
167
0
  member = _values[ref].next;
168
0
  head = tail = IR_UNUSED;
169
0
  while (member != ref) {
170
0
    if (_values[member].op != IR_BOTTOM) {
171
0
      ir_bitqueue_add(worklist, member);
172
0
    }
173
0
    ir_sccp_add_uses(ctx, _values, worklist, member);
174
175
0
    next = _values[member].next;
176
0
    if (ir_sccp_identity(ctx, _values, member) == ref) {
177
      /* remove "member" from the old circular double-linked list */
178
0
      prev = _values[member].prev;
179
0
      _values[prev].next = next;
180
0
      _values[next].prev = prev;
181
182
      /* insert "member" into the new double-linked list */
183
0
      if (!head) {
184
0
        head = tail = member;
185
0
      } else {
186
0
        _values[tail].next = member;
187
0
        _values[member].prev = tail;
188
0
        tail = member;
189
0
      }
190
0
    }
191
0
    member = next;
192
0
  }
193
194
  /* remove "ref" from the old circular double-linked list */
195
0
  next = _values[ref].next;
196
0
  prev = _values[ref].prev;
197
0
  _values[prev].next = next;
198
0
  _values[next].prev = prev;
199
0
  CHECK_LIST(_values, next);
200
201
  /* close the new circle */
202
0
  if (head) {
203
0
    _values[ref].next = head;
204
0
    _values[ref].prev = tail;
205
0
    _values[tail].next = ref;
206
0
    _values[head].prev = ref;
207
0
  } else {
208
0
    _values[ref].next = ref;
209
0
    _values[ref].prev = ref;
210
0
  }
211
0
  CHECK_LIST(_values, ref);
212
0
}
213
214
IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref)
215
0
{
216
0
  if (_values[ref].op == IR_COPY) {
217
0
    ir_sccp_split_partition(ctx, _values, worklist, ref);
218
0
  } else {
219
0
    IR_MAKE_BOTTOM(ref);
220
0
  }
221
0
}
222
223
0
# define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref)
224
#else
225
# define ir_sccp_identity(_ctx, _values, ref) (ref)
226
# define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref)
227
#endif
228
229
IR_ALWAYS_INLINE bool ir_sccp_meet_const(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *val_insn)
230
0
{
231
0
  IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op));
232
233
0
  if (_values[ref].op == IR_TOP) {
234
    /* TOP meet NEW_CONST => NEW_CONST */
235
0
    _values[ref].optx = val_insn->opt;
236
0
    _values[ref].val.u64 = val_insn->val.u64;
237
0
    return 1;
238
0
  } else if (_values[ref].opt == val_insn->opt) {
239
    /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */
240
0
    if (_values[ref].val.u64 == val_insn->val.u64) {
241
0
      return 0;
242
0
    }
243
0
  }
244
245
0
  IR_MAKE_BOTTOM_EX(ref);
246
0
  return 1;
247
0
}
248
249
IR_ALWAYS_INLINE bool ir_sccp_meet_copy(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
250
0
{
251
0
#if IR_COMBO_COPY_PROPAGATION
252
0
  if (_values[ref].op == IR_COPY) {
253
    /* COPY(OLD_VAL) meet COPY(NEW_VAL) =>
254
     *   (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL) ? COPY(OLD_VAL) ? BOTTOM */
255
0
    if (ir_sccp_identity(ctx, _values, ref) == ir_sccp_identity(ctx, _values, val)) {
256
0
      return 0; /* not changed */
257
0
    }
258
0
    ir_sccp_split_partition(ctx, _values, worklist, ref);
259
0
    return 1;
260
0
  } else {
261
0
    IR_ASSERT(_values[ref].op != IR_BOTTOM);
262
    /* TOP       meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
263
    /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */
264
0
    ir_sccp_add_identity(ctx, _values, val, ref);
265
0
    return 1;
266
0
  }
267
0
#endif
268
0
  IR_MAKE_BOTTOM(ref);
269
0
  return 1;
270
0
}
271
272
#if 0
273
IR_ALWAYS_INLINE bool ir_sccp_meet(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val)
274
{
275
  const ir_insn *val_insn;
276
277
  if (IR_IS_CONST_REF(val)) {
278
    val_insn = &ctx->ir_base[val];
279
  } else {
280
    val_insn = &_values[val].insn;
281
282
    if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) {
283
      return ir_sccp_meet_copy(ctx, _values, worklist, ref, val);
284
    }
285
  }
286
287
  return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn);
288
}
289
#endif
290
291
static ir_ref ir_sccp_fold(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref ref, const ir_insn *insn)
292
0
{
293
0
  const ir_insn *op1_insn, *op2_insn, *op3_insn;
294
0
  ir_ref op1, op2, op3, copy;
295
0
  uint32_t opt = insn->opt;
296
297
0
  op1 = ir_sccp_identity(ctx, _values, insn->op1);
298
0
  op2 = ir_sccp_identity(ctx, _values, insn->op2);
299
0
  op3 = ir_sccp_identity(ctx, _values, insn->op3);
300
301
0
restart:
302
0
  op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? &_values[op1].insn : ctx->ir_base + op1;
303
0
  op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? &_values[op2].insn : ctx->ir_base + op2;
304
0
  op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? &_values[op3].insn : ctx->ir_base + op3;
305
306
0
  switch (ir_folding((ir_ctx*)ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
307
0
    case IR_FOLD_DO_RESTART:
308
0
      opt = ctx->fold_insn.optx;
309
0
      op1 = ctx->fold_insn.op1;
310
0
      op2 = ctx->fold_insn.op2;
311
0
      op3 = ctx->fold_insn.op3;
312
0
      goto restart;
313
0
    case IR_FOLD_DO_CSE:
314
0
    case IR_FOLD_DO_EMIT:
315
0
      IR_MAKE_BOTTOM_EX(ref);
316
0
      return 1;
317
0
    case IR_FOLD_DO_COPY:
318
0
      copy = ctx->fold_insn.op1;
319
0
      if (IR_IS_CONST_REF(copy)) {
320
0
        insn = &ctx->ir_base[copy];
321
0
      } else {
322
0
        insn = &_values[copy].insn;
323
0
        if (!IR_IS_CONST_OP(insn->op) && !IR_IS_SYM_CONST(insn->op)) {
324
0
          return ir_sccp_meet_copy(ctx, _values, worklist, ref, copy);
325
0
        }
326
0
        }
327
0
      goto meet_const;
328
0
    case IR_FOLD_DO_CONST:
329
0
      insn = &ctx->fold_insn;
330
0
meet_const:
331
0
      return ir_sccp_meet_const(ctx, _values, worklist, ref, insn);
332
0
    default:
333
0
      IR_ASSERT(0);
334
0
      return 0;
335
0
  }
336
0
}
337
338
static bool ir_sccp_analyze_phi(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_ref i, const ir_insn *insn)
339
0
{
340
0
  ir_ref j, n, input;
341
0
  const ir_ref *merge_input, *p;
342
0
  const ir_insn *v, *new_const = NULL;
343
0
#if IR_COMBO_COPY_PROPAGATION
344
0
  ir_ref new_copy = IR_UNUSED;
345
0
  ir_ref new_copy_identity = IR_UNUSED;
346
0
  ir_ref phi_identity = ir_sccp_identity(ctx, _values, i);
347
0
#endif
348
349
0
  if (!IR_IS_REACHABLE(insn->op1)) {
350
0
    return 0;
351
0
  }
352
0
  n = insn->inputs_count;
353
0
  if (n > 3 && _values[i].op == IR_TOP) {
354
0
    for (j = 0; j < (n>>2); j++) {
355
0
      _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
356
0
    }
357
0
  }
358
359
0
  p = insn->ops + 2;
360
0
  merge_input = ctx->ir_base[insn->op1].ops + 1;
361
0
  for (; --n > 0; p++, merge_input++) {
362
0
    IR_ASSERT(*merge_input > 0);
363
0
    if (!IR_IS_REACHABLE(*merge_input)) {
364
0
      continue;
365
0
    }
366
367
0
    input = *p;
368
0
    if (IR_IS_CONST_REF(input)) {
369
0
      v = &ctx->ir_base[input];
370
0
    } else if (input == i) {
371
0
      continue;
372
0
    } else {
373
0
      v = &_values[input].insn;
374
0
      if (v->op == IR_TOP) {
375
0
        ir_sccp_add_input(ctx, _values, worklist, input);
376
0
        continue;
377
0
#if IR_COMBO_COPY_PROPAGATION
378
0
      } else if (v->op == IR_COPY) {
379
0
        input = v->op1;
380
0
        new_copy_identity = ir_sccp_identity(ctx, _values, input);
381
0
        if (new_copy_identity == phi_identity) {
382
0
          new_copy_identity = IR_UNUSED;
383
0
          continue;
384
0
        }
385
0
        new_copy = input;
386
0
        goto next;
387
0
#endif
388
0
      } else if (v->op == IR_BOTTOM) {
389
0
#if IR_COMBO_COPY_PROPAGATION
390
0
        if (input == phi_identity) {
391
0
          continue;
392
0
        }
393
0
        new_copy = new_copy_identity = input;
394
0
        goto next;
395
#else
396
        goto make_bottom;
397
#endif
398
0
      }
399
0
    }
400
0
    new_const = v;
401
0
    goto next;
402
0
  }
403
404
0
  return 0;
405
406
0
next:
407
0
  p++;
408
0
  merge_input++;
409
  /* for all live merge inputs */
410
0
  for (; --n > 0; p++, merge_input++) {
411
0
    IR_ASSERT(*merge_input > 0);
412
0
    if (!IR_IS_REACHABLE(*merge_input)) {
413
0
      continue;
414
0
    }
415
416
0
    input = *p;
417
0
    if (IR_IS_CONST_REF(input)) {
418
0
#if IR_COMBO_COPY_PROPAGATION
419
0
      if (new_copy) {
420
0
        goto make_bottom;
421
0
      }
422
0
#endif
423
0
      v = &ctx->ir_base[input];
424
0
    } else if (input == i) {
425
0
      continue;
426
0
    } else {
427
0
      v = &_values[input].insn;
428
0
      if (v->op == IR_TOP) {
429
0
        ir_sccp_add_input(ctx, _values, worklist, input);
430
0
        continue;
431
0
#if IR_COMBO_COPY_PROPAGATION
432
0
      } else if (v->op == IR_COPY) {
433
0
        ir_ref identity = ir_sccp_identity(ctx, _values, v->op1);
434
435
0
        if (identity == phi_identity || identity == new_copy_identity) {
436
0
          continue;
437
0
        }
438
0
        goto make_bottom;
439
0
#endif
440
0
      } else if (v->op == IR_BOTTOM) {
441
0
#if IR_COMBO_COPY_PROPAGATION
442
0
        if (input  == phi_identity || input == new_copy_identity) {
443
0
          continue;
444
0
        }
445
0
#endif
446
0
        goto make_bottom;
447
0
      }
448
0
    }
449
0
    if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
450
0
      goto make_bottom;
451
0
    }
452
0
  }
453
454
0
#if IR_COMBO_COPY_PROPAGATION
455
0
  if (new_copy) {
456
0
    IR_ASSERT(!IR_IS_CONST_REF(new_copy));
457
0
    IR_ASSERT(!IR_IS_CONST_OP(_values[new_copy].op) && !IR_IS_SYM_CONST(_values[new_copy].op));
458
0
    return ir_sccp_meet_copy(ctx, _values, worklist, i, new_copy);
459
0
  }
460
0
#endif
461
462
0
  return ir_sccp_meet_const(ctx, _values, worklist, i, new_const);
463
464
0
make_bottom:
465
0
  IR_MAKE_BOTTOM_EX(i);
466
0
  return 1;
467
0
}
468
469
static bool ir_is_dead_load_ex(const ir_ctx *ctx, ir_ref ref, uint32_t flags, const ir_insn *insn)
470
0
{
471
0
  if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
472
0
    return ctx->use_lists[ref].count == 1;
473
0
  } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
474
0
    return ctx->use_lists[ref].count == 1;
475
0
  }
476
0
  return 0;
477
0
}
478
479
static bool ir_is_dead_load(const ir_ctx *ctx, ir_ref ref)
480
0
{
481
0
  if (ctx->use_lists[ref].count == 1) {
482
0
    const ir_insn *insn = &ctx->ir_base[ref];
483
0
    uint32_t flags = ir_op_flags[insn->op];
484
485
0
    if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
486
0
      return 1;
487
0
    } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
488
0
      return 1;
489
0
    }
490
0
  }
491
0
  return 0;
492
0
}
493
494
static bool ir_is_dead(const ir_ctx *ctx, ir_ref ref)
495
0
{
496
0
  if (ctx->use_lists[ref].count == 0) {
497
0
    return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
498
0
  } else {
499
0
    return ir_is_dead_load(ctx, ref);
500
0
  }
501
0
  return 0;
502
0
}
503
504
static bool ir_sccp_is_true(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a)
505
0
{
506
0
  const ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
507
508
0
  return ir_const_is_true(v);
509
0
}
510
511
static bool ir_sccp_is_equal(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b)
512
0
{
513
0
  const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
514
0
  const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
515
516
0
  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
517
0
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
518
0
  return v1->val.u64 == v2->val.u64;
519
0
}
520
521
static bool ir_sccp_in_range(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref a, ir_ref b, ir_ref c)
522
0
{
523
0
  const ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a].insn;
524
0
  const ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b].insn;
525
0
  const ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c].insn;
526
527
0
  IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
528
0
  IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
529
0
  IR_ASSERT(!IR_IS_SYM_CONST(v3->op));
530
0
  if (IR_IS_TYPE_SIGNED(v1->type)) {
531
0
    return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64;
532
0
  } else {
533
0
    return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64;
534
0
    }
535
0
}
536
537
#ifdef IR_SCCP_TRACE
538
static void ir_sccp_trace_val(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
539
{
540
  if (IR_IS_BOTTOM(i)) {
541
    fprintf(stderr, "BOTTOM");
542
  } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
543
    fprintf(stderr, "CONST(");
544
    ir_print_const(ctx, &_values[i].insn, stderr, true);
545
    fprintf(stderr, ")");
546
#if IR_COMBO_COPY_PROPAGATION
547
  } else if (_values[i].op == IR_COPY) {
548
    fprintf(stderr, "COPY(%d)", _values[i].op1);
549
#endif
550
  } else if (IR_IS_TOP(i)) {
551
    fprintf(stderr, "TOP");
552
  } else if (_values[i].op == IR_IF) {
553
    fprintf(stderr, "IF(%d)", _values[i].op1);
554
  } else if (_values[i].op == IR_MERGE) {
555
    fprintf(stderr, "MERGE(%d)", _values[i].op1);
556
  } else {
557
    fprintf(stderr, "%d", _values[i].op);
558
  }
559
}
560
561
static void ir_sccp_trace_start(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
562
{
563
  fprintf(stderr, "%d. ", i);
564
  ir_sccp_trace_val(ctx, _values, i);
565
}
566
567
static void ir_sccp_trace_end(const ir_ctx *ctx, const ir_sccp_val *_values, ir_ref i)
568
{
569
  fprintf(stderr, " -> ");
570
  ir_sccp_trace_val(ctx, _values, i);
571
  fprintf(stderr, "\n");
572
}
573
#else
574
# define ir_sccp_trace_start(c, v, i)
575
# define ir_sccp_trace_end(c, v, i)
576
#endif
577
578
static IR_NEVER_INLINE void ir_sccp_analyze(const ir_ctx *ctx, ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
579
0
{
580
0
  ir_ref i, j, n, use;
581
0
  const ir_ref *p;
582
0
  const ir_use_list *use_list;
583
0
  const ir_insn *insn, *use_insn;
584
0
  uint32_t flags;
585
586
  /* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
587
0
  worklist->pos = 0;
588
0
  ir_bitset_incl(worklist->set, 1);
589
0
  for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) {
590
0
    IR_ASSERT(_values[i].op != IR_BOTTOM);
591
0
    ir_sccp_trace_start(ctx, _values, i);
592
0
    insn = &ctx->ir_base[i];
593
0
    flags = ir_op_flags[insn->op];
594
0
    if (flags & IR_OP_FLAG_DATA) {
595
0
      if (ctx->use_lists[i].count == 0) {
596
        /* dead code */
597
0
        continue;
598
0
      } else if (insn->op == IR_PHI) {
599
0
        if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) {
600
0
          continue;
601
0
        }
602
0
      } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
603
0
        bool may_benefit = 0;
604
0
        bool has_top = 0;
605
606
0
        if (_values[i].op != IR_TOP || insn->op == IR_COPY) {
607
0
          may_benefit = 1;
608
0
        }
609
610
0
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
611
0
        n = IR_INPUT_EDGES_COUNT(flags);
612
0
        if (insn->op == IR_DIV || insn->op == IR_MOD) {
613
          /* skip data-control guard edge */
614
0
          n--;
615
0
        }
616
0
        for (p = insn->ops + 1; n > 0; p++, n--) {
617
0
          ir_ref input = *p;
618
0
          if (input > 0) {
619
0
            if (_values[input].op == IR_TOP) {
620
0
              has_top = 1;
621
0
              ir_sccp_add_input(ctx, _values, worklist, input);
622
0
            } else if (_values[input].op != IR_BOTTOM) {
623
              /* Perform folding only if some of direct inputs
624
               * is going to be replaced by a constant or copy.
625
               * This approach may miss some folding optimizations
626
               * dependent on indirect inputs. e.g. reassociation.
627
               */
628
0
              may_benefit = 1;
629
0
            }
630
0
          }
631
0
        }
632
0
        if (has_top) {
633
0
          continue;
634
0
        }
635
0
        if (!may_benefit) {
636
0
          IR_MAKE_BOTTOM_EX(i);
637
0
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
638
0
            ir_bitqueue_add(iter_worklist, i);
639
0
          }
640
0
        } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) {
641
          /* not changed */
642
0
          continue;
643
0
        } else if (_values[i].op == IR_BOTTOM) {
644
0
          insn = &ctx->ir_base[i];
645
0
          if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) {
646
0
            ir_bitqueue_add(iter_worklist, i);
647
0
          }
648
0
        }
649
0
      } else {
650
0
        IR_MAKE_BOTTOM_EX(i);
651
0
        n = IR_INPUT_EDGES_COUNT(flags);
652
0
        for (p = insn->ops + 1; n > 0; p++, n--) {
653
0
          ir_ref input = *p;
654
0
          if (input > 0) {
655
0
            if (_values[input].op == IR_TOP) {
656
0
              ir_sccp_add_input(ctx, _values, worklist, input);
657
0
            }
658
0
          }
659
0
        }
660
0
      }
661
0
    } else if (flags & IR_OP_FLAG_BB_START) {
662
0
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) {
663
0
        ir_bitqueue_add(iter_worklist, i);
664
0
      }
665
0
      if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
666
0
        ir_ref unfeasible_inputs = 0;
667
668
0
        n = insn->inputs_count;
669
0
        if (n > 3 && _values[i].op == IR_TOP) {
670
0
          for (j = 0; j < (n>>2); j++) {
671
0
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
672
0
          }
673
0
        }
674
0
        for (p = insn->ops + 1; n > 0; p++, n--) {
675
0
          const ir_ref input = *p;
676
0
          IR_ASSERT(input > 0);
677
0
          if (!IR_IS_REACHABLE(input)) {
678
0
            unfeasible_inputs++;
679
0
          }
680
0
        }
681
0
        if (unfeasible_inputs == 0) {
682
0
          IR_MAKE_BOTTOM(i);
683
0
        } else if (_values[i].op != IR_MERGE || _values[i].unfeasible_inputs != unfeasible_inputs) {
684
0
          _values[i].optx = IR_MERGE;
685
0
          _values[i].unfeasible_inputs = unfeasible_inputs;
686
0
        } else {
687
0
          continue;
688
0
        }
689
0
        if (ctx->flags2 & IR_MEM2SSA_VARS) {
690
          /* MEM2SSA puts new PHI at the bottom, but we like to process them now */
691
0
          use_list = &ctx->use_lists[i];
692
0
          n = use_list->count;
693
0
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
694
0
            use = *p;
695
0
            if (_values[use].op != IR_BOTTOM) {
696
0
              if (ctx->ir_base[use].op == IR_PHI) {
697
0
                ir_bitqueue_del(worklist, use);
698
0
                if (ctx->use_lists[use].count != 0) {
699
0
                  if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) {
700
0
                    ir_sccp_add_uses(ctx, _values, worklist, use);
701
0
                  }
702
0
                }
703
0
              } else {
704
0
                ir_bitqueue_add(worklist, use);
705
0
              }
706
0
            }
707
0
          }
708
0
          continue;
709
0
        }
710
0
      } else {
711
0
        IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1));
712
0
        IR_MAKE_BOTTOM(i);
713
0
      }
714
0
    } else {
715
0
      IR_ASSERT(insn->op1 > 0);
716
0
      if (!IR_IS_REACHABLE(insn->op1)) {
717
        /* control inpt is not feasible */
718
0
        continue;
719
0
      }
720
0
      if (insn->op == IR_IF) {
721
0
        if (IR_IS_TOP(insn->op2)) {
722
0
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
723
0
          continue;
724
0
        }
725
0
        if (IR_IS_CONST(insn->op2)) {
726
0
          bool b = ir_sccp_is_true(ctx, _values, insn->op2);
727
0
          use_list = &ctx->use_lists[i];
728
0
          IR_ASSERT(use_list->count == 2);
729
0
          p = &ctx->use_edges[use_list->refs];
730
0
          use = *p;
731
0
          use_insn = &ctx->ir_base[use];
732
0
          IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
733
0
          if ((use_insn->op == IR_IF_TRUE) != b) {
734
0
            use = *(p+1);
735
0
            IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
736
0
          }
737
0
          if (_values[i].op == IR_TOP) {
738
0
            _values[i].optx = IR_IF;
739
0
            _values[i].single_output = use;
740
0
            ir_bitqueue_add(worklist, use);
741
0
            continue;
742
0
          } else if (_values[i].op == IR_IF && _values[i].single_output == use) {
743
0
            continue;
744
0
          }
745
0
        }
746
0
        IR_MAKE_BOTTOM(i);
747
0
        ir_bitqueue_add(iter_worklist, i);
748
0
      } else if (insn->op == IR_SWITCH) {
749
0
        if (IR_IS_TOP(insn->op2)) {
750
0
          ir_sccp_add_input(ctx, _values, worklist, insn->op2);
751
0
          continue;
752
0
        }
753
0
        if (IR_IS_CONST(insn->op2)) {
754
0
          ir_ref use_case = IR_UNUSED;
755
756
0
          use_list = &ctx->use_lists[i];
757
0
          n = use_list->count;
758
0
          for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
759
0
            use = *p;
760
0
            IR_ASSERT(use > 0);
761
0
            use_insn = &ctx->ir_base[use];
762
0
            if (use_insn->op == IR_CASE_VAL) {
763
0
              if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
764
0
                use_case = use;
765
0
                break;
766
0
              }
767
0
            } else if (use_insn->op == IR_CASE_DEFAULT) {
768
0
              use_case = use;
769
0
            } else if (use_insn->op == IR_CASE_RANGE) {
770
0
              if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) {
771
0
                use_case = use;
772
0
                break;
773
0
              }
774
0
            }
775
0
          }
776
0
          if (use_case) {
777
0
            use_insn = &ctx->ir_base[use_case];
778
0
            if (_values[i].op == IR_TOP) {
779
0
              _values[i].optx = IR_IF;
780
0
              _values[i].single_output = use_case;
781
0
              ir_bitqueue_add(worklist, use_case);
782
0
              continue;
783
0
            } else if (_values[i].op == IR_IF || _values[i].single_output == use_case) {
784
0
              continue;
785
0
            }
786
0
          }
787
0
        }
788
0
        IR_MAKE_BOTTOM(i);
789
0
      } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
790
        /* schedule dead load elimination */
791
0
        ir_bitqueue_add(iter_worklist, i);
792
0
        IR_MAKE_BOTTOM(i);
793
0
      } else {
794
        /* control, call, load and store instructions may have unprocessed inputs */
795
0
        n = IR_INPUT_EDGES_COUNT(flags);
796
0
        if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
797
0
          for (j = 0; j < (n>>2); j++) {
798
0
            _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
799
0
          }
800
0
          for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
801
0
            IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
802
0
            use = *p;
803
0
            if (use > 0 && _values[use].op == IR_TOP) {
804
0
              ir_sccp_add_input(ctx, _values, worklist, use);
805
0
            }
806
0
          }
807
0
        } else if (n >= 2) {
808
0
          IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
809
0
          use = insn->op2;
810
0
          if (use > 0 && _values[use].op == IR_TOP) {
811
0
            ir_sccp_add_input(ctx, _values, worklist, use);
812
0
          }
813
0
          if (n > 2) {
814
0
            IR_ASSERT(n == 3);
815
0
            IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
816
0
            use = insn->op3;
817
0
            if (use > 0 && _values[use].op == IR_TOP) {
818
0
              ir_sccp_add_input(ctx, _values, worklist, use);
819
0
            }
820
0
          }
821
0
        }
822
0
        IR_MAKE_BOTTOM(i);
823
0
      }
824
0
    }
825
0
    ir_sccp_add_uses(ctx, _values, worklist, i);
826
0
  }
827
828
0
#ifdef IR_DEBUG
829
0
  if (ctx->flags & IR_DEBUG_SCCP) {
830
0
    for (i = 1; i < ctx->insns_count; i++) {
831
0
      if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
832
0
        fprintf(stderr, "%d. CONST(", i);
833
0
        ir_print_const(ctx, &_values[i].insn, stderr, true);
834
0
        fprintf(stderr, ")\n");
835
0
#if IR_COMBO_COPY_PROPAGATION
836
0
      } else if (_values[i].op == IR_COPY) {
837
0
        fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].copy);
838
0
#endif
839
0
      } else if (IR_IS_TOP(i)) {
840
0
        if (ctx->ir_base[i].op != IR_TOP) {
841
0
          fprintf(stderr, "%d. TOP\n", i);
842
0
        }
843
0
      } else if (_values[i].op == IR_IF) {
844
0
        fprintf(stderr, "%d. IF(%d)\n", i, _values[i].single_output);
845
0
      } else if (_values[i].op == IR_MERGE) {
846
0
        fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].unfeasible_inputs);
847
0
      } else if (!IR_IS_BOTTOM(i)) {
848
0
        fprintf(stderr, "%d. %d\n", i, _values[i].op);
849
0
      }
850
0
    }
851
0
  }
852
0
#endif
853
0
}
854
855
/**********************/
856
/* SCCP trasformation */
857
/**********************/
858
859
static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
860
0
{
861
0
  ir_ref j, n, *p;
862
0
  ir_insn *insn;
863
864
0
  CLEAR_USES(ref);
865
0
  insn = &ctx->ir_base[ref];
866
0
  n = insn->inputs_count;
867
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
868
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
869
0
    *p = IR_UNUSED;
870
0
  }
871
0
}
872
873
static void ir_sccp_remove_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_bitqueue *worklist)
874
0
{
875
0
  ir_ref j, n, *p;
876
0
  ir_insn *insn;
877
878
0
  CLEAR_USES(ref);
879
0
  insn = &ctx->ir_base[ref];
880
0
  n = insn->inputs_count;
881
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
882
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
883
0
    ir_ref input = *p;
884
0
    *p = IR_UNUSED;
885
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
886
0
    if (input > 0 && _values[input].op > IR_COPY) {
887
0
      ir_use_list_remove_one(ctx, input, ref);
888
0
      if (ir_is_dead(ctx, input)) {
889
        /* schedule DCE */
890
0
        ir_bitqueue_add(worklist, input);
891
0
      }
892
0
    }
893
0
  }
894
0
}
895
896
static void ir_sccp_replace_insn(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
897
0
{
898
0
  ir_ref j, n, *p, use, i;
899
0
  ir_insn *insn;
900
0
  ir_use_list *use_list;
901
902
0
  IR_ASSERT(ref != new_ref);
903
904
0
  insn = &ctx->ir_base[ref];
905
906
0
#if IR_COMBO_COPY_PROPAGATION
907
0
  if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) {
908
    /* remove from control list */
909
0
    ir_ref prev = insn->op1;
910
0
    ir_ref next = ir_next_control(ctx, ref);
911
0
    ctx->ir_base[next].op1 = prev;
912
0
    ir_use_list_remove_one(ctx, ref, next);
913
0
    ir_use_list_replace_one(ctx, prev, ref, next);
914
0
    insn->op1 = IR_UNUSED;
915
0
  }
916
0
#endif
917
918
0
  n = insn->inputs_count;
919
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
920
0
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
921
0
    ir_ref input = *p;
922
0
    *p = IR_UNUSED;
923
    /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
924
0
    if (input > 0 && _values[input].op > IR_COPY) {
925
0
      ir_use_list_remove_one(ctx, input, ref);
926
0
      if (ir_is_dead(ctx, input)) {
927
        /* schedule DCE */
928
0
        ir_bitqueue_add(worklist, input);
929
0
      }
930
0
    }
931
0
  }
932
933
0
  use_list = &ctx->use_lists[ref];
934
0
  n = use_list->count;
935
0
  p = &ctx->use_edges[use_list->refs];
936
0
  if (new_ref <= 0) {
937
    /* constant or IR_UNUSED */
938
0
    for (; n; p++, n--) {
939
0
      use = *p;
940
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
941
0
      if (_values[use].op > IR_COPY) {
942
0
        insn = &ctx->ir_base[use];
943
0
        i = ir_insn_find_op(insn, ref);
944
0
        if (!i) continue;
945
0
        IR_ASSERT(i > 0);
946
0
        ir_insn_set_op(insn, i, new_ref);
947
        /* schedule folding */
948
0
        ir_bitqueue_add(worklist, use);
949
0
      }
950
0
    }
951
0
  } else {
952
0
    for (j = 0; j < n; j++, p++) {
953
0
      use = *p;
954
      /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
955
0
      if (_values[use].op == IR_BOTTOM) {
956
0
        insn = &ctx->ir_base[use];
957
0
        i = ir_insn_find_op(insn, ref);
958
0
        IR_ASSERT(i > 0);
959
0
        ir_insn_set_op(insn, i, new_ref);
960
0
        if (ir_use_list_add(ctx, new_ref, use)) {
961
          /* restore after reallocation */
962
0
          use_list = &ctx->use_lists[ref];
963
0
          n = use_list->count;
964
0
          p = &ctx->use_edges[use_list->refs + j];
965
0
        }
966
        /* schedule folding */
967
0
        ir_bitqueue_add(worklist, use);
968
0
      }
969
0
    }
970
0
  }
971
0
  CLEAR_USES(ref);
972
0
}
973
974
static void ir_sccp_remove_if(ir_ctx *ctx, const ir_sccp_val *_values, ir_ref ref, ir_ref dst)
975
0
{
976
0
  ir_ref next;
977
0
  ir_insn *insn, *next_insn;
978
979
0
  insn = &ctx->ir_base[ref];
980
0
  if (ctx->use_lists[dst].count == 1) {
981
0
    next = ctx->use_edges[ctx->use_lists[dst].refs];
982
0
    next_insn = &ctx->ir_base[next];
983
    /* remove IF and IF_TRUE/FALSE from double linked control list */
984
0
    next_insn->op1 = insn->op1;
985
0
    ir_use_list_replace_one(ctx, insn->op1, ref, next);
986
    /* remove IF and IF_TRUE/FALSE instructions */
987
0
    ir_sccp_make_nop(ctx, ref);
988
0
    ir_sccp_make_nop(ctx, dst);
989
0
  } else {
990
0
    insn->op2 = IR_UNUSED;
991
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
992
0
    next_insn = &ctx->ir_base[dst];
993
0
    next_insn->op = IR_BEGIN;
994
0
    next_insn->op2 = IR_UNUSED;
995
0
  }
996
0
}
997
998
static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
999
0
{
1000
0
  ir_ref old_merge_inputs, new_merge_inputs, i, *p;
1001
0
  ir_use_list *use_list;
1002
0
  ir_bitset life_inputs;
1003
0
  ir_bitset_base_t holder = 0;
1004
1005
0
  IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
1006
0
  old_merge_inputs = insn->inputs_count;
1007
0
  new_merge_inputs = 0;
1008
0
  life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1);
1009
1010
0
  for (i = 1; i <= old_merge_inputs; i++) {
1011
0
    ir_ref input = ir_insn_op(insn, i);
1012
1013
0
    if (input) {
1014
0
      new_merge_inputs++;
1015
0
      if (new_merge_inputs != i) {
1016
0
        ir_insn_set_op(insn, new_merge_inputs, input);
1017
0
      }
1018
0
      ir_bitset_incl(life_inputs, i);
1019
0
    }
1020
0
  }
1021
1022
0
  if (new_merge_inputs == old_merge_inputs) {
1023
    /* All inputs are feasible */
1024
0
    if (life_inputs != &holder) {
1025
0
      ir_mem_free(life_inputs);
1026
0
    }
1027
0
    return 0;
1028
0
  }
1029
1030
0
  for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) {
1031
0
    ir_insn_set_op(insn, i, IR_UNUSED);
1032
0
  }
1033
1034
0
  if (new_merge_inputs <= 1) {
1035
#if 0
1036
    if (new_merge_inputs == 1
1037
     && insn->op == IR_LOOP_BEGIN
1038
     && insn->op1 > ref) { // TODO: check dominance instead of order
1039
      /* dead loop */
1040
      ir_use_list_remove_one(ctx, insn->op1, ref);
1041
      insn->op1 = IR_UNUSED;
1042
      new_merge_inputs = 0;
1043
    }
1044
#endif
1045
0
    insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
1046
0
    ir_bitqueue_add(worklist, ref);
1047
0
  } else {
1048
0
    insn->inputs_count = new_merge_inputs;
1049
0
  }
1050
1051
  /* Update PHIs */
1052
0
  use_list = &ctx->use_lists[ref];
1053
0
  if (use_list->count > 1) {
1054
0
    ir_ref use_count = 0;
1055
0
    ir_ref *q;
1056
1057
0
    for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) {
1058
0
      ir_ref use = *p;
1059
0
      ir_insn *use_insn = &ctx->ir_base[use];
1060
1061
0
      if (use_insn->op == IR_PHI) {
1062
0
        ir_ref j, k;
1063
1064
        /* compress PHI */
1065
0
        for (j = k = 1; j <= old_merge_inputs; j++) {
1066
0
          ir_ref input = ir_insn_op(use_insn, j + 1);
1067
1068
0
          if (ir_bitset_in(life_inputs, j)) {
1069
0
            IR_ASSERT(input);
1070
0
            if (k != j) {
1071
0
              ir_insn_set_op(use_insn, k + 1, input);
1072
0
            }
1073
0
            k++;
1074
0
          } else if (input > 0) {
1075
0
            ir_use_list_remove_one(ctx, input, use);
1076
0
          }
1077
0
        }
1078
0
        while (k <= old_merge_inputs) {
1079
0
          k++;
1080
0
          ir_insn_set_op(use_insn, k, IR_UNUSED);
1081
0
        }
1082
1083
0
        if (new_merge_inputs == 0) {
1084
          /* remove PHI */
1085
#if 0
1086
          use_insn->op1 = IR_UNUSED;
1087
          ir_iter_remove_insn(ctx, use, worklist);
1088
#else
1089
0
          IR_ASSERT(0);
1090
0
#endif
1091
0
          continue;
1092
0
        } else if (new_merge_inputs == 1) {
1093
          /* replace PHI by COPY */
1094
0
          use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1);
1095
0
          use_insn->op1 = use_insn->op2;
1096
0
          use_insn->op2 = IR_UNUSED;
1097
0
          ir_bitqueue_add(worklist, use);
1098
0
          continue;
1099
0
        } else {
1100
0
          use_insn->inputs_count = new_merge_inputs + 1;
1101
0
        }
1102
0
      }
1103
0
      if (p != q) {
1104
0
        *q = use;
1105
0
      }
1106
0
      q++;
1107
0
      use_count++;
1108
0
    }
1109
0
    for (i = use_count; i < use_list->count; q++, i++) {
1110
0
      *q = IR_UNUSED;
1111
0
    }
1112
0
    use_list->count = use_count;
1113
0
  }
1114
1115
0
  if (life_inputs != &holder) {
1116
0
    ir_mem_free(life_inputs);
1117
0
  }
1118
1119
0
  return 1;
1120
0
}
1121
1122
static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, const ir_sccp_val *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist)
1123
0
{
1124
0
  ir_ref i, j;
1125
0
  const ir_sccp_val *value;
1126
1127
0
  for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
1128
0
    if (value->op == IR_BOTTOM) {
1129
0
      continue;
1130
0
    } else if (IR_IS_CONST_OP(value->op)) {
1131
      /* replace instruction by constant */
1132
0
      j = ir_const(ctx, value->val, value->type);
1133
0
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1134
0
    } else if (IR_IS_SYM_CONST(value->op)) {
1135
      /* replace instruction by constant */
1136
0
      j = ir_const_ex(ctx, value->val, value->type, value->optx);
1137
0
      ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist);
1138
0
#if IR_COMBO_COPY_PROPAGATION
1139
0
    } else if (value->op == IR_COPY) {
1140
0
      ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->copy), iter_worklist);
1141
0
#endif
1142
0
    } else if (value->op == IR_TOP) {
1143
      /* remove unreachable instruction */
1144
0
      ir_insn *insn = &ctx->ir_base[i];
1145
1146
0
      if (insn->op == IR_NOP) {
1147
        /* already removed */
1148
0
      } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
1149
0
        if (insn->op != IR_PARAM) {
1150
0
          ir_sccp_remove_insn(ctx, _values, i, iter_worklist);
1151
0
        }
1152
0
      } else {
1153
0
        if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
1154
          /* remove from terminators list */
1155
0
          ir_ref prev = ctx->ir_base[1].op1;
1156
0
          if (prev == i) {
1157
0
            ctx->ir_base[1].op1 = insn->op3;
1158
0
          } else {
1159
0
            while (prev) {
1160
0
              if (ctx->ir_base[prev].op3 == i) {
1161
0
                ctx->ir_base[prev].op3 = insn->op3;
1162
0
                break;
1163
0
              }
1164
0
              prev = ctx->ir_base[prev].op3;
1165
0
            }
1166
0
          }
1167
0
        }
1168
0
        ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist);
1169
0
      }
1170
0
    } else if (value->op == IR_IF) {
1171
      /* remove one way IF/SWITCH */
1172
0
      ir_sccp_remove_if(ctx, _values, i, value->single_output);
1173
0
    } else if (value->op == IR_MERGE) {
1174
      /* schedule merge to remove unfeasible MERGE inputs */
1175
0
      ir_bitqueue_add(worklist, i);
1176
0
    }
1177
0
  }
1178
1179
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
1180
0
    IR_ASSERT(_values[i].op == IR_MERGE);
1181
0
    ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist);
1182
0
  }
1183
0
}
1184
1185
/***************************/
1186
/* Iterative Optimizations */
1187
/***************************/
1188
1189
void ir_iter_add_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1190
0
{
1191
0
  ir_use_list *use_list = &ctx->use_lists[ref];
1192
0
  ir_ref *p, n = use_list->count;
1193
1194
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1195
0
    ir_bitqueue_add(worklist, *p);
1196
0
  }
1197
0
}
1198
1199
/* Modification of some instruction may open new optimization oprtunities for other
1200
 * instructions that use this one.
1201
 *
1202
 * For example, let "a = ADD(x, y)" became "a = ADD(x, C1)". In case we also have
1203
 * "b = ADD(a, C2)" we may optimize it into "b = ADD(x, C1 + C2)" and then might
1204
 * also remove "a".
1205
 *
1206
 * This implementation supports only few optimization of combinations from ir_fold.h
1207
 *
1208
 * TODO: Think abput a more general solution ???
1209
 */
1210
static void ir_iter_add_related_uses(const ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1211
0
{
1212
0
  const ir_insn *insn = &ctx->ir_base[ref];
1213
1214
0
  if (insn->op == IR_ADD || insn->op == IR_SUB) {
1215
0
    const ir_use_list *use_list = &ctx->use_lists[ref];
1216
1217
0
    if (use_list->count == 1) {
1218
0
      ir_ref use = ctx->use_edges[use_list->refs];
1219
0
      const ir_insn *use_insn = &ctx->ir_base[ref];
1220
1221
0
      if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) {
1222
0
        ir_bitqueue_add(worklist, use);
1223
0
      }
1224
0
    }
1225
0
  }
1226
0
}
1227
1228
static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1229
0
{
1230
0
  ir_ref j, n, *p;
1231
0
  ir_insn *insn;
1232
1233
0
  CLEAR_USES(ref);
1234
0
  insn = &ctx->ir_base[ref];
1235
0
  n = insn->inputs_count;
1236
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
1237
0
  for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
1238
0
    ir_ref input = *p;
1239
0
    *p = IR_UNUSED;
1240
0
    if (input > 0) {
1241
0
      ir_use_list_remove_one(ctx, input, ref);
1242
0
      if (ir_is_dead(ctx, input)) {
1243
        /* schedule DCE */
1244
0
        ir_bitqueue_add(worklist, input);
1245
0
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1246
        /* try to optimize PHI into ABS/MIN/MAX/COND */
1247
0
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1248
0
      }
1249
0
    }
1250
0
  }
1251
0
}
1252
1253
void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1254
0
{
1255
0
  ir_ref i, j, n, *p, use;
1256
0
  ir_insn *insn;
1257
0
  ir_use_list *use_list;
1258
1259
0
  IR_ASSERT(ref != new_ref);
1260
1261
0
  use_list = &ctx->use_lists[ref];
1262
0
  n = use_list->count;
1263
0
  p = &ctx->use_edges[use_list->refs];
1264
0
  if (new_ref <= 0) {
1265
    /* constant or IR_UNUSED */
1266
0
    for (; n; p++, n--) {
1267
0
      use = *p;
1268
0
      IR_ASSERT(use != ref);
1269
0
      insn = &ctx->ir_base[use];
1270
0
      i = ir_insn_find_op(insn, ref);
1271
0
      IR_ASSERT(i > 0);
1272
0
      ir_insn_set_op(insn, i, new_ref);
1273
      /* schedule folding */
1274
0
      ir_bitqueue_add(worklist, use);
1275
0
      ir_iter_add_related_uses(ctx, use, worklist);
1276
0
    }
1277
0
  } else {
1278
0
    for (j = 0; j < n; j++, p++) {
1279
0
      use = *p;
1280
0
      IR_ASSERT(use != ref);
1281
0
      insn = &ctx->ir_base[use];
1282
0
      i = ir_insn_find_op(insn, ref);
1283
0
      IR_ASSERT(i > 0);
1284
0
      ir_insn_set_op(insn, i, new_ref);
1285
0
      if (ir_use_list_add(ctx, new_ref, use)) {
1286
        /* restore after reallocation */
1287
0
        use_list = &ctx->use_lists[ref];
1288
0
        n = use_list->count;
1289
0
        p = &ctx->use_edges[use_list->refs + j];
1290
0
      }
1291
      /* schedule folding */
1292
0
      ir_bitqueue_add(worklist, use);
1293
0
    }
1294
0
  }
1295
0
}
1296
1297
static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
1298
0
{
1299
0
  ir_ref j, n, *p;
1300
0
  ir_insn *insn;
1301
1302
0
  insn = &ctx->ir_base[ref];
1303
0
  n = insn->inputs_count;
1304
0
  insn->opt = IR_NOP; /* keep "inputs_count" */
1305
0
  for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
1306
0
    ir_ref input = *p;
1307
0
    *p = IR_UNUSED;
1308
0
    if (input > 0) {
1309
0
      ir_use_list_remove_one(ctx, input, ref);
1310
0
      if (ir_is_dead(ctx, input)) {
1311
        /* schedule DCE */
1312
0
        ir_bitqueue_add(worklist, input);
1313
0
      } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
1314
        /* try to optimize PHI into ABS/MIN/MAX/COND */
1315
0
        ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
1316
0
      }
1317
0
    }
1318
0
  }
1319
1320
0
  ir_iter_replace(ctx, ref, new_ref, worklist);
1321
1322
0
  CLEAR_USES(ref);
1323
0
}
1324
1325
void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist)
1326
0
{
1327
0
  ir_insn *insn = &ctx->ir_base[ref];
1328
0
  ir_ref old_val = ir_insn_op(insn, idx);
1329
1330
0
  IR_ASSERT(old_val != new_val);
1331
0
  if (!IR_IS_CONST_REF(new_val)) {
1332
0
    ir_use_list_add(ctx, new_val, ref);
1333
0
  }
1334
0
  ir_insn_set_op(insn, idx, new_val);
1335
0
  if (!IR_IS_CONST_REF(old_val)) {
1336
0
    ir_use_list_remove_one(ctx, old_val, ref);
1337
0
    if (ir_is_dead(ctx, old_val)) {
1338
      /* schedule DCE */
1339
0
      ir_bitqueue_add(worklist, old_val);
1340
0
    }
1341
0
  }
1342
0
}
1343
1344
static ir_ref ir_iter_find_cse1(const ir_ctx *ctx, uint32_t optx, ir_ref op1)
1345
0
{
1346
0
  IR_ASSERT(!IR_IS_CONST_REF(op1));
1347
1348
0
  const ir_use_list *use_list = &ctx->use_lists[op1];
1349
0
  const ir_ref *p;
1350
0
  ir_ref n = use_list->count;
1351
1352
0
  for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1353
0
    ir_ref use = *p;
1354
0
    const ir_insn *use_insn = &ctx->ir_base[use];
1355
1356
0
    if (use_insn->optx == optx) {
1357
0
      IR_ASSERT(use_insn->op1 == op1);
1358
0
      return use;
1359
0
    }
1360
0
  }
1361
0
  return IR_UNUSED;
1362
0
}
1363
1364
static ir_ref ir_iter_find_cse(const ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist)
1365
0
{
1366
0
  uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1367
0
  const ir_use_list *use_list = NULL;
1368
0
  const ir_ref *p;
1369
0
  ir_ref use;
1370
0
  const ir_insn *use_insn;
1371
1372
0
  if (n == 2) {
1373
0
    if (!IR_IS_CONST_REF(op1)) {
1374
0
      use_list = &ctx->use_lists[op1];
1375
0
    }
1376
0
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1377
0
      use_list = &ctx->use_lists[op2];
1378
0
    }
1379
0
    if (use_list) {
1380
0
      n = use_list->count;
1381
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1382
0
        use = *p;
1383
0
        if (use != ref) {
1384
0
          use_insn = &ctx->ir_base[use];
1385
0
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) {
1386
0
            IR_ASSERT(use_insn->op3 == op3);
1387
0
            if (use < ref) {
1388
0
              return use;
1389
0
            } else {
1390
0
              ir_bitqueue_add(worklist, use);
1391
0
            }
1392
0
          }
1393
0
        }
1394
0
      }
1395
0
    }
1396
0
   } else if (n < 2) {
1397
0
    if (op1 > 0) {
1398
0
      IR_ASSERT(n == 1);
1399
0
      use_list = &ctx->use_lists[op1];
1400
0
      n = use_list->count;
1401
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1402
0
        use = *p;
1403
0
        if (use != ref) {
1404
0
          use_insn = &ctx->ir_base[use];
1405
0
          if (use_insn->opt == opt) {
1406
0
            IR_ASSERT(use_insn->op1 == op1);
1407
0
            IR_ASSERT(use_insn->op2 == op2);
1408
0
            IR_ASSERT(use_insn->op3 == op3);
1409
0
            if (use < ref) {
1410
0
              return use;
1411
0
            } else {
1412
0
              ir_bitqueue_add(worklist, use);
1413
0
            }
1414
0
          }
1415
0
        }
1416
0
      }
1417
0
    }
1418
0
  } else {
1419
0
    IR_ASSERT(n == 3);
1420
0
    if (!IR_IS_CONST_REF(op1)) {
1421
0
      use_list = &ctx->use_lists[op1];
1422
0
    }
1423
0
    if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) {
1424
0
      use_list = &ctx->use_lists[op2];
1425
0
    }
1426
0
    if (op3 > 0 && (!use_list || use_list->count > ctx->use_lists[op3].count)) {
1427
0
      use_list = &ctx->use_lists[op3];
1428
0
    }
1429
0
    if (use_list) {
1430
0
      n = use_list->count;
1431
0
      for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
1432
0
        use = *p;
1433
0
        if (use != ref) {
1434
0
          use_insn = &ctx->ir_base[use];
1435
0
          if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) {
1436
0
            if (use < ref) {
1437
0
              return use;
1438
0
            } else {
1439
0
              ir_bitqueue_add(worklist, use);
1440
0
            }
1441
0
          }
1442
0
        }
1443
0
      }
1444
0
    }
1445
0
  }
1446
0
  return IR_UNUSED;
1447
0
}
1448
1449
static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
1450
0
{
1451
0
  uint32_t opt;
1452
0
  ir_ref op1, op2, op3, copy;
1453
0
  const ir_insn *op1_insn, *op2_insn, *op3_insn;
1454
0
  ir_insn *insn;
1455
1456
0
  insn = &ctx->ir_base[ref];
1457
0
  opt = insn->opt;
1458
0
  op1 = insn->op1;
1459
0
  op2 = insn->op2;
1460
0
  op3 = insn->op3;
1461
1462
0
restart:
1463
0
  op1_insn = ctx->ir_base + op1;
1464
0
  op2_insn = ctx->ir_base + op2;
1465
0
  op3_insn = ctx->ir_base + op3;
1466
1467
0
  switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
1468
0
    case IR_FOLD_DO_RESTART:
1469
0
      opt = ctx->fold_insn.optx;
1470
0
      op1 = ctx->fold_insn.op1;
1471
0
      op2 = ctx->fold_insn.op2;
1472
0
      op3 = ctx->fold_insn.op3;
1473
0
      goto restart;
1474
0
    case IR_FOLD_DO_CSE:
1475
0
      copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt,
1476
0
        ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist);
1477
0
      if (copy) {
1478
0
        ir_iter_replace_insn(ctx, ref, copy, worklist);
1479
0
        break;
1480
0
      }
1481
0
      IR_FALLTHROUGH;
1482
0
    case IR_FOLD_DO_EMIT:
1483
0
      insn = &ctx->ir_base[ref];
1484
0
      if (insn->opt != ctx->fold_insn.opt
1485
0
       || insn->op1 != ctx->fold_insn.op1
1486
0
       || insn->op2 != ctx->fold_insn.op2
1487
0
       || insn->op3 != ctx->fold_insn.op3) {
1488
1489
0
        insn->optx = ctx->fold_insn.opt;
1490
0
        IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
1491
0
        insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
1492
0
        if (insn->op1 != ctx->fold_insn.op1) {
1493
0
          if (insn->op1 > 0) {
1494
0
            ir_use_list_remove_one(ctx, insn->op1, ref);
1495
0
          }
1496
0
          if (ctx->fold_insn.op1 > 0) {
1497
0
            ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
1498
0
          }
1499
0
        }
1500
0
        if (insn->op2 != ctx->fold_insn.op2) {
1501
0
          if (insn->op2 > 0) {
1502
0
            ir_use_list_remove_one(ctx, insn->op2, ref);
1503
0
          }
1504
0
          if (ctx->fold_insn.op2 > 0) {
1505
0
            ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
1506
0
          }
1507
0
        }
1508
0
        if (insn->op3 != ctx->fold_insn.op3) {
1509
0
          if (insn->op3 > 0) {
1510
0
            ir_use_list_remove_one(ctx, insn->op3, ref);
1511
0
          }
1512
0
          if (ctx->fold_insn.op3 > 0) {
1513
0
            ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
1514
0
          }
1515
0
        }
1516
0
        insn->op1 = ctx->fold_insn.op1;
1517
0
        insn->op2 = ctx->fold_insn.op2;
1518
0
        insn->op3 = ctx->fold_insn.op3;
1519
1520
0
        ir_iter_add_uses(ctx, ref, worklist);
1521
0
      }
1522
0
      break;
1523
0
    case IR_FOLD_DO_COPY:
1524
0
      op1 = ctx->fold_insn.op1;
1525
0
      ir_iter_replace_insn(ctx, ref, op1, worklist);
1526
0
      break;
1527
0
    case IR_FOLD_DO_CONST:
1528
0
      op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
1529
0
      ir_iter_replace_insn(ctx, ref, op1, worklist);
1530
0
      break;
1531
0
    default:
1532
0
      IR_ASSERT(0);
1533
0
      break;
1534
0
  }
1535
0
}
1536
1537
static bool ir_may_promote_d2f(const ir_ctx *ctx, ir_ref ref)
1538
0
{
1539
0
  const ir_insn *insn = &ctx->ir_base[ref];
1540
1541
0
  IR_ASSERT(insn->type == IR_DOUBLE);
1542
0
  if (IR_IS_CONST_REF(ref)) {
1543
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
1544
0
  } else {
1545
0
    switch (insn->op) {
1546
0
      case IR_FP2FP:
1547
0
        return 1;
1548
//      case IR_INT2FP:
1549
//        return ctx->use_lists[ref].count == 1;
1550
0
      case IR_NEG:
1551
0
      case IR_ABS:
1552
0
        return ctx->use_lists[ref].count == 1 &&
1553
0
          ir_may_promote_d2f(ctx, insn->op1);
1554
0
      case IR_ADD:
1555
0
      case IR_SUB:
1556
0
      case IR_MUL:
1557
0
      case IR_DIV:
1558
0
      case IR_MIN:
1559
0
      case IR_MAX:
1560
0
        return ctx->use_lists[ref].count == 1 &&
1561
0
          ir_may_promote_d2f(ctx, insn->op1) &&
1562
0
          ir_may_promote_d2f(ctx, insn->op2);
1563
0
      default:
1564
0
        break;
1565
0
    }
1566
0
  }
1567
0
  return 0;
1568
0
}
1569
1570
static bool ir_may_promote_f2d(const ir_ctx *ctx, ir_ref ref)
1571
0
{
1572
0
  const ir_insn *insn = &ctx->ir_base[ref];
1573
1574
0
  IR_ASSERT(insn->type == IR_FLOAT);
1575
0
  if (IR_IS_CONST_REF(ref)) {
1576
0
    return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
1577
0
  } else {
1578
0
    switch (insn->op) {
1579
0
      case IR_FP2FP:
1580
0
        return 1;
1581
//      case IR_INT2FP:
1582
//        return ctx->use_lists[ref].count == 1;
1583
0
      case IR_NEG:
1584
0
      case IR_ABS:
1585
0
        return ctx->use_lists[ref].count == 1 &&
1586
0
          ir_may_promote_f2d(ctx, insn->op1);
1587
0
      case IR_ADD:
1588
0
      case IR_SUB:
1589
0
      case IR_MUL:
1590
//      case IR_DIV:
1591
0
      case IR_MIN:
1592
0
      case IR_MAX:
1593
0
        return ctx->use_lists[ref].count == 1 &&
1594
0
          ir_may_promote_f2d(ctx, insn->op1) &&
1595
0
          ir_may_promote_f2d(ctx, insn->op2);
1596
0
      default:
1597
0
        break;
1598
0
    }
1599
0
  }
1600
0
  return 0;
1601
0
}
1602
1603
static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1604
0
{
1605
0
  ir_insn *insn = &ctx->ir_base[ref];
1606
0
  uint32_t count;
1607
1608
0
  IR_ASSERT(insn->type == IR_DOUBLE);
1609
0
  if (IR_IS_CONST_REF(ref)) {
1610
0
    return ir_const_float(ctx, (float)insn->val.d);
1611
0
  } else {
1612
0
    ir_bitqueue_add(worklist, ref);
1613
0
    switch (insn->op) {
1614
0
      case IR_FP2FP:
1615
0
        count = ctx->use_lists[ref].count;
1616
0
        ir_use_list_remove_all(ctx, ref, use);
1617
0
        if (ctx->use_lists[ref].count == 0) {
1618
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1619
0
          if (count > 1) {
1620
0
            do {
1621
0
              ir_use_list_add(ctx, insn->op1, use);
1622
0
            } while (--count > 1);
1623
0
          }
1624
0
          ref = insn->op1;
1625
0
          MAKE_NOP(insn);
1626
0
          return ref;
1627
0
        } else {
1628
0
          ir_use_list_add(ctx, insn->op1, use);
1629
0
          count -= ctx->use_lists[ref].count;
1630
0
          if (count > 1) {
1631
0
            do {
1632
0
              ir_use_list_add(ctx, insn->op1, use);
1633
0
            } while (--count > 1);
1634
0
          }
1635
0
        }
1636
0
        return insn->op1;
1637
//      case IR_INT2FP:
1638
//        insn->type = IR_FLOAT;
1639
//        return ref;
1640
0
      case IR_NEG:
1641
0
      case IR_ABS:
1642
0
        insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1643
0
        insn->type = IR_FLOAT;
1644
0
        return ref;
1645
0
      case IR_ADD:
1646
0
      case IR_SUB:
1647
0
      case IR_MUL:
1648
0
      case IR_DIV:
1649
0
      case IR_MIN:
1650
0
      case IR_MAX:
1651
0
        if (insn->op1 == insn->op2) {
1652
0
          insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1653
0
        } else {
1654
0
          insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist);
1655
0
          insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist);
1656
0
        }
1657
0
        insn->type = IR_FLOAT;
1658
0
        return ref;
1659
0
      default:
1660
0
        break;
1661
0
    }
1662
0
  }
1663
0
  IR_ASSERT(0);
1664
0
  return ref;
1665
0
}
1666
1667
static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1668
0
{
1669
0
  ir_insn *insn = &ctx->ir_base[ref];
1670
0
  uint32_t count;
1671
0
  ir_ref old_ref;
1672
1673
0
  IR_ASSERT(insn->type == IR_FLOAT);
1674
0
  if (IR_IS_CONST_REF(ref)) {
1675
0
    return ir_const_double(ctx, (double)insn->val.f);
1676
0
  } else {
1677
0
    ir_bitqueue_add(worklist, ref);
1678
0
    switch (insn->op) {
1679
0
      case IR_FP2FP:
1680
0
        count = ctx->use_lists[ref].count;
1681
0
        ir_use_list_remove_all(ctx, ref, use);
1682
0
        if (ctx->use_lists[ref].count == 0) {
1683
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1684
0
          if (count > 1) {
1685
0
            do {
1686
0
              ir_use_list_add(ctx, insn->op1, use);
1687
0
            } while (--count > 1);
1688
0
          }
1689
0
          ref = insn->op1;
1690
0
          MAKE_NOP(insn);
1691
0
          return ref;
1692
0
        } else {
1693
0
          ir_use_list_add(ctx, insn->op1, use);
1694
0
          count -= ctx->use_lists[ref].count;
1695
0
          if (count > 1) {
1696
0
            do {
1697
0
              ir_use_list_add(ctx, insn->op1, use);
1698
0
            } while (--count > 1);
1699
0
          }
1700
0
        }
1701
0
        return insn->op1;
1702
0
      case IR_INT2FP:
1703
0
        old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
1704
0
        if (old_ref) {
1705
0
          IR_ASSERT(ctx->use_lists[ref].count == 1);
1706
0
          ir_use_list_remove_one(ctx, insn->op1, ref);
1707
0
          CLEAR_USES(ref);
1708
0
          MAKE_NOP(insn);
1709
0
          ir_use_list_add(ctx, old_ref, use);
1710
0
          return old_ref;
1711
0
        }
1712
0
        insn->type = IR_DOUBLE;
1713
0
        return ref;
1714
0
      case IR_NEG:
1715
0
      case IR_ABS:
1716
0
        insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1717
0
        insn->type = IR_DOUBLE;
1718
0
        return ref;
1719
0
      case IR_ADD:
1720
0
      case IR_SUB:
1721
0
      case IR_MUL:
1722
//      case IR_DIV:
1723
0
      case IR_MIN:
1724
0
      case IR_MAX:
1725
0
        if (insn->op1 == insn->op2) {
1726
0
          insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1727
0
        } else {
1728
0
          insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist);
1729
0
          insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist);
1730
0
        }
1731
0
        insn->type = IR_DOUBLE;
1732
0
        return ref;
1733
0
      default:
1734
0
        break;
1735
0
    }
1736
0
  }
1737
0
  IR_ASSERT(0);
1738
0
  return ref;
1739
0
}
1740
1741
static bool ir_may_promote_trunc(const ir_ctx *ctx, ir_type type, ir_ref ref)
1742
0
{
1743
0
  const ir_insn *insn = &ctx->ir_base[ref];
1744
0
  const ir_ref *p;
1745
0
  ir_ref n, input;
1746
1747
0
  if (IR_IS_CONST_REF(ref)) {
1748
0
    return !IR_IS_SYM_CONST(insn->op);
1749
0
  } else {
1750
0
    switch (insn->op) {
1751
0
      case IR_ZEXT:
1752
0
      case IR_SEXT:
1753
0
      case IR_TRUNC:
1754
0
        return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1;
1755
0
      case IR_NEG:
1756
0
      case IR_ABS:
1757
0
      case IR_NOT:
1758
0
        return ctx->use_lists[ref].count == 1 &&
1759
0
          ir_may_promote_trunc(ctx, type, insn->op1);
1760
0
      case IR_ADD:
1761
0
      case IR_SUB:
1762
0
      case IR_MUL:
1763
0
      case IR_MIN:
1764
0
      case IR_MAX:
1765
0
      case IR_OR:
1766
0
      case IR_AND:
1767
0
      case IR_XOR:
1768
0
      case IR_SHL:
1769
0
        return ctx->use_lists[ref].count == 1 &&
1770
0
          ir_may_promote_trunc(ctx, type, insn->op1) &&
1771
0
          ir_may_promote_trunc(ctx, type, insn->op2);
1772
//      case IR_SHR:
1773
//      case IR_SAR:
1774
//      case IR_DIV:
1775
//      case IR_MOD:
1776
//      case IR_FP2INT:
1777
//        TODO: ???
1778
0
      case IR_COND:
1779
0
        return ctx->use_lists[ref].count == 1 &&
1780
0
          ir_may_promote_trunc(ctx, type, insn->op2) &&
1781
0
          ir_may_promote_trunc(ctx, type, insn->op3);
1782
0
      case IR_PHI:
1783
0
        if (ctx->use_lists[ref].count != 1) {
1784
0
          ir_use_list *use_list = &ctx->use_lists[ref];
1785
0
          ir_ref count = 0;
1786
1787
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
1788
0
            if (*p != ref) {
1789
0
              if (count) {
1790
0
                return 0;
1791
0
              }
1792
0
              count = 1;
1793
0
            }
1794
0
          }
1795
0
        }
1796
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1797
0
          input = *p;
1798
0
          if (input != ref) {
1799
0
            if (!ir_may_promote_trunc(ctx, type, input)) {
1800
0
              return 0;
1801
0
            }
1802
0
          }
1803
0
        }
1804
0
        return 1;
1805
0
      default:
1806
0
        break;
1807
0
    }
1808
0
  }
1809
0
  return 0;
1810
0
}
1811
1812
static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist)
1813
0
{
1814
0
  ir_insn *insn = &ctx->ir_base[ref];
1815
0
  uint32_t count;
1816
0
  ir_ref *p, n, input;
1817
1818
0
  if (IR_IS_CONST_REF(ref)) {
1819
0
    ir_val val;
1820
1821
0
    switch (type) {
1822
0
      case IR_I8:  val.i64 = insn->val.i8; break;
1823
0
      case IR_U8:  val.u64 = insn->val.u8; break;
1824
0
      case IR_I16: val.i64 = insn->val.i16; break;
1825
0
      case IR_U16: val.u64 = insn->val.u16; break;
1826
0
      case IR_I32: val.i64 = insn->val.i32; break;
1827
0
      case IR_U32: val.u64 = insn->val.u32; break;
1828
0
      case IR_CHAR:val.i64 = insn->val.i8; break;
1829
0
      case IR_BOOL:val.u64 = insn->val.u8 != 0; break;
1830
0
      default: IR_ASSERT(0); val.u64 = 0;
1831
0
    }
1832
0
    return ir_const(ctx, val, type);
1833
0
  } else {
1834
0
    ir_bitqueue_add(worklist, ref);
1835
0
    switch (insn->op) {
1836
0
      case IR_ZEXT:
1837
0
      case IR_SEXT:
1838
0
      case IR_TRUNC:
1839
0
        if (ctx->ir_base[insn->op1].type != type) {
1840
0
          ir_type src_type = ctx->ir_base[insn->op1].type;
1841
0
          if (ir_type_size[src_type] == ir_type_size[type]) {
1842
0
            insn->op = IR_BITCAST;
1843
0
          } else if (ir_type_size[src_type] > ir_type_size[type]) {
1844
0
            insn->op = IR_TRUNC;
1845
0
          } else {
1846
0
            if (insn->op != IR_SEXT && insn->op != IR_ZEXT) {
1847
0
              insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT;
1848
0
            }
1849
0
          }
1850
0
          insn->type = type;
1851
0
          ir_iter_add_uses(ctx, ref, worklist);
1852
0
          return ref;
1853
0
        }
1854
1855
0
        count = ctx->use_lists[ref].count;
1856
0
        ir_use_list_remove_all(ctx, ref, use);
1857
0
        if (ctx->use_lists[ref].count == 0) {
1858
0
          ir_use_list_replace_one(ctx, insn->op1, ref, use);
1859
0
          if (count > 1) {
1860
0
            do {
1861
0
              ir_use_list_add(ctx, insn->op1, use);
1862
0
            } while (--count > 1);
1863
0
          }
1864
0
          ref = insn->op1;
1865
0
          MAKE_NOP(insn);
1866
0
          return ref;
1867
0
        } else {
1868
0
          ir_use_list_add(ctx, insn->op1, use);
1869
0
          count -= ctx->use_lists[ref].count;
1870
0
          if (count > 1) {
1871
0
            do {
1872
0
              ir_use_list_add(ctx, insn->op1, use);
1873
0
            } while (--count > 1);
1874
0
          }
1875
0
        }
1876
0
        return insn->op1;
1877
0
      case IR_NEG:
1878
0
      case IR_ABS:
1879
0
      case IR_NOT:
1880
0
        insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1881
0
        insn->type = type;
1882
0
        return ref;
1883
0
      case IR_ADD:
1884
0
      case IR_SUB:
1885
0
      case IR_MUL:
1886
0
      case IR_MIN:
1887
0
      case IR_MAX:
1888
0
      case IR_OR:
1889
0
      case IR_AND:
1890
0
      case IR_XOR:
1891
0
      case IR_SHL:
1892
0
        if (insn->op1 == insn->op2) {
1893
0
          insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1894
0
        } else {
1895
0
          insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist);
1896
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1897
0
        }
1898
0
        insn->type = type;
1899
0
        return ref;
1900
//      case IR_DIV:
1901
//      case IR_MOD:
1902
//      case IR_SHR:
1903
//      case IR_SAR:
1904
//      case IR_FP2INT:
1905
//        TODO: ???
1906
0
      case IR_COND:
1907
0
        if (insn->op2 == insn->op3) {
1908
0
          insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1909
0
        } else {
1910
0
          insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist);
1911
0
          insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist);
1912
0
        }
1913
0
        insn->type = type;
1914
0
        if (IR_IS_TYPE_SIGNED(type)) {
1915
0
          ir_insn *cond = &ctx->ir_base[insn->op1];
1916
0
          if (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE) {
1917
0
            if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
1918
0
              insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MIN : IR_MAX;
1919
0
              ir_use_list_remove_one(ctx, insn->op1, ref);
1920
0
              ir_bitqueue_add(worklist, insn->op1);
1921
0
              insn->op1 = insn->op2;
1922
0
              insn->op2 = insn->op3;
1923
0
              insn->op3 = IR_UNUSED;
1924
0
            } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
1925
0
              insn->op = (cond->op == IR_LT || cond->op == IR_LE) ? IR_MAX : IR_MIN;
1926
0
              ir_use_list_remove_one(ctx, insn->op1, ref);
1927
0
              ir_bitqueue_add(worklist, insn->op1);
1928
0
              insn->op1 = insn->op2;
1929
0
              insn->op2 = insn->op3;
1930
0
              insn->op3 = IR_UNUSED;
1931
0
            }
1932
0
          }
1933
0
        } else {
1934
0
          IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
1935
0
          ir_insn *cond = &ctx->ir_base[insn->op1];
1936
0
          if (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE) {
1937
0
            if (cond->op1 == insn->op2 && cond->op2 == insn->op3) {
1938
0
              insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MIN : IR_MAX;
1939
0
              ir_use_list_remove_one(ctx, insn->op1, ref);
1940
0
              ir_bitqueue_add(worklist, insn->op1);
1941
0
              insn->op1 = insn->op2;
1942
0
              insn->op2 = insn->op3;
1943
0
              insn->op3 = IR_UNUSED;
1944
0
            } else if (cond->op1 == insn->op3 && cond->op2 == insn->op1) {
1945
0
              insn->op = (cond->op == IR_ULT || cond->op == IR_ULE) ? IR_MAX : IR_MIN;
1946
0
              ir_use_list_remove_one(ctx, insn->op1, ref);
1947
0
              ir_bitqueue_add(worklist, insn->op1);
1948
0
              insn->op1 = insn->op2;
1949
0
              insn->op2 = insn->op3;
1950
0
              insn->op3 = IR_UNUSED;
1951
0
            }
1952
0
          }
1953
0
        }
1954
0
        return ref;
1955
0
      case IR_PHI:
1956
0
        for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) {
1957
0
          input = *p;
1958
0
          if (input != ref) {
1959
0
            *p = ir_promote_i2i(ctx, type, input, ref, worklist);
1960
0
          }
1961
0
        }
1962
0
        insn->type = type;
1963
0
        return ref;
1964
0
      default:
1965
0
        break;
1966
0
    }
1967
0
  }
1968
0
  IR_ASSERT(0);
1969
0
  return ref;
1970
0
}
1971
1972
static ir_ref ir_ext_const(ir_ctx *ctx, const ir_insn *val_insn, ir_op op, ir_type type)
1973
0
{
1974
0
  ir_val new_val;
1975
1976
0
  switch (val_insn->type) {
1977
0
    default:
1978
0
      IR_ASSERT(0);
1979
0
    case IR_I8:
1980
0
    case IR_U8:
1981
0
    case IR_BOOL:
1982
0
    case IR_CHAR:
1983
0
      if (op == IR_SEXT) {
1984
0
        new_val.i64 = (int64_t)val_insn->val.i8;
1985
0
      } else {
1986
0
        new_val.u64 = (uint64_t)val_insn->val.u8;
1987
0
      }
1988
0
      break;
1989
0
    case IR_I16:
1990
0
    case IR_U16:
1991
0
      if (op == IR_SEXT) {
1992
0
        new_val.i64 = (int64_t)val_insn->val.i16;
1993
0
      } else {
1994
0
        new_val.u64 = (uint64_t)val_insn->val.u16;
1995
0
      }
1996
0
      break;
1997
0
    case IR_I32:
1998
0
    case IR_U32:
1999
0
      if (op == IR_SEXT) {
2000
0
        new_val.i64 = (int64_t)val_insn->val.i32;
2001
0
      } else {
2002
0
        new_val.u64 = (uint64_t)val_insn->val.u32;
2003
0
      }
2004
0
      break;
2005
0
  }
2006
0
  return ir_const(ctx, new_val, type);
2007
0
}
2008
2009
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
2010
0
{
2011
0
  uint32_t optx = IR_OPTX(op, type, 1);
2012
0
  ir_ref ref;
2013
2014
0
  if (!IR_IS_CONST_REF(src_ref)) {
2015
0
    ref = ir_iter_find_cse1(ctx, optx, src_ref);
2016
0
    if (ref) {
2017
0
      ir_use_list_add(ctx, ref, var_ref);
2018
0
      if (!IR_IS_CONST_REF(src_ref)) {
2019
0
        ir_use_list_remove_one(ctx, src_ref, var_ref);
2020
0
      }
2021
0
      ir_bitqueue_add(worklist, ref);
2022
0
      return ref;
2023
0
    }
2024
0
  }
2025
2026
0
  ref = ir_emit1(ctx, optx, src_ref);
2027
0
  ir_use_list_add(ctx, ref, var_ref);
2028
0
  if (!IR_IS_CONST_REF(src_ref)) {
2029
0
    ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
2030
0
  }
2031
0
  ir_bitqueue_grow(worklist, ref + 1);
2032
0
  ir_bitqueue_add(worklist, ref);
2033
0
  return ref;
2034
0
}
2035
2036
static uint32_t _ir_estimated_control(const ir_ctx *ctx, ir_ref val, ir_ref loop)
2037
0
{
2038
0
  const ir_insn *insn;
2039
0
  const ir_ref *p;
2040
0
  ir_ref n, input, result, ctrl;
2041
2042
0
  if (val <= 0) { /* constant or IR_UNUSED */
2043
0
    return 1; /* IR_START */
2044
0
  }
2045
2046
0
  insn = &ctx->ir_base[val];
2047
0
  if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) {
2048
0
    return val;
2049
0
  }
2050
2051
0
  IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA);
2052
0
  if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) {
2053
0
    return insn->op1;
2054
0
  }
2055
2056
0
  n = insn->inputs_count;
2057
0
  p = insn->ops + 1;
2058
2059
0
  result = 1;
2060
0
  for (; n > 0; p++, n--) {
2061
0
    input = *p;
2062
0
    ctrl = _ir_estimated_control(ctx, input, loop);
2063
0
    if (ctrl >= loop) return ctrl;
2064
0
    if (ctrl > result) { // TODO: check dominance depth instead of order
2065
0
      result = ctrl;
2066
0
    }
2067
0
  }
2068
0
  return result;
2069
0
}
2070
2071
static bool ir_is_loop_invariant(const ir_ctx *ctx, ir_ref ref, ir_ref loop)
2072
0
{
2073
0
  ref = _ir_estimated_control(ctx, ref, loop);
2074
0
  return ref < loop; // TODO: check dominance instead of order
2075
0
}
2076
2077
static bool ir_is_cheaper_ext(const ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op)
2078
0
{
2079
0
  if (IR_IS_CONST_REF(ref)) {
2080
0
    return 1;
2081
0
  } else {
2082
0
    const ir_insn *insn = &ctx->ir_base[ref];
2083
2084
0
    if (insn->op == IR_LOAD) {
2085
0
      if (ir_is_loop_invariant(ctx, ref, loop)) {
2086
0
        return 1;
2087
0
      } else {
2088
        /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */
2089
0
        if (ctx->use_lists[ref].count == 2) {
2090
0
          return 1;
2091
0
        } else if (ctx->use_lists[ref].count == 3) {
2092
0
          ir_use_list *use_list = &ctx->use_lists[ref];
2093
0
          ir_ref *p, n, use;
2094
2095
0
          for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
2096
0
            use = *p;
2097
0
            if (use != ext_ref) {
2098
0
              const ir_insn *use_insn = &ctx->ir_base[use];
2099
2100
0
              if (use_insn->op != op
2101
0
               && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM))
2102
0
                || use_insn->op1 != ref)) {
2103
0
                return 0;
2104
0
              }
2105
0
            }
2106
0
          }
2107
0
          return 1;
2108
0
        }
2109
0
      }
2110
0
      return 0;
2111
0
    } else {
2112
0
      return ir_is_loop_invariant(ctx, ref, loop);
2113
0
    }
2114
0
  }
2115
0
}
2116
2117
static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
2118
0
{
2119
0
  ir_op op = ctx->ir_base[ext_ref].op;
2120
0
  ir_type type = ctx->ir_base[ext_ref].type;
2121
0
  ir_insn *phi_insn;
2122
0
  ir_use_list *use_list;
2123
0
  ir_ref n, *p, use, ext_ref_2 = IR_UNUSED;
2124
2125
  /* Check if we may change the type of the induction variable */
2126
0
  use_list = &ctx->use_lists[phi_ref];
2127
0
  n = use_list->count;
2128
0
  if (n > 1) {
2129
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2130
0
      use = *p;
2131
0
      if (use == op_ref || use == ext_ref) {
2132
0
        continue;
2133
0
      } else {
2134
0
        const ir_insn *use_insn = &ctx->ir_base[use];
2135
2136
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2137
0
          if (use_insn->op1 == phi_ref) {
2138
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2139
0
              return 0;
2140
0
            }
2141
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2142
0
              continue;
2143
0
              }
2144
0
          } else if (use_insn->op2 == phi_ref) {
2145
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2146
0
              return 0;
2147
0
            }
2148
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2149
0
              continue;
2150
0
              }
2151
0
          }
2152
0
          return 0;
2153
0
        } else if (use_insn->op == IR_IF) {
2154
0
          continue;
2155
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2156
0
          ext_ref_2 = use;
2157
0
          continue;
2158
0
        } else {
2159
0
          return 0;
2160
0
        }
2161
0
      }
2162
0
    }
2163
0
  }
2164
2165
0
  use_list = &ctx->use_lists[op_ref];
2166
0
  n = use_list->count;
2167
0
  if (n > 1) {
2168
0
    for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2169
0
      use = *p;
2170
0
      if (use == phi_ref || use == ext_ref) {
2171
0
        continue;
2172
0
      } else {
2173
0
        const ir_insn *use_insn = &ctx->ir_base[use];
2174
2175
0
        if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) {
2176
0
          if (use_insn->op1 == op_ref) {
2177
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) {
2178
0
              return 0;
2179
0
            }
2180
0
            if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2181
0
              continue;
2182
0
              }
2183
0
          } else if (use_insn->op2 == op_ref) {
2184
0
            if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) {
2185
0
              return 0;
2186
0
            }
2187
0
            if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) {
2188
0
              continue;
2189
0
              }
2190
0
          }
2191
0
          return 0;
2192
0
        } else if (use_insn->op == IR_IF) {
2193
0
          continue;
2194
0
        } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) {
2195
0
          ext_ref_2 = use;
2196
0
          continue;
2197
0
        } else {
2198
0
          return 0;
2199
0
        }
2200
0
      }
2201
0
    }
2202
0
  }
2203
2204
0
  for (n = 0; n < ctx->use_lists[phi_ref].count; n++) {
2205
    /* "use_lists" may be reallocated by ir_ext_ref() */
2206
0
    use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n];
2207
0
    if (use == ext_ref) {
2208
0
      continue;
2209
0
    } else {
2210
0
      ir_insn *use_insn = &ctx->ir_base[use];
2211
2212
0
      if (use_insn->op == IR_IF) {
2213
0
        continue;
2214
0
      } else if (use_insn->op == op) {
2215
0
        IR_ASSERT(ext_ref_2 == use);
2216
0
        continue;
2217
0
      }
2218
0
      IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
2219
0
        || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
2220
0
       && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref));
2221
0
      if (use_insn->op1 != phi_ref) {
2222
0
        if (IR_IS_CONST_REF(use_insn->op1)
2223
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2224
0
          ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2225
0
        } else {
2226
0
          ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2227
0
          use_insn = &ctx->ir_base[use];
2228
0
          use_insn->op1 = tmp;
2229
0
        }
2230
0
        ir_bitqueue_add(worklist, use);
2231
0
      }
2232
0
      if (use_insn->op2 != phi_ref) {
2233
0
        if (IR_IS_CONST_REF(use_insn->op2)
2234
0
         && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2235
0
          ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2236
0
        } else {
2237
0
          ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2238
0
          use_insn = &ctx->ir_base[use];
2239
0
          use_insn->op2 = tmp;
2240
0
        }
2241
0
        ir_bitqueue_add(worklist, use);
2242
0
      }
2243
0
    }
2244
0
  }
2245
2246
0
  if (ctx->use_lists[op_ref].count > 1) {
2247
0
    for (n = 0; n < ctx->use_lists[op_ref].count; n++) {
2248
      /* "use_lists" may be reallocated by ir_ext_ref() */
2249
0
      use = ctx->use_edges[ctx->use_lists[op_ref].refs + n];
2250
0
      if (use == ext_ref || use == phi_ref) {
2251
0
        continue;
2252
0
      } else {
2253
0
        ir_insn *use_insn = &ctx->ir_base[use];
2254
2255
0
        if (use_insn->op == IR_IF) {
2256
0
          continue;
2257
0
        } else if (use_insn->op == op) {
2258
0
          IR_ASSERT(ext_ref_2 == use);
2259
0
          continue;
2260
0
        }
2261
0
        IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT);
2262
0
        if (use_insn->op1 != op_ref) {
2263
0
          if (IR_IS_CONST_REF(use_insn->op1)
2264
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
2265
0
            ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
2266
0
          } else {
2267
0
            ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
2268
0
            use_insn = &ctx->ir_base[use];
2269
0
            use_insn->op1 = tmp;
2270
0
          }
2271
0
          ir_bitqueue_add(worklist, use);
2272
0
        }
2273
0
        if (use_insn->op2 != op_ref) {
2274
0
          if (IR_IS_CONST_REF(use_insn->op2)
2275
0
           && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
2276
0
            ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
2277
0
          } else {
2278
0
            ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
2279
0
            use_insn = &ctx->ir_base[use];
2280
0
            use_insn->op2 = tmp;
2281
0
          }
2282
0
          ir_bitqueue_add(worklist, use);
2283
0
        }
2284
0
      }
2285
0
    }
2286
0
  }
2287
2288
0
  ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist);
2289
2290
0
  if (ext_ref_2) {
2291
0
    ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist);
2292
0
  }
2293
2294
0
  ctx->ir_base[op_ref].type = type;
2295
2296
0
  phi_insn = &ctx->ir_base[phi_ref];
2297
0
  phi_insn->type = type;
2298
0
  if (IR_IS_CONST_REF(phi_insn->op2)
2299
0
   && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
2300
0
    ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
2301
0
  } else {
2302
0
    ir_ref tmp = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist);
2303
0
    ctx->ir_base[phi_ref].op2 = tmp;
2304
0
  }
2305
2306
0
  return 1;
2307
0
}
2308
2309
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
2310
0
{
2311
0
  ir_ref ref = insn->op1;
2312
2313
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
2314
0
  insn = &ctx->ir_base[ref];
2315
0
  if (insn->op == IR_PHI
2316
0
   && insn->inputs_count == 3 /* (2 values) */
2317
0
   && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) {
2318
0
    ir_ref op_ref = insn->op3;
2319
0
    ir_insn *op_insn = &ctx->ir_base[op_ref];
2320
2321
0
    if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
2322
0
      if (op_insn->op1 == ref) {
2323
0
        if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) {
2324
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2325
0
        }
2326
0
      } else if (op_insn->op2 == ref) {
2327
0
        if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) {
2328
0
          return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist);
2329
0
        }
2330
0
      }
2331
0
    }
2332
0
  } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) {
2333
0
    if (!IR_IS_CONST_REF(insn->op1)
2334
0
     && ctx->ir_base[insn->op1].op == IR_PHI
2335
0
     && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */
2336
0
     && ctx->ir_base[insn->op1].op3 == ref
2337
0
     && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN
2338
0
     && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) {
2339
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist);
2340
0
    } else if (!IR_IS_CONST_REF(insn->op2)
2341
0
     && ctx->ir_base[insn->op2].op == IR_PHI
2342
0
     && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */
2343
0
     && ctx->ir_base[insn->op2].op3 == ref
2344
0
     && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN
2345
0
     && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) {
2346
0
      return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist);
2347
0
    }
2348
0
  }
2349
2350
0
  return 0;
2351
0
}
2352
2353
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
2354
0
{
2355
0
  ir_use_list *use_list = &ctx->use_lists[if_ref];
2356
0
  ir_ref *p = &ctx->use_edges[use_list->refs];
2357
2358
0
  IR_ASSERT(use_list->count == 2);
2359
0
  if (ctx->ir_base[*p].op == IR_IF_TRUE) {
2360
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
2361
0
    *if_true_ref = *p;
2362
0
    *if_false_ref = *(p + 1);
2363
0
  } else {
2364
0
    IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
2365
0
    IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
2366
0
    *if_false_ref = *p;
2367
0
    *if_true_ref = *(p + 1);
2368
0
  }
2369
0
}
2370
2371
static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist)
2372
0
{
2373
0
  ir_ref prev, next;
2374
0
  ir_use_list *use_list;
2375
2376
0
  IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
2377
0
  IR_ASSERT(ctx->ir_base[end].op == IR_END);
2378
0
  IR_ASSERT(ctx->ir_base[begin].op1 == end);
2379
0
  IR_ASSERT(ctx->use_lists[end].count == 1);
2380
2381
0
  prev = ctx->ir_base[end].op1;
2382
2383
0
  use_list = &ctx->use_lists[begin];
2384
0
  IR_ASSERT(use_list->count == 1);
2385
0
  next = ctx->use_edges[use_list->refs];
2386
2387
  /* remove BEGIN and END */
2388
0
  MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
2389
0
  MAKE_NOP(&ctx->ir_base[end]);   CLEAR_USES(end);
2390
2391
  /* connect their predecessor and successor */
2392
0
  ctx->ir_base[next].op1 = prev;
2393
0
  ir_use_list_replace_one(ctx, prev, end, next);
2394
2395
0
  if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
2396
0
    ir_bitqueue_add(worklist, prev);
2397
0
  }
2398
0
}
2399
2400
static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
2401
0
{
2402
0
  ir_use_list *use_list = &ctx->use_lists[start];
2403
0
  ir_ref *p, use, n = use_list->count;
2404
2405
0
  for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2406
0
    use = *p;
2407
0
    if (use != end) {
2408
0
      ir_insn *use_insn = &ctx->ir_base[use];
2409
0
      IR_ASSERT(use_insn->op == IR_VAR);
2410
0
      IR_ASSERT(ctx->use_lists[use].count == 0);
2411
0
      MAKE_NOP(use_insn);
2412
0
    }
2413
0
  }
2414
0
}
2415
2416
static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2417
0
{
2418
0
  if (insn->inputs_count == 2) {
2419
0
    ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
2420
0
    ir_insn *end1 = &ctx->ir_base[end1_ref];
2421
0
    ir_insn *end2 = &ctx->ir_base[end2_ref];
2422
2423
0
    if (end1->op != IR_END || end2->op != IR_END) {
2424
0
      return 0;
2425
0
    }
2426
2427
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2428
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2429
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2430
2431
0
    if (start1->op1 != start2->op1) {
2432
0
      return 0;
2433
0
    }
2434
2435
0
    ir_ref root_ref = start1->op1;
2436
0
    ir_insn *root = &ctx->ir_base[root_ref];
2437
2438
0
    if (root->op != IR_IF
2439
0
     && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
2440
0
      return 0;
2441
0
    }
2442
2443
    /* Empty Diamond
2444
     *
2445
     *    prev                     prev
2446
     *    |  condition             |  condition
2447
     *    | /                      |
2448
     *    IF                       |
2449
     *    | \                      |
2450
     *    |  +-----+               |
2451
     *    |        IF_FALSE        |
2452
     *    IF_TRUE  |           =>  |
2453
     *    |        END             |
2454
     *    END     /                |
2455
     *    |  +---+                 |
2456
     *    | /                      |
2457
     *    MERGE                    |
2458
     *    |                        |
2459
     *    next                     next
2460
     */
2461
2462
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2463
0
    ir_insn *next = &ctx->ir_base[next_ref];
2464
2465
0
    if (ctx->use_lists[start1_ref].count != 1) {
2466
0
      ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2467
0
    }
2468
0
    if (ctx->use_lists[start2_ref].count != 1) {
2469
0
      ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2470
0
    }
2471
2472
0
    next->op1 = root->op1;
2473
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2474
0
    if (!IR_IS_CONST_REF(root->op2)) {
2475
0
      ir_use_list_remove_one(ctx, root->op2, root_ref);
2476
0
      if (ir_is_dead(ctx, root->op2)) {
2477
0
        ir_bitqueue_add(worklist, root->op2);
2478
0
      }
2479
0
    }
2480
2481
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2482
0
    MAKE_NOP(start1); CLEAR_USES(start1_ref);
2483
0
    MAKE_NOP(start2); CLEAR_USES(start2_ref);
2484
0
    MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2485
0
    MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2486
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2487
2488
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2489
0
      ir_bitqueue_add(worklist, next->op1);
2490
0
    }
2491
2492
0
    return 1;
2493
0
  } else {
2494
0
    ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
2495
0
    ir_ref root_ref = IR_UNUSED;
2496
2497
0
    for (i = 0; i < count; i++) {
2498
0
      ir_ref end_ref, start_ref;
2499
0
      ir_insn *end, *start;
2500
2501
0
      end_ref = ops[i];
2502
0
      end = &ctx->ir_base[end_ref];
2503
0
      if (end->op != IR_END) {
2504
0
        return 0;
2505
0
      }
2506
0
      start_ref = end->op1;
2507
0
      start = &ctx->ir_base[start_ref];
2508
0
      if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) {
2509
0
        return 0;
2510
0
      }
2511
0
      if (ctx->use_lists[start_ref].count != 1) {
2512
0
        ir_remove_unused_vars(ctx, start_ref, end_ref);
2513
0
      }
2514
0
      if (!root_ref) {
2515
0
        root_ref = start->op1;
2516
0
        if (ctx->use_lists[root_ref].count != count) {
2517
0
          return 0;
2518
0
        }
2519
0
      } else if (start->op1 != root_ref) {
2520
0
        return 0;
2521
0
      }
2522
0
    }
2523
2524
    /* Empty N-Diamond */
2525
0
    ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
2526
0
    ir_insn *next = &ctx->ir_base[next_ref];
2527
0
    ir_insn *root = &ctx->ir_base[root_ref];
2528
2529
0
    next->op1 = root->op1;
2530
0
    ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2531
2532
0
    if (!IR_IS_CONST_REF(root->op2)) {
2533
0
      ir_use_list_remove_one(ctx, root->op2, root_ref);
2534
0
      if (ir_is_dead(ctx, root->op2)) {
2535
0
        ir_bitqueue_add(worklist, root->op2);
2536
0
      }
2537
0
    }
2538
2539
0
    MAKE_NOP(root);   CLEAR_USES(root_ref);
2540
2541
0
    for (i = 0; i < count; i++) {
2542
0
      ir_ref end_ref = ops[i];
2543
0
      ir_insn *end = &ctx->ir_base[end_ref];
2544
0
      ir_ref start_ref = end->op1;
2545
0
      ir_insn *start = &ctx->ir_base[start_ref];
2546
2547
0
      MAKE_NOP(start); CLEAR_USES(start_ref);
2548
0
      MAKE_NOP(end);   CLEAR_USES(end_ref);
2549
0
    }
2550
2551
0
    MAKE_NOP(insn);   CLEAR_USES(ref);
2552
2553
0
    if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2554
0
      ir_bitqueue_add(worklist, next->op1);
2555
0
    }
2556
2557
0
    return 1;
2558
0
  }
2559
0
}
2560
2561
static bool ir_is_zero(const ir_ctx *ctx, ir_ref ref)
2562
0
{
2563
0
  return IR_IS_CONST_REF(ref)
2564
0
    && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2565
0
    && ctx->ir_base[ref].val.u32 == 0;
2566
0
}
2567
2568
static bool ir_fix_min_max_const(ir_ctx *ctx, ir_insn *cond, ir_ref ref)
2569
0
{
2570
0
  if (cond->op == IR_ULE) {
2571
    /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) =>  max(x, 4) */
2572
    /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) =>  min(x, 4) */
2573
0
    if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
2574
0
     && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2575
0
     && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
2576
0
     && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
2577
0
      cond->op2 = ref;
2578
0
      return 1;
2579
0
    }
2580
0
  } else if (cond->op == IR_UGE) {
2581
    /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) =>  min(x, 2) */
2582
    /* (x >= 3 ? x : 2) => (x > 2 ? x : 2) =>  max(x, 2) */
2583
0
    if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
2584
0
     && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2585
0
     && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 + 1
2586
0
     && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
2587
0
      cond->op2 = ref;
2588
0
      return 1;
2589
0
    }
2590
0
  } else if (cond->op == IR_LE) {
2591
    /* (x <= 3 ? 4 : x) => (x < 4 ? 4 : x) =>  max(x, 4) */
2592
    /* (x <= 3 ? x : 4) => (x < 4 ? x : 4) =>  min(x, 4) */
2593
0
    if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
2594
0
     && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2595
0
     && ctx->ir_base[cond->op2].val.u64 == ctx->ir_base[ref].val.u64 - 1
2596
0
     && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
2597
0
      cond->op2 = ref;
2598
0
      return 1;
2599
0
    }
2600
0
  } else if (cond->op == IR_GE) {
2601
    /* (x >= 3 ? 2 : x) => (x > 2 ? 2 : x) =>  min(x, 2) */
2602
    /* (x >= 3 ? x : 2) => (x > 2 ? x : 2) =>  max(x, 2) */
2603
0
    if (!IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
2604
0
     && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
2605
0
     && ctx->ir_base[cond->op2].val.i64 == ctx->ir_base[ref].val.i64 + 1
2606
0
     && ctx->ir_base[cond->op2].type == ctx->ir_base[ref].type) {
2607
0
      cond->op2 = ref;
2608
0
      return 1;
2609
0
    }
2610
0
  }
2611
0
  return 0;
2612
0
}
2613
2614
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
2615
0
{
2616
0
  IR_ASSERT(insn->inputs_count == 3);
2617
0
  IR_ASSERT(ctx->use_lists[merge_ref].count == 2);
2618
2619
0
  ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
2620
0
  ir_insn *end1 = &ctx->ir_base[end1_ref];
2621
0
  ir_insn *end2 = &ctx->ir_base[end2_ref];
2622
2623
0
  if (end1->op == IR_END && end2->op == IR_END) {
2624
0
    ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
2625
0
    ir_insn *start1 = &ctx->ir_base[start1_ref];
2626
0
    ir_insn *start2 = &ctx->ir_base[start2_ref];
2627
2628
0
    if (start1->op1 == start2->op1) {
2629
0
      ir_ref root_ref = start1->op1;
2630
0
      ir_insn *root = &ctx->ir_base[root_ref];
2631
2632
0
      if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2)) {
2633
0
        ir_ref cond_ref = root->op2;
2634
0
        ir_insn *cond = &ctx->ir_base[cond_ref];
2635
0
        ir_type type = insn->type;
2636
0
        bool is_cmp, is_less;
2637
2638
0
        if (IR_IS_TYPE_FP(type)) {
2639
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
2640
0
            cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2641
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE ||
2642
0
            cond->op == IR_ULT || cond->op == IR_ULE);
2643
0
        } else if (IR_IS_TYPE_SIGNED(type)) {
2644
0
          is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
2645
0
          is_less = (cond->op == IR_LT || cond->op == IR_LE);
2646
0
        } else {
2647
0
          IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
2648
0
          is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
2649
0
          is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
2650
0
        }
2651
2652
0
        if (is_cmp
2653
0
         && ((insn->op2 == cond->op1
2654
0
           && (insn->op3 == cond->op2
2655
0
            || (IR_IS_CONST_REF(cond->op2)
2656
0
             && (IR_IS_CONST_REF(insn->op3)
2657
0
           && IR_IS_TYPE_INT(insn->type)
2658
0
             && ir_fix_min_max_const(ctx, cond, insn->op3)))))
2659
0
          || (insn->op3 == cond->op1
2660
0
           && (insn->op2 == cond->op2
2661
0
            || (IR_IS_CONST_REF(cond->op2)
2662
0
             && (IR_IS_CONST_REF(insn->op2)
2663
0
           && IR_IS_TYPE_INT(insn->type)
2664
0
             && ir_fix_min_max_const(ctx, cond, insn->op2))))))) {
2665
          /* MAX/MIN
2666
           *
2667
           *    prev                     prev
2668
           *    |  LT(A, B)              |
2669
           *    | /                      |
2670
           *    IF                       |
2671
           *    | \                      |
2672
           *    |  +-----+               |
2673
           *    |        IF_FALSE        |
2674
           *    IF_TRUE  |           =>  |
2675
           *    |        END             |
2676
           *    END     /                |
2677
           *    |  +---+                 |
2678
           *    | /                      |
2679
           *    MERGE                    |
2680
           *    |    \                   |
2681
           *    |     PHI(A, B)          |    MIN(A, B)
2682
           *    next                     next
2683
           */
2684
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2685
0
          ir_insn *next;
2686
2687
0
          if (next_ref == ref) {
2688
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2689
0
          }
2690
0
          next = &ctx->ir_base[next_ref];
2691
2692
0
          if (ctx->use_lists[start1_ref].count != 1) {
2693
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2694
0
          }
2695
0
          if (ctx->use_lists[start2_ref].count != 1) {
2696
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2697
0
          }
2698
2699
0
          insn->op = (
2700
0
            (is_less ? cond->op1 : cond->op2)
2701
0
            ==
2702
0
            ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
2703
0
            ) ? IR_MIN : IR_MAX;
2704
0
          insn->inputs_count = 2;
2705
0
          if (insn->op2 > insn->op3) {
2706
0
            insn->op1 = insn->op2;
2707
0
            insn->op2 = insn->op3;
2708
0
          } else {
2709
0
            insn->op1 = insn->op3;
2710
0
          }
2711
0
          insn->op3 = IR_UNUSED;
2712
2713
0
          next->op1 = root->op1;
2714
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2715
2716
0
          if (ctx->use_lists[cond_ref].count == 1) {
2717
0
            if (!IR_IS_CONST_REF(insn->op1)) {
2718
0
              ir_use_list_remove_one(ctx, insn->op1, cond_ref);
2719
0
            }
2720
0
            if (!IR_IS_CONST_REF(insn->op2)) {
2721
0
              ir_use_list_remove_one(ctx, insn->op2, cond_ref);
2722
0
            }
2723
0
            MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2724
0
          } else {
2725
0
            ir_use_list_remove_one(ctx, cond_ref, root_ref);
2726
0
          }
2727
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2728
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2729
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2730
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2731
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2732
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2733
2734
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2735
0
            ir_bitqueue_add(worklist, next->op1);
2736
0
          }
2737
2738
0
          return 1;
2739
0
        } else if (is_cmp
2740
0
            && ((ctx->ir_base[insn->op2].op == IR_NEG
2741
0
              && ctx->use_lists[insn->op2].count == 1
2742
0
              && ctx->ir_base[insn->op2].op1 == insn->op3
2743
0
              && ((cond->op1 == insn->op3
2744
0
                && ir_is_zero(ctx, cond->op2)
2745
0
                && is_less == (start1->op == IR_IF_TRUE))
2746
0
               || (cond->op2 == insn->op3
2747
0
                && ir_is_zero(ctx, cond->op1)
2748
0
                && is_less != (start1->op == IR_IF_TRUE))))
2749
0
             || (ctx->ir_base[insn->op3].op == IR_NEG
2750
0
              && ctx->use_lists[insn->op3].count == 1
2751
0
              && ctx->ir_base[insn->op3].op1 == insn->op2
2752
0
              && ((cond->op1 == insn->op2
2753
0
                && ir_is_zero(ctx, cond->op2)
2754
0
                && is_less != (start1->op == IR_IF_TRUE))
2755
0
               || (cond->op2 == insn->op2
2756
0
                && ir_is_zero(ctx, cond->op1)
2757
0
                && is_less == (start1->op == IR_IF_TRUE)))))) {
2758
          /* ABS
2759
           *
2760
           *    prev                     prev
2761
           *    |  LT(A, 0)              |
2762
           *    | /                      |
2763
           *    IF                       |
2764
           *    | \                      |
2765
           *    |  +-----+               |
2766
           *    |        IF_FALSE        |
2767
           *    IF_TRUE  |           =>  |
2768
           *    |        END             |
2769
           *    END     /                |
2770
           *    |  +---+                 |
2771
           *    | /                      |
2772
           *    MERGE                    |
2773
           *    |    \                   |
2774
           *    |     PHI(A, NEG(A))     |    ABS(A)
2775
           *    next                     next
2776
           */
2777
0
          ir_ref neg_ref;
2778
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2779
0
          ir_insn *next;
2780
2781
0
          if (next_ref == ref) {
2782
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2783
0
          }
2784
0
          next = &ctx->ir_base[next_ref];
2785
2786
0
          if (ctx->use_lists[start1_ref].count != 1) {
2787
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2788
0
          }
2789
0
          if (ctx->use_lists[start2_ref].count != 1) {
2790
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2791
0
          }
2792
2793
0
          insn->op = IR_ABS;
2794
0
          insn->inputs_count = 1;
2795
0
          if (ctx->ir_base[insn->op2].op == IR_NEG) {
2796
0
            neg_ref = insn->op2;
2797
0
            insn->op1 = insn->op3;
2798
0
          } else {
2799
0
            neg_ref = insn->op3;
2800
0
            insn->op1 = insn->op2;
2801
0
          }
2802
0
          insn->op2 = IR_UNUSED;
2803
0
          insn->op3 = IR_UNUSED;
2804
2805
0
          next->op1 = root->op1;
2806
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2807
0
          ir_use_list_remove_one(ctx, insn->op1, neg_ref);
2808
2809
0
          if (ctx->use_lists[cond_ref].count == 1) {
2810
0
            if (!IR_IS_CONST_REF(insn->op1)) {
2811
0
              ir_use_list_remove_one(ctx, insn->op1, cond_ref);
2812
0
            }
2813
0
            MAKE_NOP(cond);   CLEAR_USES(cond_ref);
2814
0
          } else {
2815
0
            ir_use_list_remove_one(ctx, cond_ref, root_ref);
2816
0
            }
2817
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2818
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2819
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2820
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2821
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2822
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2823
0
          MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);
2824
2825
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2826
0
            ir_bitqueue_add(worklist, next->op1);
2827
0
          }
2828
2829
0
          return 1;
2830
0
        } else if (insn->op2 <= root_ref && insn->op3 <= root_ref
2831
0
          && cond->op != IR_OVERFLOW
2832
          // TODO: temporary disable IF-conversion for RLOAD.
2833
          // We don't track anti-dependencies in GCM and Local Scheduling.
2834
          // As result COND may be scheduled below the following RSTORE.
2835
          // See: https://github.com/dstogov/ir/issues/132
2836
0
          && cond->op != IR_RLOAD
2837
0
          && !((cond->op >= IR_EQ && cond->op <= IR_UNORDERED)
2838
0
            && ((!IR_IS_CONST_REF(cond->op1) && ctx->ir_base[cond->op1].op == IR_RLOAD)
2839
0
             || (!IR_IS_CONST_REF(cond->op2) && ctx->ir_base[cond->op2].op == IR_RLOAD)))) {
2840
          /* COND
2841
           *
2842
           *    prev                     prev
2843
           *    |  cond                  |
2844
           *    | /                      |
2845
           *    IF                       |
2846
           *    | \                      |
2847
           *    |  +-----+               |
2848
           *    |        IF_FALSE        |
2849
           *    IF_TRUE  |           =>  |
2850
           *    |        END             |
2851
           *    END     /                |
2852
           *    |  +---+                 |
2853
           *    | /                      |
2854
           *    MERGE                    |
2855
           *    |    \                   |
2856
           *    |     PHI(A, B)          |    COND(cond, A, B)
2857
           *    next                     next
2858
           */
2859
0
          ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
2860
0
          ir_insn *next;
2861
2862
0
          if (next_ref == ref) {
2863
0
            next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
2864
0
          }
2865
0
          next = &ctx->ir_base[next_ref];
2866
2867
0
          if (ctx->use_lists[start1_ref].count != 1) {
2868
0
            ir_remove_unused_vars(ctx, start1_ref, end1_ref);
2869
0
          }
2870
0
          if (ctx->use_lists[start2_ref].count != 1) {
2871
0
            ir_remove_unused_vars(ctx, start2_ref, end2_ref);
2872
0
          }
2873
2874
0
          insn->op = IR_COND;
2875
0
          insn->inputs_count = 3;
2876
0
          insn->op1 = cond_ref;
2877
0
          if (start1->op == IR_IF_FALSE) {
2878
0
            SWAP_REFS(insn->op2, insn->op3);
2879
0
          }
2880
2881
0
          next->op1 = root->op1;
2882
0
          ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
2883
0
          ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
2884
0
          ir_use_list_remove_one(ctx, root->op2, root_ref);
2885
2886
0
          MAKE_NOP(root);   CLEAR_USES(root_ref);
2887
0
          MAKE_NOP(start1); CLEAR_USES(start1_ref);
2888
0
          MAKE_NOP(start2); CLEAR_USES(start2_ref);
2889
0
          MAKE_NOP(end1);   CLEAR_USES(end1_ref);
2890
0
          MAKE_NOP(end2);   CLEAR_USES(end2_ref);
2891
0
          MAKE_NOP(merge);  CLEAR_USES(merge_ref);
2892
2893
0
          ir_bitqueue_add(worklist, ref);
2894
0
          if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
2895
0
            ir_bitqueue_add(worklist, next->op1);
2896
0
          }
2897
2898
0
          return 1;
2899
0
        }
2900
0
      }
2901
0
    }
2902
0
  }
2903
2904
0
  return 0;
2905
0
}
2906
2907
static bool ir_cmp_is_true(ir_op op, const ir_insn *op1, const ir_insn *op2)
2908
0
{
2909
0
  IR_ASSERT(op1->type == op2->type);
2910
0
  if (IR_IS_TYPE_INT(op1->type)) {
2911
0
    if (op == IR_EQ) {
2912
0
      return op1->val.u64 == op2->val.u64;
2913
0
    } else if (op == IR_NE) {
2914
0
      return op1->val.u64 != op2->val.u64;
2915
0
    } else if (op == IR_LT) {
2916
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2917
0
        return op1->val.i64 < op2->val.i64;
2918
0
      } else {
2919
0
        return op1->val.u64 < op2->val.u64;
2920
0
      }
2921
0
    } else if (op == IR_GE) {
2922
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2923
0
        return op1->val.i64 >= op2->val.i64;
2924
0
      } else {
2925
0
        return op1->val.u64 >= op2->val.u64;
2926
0
      }
2927
0
    } else if (op == IR_LE) {
2928
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2929
0
        return op1->val.i64 <= op2->val.i64;
2930
0
      } else {
2931
0
        return op1->val.u64 <= op2->val.u64;
2932
0
      }
2933
0
    } else if (op == IR_GT) {
2934
0
      if (IR_IS_TYPE_SIGNED(op1->type)) {
2935
0
        return op1->val.i64 > op2->val.i64;
2936
0
      } else {
2937
0
        return op1->val.u64 > op2->val.u64;
2938
0
      }
2939
0
    } else if (op == IR_ULT) {
2940
0
      return op1->val.u64 < op2->val.u64;
2941
0
    } else if (op == IR_UGE) {
2942
0
      return op1->val.u64 >= op2->val.u64;
2943
0
    } else if (op == IR_ULE) {
2944
0
      return op1->val.u64 <= op2->val.u64;
2945
0
    } else if (op == IR_UGT) {
2946
0
      return op1->val.u64 > op2->val.u64;
2947
0
    } else {
2948
0
      IR_ASSERT(0);
2949
0
      return 0;
2950
0
    }
2951
0
  } else if (op1->type == IR_DOUBLE) {
2952
0
    if (op == IR_EQ) {
2953
0
      return op1->val.d == op2->val.d;
2954
0
    } else if (op == IR_NE) {
2955
0
      return op1->val.d != op2->val.d;
2956
0
    } else if (op == IR_LT) {
2957
0
      return op1->val.d < op2->val.d;
2958
0
    } else if (op == IR_GE) {
2959
0
      return op1->val.d >= op2->val.d;
2960
0
    } else if (op == IR_LE) {
2961
0
      return op1->val.d <= op2->val.d;
2962
0
    } else if (op == IR_GT) {
2963
0
      return op1->val.d > op2->val.d;
2964
0
    } else if (op == IR_ULT) {
2965
0
      return !(op1->val.d >= op2->val.d);
2966
0
    } else if (op == IR_UGE) {
2967
0
      return !(op1->val.d < op2->val.d);
2968
0
    } else if (op == IR_ULE) {
2969
0
      return !(op1->val.d > op2->val.d);
2970
0
    } else if (op == IR_UGT) {
2971
0
      return !(op1->val.d <= op2->val.d);
2972
0
    } else if (op == IR_ORDERED) {
2973
0
      return !isnan(op1->val.d) && !isnan(op2->val.d);
2974
0
    } else if (op == IR_UNORDERED) {
2975
0
      return isnan(op1->val.d) || isnan(op2->val.d);
2976
0
    } else {
2977
0
      IR_ASSERT(0);
2978
0
      return 0;
2979
0
    }
2980
0
  } else {
2981
0
    IR_ASSERT(op1->type == IR_FLOAT);
2982
0
    if (op == IR_EQ) {
2983
0
      return op1->val.f == op2->val.f;
2984
0
    } else if (op == IR_NE) {
2985
0
      return op1->val.f != op2->val.f;
2986
0
    } else if (op == IR_LT) {
2987
0
      return op1->val.f < op2->val.f;
2988
0
    } else if (op == IR_GE) {
2989
0
      return op1->val.f >= op2->val.f;
2990
0
    } else if (op == IR_LE) {
2991
0
      return op1->val.f <= op2->val.f;
2992
0
    } else if (op == IR_GT) {
2993
0
      return op1->val.f > op2->val.f;
2994
0
    } else if (op == IR_ULT) {
2995
0
      return !(op1->val.f >= op2->val.f);
2996
0
    } else if (op == IR_UGE) {
2997
0
      return !(op1->val.f < op2->val.f);
2998
0
    } else if (op == IR_ULE) {
2999
0
      return !(op1->val.f > op2->val.f);
3000
0
    } else if (op == IR_UGT) {
3001
0
      return !(op1->val.f <= op2->val.f);
3002
0
    } else if (op == IR_ORDERED) {
3003
0
      return !isnan(op1->val.f) && !isnan(op2->val.f);
3004
0
    } else if (op == IR_UNORDERED) {
3005
0
      return isnan(op1->val.f) || isnan(op2->val.f);
3006
0
    } else {
3007
0
      IR_ASSERT(0);
3008
0
      return 0;
3009
0
    }
3010
0
  }
3011
0
}
3012
3013
static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3014
0
{
3015
0
  ir_ref cond_ref = insn->op2;
3016
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
3017
3018
0
  if (cond->op == IR_PHI
3019
0
   && cond->inputs_count == 3
3020
0
   && cond->op1 == insn->op1
3021
0
   && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
3022
0
    || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
3023
0
    ir_ref merge_ref = insn->op1;
3024
0
    ir_insn *merge = &ctx->ir_base[merge_ref];
3025
3026
0
    if (ctx->use_lists[merge_ref].count == 2) {
3027
0
      ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3028
0
      ir_insn *end1 = &ctx->ir_base[end1_ref];
3029
0
      ir_insn *end2 = &ctx->ir_base[end2_ref];
3030
3031
0
      if (end1->op == IR_END && end2->op == IR_END) {
3032
0
        ir_ref if_true_ref, if_false_ref;
3033
0
        ir_insn *if_true, *if_false;
3034
0
        ir_op op = IR_IF_FALSE;
3035
3036
0
        ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3037
3038
0
        if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
3039
0
          IR_ASSERT(IR_IS_CONST_REF(cond->op3));
3040
0
          SWAP_REFS(cond->op2, cond->op3);
3041
0
          SWAP_REFS(merge->op1, merge->op2);
3042
0
          SWAP_REFS(end1_ref, end2_ref);
3043
0
          SWAP_INSNS(end1, end2);
3044
0
        }
3045
0
        if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
3046
0
          SWAP_REFS(if_true_ref, if_false_ref);
3047
0
          op = IR_IF_TRUE;
3048
0
        }
3049
0
        if_true = &ctx->ir_base[if_true_ref];
3050
0
        if_false = &ctx->ir_base[if_false_ref];
3051
3052
0
        if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
3053
0
          if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
3054
            /* Simple IF Split
3055
             *
3056
             *    |        |               |        |
3057
             *    |        END             |        END
3058
             *    END     /                END       \
3059
             *    |  +---+                 |          +
3060
             *    | /                      |          |
3061
             *    MERGE                    |          |
3062
             *    | \                      |          |
3063
             *    |  PHI(false, true)      |          |
3064
             *    | /                      |          |
3065
             *    IF                   =>  |          |
3066
             *    | \                      |          |
3067
             *    |  +------+              |          |
3068
             *    |         IF_TRUE        |          BEGIN
3069
             *    IF_FALSE  |              BEGIN
3070
             *    |                        |
3071
             */
3072
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3073
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3074
3075
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
3076
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3077
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
3078
3079
0
            if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3080
0
            if_false->op1 = end1_ref;
3081
0
            if_false->op2 = IR_UNUSED;
3082
3083
0
            if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3084
0
            if_true->op1 = end2_ref;
3085
0
            if_true->op2 = IR_UNUSED;
3086
3087
0
            ir_bitqueue_add(worklist, if_false_ref);
3088
0
            ir_bitqueue_add(worklist, if_true_ref);
3089
3090
0
            return 1;
3091
0
          } else {
3092
            /* Simple IF Split
3093
             *
3094
             *    |        |               |        |
3095
             *    |        END             |        END
3096
             *    END     /                END      |
3097
             *    |  +---+                 |        |
3098
             *    | /                      |        |
3099
             *    MERGE                    |        +
3100
             *    | \                      |       /
3101
             *    |  PHI(false, false)     |      /
3102
             *    | /                      |     /
3103
             *    IF                   =>  |    /
3104
             *    | \                      |   /
3105
             *    |  +------+              |  /
3106
             *    |         IF_TRUE        | /        BEGIN(unreachable)
3107
             *    IF_FALSE  |              MERGE
3108
             *    |                        |
3109
             */
3110
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3111
0
            ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3112
3113
0
            MAKE_NOP(merge); CLEAR_USES(merge_ref);
3114
0
            MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3115
0
            MAKE_NOP(insn);  CLEAR_USES(ref);
3116
3117
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3118
0
            if_false->op1 = end1_ref;
3119
0
            if_false->op2 = end2_ref;
3120
3121
0
            if_true->optx = IR_BEGIN;
3122
0
            if_true->op1 = IR_UNUSED;
3123
0
            if_true->op2 = IR_UNUSED;
3124
3125
0
            ctx->flags2 &= ~IR_CFG_REACHABLE;
3126
3127
0
            ir_bitqueue_add(worklist, if_false_ref);
3128
3129
0
            return 1;
3130
0
          }
3131
0
        }
3132
3133
        /* Simple IF Split
3134
         *
3135
         *    |        |               |        |
3136
         *    |        END             |        IF(X)
3137
         *    END     /                END     / \
3138
         *    |  +---+                 |   +--+   +
3139
         *    | /                      |  /       |
3140
         *    MERGE                    | IF_FALSE |
3141
         *    | \                      | |        |
3142
         *    |  PHI(false, X)         | |        |
3143
         *    | /                      | |        |
3144
         *    IF                   =>  | END      |
3145
         *    | \                      | |        |
3146
         *    |  +------+              | |        |
3147
         *    |         IF_TRUE        | |        IF_TRUE
3148
         *    IF_FALSE  |              MERGE
3149
         *    |                        |
3150
         */
3151
0
        ir_use_list_remove_one(ctx, merge_ref, cond_ref);
3152
0
        ir_use_list_remove_one(ctx, ref, if_true_ref);
3153
0
        if (!IR_IS_CONST_REF(cond->op3)) {
3154
0
          ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
3155
0
        }
3156
0
        ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3157
0
        ir_use_list_add(ctx, end2_ref, if_true_ref);
3158
3159
0
        end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3160
0
        end2->op2 = cond->op3;
3161
0
        ir_bitqueue_add(worklist, end2_ref);
3162
3163
0
        merge->optx = IR_OPTX(op, IR_VOID, 1);
3164
0
        merge->op1 = end2_ref;
3165
0
        merge->op2 = IR_UNUSED;
3166
3167
0
        MAKE_NOP(cond);
3168
0
        CLEAR_USES(cond_ref);
3169
3170
0
        insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3171
0
        insn->op1 = merge_ref;
3172
0
        insn->op2 = IR_UNUSED;
3173
3174
0
        if_true->op1 = end2_ref;
3175
3176
0
        if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3177
0
        if_false->op1 = end1_ref;
3178
0
        if_false->op2 = ref;
3179
3180
0
        ir_bitqueue_add(worklist, if_false_ref);
3181
0
        if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3182
0
          ir_bitqueue_add(worklist, end2->op1);
3183
0
        }
3184
3185
0
        return 1;
3186
0
      }
3187
0
    }
3188
0
  }
3189
3190
0
  return 0;
3191
0
}
3192
3193
static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3194
0
{
3195
0
  ir_ref cond_ref = insn->op2;
3196
0
  ir_insn *cond = &ctx->ir_base[cond_ref];
3197
3198
0
  if (cond->op >= IR_EQ && cond->op <= IR_UGT
3199
0
   && IR_IS_CONST_REF(cond->op2)
3200
0
   && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
3201
0
   && ctx->use_lists[insn->op2].count == 1) {
3202
0
    ir_ref phi_ref = cond->op1;
3203
0
    ir_insn *phi = &ctx->ir_base[phi_ref];
3204
3205
0
    if (phi->op == IR_PHI
3206
0
     && phi->inputs_count == 3
3207
0
     && phi->op1 == insn->op1
3208
0
     && ctx->use_lists[phi_ref].count == 1
3209
0
     && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
3210
0
      || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
3211
0
      ir_ref merge_ref = insn->op1;
3212
0
      ir_insn *merge = &ctx->ir_base[merge_ref];
3213
3214
0
      if (ctx->use_lists[merge_ref].count == 2) {
3215
0
        ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
3216
0
        ir_insn *end1 = &ctx->ir_base[end1_ref];
3217
0
        ir_insn *end2 = &ctx->ir_base[end2_ref];
3218
3219
0
        if (end1->op == IR_END && end2->op == IR_END) {
3220
0
          ir_ref if_true_ref, if_false_ref;
3221
0
          ir_insn *if_true, *if_false;
3222
0
          ir_op op = IR_IF_FALSE;
3223
3224
0
          ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3225
3226
0
          if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
3227
0
            IR_ASSERT(IR_IS_CONST_REF(phi->op3));
3228
0
            SWAP_REFS(phi->op2, phi->op3);
3229
0
            SWAP_REFS(merge->op1, merge->op2);
3230
0
            SWAP_REFS(end1_ref, end2_ref);
3231
0
            SWAP_INSNS(end1, end2);
3232
0
          }
3233
0
          if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
3234
0
            SWAP_REFS(if_true_ref, if_false_ref);
3235
0
            op = IR_IF_TRUE;
3236
0
          }
3237
0
          if_true = &ctx->ir_base[if_true_ref];
3238
0
          if_false = &ctx->ir_base[if_false_ref];
3239
3240
0
          if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
3241
0
            if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
3242
              /* IF Split
3243
               *
3244
               *    |        |               |        |
3245
               *    |        END             |        END
3246
               *    END     /                END      |
3247
               *    |  +---+                 |        |
3248
               *    | /                      |        |
3249
               *    MERGE                    |        |
3250
               *    | \                      |        |
3251
               *    |  PHI(C1, X)            |        |
3252
               *    |  |                     |        |
3253
               *    |  CMP(_, C2)            |        |
3254
               *    | /                      |        |
3255
               *    IF                   =>  |        |
3256
               *    | \                      |        |
3257
               *    |  +------+              |        |
3258
               *    |         IF_TRUE        |        BEGIN
3259
               *    IF_FALSE  |              BEGIN    |
3260
               *    |                        |
3261
               */
3262
3263
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3264
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
3265
3266
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3267
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3268
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3269
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3270
3271
0
              if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3272
0
              if_false->op1 = end1_ref;
3273
0
              if_false->op2 = IR_UNUSED;
3274
3275
0
              if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
3276
0
              if_true->op1 = end2_ref;
3277
0
              if_true->op2 = IR_UNUSED;
3278
3279
0
              ir_bitqueue_add(worklist, if_false_ref);
3280
0
              ir_bitqueue_add(worklist, if_true_ref);
3281
3282
0
              return 1;
3283
0
            } else {
3284
              /* IF Split
3285
               *
3286
               *    |        |               |        |
3287
               *    |        END             |        END
3288
               *    END     /                END      |
3289
               *    |  +---+                 |        |
3290
               *    | /                      |        |
3291
               *    MERGE                    |        |
3292
               *    | \                      |        |
3293
               *    |  PHI(C1, X)            |        |
3294
               *    |  |                     |        +
3295
               *    |  CMP(_, C2)            |       /
3296
               *    | /                      |      /
3297
               *    IF                   =>  |     /
3298
               *    | \                      |    /
3299
               *    |  +------+              |   /
3300
               *    |         IF_TRUE        |  /      BEGIN(unreachable)
3301
               *    IF_FALSE  |              MERGE     |
3302
               *    |                        |
3303
               */
3304
3305
0
              ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3306
0
              ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
3307
3308
0
              MAKE_NOP(merge); CLEAR_USES(merge_ref);
3309
0
              MAKE_NOP(phi);   CLEAR_USES(phi_ref);
3310
0
              MAKE_NOP(cond);  CLEAR_USES(cond_ref);
3311
0
              MAKE_NOP(insn);  CLEAR_USES(ref);
3312
3313
0
              if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3314
0
              if_false->op1 = end1_ref;
3315
0
              if_false->op2 = end2_ref;
3316
3317
0
              if_true->optx = IR_BEGIN;
3318
0
              if_true->op1 = IR_UNUSED;
3319
0
              if_true->op2 = IR_UNUSED;
3320
3321
0
              ctx->flags2 &= ~IR_CFG_REACHABLE;
3322
3323
0
              ir_bitqueue_add(worklist, if_false_ref);
3324
3325
0
              return 1;
3326
0
            }
3327
0
          } else {
3328
            /* IF Split
3329
             *
3330
             *    |        |               |        |
3331
             *    |        END             |        IF<----+
3332
             *    END     /                END     / \     |
3333
             *    |  +---+                 |   +--+   +    |
3334
             *    | /                      |  /       |    |
3335
             *    MERGE                    | IF_FALSE |    |
3336
             *    | \                      | |        |    |
3337
             *    |  PHI(C1, X)            | |        |    |
3338
             *    |  |                     | |        |    |
3339
             *    |  CMP(_, C2)            | |        |    CMP(X, C2)
3340
             *    | /                      | |        |
3341
             *    IF                   =>  | END      |
3342
             *    | \                      | |        |
3343
             *    |  +------+              | |        |
3344
             *    |         IF_TRUE        | |        IF_TRUE
3345
             *    IF_FALSE  |              MERGE
3346
             *    |                        |
3347
             */
3348
3349
0
            ir_use_list_remove_one(ctx, merge_ref, phi_ref);
3350
0
            ir_use_list_remove_one(ctx, ref, if_true_ref);
3351
0
            if (!IR_IS_CONST_REF(phi->op3)) {
3352
0
              ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
3353
0
            }
3354
0
            ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
3355
0
            ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
3356
0
            ir_use_list_add(ctx, end2_ref, if_true_ref);
3357
3358
0
            end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
3359
0
            end2->op2 = insn->op2;
3360
0
            ir_bitqueue_add(worklist, end2_ref);
3361
3362
0
            merge->optx = IR_OPTX(op, IR_VOID, 1);
3363
0
            merge->op1 = end2_ref;
3364
0
            merge->op2 = IR_UNUSED;
3365
3366
0
            cond->op1 = phi->op3;
3367
0
            MAKE_NOP(phi);
3368
0
            CLEAR_USES(phi_ref);
3369
3370
0
            insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3371
0
            insn->op1 = merge_ref;
3372
0
            insn->op2 = IR_UNUSED;
3373
3374
0
            if_true->op1 = end2_ref;
3375
3376
0
            if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
3377
0
            if_false->op1 = end1_ref;
3378
0
            if_false->op2 = ref;
3379
3380
0
            ir_bitqueue_add(worklist, if_false_ref);
3381
0
            if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
3382
0
              ir_bitqueue_add(worklist, end2->op1);
3383
0
            }
3384
3385
0
            return 1;
3386
0
          }
3387
0
        }
3388
0
      }
3389
0
    }
3390
0
  }
3391
3392
0
  return 0;
3393
0
}
3394
3395
static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
3396
0
{
3397
0
  ir_use_list *use_list = &ctx->use_lists[merge_ref];
3398
3399
0
  if (use_list->count == 1) {
3400
0
    ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
3401
0
  } else if (use_list->count == 2) {
3402
0
    if (merge->inputs_count == 2) {
3403
0
      ir_ref phi_ref = ctx->use_edges[use_list->refs];
3404
0
      ir_insn *phi = &ctx->ir_base[phi_ref];
3405
3406
0
      ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
3407
0
      ir_insn *next = &ctx->ir_base[next_ref];
3408
3409
0
      if (next->op == IR_PHI) {
3410
0
        SWAP_REFS(phi_ref, next_ref);
3411
0
        SWAP_INSNS(phi, next);
3412
0
      }
3413
3414
0
      if (phi->op == IR_PHI && next->op != IR_PHI) {
3415
0
        if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
3416
0
          if (next->op2 == phi_ref) {
3417
0
            if (ir_try_split_if(ctx, next_ref, next, worklist)) {
3418
0
              return;
3419
0
            }
3420
0
          } else {
3421
0
            ir_insn *cmp = &ctx->ir_base[next->op2];
3422
3423
0
            if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
3424
0
             && cmp->op1 == phi_ref
3425
0
             && IR_IS_CONST_REF(cmp->op2)
3426
0
             && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
3427
0
             && ctx->use_lists[next->op2].count == 1) {
3428
0
              if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
3429
0
                return;
3430
0
              }
3431
0
            }
3432
0
          }
3433
0
        }
3434
0
        ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
3435
0
      }
3436
0
    }
3437
0
  }
3438
0
}
3439
3440
static ir_ref ir_find_ext_use(const ir_ctx *ctx, ir_ref ref)
3441
0
{
3442
0
  ir_use_list *use_list = &ctx->use_lists[ref];
3443
0
  ir_ref *p, n, use;
3444
0
  ir_insn *use_insn;
3445
3446
0
  for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) {
3447
0
    use = *p;
3448
0
    use_insn = &ctx->ir_base[use];
3449
0
    if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) {
3450
0
      return use;
3451
0
    }
3452
0
  }
3453
0
  return IR_UNUSED;
3454
0
}
3455
3456
static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist)
3457
0
{
3458
0
  ir_ref ext_ref;
3459
3460
0
  ext_ref = ir_find_ext_use(ctx, phi_ref);
3461
0
  if (!ext_ref) {
3462
0
    ext_ref = ir_find_ext_use(ctx, op_ref);
3463
0
  }
3464
0
  if (ext_ref) {
3465
0
    ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist);
3466
0
  }
3467
0
}
3468
3469
static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist)
3470
0
{
3471
0
  ir_ref n;
3472
3473
0
  if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) {
3474
0
    return;
3475
0
  }
3476
3477
  /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
3478
0
  for (n = 0; n < ctx->use_lists[loop_ref].count; n++) {
3479
    /* "use_lists" may be reallocated by ir_ext_ref() */
3480
0
    ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n];
3481
0
    ir_insn *use_insn = &ctx->ir_base[use];
3482
3483
0
    if (use_insn->op == IR_PHI) {
3484
0
      ir_ref op_ref = use_insn->op3;
3485
0
      ir_insn *op_insn = &ctx->ir_base[op_ref];
3486
3487
0
      if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) {
3488
0
        if (op_insn->op1 == use) {
3489
0
          if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) {
3490
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3491
0
          }
3492
0
        } else if (op_insn->op2 == use) {
3493
0
          if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) {
3494
0
            ir_iter_optimize_induction_var(ctx, use, op_ref, worklist);
3495
0
          }
3496
0
        }
3497
0
        }
3498
0
    }
3499
0
  }
3500
0
}
3501
3502
static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap)
3503
0
{
3504
0
  ir_insn *condition_insn = &ctx->ir_base[condition];
3505
3506
0
  while ((condition_insn->op == IR_BITCAST
3507
0
    || condition_insn->op == IR_ZEXT
3508
0
    || condition_insn->op == IR_SEXT)
3509
0
   && ctx->use_lists[condition].count == 1) {
3510
0
    condition = condition_insn->op1;
3511
0
    condition_insn = &ctx->ir_base[condition];
3512
0
  }
3513
3514
0
  if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) {
3515
0
    *swap = 1;
3516
0
    condition = condition_insn->op1;
3517
0
    condition_insn = &ctx->ir_base[condition];
3518
0
  }
3519
3520
0
  if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) {
3521
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3522
3523
0
    if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3524
0
      condition = condition_insn->op1;
3525
0
      condition_insn = &ctx->ir_base[condition];
3526
0
    }
3527
0
  } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) {
3528
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op2];
3529
3530
0
    if (condition_insn->op2 == IR_TRUE) {
3531
0
      condition = condition_insn->op1;
3532
0
      condition_insn = &ctx->ir_base[condition];
3533
0
    } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) {
3534
0
      condition = condition_insn->op1;
3535
0
      condition_insn = &ctx->ir_base[condition];
3536
0
      *swap = !*swap;
3537
0
    }
3538
0
  }
3539
3540
0
  if (condition_insn->op == IR_SHL && IR_IS_CONST_REF(condition_insn->op1)) {
3541
0
    ir_insn *val_insn = &ctx->ir_base[condition_insn->op1];
3542
0
    if (!IR_IS_SYM_CONST(val_insn->op) && val_insn->val.u64 == 1) {
3543
0
      return IR_TRUE;
3544
0
    }
3545
0
  }
3546
3547
0
  while ((condition_insn->op == IR_BITCAST
3548
0
    || condition_insn->op == IR_ZEXT
3549
0
    || condition_insn->op == IR_SEXT)
3550
0
   && ctx->use_lists[condition].count == 1) {
3551
0
    condition = condition_insn->op1;
3552
0
    condition_insn = &ctx->ir_base[condition];
3553
0
  }
3554
3555
0
  if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) {
3556
0
    return IR_TRUE;
3557
0
  }
3558
3559
0
  if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) {
3560
0
    condition = ir_check_dominating_predicates(ctx, control, condition);
3561
0
  }
3562
3563
0
  return condition;
3564
0
}
3565
3566
static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3567
0
{
3568
0
  bool swap = 0;
3569
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3570
3571
0
  if (swap) {
3572
0
    ir_use_list *use_list = &ctx->use_lists[ref];
3573
0
    ir_ref *p, use;
3574
3575
0
    IR_ASSERT(use_list->count == 2);
3576
0
    p = ctx->use_edges + use_list->refs;
3577
0
    use = *p;
3578
0
    if (ctx->ir_base[use].op == IR_IF_TRUE) {
3579
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3580
0
      use = *(p+1);
3581
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3582
0
    } else {
3583
0
      ctx->ir_base[use].op = IR_IF_TRUE;
3584
0
      use = *(p+1);
3585
0
      ctx->ir_base[use].op = IR_IF_FALSE;
3586
0
    }
3587
0
  }
3588
3589
0
  if (IR_IS_CONST_REF(condition)) {
3590
    /*
3591
     *    |                        |
3592
     *    IF(TRUE)             =>  END
3593
     *    | \                      |
3594
     *    |  +------+              |
3595
     *    |         IF_TRUE        |        BEGIN(unreachable)
3596
     *    IF_FALSE  |              BEGIN
3597
     *    |                        |
3598
     */
3599
0
    ir_ref if_true_ref, if_false_ref;
3600
0
    ir_insn *if_true, *if_false;
3601
3602
0
    insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
3603
0
    if (!IR_IS_CONST_REF(insn->op2)) {
3604
0
      ir_use_list_remove_one(ctx, insn->op2, ref);
3605
0
      ir_bitqueue_add(worklist, insn->op2);
3606
0
    }
3607
0
    insn->op2 = IR_UNUSED;
3608
3609
0
    ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
3610
0
    if_true = &ctx->ir_base[if_true_ref];
3611
0
    if_false = &ctx->ir_base[if_false_ref];
3612
0
    if_true->op = IR_BEGIN;
3613
0
    if_true->op2 = IR_UNUSED;
3614
0
    if_false->op = IR_BEGIN;
3615
0
    if_false->op2 = IR_UNUSED;
3616
0
    if (ir_ref_is_true(ctx, condition)) {
3617
0
      if_false->op1 = IR_UNUSED;
3618
0
      ir_use_list_remove_one(ctx, ref, if_false_ref);
3619
0
      ir_bitqueue_add(worklist, if_true_ref);
3620
0
    } else {
3621
0
      if_true->op1 = IR_UNUSED;
3622
0
      ir_use_list_remove_one(ctx, ref, if_true_ref);
3623
0
      ir_bitqueue_add(worklist, if_false_ref);
3624
0
    }
3625
0
    ctx->flags2 &= ~IR_CFG_REACHABLE;
3626
0
  } else if (insn->op2 != condition) {
3627
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3628
0
  }
3629
0
}
3630
3631
static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
3632
0
{
3633
0
  bool swap = 0;
3634
0
  ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap);
3635
3636
0
  if (swap) {
3637
0
    if (insn->op == IR_GUARD) {
3638
0
      insn->op = IR_GUARD_NOT;
3639
0
    } else {
3640
0
      insn->op = IR_GUARD;
3641
0
    }
3642
0
  }
3643
3644
0
  if (IR_IS_CONST_REF(condition)) {
3645
0
    if (insn->op == IR_GUARD) {
3646
0
      if (ir_ref_is_true(ctx, condition)) {
3647
0
        ir_ref prev, next;
3648
3649
0
remove_guard:
3650
0
        prev = insn->op1;
3651
0
        next = ir_next_control(ctx, ref);
3652
0
        if (ctx->ir_base[prev].op == IR_SNAPSHOT) {
3653
0
          ir_ref snapshot = prev;
3654
0
          prev = ctx->ir_base[prev].op1;
3655
0
          ir_use_list_remove_one(ctx, snapshot, ref);
3656
0
          ir_use_list_remove_one(ctx, ref, next);
3657
0
          ir_use_list_replace_one(ctx, prev, snapshot, next);
3658
0
          ir_iter_remove_insn(ctx, snapshot, worklist);
3659
0
        } else {
3660
0
          ir_use_list_remove_one(ctx, ref, next);
3661
0
          ir_use_list_replace_one(ctx, prev, ref, next);
3662
0
        }
3663
0
        ctx->ir_base[next].op1 = prev;
3664
0
        insn->op1 = IR_UNUSED;
3665
3666
0
        if (!IR_IS_CONST_REF(insn->op2)) {
3667
0
          ir_use_list_remove_one(ctx, insn->op2, ref);
3668
0
          if (ir_is_dead(ctx, insn->op2)) {
3669
            /* schedule DCE */
3670
0
            ir_bitqueue_add(worklist, insn->op2);
3671
0
          }
3672
0
        }
3673
3674
0
        if (!IR_IS_CONST_REF(insn->op3)) {
3675
0
          ir_use_list_remove_one(ctx, insn->op3, ref);
3676
0
          if (ir_is_dead(ctx, insn->op3)) {
3677
            /* schedule DCE */
3678
0
            ir_bitqueue_add(worklist, insn->op3);
3679
0
          }
3680
0
        }
3681
3682
0
        MAKE_NOP(insn);
3683
0
        return;
3684
0
      } else {
3685
0
        condition = IR_FALSE;
3686
0
      }
3687
0
    } else {
3688
0
      if (ir_ref_is_true(ctx, condition)) {
3689
0
        condition = IR_TRUE;
3690
0
      } else {
3691
0
        goto remove_guard;
3692
0
      }
3693
0
    }
3694
0
  }
3695
3696
0
  if (insn->op2 != condition) {
3697
0
    ir_iter_update_op(ctx, ref, 2, condition, worklist);
3698
0
  }
3699
0
}
3700
3701
void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist)
3702
0
{
3703
0
  ir_ref i, val;
3704
0
  ir_insn *insn;
3705
3706
0
  while ((i = ir_bitqueue_pop(worklist)) >= 0) {
3707
0
    insn = &ctx->ir_base[i];
3708
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3709
0
      if (ctx->use_lists[i].count == 0) {
3710
0
        if (insn->op == IR_PHI) {
3711
0
          ir_bitqueue_add(worklist, insn->op1);
3712
0
        }
3713
0
        ir_iter_remove_insn(ctx, i, worklist);
3714
0
      } else {
3715
0
        insn = &ctx->ir_base[i];
3716
0
        switch (insn->op) {
3717
0
          case IR_FP2FP:
3718
0
            if (insn->type == IR_FLOAT) {
3719
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3720
0
                ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist);
3721
0
                insn->op1 = ref;
3722
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3723
0
                break;
3724
0
              }
3725
0
            } else {
3726
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3727
0
                ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist);
3728
0
                insn->op1 = ref;
3729
0
                ir_iter_replace_insn(ctx, i, ref, worklist);
3730
0
                break;
3731
0
              }
3732
0
            }
3733
0
            goto folding;
3734
0
          case IR_FP2INT:
3735
0
            if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
3736
0
              if (ir_may_promote_d2f(ctx, insn->op1)) {
3737
0
                insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist);
3738
0
              }
3739
0
            } else {
3740
0
              if (ir_may_promote_f2d(ctx, insn->op1)) {
3741
0
                insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist);
3742
0
              }
3743
0
            }
3744
0
            goto folding;
3745
0
          case IR_TRUNC:
3746
0
            if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) {
3747
0
              ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist);
3748
0
              insn->op1 = ref;
3749
0
              ir_iter_replace_insn(ctx, i, ref, worklist);
3750
0
              break;
3751
0
            }
3752
0
            goto folding;
3753
0
          case IR_SEXT:
3754
0
          case IR_ZEXT:
3755
0
            if (ir_try_promote_ext(ctx, i, insn, worklist)) {
3756
0
              break;
3757
0
            }
3758
0
            goto folding;
3759
0
          case IR_PHI:
3760
0
            break;
3761
0
          default:
3762
0
folding:
3763
0
            ir_iter_fold(ctx, i, worklist);
3764
0
            break;
3765
0
        }
3766
0
      }
3767
0
    } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
3768
0
      if (!(ctx->flags & IR_OPT_CFG)) {
3769
        /* pass */
3770
0
      } else if (insn->op == IR_BEGIN) {
3771
0
        if (insn->op1
3772
0
         && !insn->op2 /* no computed goto label */
3773
0
         && ctx->use_lists[i].count == 1
3774
0
         && ctx->ir_base[insn->op1].op == IR_END) {
3775
0
          ir_merge_blocks(ctx, insn->op1, i, worklist);
3776
0
        }
3777
0
      } else if (insn->op == IR_MERGE) {
3778
0
        ir_iter_optimize_merge(ctx, i, insn, worklist);
3779
0
      } else if (insn->op == IR_LOOP_BEGIN) {
3780
0
        ir_iter_optimize_loop(ctx, i, insn, worklist);
3781
0
      }
3782
0
    } else if (ir_is_dead_load(ctx, i)) {
3783
0
      ir_ref next;
3784
3785
      /* remove LOAD from double linked control list */
3786
0
remove_mem_insn:
3787
0
      next = ctx->use_edges[ctx->use_lists[i].refs];
3788
0
      IR_ASSERT(ctx->use_lists[i].count == 1);
3789
0
      ctx->ir_base[next].op1 = insn->op1;
3790
0
      ir_use_list_replace_one(ctx, insn->op1, i, next);
3791
0
      insn->op1 = IR_UNUSED;
3792
0
      ir_iter_remove_insn(ctx, i, worklist);
3793
0
    } else if (insn->op == IR_LOAD) {
3794
0
      val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2);
3795
0
      if (val) {
3796
0
        ir_insn *val_insn;
3797
0
        ir_ref prev, next;
3798
3799
0
remove_aliased_load:
3800
0
        prev = insn->op1;
3801
0
        next = ir_next_control(ctx, i);
3802
0
        ctx->ir_base[next].op1 = prev;
3803
0
        ir_use_list_remove_one(ctx, i, next);
3804
0
        ir_use_list_replace_one(ctx, prev, i, next);
3805
0
        insn->op1 = IR_UNUSED;
3806
3807
0
        val_insn = &ctx->ir_base[val];
3808
0
        if (val_insn->type == insn->type) {
3809
0
          ir_iter_replace_insn(ctx, i, val, worklist);
3810
0
        } else {
3811
0
          if (!IR_IS_CONST_REF(insn->op2)) {
3812
0
            ir_use_list_remove_one(ctx, insn->op2, i);
3813
0
            if (ir_is_dead(ctx, insn->op2)) {
3814
              /* schedule DCE */
3815
0
              ir_bitqueue_add(worklist, insn->op2);
3816
0
            }
3817
0
          }
3818
0
          if (!IR_IS_CONST_REF(val)) {
3819
0
            ir_use_list_add(ctx, val, i);
3820
0
          }
3821
0
          if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) {
3822
            /* load forwarding with bitcast (L2L) */
3823
0
            insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1);
3824
0
          } else {
3825
            /* partial load forwarding (L2L) */
3826
0
            insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1);
3827
0
          }
3828
0
          insn->op1 = val;
3829
0
          insn->op2 = IR_UNUSED;
3830
0
          ir_bitqueue_add(worklist, i);
3831
0
          ir_iter_add_uses(ctx, i, worklist);
3832
0
        }
3833
0
      }
3834
0
    } else if (insn->op == IR_STORE) {
3835
0
      if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) {
3836
0
        goto remove_mem_insn;
3837
0
      } else {
3838
0
        ir_insn *val_insn;
3839
3840
0
remove_bitcast:
3841
0
        val = insn->op3;
3842
0
        val_insn = &ctx->ir_base[val];
3843
0
        if (val_insn->op == IR_BITCAST
3844
0
         && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) {
3845
0
          insn->op3 = val_insn->op1;
3846
0
          ir_use_list_remove_one(ctx, val, i);
3847
0
          if (ctx->use_lists[val].count == 0) {
3848
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3849
0
              ir_use_list_replace_one(ctx, val_insn->op1, val, i);
3850
0
            }
3851
0
            ir_iter_remove_insn(ctx, val, worklist);
3852
0
          } else {
3853
0
            if (!IR_IS_CONST_REF(val_insn->op1)) {
3854
0
              ir_use_list_add(ctx, val_insn->op1, i);
3855
0
            }
3856
0
          }
3857
0
        }
3858
0
      }
3859
0
    } else if (insn->op == IR_VLOAD) {
3860
0
      val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2);
3861
0
      if (val) {
3862
0
        goto remove_aliased_load;
3863
0
      }
3864
0
    } else if (insn->op == IR_VSTORE) {
3865
0
      if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) {
3866
0
        goto remove_mem_insn;
3867
0
      } else {
3868
0
        goto remove_bitcast;
3869
0
      }
3870
0
    } else if (insn->op == IR_IF) {
3871
0
      ir_iter_optimize_if(ctx, i, insn, worklist);
3872
0
    } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
3873
0
      ir_iter_optimize_guard(ctx, i, insn, worklist);
3874
0
    }
3875
0
  }
3876
0
}
3877
3878
void ir_iter_cleanup(ir_ctx *ctx)
3879
0
{
3880
0
  ir_bitqueue iter_worklist;
3881
0
  ir_bitqueue cfg_worklist;
3882
0
  ir_ref i, n;
3883
0
  ir_insn *insn;
3884
3885
0
  ir_bitqueue_init(&cfg_worklist, ctx->insns_count);
3886
0
  ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3887
3888
  /* Remove unused nodes */
3889
0
  for (i = IR_UNUSED + 1, insn = ctx->ir_base + i; i < ctx->insns_count;) {
3890
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3891
0
      if (insn->op != IR_NOP && ctx->use_lists[i].count == 0) {
3892
0
        ir_iter_remove_insn(ctx, i, &iter_worklist);
3893
0
      }
3894
0
    } else if (insn->op == IR_IF || insn->op == IR_MERGE) {
3895
0
      ir_bitqueue_add(&cfg_worklist, i);
3896
0
    }
3897
0
    n = insn->inputs_count;
3898
0
    n = ir_insn_inputs_to_len(n);
3899
0
    i += n;
3900
0
    insn += n;
3901
0
  }
3902
3903
0
  while ((i = ir_bitqueue_pop(&iter_worklist)) >= 0) {
3904
0
    insn = &ctx->ir_base[i];
3905
0
    if (IR_IS_FOLDABLE_OP(insn->op)) {
3906
0
      if (ctx->use_lists[i].count == 0) {
3907
0
        ir_iter_remove_insn(ctx, i, &iter_worklist);
3908
0
      }
3909
0
    }
3910
0
  }
3911
3912
  /* Cleanup Control Flow */
3913
0
  ir_iter_opt(ctx, &cfg_worklist);
3914
3915
0
  ir_bitqueue_free(&iter_worklist);
3916
0
  ir_bitqueue_free(&cfg_worklist);
3917
0
}
3918
3919
int ir_sccp(ir_ctx *ctx)
3920
0
{
3921
0
  ir_bitqueue sccp_worklist, iter_worklist;
3922
0
  ir_sccp_val *_values;
3923
3924
0
  ir_bitqueue_init(&iter_worklist, ctx->insns_count);
3925
0
  ir_bitqueue_init(&sccp_worklist, ctx->insns_count);
3926
0
  _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_sccp_val));
3927
3928
0
  ctx->flags2 |= IR_OPT_IN_SCCP;
3929
0
  ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist);
3930
0
  ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist);
3931
0
  ctx->flags2 &= ~IR_OPT_IN_SCCP;
3932
3933
0
  ir_mem_free(_values);
3934
0
  ir_bitqueue_free(&sccp_worklist);
3935
3936
0
  ctx->flags2 |= IR_CFG_REACHABLE;
3937
3938
0
  ir_iter_opt(ctx, &iter_worklist);
3939
3940
0
  ir_bitqueue_free(&iter_worklist);
3941
3942
0
  return 1;
3943
0
}