/src/php-src/ext/opcache/jit/ir/ir_sccp.c
Line | Count | Source |
1 | | /* |
2 | | * IR - Lightweight JIT Compilation Framework |
3 | | * (SCCP - Sparse Conditional Constant Propagation) |
4 | | * Copyright (C) 2022 Zend by Perforce. |
5 | | * Authors: Dmitry Stogov <dmitry@php.net> |
6 | | * |
7 | | * The SCCP algorithm is based on the publication by M. N. Wegman and F. K. Zadeck. |
8 | | * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches", |
9 | | * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991. |
10 | | */ |
11 | | |
12 | | #include "ir.h" |
13 | | #include "ir_private.h" |
14 | | |
15 | | #define IR_COMBO_COPY_PROPAGATION 1 |
16 | | |
17 | 0 | #define IR_TOP IR_UNUSED |
18 | 0 | #define IR_BOTTOM IR_LAST_OP |
19 | | |
20 | | #define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0) |
21 | 0 | #define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0) |
22 | | |
23 | 0 | #define IR_IS_TOP(ref) (ref >= 0 && _values[ref].op == IR_TOP) |
24 | 0 | #define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].op == IR_BOTTOM) |
25 | 0 | #define IR_IS_REACHABLE(ref) _ir_is_reachable_ctrl(ctx, _values, ref) |
26 | 0 | #define IR_IS_CONST(ref) (IR_IS_CONST_REF(ref) || IR_IS_CONST_OP(_values[ref].op)) |
27 | | |
28 | | IR_ALWAYS_INLINE bool _ir_is_reachable_ctrl(ir_ctx *ctx, ir_insn *_values, ir_ref ref) |
29 | 0 | { |
30 | 0 | IR_ASSERT(!IR_IS_CONST_REF(ref)); |
31 | 0 | IR_ASSERT(ir_op_flags[ctx->ir_base[ref].op] & IR_OP_FLAG_CONTROL); |
32 | 0 | return _values[ref].op != IR_TOP; /* BOTTOM, IF or MERGE */ |
33 | 0 | } |
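/*
 * Illustrative aside: the classic three-level SCCP lattice and its meet
 * operator, sketched standalone under a simplified value representation.
 * The code below encodes the same lattice directly in the shadow _values[]
 * array: IR_TOP (== IR_UNUSED) means "no information yet", a constant
 * opcode means "known constant", and IR_BOTTOM (== IR_LAST_OP) means
 * "varying at run-time".
 */
#if 0
#include <stdint.h>

typedef enum { LAT_TOP, LAT_CONST, LAT_BOTTOM } lat_kind;
typedef struct { lat_kind kind; int64_t val; } lat_val;

static lat_val lat_meet(lat_val a, lat_val b)
{
	if (a.kind == LAT_TOP) return b;                 /* TOP meet x => x */
	if (b.kind == LAT_TOP) return a;
	if (a.kind == LAT_CONST && b.kind == LAT_CONST && a.val == b.val) {
		return a;                                    /* same constant stays */
	}
	return (lat_val){LAT_BOTTOM, 0};                 /* conflicting facts */
}
#endif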
34 | | |
35 | | IR_ALWAYS_INLINE void ir_sccp_add_uses(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref) |
36 | 0 | { |
37 | 0 | ir_use_list *use_list; |
38 | 0 | ir_ref n, *p, use; |
39 | |
40 | 0 | IR_ASSERT(!IR_IS_CONST_REF(ref)); |
41 | 0 | use_list = &ctx->use_lists[ref]; |
42 | 0 | n = use_list->count; |
43 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
44 | 0 | use = *p; |
45 | 0 | if (_values[use].op != IR_BOTTOM) { |
46 | 0 | ir_bitqueue_add(worklist, use); |
47 | 0 | } |
48 | 0 | } |
49 | 0 | } |
50 | | |
51 | | IR_ALWAYS_INLINE void ir_sccp_add_input(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref) |
52 | 0 | { |
53 | 0 | IR_ASSERT(!IR_IS_CONST_REF(ref)); |
54 | 0 | IR_ASSERT(_values[ref].op == IR_TOP); |
55 | | /* do backward propagation only once */ |
56 | 0 | if (!_values[ref].op1) { |
57 | 0 | _values[ref].op1 = 1; |
58 | 0 | ir_bitqueue_add(worklist, ref); |
59 | 0 | } |
60 | 0 | } |
61 | | |
62 | | #if IR_COMBO_COPY_PROPAGATION |
63 | | IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_ctx *ctx, ir_insn *_values, ir_ref a) |
64 | 0 | { |
65 | 0 | if (a > 0 && _values[a].op == IR_COPY) { |
66 | 0 | do { |
67 | 0 | a = _values[a].op1; |
68 | 0 | IR_ASSERT(a > 0); |
69 | 0 | } while (_values[a].op == IR_COPY); |
70 | 0 | IR_ASSERT(_values[a].op == IR_BOTTOM); |
71 | 0 | } |
72 | 0 | return a; |
73 | 0 | } |
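/*
 * With copy propagation enabled, COPY lattice values form chains that
 * always terminate in a BOTTOM representative, and ir_sccp_identity()
 * resolves any ref to that representative: e.g. _values[9] = COPY(7),
 * _values[7] = COPY(3), _values[3] = BOTTOM gives identity 3 for both
 * 9 and 7.  The same walk over a simplified, hypothetical copy_of[]
 * array, where copy_of[a] == a marks a representative (sketch):
 */
#if 0
static int follow_copies(const int *copy_of, int a)
{
	while (copy_of[a] != a) {
		a = copy_of[a];   /* step towards the representative */
	}
	return a;
}
#endif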
74 | | |
75 | | #if 0 |
76 | | static void CHECK_LIST(ir_insn *_values, ir_ref ref) |
77 | | { |
78 | | ir_ref member = _values[ref].op2; |
79 | | while (member != ref) { |
80 | | IR_ASSERT(_values[_values[member].op2].op3 == member); |
81 | | member = _values[member].op2; |
82 | | } |
83 | | IR_ASSERT(_values[_values[ref].op2].op3 == ref); |
84 | | } |
85 | | #else |
86 | | # define CHECK_LIST(_values, ref) |
87 | | #endif |
88 | | |
89 | | static void ir_sccp_add_identity(ir_ctx *ctx, ir_insn *_values, ir_ref src, ir_ref dst) |
90 | 0 | { |
91 | 0 | IR_ASSERT(dst > 0 && _values[dst].op != IR_BOTTOM && _values[dst].op != IR_COPY); |
92 | 0 | IR_ASSERT((src > 0 && (_values[src].op == IR_BOTTOM || _values[src].op == IR_COPY))); |
93 | 0 | IR_ASSERT(ir_sccp_identity(ctx, _values, src) != dst); |
94 | |
95 | 0 | _values[dst].optx = IR_COPY; |
96 | 0 | _values[dst].op1 = src; |
97 | |
98 | 0 | if (_values[src].op == IR_BOTTOM) { |
99 | | /* initialize empty double-linked list */ |
100 | 0 | if (_values[src].op1 != src) { |
101 | 0 | _values[src].op1 = src; |
102 | 0 | _values[src].op2 = src; |
103 | 0 | _values[src].op3 = src; |
104 | 0 | } |
105 | 0 | } else { |
106 | 0 | src = ir_sccp_identity(ctx, _values, src); |
107 | 0 | } |
108 | | |
109 | | /* insert into circular double-linked list */ |
110 | 0 | ir_ref prev = _values[src].op3; |
111 | 0 | _values[dst].op2 = src; |
112 | 0 | _values[dst].op3 = prev; |
113 | 0 | _values[src].op3 = dst; |
114 | 0 | _values[prev].op2 = dst; |
115 | 0 | CHECK_LIST(_values, dst); |
116 | 0 | } |
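/*
 * The op2/op3 fields of lattice values are reused above as the next/prev
 * links of a circular doubly-linked list that collects every value proven
 * to be a copy of the same representative.  A standalone sketch of the
 * same insertion, assuming plain next[]/prev[] arrays:
 */
#if 0
static void list_insert_before(int *next, int *prev, int head, int node)
{
	int tail = prev[head];
	next[tail] = node;   /* old tail now points at the new node */
	prev[node] = tail;
	next[node] = head;   /* close the circle back to the head */
	prev[head] = node;
}
#endif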
117 | | |
118 | | static void ir_sccp_split_partition(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref) |
119 | 0 | { |
120 | 0 | ir_ref member, head, tail, next, prev; |
121 | |
122 | 0 | CHECK_LIST(_values, ref); |
123 | 0 | IR_MAKE_BOTTOM(ref); |
124 | 0 | _values[ref].op1 = ref; |
125 | |
126 | 0 | member = _values[ref].op2; |
127 | 0 | head = tail = IR_UNUSED; |
128 | 0 | while (member != ref) { |
129 | 0 | if (_values[member].op != IR_BOTTOM) { |
130 | 0 | ir_bitqueue_add(worklist, member); |
131 | 0 | } |
132 | 0 | ir_sccp_add_uses(ctx, _values, worklist, member); |
133 | |
134 | 0 | next = _values[member].op2; |
135 | 0 | if (ir_sccp_identity(ctx, _values, member) == ref) { |
136 | | /* remove "member" from the old circular double-linked list */ |
137 | 0 | prev = _values[member].op3; |
138 | 0 | _values[prev].op2 = next; |
139 | 0 | _values[next].op3 = prev; |
140 | | |
141 | | /* insert "member" into the new double-linked list */ |
142 | 0 | if (!head) { |
143 | 0 | head = tail = member; |
144 | 0 | } else { |
145 | 0 | _values[tail].op2 = member; |
146 | 0 | _values[member].op3 = tail; |
147 | 0 | tail = member; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | member = next; |
151 | 0 | } |
152 | | |
153 | | /* remove "ref" from the old circular double-linked list */ |
154 | 0 | next = _values[ref].op2; |
155 | 0 | prev = _values[ref].op3; |
156 | 0 | _values[prev].op2 = next; |
157 | 0 | _values[next].op3 = prev; |
158 | 0 | CHECK_LIST(_values, next); |
159 | | |
160 | | /* close the new circle */ |
161 | 0 | if (head) { |
162 | 0 | _values[ref].op2 = head; |
163 | 0 | _values[ref].op3 = tail; |
164 | 0 | _values[tail].op2 = ref; |
165 | 0 | _values[head].op3 = ref; |
166 | 0 | } else { |
167 | 0 | _values[ref].op2 = ref; |
168 | 0 | _values[ref].op3 = ref; |
169 | 0 | } |
170 | 0 | CHECK_LIST(_values, ref); |
171 | 0 | } |
172 | | |
173 | | IR_ALWAYS_INLINE void ir_sccp_make_bottom_ex(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref) |
174 | 0 | { |
175 | 0 | if (_values[ref].op == IR_COPY) { |
176 | 0 | ir_sccp_split_partition(ctx, _values, worklist, ref); |
177 | 0 | } else { |
178 | 0 | IR_MAKE_BOTTOM(ref); |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | 0 | # define IR_MAKE_BOTTOM_EX(ref) ir_sccp_make_bottom_ex(ctx, _values, worklist, ref) |
183 | | #else |
184 | | # define ir_sccp_identity(_ctx, _values, ref) (ref) |
185 | | # define IR_MAKE_BOTTOM_EX(ref) IR_MAKE_BOTTOM(ref) |
186 | | #endif |
187 | | |
188 | | IR_ALWAYS_INLINE bool ir_sccp_meet_const(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *val_insn) |
189 | 0 | { |
190 | 0 | IR_ASSERT(IR_IS_CONST_OP(val_insn->op) || IR_IS_SYM_CONST(val_insn->op)); |
191 | |
192 | 0 | if (_values[ref].op == IR_TOP) { |
193 | | /* TOP meet NEW_CONST => NEW_CONST */ |
194 | 0 | _values[ref].optx = val_insn->opt; |
195 | 0 | _values[ref].val.u64 = val_insn->val.u64; |
196 | 0 | return 1; |
197 | 0 | } else if (_values[ref].opt == val_insn->opt) { |
198 | | /* OLD_CONST meet NEW_CONST => (OLD_CONST == NEW_CONST) ? OLD_CONST : BOTTOM */ |
199 | 0 | if (_values[ref].val.u64 == val_insn->val.u64) { |
200 | 0 | return 0; |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | 0 | IR_MAKE_BOTTOM_EX(ref); |
205 | 0 | return 1; |
206 | 0 | } |
207 | | |
208 | | IR_ALWAYS_INLINE bool ir_sccp_meet(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_ref val) |
209 | 0 | { |
210 | 0 | ir_ref val_identity = ir_sccp_identity(ctx, _values, val); |
211 | 0 | ir_insn *val_insn; |
212 | |
213 | 0 | if (IR_IS_CONST_REF(val_identity)) { |
214 | 0 | val_insn = &ctx->ir_base[val_identity]; |
215 | 0 | } else { |
216 | 0 | val_insn = &_values[val_identity]; |
217 | |
218 | 0 | if (!IR_IS_CONST_OP(val_insn->op) && !IR_IS_SYM_CONST(val_insn->op)) { |
219 | 0 | #if IR_COMBO_COPY_PROPAGATION |
220 | 0 | if (_values[ref].op == IR_COPY) { |
221 | | /* COPY(OLD_VAL) meet COPY(NEW_VAL) => |
222 | | * (IDENTITY(OLD_VAL) == IDENTITY(NEW_VAL)) ? COPY(OLD_VAL) : BOTTOM */ |
223 | 0 | if (ir_sccp_identity(ctx, _values, ref) == val_identity) { |
224 | 0 | return 0; /* not changed */ |
225 | 0 | } |
226 | 0 | ir_sccp_split_partition(ctx, _values, worklist, ref); |
227 | 0 | return 1; |
228 | 0 | } else { |
229 | 0 | IR_ASSERT(_values[ref].op != IR_BOTTOM); |
230 | | /* TOP meet COPY(NEW_VAL) -> COPY(NEW_VAL) */ |
231 | | /* OLD_CONST meet COPY(NEW_VAL) -> COPY(NEW_VAL) */ |
232 | 0 | ir_sccp_add_identity(ctx, _values, val, ref); |
233 | 0 | return 1; |
234 | 0 | } |
235 | 0 | #endif |
236 | | |
237 | 0 | IR_MAKE_BOTTOM(ref); |
238 | 0 | return 1; |
239 | 0 | } |
240 | 0 | } |
241 | | |
242 | 0 | return ir_sccp_meet_const(ctx, _values, worklist, ref, val_insn); |
243 | 0 | } |
244 | | |
245 | | static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref ref, ir_insn *insn) |
246 | 0 | { |
247 | 0 | ir_insn *op1_insn, *op2_insn, *op3_insn; |
248 | 0 | ir_ref op1, op2, op3, copy; |
249 | 0 | uint32_t opt = insn->opt; |
250 | |
251 | 0 | op1 = ir_sccp_identity(ctx, _values, insn->op1); |
252 | 0 | op2 = ir_sccp_identity(ctx, _values, insn->op2); |
253 | 0 | op3 = ir_sccp_identity(ctx, _values, insn->op3); |
254 | |
255 | 0 | restart: |
256 | 0 | op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1; |
257 | 0 | op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2; |
258 | 0 | op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3; |
259 | |
260 | 0 | switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { |
261 | 0 | case IR_FOLD_DO_RESTART: |
262 | 0 | opt = ctx->fold_insn.optx; |
263 | 0 | op1 = ctx->fold_insn.op1; |
264 | 0 | op2 = ctx->fold_insn.op2; |
265 | 0 | op3 = ctx->fold_insn.op3; |
266 | 0 | goto restart; |
267 | 0 | case IR_FOLD_DO_CSE: |
268 | 0 | case IR_FOLD_DO_EMIT: |
269 | 0 | IR_MAKE_BOTTOM_EX(ref); |
270 | 0 | return 1; |
271 | 0 | case IR_FOLD_DO_COPY: |
272 | 0 | copy = ctx->fold_insn.op1; |
273 | 0 | return ir_sccp_meet(ctx, _values, worklist, ref, copy); |
274 | 0 | case IR_FOLD_DO_CONST: |
275 | 0 | return ir_sccp_meet_const(ctx, _values, worklist, ref, &ctx->fold_insn); |
276 | 0 | default: |
277 | 0 | IR_ASSERT(0); |
278 | 0 | return 0; |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | | static bool ir_sccp_analyze_phi(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_ref i, ir_insn *insn) |
283 | 0 | { |
284 | 0 | ir_ref j, n, input, *merge_input, *p; |
285 | 0 | ir_insn *v, *new_const = NULL; |
286 | 0 | #if IR_COMBO_COPY_PROPAGATION |
287 | 0 | ir_ref new_copy = IR_UNUSED; |
288 | 0 | ir_ref new_copy_identity = IR_UNUSED; |
289 | 0 | ir_ref phi_identity = ir_sccp_identity(ctx, _values, i); |
290 | 0 | #endif |
291 | |
292 | 0 | if (!IR_IS_REACHABLE(insn->op1)) { |
293 | 0 | return 0; |
294 | 0 | } |
295 | 0 | n = insn->inputs_count; |
296 | 0 | if (n > 3 && _values[i].op == IR_TOP) { |
297 | 0 | for (j = 0; j < (n>>2); j++) { |
298 | 0 | _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ |
299 | 0 | } |
300 | 0 | } |
301 | |
302 | 0 | p = insn->ops + 2; |
303 | 0 | merge_input = ctx->ir_base[insn->op1].ops + 1; |
304 | 0 | for (; --n > 0; p++, merge_input++) { |
305 | 0 | IR_ASSERT(*merge_input > 0); |
306 | 0 | if (!IR_IS_REACHABLE(*merge_input)) { |
307 | 0 | continue; |
308 | 0 | } |
309 | | |
310 | 0 | input = *p; |
311 | 0 | if (IR_IS_CONST_REF(input)) { |
312 | 0 | v = &ctx->ir_base[input]; |
313 | 0 | } else if (input == i) { |
314 | 0 | continue; |
315 | 0 | } else { |
316 | 0 | v = &_values[input]; |
317 | 0 | if (v->op == IR_TOP) { |
318 | 0 | ir_sccp_add_input(ctx, _values, worklist, input); |
319 | 0 | continue; |
320 | 0 | #if IR_COMBO_COPY_PROPAGATION |
321 | 0 | } else if (v->op == IR_COPY) { |
322 | 0 | input = v->op1; |
323 | 0 | new_copy_identity = ir_sccp_identity(ctx, _values, input); |
324 | 0 | if (new_copy_identity == phi_identity) { |
325 | 0 | new_copy_identity = IR_UNUSED; |
326 | 0 | continue; |
327 | 0 | } |
328 | 0 | new_copy = input; |
329 | 0 | goto next; |
330 | 0 | #endif |
331 | 0 | } else if (v->op == IR_BOTTOM) { |
332 | 0 | #if IR_COMBO_COPY_PROPAGATION |
333 | 0 | if (input == phi_identity) { |
334 | 0 | continue; |
335 | 0 | } |
336 | 0 | new_copy = new_copy_identity = input; |
337 | 0 | goto next; |
338 | | #else |
339 | | goto make_bottom; |
340 | | #endif |
341 | 0 | } |
342 | 0 | } |
343 | 0 | new_const = v; |
344 | 0 | goto next; |
345 | 0 | } |
346 | | |
347 | 0 | return 0; |
348 | | |
349 | 0 | next: |
350 | 0 | p++; |
351 | 0 | merge_input++; |
352 | | /* for all live merge inputs */ |
353 | 0 | for (; --n > 0; p++, merge_input++) { |
354 | 0 | IR_ASSERT(*merge_input > 0); |
355 | 0 | if (!IR_IS_REACHABLE(*merge_input)) { |
356 | 0 | continue; |
357 | 0 | } |
358 | | |
359 | 0 | input = *p; |
360 | 0 | if (IR_IS_CONST_REF(input)) { |
361 | 0 | #if IR_COMBO_COPY_PROPAGATION |
362 | 0 | if (new_copy) { |
363 | 0 | goto make_bottom; |
364 | 0 | } |
365 | 0 | #endif |
366 | 0 | v = &ctx->ir_base[input]; |
367 | 0 | } else if (input == i) { |
368 | 0 | continue; |
369 | 0 | } else { |
370 | 0 | v = &_values[input]; |
371 | 0 | if (v->op == IR_TOP) { |
372 | 0 | ir_sccp_add_input(ctx, _values, worklist, input); |
373 | 0 | continue; |
374 | 0 | #if IR_COMBO_COPY_PROPAGATION |
375 | 0 | } else if (v->op == IR_COPY) { |
376 | 0 | ir_ref identity = ir_sccp_identity(ctx, _values, v->op1); |
377 | |
378 | 0 | if (identity == phi_identity || identity == new_copy_identity) { |
379 | 0 | continue; |
380 | 0 | } |
381 | 0 | goto make_bottom; |
382 | 0 | #endif |
383 | 0 | } else if (v->op == IR_BOTTOM) { |
384 | 0 | #if IR_COMBO_COPY_PROPAGATION |
385 | 0 | if (input == phi_identity || input == new_copy_identity) { |
386 | 0 | continue; |
387 | 0 | } |
388 | 0 | #endif |
389 | 0 | goto make_bottom; |
390 | 0 | } |
391 | 0 | } |
392 | 0 | if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) { |
393 | 0 | goto make_bottom; |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | 0 | #if IR_COMBO_COPY_PROPAGATION |
398 | 0 | if (new_copy) { |
399 | 0 | return ir_sccp_meet(ctx, _values, worklist, i, new_copy); |
400 | 0 | } |
401 | 0 | #endif |
402 | | |
403 | 0 | return ir_sccp_meet_const(ctx, _values, worklist, i, new_const); |
404 | | |
405 | 0 | make_bottom: |
406 | 0 | IR_MAKE_BOTTOM_EX(i); |
407 | 0 | return 1; |
408 | 0 | } |
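/*
 * Worked examples for the PHI transfer function above (illustrative):
 *
 *   PHI(merge, 5, 5) => CONST 5  (all feasible inputs agree)
 *   PHI(merge, 5, 7) => BOTTOM   (conflicting constants)
 *   PHI(merge, x, 5) => BOTTOM   (unless the "5" input is unreachable)
 *
 * A standalone sketch of the same fold over plain constant inputs, reusing
 * the hypothetical lat_meet() from the sketch near the top of this file:
 */
#if 0
static lat_val phi_meet(const lat_val *in, const bool *feasible, int n)
{
	lat_val r = {LAT_TOP, 0};
	int i;

	for (i = 0; i < n; i++) {
		if (!feasible[i]) {
			continue;   /* input from an unreachable predecessor */
		}
		r = lat_meet(r, in[i]);
		if (r.kind == LAT_BOTTOM) {
			break;      /* no way back up the lattice */
		}
	}
	return r;
}
#endif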
409 | | |
410 | | static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn) |
411 | 0 | { |
412 | 0 | if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) { |
413 | 0 | return ctx->use_lists[ref].count == 1; |
414 | 0 | } else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) { |
415 | 0 | return ctx->use_lists[ref].count == 1; |
416 | 0 | } |
417 | 0 | return 0; |
418 | 0 | } |
419 | | |
420 | | static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref) |
421 | 0 | { |
422 | 0 | if (ctx->use_lists[ref].count == 1) { |
423 | 0 | uint32_t flags = ir_op_flags[ctx->ir_base[ref].op]; |
424 | |
425 | 0 | if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) { |
426 | 0 | return 1; |
427 | 0 | } else if (ctx->ir_base[ref].op == IR_ALLOCA) { |
428 | 0 | return 1; |
429 | 0 | } |
430 | 0 | } |
431 | 0 | return 0; |
432 | 0 | } |
433 | | |
434 | | static bool ir_is_dead(ir_ctx *ctx, ir_ref ref) |
435 | 0 | { |
436 | 0 | if (ctx->use_lists[ref].count == 0) { |
437 | 0 | return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op); |
438 | 0 | } else { |
439 | 0 | return ir_is_dead_load(ctx, ref); |
440 | 0 | } |
441 | 0 | return 0; |
442 | 0 | } |
443 | | |
444 | | static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a) |
445 | 0 | { |
446 | 0 | ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; |
447 | |
448 | 0 | return ir_const_is_true(v); |
449 | 0 | } |
450 | | |
451 | | static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b) |
452 | 0 | { |
453 | 0 | ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; |
454 | 0 | ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b]; |
455 | |
456 | 0 | IR_ASSERT(!IR_IS_SYM_CONST(v1->op)); |
457 | 0 | IR_ASSERT(!IR_IS_SYM_CONST(v2->op)); |
458 | 0 | return v1->val.u64 == v2->val.u64; |
459 | 0 | } |
460 | | |
461 | | static bool ir_sccp_in_range(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b, ir_ref c) |
462 | 0 | { |
463 | 0 | ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a]; |
464 | 0 | ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b]; |
465 | 0 | ir_insn *v3 = IR_IS_CONST_REF(c) ? &ctx->ir_base[c] : &_values[c]; |
466 | |
467 | 0 | IR_ASSERT(!IR_IS_SYM_CONST(v1->op)); |
468 | 0 | IR_ASSERT(!IR_IS_SYM_CONST(v2->op)); |
469 | 0 | IR_ASSERT(!IR_IS_SYM_CONST(v3->op)); |
470 | 0 | if (IR_IS_TYPE_SIGNED(v1->type)) { |
471 | 0 | return v1->val.i64 >= v2->val.i64 && v1->val.i64 <= v3->val.i64; |
472 | 0 | } else { |
473 | 0 | return v1->val.u64 >= v2->val.u64 && v1->val.u64 <= v3->val.u64; |
474 | 0 | } |
475 | 0 | } |
476 | | |
477 | | #ifdef IR_SCCP_TRACE |
478 | | static void ir_sccp_trace_val(ir_ctx *ctx, ir_insn *_values, ir_ref i) |
479 | | { |
480 | | if (IR_IS_BOTTOM(i)) { |
481 | | fprintf(stderr, "BOTTOM"); |
482 | | } else if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { |
483 | | fprintf(stderr, "CONST("); |
484 | | ir_print_const(ctx, &_values[i], stderr, true); |
485 | | fprintf(stderr, ")"); |
486 | | #if IR_COMBO_COPY_PROPAGATION |
487 | | } else if (_values[i].op == IR_COPY) { |
488 | | fprintf(stderr, "COPY(%d)", _values[i].op1); |
489 | | #endif |
490 | | } else if (IR_IS_TOP(i)) { |
491 | | fprintf(stderr, "TOP"); |
492 | | } else if (_values[i].op == IR_IF) { |
493 | | fprintf(stderr, "IF(%d)", _values[i].op1); |
494 | | } else if (_values[i].op == IR_MERGE) { |
495 | | fprintf(stderr, "MERGE(%d)", _values[i].op1); |
496 | | } else { |
497 | | fprintf(stderr, "%d", _values[i].op); |
498 | | } |
499 | | } |
500 | | |
501 | | static void ir_sccp_trace_start(ir_ctx *ctx, ir_insn *_values, ir_ref i) |
502 | | { |
503 | | fprintf(stderr, "%d. ", i); |
504 | | ir_sccp_trace_val(ctx, _values, i); |
505 | | } |
506 | | |
507 | | static void ir_sccp_trace_end(ir_ctx *ctx, ir_insn *_values, ir_ref i) |
508 | | { |
509 | | fprintf(stderr, " -> "); |
510 | | ir_sccp_trace_val(ctx, _values, i); |
511 | | fprintf(stderr, "\n"); |
512 | | } |
513 | | #else |
514 | | # define ir_sccp_trace_start(c, v, i) |
515 | | # define ir_sccp_trace_end(c, v, i) |
516 | | #endif |
517 | | |
518 | | static IR_NEVER_INLINE void ir_sccp_analyze(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) |
519 | 0 | { |
520 | 0 | ir_ref i, j, n, *p, use; |
521 | 0 | ir_use_list *use_list; |
522 | 0 | ir_insn *insn, *use_insn; |
523 | 0 | uint32_t flags; |
524 | | |
525 | | /* A slightly modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */ |
526 | 0 | worklist->pos = 0; |
527 | 0 | ir_bitset_incl(worklist->set, 1); |
528 | 0 | for (; (i = ir_bitqueue_pop(worklist)) >= 0; ir_sccp_trace_end(ctx, _values, i)) { |
529 | 0 | IR_ASSERT(_values[i].op != IR_BOTTOM); |
530 | 0 | ir_sccp_trace_start(ctx, _values, i); |
531 | 0 | insn = &ctx->ir_base[i]; |
532 | 0 | flags = ir_op_flags[insn->op]; |
533 | 0 | if (flags & IR_OP_FLAG_DATA) { |
534 | 0 | if (ctx->use_lists[i].count == 0) { |
535 | | /* dead code */ |
536 | 0 | continue; |
537 | 0 | } else if (insn->op == IR_PHI) { |
538 | 0 | if (!ir_sccp_analyze_phi(ctx, _values, worklist, i, insn)) { |
539 | 0 | continue; |
540 | 0 | } |
541 | 0 | } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) { |
542 | 0 | bool may_benefit = 0; |
543 | 0 | bool has_top = 0; |
544 | |
545 | 0 | if (_values[i].op != IR_TOP) { |
546 | 0 | may_benefit = 1; |
547 | 0 | } |
548 | |
549 | 0 | IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags)); |
550 | 0 | n = IR_INPUT_EDGES_COUNT(flags); |
551 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
552 | 0 | ir_ref input = *p; |
553 | 0 | if (input > 0) { |
554 | 0 | if (_values[input].op == IR_TOP) { |
555 | 0 | has_top = 1; |
556 | 0 | ir_sccp_add_input(ctx, _values, worklist, input); |
557 | 0 | } else if (_values[input].op != IR_BOTTOM) { |
558 | | /* Perform folding only if some of the direct inputs |
559 | | * are going to be replaced by a constant or copy. |
560 | | * This approach may miss some folding optimizations |
561 | | * that depend on indirect inputs, e.g. reassociation. |
562 | | */ |
563 | 0 | may_benefit = 1; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | } |
567 | 0 | if (has_top) { |
568 | 0 | continue; |
569 | 0 | } |
570 | 0 | if (!may_benefit) { |
571 | 0 | IR_MAKE_BOTTOM_EX(i); |
572 | 0 | if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) { |
573 | 0 | ir_bitqueue_add(iter_worklist, i); |
574 | 0 | } |
575 | 0 | } else if (!ir_sccp_fold(ctx, _values, worklist, i, insn)) { |
576 | | /* not changed */ |
577 | 0 | continue; |
578 | 0 | } else if (_values[i].op == IR_BOTTOM) { |
579 | 0 | insn = &ctx->ir_base[i]; |
580 | 0 | if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC) { |
581 | 0 | ir_bitqueue_add(iter_worklist, i); |
582 | 0 | } |
583 | 0 | } |
584 | 0 | } else { |
585 | 0 | IR_MAKE_BOTTOM_EX(i); |
586 | 0 | } |
587 | 0 | } else if (flags & IR_OP_FLAG_BB_START) { |
588 | 0 | if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_BEGIN) { |
589 | 0 | ir_bitqueue_add(iter_worklist, i); |
590 | 0 | } |
591 | 0 | if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) { |
592 | 0 | ir_ref unfeasible_inputs = 0; |
593 | |
594 | 0 | n = insn->inputs_count; |
595 | 0 | if (n > 3 && _values[i].op == IR_TOP) { |
596 | 0 | for (j = 0; j < (n>>2); j++) { |
597 | 0 | _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ |
598 | 0 | } |
599 | 0 | } |
600 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
601 | 0 | ir_ref input = *p; |
602 | 0 | IR_ASSERT(input > 0); |
603 | 0 | if (!IR_IS_REACHABLE(input)) { |
604 | 0 | unfeasible_inputs++; |
605 | 0 | } |
606 | 0 | } |
607 | 0 | if (unfeasible_inputs == 0) { |
608 | 0 | IR_MAKE_BOTTOM(i); |
609 | 0 | } else if (_values[i].op != IR_MERGE || _values[i].op1 != unfeasible_inputs) { |
610 | 0 | _values[i].optx = IR_MERGE; |
611 | 0 | _values[i].op1 = unfeasible_inputs; |
612 | 0 | } else { |
613 | 0 | continue; |
614 | 0 | } |
615 | 0 | if (ctx->flags2 & IR_MEM2SSA_VARS) { |
616 | | /* MEM2SSA puts new PHIs at the bottom, but we'd like to process them now */ |
617 | 0 | use_list = &ctx->use_lists[i]; |
618 | 0 | n = use_list->count; |
619 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
620 | 0 | use = *p; |
621 | 0 | if (_values[use].op != IR_BOTTOM) { |
622 | 0 | if (ctx->ir_base[use].op == IR_PHI) { |
623 | 0 | ir_bitqueue_del(worklist, use); |
624 | 0 | if (ctx->use_lists[use].count != 0) { |
625 | 0 | if (ir_sccp_analyze_phi(ctx, _values, worklist, use, &ctx->ir_base[use])) { |
626 | 0 | ir_sccp_add_uses(ctx, _values, worklist, use); |
627 | 0 | } |
628 | 0 | } |
629 | 0 | } else { |
630 | 0 | ir_bitqueue_add(worklist, use); |
631 | 0 | } |
632 | 0 | } |
633 | 0 | } |
634 | 0 | continue; |
635 | 0 | } |
636 | 0 | } else { |
637 | 0 | IR_ASSERT(insn->op == IR_START || IR_IS_REACHABLE(insn->op1)); |
638 | 0 | IR_MAKE_BOTTOM(i); |
639 | 0 | } |
640 | 0 | } else { |
641 | 0 | IR_ASSERT(insn->op1 > 0); |
642 | 0 | if (!IR_IS_REACHABLE(insn->op1)) { |
643 | | /* control input is not feasible */ |
644 | 0 | continue; |
645 | 0 | } |
646 | 0 | if (insn->op == IR_IF) { |
647 | 0 | if (IR_IS_TOP(insn->op2)) { |
648 | 0 | ir_sccp_add_input(ctx, _values, worklist, insn->op2); |
649 | 0 | continue; |
650 | 0 | } |
651 | 0 | if (IR_IS_CONST(insn->op2)) { |
652 | 0 | bool b = ir_sccp_is_true(ctx, _values, insn->op2); |
653 | 0 | use_list = &ctx->use_lists[i]; |
654 | 0 | IR_ASSERT(use_list->count == 2); |
655 | 0 | p = &ctx->use_edges[use_list->refs]; |
656 | 0 | use = *p; |
657 | 0 | use_insn = &ctx->ir_base[use]; |
658 | 0 | IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE); |
659 | 0 | if ((use_insn->op == IR_IF_TRUE) != b) { |
660 | 0 | use = *(p+1); |
661 | 0 | IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE); |
662 | 0 | } |
663 | 0 | if (_values[i].op == IR_TOP) { |
664 | 0 | _values[i].optx = IR_IF; |
665 | 0 | _values[i].op1 = use; |
666 | 0 | ir_bitqueue_add(worklist, use); |
667 | 0 | continue; |
668 | 0 | } else if (_values[i].op == IR_IF && _values[i].op1 == use) { |
669 | 0 | continue; |
670 | 0 | } |
671 | 0 | } |
672 | 0 | IR_MAKE_BOTTOM(i); |
673 | 0 | ir_bitqueue_add(iter_worklist, i); |
674 | 0 | } else if (insn->op == IR_SWITCH) { |
675 | 0 | if (IR_IS_TOP(insn->op2)) { |
676 | 0 | ir_sccp_add_input(ctx, _values, worklist, insn->op2); |
677 | 0 | continue; |
678 | 0 | } |
679 | 0 | if (IR_IS_CONST(insn->op2)) { |
680 | 0 | ir_ref use_case = IR_UNUSED; |
681 | |
682 | 0 | use_list = &ctx->use_lists[i]; |
683 | 0 | n = use_list->count; |
684 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
685 | 0 | use = *p; |
686 | 0 | IR_ASSERT(use > 0); |
687 | 0 | use_insn = &ctx->ir_base[use]; |
688 | 0 | if (use_insn->op == IR_CASE_VAL) { |
689 | 0 | if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) { |
690 | 0 | use_case = use; |
691 | 0 | break; |
692 | 0 | } |
693 | 0 | } else if (use_insn->op == IR_CASE_DEFAULT) { |
694 | 0 | use_case = use; |
695 | 0 | } else if (use_insn->op == IR_CASE_RANGE) { |
696 | 0 | if (ir_sccp_in_range(ctx, _values, insn->op2, use_insn->op2, use_insn->op3)) { |
697 | 0 | use_case = use; |
698 | 0 | break; |
699 | 0 | } |
700 | 0 | } |
701 | 0 | } |
702 | 0 | if (use_case) { |
703 | 0 | use_insn = &ctx->ir_base[use_case]; |
704 | 0 | if (_values[i].op == IR_TOP) { |
705 | 0 | _values[i].optx = IR_IF; |
706 | 0 | _values[i].op1 = use_case; |
707 | 0 | ir_bitqueue_add(worklist, use_case); |
708 | 0 | continue; |
709 | 0 | } else if (_values[i].op == IR_IF || _values[i].op1 == use_case) { |
710 | 0 | continue; |
711 | 0 | } |
712 | 0 | } |
713 | 0 | } |
714 | 0 | IR_MAKE_BOTTOM(i); |
715 | 0 | } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) { |
716 | | /* schedule dead load elimination */ |
717 | 0 | ir_bitqueue_add(iter_worklist, i); |
718 | 0 | IR_MAKE_BOTTOM(i); |
719 | 0 | } else { |
720 | 0 | if (_values[i].op == IR_TOP) { |
721 | 0 | bool has_top = 0; |
722 | | |
723 | | /* control, call, load and store instructions may have unprocessed inputs */ |
724 | 0 | n = IR_INPUT_EDGES_COUNT(flags); |
725 | 0 | if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) { |
726 | 0 | for (j = 0; j < (n>>2); j++) { |
727 | 0 | _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */ |
728 | 0 | } |
729 | 0 | for (j = 2, p = insn->ops + j; j <= n; j++, p++) { |
730 | 0 | IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA); |
731 | 0 | use = *p; |
732 | 0 | if (use > 0 && _values[use].op == IR_TOP) { |
733 | 0 | has_top = 1; |
734 | 0 | ir_sccp_add_input(ctx, _values, worklist, use); |
735 | 0 | } |
736 | 0 | } |
737 | 0 | } else if (n >= 2) { |
738 | 0 | IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA); |
739 | 0 | use = insn->op2; |
740 | 0 | if (use > 0 && _values[use].op == IR_TOP) { |
741 | 0 | has_top = 1; |
742 | 0 | ir_sccp_add_input(ctx, _values, worklist, use); |
743 | 0 | } |
744 | 0 | if (n > 2) { |
745 | 0 | IR_ASSERT(n == 3); |
746 | 0 | IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA); |
747 | 0 | use = insn->op3; |
748 | 0 | if (use > 0 && _values[use].op == IR_TOP) { |
749 | 0 | has_top = 1; |
750 | 0 | ir_sccp_add_input(ctx, _values, worklist, use); |
751 | 0 | } |
752 | 0 | } |
753 | 0 | } |
754 | |
755 | 0 | if (has_top && !(flags & IR_OP_FLAG_BB_END)) { |
756 | 0 | use = ir_next_control(ctx, i); |
757 | 0 | if (_values[use].op == IR_TOP) { |
758 | 0 | has_top = 1; |
759 | | /* do forward control propagation only once */ |
760 | 0 | if (!_values[use].op1) { |
761 | 0 | _values[use].op1 = 1; |
762 | 0 | ir_bitqueue_add(worklist, use); |
763 | 0 | } |
764 | 0 | } |
765 | 0 | continue; |
766 | 0 | } |
767 | 0 | } |
768 | 0 | IR_MAKE_BOTTOM(i); |
769 | 0 | } |
770 | 0 | } |
771 | 0 | ir_sccp_add_uses(ctx, _values, worklist, i); |
772 | 0 | } |
773 | | |
774 | 0 | #ifdef IR_DEBUG |
775 | 0 | if (ctx->flags & IR_DEBUG_SCCP) { |
776 | 0 | for (i = 1; i < ctx->insns_count; i++) { |
777 | 0 | if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) { |
778 | 0 | fprintf(stderr, "%d. CONST(", i); |
779 | 0 | ir_print_const(ctx, &_values[i], stderr, true); |
780 | 0 | fprintf(stderr, ")\n"); |
781 | 0 | #if IR_COMBO_COPY_PROPAGATION |
782 | 0 | } else if (_values[i].op == IR_COPY) { |
783 | 0 | fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1); |
784 | 0 | #endif |
785 | 0 | } else if (IR_IS_TOP(i)) { |
786 | 0 | fprintf(stderr, "%d. TOP\n", i); |
787 | 0 | } else if (_values[i].op == IR_IF) { |
788 | 0 | fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1); |
789 | 0 | } else if (_values[i].op == IR_MERGE) { |
790 | 0 | fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1); |
791 | 0 | } else if (!IR_IS_BOTTOM(i)) { |
792 | 0 | fprintf(stderr, "%d. %d\n", i, _values[i].op); |
793 | 0 | } |
794 | 0 | } |
795 | 0 | } |
796 | 0 | #endif |
797 | 0 | } |
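/*
 * The loop above is the classic sparse worklist iteration: pop a node,
 * recompute its lattice value from the values of its inputs, and requeue
 * its uses whenever the value was lowered.  Schematic skeleton, where
 * recompute_value() is a hypothetical stand-in for the per-opcode logic
 * above:
 */
#if 0
while ((i = ir_bitqueue_pop(worklist)) >= 0) {
	if (recompute_value(ctx, _values, i)) {   /* value changed (lowered)? */
		ir_sccp_add_uses(ctx, _values, worklist, i);
	}
}
#endif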
798 | | |
799 | | /***********************/ |
800 | | /* SCCP transformation */ |
801 | | /***********************/ |
802 | | |
803 | | static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref) |
804 | 0 | { |
805 | 0 | ir_ref j, n, *p; |
806 | 0 | ir_insn *insn; |
807 | |
808 | 0 | CLEAR_USES(ref); |
809 | 0 | insn = &ctx->ir_base[ref]; |
810 | 0 | n = insn->inputs_count; |
811 | 0 | insn->opt = IR_NOP; /* keep "inputs_count" */ |
812 | 0 | for (j = 1, p = insn->ops + j; j <= n; j++, p++) { |
813 | 0 | *p = IR_UNUSED; |
814 | 0 | } |
815 | 0 | } |
816 | | |
817 | | static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist) |
818 | 0 | { |
819 | 0 | ir_ref j, n, *p; |
820 | 0 | ir_insn *insn; |
821 | |
822 | 0 | CLEAR_USES(ref); |
823 | 0 | insn = &ctx->ir_base[ref]; |
824 | 0 | n = insn->inputs_count; |
825 | 0 | insn->opt = IR_NOP; /* keep "inputs_count" */ |
826 | 0 | for (j = 1, p = insn->ops + j; j <= n; j++, p++) { |
827 | 0 | ir_ref input = *p; |
828 | 0 | *p = IR_UNUSED; |
829 | | /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ |
830 | 0 | if (input > 0 && _values[input].op > IR_COPY) { |
831 | 0 | ir_use_list_remove_all(ctx, input, ref); |
832 | 0 | if (ir_is_dead(ctx, input)) { |
833 | | /* schedule DCE */ |
834 | 0 | ir_bitqueue_add(worklist, input); |
835 | 0 | } |
836 | 0 | } |
837 | 0 | } |
838 | 0 | } |
839 | | |
840 | | static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) |
841 | 0 | { |
842 | 0 | ir_ref j, n, *p, use, i; |
843 | 0 | ir_insn *insn; |
844 | 0 | ir_use_list *use_list; |
845 | |
846 | 0 | IR_ASSERT(ref != new_ref); |
847 | |
848 | 0 | insn = &ctx->ir_base[ref]; |
849 | |
850 | 0 | #if IR_COMBO_COPY_PROPAGATION |
851 | 0 | if ((ir_op_flags[insn->op] & IR_OP_FLAG_MEM) && IR_IS_REACHABLE(insn->op1)) { |
852 | | /* remove from control list */ |
853 | 0 | ir_ref prev = insn->op1; |
854 | 0 | ir_ref next = ir_next_control(ctx, ref); |
855 | 0 | ctx->ir_base[next].op1 = prev; |
856 | 0 | ir_use_list_remove_one(ctx, ref, next); |
857 | 0 | ir_use_list_replace_one(ctx, prev, ref, next); |
858 | 0 | insn->op1 = IR_UNUSED; |
859 | 0 | } |
860 | 0 | #endif |
861 | |
862 | 0 | n = insn->inputs_count; |
863 | 0 | insn->opt = IR_NOP; /* keep "inputs_count" */ |
864 | 0 | for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { |
865 | 0 | ir_ref input = *p; |
866 | 0 | *p = IR_UNUSED; |
867 | | /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ |
868 | 0 | if (input > 0 && _values[input].op > IR_COPY) { |
869 | 0 | ir_use_list_remove_all(ctx, input, ref); |
870 | 0 | if (ir_is_dead(ctx, input)) { |
871 | | /* schedule DCE */ |
872 | 0 | ir_bitqueue_add(worklist, input); |
873 | 0 | } |
874 | 0 | } |
875 | 0 | } |
876 | |
877 | 0 | use_list = &ctx->use_lists[ref]; |
878 | 0 | n = use_list->count; |
879 | 0 | p = &ctx->use_edges[use_list->refs]; |
880 | 0 | if (new_ref <= 0) { |
881 | | /* constant or IR_UNUSED */ |
882 | 0 | for (; n; p++, n--) { |
883 | 0 | use = *p; |
884 | | /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ |
885 | 0 | if (_values[use].op > IR_COPY) { |
886 | 0 | insn = &ctx->ir_base[use]; |
887 | 0 | i = ir_insn_find_op(insn, ref); |
888 | 0 | if (!i) continue; |
889 | 0 | IR_ASSERT(i > 0); |
890 | 0 | ir_insn_set_op(insn, i, new_ref); |
891 | | /* schedule folding */ |
892 | 0 | ir_bitqueue_add(worklist, use); |
893 | 0 | } |
894 | 0 | } |
895 | 0 | } else { |
896 | 0 | for (j = 0; j < n; j++, p++) { |
897 | 0 | use = *p; |
898 | | /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */ |
899 | 0 | if (_values[use].op == IR_BOTTOM) { |
900 | 0 | insn = &ctx->ir_base[use]; |
901 | 0 | i = ir_insn_find_op(insn, ref); |
902 | 0 | IR_ASSERT(i > 0); |
903 | 0 | ir_insn_set_op(insn, i, new_ref); |
904 | 0 | if (ir_use_list_add(ctx, new_ref, use)) { |
905 | | /* restore after reallocation */ |
906 | 0 | use_list = &ctx->use_lists[ref]; |
907 | 0 | n = use_list->count; |
908 | 0 | p = &ctx->use_edges[use_list->refs + j]; |
909 | 0 | } |
910 | | /* schedule folding */ |
911 | 0 | ir_bitqueue_add(worklist, use); |
912 | 0 | } |
913 | 0 | } |
914 | 0 | } |
915 | 0 | CLEAR_USES(ref); |
916 | 0 | } |
917 | | |
918 | | static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst) |
919 | 0 | { |
920 | 0 | ir_ref next; |
921 | 0 | ir_insn *insn, *next_insn; |
922 | |
923 | 0 | insn = &ctx->ir_base[ref]; |
924 | 0 | if (ctx->use_lists[dst].count == 1) { |
925 | 0 | next = ctx->use_edges[ctx->use_lists[dst].refs]; |
926 | 0 | next_insn = &ctx->ir_base[next]; |
927 | | /* remove IF and IF_TRUE/FALSE from double linked control list */ |
928 | 0 | next_insn->op1 = insn->op1; |
929 | 0 | ir_use_list_replace_one(ctx, insn->op1, ref, next); |
930 | | /* remove IF and IF_TRUE/FALSE instructions */ |
931 | 0 | ir_sccp_make_nop(ctx, ref); |
932 | 0 | ir_sccp_make_nop(ctx, dst); |
933 | 0 | } else { |
934 | 0 | insn->op2 = IR_UNUSED; |
935 | 0 | insn->optx = IR_OPTX(IR_END, IR_VOID, 1); |
936 | 0 | next_insn = &ctx->ir_base[dst]; |
937 | 0 | next_insn->op = IR_BEGIN; |
938 | 0 | } |
939 | 0 | } |
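/*
 * In other words, for an IF whose condition folded to a constant: when the
 * taken IF_TRUE/IF_FALSE projection has a single user, both the IF and the
 * projection are spliced out of the control chain; otherwise the IF is
 * rewritten into an END and the taken projection into a BEGIN, leaving the
 * dead arm to later passes.
 */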
940 | | |
941 | | static bool ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
942 | 0 | { |
943 | 0 | ir_ref old_merge_inputs, new_merge_inputs, i, *p; |
944 | 0 | ir_use_list *use_list; |
945 | 0 | ir_bitset life_inputs; |
946 | 0 | ir_bitset_base_t holder = 0; |
947 | |
948 | 0 | IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN); |
949 | 0 | old_merge_inputs = insn->inputs_count; |
950 | 0 | new_merge_inputs = 0; |
951 | 0 | life_inputs = (old_merge_inputs < IR_BITSET_BITS) ? &holder : ir_bitset_malloc(old_merge_inputs + 1); |
952 | |
953 | 0 | for (i = 1; i <= old_merge_inputs; i++) { |
954 | 0 | ir_ref input = ir_insn_op(insn, i); |
955 | |
956 | 0 | if (input) { |
957 | 0 | new_merge_inputs++; |
958 | 0 | if (new_merge_inputs != i) { |
959 | 0 | ir_insn_set_op(insn, new_merge_inputs, input); |
960 | 0 | } |
961 | 0 | ir_bitset_incl(life_inputs, i); |
962 | 0 | } |
963 | 0 | } |
964 | |
965 | 0 | if (new_merge_inputs == old_merge_inputs) { |
966 | | /* All inputs are feasible */ |
967 | 0 | if (life_inputs != &holder) { |
968 | 0 | ir_mem_free(life_inputs); |
969 | 0 | } |
970 | 0 | return 0; |
971 | 0 | } |
972 | | |
973 | 0 | for (i = new_merge_inputs + 1; i <= old_merge_inputs; i++) { |
974 | 0 | ir_insn_set_op(insn, i, IR_UNUSED); |
975 | 0 | } |
976 | |
977 | 0 | if (new_merge_inputs <= 1) { |
978 | | #if 0 |
979 | | if (new_merge_inputs == 1 |
980 | | && insn->op == IR_LOOP_BEGIN |
981 | | && insn->op1 > ref) { // TODO: check dominance instead of order |
982 | | /* dead loop */ |
983 | | ir_use_list_remove_one(ctx, insn->op1, ref); |
984 | | insn->op1 = IR_UNUSED; |
985 | | new_merge_inputs = 0; |
986 | | } |
987 | | #endif |
988 | 0 | insn->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); |
989 | 0 | ir_bitqueue_add(worklist, ref); |
990 | 0 | } else { |
991 | 0 | insn->inputs_count = new_merge_inputs; |
992 | 0 | } |
993 | | |
994 | | /* Update PHIs */ |
995 | 0 | use_list = &ctx->use_lists[ref]; |
996 | 0 | if (use_list->count > 1) { |
997 | 0 | ir_ref use_count = 0; |
998 | 0 | ir_ref *q; |
999 | |
1000 | 0 | for (i = 0, p = q = &ctx->use_edges[use_list->refs]; i < use_list->count; p++, i++) { |
1001 | 0 | ir_ref use = *p; |
1002 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1003 | |
1004 | 0 | if (use_insn->op == IR_PHI) { |
1005 | 0 | ir_ref j, k; |
1006 | | |
1007 | | /* compress PHI */ |
1008 | 0 | for (j = k = 1; j <= old_merge_inputs; j++) { |
1009 | 0 | ir_ref input = ir_insn_op(use_insn, j + 1); |
1010 | |
1011 | 0 | if (ir_bitset_in(life_inputs, j)) { |
1012 | 0 | IR_ASSERT(input); |
1013 | 0 | if (k != j) { |
1014 | 0 | ir_insn_set_op(use_insn, k + 1, input); |
1015 | 0 | } |
1016 | 0 | k++; |
1017 | 0 | } else if (input > 0) { |
1018 | 0 | ir_use_list_remove_one(ctx, input, use); |
1019 | 0 | } |
1020 | 0 | } |
1021 | 0 | while (k <= old_merge_inputs) { |
1022 | 0 | k++; |
1023 | 0 | ir_insn_set_op(use_insn, k, IR_UNUSED); |
1024 | 0 | } |
1025 | |
1026 | 0 | if (new_merge_inputs == 0) { |
1027 | | /* remove PHI */ |
1028 | | #if 0 |
1029 | | use_insn->op1 = IR_UNUSED; |
1030 | | ir_iter_remove_insn(ctx, use, worklist); |
1031 | | #else |
1032 | 0 | IR_ASSERT(0); |
1033 | 0 | #endif |
1034 | 0 | continue; |
1035 | 0 | } else if (new_merge_inputs == 1) { |
1036 | | /* replace PHI by COPY */ |
1037 | 0 | use_insn->optx = IR_OPTX(IR_COPY, use_insn->type, 1); |
1038 | 0 | use_insn->op1 = use_insn->op2; |
1039 | 0 | use_insn->op2 = IR_UNUSED; |
1040 | 0 | ir_bitqueue_add(worklist, use); |
1041 | 0 | continue; |
1042 | 0 | } else { |
1043 | 0 | use_insn->inputs_count = new_merge_inputs + 1; |
1044 | 0 | } |
1045 | 0 | } |
1046 | 0 | if (p != q) { |
1047 | 0 | *q = use; |
1048 | 0 | } |
1049 | 0 | q++; |
1050 | 0 | use_count++; |
1051 | 0 | } |
1052 | 0 | for (i = use_count; i < use_list->count; q++, i++) { |
1053 | 0 | *q = IR_UNUSED; |
1054 | 0 | } |
1055 | 0 | use_list->count = use_count; |
1056 | 0 | } |
1057 | | |
1058 | 0 | if (life_inputs != &holder) { |
1059 | 0 | ir_mem_free(life_inputs); |
1060 | 0 | } |
1061 | |
1062 | 0 | return 1; |
1063 | 0 | } |
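/*
 * Example (illustrative): MERGE(b1, b2, b3) with b2 proven unreachable is
 * compressed to MERGE(b1, b3), and every PHI(merge, v1, v2, v3) on it to
 * PHI(merge, v1, v3); when only one input survives, the MERGE degrades to
 * a BEGIN and its PHIs are turned into COPYs.
 */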
1064 | | |
1065 | | static IR_NEVER_INLINE void ir_sccp_transform(ir_ctx *ctx, ir_insn *_values, ir_bitqueue *worklist, ir_bitqueue *iter_worklist) |
1066 | 0 | { |
1067 | 0 | ir_ref i, j; |
1068 | 0 | ir_insn *value; |
1069 | |
1070 | 0 | for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) { |
1071 | 0 | if (value->op == IR_BOTTOM) { |
1072 | 0 | continue; |
1073 | 0 | } else if (IR_IS_CONST_OP(value->op)) { |
1074 | | /* replace instruction by constant */ |
1075 | 0 | j = ir_const(ctx, value->val, value->type); |
1076 | 0 | ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); |
1077 | 0 | } else if (IR_IS_SYM_CONST(value->op)) { |
1078 | | /* replace instruction by constant */ |
1079 | 0 | j = ir_const_ex(ctx, value->val, value->type, value->optx); |
1080 | 0 | ir_sccp_replace_insn(ctx, _values, i, j, iter_worklist); |
1081 | 0 | #if IR_COMBO_COPY_PROPAGATION |
1082 | 0 | } else if (value->op == IR_COPY) { |
1083 | 0 | ir_sccp_replace_insn(ctx, _values, i, ir_sccp_identity(ctx, _values, value->op1), iter_worklist); |
1084 | 0 | #endif |
1085 | 0 | } else if (value->op == IR_TOP) { |
1086 | | /* remove unreachable instruction */ |
1087 | 0 | ir_insn *insn = &ctx->ir_base[i]; |
1088 | |
1089 | 0 | if (insn->op == IR_NOP) { |
1090 | | /* already removed */ |
1091 | 0 | } else if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) { |
1092 | 0 | if (insn->op != IR_PARAM) { |
1093 | 0 | ir_sccp_remove_insn(ctx, _values, i, iter_worklist); |
1094 | 0 | } |
1095 | 0 | } else { |
1096 | 0 | if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) { |
1097 | | /* remove from terminators list */ |
1098 | 0 | ir_ref prev = ctx->ir_base[1].op1; |
1099 | 0 | if (prev == i) { |
1100 | 0 | ctx->ir_base[1].op1 = insn->op3; |
1101 | 0 | } else { |
1102 | 0 | while (prev) { |
1103 | 0 | if (ctx->ir_base[prev].op3 == i) { |
1104 | 0 | ctx->ir_base[prev].op3 = insn->op3; |
1105 | 0 | break; |
1106 | 0 | } |
1107 | 0 | prev = ctx->ir_base[prev].op3; |
1108 | 0 | } |
1109 | 0 | } |
1110 | 0 | } |
1111 | 0 | ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, iter_worklist); |
1112 | 0 | } |
1113 | 0 | } else if (value->op == IR_IF) { |
1114 | | /* remove one-way IF/SWITCH */ |
1115 | 0 | ir_sccp_remove_if(ctx, _values, i, value->op1); |
1116 | 0 | } else if (value->op == IR_MERGE) { |
1117 | | /* schedule merge to remove unfeasible MERGE inputs */ |
1118 | 0 | ir_bitqueue_add(worklist, i); |
1119 | 0 | } |
1120 | 0 | } |
1121 | |
1122 | 0 | while ((i = ir_bitqueue_pop(worklist)) >= 0) { |
1123 | 0 | IR_ASSERT(_values[i].op == IR_MERGE); |
1124 | 0 | ir_sccp_remove_unfeasible_merge_inputs(ctx, i, &ctx->ir_base[i], iter_worklist); |
1125 | 0 | } |
1126 | 0 | } |
1127 | | |
1128 | | /***************************/ |
1129 | | /* Iterative Optimizations */ |
1130 | | /***************************/ |
1131 | | |
1132 | | /* Modification of some instruction may open new optimization opportunities for other |
1133 | | * instructions that use this one. |
1134 | | * |
1135 | | * For example, let "a = ADD(x, y)" become "a = ADD(x, C1)". If we also have |
1136 | | * "b = ADD(a, C2)", we may optimize it into "b = ADD(x, C1 + C2)" and then might |
1137 | | * also remove "a". |
1138 | | * |
1139 | | * This implementation supports only a few of the optimization combinations from ir_fold.h. |
1140 | | * |
1141 | | * TODO: Think about a more general solution ??? |
1142 | | */ |
1143 | | static void ir_iter_add_related_uses(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) |
1144 | 0 | { |
1145 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1146 | |
1147 | 0 | if (insn->op == IR_ADD || insn->op == IR_SUB) { |
1148 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
1149 | |
1150 | 0 | if (use_list->count == 1) { |
1151 | 0 | ir_ref use = ctx->use_edges[use_list->refs]; |
1152 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1153 | |
1154 | 0 | if (use_insn->op == IR_ADD || use_insn->op == IR_SUB) { |
1155 | 0 | ir_bitqueue_add(worklist, use); |
1156 | 0 | } |
1157 | 0 | } |
1158 | 0 | } |
1159 | 0 | } |
1160 | | |
1161 | | static void ir_iter_remove_insn(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) |
1162 | 0 | { |
1163 | 0 | ir_ref j, n, *p; |
1164 | 0 | ir_insn *insn; |
1165 | |
1166 | 0 | CLEAR_USES(ref); |
1167 | 0 | insn = &ctx->ir_base[ref]; |
1168 | 0 | n = insn->inputs_count; |
1169 | 0 | insn->opt = IR_NOP; /* keep "inputs_count" */ |
1170 | 0 | for (j = 1, p = insn->ops + j; j <= n; j++, p++) { |
1171 | 0 | ir_ref input = *p; |
1172 | 0 | *p = IR_UNUSED; |
1173 | 0 | if (input > 0) { |
1174 | 0 | ir_use_list_remove_all(ctx, input, ref); |
1175 | 0 | if (ir_is_dead(ctx, input)) { |
1176 | | /* schedule DCE */ |
1177 | 0 | ir_bitqueue_add(worklist, input); |
1178 | 0 | } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { |
1179 | | /* try to optimize PHI into ABS/MIN/MAX/COND */ |
1180 | 0 | ir_bitqueue_add(worklist, ctx->ir_base[input].op1); |
1181 | 0 | } |
1182 | 0 | } |
1183 | 0 | } |
1184 | 0 | } |
1185 | | |
1186 | | void ir_iter_replace(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) |
1187 | 0 | { |
1188 | 0 | ir_ref i, j, n, *p, use; |
1189 | 0 | ir_insn *insn; |
1190 | 0 | ir_use_list *use_list; |
1191 | |
1192 | 0 | IR_ASSERT(ref != new_ref); |
1193 | |
1194 | 0 | use_list = &ctx->use_lists[ref]; |
1195 | 0 | n = use_list->count; |
1196 | 0 | p = &ctx->use_edges[use_list->refs]; |
1197 | 0 | if (new_ref <= 0) { |
1198 | | /* constant or IR_UNUSED */ |
1199 | 0 | for (; n; p++, n--) { |
1200 | 0 | use = *p; |
1201 | 0 | IR_ASSERT(use != ref); |
1202 | 0 | insn = &ctx->ir_base[use]; |
1203 | 0 | i = ir_insn_find_op(insn, ref); |
1204 | 0 | IR_ASSERT(i > 0); |
1205 | 0 | ir_insn_set_op(insn, i, new_ref); |
1206 | | /* schedule folding */ |
1207 | 0 | ir_bitqueue_add(worklist, use); |
1208 | 0 | ir_iter_add_related_uses(ctx, use, worklist); |
1209 | 0 | } |
1210 | 0 | } else { |
1211 | 0 | for (j = 0; j < n; j++, p++) { |
1212 | 0 | use = *p; |
1213 | 0 | IR_ASSERT(use != ref); |
1214 | 0 | insn = &ctx->ir_base[use]; |
1215 | 0 | i = ir_insn_find_op(insn, ref); |
1216 | 0 | IR_ASSERT(i > 0); |
1217 | 0 | ir_insn_set_op(insn, i, new_ref); |
1218 | 0 | if (ir_use_list_add(ctx, new_ref, use)) { |
1219 | | /* restore after reallocation */ |
1220 | 0 | use_list = &ctx->use_lists[ref]; |
1221 | 0 | n = use_list->count; |
1222 | 0 | p = &ctx->use_edges[use_list->refs + j]; |
1223 | 0 | } |
1224 | | /* schedule folding */ |
1225 | 0 | ir_bitqueue_add(worklist, use); |
1226 | 0 | } |
1227 | 0 | } |
1228 | 0 | } |
1229 | | |
1230 | | static void ir_iter_replace_insn(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist) |
1231 | 0 | { |
1232 | 0 | ir_ref j, n, *p; |
1233 | 0 | ir_insn *insn; |
1234 | |
1235 | 0 | insn = &ctx->ir_base[ref]; |
1236 | 0 | n = insn->inputs_count; |
1237 | 0 | insn->opt = IR_NOP; /* keep "inputs_count" */ |
1238 | 0 | for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { |
1239 | 0 | ir_ref input = *p; |
1240 | 0 | *p = IR_UNUSED; |
1241 | 0 | if (input > 0) { |
1242 | 0 | ir_use_list_remove_all(ctx, input, ref); |
1243 | 0 | if (ir_is_dead(ctx, input)) { |
1244 | | /* schedule DCE */ |
1245 | 0 | ir_bitqueue_add(worklist, input); |
1246 | 0 | } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) { |
1247 | | /* try to optimize PHI into ABS/MIN/MAX/COND */ |
1248 | 0 | ir_bitqueue_add(worklist, ctx->ir_base[input].op1); |
1249 | 0 | } |
1250 | 0 | } |
1251 | 0 | } |
1252 | |
1253 | 0 | ir_iter_replace(ctx, ref, new_ref, worklist); |
1254 | |
1255 | 0 | CLEAR_USES(ref); |
1256 | 0 | } |
1257 | | |
1258 | | void ir_iter_update_op(ir_ctx *ctx, ir_ref ref, uint32_t idx, ir_ref new_val, ir_bitqueue *worklist) |
1259 | 0 | { |
1260 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1261 | 0 | ir_ref old_val = ir_insn_op(insn, idx); |
1262 | |
1263 | 0 | IR_ASSERT(old_val != new_val); |
1264 | 0 | if (!IR_IS_CONST_REF(new_val)) { |
1265 | 0 | ir_use_list_add(ctx, new_val, ref); |
1266 | 0 | } |
1267 | 0 | ir_insn_set_op(insn, idx, new_val); |
1268 | 0 | if (!IR_IS_CONST_REF(old_val)) { |
1269 | 0 | ir_use_list_remove_one(ctx, old_val, ref); |
1270 | 0 | if (ir_is_dead(ctx, old_val)) { |
1271 | | /* schedule DCE */ |
1272 | 0 | ir_bitqueue_add(worklist, old_val); |
1273 | 0 | } |
1274 | 0 | } |
1275 | 0 | } |
1276 | | |
1277 | | static ir_ref ir_iter_find_cse1(ir_ctx *ctx, uint32_t optx, ir_ref op1) |
1278 | 0 | { |
1279 | 0 | IR_ASSERT(!IR_IS_CONST_REF(op1)); |
1280 | |
1281 | 0 | ir_use_list *use_list = &ctx->use_lists[op1]; |
1282 | 0 | ir_ref *p, n = use_list->count; |
1283 | |
1284 | 0 | for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { |
1285 | 0 | ir_ref use = *p; |
1286 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1287 | |
1288 | 0 | if (use_insn->optx == optx) { |
1289 | 0 | IR_ASSERT(use_insn->op1 == op1); |
1290 | 0 | return use; |
1291 | 0 | } |
1292 | 0 | } |
1293 | 0 | return IR_UNUSED; |
1294 | 0 | } |
1295 | | |
1296 | | static ir_ref ir_iter_find_cse(ir_ctx *ctx, ir_ref ref, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_bitqueue *worklist) |
1297 | 0 | { |
1298 | 0 | uint32_t n = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); |
1299 | 0 | ir_use_list *use_list = NULL; |
1300 | 0 | ir_ref *p, use; |
1301 | 0 | ir_insn *use_insn; |
1302 | |
1303 | 0 | if (n == 2) { |
1304 | 0 | if (!IR_IS_CONST_REF(op1)) { |
1305 | 0 | use_list = &ctx->use_lists[op1]; |
1306 | 0 | } |
1307 | 0 | if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) { |
1308 | 0 | use_list = &ctx->use_lists[op2]; |
1309 | 0 | } |
1310 | 0 | if (use_list) { |
1311 | 0 | n = use_list->count; |
1312 | 0 | for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { |
1313 | 0 | use = *p; |
1314 | 0 | if (use != ref) { |
1315 | 0 | use_insn = &ctx->ir_base[use]; |
1316 | 0 | if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2) { |
1317 | 0 | IR_ASSERT(use_insn->op3 == op3); |
1318 | 0 | if (use < ref) { |
1319 | 0 | return use; |
1320 | 0 | } else { |
1321 | 0 | ir_bitqueue_add(worklist, use); |
1322 | 0 | } |
1323 | 0 | } |
1324 | 0 | } |
1325 | 0 | } |
1326 | 0 | } |
1327 | 0 | } else if (n < 2) { |
1328 | 0 | IR_ASSERT(n == 1); |
1329 | 0 | if (!IR_IS_CONST_REF(op1)) { |
1330 | 0 | use_list = &ctx->use_lists[op1]; |
1331 | 0 | n = use_list->count; |
1332 | 0 | for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { |
1333 | 0 | use = *p; |
1334 | 0 | if (use != ref) { |
1335 | 0 | use_insn = &ctx->ir_base[use]; |
1336 | 0 | if (use_insn->opt == opt) { |
1337 | 0 | IR_ASSERT(use_insn->op1 == op1); |
1338 | 0 | IR_ASSERT(use_insn->op2 == op2); |
1339 | 0 | IR_ASSERT(use_insn->op3 == op3); |
1340 | 0 | if (use < ref) { |
1341 | 0 | return use; |
1342 | 0 | } else { |
1343 | 0 | ir_bitqueue_add(worklist, use); |
1344 | 0 | } |
1345 | 0 | } |
1346 | 0 | } |
1347 | 0 | } |
1348 | 0 | } |
1349 | 0 | } else { |
1350 | 0 | IR_ASSERT(n == 3); |
1351 | 0 | if (!IR_IS_CONST_REF(op1)) { |
1352 | 0 | use_list = &ctx->use_lists[op1]; |
1353 | 0 | } |
1354 | 0 | if (!IR_IS_CONST_REF(op2) && (!use_list || use_list->count > ctx->use_lists[op2].count)) { |
1355 | 0 | use_list = &ctx->use_lists[op2]; |
1356 | 0 | } |
1357 | 0 | if (!IR_IS_CONST_REF(op3) && (!use_list || use_list->count > ctx->use_lists[op3].count)) { |
1358 | 0 | use_list = &ctx->use_lists[op3]; |
1359 | 0 | } |
1360 | 0 | if (use_list) { |
1361 | 0 | n = use_list->count; |
1362 | 0 | for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) { |
1363 | 0 | use = *p; |
1364 | 0 | if (use != ref) { |
1365 | 0 | use_insn = &ctx->ir_base[use]; |
1366 | 0 | if (use_insn->opt == opt && use_insn->op1 == op1 && use_insn->op2 == op2 && use_insn->op3 == op3) { |
1367 | 0 | if (use < ref) { |
1368 | 0 | return use; |
1369 | 0 | } else { |
1370 | 0 | ir_bitqueue_add(worklist, use); |
1371 | 0 | } |
1372 | 0 | } |
1373 | 0 | } |
1374 | 0 | } |
1375 | 0 | } |
1376 | 0 | } |
1377 | 0 | return IR_UNUSED; |
1378 | 0 | } |
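/*
 * Note: CSE here is use-list driven rather than hash-table driven: pick
 * the non-constant operand with the fewest users and linearly scan its use
 * list for an instruction with identical opt/op1/op2/op3.  An earlier
 * match (use < ref) becomes the replacement; a later match is requeued so
 * that the duplicate folds into this instruction instead.
 */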
1379 | | |
1380 | | static void ir_iter_fold(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist) |
1381 | 0 | { |
1382 | 0 | uint32_t opt; |
1383 | 0 | ir_ref op1, op2, op3, copy; |
1384 | 0 | ir_insn *op1_insn, *op2_insn, *op3_insn, *insn; |
1385 | |
1386 | 0 | insn = &ctx->ir_base[ref]; |
1387 | 0 | opt = insn->opt; |
1388 | 0 | op1 = insn->op1; |
1389 | 0 | op2 = insn->op2; |
1390 | 0 | op3 = insn->op3; |
1391 | |
1392 | 0 | restart: |
1393 | 0 | op1_insn = ctx->ir_base + op1; |
1394 | 0 | op2_insn = ctx->ir_base + op2; |
1395 | 0 | op3_insn = ctx->ir_base + op3; |
1396 | |
1397 | 0 | switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) { |
1398 | 0 | case IR_FOLD_DO_RESTART: |
1399 | 0 | opt = ctx->fold_insn.optx; |
1400 | 0 | op1 = ctx->fold_insn.op1; |
1401 | 0 | op2 = ctx->fold_insn.op2; |
1402 | 0 | op3 = ctx->fold_insn.op3; |
1403 | 0 | goto restart; |
1404 | 0 | case IR_FOLD_DO_CSE: |
1405 | 0 | copy = ir_iter_find_cse(ctx, ref, ctx->fold_insn.opt, |
1406 | 0 | ctx->fold_insn.op1, ctx->fold_insn.op2, ctx->fold_insn.op3, worklist); |
1407 | 0 | if (copy) { |
1408 | 0 | ir_iter_replace_insn(ctx, ref, copy, worklist); |
1409 | 0 | break; |
1410 | 0 | } |
1411 | 0 | IR_FALLTHROUGH; |
1412 | 0 | case IR_FOLD_DO_EMIT: |
1413 | 0 | insn = &ctx->ir_base[ref]; |
1414 | 0 | if (insn->opt != ctx->fold_insn.opt |
1415 | 0 | || insn->op1 != ctx->fold_insn.op1 |
1416 | 0 | || insn->op2 != ctx->fold_insn.op2 |
1417 | 0 | || insn->op3 != ctx->fold_insn.op3) { |
1418 | |
1419 | 0 | ir_use_list *use_list; |
1420 | 0 | ir_ref n, j, *p, use; |
1421 | |
1422 | 0 | insn->optx = ctx->fold_insn.opt; |
1423 | 0 | IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK])); |
1424 | 0 | insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]); |
1425 | 0 | if (insn->op1 != ctx->fold_insn.op1) { |
1426 | 0 | if (insn->op1 > 0) { |
1427 | 0 | ir_use_list_remove_one(ctx, insn->op1, ref); |
1428 | 0 | } |
1429 | 0 | if (ctx->fold_insn.op1 > 0) { |
1430 | 0 | ir_use_list_add(ctx, ctx->fold_insn.op1, ref); |
1431 | 0 | } |
1432 | 0 | } |
1433 | 0 | if (insn->op2 != ctx->fold_insn.op2) { |
1434 | 0 | if (insn->op2 > 0) { |
1435 | 0 | ir_use_list_remove_one(ctx, insn->op2, ref); |
1436 | 0 | } |
1437 | 0 | if (ctx->fold_insn.op2 > 0) { |
1438 | 0 | ir_use_list_add(ctx, ctx->fold_insn.op2, ref); |
1439 | 0 | } |
1440 | 0 | } |
1441 | 0 | if (insn->op3 != ctx->fold_insn.op3) { |
1442 | 0 | if (insn->op3 > 0) { |
1443 | 0 | ir_use_list_remove_one(ctx, insn->op3, ref); |
1444 | 0 | } |
1445 | 0 | if (ctx->fold_insn.op3 > 0) { |
1446 | 0 | ir_use_list_add(ctx, ctx->fold_insn.op3, ref); |
1447 | 0 | } |
1448 | 0 | } |
1449 | 0 | insn->op1 = ctx->fold_insn.op1; |
1450 | 0 | insn->op2 = ctx->fold_insn.op2; |
1451 | 0 | insn->op3 = ctx->fold_insn.op3; |
1452 | |
1453 | 0 | use_list = &ctx->use_lists[ref]; |
1454 | 0 | n = use_list->count; |
1455 | 0 | for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) { |
1456 | 0 | use = *p; |
1457 | 0 | ir_bitqueue_add(worklist, use); |
1458 | 0 | } |
1459 | 0 | } |
1460 | 0 | break; |
1461 | 0 | case IR_FOLD_DO_COPY: |
1462 | 0 | op1 = ctx->fold_insn.op1; |
1463 | 0 | ir_iter_replace_insn(ctx, ref, op1, worklist); |
1464 | 0 | break; |
1465 | 0 | case IR_FOLD_DO_CONST: |
1466 | 0 | op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type); |
1467 | 0 | ir_iter_replace_insn(ctx, ref, op1, worklist); |
1468 | 0 | break; |
1469 | 0 | default: |
1470 | 0 | IR_ASSERT(0); |
1471 | 0 | break; |
1472 | 0 | } |
1473 | 0 | } |
1474 | | |
1475 | | static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref) |
1476 | 0 | { |
1477 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1478 | |
1479 | 0 | IR_ASSERT(insn->type == IR_DOUBLE); |
1480 | 0 | if (IR_IS_CONST_REF(ref)) { |
1481 | 0 | return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d; |
1482 | 0 | } else { |
1483 | 0 | switch (insn->op) { |
1484 | 0 | case IR_FP2FP: |
1485 | 0 | return 1; |
1486 | | // case IR_INT2FP: |
1487 | | // return ctx->use_lists[ref].count == 1; |
1488 | 0 | case IR_NEG: |
1489 | 0 | case IR_ABS: |
1490 | 0 | return ctx->use_lists[ref].count == 1 && |
1491 | 0 | ir_may_promote_d2f(ctx, insn->op1); |
1492 | 0 | case IR_ADD: |
1493 | 0 | case IR_SUB: |
1494 | 0 | case IR_MUL: |
1495 | 0 | case IR_DIV: |
1496 | 0 | case IR_MIN: |
1497 | 0 | case IR_MAX: |
1498 | 0 | return ctx->use_lists[ref].count == 1 && |
1499 | 0 | ir_may_promote_d2f(ctx, insn->op1) && |
1500 | 0 | ir_may_promote_d2f(ctx, insn->op2); |
1501 | 0 | default: |
1502 | 0 | break; |
1503 | 0 | } |
1504 | 0 | } |
1505 | 0 | return 0; |
1506 | 0 | } |
1507 | | |
1508 | | static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) |
1509 | 0 | { |
1510 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1511 | |
1512 | 0 | IR_ASSERT(insn->type == IR_FLOAT); |
1513 | 0 | if (IR_IS_CONST_REF(ref)) { |
1514 | 0 | return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f; |
1515 | 0 | } else { |
1516 | 0 | switch (insn->op) { |
1517 | 0 | case IR_FP2FP: |
1518 | 0 | return 1; |
1519 | 0 | case IR_INT2FP: |
1520 | 0 | return ctx->use_lists[ref].count == 1; |
1521 | 0 | case IR_NEG: |
1522 | 0 | case IR_ABS: |
1523 | 0 | return ctx->use_lists[ref].count == 1 && |
1524 | 0 | ir_may_promote_f2d(ctx, insn->op1); |
1525 | 0 | case IR_ADD: |
1526 | 0 | case IR_SUB: |
1527 | 0 | case IR_MUL: |
1528 | | // case IR_DIV: |
1529 | 0 | case IR_MIN: |
1530 | 0 | case IR_MAX: |
1531 | 0 | return ctx->use_lists[ref].count == 1 && |
1532 | 0 | ir_may_promote_f2d(ctx, insn->op1) && |
1533 | 0 | ir_may_promote_f2d(ctx, insn->op2); |
1534 | 0 | default: |
1535 | 0 | break; |
1536 | 0 | } |
1537 | 0 | } |
1538 | 0 | return 0; |
1539 | 0 | } |
1540 | | |
1541 | | static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist) |
1542 | 0 | { |
1543 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1544 | 0 | uint32_t count; |
1545 | |
1546 | 0 | IR_ASSERT(insn->type == IR_DOUBLE); |
1547 | 0 | if (IR_IS_CONST_REF(ref)) { |
1548 | 0 | return ir_const_float(ctx, (float)insn->val.d); |
1549 | 0 | } else { |
1550 | 0 | ir_bitqueue_add(worklist, ref); |
1551 | 0 | switch (insn->op) { |
1552 | 0 | case IR_FP2FP: |
1553 | 0 | count = ctx->use_lists[ref].count; |
1554 | 0 | ir_use_list_remove_all(ctx, ref, use); |
1555 | 0 | if (ctx->use_lists[ref].count == 0) { |
1556 | 0 | ir_use_list_replace_one(ctx, insn->op1, ref, use); |
1557 | 0 | if (count > 1) { |
1558 | 0 | do { |
1559 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1560 | 0 | } while (--count > 1); |
1561 | 0 | } |
1562 | 0 | ref = insn->op1; |
1563 | 0 | MAKE_NOP(insn); |
1564 | 0 | return ref; |
1565 | 0 | } else { |
1566 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1567 | 0 | count -= ctx->use_lists[ref].count; |
1568 | 0 | if (count > 1) { |
1569 | 0 | do { |
1570 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1571 | 0 | } while (--count > 1); |
1572 | 0 | } |
1573 | 0 | } |
1574 | 0 | return insn->op1; |
1575 | | // case IR_INT2FP: |
1576 | | // insn->type = IR_FLOAT; |
1577 | | // return ref; |
1578 | 0 | case IR_NEG: |
1579 | 0 | case IR_ABS: |
1580 | 0 | insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); |
1581 | 0 | insn->type = IR_FLOAT; |
1582 | 0 | return ref; |
1583 | 0 | case IR_ADD: |
1584 | 0 | case IR_SUB: |
1585 | 0 | case IR_MUL: |
1586 | 0 | case IR_DIV: |
1587 | 0 | case IR_MIN: |
1588 | 0 | case IR_MAX: |
1589 | 0 | if (insn->op1 == insn->op2) { |
1590 | 0 | insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); |
1591 | 0 | } else { |
1592 | 0 | insn->op1 = ir_promote_d2f(ctx, insn->op1, ref, worklist); |
1593 | 0 | insn->op2 = ir_promote_d2f(ctx, insn->op2, ref, worklist); |
1594 | 0 | } |
1595 | 0 | insn->type = IR_FLOAT; |
1596 | 0 | return ref; |
1597 | 0 | default: |
1598 | 0 | break; |
1599 | 0 | } |
1600 | 0 | } |
1601 | 0 | IR_ASSERT(0); |
1602 | 0 | return ref; |
1603 | 0 | } |
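 | | /* Sketch of the overall effect (illustrative IR, not from this file):
 | |  *   t1 = FP2FP/double(f)       ; float -> double
 | |  *   t2 = ADD/double(t1, 1.0)
 | |  *   r  = FP2FP/float(t2)       ; double -> float
 | |  * becomes, after ir_promote_d2f() narrows the single-use chain,
 | |  *   r = ADD/float(f, 1.0)
 | |  * with both conversions gone and all use-lists rewired accordingly.
 | |  */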
1604 | | |
1605 | | static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_bitqueue *worklist) |
1606 | 0 | { |
1607 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1608 | 0 | uint32_t count; |
1609 | 0 | ir_ref old_ref; |
1610 | |
1611 | 0 | IR_ASSERT(insn->type == IR_FLOAT); |
1612 | 0 | if (IR_IS_CONST_REF(ref)) { |
1613 | 0 | return ir_const_double(ctx, (double)insn->val.f); |
1614 | 0 | } else { |
1615 | 0 | ir_bitqueue_add(worklist, ref); |
1616 | 0 | switch (insn->op) { |
1617 | 0 | case IR_FP2FP: |
1618 | 0 | count = ctx->use_lists[ref].count; |
1619 | 0 | ir_use_list_remove_all(ctx, ref, use); |
1620 | 0 | if (ctx->use_lists[ref].count == 0) { |
1621 | 0 | ir_use_list_replace_one(ctx, insn->op1, ref, use); |
1622 | 0 | if (count > 1) { |
1623 | 0 | do { |
1624 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1625 | 0 | } while (--count > 1); |
1626 | 0 | } |
1627 | 0 | ref = insn->op1; |
1628 | 0 | MAKE_NOP(insn); |
1629 | 0 | return ref; |
1630 | 0 | } else { |
1631 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1632 | 0 | count -= ctx->use_lists[ref].count; |
1633 | 0 | if (count > 1) { |
1634 | 0 | do { |
1635 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1636 | 0 | } while (--count > 1); |
1637 | 0 | } |
1638 | 0 | } |
1639 | 0 | return insn->op1; |
1640 | 0 | case IR_INT2FP: |
1641 | 0 | old_ref = ir_iter_find_cse1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1); |
1642 | 0 | if (old_ref) { |
1643 | 0 | IR_ASSERT(ctx->use_lists[ref].count == 1); |
1644 | 0 | ir_use_list_remove_one(ctx, insn->op1, ref); |
1645 | 0 | CLEAR_USES(ref); |
1646 | 0 | MAKE_NOP(insn); |
1647 | 0 | ir_use_list_add(ctx, old_ref, use); |
1648 | 0 | return old_ref; |
1649 | 0 | } |
1650 | 0 | insn->type = IR_DOUBLE; |
1651 | 0 | return ref; |
1652 | 0 | case IR_NEG: |
1653 | 0 | case IR_ABS: |
1654 | 0 | insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); |
1655 | 0 | insn->type = IR_DOUBLE; |
1656 | 0 | return ref; |
1657 | 0 | case IR_ADD: |
1658 | 0 | case IR_SUB: |
1659 | 0 | case IR_MUL: |
1660 | | // case IR_DIV: |
1661 | 0 | case IR_MIN: |
1662 | 0 | case IR_MAX: |
1663 | 0 | if (insn->op1 == insn->op2) { |
1664 | 0 | insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); |
1665 | 0 | } else { |
1666 | 0 | insn->op1 = ir_promote_f2d(ctx, insn->op1, ref, worklist); |
1667 | 0 | insn->op2 = ir_promote_f2d(ctx, insn->op2, ref, worklist); |
1668 | 0 | } |
1669 | 0 | insn->type = IR_DOUBLE; |
1670 | 0 | return ref; |
1671 | 0 | default: |
1672 | 0 | break; |
1673 | 0 | } |
1674 | 0 | } |
1675 | 0 | IR_ASSERT(0); |
1676 | 0 | return ref; |
1677 | 0 | } |
1678 | | |
1679 | | static bool ir_may_promote_trunc(ir_ctx *ctx, ir_type type, ir_ref ref) |
1680 | 0 | { |
1681 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1682 | 0 | ir_ref *p, n, input; |
1683 | |
1684 | 0 | if (IR_IS_CONST_REF(ref)) { |
1685 | 0 | return !IR_IS_SYM_CONST(insn->op); |
1686 | 0 | } else { |
1687 | 0 | switch (insn->op) { |
1688 | 0 | case IR_ZEXT: |
1689 | 0 | case IR_SEXT: |
1690 | 0 | case IR_TRUNC: |
1691 | 0 | return ctx->ir_base[insn->op1].type == type || ctx->use_lists[ref].count == 1; |
1692 | 0 | case IR_NEG: |
1693 | 0 | case IR_ABS: |
1694 | 0 | case IR_NOT: |
1695 | 0 | return ctx->use_lists[ref].count == 1 && |
1696 | 0 | ir_may_promote_trunc(ctx, type, insn->op1); |
1697 | 0 | case IR_ADD: |
1698 | 0 | case IR_SUB: |
1699 | 0 | case IR_MUL: |
1700 | 0 | case IR_MIN: |
1701 | 0 | case IR_MAX: |
1702 | 0 | case IR_OR: |
1703 | 0 | case IR_AND: |
1704 | 0 | case IR_XOR: |
1705 | 0 | case IR_SHL: |
1706 | 0 | return ctx->use_lists[ref].count == 1 && |
1707 | 0 | ir_may_promote_trunc(ctx, type, insn->op1) && |
1708 | 0 | ir_may_promote_trunc(ctx, type, insn->op2); |
1709 | | // case IR_SHR: |
1710 | | // case IR_SAR: |
1711 | | // case IR_DIV: |
1712 | | // case IR_MOD: |
1713 | | // case IR_FP2INT: |
1714 | | // TODO: ??? |
1715 | 0 | case IR_COND: |
1716 | 0 | return ctx->use_lists[ref].count == 1 && |
1717 | 0 | ir_may_promote_trunc(ctx, type, insn->op2) && |
1718 | 0 | ir_may_promote_trunc(ctx, type, insn->op3); |
1719 | 0 | case IR_PHI: |
1720 | 0 | if (ctx->use_lists[ref].count != 1) { |
1721 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
1722 | 0 | ir_ref count = 0; |
1723 | |
1724 | 0 | for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) { |
1725 | 0 | if (*p != ref) { |
1726 | 0 | if (count) { |
1727 | 0 | return 0; |
1728 | 0 | } |
1729 | 0 | count = 1; |
1730 | 0 | } |
1731 | 0 | } |
1732 | 0 | } |
1733 | 0 | for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) { |
1734 | 0 | input = *p; |
1735 | 0 | if (input != ref) { |
1736 | 0 | if (!ir_may_promote_trunc(ctx, type, input)) { |
1737 | 0 | return 0; |
1738 | 0 | } |
1739 | 0 | } |
1740 | 0 | } |
1741 | 0 | return 1; |
1742 | 0 | default: |
1743 | 0 | break; |
1744 | 0 | } |
1745 | 0 | } |
1746 | 0 | return 0; |
1747 | 0 | } |
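 | | /* Illustrative example (assumed IR): for 32-bit 'a' and 'b',
 | |  *   t = ADD/i64(SEXT/i64(a), SEXT/i64(b));  r = TRUNC/i32(t)
 | |  * is promotable: the ADD is single-use and each SEXT's source type
 | |  * already equals the target type, so the whole chain can be evaluated
 | |  * directly in i32.
 | |  */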
1748 | | |
1749 | | static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use, ir_bitqueue *worklist) |
1750 | 0 | { |
1751 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1752 | 0 | uint32_t count; |
1753 | 0 | ir_ref *p, n, input; |
1754 | |
1755 | 0 | if (IR_IS_CONST_REF(ref)) { |
1756 | 0 | ir_val val; |
1757 | |
1758 | 0 | switch (type) { |
1759 | 0 | case IR_I8: val.i64 = insn->val.i8; break; |
1760 | 0 | case IR_U8: val.u64 = insn->val.u8; break; |
1761 | 0 | case IR_I16: val.i64 = insn->val.i16; break; |
1762 | 0 | case IR_U16: val.u64 = insn->val.u16; break; |
1763 | 0 | case IR_I32: val.i64 = insn->val.i32; break; |
1764 | 0 | case IR_U32: val.u64 = insn->val.u32; break; |
1765 | 0 | case IR_CHAR:val.i64 = insn->val.i8; break; |
1766 | 0 | case IR_BOOL:val.u64 = insn->val.u8 != 0; break; |
1767 | 0 | default: IR_ASSERT(0); val.u64 = 0; |
1768 | 0 | } |
1769 | 0 | return ir_const(ctx, val, type); |
1770 | 0 | } else { |
1771 | 0 | ir_bitqueue_add(worklist, ref); |
1772 | 0 | switch (insn->op) { |
1773 | 0 | case IR_ZEXT: |
1774 | 0 | case IR_SEXT: |
1775 | 0 | case IR_TRUNC: |
1776 | 0 | if (ctx->ir_base[insn->op1].type != type) { |
1777 | 0 | ir_type src_type = ctx->ir_base[insn->op1].type; |
1778 | 0 | if (ir_type_size[src_type] == ir_type_size[type]) { |
1779 | 0 | insn->op = IR_BITCAST; |
1780 | 0 | } else if (ir_type_size[src_type] > ir_type_size[type]) { |
1781 | 0 | insn->op = IR_TRUNC; |
1782 | 0 | } else { |
1783 | 0 | if (insn->op != IR_SEXT && insn->op != IR_ZEXT) { |
1784 | 0 | insn->op = IR_IS_TYPE_SIGNED(type) ? IR_SEXT : IR_ZEXT; |
1785 | 0 | } |
1786 | 0 | } |
1787 | 0 | insn->type = type; |
1788 | 0 | return ref; |
1789 | 0 | } |
1790 | | |
1791 | 0 | count = ctx->use_lists[ref].count; |
1792 | 0 | ir_use_list_remove_all(ctx, ref, use); |
1793 | 0 | if (ctx->use_lists[ref].count == 0) { |
1794 | 0 | ir_use_list_replace_one(ctx, insn->op1, ref, use); |
1795 | 0 | if (count > 1) { |
1796 | 0 | do { |
1797 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1798 | 0 | } while (--count > 1); |
1799 | 0 | } |
1800 | 0 | ref = insn->op1; |
1801 | 0 | MAKE_NOP(insn); |
1802 | 0 | return ref; |
1803 | 0 | } else { |
1804 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1805 | 0 | count -= ctx->use_lists[ref].count; |
1806 | 0 | if (count > 1) { |
1807 | 0 | do { |
1808 | 0 | ir_use_list_add(ctx, insn->op1, use); |
1809 | 0 | } while (--count > 1); |
1810 | 0 | } |
1811 | 0 | } |
1812 | 0 | return insn->op1; |
1813 | 0 | case IR_NEG: |
1814 | 0 | case IR_ABS: |
1815 | 0 | case IR_NOT: |
1816 | 0 | insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); |
1817 | 0 | insn->type = type; |
1818 | 0 | return ref; |
1819 | 0 | case IR_ADD: |
1820 | 0 | case IR_SUB: |
1821 | 0 | case IR_MUL: |
1822 | 0 | case IR_MIN: |
1823 | 0 | case IR_MAX: |
1824 | 0 | case IR_OR: |
1825 | 0 | case IR_AND: |
1826 | 0 | case IR_XOR: |
1827 | 0 | case IR_SHL: |
1828 | 0 | if (insn->op1 == insn->op2) { |
1829 | 0 | insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); |
1830 | 0 | } else { |
1831 | 0 | insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref, worklist); |
1832 | 0 | insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); |
1833 | 0 | } |
1834 | 0 | insn->type = type; |
1835 | 0 | return ref; |
1836 | | // case IR_DIV: |
1837 | | // case IR_MOD: |
1838 | | // case IR_SHR: |
1839 | | // case IR_SAR: |
1840 | | // case IR_FP2INT: |
1841 | | // TODO: ??? |
1842 | 0 | case IR_COND: |
1843 | 0 | if (insn->op2 == insn->op3) { |
1844 | 0 | insn->op3 = insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); |
1845 | 0 | } else { |
1846 | 0 | insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref, worklist); |
1847 | 0 | insn->op3 = ir_promote_i2i(ctx, type, insn->op3, ref, worklist); |
1848 | 0 | } |
1849 | 0 | insn->type = type; |
1850 | 0 | return ref; |
1851 | 0 | case IR_PHI: |
1852 | 0 | for (p = insn->ops + 2, n = insn->inputs_count - 1; n > 0; p++, n--) { |
1853 | 0 | input = *p; |
1854 | 0 | if (input != ref) { |
1855 | 0 | *p = ir_promote_i2i(ctx, type, input, ref, worklist); |
1856 | 0 | } |
1857 | 0 | } |
1858 | 0 | insn->type = type; |
1859 | 0 | return ref; |
1860 | 0 | default: |
1861 | 0 | break; |
1862 | 0 | } |
1863 | 0 | } |
1864 | 0 | IR_ASSERT(0); |
1865 | 0 | return ref; |
1866 | 0 | } |
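 | | /* Worked example (illustrative): narrowing a constant reinterprets its
 | |  * low bits in the target type, e.g. the I32 constant 300 (0x12C)
 | |  * promoted to I8 yields (int8_t)0x2C == 44, and IR_BOOL maps a non-zero
 | |  * low byte to 1.  Non-constant nodes are retyped in place, recursing
 | |  * into the operands that ir_may_promote_trunc() validated above.
 | |  */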
1867 | | |
1868 | | static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type) |
1869 | 0 | { |
1870 | 0 | ir_val new_val; |
1871 | |
1872 | 0 | switch (val_insn->type) { |
1873 | 0 | default: |
1874 | 0 | IR_ASSERT(0); |
1875 | 0 | case IR_I8: |
1876 | 0 | case IR_U8: |
1877 | 0 | case IR_BOOL: |
1878 | 0 | case IR_CHAR: |
1879 | 0 | if (op == IR_SEXT) { |
1880 | 0 | new_val.i64 = (int64_t)val_insn->val.i8; |
1881 | 0 | } else { |
1882 | 0 | new_val.u64 = (uint64_t)val_insn->val.u8; |
1883 | 0 | } |
1884 | 0 | break; |
1885 | 0 | case IR_I16: |
1886 | 0 | case IR_U16: |
1887 | 0 | if (op == IR_SEXT) { |
1888 | 0 | new_val.i64 = (int64_t)val_insn->val.i16; |
1889 | 0 | } else { |
1890 | 0 | new_val.u64 = (uint64_t)val_insn->val.u16; |
1891 | 0 | } |
1892 | 0 | break; |
1893 | 0 | case IR_I32: |
1894 | 0 | case IR_U32: |
1895 | 0 | if (op == IR_SEXT) { |
1896 | 0 | new_val.i64 = (int64_t)val_insn->val.i32; |
1897 | 0 | } else { |
1898 | 0 | new_val.u64 = (uint64_t)val_insn->val.u32; |
1899 | 0 | } |
1900 | 0 | break; |
1901 | 0 | } |
1902 | 0 | return ir_const(ctx, new_val, type); |
1903 | 0 | } |
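 | | /* Worked example: extending the I8 constant 0xFF (-1):
 | |  *   op == IR_SEXT -> (int64_t)(int8_t)0xFF   == -1
 | |  *   op == IR_ZEXT -> (uint64_t)(uint8_t)0xFF == 255
 | |  */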
1904 | | |
1905 | | static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist) |
1906 | 0 | { |
1907 | 0 | uint32_t optx = IR_OPTX(op, type, 1); |
1908 | 0 | ir_ref ref; |
1909 | |
1910 | 0 | if (!IR_IS_CONST_REF(src_ref)) { |
1911 | 0 | ref = ir_iter_find_cse1(ctx, optx, src_ref); |
1912 | 0 | if (ref) { |
1913 | 0 | ir_use_list_add(ctx, ref, var_ref); |
1914 | 0 | if (!IR_IS_CONST_REF(src_ref)) { |
1915 | 0 | ir_use_list_remove_one(ctx, src_ref, var_ref); |
1916 | 0 | } |
1917 | 0 | ir_bitqueue_add(worklist, ref); |
1918 | 0 | return ref; |
1919 | 0 | } |
1920 | 0 | } |
1921 | | |
1922 | 0 | ref = ir_emit1(ctx, optx, src_ref); |
1923 | 0 | ir_use_list_add(ctx, ref, var_ref); |
1924 | 0 | if (!IR_IS_CONST_REF(src_ref)) { |
1925 | 0 | ir_use_list_replace_one(ctx, src_ref, var_ref, ref); |
1926 | 0 | } |
1927 | 0 | ir_bitqueue_grow(worklist, ref + 1); |
1928 | 0 | ir_bitqueue_add(worklist, ref); |
1929 | 0 | return ref; |
1930 | 0 | } |
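 | | /* Note: ir_iter_find_cse1() lets an equivalent extension that already
 | |  * exists be reused instead of emitting a duplicate; a freshly emitted
 | |  * ref may lie past the worklist's current capacity, which is why
 | |  * ir_bitqueue_grow() precedes the final add.
 | |  */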
1931 | | |
1932 | | static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop) |
1933 | 0 | { |
1934 | 0 | ir_insn *insn; |
1935 | 0 | ir_ref n, *p, input, result, ctrl; |
1936 | |
1937 | 0 | if (IR_IS_CONST_REF(val)) { |
1938 | 0 | return 1; /* IR_START */ |
1939 | 0 | } |
1940 | | |
1941 | 0 | insn = &ctx->ir_base[val]; |
1942 | 0 | if (ir_op_flags[insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) { |
1943 | 0 | return val; |
1944 | 0 | } |
1945 | | |
1946 | 0 | IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_DATA); |
1947 | 0 | if (IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL_DEP) { |
1948 | 0 | return insn->op1; |
1949 | 0 | } |
1950 | | |
1951 | 0 | n = insn->inputs_count; |
1952 | 0 | p = insn->ops + 1; |
1953 | |
1954 | 0 | result = 1; |
1955 | 0 | for (; n > 0; p++, n--) { |
1956 | 0 | input = *p; |
1957 | 0 | ctrl = _ir_estimated_control(ctx, input, loop); |
1958 | 0 | if (ctrl >= loop) return ctrl; |
1959 | 0 | if (ctrl > result) { // TODO: check dominance depth instead of order |
1960 | 0 | result = ctrl; |
1961 | 0 | } |
1962 | 0 | } |
1963 | 0 | return result; |
1964 | 0 | } |
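 | | /* Illustrative: the result is an estimate of the earliest control node
 | |  * below which 'val' is available -- the maximum anchor over all
 | |  * transitive data inputs, with constants anchored at ref 1 (IR_START).
 | |  * Per the TODO above, node order is used as a stand-in for dominance.
 | |  */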
1965 | | |
1966 | | static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop) |
1967 | 0 | { |
1968 | 0 | ref = _ir_estimated_control(ctx, ref, loop); |
1969 | 0 | return ref < loop; // TODO: check dominance instead of order |
1970 | 0 | } |
1971 | | |
1972 | | static bool ir_is_cheaper_ext(ir_ctx *ctx, ir_ref ref, ir_ref loop, ir_ref ext_ref, ir_op op) |
1973 | 0 | { |
1974 | 0 | if (IR_IS_CONST_REF(ref)) { |
1975 | 0 | return 1; |
1976 | 0 | } else { |
1977 | 0 | ir_insn *insn = &ctx->ir_base[ref]; |
1978 | |
1979 | 0 | if (insn->op == IR_LOAD) { |
1980 | 0 | if (ir_is_loop_invariant(ctx, ref, loop)) { |
1981 | 0 | return 1; |
1982 | 0 | } else { |
1983 | | /* ZEXT(LOAD(_, _)) costs the same as LOAD(_, _) */ |
1984 | 0 | if (ctx->use_lists[ref].count == 2) { |
1985 | 0 | return 1; |
1986 | 0 | } else if (ctx->use_lists[ref].count == 3) { |
1987 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
1988 | 0 | ir_ref *p, n, use; |
1989 | |
1990 | 0 | for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) { |
1991 | 0 | use = *p; |
1992 | 0 | if (use != ext_ref) { |
1993 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1994 | |
1995 | 0 | if (use_insn->op != op |
1996 | 0 | && (!(ir_op_flags[use_insn->op] & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM)) |
1997 | 0 | || use_insn->op1 != ref)) { |
1998 | 0 | return 0; |
1999 | 0 | } |
2000 | 0 | } |
2001 | 0 | } |
2002 | 0 | return 1; |
2003 | 0 | } |
2004 | 0 | } |
2005 | 0 | return 0; |
2006 | 0 | } else { |
2007 | 0 | return ir_is_loop_invariant(ctx, ref, loop); |
2008 | 0 | } |
2009 | 0 | } |
2010 | 0 | } |
2011 | | |
2012 | | static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist) |
2013 | 0 | { |
2014 | 0 | ir_op op = ctx->ir_base[ext_ref].op; |
2015 | 0 | ir_type type = ctx->ir_base[ext_ref].type; |
2016 | 0 | ir_insn *phi_insn; |
2017 | 0 | ir_use_list *use_list; |
2018 | 0 | ir_ref n, *p, use, ext_ref_2 = IR_UNUSED; |
2019 | | |
2020 | | /* Check if we may change the type of the induction variable */ |
2021 | 0 | use_list = &ctx->use_lists[phi_ref]; |
2022 | 0 | n = use_list->count; |
2023 | 0 | if (n > 1) { |
2024 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
2025 | 0 | use = *p; |
2026 | 0 | if (use == op_ref || use == ext_ref) { |
2027 | 0 | continue; |
2028 | 0 | } else { |
2029 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
2030 | |
2031 | 0 | if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) { |
2032 | 0 | if (use_insn->op1 == phi_ref) { |
2033 | 0 | if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) { |
2034 | 0 | return 0; |
2035 | 0 | } |
2036 | 0 | if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) { |
2037 | 0 | continue; |
2038 | 0 | } |
2039 | 0 | } else if (use_insn->op2 == phi_ref) { |
2040 | 0 | if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) { |
2041 | 0 | return 0; |
2042 | 0 | } |
2043 | 0 | if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) { |
2044 | 0 | continue; |
2045 | 0 | } |
2046 | 0 | } |
2047 | 0 | return 0; |
2048 | 0 | } else if (use_insn->op == IR_IF) { |
2049 | 0 | continue; |
2050 | 0 | } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) { |
2051 | 0 | ext_ref_2 = use; |
2052 | 0 | continue; |
2053 | 0 | } else { |
2054 | 0 | return 0; |
2055 | 0 | } |
2056 | 0 | } |
2057 | 0 | } |
2058 | 0 | } |
2059 | | |
2060 | 0 | use_list = &ctx->use_lists[op_ref]; |
2061 | 0 | n = use_list->count; |
2062 | 0 | if (n > 1) { |
2063 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
2064 | 0 | use = *p; |
2065 | 0 | if (use == phi_ref || use == ext_ref) { |
2066 | 0 | continue; |
2067 | 0 | } else { |
2068 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
2069 | |
2070 | 0 | if (use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) { |
2071 | 0 | if (use_insn->op1 == phi_ref) { |
2072 | 0 | if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op2].type)) { |
2073 | 0 | return 0; |
2074 | 0 | } |
2075 | 0 | if (ir_is_cheaper_ext(ctx, use_insn->op2, ctx->ir_base[phi_ref].op1, ext_ref, op)) { |
2076 | 0 | continue; |
2077 | 0 | } |
2078 | 0 | } else if (use_insn->op2 == phi_ref) { |
2079 | 0 | if (IR_IS_TYPE_SIGNED(type) != IR_IS_TYPE_SIGNED(ctx->ir_base[use_insn->op1].type)) { |
2080 | 0 | return 0; |
2081 | 0 | } |
2082 | 0 | if (ir_is_cheaper_ext(ctx, use_insn->op1, ctx->ir_base[phi_ref].op1, ext_ref, op)) { |
2083 | 0 | continue; |
2084 | 0 | } |
2085 | 0 | } |
2086 | 0 | return 0; |
2087 | 0 | } else if (use_insn->op == IR_IF) { |
2088 | 0 | continue; |
2089 | 0 | } else if (!ext_ref_2 && use_insn->op == op && use_insn->type == type) { |
2090 | 0 | ext_ref_2 = use; |
2091 | 0 | continue; |
2092 | 0 | } else { |
2093 | 0 | return 0; |
2094 | 0 | } |
2095 | 0 | } |
2096 | 0 | } |
2097 | 0 | } |
2098 | | |
2099 | 0 | for (n = 0; n < ctx->use_lists[phi_ref].count; n++) { |
2100 | | /* "use_lists" may be reallocated by ir_ext_ref() */ |
2101 | 0 | use = ctx->use_edges[ctx->use_lists[phi_ref].refs + n]; |
2102 | 0 | if (use == ext_ref) { |
2103 | 0 | continue; |
2104 | 0 | } else { |
2105 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
2106 | |
2107 | 0 | if (use_insn->op == IR_IF) { |
2108 | 0 | continue; |
2109 | 0 | } else if (use_insn->op == op) { |
2110 | 0 | IR_ASSERT(ext_ref_2 == use); |
2111 | 0 | continue; |
2112 | 0 | } |
2113 | 0 | IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT) |
2114 | 0 | || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL) |
2115 | 0 | && (use_insn->op1 == phi_ref || use_insn->op2 == phi_ref)); |
2116 | 0 | if (use_insn->op1 != phi_ref) { |
2117 | 0 | if (IR_IS_CONST_REF(use_insn->op1) |
2118 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { |
2119 | 0 | ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); |
2120 | 0 | } else { |
2121 | 0 | ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); |
2122 | 0 | } |
2123 | 0 | ir_bitqueue_add(worklist, use); |
2124 | 0 | } |
2125 | 0 | if (use_insn->op2 != phi_ref) { |
2126 | 0 | if (IR_IS_CONST_REF(use_insn->op2) |
2127 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { |
2128 | 0 | ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); |
2129 | 0 | } else { |
2130 | 0 | ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); |
2131 | 0 | } |
2132 | 0 | ir_bitqueue_add(worklist, use); |
2133 | 0 | } |
2134 | 0 | } |
2135 | 0 | } |
2136 | | |
2137 | 0 | if (ctx->use_lists[op_ref].count > 1) { |
2138 | 0 | for (n = 0; n < ctx->use_lists[op_ref].count; n++) { |
2139 | | /* "use_lists" may be reallocated by ir_ext_ref() */ |
2140 | 0 | use = ctx->use_edges[ctx->use_lists[op_ref].refs + n]; |
2141 | 0 | if (use == ext_ref || use == phi_ref) { |
2142 | 0 | continue; |
2143 | 0 | } else { |
2144 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
2145 | |
2146 | 0 | if (use_insn->op == IR_IF) { |
2147 | 0 | continue; |
2148 | 0 | } else if (use_insn->op == op) { |
2149 | 0 | IR_ASSERT(ext_ref_2 == use); |
2150 | 0 | continue; |
2151 | 0 | } |
2152 | 0 | IR_ASSERT(use_insn->op >= IR_EQ && use_insn->op <= IR_UGT); |
2153 | 0 | if (use_insn->op1 != op_ref) { |
2154 | 0 | if (IR_IS_CONST_REF(use_insn->op1) |
2155 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { |
2156 | 0 | ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); |
2157 | 0 | } else { |
2158 | 0 | ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); |
2159 | 0 | } |
2160 | 0 | ir_bitqueue_add(worklist, use); |
2161 | 0 | } |
2162 | 0 | if (use_insn->op2 != op_ref) { |
2163 | 0 | if (IR_IS_CONST_REF(use_insn->op2) |
2164 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { |
2165 | 0 | ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); |
2166 | 0 | } else { |
2167 | 0 | ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); |
2168 | 0 | } |
2169 | 0 | ir_bitqueue_add(worklist, use); |
2170 | 0 | } |
2171 | 0 | } |
2172 | 0 | } |
2173 | 0 | } |
2174 | | |
2175 | 0 | ir_iter_replace_insn(ctx, ext_ref, ctx->ir_base[ext_ref].op1, worklist); |
2176 | |
2177 | 0 | if (ext_ref_2) { |
2178 | 0 | ir_iter_replace_insn(ctx, ext_ref_2, ctx->ir_base[ext_ref_2].op1, worklist); |
2179 | 0 | } |
2180 | |
2181 | 0 | ctx->ir_base[op_ref].type = type; |
2182 | |
2183 | 0 | phi_insn = &ctx->ir_base[phi_ref]; |
2184 | 0 | phi_insn->type = type; |
2185 | 0 | if (IR_IS_CONST_REF(phi_insn->op2) |
2186 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) { |
2187 | 0 | ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type); |
2188 | 0 | } else { |
2189 | 0 | ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist); |
2190 | 0 | } |
2191 | |
2192 | 0 | return 1; |
2193 | 0 | } |
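 | | /* Sketch of the payoff (illustrative): for a counted loop
 | |  *   i/i32 = PHI(loop, 0, i+1);  ... SEXT/i64(i) used as an index ...
 | |  * the PHI and the ADD are retyped to i64, the per-iteration SEXT is
 | |  * deleted, and loop-invariant comparands get extended once (via
 | |  * ir_ext_const()/ir_ext_ref()) instead of on every iteration.
 | |  */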
2194 | | |
2195 | | static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist) |
2196 | 0 | { |
2197 | 0 | ir_ref ref = insn->op1; |
2198 | | |
2199 | | /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */ |
2200 | 0 | insn = &ctx->ir_base[ref]; |
2201 | 0 | if (insn->op == IR_PHI |
2202 | 0 | && insn->inputs_count == 3 /* (2 values) */ |
2203 | 0 | && ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) { |
2204 | 0 | ir_ref op_ref = insn->op3; |
2205 | 0 | ir_insn *op_insn = &ctx->ir_base[op_ref]; |
2206 | |
2207 | 0 | if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || op_insn->op == IR_MUL) { |
2208 | 0 | if (op_insn->op1 == ref) { |
2209 | 0 | if (ir_is_loop_invariant(ctx, op_insn->op2, insn->op1)) { |
2210 | 0 | return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist); |
2211 | 0 | } |
2212 | 0 | } else if (op_insn->op2 == ref) { |
2213 | 0 | if (ir_is_loop_invariant(ctx, op_insn->op1, insn->op1)) { |
2214 | 0 | return ir_try_promote_induction_var_ext(ctx, ext_ref, ref, op_ref, worklist); |
2215 | 0 | } |
2216 | 0 | } |
2217 | 0 | } |
2218 | 0 | } else if (insn->op == IR_ADD || insn->op == IR_SUB || insn->op == IR_MUL) { |
2219 | 0 | if (!IR_IS_CONST_REF(insn->op1) |
2220 | 0 | && ctx->ir_base[insn->op1].op == IR_PHI |
2221 | 0 | && ctx->ir_base[insn->op1].inputs_count == 3 /* (2 values) */ |
2222 | 0 | && ctx->ir_base[insn->op1].op3 == ref |
2223 | 0 | && ctx->ir_base[ctx->ir_base[insn->op1].op1].op == IR_LOOP_BEGIN |
2224 | 0 | && ir_is_loop_invariant(ctx, insn->op2, ctx->ir_base[insn->op1].op1)) { |
2225 | 0 | return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op1, ref, worklist); |
2226 | 0 | } else if (!IR_IS_CONST_REF(insn->op2) |
2227 | 0 | && ctx->ir_base[insn->op2].op == IR_PHI |
2228 | 0 | && ctx->ir_base[insn->op2].inputs_count == 3 /* (2 values) */ |
2229 | 0 | && ctx->ir_base[insn->op2].op3 == ref |
2230 | 0 | && ctx->ir_base[ctx->ir_base[insn->op2].op1].op == IR_LOOP_BEGIN |
2231 | 0 | && ir_is_loop_invariant(ctx, insn->op1, ctx->ir_base[insn->op2].op1)) { |
2232 | 0 | return ir_try_promote_induction_var_ext(ctx, ext_ref, insn->op2, ref, worklist); |
2233 | 0 | } |
2234 | 0 | } |
2235 | | |
2236 | 0 | return 0; |
2237 | 0 | } |
2238 | | |
2239 | | static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref) |
2240 | 0 | { |
2241 | 0 | ir_use_list *use_list = &ctx->use_lists[if_ref]; |
2242 | 0 | ir_ref *p = &ctx->use_edges[use_list->refs]; |
2243 | |
2244 | 0 | IR_ASSERT(use_list->count == 2); |
2245 | 0 | if (ctx->ir_base[*p].op == IR_IF_TRUE) { |
2246 | 0 | IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE); |
2247 | 0 | *if_true_ref = *p; |
2248 | 0 | *if_false_ref = *(p + 1); |
2249 | 0 | } else { |
2250 | 0 | IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE); |
2251 | 0 | IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE); |
2252 | 0 | *if_false_ref = *p; |
2253 | 0 | *if_true_ref = *(p + 1); |
2254 | 0 | } |
2255 | 0 | } |
2256 | | |
2257 | | static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist) |
2258 | 0 | { |
2259 | 0 | ir_ref prev, next; |
2260 | 0 | ir_use_list *use_list; |
2261 | |
2262 | 0 | if (ctx->use_lists[begin].count > 1) { |
2263 | 0 | ir_ref *p, n, i, use; |
2264 | 0 | ir_insn *use_insn; |
2265 | 0 | ir_ref region = end; |
2266 | 0 | ir_ref next = IR_UNUSED; |
2267 | |
2268 | 0 | while (!IR_IS_BB_START(ctx->ir_base[region].op)) { |
2269 | 0 | region = ctx->ir_base[region].op1; |
2270 | 0 | } |
2271 | |
2272 | 0 | use_list = &ctx->use_lists[begin]; |
2273 | 0 | n = use_list->count; |
2274 | 0 | for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) { |
2275 | 0 | use = *p; |
2276 | 0 | use_insn = &ctx->ir_base[use]; |
2277 | 0 | if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { |
2278 | 0 | IR_ASSERT(!next); |
2279 | 0 | next = use; |
2280 | 0 | } else { |
2281 | 0 | IR_ASSERT(use_insn->op == IR_VAR); |
2282 | 0 | IR_ASSERT(use_insn->op1 == begin); |
2283 | 0 | use_insn->op1 = region; |
2284 | 0 | if (ir_use_list_add(ctx, region, use)) { |
2285 | | /* restore after reallocation */ |
2286 | 0 | use_list = &ctx->use_lists[begin]; |
2287 | 0 | n = use_list->count; |
2288 | 0 | p = &ctx->use_edges[use_list->refs + i]; |
2289 | 0 | } |
2290 | 0 | } |
2291 | 0 | } |
2292 | |
2293 | 0 | IR_ASSERT(next); |
2294 | 0 | ctx->use_edges[use_list->refs] = next; |
2295 | 0 | use_list->count = 1; |
2296 | 0 | } |
2297 | |
2298 | 0 | IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN); |
2299 | 0 | IR_ASSERT(ctx->ir_base[end].op == IR_END); |
2300 | 0 | IR_ASSERT(ctx->ir_base[begin].op1 == end); |
2301 | 0 | IR_ASSERT(ctx->use_lists[end].count == 1); |
2302 | |
2303 | 0 | prev = ctx->ir_base[end].op1; |
2304 | |
2305 | 0 | use_list = &ctx->use_lists[begin]; |
2306 | 0 | IR_ASSERT(use_list->count == 1); |
2307 | 0 | next = ctx->use_edges[use_list->refs]; |
2308 | | |
2309 | | /* remove BEGIN and END */ |
2310 | 0 | MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin); |
2311 | 0 | MAKE_NOP(&ctx->ir_base[end]); CLEAR_USES(end); |
2312 | | |
2313 | | /* connect their predecessor and successor */ |
2314 | 0 | ctx->ir_base[next].op1 = prev; |
2315 | 0 | ir_use_list_replace_one(ctx, prev, end, next); |
2316 | |
2317 | 0 | if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) { |
2318 | 0 | ir_bitqueue_add(worklist, prev); |
2319 | 0 | } |
2320 | 0 | } |
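 | | /* Illustrative: this fuses a straight-line END -> BEGIN pair,
 | |  *   prev -> END -> BEGIN -> next   =>   prev -> next
 | |  * after re-anchoring any VARs that referenced the dead BEGIN at the
 | |  * enclosing region's start.
 | |  */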
2321 | | |
2322 | | static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end) |
2323 | 0 | { |
2324 | 0 | ir_use_list *use_list = &ctx->use_lists[start]; |
2325 | 0 | ir_ref *p, use, n = use_list->count; |
2326 | |
2327 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
2328 | 0 | use = *p; |
2329 | 0 | if (use != end) { |
2330 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
2331 | 0 | IR_ASSERT(use_insn->op == IR_VAR); |
2332 | 0 | IR_ASSERT(ctx->use_lists[use].count == 0); |
2333 | 0 | MAKE_NOP(use_insn); |
2334 | 0 | } |
2335 | 0 | } |
2336 | 0 | } |
2337 | | |
2338 | | static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
2339 | 0 | { |
2340 | 0 | if (insn->inputs_count == 2) { |
2341 | 0 | ir_ref end1_ref = insn->op1, end2_ref = insn->op2; |
2342 | 0 | ir_insn *end1 = &ctx->ir_base[end1_ref]; |
2343 | 0 | ir_insn *end2 = &ctx->ir_base[end2_ref]; |
2344 | |
2345 | 0 | if (end1->op != IR_END || end2->op != IR_END) { |
2346 | 0 | return 0; |
2347 | 0 | } |
2348 | | |
2349 | 0 | ir_ref start1_ref = end1->op1, start2_ref = end2->op1; |
2350 | 0 | ir_insn *start1 = &ctx->ir_base[start1_ref]; |
2351 | 0 | ir_insn *start2 = &ctx->ir_base[start2_ref]; |
2352 | |
2353 | 0 | if (start1->op1 != start2->op1) { |
2354 | 0 | return 0; |
2355 | 0 | } |
2356 | | |
2357 | 0 | ir_ref root_ref = start1->op1; |
2358 | 0 | ir_insn *root = &ctx->ir_base[root_ref]; |
2359 | |
2360 | 0 | if (root->op != IR_IF |
2361 | 0 | && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) { |
2362 | 0 | return 0; |
2363 | 0 | } |
2364 | | |
2365 | | /* Empty Diamond |
2366 | | * |
2367 | | * prev prev |
2368 | | * | condition | condition |
2369 | | * | / | |
2370 | | * IF | |
2371 | | * | \ | |
2372 | | * | +-----+ | |
2373 | | * | IF_FALSE | |
2374 | | * IF_TRUE | => | |
2375 | | * | END | |
2376 | | * END / | |
2377 | | * | +---+ | |
2378 | | * | / | |
2379 | | * MERGE | |
2380 | | * | | |
2381 | | * next next |
2382 | | */ |
2383 | | |
2384 | 0 | ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs]; |
2385 | 0 | ir_insn *next = &ctx->ir_base[next_ref]; |
2386 | |
2387 | 0 | if (ctx->use_lists[start1_ref].count != 1) { |
2388 | 0 | ir_remove_unused_vars(ctx, start1_ref, end1_ref); |
2389 | 0 | } |
2390 | 0 | if (ctx->use_lists[start2_ref].count != 1) { |
2391 | 0 | ir_remove_unused_vars(ctx, start2_ref, end2_ref); |
2392 | 0 | } |
2393 | |
2394 | 0 | next->op1 = root->op1; |
2395 | 0 | ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); |
2396 | 0 | if (!IR_IS_CONST_REF(root->op2)) { |
2397 | 0 | ir_use_list_remove_all(ctx, root->op2, root_ref); |
2398 | 0 | if (ir_is_dead(ctx, root->op2)) { |
2399 | 0 | ir_bitqueue_add(worklist, root->op2); |
2400 | 0 | } |
2401 | 0 | } |
2402 | |
2403 | 0 | MAKE_NOP(root); CLEAR_USES(root_ref); |
2404 | 0 | MAKE_NOP(start1); CLEAR_USES(start1_ref); |
2405 | 0 | MAKE_NOP(start2); CLEAR_USES(start2_ref); |
2406 | 0 | MAKE_NOP(end1); CLEAR_USES(end1_ref); |
2407 | 0 | MAKE_NOP(end2); CLEAR_USES(end2_ref); |
2408 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
2409 | |
2410 | 0 | if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) { |
2411 | 0 | ir_bitqueue_add(worklist, next->op1); |
2412 | 0 | } |
2413 | |
2414 | 0 | return 1; |
2415 | 0 | } else { |
2416 | 0 | ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1; |
2417 | 0 | ir_ref root_ref = IR_UNUSED; |
2418 | |
2419 | 0 | for (i = 0; i < count; i++) { |
2420 | 0 | ir_ref end_ref, start_ref; |
2421 | 0 | ir_insn *end, *start; |
2422 | |
2423 | 0 | end_ref = ops[i]; |
2424 | 0 | end = &ctx->ir_base[end_ref]; |
2425 | 0 | if (end->op != IR_END) { |
2426 | 0 | return 0; |
2427 | 0 | } |
2428 | 0 | start_ref = end->op1; |
2429 | 0 | start = &ctx->ir_base[start_ref]; |
2430 | 0 | if (start->op != IR_CASE_VAL && start->op != IR_CASE_RANGE && start->op != IR_CASE_DEFAULT) { |
2431 | 0 | return 0; |
2432 | 0 | } |
2433 | 0 | if (ctx->use_lists[start_ref].count != 1) { |
2434 | 0 | ir_remove_unused_vars(ctx, start_ref, end_ref); |
2435 | 0 | } |
2436 | 0 | if (!root_ref) { |
2437 | 0 | root_ref = start->op1; |
2438 | 0 | if (ctx->use_lists[root_ref].count != count) { |
2439 | 0 | return 0; |
2440 | 0 | } |
2441 | 0 | } else if (start->op1 != root_ref) { |
2442 | 0 | return 0; |
2443 | 0 | } |
2444 | 0 | } |
2445 | | |
2446 | | /* Empty N-Diamond */ |
2447 | 0 | ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs]; |
2448 | 0 | ir_insn *next = &ctx->ir_base[next_ref]; |
2449 | 0 | ir_insn *root = &ctx->ir_base[root_ref]; |
2450 | |
2451 | 0 | next->op1 = root->op1; |
2452 | 0 | ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); |
2453 | |
2454 | 0 | if (!IR_IS_CONST_REF(root->op2)) { |
2455 | 0 | ir_use_list_remove_all(ctx, root->op2, root_ref); |
2456 | 0 | if (ir_is_dead(ctx, root->op2)) { |
2457 | 0 | ir_bitqueue_add(worklist, root->op2); |
2458 | 0 | } |
2459 | 0 | } |
2460 | |
2461 | 0 | MAKE_NOP(root); CLEAR_USES(root_ref); |
2462 | |
2463 | 0 | for (i = 0; i < count; i++) { |
2464 | 0 | ir_ref end_ref = ops[i]; |
2465 | 0 | ir_insn *end = &ctx->ir_base[end_ref]; |
2466 | 0 | ir_ref start_ref = end->op1; |
2467 | 0 | ir_insn *start = &ctx->ir_base[start_ref]; |
2468 | |
2469 | 0 | MAKE_NOP(start); CLEAR_USES(start_ref); |
2470 | 0 | MAKE_NOP(end); CLEAR_USES(end_ref); |
2471 | 0 | } |
2472 | |
2473 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
2474 | |
2475 | 0 | if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) { |
2476 | 0 | ir_bitqueue_add(worklist, next->op1); |
2477 | 0 | } |
2478 | |
2479 | 0 | return 1; |
2480 | 0 | } |
2481 | 0 | } |
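 | | /* Illustrative: such diamonds typically remain after both arms of a
 | |  * conditional lose all their instructions to earlier folding,
 | |  * e.g. 'if (cond) { } else { }' collapses to straight-line code;
 | |  * the N-way branch above handles a SWITCH whose CASE arms are all
 | |  * empty.
 | |  */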
2482 | | |
2483 | | static bool ir_is_zero(ir_ctx *ctx, ir_ref ref) |
2484 | 0 | { |
2485 | 0 | return IR_IS_CONST_REF(ref) |
2486 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[ref].op) |
2487 | 0 | && ctx->ir_base[ref].val.u32 == 0; |
2488 | 0 | } |
2489 | | |
2490 | | static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
2491 | 0 | { |
2492 | 0 | IR_ASSERT(insn->inputs_count == 3); |
2493 | 0 | IR_ASSERT(ctx->use_lists[merge_ref].count == 2); |
2494 | |
2495 | 0 | ir_ref end1_ref = merge->op1, end2_ref = merge->op2; |
2496 | 0 | ir_insn *end1 = &ctx->ir_base[end1_ref]; |
2497 | 0 | ir_insn *end2 = &ctx->ir_base[end2_ref]; |
2498 | |
2499 | 0 | if (end1->op == IR_END && end2->op == IR_END) { |
2500 | 0 | ir_ref start1_ref = end1->op1, start2_ref = end2->op1; |
2501 | 0 | ir_insn *start1 = &ctx->ir_base[start1_ref]; |
2502 | 0 | ir_insn *start2 = &ctx->ir_base[start2_ref]; |
2503 | |
2504 | 0 | if (start1->op1 == start2->op1) { |
2505 | 0 | ir_ref root_ref = start1->op1; |
2506 | 0 | ir_insn *root = &ctx->ir_base[root_ref]; |
2507 | |
2508 | 0 | if (root->op == IR_IF && !IR_IS_CONST_REF(root->op2) && ctx->use_lists[root->op2].count == 1) { |
2509 | 0 | ir_ref cond_ref = root->op2; |
2510 | 0 | ir_insn *cond = &ctx->ir_base[cond_ref]; |
2511 | 0 | ir_type type = insn->type; |
2512 | 0 | bool is_cmp, is_less; |
2513 | |
2514 | 0 | if (IR_IS_TYPE_FP(type)) { |
2515 | 0 | is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE || |
2516 | 0 | cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); |
2517 | 0 | is_less = (cond->op == IR_LT || cond->op == IR_LE || |
2518 | 0 | cond->op == IR_ULT || cond->op == IR_ULE); |
2519 | 0 | } else if (IR_IS_TYPE_SIGNED(type)) { |
2520 | 0 | is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE); |
2521 | 0 | is_less = (cond->op == IR_LT || cond->op == IR_LE); |
2522 | 0 | } else { |
2523 | 0 | IR_ASSERT(IR_IS_TYPE_UNSIGNED(type)); |
2524 | 0 | is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE); |
2525 | 0 | is_less = (cond->op == IR_ULT || cond->op == IR_ULE); |
2526 | 0 | } |
2527 | |
2528 | 0 | if (is_cmp |
2529 | 0 | && ((insn->op2 == cond->op1 && insn->op3 == cond->op2) |
2530 | 0 | || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) { |
2531 | | /* MAX/MIN |
2532 | | * |
2533 | | * prev prev |
2534 | | * | LT(A, B) | |
2535 | | * | / | |
2536 | | * IF | |
2537 | | * | \ | |
2538 | | * | +-----+ | |
2539 | | * | IF_FALSE | |
2540 | | * IF_TRUE | => | |
2541 | | * | END | |
2542 | | * END / | |
2543 | | * | +---+ | |
2544 | | * | / | |
2545 | | * MERGE | |
2546 | | * | \ | |
2547 | | * | PHI(A, B) | MIN(A, B) |
2548 | | * next next |
2549 | | */ |
2550 | 0 | ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; |
2551 | 0 | ir_insn *next; |
2552 | |
2553 | 0 | if (next_ref == ref) { |
2554 | 0 | next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; |
2555 | 0 | } |
2556 | 0 | next = &ctx->ir_base[next_ref]; |
2557 | |
2558 | 0 | if (ctx->use_lists[start1_ref].count != 1) { |
2559 | 0 | ir_remove_unused_vars(ctx, start1_ref, end1_ref); |
2560 | 0 | } |
2561 | 0 | if (ctx->use_lists[start2_ref].count != 1) { |
2562 | 0 | ir_remove_unused_vars(ctx, start2_ref, end2_ref); |
2563 | 0 | } |
2564 | |
2565 | 0 | insn->op = ( |
2566 | 0 | (is_less ? cond->op1 : cond->op2) |
2567 | 0 | == |
2568 | 0 | ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3) |
2569 | 0 | ) ? IR_MIN : IR_MAX; |
2570 | 0 | insn->inputs_count = 2; |
2571 | 0 | if (insn->op2 > insn->op3) { |
2572 | 0 | insn->op1 = insn->op2; |
2573 | 0 | insn->op2 = insn->op3; |
2574 | 0 | } else { |
2575 | 0 | insn->op1 = insn->op3; |
2576 | 0 | } |
2577 | 0 | insn->op3 = IR_UNUSED; |
2578 | |
2579 | 0 | next->op1 = root->op1; |
2580 | 0 | ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); |
2581 | 0 | if (!IR_IS_CONST_REF(insn->op1)) { |
2582 | 0 | ir_use_list_remove_all(ctx, insn->op1, cond_ref); |
2583 | 0 | } |
2584 | 0 | if (!IR_IS_CONST_REF(insn->op2)) { |
2585 | 0 | ir_use_list_remove_all(ctx, insn->op2, cond_ref); |
2586 | 0 | } |
2587 | |
2588 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
2589 | 0 | MAKE_NOP(root); CLEAR_USES(root_ref); |
2590 | 0 | MAKE_NOP(start1); CLEAR_USES(start1_ref); |
2591 | 0 | MAKE_NOP(start2); CLEAR_USES(start2_ref); |
2592 | 0 | MAKE_NOP(end1); CLEAR_USES(end1_ref); |
2593 | 0 | MAKE_NOP(end2); CLEAR_USES(end2_ref); |
2594 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
2595 | |
2596 | 0 | if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) { |
2597 | 0 | ir_bitqueue_add(worklist, next->op1); |
2598 | 0 | } |
2599 | |
2600 | 0 | return 1; |
2601 | 0 | } else if (is_cmp |
2602 | 0 | && ((ctx->ir_base[insn->op2].op == IR_NEG |
2603 | 0 | && ctx->use_lists[insn->op2].count == 1 |
2604 | 0 | && ctx->ir_base[insn->op2].op1 == insn->op3 |
2605 | 0 | && ((cond->op1 == insn->op3 |
2606 | 0 | && ir_is_zero(ctx, cond->op2) |
2607 | 0 | && is_less == (start1->op == IR_IF_TRUE)) |
2608 | 0 | || (cond->op2 == insn->op3 |
2609 | 0 | && ir_is_zero(ctx, cond->op1) |
2610 | 0 | && is_less != (start1->op == IR_IF_TRUE)))) |
2611 | 0 | || (ctx->ir_base[insn->op3].op == IR_NEG |
2612 | 0 | && ctx->use_lists[insn->op3].count == 1 |
2613 | 0 | && ctx->ir_base[insn->op3].op1 == insn->op2 |
2614 | 0 | && ((cond->op1 == insn->op2 |
2615 | 0 | && ir_is_zero(ctx, cond->op2) |
2616 | 0 | && is_less != (start1->op == IR_IF_TRUE)) |
2617 | 0 | || (cond->op2 == insn->op2 |
2618 | 0 | && ir_is_zero(ctx, cond->op1) |
2619 | 0 | && is_less == (start1->op == IR_IF_TRUE)))))) { |
2620 | | /* ABS |
2621 | | * |
2622 | | * prev prev |
2623 | | * | LT(A, 0) | |
2624 | | * | / | |
2625 | | * IF | |
2626 | | * | \ | |
2627 | | * | +-----+ | |
2628 | | * | IF_FALSE | |
2629 | | * IF_TRUE | => | |
2630 | | * | END | |
2631 | | * END / | |
2632 | | * | +---+ | |
2633 | | * | / | |
2634 | | * MERGE | |
2635 | | * | \ | |
2636 | | * | PHI(A, NEG(A)) | ABS(A) |
2637 | | * next next |
2638 | | */ |
2639 | 0 | ir_ref neg_ref; |
2640 | 0 | ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; |
2641 | 0 | ir_insn *next; |
2642 | |
2643 | 0 | if (next_ref == ref) { |
2644 | 0 | next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; |
2645 | 0 | } |
2646 | 0 | next = &ctx->ir_base[next_ref]; |
2647 | |
2648 | 0 | if (ctx->use_lists[start1_ref].count != 1) { |
2649 | 0 | ir_remove_unused_vars(ctx, start1_ref, end1_ref); |
2650 | 0 | } |
2651 | 0 | if (ctx->use_lists[start2_ref].count != 1) { |
2652 | 0 | ir_remove_unused_vars(ctx, start2_ref, end2_ref); |
2653 | 0 | } |
2654 | |
2655 | 0 | insn->op = IR_ABS; |
2656 | 0 | insn->inputs_count = 1; |
2657 | 0 | if (ctx->ir_base[insn->op2].op == IR_NEG) { |
2658 | 0 | neg_ref = insn->op2; |
2659 | 0 | insn->op1 = insn->op3; |
2660 | 0 | } else { |
2661 | 0 | neg_ref = insn->op3; |
2662 | 0 | insn->op1 = insn->op2; |
2663 | 0 | } |
2664 | 0 | insn->op2 = IR_UNUSED; |
2665 | 0 | insn->op3 = IR_UNUSED; |
2666 | |
2667 | 0 | next->op1 = root->op1; |
2668 | 0 | ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); |
2669 | 0 | ir_use_list_remove_one(ctx, insn->op1, neg_ref); |
2670 | 0 | if (!IR_IS_CONST_REF(insn->op1)) { |
2671 | 0 | ir_use_list_remove_all(ctx, insn->op1, cond_ref); |
2672 | 0 | } |
2673 | |
2674 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
2675 | 0 | MAKE_NOP(root); CLEAR_USES(root_ref); |
2676 | 0 | MAKE_NOP(start1); CLEAR_USES(start1_ref); |
2677 | 0 | MAKE_NOP(start2); CLEAR_USES(start2_ref); |
2678 | 0 | MAKE_NOP(end1); CLEAR_USES(end1_ref); |
2679 | 0 | MAKE_NOP(end2); CLEAR_USES(end2_ref); |
2680 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
2681 | 0 | MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref); |
2682 | |
2683 | 0 | if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) { |
2684 | 0 | ir_bitqueue_add(worklist, next->op1); |
2685 | 0 | } |
2686 | |
2687 | 0 | return 1; |
2688 | | #if 0 |
2689 | | } else { |
2690 | | /* COND |
2691 | | * |
2692 | | * prev prev |
2693 | | * | cond | |
2694 | | * | / | |
2695 | | * IF | |
2696 | | * | \ | |
2697 | | * | +-----+ | |
2698 | | * | IF_FALSE | |
2699 | | * IF_TRUE | => | |
2700 | | * | END | |
2701 | | * END / | |
2702 | | * | +---+ | |
2703 | | * | / | |
2704 | | * MERGE | |
2705 | | * | \ | |
2706 | | * | PHI(A, B) | COND(cond, A, B) |
2707 | | * next next |
2708 | | */ |
2709 | | ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs]; |
2710 | | ir_insn *next; |
2711 | | |
2712 | | if (next_ref == ref) { |
2713 | | next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1]; |
2714 | | } |
2715 | | next = &ctx->ir_base[next_ref]; |
2716 | | |
2717 | | if (ctx->use_lists[start1_ref].count != 1) { |
2718 | | ir_remove_unused_vars(ctx, start1_ref, end1_ref); |
2719 | | } |
2720 | | if (ctx->use_lists[start2_ref].count != 1) { |
2721 | | ir_remove_unused_vars(ctx, start2_ref, end2_ref); |
2722 | | } |
2723 | | |
2724 | | insn->op = IR_COND; |
2725 | | insn->inputs_count = 3; |
2726 | | insn->op1 = cond_ref; |
2727 | | if (start1->op == IR_IF_FALSE) { |
2728 | | SWAP_REFS(insn->op2, insn->op3); |
2729 | | } |
2730 | | |
2731 | | next->op1 = root->op1; |
2732 | | ir_use_list_replace_one(ctx, cond_ref, root_ref, ref); |
2733 | | ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref); |
2734 | | ir_use_list_remove_all(ctx, root->op2, root_ref); |
2735 | | |
2736 | | MAKE_NOP(root); CLEAR_USES(root_ref); |
2737 | | MAKE_NOP(start1); CLEAR_USES(start1_ref); |
2738 | | MAKE_NOP(start2); CLEAR_USES(start2_ref); |
2739 | | MAKE_NOP(end1); CLEAR_USES(end1_ref); |
2740 | | MAKE_NOP(end2); CLEAR_USES(end2_ref); |
2741 | | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
2742 | | |
2743 | | if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) { |
2744 | | ir_bitqueue_add(worklist, next->op1); |
2745 | | } |
2746 | | |
2747 | | return 1; |
2748 | | #endif |
2749 | 0 | } |
2750 | 0 | } |
2751 | 0 | } |
2752 | 0 | } |
2753 | | |
2754 | 0 | return 0; |
2755 | 0 | } |
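 | | /* Illustrative: the two patterns recognized above correspond to
 | |  *   r = (a < b) ? a : b;   =>  r = MIN(a, b);
 | |  *   r = (a < 0) ? -a : a;  =>  r = ABS(a);
 | |  * provided the diamond is otherwise empty and the comparison's
 | |  * operands and signedness match the PHI's type.
 | |  */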
2756 | | |
2757 | | static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2) |
2758 | 0 | { |
2759 | 0 | IR_ASSERT(op1->type == op2->type); |
2760 | 0 | if (IR_IS_TYPE_INT(op1->type)) { |
2761 | 0 | if (op == IR_EQ) { |
2762 | 0 | return op1->val.u64 == op2->val.u64; |
2763 | 0 | } else if (op == IR_NE) { |
2764 | 0 | return op1->val.u64 != op2->val.u64; |
2765 | 0 | } else if (op == IR_LT) { |
2766 | 0 | if (IR_IS_TYPE_SIGNED(op1->type)) { |
2767 | 0 | return op1->val.i64 < op2->val.i64; |
2768 | 0 | } else { |
2769 | 0 | return op1->val.u64 < op2->val.u64; |
2770 | 0 | } |
2771 | 0 | } else if (op == IR_GE) { |
2772 | 0 | if (IR_IS_TYPE_SIGNED(op1->type)) { |
2773 | 0 | return op1->val.i64 >= op2->val.i64; |
2774 | 0 | } else { |
2775 | 0 | return op1->val.u64 >= op2->val.u64; |
2776 | 0 | } |
2777 | 0 | } else if (op == IR_LE) { |
2778 | 0 | if (IR_IS_TYPE_SIGNED(op1->type)) { |
2779 | 0 | return op1->val.i64 <= op2->val.i64; |
2780 | 0 | } else { |
2781 | 0 | return op1->val.u64 <= op2->val.u64; |
2782 | 0 | } |
2783 | 0 | } else if (op == IR_GT) { |
2784 | 0 | if (IR_IS_TYPE_SIGNED(op1->type)) { |
2785 | 0 | return op1->val.i64 > op2->val.i64; |
2786 | 0 | } else { |
2787 | 0 | return op1->val.u64 > op2->val.u64; |
2788 | 0 | } |
2789 | 0 | } else if (op == IR_ULT) { |
2790 | 0 | return op1->val.u64 < op2->val.u64; |
2791 | 0 | } else if (op == IR_UGE) { |
2792 | 0 | return op1->val.u64 >= op2->val.u64; |
2793 | 0 | } else if (op == IR_ULE) { |
2794 | 0 | return op1->val.u64 <= op2->val.u64; |
2795 | 0 | } else if (op == IR_UGT) { |
2796 | 0 | return op1->val.u64 > op2->val.u64; |
2797 | 0 | } else { |
2798 | 0 | IR_ASSERT(0); |
2799 | 0 | return 0; |
2800 | 0 | } |
2801 | 0 | } else if (op1->type == IR_DOUBLE) { |
2802 | 0 | if (op == IR_EQ) { |
2803 | 0 | return op1->val.d == op2->val.d; |
2804 | 0 | } else if (op == IR_NE) { |
2805 | 0 | return op1->val.d != op2->val.d; |
2806 | 0 | } else if (op == IR_LT) { |
2807 | 0 | return op1->val.d < op2->val.d; |
2808 | 0 | } else if (op == IR_GE) { |
2809 | 0 | return op1->val.d >= op2->val.d; |
2810 | 0 | } else if (op == IR_LE) { |
2811 | 0 | return op1->val.d <= op2->val.d; |
2812 | 0 | } else if (op == IR_GT) { |
2813 | 0 | return op1->val.d > op2->val.d; |
2814 | 0 | } else if (op == IR_ULT) { |
2815 | 0 | return !(op1->val.d >= op2->val.d); |
2816 | 0 | } else if (op == IR_UGE) { |
2817 | 0 | return !(op1->val.d < op2->val.d); |
2818 | 0 | } else if (op == IR_ULE) { |
2819 | 0 | return !(op1->val.d > op2->val.d); |
2820 | 0 | } else if (op == IR_UGT) { |
2821 | 0 | return !(op1->val.d <= op2->val.d); |
2822 | 0 | } else { |
2823 | 0 | IR_ASSERT(0); |
2824 | 0 | return 0; |
2825 | 0 | } |
2826 | 0 | } else { |
2827 | 0 | IR_ASSERT(op1->type == IR_FLOAT); |
2828 | 0 | if (op == IR_EQ) { |
2829 | 0 | return op1->val.f == op2->val.f; |
2830 | 0 | } else if (op == IR_NE) { |
2831 | 0 | return op1->val.f != op2->val.f; |
2832 | 0 | } else if (op == IR_LT) { |
2833 | 0 | return op1->val.f < op2->val.f; |
2834 | 0 | } else if (op == IR_GE) { |
2835 | 0 | return op1->val.f >= op2->val.f; |
2836 | 0 | } else if (op == IR_LE) { |
2837 | 0 | return op1->val.f <= op2->val.f; |
2838 | 0 | } else if (op == IR_GT) { |
2839 | 0 | return op1->val.f > op2->val.f; |
2840 | 0 | } else if (op == IR_ULT) { |
2841 | 0 | return !(op1->val.f >= op2->val.f); |
2842 | 0 | } else if (op == IR_UGE) { |
2843 | 0 | return !(op1->val.f < op2->val.f); |
2844 | 0 | } else if (op == IR_ULE) { |
2845 | 0 | return !(op1->val.f > op2->val.f); |
2846 | 0 | } else if (op == IR_UGT) { |
2847 | 0 | return !(op1->val.f <= op2->val.f); |
2848 | 0 | } else { |
2849 | 0 | IR_ASSERT(0); |
2850 | 0 | return 0; |
2851 | 0 | } |
2852 | 0 | } |
2853 | 0 | } |
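 | | /* Note: for FLOAT/DOUBLE operands IR_ULT/IR_UGE/IR_ULE/IR_UGT are the
 | |  * unordered comparisons; they are computed above as the negation of
 | |  * the complementary ordered test, so they yield true when either
 | |  * operand is NaN, e.g. IR_ULT(NaN, 1.0) == !(NaN >= 1.0) == 1.
 | |  */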
2854 | | |
2855 | | static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
2856 | 0 | { |
2857 | 0 | ir_ref cond_ref = insn->op2; |
2858 | 0 | ir_insn *cond = &ctx->ir_base[cond_ref]; |
2859 | |
2860 | 0 | if (cond->op == IR_PHI |
2861 | 0 | && cond->inputs_count == 3 |
2862 | 0 | && cond->op1 == insn->op1 |
2863 | 0 | && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) |
2864 | 0 | || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) { |
2865 | 0 | ir_ref merge_ref = insn->op1; |
2866 | 0 | ir_insn *merge = &ctx->ir_base[merge_ref]; |
2867 | |
2868 | 0 | if (ctx->use_lists[merge_ref].count == 2) { |
2869 | 0 | ir_ref end1_ref = merge->op1, end2_ref = merge->op2; |
2870 | 0 | ir_insn *end1 = &ctx->ir_base[end1_ref]; |
2871 | 0 | ir_insn *end2 = &ctx->ir_base[end2_ref]; |
2872 | |
2873 | 0 | if (end1->op == IR_END && end2->op == IR_END) { |
2874 | 0 | ir_ref if_true_ref, if_false_ref; |
2875 | 0 | ir_insn *if_true, *if_false; |
2876 | 0 | ir_op op = IR_IF_FALSE; |
2877 | |
2878 | 0 | ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref); |
2879 | |
2880 | 0 | if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) { |
2881 | 0 | IR_ASSERT(IR_IS_CONST_REF(cond->op3)); |
2882 | 0 | SWAP_REFS(cond->op2, cond->op3); |
2883 | 0 | SWAP_REFS(merge->op1, merge->op2); |
2884 | 0 | SWAP_REFS(end1_ref, end2_ref); |
2885 | 0 | SWAP_INSNS(end1, end2); |
2886 | 0 | } |
2887 | 0 | if (ir_const_is_true(&ctx->ir_base[cond->op2])) { |
2888 | 0 | SWAP_REFS(if_true_ref, if_false_ref); |
2889 | 0 | op = IR_IF_TRUE; |
2890 | 0 | } |
2891 | 0 | if_true = &ctx->ir_base[if_true_ref]; |
2892 | 0 | if_false = &ctx->ir_base[if_false_ref]; |
2893 | |
2894 | 0 | if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) { |
2895 | 0 | if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) { |
2896 | | /* Simple IF Split |
2897 | | * |
2898 | | * | | | | |
2899 | | * | END | END |
2900 | | * END / END \ |
2901 | | * | +---+ | + |
2902 | | * | / | | |
2903 | | * MERGE | | |
2904 | | * | \ | | |
2905 | | * | PHI(false, true) | | |
2906 | | * | / | | |
2907 | | * IF => | | |
2908 | | * | \ | | |
2909 | | * | +------+ | | |
2910 | | * | IF_TRUE | BEGIN |
2911 | | * IF_FALSE | BEGIN |
2912 | | * | | |
2913 | | */ |
2914 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
2915 | 0 | ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref); |
2916 | |
2917 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
2918 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
2919 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
2920 | |
2921 | 0 | if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); |
2922 | 0 | if_false->op1 = end1_ref; |
2923 | |
2924 | 0 | if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); |
2925 | 0 | if_true->op1 = end2_ref; |
2926 | |
2927 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
2928 | 0 | ir_bitqueue_add(worklist, if_true_ref); |
2929 | |
2930 | 0 | return 1; |
2931 | 0 | } else { |
2932 | | /* Simple IF Split |
2933 | | * |
2934 | | * | | | | |
2935 | | * | END | END |
2936 | | * END / END | |
2937 | | * | +---+ | | |
2938 | | * | / | | |
2939 | | * MERGE | + |
2940 | | * | \ | / |
2941 | | * | PHI(false, false) | / |
2942 | | * | / | / |
2943 | | * IF => | / |
2944 | | * | \ | / |
2945 | | * | +------+ | / |
2946 | | * | IF_TRUE | / BEGIN(unreachable) |
2947 | | * IF_FALSE | MERGE |
2948 | | * | | |
2949 | | */ |
2950 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
2951 | 0 | ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); |
2952 | |
2953 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
2954 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
2955 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
2956 | |
2957 | 0 | if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); |
2958 | 0 | if_false->op1 = end1_ref; |
2959 | 0 | if_false->op2 = end2_ref; |
2960 | |
2961 | 0 | if_true->optx = IR_BEGIN; |
2962 | 0 | if_true->op1 = IR_UNUSED; |
2963 | |
2964 | 0 | ctx->flags2 &= ~IR_CFG_REACHABLE; |
2965 | |
2966 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
2967 | |
2968 | 0 | return 1; |
2969 | 0 | } |
2970 | 0 | } |
2971 | | |
2972 | | /* Simple IF Split |
2973 | | * |
2974 | | * | | | | |
2975 | | * | END | IF(X) |
2976 | | * END / END / \ |
2977 | | * | +---+ | +--+ + |
2978 | | * | / | / | |
2979 | | * MERGE | IF_FALSE | |
2980 | | * | \ | | | |
2981 | | * | PHI(false, X) | | | |
2982 | | * | / | | | |
2983 | | * IF => | END | |
2984 | | * | \ | | | |
2985 | | * | +------+ | | | |
2986 | | * | IF_TRUE | | IF_TRUE |
2987 | | * IF_FALSE | MERGE |
2988 | | * | | |
2989 | | */ |
2990 | 0 | ir_use_list_remove_all(ctx, merge_ref, cond_ref); |
2991 | 0 | ir_use_list_remove_all(ctx, ref, if_true_ref); |
2992 | 0 | if (!IR_IS_CONST_REF(cond->op3)) { |
2993 | 0 | ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref); |
2994 | 0 | } |
2995 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
2996 | 0 | ir_use_list_add(ctx, end2_ref, if_true_ref); |
2997 | |
2998 | 0 | end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); |
2999 | 0 | end2->op2 = cond->op3; |
3000 | 0 | ir_bitqueue_add(worklist, end2_ref); |
3001 | |
3002 | 0 | merge->optx = IR_OPTX(op, IR_VOID, 1); |
3003 | 0 | merge->op1 = end2_ref; |
3004 | 0 | merge->op2 = IR_UNUSED; |
3005 | |
3006 | 0 | MAKE_NOP(cond); |
3007 | 0 | CLEAR_USES(cond_ref); |
3008 | |
3009 | 0 | insn->optx = IR_OPTX(IR_END, IR_VOID, 1); |
3010 | 0 | insn->op1 = merge_ref; |
3011 | 0 | insn->op2 = IR_UNUSED; |
3012 | |
3013 | 0 | if_true->op1 = end2_ref; |
3014 | |
3015 | 0 | if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); |
3016 | 0 | if_false->op1 = end1_ref; |
3017 | 0 | if_false->op2 = ref; |
3018 | |
3019 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
3020 | 0 | if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { |
3021 | 0 | ir_bitqueue_add(worklist, end2->op1); |
3022 | 0 | } |
3023 | |
3024 | 0 | return 1; |
3025 | 0 | } |
3026 | 0 | } |
3027 | 0 | } |
3028 | | |
3029 | 0 | return 0; |
3030 | 0 | } |
3031 | | |
3032 | | static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
3033 | 0 | { |
3034 | 0 | ir_ref cond_ref = insn->op2; |
3035 | 0 | ir_insn *cond = &ctx->ir_base[cond_ref]; |
3036 | |
3037 | 0 | if (cond->op >= IR_EQ && cond->op <= IR_UGT |
3038 | 0 | && IR_IS_CONST_REF(cond->op2) |
3039 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op) |
3040 | 0 | && ctx->use_lists[insn->op2].count == 1) { |
3041 | 0 | ir_ref phi_ref = cond->op1; |
3042 | 0 | ir_insn *phi = &ctx->ir_base[phi_ref]; |
3043 | |
3044 | 0 | if (phi->op == IR_PHI |
3045 | 0 | && phi->inputs_count == 3 |
3046 | 0 | && phi->op1 == insn->op1 |
3047 | 0 | && ctx->use_lists[phi_ref].count == 1 |
3048 | 0 | && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) |
3049 | 0 | || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) { |
3050 | 0 | ir_ref merge_ref = insn->op1; |
3051 | 0 | ir_insn *merge = &ctx->ir_base[merge_ref]; |
3052 | |
3053 | 0 | if (ctx->use_lists[merge_ref].count == 2) { |
3054 | 0 | ir_ref end1_ref = merge->op1, end2_ref = merge->op2; |
3055 | 0 | ir_insn *end1 = &ctx->ir_base[end1_ref]; |
3056 | 0 | ir_insn *end2 = &ctx->ir_base[end2_ref]; |
3057 | |
3058 | 0 | if (end1->op == IR_END && end2->op == IR_END) { |
3059 | 0 | ir_ref if_true_ref, if_false_ref; |
3060 | 0 | ir_insn *if_true, *if_false; |
3061 | 0 | ir_op op = IR_IF_FALSE; |
3062 | |
3063 | 0 | ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref); |
3064 | |
3065 | 0 | if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) { |
3066 | 0 | IR_ASSERT(IR_IS_CONST_REF(phi->op3)); |
3067 | 0 | SWAP_REFS(phi->op2, phi->op3); |
3068 | 0 | SWAP_REFS(merge->op1, merge->op2); |
3069 | 0 | SWAP_REFS(end1_ref, end2_ref); |
3070 | 0 | SWAP_INSNS(end1, end2); |
3071 | 0 | } |
3072 | 0 | if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) { |
3073 | 0 | SWAP_REFS(if_true_ref, if_false_ref); |
3074 | 0 | op = IR_IF_TRUE; |
3075 | 0 | } |
3076 | 0 | if_true = &ctx->ir_base[if_true_ref]; |
3077 | 0 | if_false = &ctx->ir_base[if_false_ref]; |
3078 | |
3079 | 0 | if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) { |
3080 | 0 | if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) { |
3081 | | /* IF Split |
3082 | | * |
3083 | | * | | | | |
3084 | | * | END | END |
3085 | | * END / END | |
3086 | | * | +---+ | | |
3087 | | * | / | | |
3088 | | * MERGE | | |
3089 | | * | \ | | |
3090 | | * | PHI(C1, X) | | |
3091 | | * | | | | |
3092 | | * | CMP(_, C2) | | |
3093 | | * | / | | |
3094 | | * IF => | | |
3095 | | * | \ | | |
3096 | | * | +------+ | | |
3097 | | * | IF_TRUE | BEGIN |
3098 | | * IF_FALSE | BEGIN | |
3099 | | * | | |
3100 | | */ |
3101 | |
3102 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
3103 | 0 | ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref); |
3104 | |
3105 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
3106 | 0 | MAKE_NOP(phi); CLEAR_USES(phi_ref); |
3107 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
3108 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
3109 | |
3110 | 0 | if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); |
3111 | 0 | if_false->op1 = end1_ref; |
3112 | |
3113 | 0 | if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1); |
3114 | 0 | if_true->op1 = end2_ref; |
3115 | |
3116 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
3117 | 0 | ir_bitqueue_add(worklist, if_true_ref); |
3118 | |
3119 | 0 | return 1; |
3120 | 0 | } else { |
3121 | | /* IF Split |
3122 | | * |
3123 | | * | | | | |
3124 | | * | END | END |
3125 | | * END / END | |
3126 | | * | +---+ | | |
3127 | | * | / | | |
3128 | | * MERGE | | |
3129 | | * | \ | | |
3130 | | * | PHI(C1, X) | | |
3131 | | * | | | + |
3132 | | * | CMP(_, C2) | / |
3133 | | * | / | / |
3134 | | * IF => | / |
3135 | | * | \ | / |
3136 | | * | +------+ | / |
3137 | | * | IF_TRUE | / BEGIN(unreachable) |
3138 | | * IF_FALSE | MERGE | |
3139 | | * | | |
3140 | | */ |
3141 | |
3142 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
3143 | 0 | ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref); |
3144 | |
3145 | 0 | MAKE_NOP(merge); CLEAR_USES(merge_ref); |
3146 | 0 | MAKE_NOP(phi); CLEAR_USES(phi_ref); |
3147 | 0 | MAKE_NOP(cond); CLEAR_USES(cond_ref); |
3148 | 0 | MAKE_NOP(insn); CLEAR_USES(ref); |
3149 | |
3150 | 0 | if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); |
3151 | 0 | if_false->op1 = end1_ref; |
3152 | 0 | if_false->op2 = end2_ref; |
3153 | |
3154 | 0 | if_true->optx = IR_BEGIN; |
3155 | 0 | if_true->op1 = IR_UNUSED; |
3156 | |
3157 | 0 | ctx->flags2 &= ~IR_CFG_REACHABLE; |
3158 | |
3159 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
3160 | |
3161 | 0 | return 1; |
3162 | 0 | } |
3163 | 0 | } else { |
3164 | | /* IF Split |
3165 | | * |
3166 | | * | | | | |
3167 | | * | END | IF<----+ |
3168 | | * END / END / \ | |
3169 | | * | +---+ | +--+ + | |
3170 | | * | / | / | | |
3171 | | * MERGE | IF_FALSE | | |
3172 | | * | \ | | | | |
3173 | | * | PHI(C1, X) | | | | |
3174 | | * | | | | | | |
3175 | | * | CMP(_, C2) | | | CMP(X, C2) |
3176 | | * | / | | | |
3177 | | * IF => | END | |
3178 | | * | \ | | | |
3179 | | * | +------+ | | | |
3180 | | * | IF_TRUE | | IF_TRUE |
3181 | | * IF_FALSE | MERGE |
3182 | | * | | |
3183 | | */ |
3184 | |
3185 | 0 | ir_use_list_remove_all(ctx, merge_ref, phi_ref); |
3186 | 0 | ir_use_list_remove_all(ctx, ref, if_true_ref); |
3187 | 0 | if (!IR_IS_CONST_REF(phi->op3)) { |
3188 | 0 | ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2); |
3189 | 0 | } |
3190 | 0 | ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref); |
3191 | 0 | ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref); |
3192 | 0 | ir_use_list_add(ctx, end2_ref, if_true_ref); |
3193 | |
3194 | 0 | end2->optx = IR_OPTX(IR_IF, IR_VOID, 2); |
3195 | 0 | end2->op2 = insn->op2; |
3196 | 0 | ir_bitqueue_add(worklist, end2_ref); |
3197 | |
3198 | 0 | merge->optx = IR_OPTX(op, IR_VOID, 1); |
3199 | 0 | merge->op1 = end2_ref; |
3200 | 0 | merge->op2 = IR_UNUSED; |
3201 | |
3202 | 0 | cond->op1 = phi->op3; |
3203 | 0 | MAKE_NOP(phi); |
3204 | 0 | CLEAR_USES(phi_ref); |
3205 | |
3206 | 0 | insn->optx = IR_OPTX(IR_END, IR_VOID, 1); |
3207 | 0 | insn->op1 = merge_ref; |
3208 | 0 | insn->op2 = IR_UNUSED; |
3209 | |
3210 | 0 | if_true->op1 = end2_ref; |
3211 | |
3212 | 0 | if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2); |
3213 | 0 | if_false->op1 = end1_ref; |
3214 | 0 | if_false->op2 = ref; |
3215 | |
3216 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
3217 | 0 | if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) { |
3218 | 0 | ir_bitqueue_add(worklist, end2->op1); |
3219 | 0 | } |
3220 | |
3221 | 0 | return 1; |
3222 | 0 | } |
3223 | 0 | } |
3224 | 0 | } |
3225 | 0 | } |
3226 | 0 | } |
3227 | | |
3228 | 0 | return 0; |
3229 | 0 | } |
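All three cases in ir_try_split_if_cmp() above hinge on evaluating CMP(C1, C2) at compile time once the constant PHI input has been normalized into phi->op2. A standalone model of that decision (a toy comparison evaluator, not the real ir_cmp_is_true(); the op names only mirror the IR ones):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { CMP_EQ, CMP_NE, CMP_LT, CMP_GE, CMP_LE, CMP_GT } cmp_op;

static bool cmp_is_true(cmp_op op, int64_t a, int64_t b)
{
	switch (op) {
		case CMP_EQ: return a == b;
		case CMP_NE: return a != b;
		case CMP_LT: return a <  b;
		case CMP_GE: return a >= b;
		case CMP_LE: return a <= b;
		case CMP_GT: return a >  b;
	}
	return false;
}

int main(void)
{
	int64_t c1 = 0, x = 7, c2 = 0;  /* models IF(CMP(PHI(C1, X), C2)) */
	cmp_op op = CMP_EQ;

	/* The predecessor contributing C1 reaches a successor known right now. */
	printf("pred1 takes the %s edge\n", cmp_is_true(op, c1, c2) ? "true" : "false");
	/* The other predecessor keeps a runtime test CMP(X, C2), as in the last
	 * diagram; if X were constant too, both edges would fold away. */
	printf("pred2 re-tests at runtime: %s\n", cmp_is_true(op, x, c2) ? "true" : "false");
	return 0;
}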
3230 | | |
3231 | | static void ir_iter_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist) |
3232 | 0 | { |
3233 | 0 | ir_use_list *use_list = &ctx->use_lists[merge_ref]; |
3234 | |
3235 | 0 | if (use_list->count == 1) { |
3236 | 0 | ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist); |
3237 | 0 | } else if (use_list->count == 2) { |
3238 | 0 | if (merge->inputs_count == 2) { |
3239 | 0 | ir_ref phi_ref = ctx->use_edges[use_list->refs]; |
3240 | 0 | ir_insn *phi = &ctx->ir_base[phi_ref]; |
3241 | |
3242 | 0 | ir_ref next_ref = ctx->use_edges[use_list->refs + 1]; |
3243 | 0 | ir_insn *next = &ctx->ir_base[next_ref]; |
3244 | |
3245 | 0 | if (next->op == IR_PHI) { |
3246 | 0 | SWAP_REFS(phi_ref, next_ref); |
3247 | 0 | SWAP_INSNS(phi, next); |
3248 | 0 | } |
3249 | |
3250 | 0 | if (phi->op == IR_PHI && next->op != IR_PHI) { |
3251 | 0 | if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) { |
3252 | 0 | if (next->op2 == phi_ref) { |
3253 | 0 | if (ir_try_split_if(ctx, next_ref, next, worklist)) { |
3254 | 0 | return; |
3255 | 0 | } |
3256 | 0 | } else { |
3257 | 0 | ir_insn *cmp = &ctx->ir_base[next->op2]; |
3258 | |
3259 | 0 | if (cmp->op >= IR_EQ && cmp->op <= IR_UGT |
3260 | 0 | && cmp->op1 == phi_ref |
3261 | 0 | && IR_IS_CONST_REF(cmp->op2) |
3262 | 0 | && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op) |
3263 | 0 | && ctx->use_lists[next->op2].count == 1) { |
3264 | 0 | if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) { |
3265 | 0 | return; |
3266 | 0 | } |
3267 | 0 | } |
3268 | 0 | } |
3269 | 0 | } |
3270 | 0 | ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist); |
3271 | 0 | } |
3272 | 0 | } |
3273 | 0 | } |
3274 | 0 | } |
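A toy classification of the shapes ir_iter_optimize_merge() looks for (illustrative names only; the real code walks use lists and opcode fields): a single-use MERGE is a candidate empty diamond, and a two-use MERGE whose uses are exactly one PHI plus one IF is a candidate for the split transformations above.

#include <stdio.h>

typedef enum { USE_PHI, USE_IF, USE_OTHER } use_kind;

static const char *classify(int use_count, use_kind u1, use_kind u2)
{
	if (use_count == 1) {
		return "try_remove_empty_diamond";
	}
	if (use_count == 2) {
		if (u2 == USE_PHI) {           /* normalize order, like the SWAP above */
			use_kind t = u1; u1 = u2; u2 = t;
		}
		if (u1 == USE_PHI && u2 == USE_IF) {
			return "try_split_if / try_split_if_cmp / optimize_phi";
		}
	}
	return "leave as-is";
}

int main(void)
{
	printf("%s\n", classify(1, USE_OTHER, USE_OTHER));
	printf("%s\n", classify(2, USE_IF, USE_PHI));
	printf("%s\n", classify(3, USE_PHI, USE_IF));
	return 0;
}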
3275 | | |
3276 | | static ir_ref ir_find_ext_use(ir_ctx *ctx, ir_ref ref) |
3277 | 0 | { |
3278 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
3279 | 0 | ir_ref *p, n, use; |
3280 | 0 | ir_insn *use_insn; |
3281 | |
3282 | 0 | for (p = &ctx->use_edges[use_list->refs], n = use_list->count; n > 0; p++, n--) { |
3283 | 0 | use = *p; |
3284 | 0 | use_insn = &ctx->ir_base[use]; |
3285 | 0 | if (use_insn->op == IR_SEXT || use_insn->op == IR_ZEXT) { |
3286 | 0 | return use; |
3287 | 0 | } |
3288 | 0 | } |
3289 | 0 | return IR_UNUSED; |
3290 | 0 | } |
3291 | | |
3292 | | static void ir_iter_optimize_induction_var(ir_ctx *ctx, ir_ref phi_ref, ir_ref op_ref, ir_bitqueue *worklist) |
3293 | 0 | { |
3294 | 0 | ir_ref ext_ref; |
3295 | |
3296 | 0 | ext_ref = ir_find_ext_use(ctx, phi_ref); |
3297 | 0 | if (!ext_ref) { |
3298 | 0 | ext_ref = ir_find_ext_use(ctx, op_ref); |
3299 | 0 | } |
3300 | 0 | if (ext_ref) { |
3301 | 0 | ir_try_promote_induction_var_ext(ctx, ext_ref, phi_ref, op_ref, worklist); |
3302 | 0 | } |
3303 | 0 | } |
3304 | | |
3305 | | static void ir_iter_optimize_loop(ir_ctx *ctx, ir_ref loop_ref, ir_insn *loop, ir_bitqueue *worklist) |
3306 | 0 | { |
3307 | 0 | ir_ref n; |
3308 | |
3309 | 0 | if (loop->inputs_count != 2 || ctx->use_lists[loop_ref].count <= 1) { |
3310 | 0 | return; |
3311 | 0 | } |
3312 | | |
3313 | | /* Check for a simple induction variable of the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */ |
3314 | 0 | for (n = 0; n < ctx->use_lists[loop_ref].count; n++) { |
3315 | | /* "use_lists" may be reallocated by ir_ext_ref() */ |
3316 | 0 | ir_ref use = ctx->use_edges[ctx->use_lists[loop_ref].refs + n]; |
3317 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
3318 | |
3319 | 0 | if (use_insn->op == IR_PHI) { |
3320 | 0 | ir_ref op_ref = use_insn->op3; |
3321 | 0 | ir_insn *op_insn = &ctx->ir_base[op_ref]; |
3322 | |
|
3324 | 0 | if (op_insn->op1 == use) { |
3325 | 0 | if (ir_is_loop_invariant(ctx, op_insn->op2, loop_ref)) { |
3326 | 0 | ir_iter_optimize_induction_var(ctx, use, op_ref, worklist); |
3327 | 0 | } |
3328 | 0 | } else if (op_insn->op2 == use) { |
3329 | 0 | if (ir_is_loop_invariant(ctx, op_insn->op1, loop_ref)) { |
3330 | 0 | ir_iter_optimize_induction_var(ctx, use, op_ref, worklist); |
3331 | 0 | } |
3332 | 0 | } |
3333 | 0 | } |
3334 | 0 | } |
3335 | 0 | } |
3336 | 0 | } |
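Source-level intuition for the promotion this enables, as a sketch (plain C, not the ir_try_promote_induction_var_ext() implementation): when the interesting use of a narrow induction variable is a SEXT/ZEXT, the widening can be done once before the loop by widening the variable itself, instead of on every iteration.

#include <stdint.h>
#include <stdio.h>

static int64_t before(int32_t n)
{
	int64_t sum = 0;
	int32_t i;
	for (i = 0; i < n; i++) {   /* x2 = PHI(loop, x1, x3); x3 = ADD(x2, 1) */
		sum += (int64_t)i;      /* SEXT executed on every iteration */
	}
	return sum;
}

static int64_t after(int32_t n)
{
	int64_t sum = 0;
	int64_t i;
	for (i = 0; i < (int64_t)n; i++) {  /* the induction variable itself widened */
		sum += i;                       /* no per-iteration extension */
	}
	return sum;
}

int main(void)
{
	printf("%lld %lld\n", (long long)before(1000), (long long)after(1000));
	return 0;
}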
3337 | | |
3338 | | static ir_ref ir_iter_optimize_condition(ir_ctx *ctx, ir_ref control, ir_ref condition, bool *swap) |
3339 | 0 | { |
3340 | 0 | ir_insn *condition_insn = &ctx->ir_base[condition]; |
3341 | |
3342 | 0 | while ((condition_insn->op == IR_BITCAST |
3343 | 0 | || condition_insn->op == IR_ZEXT |
3344 | 0 | || condition_insn->op == IR_SEXT) |
3345 | 0 | && ctx->use_lists[condition].count == 1) { |
3346 | 0 | condition = condition_insn->op1; |
3347 | 0 | condition_insn = &ctx->ir_base[condition]; |
3348 | 0 | } |
3349 | |
3350 | 0 | if (condition_insn->opt == IR_OPT(IR_NOT, IR_BOOL)) { |
3351 | 0 | *swap = 1; |
3352 | 0 | condition = condition_insn->op1; |
3353 | 0 | condition_insn = &ctx->ir_base[condition]; |
3354 | 0 | } |
3355 | |
3356 | 0 | if (condition_insn->op == IR_NE && IR_IS_CONST_REF(condition_insn->op2)) { |
3357 | 0 | ir_insn *val_insn = &ctx->ir_base[condition_insn->op2]; |
3358 | |
3359 | 0 | if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) { |
3360 | 0 | condition = condition_insn->op1; |
3361 | 0 | condition_insn = &ctx->ir_base[condition]; |
3362 | 0 | } |
3363 | 0 | } else if (condition_insn->op == IR_EQ && IR_IS_CONST_REF(condition_insn->op2)) { |
3364 | 0 | ir_insn *val_insn = &ctx->ir_base[condition_insn->op2]; |
3365 | |
3366 | 0 | if (condition_insn->op2 == IR_TRUE) { |
3367 | 0 | condition = condition_insn->op1; |
3368 | 0 | condition_insn = &ctx->ir_base[condition]; |
3369 | 0 | } else if (IR_IS_TYPE_INT(val_insn->type) && val_insn->val.u64 == 0) { |
3370 | 0 | condition = condition_insn->op1; |
3371 | 0 | condition_insn = &ctx->ir_base[condition]; |
3372 | 0 | *swap = !*swap; |
3373 | 0 | } |
3374 | 0 | } |
3375 | |
3376 | 0 | while ((condition_insn->op == IR_BITCAST |
3377 | 0 | || condition_insn->op == IR_ZEXT |
3378 | 0 | || condition_insn->op == IR_SEXT) |
3379 | 0 | && ctx->use_lists[condition].count == 1) { |
3380 | 0 | condition = condition_insn->op1; |
3381 | 0 | condition_insn = &ctx->ir_base[condition]; |
3382 | 0 | } |
3383 | |
3384 | 0 | if (condition_insn->op == IR_ALLOCA || condition_insn->op == IR_VADDR) { |
3385 | 0 | return IR_TRUE; |
3386 | 0 | } |
3387 | | |
3388 | 0 | if (!IR_IS_CONST_REF(condition) && ctx->use_lists[condition].count > 1) { |
3389 | 0 | condition = ir_check_dominating_predicates(ctx, control, condition); |
3390 | 0 | } |
3391 | |
3392 | 0 | return condition; |
3393 | 0 | } |
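A standalone model of the rewrites above, checked exhaustively (assumed simplifications in plain C): NOT folds into the swap flag, `x != 0` reduces to `x`, `x == TRUE` reduces to `x`, and `x == 0` reduces to `x` with the flag toggled; the caller then swaps the branch targets when the flag is set.

#include <stdbool.h>
#include <stdio.h>

/* Reduce a wrapped condition to the bare value plus a swap flag. */
static bool canon(bool x, bool negated, bool cmp_eq_zero, bool *swap)
{
	if (negated) {
		*swap = !*swap;    /* NOT(x): test x, swap the targets */
	}
	if (cmp_eq_zero) {
		*swap = !*swap;    /* (x == 0): same as NOT(x) */
	}
	return x;
}

static bool eval_original(bool x, bool negated, bool cmp_eq_zero)
{
	bool c = negated ? !x : x;
	return cmp_eq_zero ? (c == 0) : c;
}

int main(void)
{
	int x, n, z;
	for (x = 0; x <= 1; x++) {
		for (n = 0; n <= 1; n++) {
			for (z = 0; z <= 1; z++) {
				bool swap = false;
				bool c = canon(x, n, z, &swap);
				bool folded = swap ? !c : c;
				printf("x=%d not=%d eqz=%d -> %s\n", x, n, z,
				       folded == eval_original(x, n, z) ? "ok" : "MISMATCH");
			}
		}
	}
	return 0;
}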
3394 | | |
3395 | | static void ir_iter_optimize_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
3396 | 0 | { |
3397 | 0 | bool swap = 0; |
3398 | 0 | ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap); |
3399 | |
3400 | 0 | if (swap) { |
3401 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
3402 | 0 | ir_ref *p, use; |
3403 | |
3404 | 0 | IR_ASSERT(use_list->count == 2); |
3405 | 0 | p = ctx->use_edges + use_list->refs; |
3406 | 0 | use = *p; |
3407 | 0 | if (ctx->ir_base[use].op == IR_IF_TRUE) { |
3408 | 0 | ctx->ir_base[use].op = IR_IF_FALSE; |
3409 | 0 | use = *(p+1); |
3410 | 0 | ctx->ir_base[use].op = IR_IF_TRUE; |
3411 | 0 | } else { |
3412 | 0 | ctx->ir_base[use].op = IR_IF_TRUE; |
3413 | 0 | use = *(p+1); |
3414 | 0 | ctx->ir_base[use].op = IR_IF_FALSE; |
3415 | 0 | } |
3416 | 0 | } |
3417 | |
3418 | 0 | if (IR_IS_CONST_REF(condition)) { |
3419 | | /* |
3420 | | * | | |
3421 | | * IF(TRUE) => END |
3422 | | * | \ | |
3423 | | * | +------+ | |
3424 | | * | IF_TRUE | BEGIN(unreachable) |
3425 | | * IF_FALSE | BEGIN |
3426 | | * | | |
3427 | | */ |
3428 | 0 | ir_ref if_true_ref, if_false_ref; |
3429 | 0 | ir_insn *if_true, *if_false; |
3430 | |
3431 | 0 | insn->optx = IR_OPTX(IR_END, IR_VOID, 1); |
3432 | 0 | if (!IR_IS_CONST_REF(insn->op2)) { |
3433 | 0 | ir_use_list_remove_one(ctx, insn->op2, ref); |
3434 | 0 | ir_bitqueue_add(worklist, insn->op2); |
3435 | 0 | } |
3436 | 0 | insn->op2 = IR_UNUSED; |
3437 | |
3438 | 0 | ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref); |
3439 | 0 | if_true = &ctx->ir_base[if_true_ref]; |
3440 | 0 | if_false = &ctx->ir_base[if_false_ref]; |
3441 | 0 | if_true->op = IR_BEGIN; |
3442 | 0 | if_false->op = IR_BEGIN; |
3443 | 0 | if (ir_ref_is_true(ctx, condition)) { |
3444 | 0 | if_false->op1 = IR_UNUSED; |
3445 | 0 | ir_use_list_remove_one(ctx, ref, if_false_ref); |
3446 | 0 | ir_bitqueue_add(worklist, if_true_ref); |
3447 | 0 | } else { |
3448 | 0 | if_true->op1 = IR_UNUSED; |
3449 | 0 | ir_use_list_remove_one(ctx, ref, if_true_ref); |
3450 | 0 | ir_bitqueue_add(worklist, if_false_ref); |
3451 | 0 | } |
3452 | 0 | ctx->flags2 &= ~IR_CFG_REACHABLE; |
3453 | 0 | } else if (insn->op2 != condition) { |
3454 | 0 | ir_iter_update_op(ctx, ref, 2, condition, worklist); |
3455 | 0 | } |
3456 | 0 | } |
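The constant fold above in miniature: a sketch of which successor survives when the IF condition turns out to be constant. Detaching the dead successor is what invalidates the reachability info, hence the clearing of IR_CFG_REACHABLE.

#include <stdbool.h>
#include <stdio.h>

typedef struct {
	bool keep_true;    /* IF_TRUE  stays linked (becomes a plain BEGIN) */
	bool keep_false;   /* IF_FALSE stays linked */
} folded_if;

static folded_if fold_const_if(bool condition)
{
	folded_if f;
	f.keep_true  = condition;   /* the taken side follows the new END */
	f.keep_false = !condition;  /* the other BEGIN loses its op1: unreachable */
	return f;
}

int main(void)
{
	folded_if t = fold_const_if(true);
	folded_if f = fold_const_if(false);
	printf("IF(TRUE):  keep_true=%d keep_false=%d\n", t.keep_true, t.keep_false);
	printf("IF(FALSE): keep_true=%d keep_false=%d\n", f.keep_true, f.keep_false);
	return 0;
}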
3457 | | |
3458 | | static void ir_iter_optimize_guard(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist) |
3459 | 0 | { |
3460 | 0 | bool swap = 0; |
3461 | 0 | ir_ref condition = ir_iter_optimize_condition(ctx, insn->op1, insn->op2, &swap); |
3462 | |
3463 | 0 | if (swap) { |
3464 | 0 | if (insn->op == IR_GUARD) { |
3465 | 0 | insn->op = IR_GUARD_NOT; |
3466 | 0 | } else { |
3467 | 0 | insn->op = IR_GUARD; |
3468 | 0 | } |
3469 | 0 | } |
3470 | |
3471 | 0 | if (IR_IS_CONST_REF(condition)) { |
3472 | 0 | if (insn->op == IR_GUARD) { |
3473 | 0 | if (ir_ref_is_true(ctx, condition)) { |
3474 | 0 | ir_ref prev, next; |
3475 | |
3476 | 0 | remove_guard: |
3477 | 0 | prev = insn->op1; |
3478 | 0 | next = ir_next_control(ctx, ref); |
3479 | 0 | ctx->ir_base[next].op1 = prev; |
3480 | 0 | ir_use_list_remove_one(ctx, ref, next); |
3481 | 0 | ir_use_list_replace_one(ctx, prev, ref, next); |
3482 | 0 | insn->op1 = IR_UNUSED; |
3483 | |
3484 | 0 | if (!IR_IS_CONST_REF(insn->op2)) { |
3485 | 0 | ir_use_list_remove_one(ctx, insn->op2, ref); |
3486 | 0 | if (ir_is_dead(ctx, insn->op2)) { |
3487 | | /* schedule DCE */ |
3488 | 0 | ir_bitqueue_add(worklist, insn->op2); |
3489 | 0 | } |
3490 | 0 | } |
3491 | |
3492 | 0 | if (insn->op3) { |
3493 | | /* SNAPSHOT */ |
3494 | 0 | ir_iter_remove_insn(ctx, insn->op3, worklist); |
3495 | 0 | } |
3496 | |
3497 | 0 | MAKE_NOP(insn); |
3498 | 0 | return; |
3499 | 0 | } else { |
3500 | 0 | condition = IR_FALSE; |
3501 | 0 | } |
3502 | 0 | } else { |
3503 | 0 | if (ir_ref_is_true(ctx, condition)) { |
3504 | 0 | condition = IR_TRUE; |
3505 | 0 | } else { |
3506 | 0 | goto remove_guard; |
3507 | 0 | } |
3508 | 0 | } |
3509 | 0 | } |
3510 | | |
3511 | 0 | if (insn->op2 != condition) { |
3512 | 0 | ir_iter_update_op(ctx, ref, 2, condition, worklist); |
3513 | 0 | } |
3514 | 0 | } |
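The remove_guard unlink modeled on a plain array (indices are illustrative): each control node records its predecessor the way op1 does, so deleting a node amounts to redirecting its successor's predecessor link, exactly the `ctx->ir_base[next].op1 = prev` step above.

#include <stdio.h>

#define N 4
static int prev_of[N];   /* prev_of[i] == control predecessor of node i */

int main(void)
{
	int guard, next;

	/* control chain: 0 <- 1 <- 2 <- 3, where node 2 is the dead guard */
	prev_of[1] = 0;
	prev_of[2] = 1;
	prev_of[3] = 2;

	guard = 2;
	next = 3;                          /* what ir_next_control() would return */
	prev_of[next] = prev_of[guard];    /* ctx->ir_base[next].op1 = prev */
	prev_of[guard] = -1;               /* insn->op1 = IR_UNUSED */

	printf("node 3 now follows node %d\n", prev_of[3]);   /* prints 1 */
	return 0;
}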
3515 | | |
3516 | | void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) |
3517 | 0 | { |
3518 | 0 | ir_ref i, val; |
3519 | 0 | ir_insn *insn; |
3520 | |
3521 | 0 | while ((i = ir_bitqueue_pop(worklist)) >= 0) { |
3522 | 0 | insn = &ctx->ir_base[i]; |
3523 | 0 | if (IR_IS_FOLDABLE_OP(insn->op)) { |
3524 | 0 | if (ctx->use_lists[i].count == 0) { |
3525 | 0 | if (insn->op == IR_PHI) { |
3526 | 0 | ir_bitqueue_add(worklist, insn->op1); |
3527 | 0 | } |
3528 | 0 | ir_iter_remove_insn(ctx, i, worklist); |
3529 | 0 | } else { |
3530 | 0 | insn = &ctx->ir_base[i]; |
3531 | 0 | switch (insn->op) { |
3532 | 0 | case IR_FP2FP: |
3533 | 0 | if (insn->type == IR_FLOAT) { |
3534 | 0 | if (ir_may_promote_d2f(ctx, insn->op1)) { |
3535 | 0 | ir_ref ref = ir_promote_d2f(ctx, insn->op1, i, worklist); |
3536 | 0 | insn->op1 = ref; |
3537 | 0 | ir_iter_replace_insn(ctx, i, ref, worklist); |
3538 | 0 | break; |
3539 | 0 | } |
3540 | 0 | } else { |
3541 | 0 | if (ir_may_promote_f2d(ctx, insn->op1)) { |
3542 | 0 | ir_ref ref = ir_promote_f2d(ctx, insn->op1, i, worklist); |
3543 | 0 | insn->op1 = ref; |
3544 | 0 | ir_iter_replace_insn(ctx, i, ref, worklist); |
3545 | 0 | break; |
3546 | 0 | } |
3547 | 0 | } |
3548 | 0 | goto folding; |
3549 | 0 | case IR_FP2INT: |
3550 | 0 | if (ctx->ir_base[insn->op1].type == IR_DOUBLE) { |
3551 | 0 | if (ir_may_promote_d2f(ctx, insn->op1)) { |
3552 | 0 | insn->op1 = ir_promote_d2f(ctx, insn->op1, i, worklist); |
3553 | 0 | } |
3554 | 0 | } else { |
3555 | 0 | if (ir_may_promote_f2d(ctx, insn->op1)) { |
3556 | 0 | insn->op1 = ir_promote_f2d(ctx, insn->op1, i, worklist); |
3557 | 0 | } |
3558 | 0 | } |
3559 | 0 | goto folding; |
3560 | 0 | case IR_TRUNC: |
3561 | 0 | if (ir_may_promote_trunc(ctx, insn->type, insn->op1)) { |
3562 | 0 | ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i, worklist); |
3563 | 0 | insn->op1 = ref; |
3564 | 0 | ir_iter_replace_insn(ctx, i, ref, worklist); |
3565 | 0 | break; |
3566 | 0 | } |
3567 | 0 | goto folding; |
3568 | 0 | case IR_SEXT: |
3569 | 0 | case IR_ZEXT: |
3570 | 0 | if (ir_try_promote_ext(ctx, i, insn, worklist)) { |
3571 | 0 | break; |
3572 | 0 | } |
3573 | 0 | goto folding; |
3574 | 0 | case IR_PHI: |
3575 | 0 | break; |
3576 | 0 | default: |
3577 | 0 | folding: |
3578 | 0 | ir_iter_fold(ctx, i, worklist); |
3579 | 0 | break; |
3580 | 0 | } |
3581 | 0 | } |
3582 | 0 | } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) { |
3583 | 0 | if (!(ctx->flags & IR_OPT_CFG)) { |
3584 | | /* pass */ |
3585 | 0 | } else if (insn->op == IR_BEGIN) { |
3586 | 0 | if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) { |
3587 | 0 | ir_merge_blocks(ctx, insn->op1, i, worklist); |
3588 | 0 | } |
3589 | 0 | } else if (insn->op == IR_MERGE) { |
3590 | 0 | ir_iter_optimize_merge(ctx, i, insn, worklist); |
3591 | 0 | } else if (insn->op == IR_LOOP_BEGIN) { |
3592 | 0 | ir_iter_optimize_loop(ctx, i, insn, worklist); |
3593 | 0 | } |
3594 | 0 | } else if (ir_is_dead_load(ctx, i)) { |
3595 | 0 | ir_ref next; |
3596 | | |
3597 | | /* remove the LOAD from the doubly-linked control list */ |
3598 | 0 | remove_mem_insn: |
3599 | 0 | next = ctx->use_edges[ctx->use_lists[i].refs]; |
3600 | 0 | IR_ASSERT(ctx->use_lists[i].count == 1); |
3601 | 0 | ctx->ir_base[next].op1 = insn->op1; |
3602 | 0 | ir_use_list_replace_one(ctx, insn->op1, i, next); |
3603 | 0 | insn->op1 = IR_UNUSED; |
3604 | 0 | ir_iter_remove_insn(ctx, i, worklist); |
3605 | 0 | } else if (insn->op == IR_LOAD) { |
3606 | 0 | val = ir_find_aliasing_load(ctx, insn->op1, insn->type, insn->op2); |
3607 | 0 | if (val) { |
3608 | 0 | ir_insn *val_insn; |
3609 | 0 | ir_ref prev, next; |
3610 | |
3611 | 0 | remove_aliased_load: |
3612 | 0 | prev = insn->op1; |
3613 | 0 | next = ir_next_control(ctx, i); |
3614 | 0 | ctx->ir_base[next].op1 = prev; |
3615 | 0 | ir_use_list_remove_one(ctx, i, next); |
3616 | 0 | ir_use_list_replace_one(ctx, prev, i, next); |
3617 | 0 | insn->op1 = IR_UNUSED; |
3618 | |
3619 | 0 | val_insn = &ctx->ir_base[val]; |
3620 | 0 | if (val_insn->type == insn->type) { |
3621 | 0 | ir_iter_replace_insn(ctx, i, val, worklist); |
3622 | 0 | } else { |
3623 | 0 | if (!IR_IS_CONST_REF(insn->op2)) { |
3624 | 0 | ir_use_list_remove_one(ctx, insn->op2, i); |
3625 | 0 | if (ir_is_dead(ctx, insn->op2)) { |
3626 | | /* schedule DCE */ |
3627 | 0 | ir_bitqueue_add(worklist, insn->op2); |
3628 | 0 | } |
3629 | 0 | } |
3630 | 0 | if (!IR_IS_CONST_REF(val)) { |
3631 | 0 | ir_use_list_add(ctx, val, i); |
3632 | 0 | } |
3633 | 0 | if (ir_type_size[val_insn->type] == ir_type_size[insn->type]) { |
3634 | | /* load forwarding with bitcast (L2L) */ |
3635 | 0 | insn->optx = IR_OPTX(IR_BITCAST, insn->type, 1); |
3636 | 0 | } else { |
3637 | | /* partial load forwarding (L2L) */ |
3638 | 0 | insn->optx = IR_OPTX(IR_TRUNC, insn->type, 1); |
3639 | 0 | } |
3640 | 0 | insn->op1 = val; |
3641 | 0 | insn->op2 = IR_UNUSED; |
3642 | 0 | ir_bitqueue_add(worklist, i); |
3643 | 0 | } |
3644 | 0 | } |
3645 | 0 | } else if (insn->op == IR_STORE) { |
3646 | 0 | if (ir_find_aliasing_store(ctx, insn->op1, insn->op2, insn->op3)) { |
3647 | 0 | goto remove_mem_insn; |
3648 | 0 | } else { |
3649 | 0 | ir_insn *val_insn; |
3650 | |
3651 | 0 | remove_bitcast: |
3652 | 0 | val = insn->op3; |
3653 | 0 | val_insn = &ctx->ir_base[val]; |
3654 | 0 | if (val_insn->op == IR_BITCAST |
3655 | 0 | && ir_type_size[val_insn->type] == ir_type_size[ctx->ir_base[val_insn->op1].type]) { |
3656 | 0 | insn->op3 = val_insn->op1; |
3657 | 0 | ir_use_list_remove_one(ctx, val, i); |
3658 | 0 | if (ctx->use_lists[val].count == 0) { |
3659 | 0 | if (!IR_IS_CONST_REF(val_insn->op1)) { |
3660 | 0 | ir_use_list_replace_one(ctx, val_insn->op1, val, i); |
3661 | 0 | } |
3662 | 0 | ir_iter_remove_insn(ctx, val, worklist); |
3663 | 0 | } else { |
3664 | 0 | if (!IR_IS_CONST_REF(val_insn->op1)) { |
3665 | 0 | ir_use_list_add(ctx, val_insn->op1, i); |
3666 | 0 | } |
3667 | 0 | } |
3668 | 0 | } |
3669 | 0 | } |
3670 | 0 | } else if (insn->op == IR_VLOAD) { |
3671 | 0 | val = ir_find_aliasing_vload(ctx, insn->op1, insn->type, insn->op2); |
3672 | 0 | if (val) { |
3673 | 0 | goto remove_aliased_load; |
3674 | 0 | } |
3675 | 0 | } else if (insn->op == IR_VSTORE) { |
3676 | 0 | if (ir_find_aliasing_vstore(ctx, insn->op1, insn->op2, insn->op3)) { |
3677 | 0 | goto remove_mem_insn; |
3678 | 0 | } else { |
3679 | 0 | goto remove_bitcast; |
3680 | 0 | } |
3681 | 0 | } else if (insn->op == IR_IF) { |
3682 | 0 | ir_iter_optimize_if(ctx, i, insn, worklist); |
3683 | 0 | } else if (insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { |
3684 | 0 | ir_iter_optimize_guard(ctx, i, insn, worklist); |
3685 | 0 | } |
3686 | 0 | } |
3687 | 0 | } |
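The driving loop of ir_iter_opt() in miniature: pop a node id from the worklist, possibly rewrite it, and re-queue affected work until nothing changes. This standalone toy (a FIFO plus a queued flag, roughly the service ir_bitqueue provides) just decrements counters and revisits the nonzero ones.

#include <stdbool.h>
#include <stdio.h>

#define N 8
static int  value[N] = {3, 0, 2, 0, 1, 0, 0, 5};
static bool queued[N];
static int  queue[64], head, tail;

static void push(int i)
{
	if (!queued[i]) {
		queued[i] = true;
		queue[tail++] = i;
	}
}

int main(void)
{
	int i, steps = 0;

	for (i = 0; i < N; i++) {
		push(i);
	}
	while (head < tail) {
		i = queue[head++];
		queued[i] = false;
		if (value[i] > 0) {   /* "transform" the node... */
			value[i]--;
			push(i);          /* ...and revisit it, like re-adding its uses */
		}
		steps++;
	}
	printf("fixpoint after %d steps\n", steps);   /* 19 for this input */
	return 0;
}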
3688 | | |
3689 | | int ir_sccp(ir_ctx *ctx) |
3690 | 0 | { |
3691 | 0 | ir_bitqueue sccp_worklist, iter_worklist; |
3692 | 0 | ir_insn *_values; |
3693 | |
3694 | 0 | ir_bitqueue_init(&iter_worklist, ctx->insns_count); |
3695 | 0 | ir_bitqueue_init(&sccp_worklist, ctx->insns_count); |
3696 | 0 | _values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn)); |
3697 | |
3698 | 0 | ctx->flags2 |= IR_OPT_IN_SCCP; |
3699 | 0 | ir_sccp_analyze(ctx, _values, &sccp_worklist, &iter_worklist); |
3700 | 0 | ir_sccp_transform(ctx, _values, &sccp_worklist, &iter_worklist); |
3701 | 0 | ctx->flags2 &= ~IR_OPT_IN_SCCP; |
3702 | |
3703 | 0 | ir_mem_free(_values); |
3704 | 0 | ir_bitqueue_free(&sccp_worklist); |
3705 | |
3706 | 0 | ctx->flags2 |= IR_CFG_REACHABLE; |
3707 | |
3708 | 0 | ir_iter_opt(ctx, &iter_worklist); |
3709 | |
3710 | 0 | ir_bitqueue_free(&iter_worklist); |
3711 | |
3712 | 0 | return 1; |
3713 | 0 | } |
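The pass allocates one lattice cell per instruction (_values, insns_count entries) and runs the classic Wegman-Zadeck scheme: propagate to a fixpoint, then transform, then hand the leftovers to the iterative optimizer. The core of such a lattice is the standard three-level SCCP meet, sketched standalone below (TOP = no information yet, BOTTOM = varies at runtime; the real cells also encode IF/MERGE/COPY states):

#include <stdio.h>

typedef struct {
	enum { TOP, CONST, BOTTOM } kind;
	long val;                  /* meaningful only when kind == CONST */
} lat;

static lat meet(lat a, lat b)
{
	if (a.kind == TOP) return b;                 /* TOP is the identity */
	if (b.kind == TOP) return a;
	if (a.kind == CONST && b.kind == CONST && a.val == b.val) {
		return a;                                /* agreeing constants stay */
	}
	return (lat){BOTTOM, 0};                     /* anything else conflicts */
}

int main(void)
{
	lat top = {TOP, 0}, c1 = {CONST, 1}, c2 = {CONST, 2};
	printf("meet(TOP,1)->kind=%d meet(1,1)->kind=%d meet(1,2)->kind=%d\n",
	       (int)meet(top, c1).kind, (int)meet(c1, c1).kind, (int)meet(c1, c2).kind);
	return 0;
}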