/src/php-src/ext/opcache/jit/ir/ir_gcm.c
Line | Count | Source |
1 | | /* |
2 | | * IR - Lightweight JIT Compilation Framework |
3 | | * (GCM - Global Code Motion and Scheduler) |
4 | | * Copyright (C) 2022 Zend by Perforce. |
5 | | * Authors: Dmitry Stogov <dmitry@php.net> |
6 | | * |
7 | | * The GCM algorithm is based on Cliff Click's publication |
8 | | * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95. |
9 | | */ |
10 | | |
11 | | #include "ir.h" |
12 | | #include "ir_private.h" |
13 | | |
14 | 0 | #define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0) |
15 | 0 | #define IR_GCM_EARLY_BLOCK(b) ((uint32_t)-((int32_t)(b))) |
16 | | |
17 | | #define IR_GCM_SPLIT 1 |
18 | | #define IR_SCHEDULE_SWAP_OPS 1 |
19 | | |
20 | | static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late) |
21 | 0 | { |
22 | 0 | ir_ref n, *p, input; |
23 | 0 | ir_insn *insn; |
24 | 0 | uint32_t dom_depth; |
25 | 0 | uint32_t b, result; |
26 | |
|
27 | 0 | insn = &ctx->ir_base[ref]; |
28 | |
|
29 | 0 | IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); |
30 | 0 | IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); |
31 | |
|
32 | 0 | result = 1; |
33 | 0 | dom_depth = 0; |
34 | |
|
35 | 0 | n = insn->inputs_count; |
36 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
37 | 0 | input = *p; |
38 | 0 | if (input > 0) { |
39 | 0 | b = ctx->cfg_map[input]; |
40 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
41 | 0 | b = IR_GCM_EARLY_BLOCK(b); |
42 | 0 | } else if (!b) { |
43 | 0 | b = ir_gcm_schedule_early(ctx, input, queue_late); |
44 | 0 | } |
45 | 0 | if (dom_depth < ctx->cfg_blocks[b].dom_depth) { |
46 | 0 | dom_depth = ctx->cfg_blocks[b].dom_depth; |
47 | 0 | result = b; |
48 | 0 | } |
49 | 0 | } |
50 | 0 | } |
51 | |
|
52 | 0 | ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result); |
53 | 0 | ir_list_push_unchecked(queue_late, ref); |
54 | 0 | return result; |
55 | 0 | } |
56 | | |
57 | | /* Last Common Ancestor */ |
58 | | static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2) |
59 | 0 | { |
60 | 0 | uint32_t dom_depth; |
61 | |
|
62 | 0 | dom_depth = ctx->cfg_blocks[b2].dom_depth; |
63 | 0 | while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { |
64 | 0 | b1 = ctx->cfg_blocks[b1].dom_parent; |
65 | 0 | } |
66 | 0 | dom_depth = ctx->cfg_blocks[b1].dom_depth; |
67 | 0 | while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { |
68 | 0 | b2 = ctx->cfg_blocks[b2].dom_parent; |
69 | 0 | } |
70 | 0 | while (b1 != b2) { |
71 | 0 | b1 = ctx->cfg_blocks[b1].dom_parent; |
72 | 0 | b2 = ctx->cfg_blocks[b2].dom_parent; |
73 | 0 | } |
74 | 0 | return b2; |
75 | 0 | } |
76 | | |
77 | | static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) |
78 | 0 | { |
79 | 0 | ir_block *bb = &ctx->cfg_blocks[lca]; |
80 | 0 | uint32_t loop_depth = bb->loop_depth; |
81 | 0 | uint32_t flags, best, b; |
82 | |
|
83 | 0 | if (!loop_depth) { |
84 | 0 | return lca; |
85 | 0 | } |
86 | | |
87 | | #if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */ |
88 | | if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) { |
89 | | ir_use_list *use_list = &ctx->use_lists[ref]; |
90 | | |
91 | | if (use_list->count == 1) { |
92 | | ir_ref use = ctx->use_edges[use_list->refs]; |
93 | | ir_insn *insn = &ctx->ir_base[use]; |
94 | | if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { |
95 | | /* Don't hoist invariant comparison */ |
96 | | return lca; |
97 | | } |
98 | | } |
99 | | } |
100 | | #endif |
101 | | |
102 | 0 | flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; |
103 | 0 | if ((flags & IR_BB_LOOP_WITH_ENTRY) |
104 | 0 | && !(ctx->binding && ir_binding_find(ctx, ref))) { |
105 | | /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */ |
106 | 0 | return lca; |
107 | 0 | } |
108 | | |
109 | 0 | best = b = lca; |
110 | 0 | do { |
111 | 0 | b = bb->dom_parent; |
112 | 0 | bb = &ctx->cfg_blocks[b]; |
113 | 0 | if (bb->loop_depth < loop_depth) { |
114 | 0 | if (!bb->loop_depth) { |
115 | 0 | #if 1 |
116 | | /* Avoid LICM if LOOP doesn't have a pre-header block */ |
117 | 0 | ir_block *loop_bb = &ctx->cfg_blocks[best]; |
118 | |
|
119 | 0 | if (!(loop_bb->flags & IR_BB_LOOP_HEADER)) { |
120 | 0 | loop_bb = &ctx->cfg_blocks[loop_bb->loop_header]; |
121 | 0 | } |
122 | 0 | if (loop_bb->predecessors_count > 2) { |
123 | 0 | int n = loop_bb->predecessors_count; |
124 | 0 | uint32_t *p = ctx->cfg_edges + loop_bb->predecessors; |
125 | |
|
126 | 0 | while (n && *p != b) { |
127 | 0 | n--; p++; |
128 | 0 | } |
129 | 0 | if (!n) { |
130 | 0 | break; |
131 | 0 | } |
132 | 0 | } |
133 | 0 | #endif |
134 | 0 | best = b; |
135 | 0 | break; |
136 | 0 | } |
137 | 0 | flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; |
138 | 0 | if ((flags & IR_BB_LOOP_WITH_ENTRY) |
139 | 0 | && !(ctx->binding && ir_binding_find(ctx, ref))) { |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | loop_depth = bb->loop_depth; |
143 | 0 | best = b; |
144 | 0 | } |
145 | 0 | } while (b != ctx->cfg_map[ref]); |
146 | |
|
147 | 0 | return best; |
148 | 0 | } |
149 | | |
150 | | #if IR_GCM_SPLIT |
151 | | /* Partially Dead Code Elimination through splitting the node and sinking the clones |
152 | | * |
153 | | * This code is based on the Benedikt Meurer's idea first implemented in V8. |
154 | | * See: https://codereview.chromium.org/899433005 |
155 | | */ |
156 | | |
157 | | typedef struct _ir_gcm_split_data { |
158 | | ir_sparse_set totally_useful; |
159 | | ir_list worklist; |
160 | | } ir_gcm_split_data; |
161 | | |
162 | | static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) |
163 | 0 | { |
164 | 0 | uint32_t *p, i, n = bb->predecessors_count; |
165 | |
|
166 | 0 | IR_ASSERT(n > 0); |
167 | 0 | p = ctx->cfg_edges + bb->predecessors; |
168 | 0 | do { |
169 | 0 | i = *p; |
170 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
171 | 0 | ir_list_push(&data->worklist, i); |
172 | 0 | } |
173 | 0 | p++; |
174 | 0 | n--; |
175 | 0 | } while (n > 0); |
176 | 0 | } |
177 | | |
178 | | static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) |
179 | 0 | { |
180 | 0 | uint32_t *p, i, n = bb->successors_count; |
181 | |
|
182 | 0 | if (n <= 1) { |
183 | 0 | IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors])); |
184 | 0 | return 1; |
185 | 0 | } |
186 | | |
187 | 0 | p = ctx->cfg_edges + bb->successors; |
188 | 0 | do { |
189 | 0 | i = *p; |
190 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
191 | 0 | return 0; |
192 | 0 | } |
193 | 0 | p++; |
194 | 0 | n--; |
195 | 0 | } while (n > 0); |
196 | | |
197 | 0 | return 1; |
198 | 0 | } |
199 | | |
200 | | static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) |
201 | 0 | { |
202 | 0 | ir_use_list *use_list; |
203 | 0 | ir_insn *insn; |
204 | 0 | ir_ref n, *p, use; |
205 | 0 | uint32_t i; |
206 | 0 | ir_gcm_split_data *data = ctx->data; |
207 | |
|
208 | 0 | IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count); |
209 | | |
210 | | /* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD) |
211 | | * 1.1. Collect the blocks where the node is really USED. |
212 | | */ |
213 | 0 | ir_sparse_set_clear(&data->totally_useful); |
214 | |
|
215 | 0 | use_list = &ctx->use_lists[ref]; |
216 | 0 | n = use_list->count; |
217 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
218 | 0 | use = *p; |
219 | 0 | insn = &ctx->ir_base[use]; |
220 | 0 | if (insn->op == IR_PHI) { |
221 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
222 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
223 | 0 | ir_ref n = insn->inputs_count - 1; |
224 | |
|
225 | 0 | for (;n > 0; p++, q++, n--) { |
226 | 0 | if (*p == ref) { |
227 | 0 | i = ctx->cfg_map[*q]; |
228 | 0 | IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); |
229 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
230 | 0 | if (i == b) return 0; /* node is totally-useful in the scheduled block */ |
231 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
232 | 0 | } |
233 | 0 | } |
234 | 0 | } |
235 | 0 | } else { |
236 | 0 | i = ctx->cfg_map[use]; |
237 | 0 | if (!i) { |
238 | 0 | continue; |
239 | 0 | } |
240 | 0 | IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); |
241 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
242 | 0 | if (i == b) return 0; /* node is totally-useful in the scheduled block */ |
243 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
244 | 0 | } |
245 | 0 | } |
246 | 0 | } |
247 | | |
248 | 0 | #ifdef IR_DEBUG |
249 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
250 | 0 | bool first = 1; |
251 | 0 | fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b); |
252 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
253 | 0 | if (first) { |
254 | 0 | fprintf(stderr, "\td_%d is USED in [BB%d", ref, i); |
255 | 0 | first = 0; |
256 | 0 | } else { |
257 | 0 | fprintf(stderr, ", BB%d", i); |
258 | 0 | } |
259 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
260 | 0 | fprintf(stderr, "]\n"); |
261 | 0 | } |
262 | 0 | #endif |
263 | | |
264 | | /* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and |
265 | | * add them into TOTALLY_USEFUL set if all of their successors are already there. |
266 | | */ |
267 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
268 | 0 | _push_predecessors(ctx, &ctx->cfg_blocks[i], data); |
269 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
270 | |
|
271 | 0 | while (ir_list_len(&data->worklist)) { |
272 | 0 | i = ir_list_pop(&data->worklist); |
273 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
274 | 0 | ir_block *bb = &ctx->cfg_blocks[i]; |
275 | |
|
276 | 0 | if (_check_successors(ctx, bb, data)) { |
277 | 0 | if (i == b) { |
278 | | /* node is TOTALLY_USEFUL in the scheduled block */ |
279 | 0 | ir_list_clear(&data->worklist); |
280 | 0 | return 0; |
281 | 0 | } |
282 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
283 | 0 | _push_predecessors(ctx, bb, data); |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | 0 | IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b)); |
289 | |
|
290 | 0 | #ifdef IR_DEBUG |
291 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
292 | 0 | bool first = 1; |
293 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
294 | 0 | if (first) { |
295 | 0 | fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i); |
296 | 0 | first = 0; |
297 | 0 | } else { |
298 | 0 | fprintf(stderr, ", BB%d", i); |
299 | 0 | } |
300 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
301 | 0 | fprintf(stderr, "]\n"); |
302 | 0 | } |
303 | 0 | #endif |
304 | | |
305 | | /* 2. Split the USEs into partitions */ |
306 | 0 | use_list = &ctx->use_lists[ref]; |
307 | 0 | ir_hashtab hash; |
308 | 0 | uint32_t j, clone, clones_count = 0, uses_count = 0; |
309 | 0 | struct { |
310 | 0 | ir_ref ref; |
311 | 0 | uint32_t block; |
312 | 0 | uint32_t lca; |
313 | 0 | uint32_t use_count; |
314 | 0 | uint32_t use; |
315 | 0 | } *clones = ir_mem_malloc(sizeof(*clones) * use_list->count); |
316 | 0 | struct { |
317 | 0 | ir_ref ref; |
318 | 0 | uint32_t block; |
319 | 0 | uint32_t next; |
320 | 0 | } *uses = ir_mem_malloc(sizeof(*uses) * use_list->count); |
321 | |
|
322 | 0 | ir_hashtab_init(&hash, use_list->count); |
323 | 0 | n = use_list->count; |
324 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
325 | 0 | use = *p; |
326 | 0 | insn = &ctx->ir_base[use]; |
327 | 0 | if (insn->op == IR_PHI) { |
328 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
329 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
330 | 0 | ir_ref n = insn->inputs_count - 1; |
331 | | |
332 | | /* PHIs must be processed once */ |
333 | 0 | if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) { |
334 | 0 | continue; |
335 | 0 | } |
336 | 0 | ir_hashtab_add(&hash, -use, IR_NULL); |
337 | 0 | for (;n > 0; p++, q++, n--) { |
338 | 0 | if (*p == ref) { |
339 | 0 | j = i = ctx->cfg_map[*q]; |
340 | 0 | while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { |
341 | 0 | j = ctx->cfg_blocks[j].idom; |
342 | 0 | } |
343 | 0 | clone = ir_hashtab_find(&hash, j); |
344 | 0 | if (clone == IR_INVALID_VAL) { |
345 | 0 | clone = clones_count++; |
346 | 0 | ir_hashtab_add(&hash, j, clone); |
347 | 0 | clones[clone].block = j; |
348 | 0 | clones[clone].lca = i; |
349 | 0 | clones[clone].use_count = 0; |
350 | 0 | clones[clone].use = (uint32_t)-1; |
351 | 0 | } else { |
352 | 0 | clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i); |
353 | 0 | } |
354 | 0 | uses[uses_count].ref = use; |
355 | 0 | uses[uses_count].block = i; |
356 | 0 | uses[uses_count].next = clones[clone].use; |
357 | 0 | clones[clone].use_count++; |
358 | 0 | clones[clone].use = uses_count++; |
359 | 0 | } |
360 | 0 | } |
361 | 0 | } else { |
362 | 0 | j = i = ctx->cfg_map[use]; |
363 | 0 | if (i) { |
364 | 0 | IR_ASSERT(i > 0); |
365 | 0 | while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { |
366 | 0 | j = ctx->cfg_blocks[j].idom; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | clone = ir_hashtab_find(&hash, j); |
370 | 0 | if (clone == IR_INVALID_VAL) { |
371 | 0 | clone = clones_count++; |
372 | 0 | ir_hashtab_add(&hash, j, clone); |
373 | 0 | clones[clone].block = j; |
374 | 0 | clones[clone].lca = i; |
375 | 0 | clones[clone].use_count = 0; |
376 | 0 | clones[clone].use = -1; |
377 | 0 | } else { |
378 | 0 | clones[clone].lca = ir_gcm_find_lca(ctx, clones[clone].lca, i); |
379 | 0 | } |
380 | 0 | uses[uses_count].ref = use; |
381 | 0 | uses[uses_count].block = i; |
382 | 0 | uses[uses_count].next = clones[clone].use; |
383 | 0 | clones[clone].use_count++; |
384 | 0 | clones[clone].use = uses_count++; |
385 | 0 | } |
386 | 0 | } |
387 | | |
388 | | /* Select best blocks to insert clones */ |
389 | 0 | for (i = 0; i < clones_count; i++) { |
390 | 0 | uint32_t b0 = clones[i].block; |
391 | 0 | uint32_t lca = clones[i].lca; |
392 | |
|
393 | 0 | if (b0 != lca) { |
394 | 0 | ir_block *bb = &ctx->cfg_blocks[lca]; |
395 | 0 | uint32_t loop_depth = bb->loop_depth; |
396 | |
|
397 | 0 | if (loop_depth) { |
398 | 0 | uint32_t b; |
399 | 0 | uint32_t best; |
400 | |
|
401 | 0 | best = b = lca; |
402 | 0 | do { |
403 | 0 | b = bb->dom_parent; |
404 | 0 | bb = &ctx->cfg_blocks[b]; |
405 | 0 | if (bb->loop_depth < loop_depth) { |
406 | 0 | if (!bb->loop_depth) { |
407 | 0 | best = b; |
408 | 0 | break; |
409 | 0 | } |
410 | 0 | loop_depth = bb->loop_depth; |
411 | 0 | best = b; |
412 | 0 | } |
413 | 0 | } while (b != b0); |
414 | 0 | lca = best; |
415 | 0 | } |
416 | 0 | clones[i].block = lca; |
417 | 0 | } |
418 | 0 | } |
419 | | |
420 | | // TODO: instead of inserting clone into the block where the expressin is partially available, |
421 | | // we should insert PHI and the actual clones into the block sources where it's not available |
422 | | // (similar to SSAPRE) |
423 | |
|
424 | 0 | #ifdef IR_DEBUG |
425 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
426 | 0 | for (i = 0; i < clones_count; i++) { |
427 | 0 | uint32_t u = clones[i].use; |
428 | |
|
429 | 0 | fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d", |
430 | 0 | i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block); |
431 | 0 | u = uses[u].next; |
432 | 0 | while (u != (uint32_t)-1) { |
433 | 0 | fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block); |
434 | 0 | u = uses[u].next; |
435 | 0 | } |
436 | 0 | fprintf(stderr, "]\n"); |
437 | 0 | } |
438 | 0 | } |
439 | 0 | #endif |
440 | | |
441 | | /* Create Clones */ |
442 | 0 | insn = &ctx->ir_base[ref]; |
443 | 0 | clones[0].ref = ref; |
444 | 0 | for (i = 1; i < clones_count; i++) { |
445 | 0 | clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3); |
446 | 0 | insn = &ctx->ir_base[ref]; |
447 | | /* Depending on the flags in IR_OPS, these can be references or data. */ |
448 | 0 | if (insn->op1 > 0 && insn->inputs_count >= 1) ir_use_list_add(ctx, insn->op1, clone); |
449 | 0 | if (insn->op2 > 0 && insn->inputs_count >= 2) ir_use_list_add(ctx, insn->op2, clone); |
450 | 0 | if (insn->op3 > 0 && insn->inputs_count >= 3) ir_use_list_add(ctx, insn->op3, clone); |
451 | 0 | } |
452 | | |
453 | | /* Reconstruct IR: Update DEF->USE lists, CFG mapping and etc */ |
454 | 0 | n = ctx->use_lists[ref].refs; |
455 | 0 | for (i = 0; i < clones_count; i++) { |
456 | 0 | clone = clones[i].ref; |
457 | 0 | if (clones[i].block |
458 | 0 | && clones[i].use_count == 1 |
459 | 0 | && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) { |
460 | | /* TOTALLY_USEFUL block may be a head of a diamond above the real usage. |
461 | | * Sink it down to the real usage block. |
462 | | * Clones with few uses will be sunk into the LCA block. |
463 | | */ |
464 | 0 | clones[i].block = uses[clones[i].use].block; |
465 | 0 | } |
466 | 0 | ctx->cfg_map[clone] = clones[i].block; |
467 | 0 | ctx->use_lists[clone].count = clones[i].use_count; |
468 | 0 | ctx->use_lists[clone].refs = n; |
469 | |
|
470 | 0 | uint32_t u = clones[i].use; |
471 | 0 | while (u != (uint32_t)-1) { |
472 | 0 | uint32_t src = uses[u].block; |
473 | 0 | use = uses[u].ref; |
474 | 0 | ctx->use_edges[n++] = use; |
475 | 0 | u = uses[u].next; |
476 | 0 | if (i > 0) { |
477 | | /* replace inputs */ |
478 | 0 | ir_insn *insn = &ctx->ir_base[use]; |
479 | 0 | ir_ref k, l = insn->inputs_count; |
480 | |
|
481 | 0 | if (insn->op == IR_PHI) { |
482 | 0 | ir_insn *merge = &ctx->ir_base[insn->op1]; |
483 | 0 | for (k = 2; k <= l; k++) { |
484 | 0 | j = ctx->cfg_map[ir_insn_op(merge, k - 1)]; |
485 | 0 | if (j == src) { |
486 | 0 | IR_ASSERT(ir_insn_op(insn, k) == ref); |
487 | 0 | if (j != clones[i].block) { |
488 | 0 | uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth; |
489 | 0 | while (ctx->cfg_blocks[j].dom_depth > dom_depth) { |
490 | 0 | j = ctx->cfg_blocks[j].dom_parent; |
491 | 0 | } |
492 | 0 | if (j != clones[i].block) { |
493 | 0 | continue; |
494 | 0 | } |
495 | 0 | } |
496 | 0 | ir_insn_set_op(insn, k, clone); |
497 | 0 | break; |
498 | 0 | } |
499 | 0 | } |
500 | 0 | } else { |
501 | 0 | for (k = 1; k <= l; k++) { |
502 | 0 | if (ir_insn_op(insn, k) == ref) { |
503 | 0 | ir_insn_set_op(insn, k, clone); |
504 | 0 | break; |
505 | 0 | } |
506 | 0 | } |
507 | 0 | } |
508 | 0 | } |
509 | 0 | } |
510 | 0 | } |
511 | | |
512 | 0 | ir_mem_free(uses); |
513 | 0 | ir_mem_free(clones); |
514 | 0 | ir_hashtab_free(&hash); |
515 | |
|
516 | 0 | #ifdef IR_DEBUG |
517 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
518 | 0 | ir_check(ctx); |
519 | 0 | } |
520 | 0 | #endif |
521 | |
|
522 | 0 | return 1; |
523 | 0 | } |
524 | | #endif |
525 | | |
526 | | #ifdef IR_DEBUG |
527 | | static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2) |
528 | 0 | { |
529 | 0 | uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth; |
530 | 0 | const ir_block *bb2 = &ctx->cfg_blocks[b2]; |
531 | |
|
532 | 0 | while (bb2->dom_depth > b1_depth) { |
533 | 0 | b2 = bb2->dom_parent; |
534 | 0 | bb2 = &ctx->cfg_blocks[b2]; |
535 | 0 | } |
536 | 0 | return b1 == b2; |
537 | 0 | } |
538 | | #endif |
539 | | |
540 | | static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b) |
541 | 0 | { |
542 | 0 | ir_ref n, use; |
543 | 0 | uint32_t lca = 0; |
544 | |
|
545 | 0 | IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR); |
546 | 0 | IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI); |
547 | |
|
548 | 0 | IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b)); |
549 | 0 | b = IR_GCM_EARLY_BLOCK(b); |
550 | 0 | ctx->cfg_map[ref] = b; |
551 | |
|
552 | 0 | for (n = 0; n < ctx->use_lists[ref].count; n++) { |
553 | 0 | use = ctx->use_edges[ctx->use_lists[ref].refs + n]; |
554 | 0 | b = ctx->cfg_map[use]; |
555 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
556 | 0 | ir_gcm_schedule_late(ctx, use, b); |
557 | 0 | b = ctx->cfg_map[use]; |
558 | 0 | IR_ASSERT(b != 0); |
559 | 0 | } else if (!b) { |
560 | 0 | continue; |
561 | 0 | } else if (ctx->ir_base[use].op == IR_PHI) { |
562 | 0 | ir_insn *insn = &ctx->ir_base[use]; |
563 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
564 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
565 | 0 | ir_ref n = insn->inputs_count - 1; |
566 | |
|
567 | 0 | for (;n > 0; p++, q++, n--) { |
568 | 0 | if (*p == ref) { |
569 | 0 | b = ctx->cfg_map[*q]; |
570 | 0 | lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); |
571 | 0 | } |
572 | 0 | } |
573 | 0 | continue; |
574 | 0 | } |
575 | 0 | lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); |
576 | 0 | } |
577 | | |
578 | 0 | IR_ASSERT(lca != 0 && "No Common Ancestor"); |
579 | 0 | IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late"); |
580 | |
|
581 | 0 | #if IR_GCM_SPLIT |
582 | 0 | if (ctx->use_lists[ref].count > 1 |
583 | 0 | && ir_split_partially_dead_node(ctx, ref, lca)) { |
584 | 0 | return; |
585 | 0 | } |
586 | 0 | #endif |
587 | | |
588 | 0 | if (lca != ctx->cfg_map[ref]) { |
589 | 0 | b = ir_gcm_select_best_block(ctx, ref, lca); |
590 | |
|
591 | 0 | ctx->cfg_map[ref] = b; |
592 | | |
593 | | /* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */ |
594 | 0 | if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) { |
595 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
596 | 0 | ir_ref n, *p, use; |
597 | |
|
598 | 0 | for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n < 0; p++, n--) { |
599 | 0 | use = *p; |
600 | 0 | if (ctx->ir_base[use].op == IR_OVERFLOW) { |
601 | 0 | ctx->cfg_map[use] = b; |
602 | 0 | break; |
603 | 0 | } |
604 | 0 | } |
605 | 0 | } |
606 | 0 | } |
607 | 0 | } |
608 | | |
609 | | int ir_gcm(ir_ctx *ctx) |
610 | 0 | { |
611 | 0 | ir_ref k, n, *p, ref; |
612 | 0 | ir_block *bb; |
613 | 0 | ir_list queue_early; |
614 | 0 | ir_list queue_late; |
615 | 0 | uint32_t *_blocks, b; |
616 | 0 | ir_insn *insn, *use_insn; |
617 | 0 | ir_use_list *use_list; |
618 | |
|
619 | 0 | IR_ASSERT(ctx->cfg_map); |
620 | 0 | _blocks = ctx->cfg_map; |
621 | |
|
622 | 0 | ir_list_init(&queue_early, ctx->insns_count); |
623 | |
|
624 | 0 | if (ctx->cfg_blocks_count == 1) { |
625 | 0 | ref = ctx->cfg_blocks[1].end; |
626 | 0 | do { |
627 | 0 | insn = &ctx->ir_base[ref]; |
628 | 0 | _blocks[ref] = 1; /* pin to block */ |
629 | 0 | if (insn->inputs_count > 1) { |
630 | | /* insn has input data edges */ |
631 | 0 | ir_list_push_unchecked(&queue_early, ref); |
632 | 0 | } |
633 | 0 | ref = insn->op1; /* control predecessor */ |
634 | 0 | } while (ref != 1); /* IR_START */ |
635 | 0 | _blocks[1] = 1; /* pin to block */ |
636 | |
|
637 | 0 | use_list = &ctx->use_lists[1]; |
638 | 0 | n = use_list->count; |
639 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { |
640 | 0 | ref = *p; |
641 | 0 | use_insn = &ctx->ir_base[ref]; |
642 | 0 | if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) { |
643 | 0 | ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR; |
644 | 0 | _blocks[ref] = 1; /* pin to block */ |
645 | 0 | } |
646 | 0 | } |
647 | | |
648 | | /* Place all live nodes to the first block */ |
649 | 0 | while (ir_list_len(&queue_early)) { |
650 | 0 | ref = ir_list_pop(&queue_early); |
651 | 0 | insn = &ctx->ir_base[ref]; |
652 | 0 | n = insn->inputs_count; |
653 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
654 | 0 | ref = *p; |
655 | 0 | if (ref > 0 && _blocks[ref] == 0) { |
656 | 0 | _blocks[ref] = 1; |
657 | 0 | ir_list_push_unchecked(&queue_early, ref); |
658 | 0 | } |
659 | 0 | } |
660 | 0 | } |
661 | |
|
662 | 0 | ir_list_free(&queue_early); |
663 | |
|
664 | 0 | return 1; |
665 | 0 | } |
666 | | |
667 | 0 | ir_list_init(&queue_late, ctx->insns_count); |
668 | | |
669 | | /* pin and collect control and control depended (PARAM, VAR, PHI, PI) instructions */ |
670 | 0 | b = ctx->cfg_blocks_count; |
671 | 0 | for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) { |
672 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
673 | 0 | ref = bb->end; |
674 | | |
675 | | /* process the last instruction of the block */ |
676 | 0 | insn = &ctx->ir_base[ref]; |
677 | 0 | _blocks[ref] = b; /* pin to block */ |
678 | 0 | if (insn->inputs_count > 1) { |
679 | | /* insn has input data edges */ |
680 | 0 | ir_list_push_unchecked(&queue_early, ref); |
681 | 0 | } |
682 | 0 | ref = insn->op1; /* control predecessor */ |
683 | |
|
684 | 0 | while (ref != bb->start) { |
685 | 0 | insn = &ctx->ir_base[ref]; |
686 | 0 | _blocks[ref] = b; /* pin to block */ |
687 | 0 | if (insn->inputs_count > 1) { |
688 | | /* insn has input data edges */ |
689 | 0 | ir_list_push_unchecked(&queue_early, ref); |
690 | 0 | } |
691 | 0 | if (insn->type != IR_VOID) { |
692 | 0 | IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM); |
693 | 0 | } |
694 | 0 | ref = insn->op1; /* control predecessor */ |
695 | 0 | } |
696 | | |
697 | | /* process the first instruction of the block */ |
698 | 0 | _blocks[ref] = b; /* pin to block */ |
699 | |
|
700 | 0 | use_list = &ctx->use_lists[ref]; |
701 | 0 | n = use_list->count; |
702 | 0 | if (n > 1) { |
703 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { |
704 | 0 | ref = *p; |
705 | 0 | use_insn = &ctx->ir_base[ref]; |
706 | 0 | if (use_insn->op == IR_PHI || use_insn->op == IR_PI) { |
707 | 0 | bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI; |
708 | 0 | if (EXPECTED(ctx->use_lists[ref].count != 0)) { |
709 | 0 | _blocks[ref] = b; /* pin to block */ |
710 | 0 | ir_list_push_unchecked(&queue_early, ref); |
711 | 0 | } |
712 | 0 | } else if (use_insn->op == IR_PARAM) { |
713 | 0 | bb->flags |= IR_BB_HAS_PARAM; |
714 | 0 | _blocks[ref] = b; /* pin to block */ |
715 | 0 | } else if (use_insn->op == IR_VAR) { |
716 | 0 | bb->flags |= IR_BB_HAS_VAR; |
717 | 0 | _blocks[ref] = b; /* pin to block */ |
718 | 0 | } |
719 | 0 | } |
720 | 0 | } |
721 | 0 | } |
722 | |
|
723 | 0 | n = ir_list_len(&queue_early); |
724 | 0 | while (n > 0) { |
725 | 0 | n--; |
726 | 0 | ref = ir_list_at(&queue_early, n); |
727 | 0 | insn = &ctx->ir_base[ref]; |
728 | 0 | k = insn->inputs_count - 1; |
729 | 0 | for (p = insn->ops + 2; k > 0; p++, k--) { |
730 | 0 | ref = *p; |
731 | 0 | if (ref > 0 && _blocks[ref] == 0) { |
732 | 0 | ir_gcm_schedule_early(ctx, ref, &queue_late); |
733 | 0 | } |
734 | 0 | } |
735 | 0 | } |
736 | |
|
737 | 0 | #ifdef IR_DEBUG |
738 | 0 | if (ctx->flags & IR_DEBUG_GCM) { |
739 | 0 | fprintf(stderr, "GCM Schedule Early\n"); |
740 | 0 | for (n = 1; n < ctx->insns_count; n++) { |
741 | 0 | fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); |
742 | 0 | } |
743 | 0 | } |
744 | 0 | #endif |
745 | |
|
746 | 0 | #if IR_GCM_SPLIT |
747 | 0 | ir_gcm_split_data data; |
748 | |
|
749 | 0 | ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1); |
750 | 0 | ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1); |
751 | 0 | ctx->data = &data; |
752 | 0 | #endif |
753 | |
|
754 | 0 | n = ir_list_len(&queue_late); |
755 | 0 | while (n > 0) { |
756 | 0 | n--; |
757 | 0 | ref = ir_list_at(&queue_late, n); |
758 | 0 | b = ctx->cfg_map[ref]; |
759 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
760 | 0 | ir_gcm_schedule_late(ctx, ref, b); |
761 | 0 | } |
762 | 0 | } |
763 | |
|
764 | 0 | #if IR_GCM_SPLIT |
765 | 0 | ir_list_free(&data.worklist); |
766 | 0 | ir_sparse_set_free(&data.totally_useful); |
767 | 0 | ctx->data = NULL; |
768 | 0 | #endif |
769 | |
|
770 | 0 | ir_list_free(&queue_early); |
771 | 0 | ir_list_free(&queue_late); |
772 | |
|
773 | 0 | #ifdef IR_DEBUG |
774 | 0 | if (ctx->flags & IR_DEBUG_GCM) { |
775 | 0 | fprintf(stderr, "GCM Schedule Late\n"); |
776 | 0 | for (n = 1; n < ctx->insns_count; n++) { |
777 | 0 | fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); |
778 | 0 | } |
779 | 0 | } |
780 | 0 | #endif |
781 | |
|
782 | 0 | return 1; |
783 | 0 | } |
784 | | |
785 | | static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) |
786 | 0 | { |
787 | 0 | uint32_t n1, n2, pos; |
788 | 0 | ir_ref key; |
789 | 0 | ir_hashtab_bucket *b1, *b2; |
790 | 0 | ir_hashtab *binding = ctx->binding; |
791 | 0 | uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask); |
792 | |
|
793 | 0 | memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); |
794 | 0 | n1 = binding->count; |
795 | 0 | n2 = 0; |
796 | 0 | pos = 0; |
797 | 0 | b1 = binding->data; |
798 | 0 | b2 = binding->data; |
799 | 0 | while (n1 > 0) { |
800 | 0 | key = b1->key; |
801 | 0 | IR_ASSERT(key < ctx->insns_count); |
802 | 0 | if (_xlat[key]) { |
803 | 0 | key = _xlat[key]; |
804 | 0 | b2->key = key; |
805 | 0 | if (b1->val > 0) { |
806 | 0 | IR_ASSERT(_xlat[b1->val]); |
807 | 0 | b2->val = _xlat[b1->val]; |
808 | 0 | } else { |
809 | 0 | b2->val = b1->val; |
810 | 0 | } |
811 | 0 | key |= binding->mask; |
812 | 0 | b2->next = ((uint32_t*)binding->data)[key]; |
813 | 0 | ((uint32_t*)binding->data)[key] = pos; |
814 | 0 | pos += sizeof(ir_hashtab_bucket); |
815 | 0 | b2++; |
816 | 0 | n2++; |
817 | 0 | } |
818 | 0 | b1++; |
819 | 0 | n1--; |
820 | 0 | } |
821 | 0 | binding->count = n2; |
822 | 0 | } |
823 | | |
824 | | IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref) |
825 | 0 | { |
826 | 0 | if (!_xlat[ref]) { |
827 | 0 | _xlat[ref] = ref; /* this is only a "used constant" marker */ |
828 | 0 | return 1; |
829 | 0 | } |
830 | 0 | return 0; |
831 | 0 | } |
832 | | |
833 | | IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start) |
834 | 0 | { |
835 | 0 | ir_insn *insn = &ctx->ir_base[start]; |
836 | 0 | uint32_t n = insn->inputs_count; |
837 | 0 | ir_ref *p = insn->ops + 1; |
838 | |
|
839 | 0 | if (n == 1) { |
840 | 0 | return ctx->cfg_map[*p] < b; |
841 | 0 | } else { |
842 | 0 | IR_ASSERT(n > 1); |
843 | 0 | for (; n > 0; p++, n--) { |
844 | 0 | ir_ref input = *p; |
845 | |
|
846 | 0 | if (!IR_IS_CONST_REF(input)) { |
847 | 0 | uint32_t input_b = ctx->cfg_map[input]; |
848 | |
|
849 | 0 | if (input_b < b) { |
850 | | /* ordered */ |
851 | 0 | } else if ((bb->flags & IR_BB_LOOP_HEADER) |
852 | 0 | && (input_b == b || ctx->cfg_blocks[input_b].loop_header == b)) { |
853 | | /* back-edge of reducible loop */ |
854 | 0 | } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) |
855 | 0 | && (ctx->cfg_blocks[input_b].loop_header == bb->loop_header)) { |
856 | | /* closing edge of irreducible loop */ |
857 | 0 | } else { |
858 | 0 | return 0; |
859 | 0 | } |
860 | 0 | } |
861 | 0 | } |
862 | 0 | return 1; |
863 | 0 | } |
864 | 0 | } |
865 | | |
866 | | static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next) |
867 | 0 | { |
868 | 0 | uint32_t b, succ, count, *q, *xlat; |
869 | 0 | ir_block *bb; |
870 | 0 | ir_ref ref, n, prev; |
871 | 0 | ir_worklist worklist; |
872 | 0 | ir_block *new_blocks; |
873 | |
|
874 | | #if 0 |
875 | | for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
876 | | if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) { |
877 | | goto fix; |
878 | | } |
879 | | } |
880 | | return; |
881 | | |
882 | | fix: |
883 | | #endif |
884 | 0 | count = ctx->cfg_blocks_count + 1; |
885 | 0 | new_blocks = ir_mem_malloc(count * sizeof(ir_block)); |
886 | 0 | xlat = ir_mem_malloc(count * sizeof(uint32_t)); |
887 | 0 | ir_worklist_init(&worklist, count); |
888 | 0 | ir_worklist_push(&worklist, 1); |
889 | | /* Schedule blocks bottom-up. Place block only after all its successurs (except back-edges) are placed. */ |
890 | 0 | while (ir_worklist_len(&worklist) != 0) { |
891 | 0 | next: |
892 | 0 | b = ir_worklist_peek(&worklist); |
893 | 0 | bb = &ctx->cfg_blocks[b]; |
894 | 0 | n = bb->successors_count; |
895 | 0 | if (n == 1) { |
896 | 0 | succ = ctx->cfg_edges[bb->successors]; |
897 | 0 | if (ir_bitset_in(worklist.visited, succ)) { |
898 | | /* already processed */ |
899 | 0 | } else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP) |
900 | 0 | && ((ctx->cfg_blocks[b].flags & IR_BB_LOOP_HEADER) ? |
901 | 0 | (ctx->cfg_blocks[succ].loop_header != b) : |
902 | 0 | (ctx->cfg_blocks[succ].loop_header != ctx->cfg_blocks[b].loop_header))) { |
903 | | /* "side" entry of irreducible loop (ignore) */ |
904 | 0 | } else if (ir_worklist_push(&worklist, succ)) { |
905 | 0 | goto next; |
906 | 0 | } |
907 | 0 | } else if (n > 1) { |
908 | 0 | uint32_t best = 0; |
909 | 0 | uint32_t best_loop_depth = 0; |
910 | |
|
911 | 0 | q = ctx->cfg_edges + bb->successors + n; |
912 | 0 | do { |
913 | 0 | q--; |
914 | 0 | succ = *q; |
915 | 0 | if (ir_bitset_in(worklist.visited, succ)) { |
916 | | /* already processed */ |
917 | 0 | } else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP) |
918 | 0 | && ((ctx->cfg_blocks[b].flags & IR_BB_LOOP_HEADER) ? |
919 | 0 | (ctx->cfg_blocks[succ].loop_header != b) : |
920 | 0 | (ctx->cfg_blocks[succ].loop_header != ctx->cfg_blocks[b].loop_header))) { |
921 | | /* "side" entry of irreducible loop (ignore) */ |
922 | 0 | } else if (!best) { |
923 | 0 | best = succ; |
924 | 0 | best_loop_depth = ctx->cfg_blocks[best].loop_depth; |
925 | 0 | } else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) { |
926 | | /* prefer deeper loop */ |
927 | 0 | best = succ; |
928 | 0 | best_loop_depth = ctx->cfg_blocks[best].loop_depth; |
929 | 0 | } |
930 | 0 | n--; |
931 | 0 | } while (n > 0); |
932 | 0 | if (best) { |
933 | 0 | ir_worklist_push(&worklist, best); |
934 | 0 | goto next; |
935 | 0 | } |
936 | 0 | } |
937 | | |
938 | | /* All successors of "b" are placed. Now we can place "b" itself. */ |
939 | 0 | ir_worklist_pop(&worklist); |
940 | 0 | count--; |
941 | 0 | new_blocks[count] = *bb; |
942 | 0 | xlat[b] = count; |
943 | 0 | } |
944 | 0 | IR_ASSERT(count == 1); |
945 | 0 | xlat[0] = 0; |
946 | 0 | ir_worklist_free(&worklist); |
947 | |
|
948 | 0 | prev = 0; |
949 | 0 | for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
950 | 0 | bb->idom = xlat[bb->idom]; |
951 | 0 | bb->loop_header = xlat[bb->loop_header]; |
952 | 0 | n = bb->successors_count; |
953 | 0 | if (n > 0) { |
954 | 0 | for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) { |
955 | 0 | *q = xlat[*q]; |
956 | 0 | } |
957 | 0 | } |
958 | 0 | n = bb->predecessors_count; |
959 | 0 | if (n > 0) { |
960 | 0 | for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) { |
961 | 0 | *q = xlat[*q]; |
962 | 0 | } |
963 | 0 | } |
964 | 0 | _next[prev] = bb->start; |
965 | 0 | _prev[bb->start] = prev; |
966 | 0 | prev = bb->end; |
967 | 0 | } |
968 | 0 | _next[0] = 0; |
969 | 0 | _next[prev] = 0; |
970 | |
|
971 | 0 | for (ref = 2; ref < ctx->insns_count; ref++) { |
972 | 0 | ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]]; |
973 | 0 | } |
974 | 0 | ir_mem_free(xlat); |
975 | |
|
976 | 0 | ir_mem_free(ctx->cfg_blocks); |
977 | 0 | ctx->cfg_blocks = new_blocks; |
978 | 0 | } |
979 | | |
980 | | #if IR_DEBUG |
981 | | static void ir_schedule_print_list(const ir_ctx *ctx, uint32_t b, const ir_ref *_next, |
982 | | ir_ref start, ir_ref end, const char *label) |
983 | 0 | { |
984 | 0 | ir_ref ref; |
985 | |
|
986 | 0 | fprintf(stderr, " %s [%d", label, start); |
987 | 0 | ref = _next[start]; |
988 | 0 | while (ref != end) { |
989 | 0 | fprintf(stderr, ",%d", ref); |
990 | 0 | ref = _next[ref]; |
991 | 0 | } |
992 | 0 | fprintf(stderr, ",%d]\n", ref); |
993 | 0 | } |
994 | | #endif |
995 | | |
996 | | /* Simple Stable Topological Sort */ |
997 | | static void ir_schedule_topsort(const ir_ctx *ctx, uint32_t b, const ir_block *bb, |
998 | | ir_ref *_xlat, ir_ref *_next, ir_ref *_prev, |
999 | | ir_ref ref, ir_ref end, |
1000 | | ir_ref *insns_count, ir_ref *consts_count) |
1001 | 0 | { |
1002 | 0 | ir_ref i = ref; |
1003 | 0 | const ir_insn *insn; |
1004 | |
|
1005 | 0 | if (bb->successors_count > 1) { |
1006 | 0 | ir_ref input, j = bb->end; |
1007 | 0 | ir_insn *end = &ctx->ir_base[j]; |
1008 | |
|
1009 | 0 | if (end->op == IR_IF) { |
1010 | | /* Move condition closer to IF */ |
1011 | 0 | input = end->op2; |
1012 | 0 | if (input > 0 |
1013 | 0 | && ctx->cfg_map[input] == b |
1014 | 0 | && !_xlat[input] |
1015 | 0 | && _prev[j] != input |
1016 | 0 | && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) { |
1017 | 0 | if (input == i) { |
1018 | 0 | i = _next[i]; |
1019 | 0 | insn = &ctx->ir_base[i]; |
1020 | 0 | } |
1021 | | /* remove "input" */ |
1022 | 0 | _prev[_next[input]] = _prev[input]; |
1023 | 0 | _next[_prev[input]] = _next[input]; |
1024 | | /* insert before "j" */ |
1025 | 0 | _prev[input] = _prev[j]; |
1026 | 0 | _next[input] = j; |
1027 | 0 | _next[_prev[j]] = input; |
1028 | 0 | _prev[j] = input; |
1029 | 0 | } |
1030 | 0 | } |
1031 | 0 | } |
1032 | |
|
1033 | 0 | while (i != end) { |
1034 | 0 | ir_ref n, j, input; |
1035 | 0 | const ir_ref *p; |
1036 | |
|
1037 | 0 | restart: |
1038 | 0 | IR_ASSERT(ctx->cfg_map[i] == b); |
1039 | 0 | insn = &ctx->ir_base[i]; |
1040 | 0 | n = insn->inputs_count; |
1041 | 0 | for (j = n, p = insn->ops + 1; j > 0; p++, j--) { |
1042 | 0 | input = *p; |
1043 | 0 | if (!_xlat[input]) { |
1044 | | /* input is not scheduled yet */ |
1045 | 0 | if (input > 0) { |
1046 | 0 | if (ctx->cfg_map[input] == b) { |
1047 | | /* "input" should be before "i" to satisfy dependency */ |
1048 | 0 | #ifdef IR_DEBUG |
1049 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
1050 | 0 | fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i); |
1051 | 0 | } |
1052 | 0 | #endif |
1053 | | /* remove "input" */ |
1054 | 0 | _prev[_next[input]] = _prev[input]; |
1055 | 0 | _next[_prev[input]] = _next[input]; |
1056 | | /* insert before "i" */ |
1057 | 0 | _prev[input] = _prev[i]; |
1058 | 0 | _next[input] = i; |
1059 | 0 | _next[_prev[i]] = input; |
1060 | 0 | _prev[i] = input; |
1061 | | /* restart from "input" */ |
1062 | 0 | i = input; |
1063 | 0 | goto restart; |
1064 | 0 | } |
1065 | 0 | } else if (input < IR_TRUE) { |
1066 | 0 | *consts_count += ir_count_constant(_xlat, input); |
1067 | 0 | } |
1068 | 0 | } |
1069 | 0 | } |
1070 | | |
1071 | 0 | _xlat[i] = *insns_count; |
1072 | 0 | *insns_count += ir_insn_inputs_to_len(n); |
1073 | 0 | IR_ASSERT(_next[i] != IR_UNUSED); |
1074 | 0 | i = _next[i]; |
1075 | 0 | } |
1076 | 0 | } |
1077 | | |
1078 | | int ir_schedule(ir_ctx *ctx) |
1079 | 0 | { |
1080 | 0 | ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count; |
1081 | 0 | ir_ref *_xlat; |
1082 | 0 | ir_ref *edges; |
1083 | 0 | ir_ref prev_b_end; |
1084 | 0 | uint32_t b; |
1085 | 0 | ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
1086 | 0 | ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
1087 | 0 | ir_block *bb; |
1088 | 0 | ir_insn *insn, *new_insn, *base; |
1089 | 0 | ir_use_list *lists, *use_list, *new_list; |
1090 | 0 | bool bad_bb_order = 0; |
1091 | | |
1092 | | /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */ |
1093 | 0 | IR_ASSERT(ctx->cfg_map[1] == 1); |
1094 | | |
1095 | | /* link BB boundaries */ |
1096 | 0 | _prev[1] = 0; |
1097 | 0 | prev_b_end = ctx->cfg_blocks[1].end; |
1098 | 0 | _next[1] = prev_b_end; |
1099 | 0 | _prev[prev_b_end] = 1; |
1100 | 0 | for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) { |
1101 | 0 | ir_ref start = bb->start; |
1102 | 0 | ir_ref end = bb->end; |
1103 | 0 | _next[prev_b_end] = start; |
1104 | 0 | _prev[start] = prev_b_end; |
1105 | 0 | _next[start] = end; |
1106 | 0 | _prev[end] = start; |
1107 | 0 | prev_b_end = end; |
1108 | 0 | if (!ir_is_good_bb_order(ctx, b, bb, start)) { |
1109 | 0 | bad_bb_order = 1; |
1110 | 0 | } |
1111 | 0 | } |
1112 | 0 | _next[prev_b_end] = 0; |
1113 | | |
1114 | | /* insert intermediate BB nodes */ |
1115 | 0 | use_edges_count = ctx->use_lists[1].count; |
1116 | 0 | for (i = 2, use_list = &ctx->use_lists[i]; i < ctx->insns_count; use_list++, i++) { |
1117 | 0 | b = ctx->cfg_map[i]; |
1118 | 0 | if (!b) continue; |
1119 | 0 | use_edges_count += use_list->count; |
1120 | 0 | bb = &ctx->cfg_blocks[b]; |
1121 | 0 | if (i != bb->start && i != bb->end) { |
1122 | | /* insert before "end" */ |
1123 | 0 | ir_ref next = bb->end; |
1124 | 0 | ir_ref prev = _prev[next]; |
1125 | 0 | _prev[i] = prev; |
1126 | 0 | _next[i] = next; |
1127 | 0 | _next[prev] = i; |
1128 | 0 | _prev[next] = i; |
1129 | 0 | } |
1130 | 0 | } |
1131 | |
|
1132 | 0 | if (bad_bb_order) { |
1133 | 0 | ir_fix_bb_order(ctx, _prev, _next); |
1134 | 0 | } |
1135 | |
|
1136 | 0 | _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); |
1137 | 0 | _xlat += ctx->consts_count; |
1138 | 0 | _xlat[IR_TRUE] = IR_TRUE; |
1139 | 0 | _xlat[IR_FALSE] = IR_FALSE; |
1140 | 0 | _xlat[IR_NULL] = IR_NULL; |
1141 | 0 | _xlat[IR_UNUSED] = IR_UNUSED; |
1142 | 0 | insns_count = 1; |
1143 | 0 | consts_count = -(IR_TRUE - 1); |
1144 | | |
1145 | | /* Schedule instructions inside each BB (now just topological sort according to dependencies) */ |
1146 | 0 | for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
1147 | 0 | ir_ref start; |
1148 | |
|
1149 | 0 | #ifdef IR_DEBUG |
1150 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
1151 | 0 | fprintf(stderr, "BB%d\n", b); |
1152 | 0 | ir_schedule_print_list(ctx, b, _next, bb->start, bb->end, "INITIAL"); |
1153 | 0 | } |
1154 | 0 | #endif |
1155 | |
|
1156 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
1157 | | /* Schedule BB start */ |
1158 | 0 | start = i = bb->start; |
1159 | 0 | _xlat[i] = bb->start = insns_count; |
1160 | 0 | insn = &ctx->ir_base[i]; |
1161 | 0 | if (insn->op == IR_BEGIN) { |
1162 | 0 | if (insn->op2) { |
1163 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1164 | 0 | } |
1165 | 0 | } else if (insn->op == IR_CASE_VAL) { |
1166 | 0 | IR_ASSERT(insn->op2 < IR_TRUE); |
1167 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1168 | 0 | } else if (insn->op == IR_CASE_RANGE) { |
1169 | 0 | IR_ASSERT(insn->op2 < IR_TRUE); |
1170 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1171 | 0 | IR_ASSERT(insn->op3 < IR_TRUE); |
1172 | 0 | consts_count += ir_count_constant(_xlat, insn->op3); |
1173 | 0 | } |
1174 | 0 | n = insn->inputs_count; |
1175 | 0 | insns_count += ir_insn_inputs_to_len(n); |
1176 | 0 | i = _next[i]; |
1177 | 0 | insn = &ctx->ir_base[i]; |
1178 | 0 | if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { |
1179 | 0 | int count = 0; |
1180 | | |
1181 | | /* Schedule PARAM, VAR, PI */ |
1182 | 0 | while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { |
1183 | 0 | _xlat[i] = insns_count; |
1184 | 0 | insns_count += 1; |
1185 | 0 | i = _next[i]; |
1186 | 0 | insn = &ctx->ir_base[i]; |
1187 | 0 | count++; |
1188 | 0 | } |
1189 | | /* Schedule PHIs */ |
1190 | 0 | while (insn->op == IR_PHI) { |
1191 | 0 | ir_ref j, *p, input; |
1192 | |
|
1193 | 0 | _xlat[i] = insns_count; |
1194 | | /* Reuse "n" from MERGE and skip first input */ |
1195 | 0 | insns_count += ir_insn_inputs_to_len(n + 1); |
1196 | 0 | for (j = n, p = insn->ops + 2; j > 0; p++, j--) { |
1197 | 0 | input = *p; |
1198 | 0 | if (input < IR_TRUE) { |
1199 | 0 | consts_count += ir_count_constant(_xlat, input); |
1200 | 0 | } |
1201 | 0 | } |
1202 | 0 | i = _next[i]; |
1203 | 0 | insn = &ctx->ir_base[i]; |
1204 | 0 | count++; |
1205 | 0 | } |
1206 | | /* Schedule remaining PHIs */ |
1207 | 0 | if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) { |
1208 | 0 | ir_use_list *use_list = &ctx->use_lists[start]; |
1209 | 0 | ir_ref *p, count = use_list->count; |
1210 | 0 | ir_ref phis = _prev[i]; |
1211 | |
|
1212 | 0 | for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) { |
1213 | 0 | ir_ref use = *p; |
1214 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1215 | 0 | if (!_xlat[use] && ctx->cfg_map[use]) { |
1216 | 0 | if (use_insn->op == IR_PARAM |
1217 | 0 | || use_insn->op == IR_VAR |
1218 | 0 | || use_insn->op == IR_PI |
1219 | 0 | || use_insn->op == IR_PHI) { |
1220 | 0 | IR_ASSERT(ctx->cfg_map[use] == b); |
1221 | 0 | if (_prev[use] != phis) { |
1222 | | /* remove "use" */ |
1223 | 0 | _prev[_next[use]] = _prev[use]; |
1224 | 0 | _next[_prev[use]] = _next[use]; |
1225 | | /* insert "use" after "phis" */ |
1226 | 0 | _prev[use] = phis; |
1227 | 0 | _next[use] = _next[phis]; |
1228 | 0 | _prev[_next[phis]] = use; |
1229 | 0 | _next[phis] = use; |
1230 | 0 | } |
1231 | 0 | phis = use; |
1232 | 0 | _xlat[use] = insns_count; |
1233 | 0 | if (use_insn->op == IR_PHI) { |
1234 | 0 | ir_ref *q; |
1235 | | /* Reuse "n" from MERGE and skip first input */ |
1236 | 0 | insns_count += ir_insn_inputs_to_len(n + 1); |
1237 | 0 | for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) { |
1238 | 0 | ir_ref input = *q; |
1239 | 0 | if (input < IR_TRUE) { |
1240 | 0 | consts_count += ir_count_constant(_xlat, input); |
1241 | 0 | } |
1242 | 0 | } |
1243 | 0 | } else { |
1244 | 0 | insns_count += 1; |
1245 | 0 | } |
1246 | 0 | } |
1247 | 0 | } |
1248 | 0 | } |
1249 | 0 | i = _next[phis]; |
1250 | 0 | insn = &ctx->ir_base[i]; |
1251 | 0 | } |
1252 | 0 | } |
1253 | |
|
1254 | 0 | if (i != bb->end) { |
1255 | 0 | ir_schedule_topsort(ctx, b, bb, _xlat, _next, _prev, i, bb->end, &insns_count, &consts_count); |
1256 | 0 | } |
1257 | |
|
1258 | 0 | #ifdef IR_DEBUG |
1259 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
1260 | 0 | ir_schedule_print_list(ctx, b, _next, start, bb->end, " FINAL"); |
1261 | 0 | } |
1262 | 0 | #endif |
1263 | | |
1264 | | /* Schedule BB end */ |
1265 | 0 | i = bb->end; |
1266 | 0 | insn = &ctx->ir_base[i]; |
1267 | 0 | _xlat[i] = bb->end = insns_count; |
1268 | 0 | insns_count++; |
1269 | 0 | if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) { |
1270 | 0 | if (insn->op2 < IR_TRUE) { |
1271 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1272 | 0 | } |
1273 | 0 | } |
1274 | 0 | } |
1275 | |
|
1276 | 0 | #if 1 |
1277 | | /* Check if scheduling didn't make any modifications */ |
1278 | 0 | if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { |
1279 | 0 | bool changed = 0; |
1280 | |
|
1281 | 0 | for (i = 1; i != 0; i = _next[i]) { |
1282 | 0 | if (_xlat[i] != i) { |
1283 | 0 | changed = 1; |
1284 | 0 | break; |
1285 | 0 | } |
1286 | 0 | } |
1287 | 0 | if (!changed) { |
1288 | 0 | _xlat -= ctx->consts_count; |
1289 | 0 | ir_mem_free(_xlat); |
1290 | 0 | ir_mem_free(_next); |
1291 | |
|
1292 | 0 | ctx->prev_ref = _prev; |
1293 | 0 | ctx->flags2 |= IR_LINEAR; |
1294 | 0 | ir_truncate(ctx); |
1295 | |
|
1296 | 0 | return 1; |
1297 | 0 | } |
1298 | 0 | } |
1299 | 0 | #endif |
1300 | | |
1301 | 0 | ir_mem_free(_prev); |
1302 | |
|
1303 | 0 | uint32_t *map = ir_mem_calloc(insns_count, sizeof(uint32_t)); |
1304 | 0 | _prev = ir_mem_malloc(insns_count * sizeof(ir_ref)); |
1305 | 0 | lists = ir_mem_malloc(insns_count * sizeof(ir_use_list)); |
1306 | 0 | ir_ref *use_edges = edges = ir_mem_malloc(use_edges_count * sizeof(ir_ref)); |
1307 | 0 | base = ir_mem_malloc((consts_count + insns_count) * sizeof(ir_insn)); |
1308 | 0 | base += consts_count; |
1309 | | |
1310 | | /* Copy constants */ |
1311 | 0 | if (ctx->consts_count == consts_count) { |
1312 | 0 | memcpy(base - consts_count + 1, ctx->ir_base - consts_count + 1, sizeof(ir_insn) * consts_count); |
1313 | 0 | for (j = -consts_count + 1; j < IR_TRUE; j++) { |
1314 | 0 | _xlat[j] = j; |
1315 | 0 | } |
1316 | 0 | } else { |
1317 | 0 | ir_insn *src = ctx->ir_base - ctx->consts_count + 1; |
1318 | 0 | ir_insn *dst = base - consts_count + 1; |
1319 | |
|
1320 | 0 | i = -ctx->consts_count + 1; |
1321 | 0 | j = -consts_count + 1; |
1322 | 0 | while (i < IR_TRUE) { |
1323 | 0 | if (_xlat[i]) { |
1324 | 0 | *dst = *src; |
1325 | 0 | dst->prev_const = 0; |
1326 | 0 | _xlat[i] = j; |
1327 | 0 | dst++; |
1328 | 0 | j++; |
1329 | 0 | } |
1330 | 0 | src++; |
1331 | 0 | i++; |
1332 | 0 | } |
1333 | 0 | IR_ASSERT(j == IR_TRUE); |
1334 | 0 | base[IR_TRUE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); |
1335 | 0 | base[IR_TRUE].val.u64 = 1; |
1336 | 0 | base[IR_FALSE].optx = IR_OPT(IR_C_BOOL, IR_BOOL); |
1337 | 0 | base[IR_FALSE].val.u64 = 0; |
1338 | 0 | base[IR_NULL].optx = IR_OPT(IR_C_ADDR, IR_ADDR); |
1339 | 0 | base[IR_NULL].val.u64 = 0; |
1340 | 0 | MAKE_NOP(&base[IR_UNUSED]); |
1341 | 0 | } |
1342 | | |
1343 | | /* Copy instructions, use lists and use edges */ |
1344 | 0 | #ifdef IR_DEBUG |
1345 | 0 | ir_ref orig_use_edges_count = use_edges_count; |
1346 | 0 | #endif |
1347 | 0 | prev_ref = 0; |
1348 | 0 | use_edges_count = 0; |
1349 | 0 | for (i = 1; i != 0; i = _next[i]) { |
1350 | 0 | new_ref = _xlat[i]; |
1351 | 0 | map[new_ref] = ctx->cfg_map[i]; |
1352 | 0 | _prev[new_ref] = prev_ref; |
1353 | 0 | prev_ref = new_ref; |
1354 | |
|
1355 | 0 | use_list = &ctx->use_lists[i]; |
1356 | 0 | n = use_list->count; |
1357 | 0 | k = 0; |
1358 | 0 | if (n == 1) { |
1359 | 0 | ref = ctx->use_edges[use_list->refs]; |
1360 | 0 | if (EXPECTED(_xlat[ref])) { |
1361 | 0 | *edges = _xlat[ref]; |
1362 | 0 | edges++; |
1363 | 0 | k = 1; |
1364 | 0 | } |
1365 | 0 | } else { |
1366 | 0 | p = &ctx->use_edges[use_list->refs]; |
1367 | 0 | while (n--) { |
1368 | 0 | ref = *p; |
1369 | 0 | if (EXPECTED(_xlat[ref])) { |
1370 | 0 | *edges = _xlat[ref]; |
1371 | 0 | edges++; |
1372 | 0 | k++; |
1373 | 0 | } |
1374 | 0 | p++; |
1375 | 0 | } |
1376 | 0 | } |
1377 | 0 | new_list = &lists[new_ref]; |
1378 | 0 | new_list->refs = use_edges_count; |
1379 | 0 | use_edges_count += k; |
1380 | 0 | new_list->count = k; |
1381 | |
|
1382 | 0 | insn = &ctx->ir_base[i]; |
1383 | 0 | new_insn = &base[new_ref]; |
1384 | |
|
1385 | 0 | new_insn->optx = insn->optx; |
1386 | 0 | n = new_insn->inputs_count; |
1387 | 0 | switch (n) { |
1388 | 0 | case 0: |
1389 | 0 | new_insn->op1 = insn->op1; |
1390 | 0 | new_insn->op2 = insn->op2; |
1391 | 0 | new_insn->op3 = insn->op3; |
1392 | 0 | break; |
1393 | 0 | case 1: |
1394 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1395 | 0 | if (new_insn->op == IR_BEGIN && insn->op2) { |
1396 | 0 | new_insn->op2 = _xlat[insn->op2]; |
1397 | 0 | } else { |
1398 | 0 | new_insn->op2 = insn->op2; |
1399 | 0 | } |
1400 | 0 | new_insn->op3 = insn->op3; |
1401 | 0 | break; |
1402 | 0 | case 2: |
1403 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1404 | 0 | new_insn->op2 = _xlat[insn->op2]; |
1405 | 0 | new_insn->op3 = insn->op3; |
1406 | 0 | #if IR_SCHEDULE_SWAP_OPS |
1407 | | /* Swap operands according to folding rules */ |
1408 | 0 | if (new_insn->op1 < new_insn->op2) { |
1409 | 0 | switch (new_insn->op) { |
1410 | 0 | case IR_EQ: |
1411 | 0 | case IR_NE: |
1412 | 0 | case IR_ORDERED: |
1413 | 0 | case IR_UNORDERED: |
1414 | 0 | case IR_ADD: |
1415 | 0 | case IR_MUL: |
1416 | 0 | case IR_ADD_OV: |
1417 | 0 | case IR_MUL_OV: |
1418 | 0 | case IR_OR: |
1419 | 0 | case IR_AND: |
1420 | 0 | case IR_XOR: |
1421 | 0 | case IR_MIN: |
1422 | 0 | case IR_MAX: |
1423 | 0 | SWAP_REFS(new_insn->op1, new_insn->op2); |
1424 | 0 | break; |
1425 | 0 | case IR_LT: |
1426 | 0 | case IR_GE: |
1427 | 0 | case IR_LE: |
1428 | 0 | case IR_GT: |
1429 | 0 | case IR_ULT: |
1430 | 0 | case IR_UGE: |
1431 | 0 | case IR_ULE: |
1432 | 0 | case IR_UGT: |
1433 | 0 | SWAP_REFS(new_insn->op1, new_insn->op2); |
1434 | 0 | new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */ |
1435 | 0 | break; |
1436 | 0 | } |
1437 | 0 | } |
1438 | 0 | #endif |
1439 | 0 | break; |
1440 | 0 | case 3: |
1441 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1442 | 0 | new_insn->op2 = _xlat[insn->op2]; |
1443 | 0 | new_insn->op3 = _xlat[insn->op3]; |
1444 | 0 | break; |
1445 | 0 | default: |
1446 | 0 | for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) { |
1447 | 0 | *q = _xlat[*p]; |
1448 | 0 | } |
1449 | 0 | break; |
1450 | 0 | } |
1451 | 0 | } |
1452 | | |
1453 | | /* Update list of terminators (IR_OPND_CONTROL_REF) */ |
1454 | 0 | insn = &base[1]; |
1455 | 0 | ref = insn->op1; |
1456 | 0 | if (ref) { |
1457 | 0 | insn->op1 = ref = _xlat[ref]; |
1458 | 0 | while (1) { |
1459 | 0 | insn = &base[ref]; |
1460 | 0 | ref = insn->op3; |
1461 | 0 | if (!ref) { |
1462 | 0 | break; |
1463 | 0 | } |
1464 | 0 | insn->op3 = ref = _xlat[ref]; |
1465 | 0 | } |
1466 | 0 | } |
1467 | |
|
1468 | 0 | if (ctx->binding) { |
1469 | 0 | ir_xlat_binding(ctx, _xlat); |
1470 | 0 | } |
1471 | |
|
1472 | 0 | _xlat -= ctx->consts_count; |
1473 | 0 | ir_mem_free(_xlat); |
1474 | 0 | ir_mem_free(_next); |
1475 | | |
1476 | | /* Switch to new IR buffer */ |
1477 | 0 | ir_mem_free(ctx->ir_base - ctx->consts_limit); |
1478 | 0 | ctx->ir_base = base; |
1479 | 0 | ctx->insns_count = ctx->insns_limit = insns_count; |
1480 | 0 | ctx->consts_count = ctx->consts_limit = consts_count; |
1481 | |
|
1482 | 0 | ir_mem_free(ctx->use_lists); |
1483 | 0 | ir_mem_free(ctx->use_edges); |
1484 | 0 | IR_ASSERT(orig_use_edges_count >= use_edges_count); |
1485 | 0 | ctx->use_lists = lists; |
1486 | 0 | ctx->use_edges = use_edges; |
1487 | 0 | ctx->use_edges_count = use_edges_count; |
1488 | |
|
1489 | 0 | ir_mem_free(ctx->cfg_map); |
1490 | 0 | ctx->cfg_map = map; |
1491 | |
|
1492 | 0 | ctx->prev_ref = _prev; |
1493 | |
|
1494 | 0 | ctx->flags2 |= IR_LINEAR; |
1495 | |
|
1496 | 0 | return 1; |
1497 | 0 | } |
1498 | | |
1499 | | void ir_build_prev_refs(ir_ctx *ctx) |
1500 | 0 | { |
1501 | 0 | uint32_t b; |
1502 | 0 | ir_block *bb; |
1503 | 0 | ir_ref i, n, prev; |
1504 | 0 | ir_insn *insn; |
1505 | |
|
1506 | 0 | ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
1507 | 0 | prev = 0; |
1508 | 0 | for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { |
1509 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
1510 | 0 | for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { |
1511 | 0 | ctx->prev_ref[i] = prev; |
1512 | 0 | n = ir_insn_len(insn); |
1513 | 0 | prev = i; |
1514 | 0 | i += n; |
1515 | 0 | insn += n; |
1516 | 0 | } |
1517 | 0 | ctx->prev_ref[i] = prev; |
1518 | 0 | } |
1519 | 0 | } |