/src/php-src/ext/opcache/jit/ir/ir_gcm.c
Line | Count | Source |
1 | | /* |
2 | | * IR - Lightweight JIT Compilation Framework |
3 | | * (GCM - Global Code Motion and Scheduler) |
4 | | * Copyright (C) 2022 Zend by Perforce. |
5 | | * Authors: Dmitry Stogov <dmitry@php.net> |
6 | | * |
7 | | * The GCM algorithm is based on Cliff Click's publication |
8 | | * See: C. Click. "Global Code Motion / Global Value Numbering", PLDI'95. |
9 | | */ |
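/*
 * Editor's sketch (an illustration, not part of the original source): GCM
 * places each floating node twice -- as early as possible (the deepest
 * dominator-tree block that contains all of its inputs) and as late as
 * possible (the LCA of all of its uses) -- then picks the block with the
 * smallest loop depth on the dominator path between the two placements.
 * A minimal stand-alone version of that final selection, using hypothetical
 * toy_* arrays in place of the real ir_ctx structures and omitting the
 * OSR-entry and pre-header special cases handled below:
 */
#if 0
static uint32_t toy_gcm_pick_block(uint32_t early, uint32_t lca,
                                   const uint32_t *dom_parent,
                                   const uint32_t *loop_depth)
{
	uint32_t best = lca, b = lca;

	while (b != early) {                        /* walk from the late placement up */
		b = dom_parent[b];
		if (loop_depth[b] < loop_depth[best]) {
			best = b;                           /* hoist, but no further than necessary */
		}
	}
	return best;
}
#endif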
10 | | |
11 | | #include "ir.h" |
12 | | #include "ir_private.h" |
13 | | |
14 | 0 | #define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0) |
15 | 0 | #define IR_GCM_EARLY_BLOCK(b) ((uint32_t)-((int32_t)(b))) |
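/*
 * Editor's note: during the early pass ctx->cfg_map[ref] temporarily holds
 * the negated block number, so zero still means "not visited", a positive
 * value "pinned/final", and a negative value "scheduled early, awaiting the
 * late pass". A minimal sketch of the encoding round-trip (the function name
 * is hypothetical):
 */
#if 0
static void ir_gcm_mark_example(ir_ctx *ctx, ir_ref ref)
{
	uint32_t b = 5;                            /* pretend block number */

	ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(b); /* stored as (uint32_t)-5 */
	IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(ctx->cfg_map[ref]));
	IR_ASSERT(IR_GCM_EARLY_BLOCK(ctx->cfg_map[ref]) == b); /* negation is self-inverse */
}
#endif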
16 | | |
17 | | #define IR_GCM_SPLIT 1 |
18 | | #define IR_SCHEDULE_SWAP_OPS 1 |
19 | | |
20 | | static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late) |
21 | 0 | { |
22 | 0 | ir_ref n, *p, input; |
23 | 0 | ir_insn *insn; |
24 | 0 | uint32_t dom_depth; |
25 | 0 | uint32_t b, result; |
26 | |
27 | 0 | insn = &ctx->ir_base[ref]; |
28 | |
29 | 0 | IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR); |
30 | 0 | IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI); |
31 | |
32 | 0 | result = 1; |
33 | 0 | dom_depth = 0; |
34 | |
35 | 0 | n = insn->inputs_count; |
36 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
37 | 0 | input = *p; |
38 | 0 | if (input > 0) { |
39 | 0 | b = ctx->cfg_map[input]; |
40 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
41 | 0 | b = IR_GCM_EARLY_BLOCK(b); |
42 | 0 | } else if (!b) { |
43 | 0 | b = ir_gcm_schedule_early(ctx, input, queue_late); |
44 | 0 | } |
45 | 0 | if (dom_depth < ctx->cfg_blocks[b].dom_depth) { |
46 | 0 | dom_depth = ctx->cfg_blocks[b].dom_depth; |
47 | 0 | result = b; |
48 | 0 | } |
49 | 0 | } |
50 | 0 | } |
51 | |
52 | 0 | ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result); |
53 | 0 | ir_list_push_unchecked(queue_late, ref); |
54 | 0 | return result; |
55 | 0 | } |
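/*
 * Editor's note: the block computed above is the deepest input block in the
 * dominator tree, i.e. the earliest point at which every input is available;
 * constants (input <= 0) impose no constraint, which is why only positive
 * refs are inspected. The node is then queued for the late pass with its
 * block number negated (see IR_GCM_EARLY_BLOCK above).
 */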
56 | | |
57 | | /* Lowest Common Ancestor (in the dominator tree) */ |
58 | | static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2) |
59 | 0 | { |
60 | 0 | uint32_t dom_depth; |
61 | |
62 | 0 | dom_depth = ctx->cfg_blocks[b2].dom_depth; |
63 | 0 | while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { |
64 | 0 | b1 = ctx->cfg_blocks[b1].dom_parent; |
65 | 0 | } |
66 | 0 | dom_depth = ctx->cfg_blocks[b1].dom_depth; |
67 | 0 | while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { |
68 | 0 | b2 = ctx->cfg_blocks[b2].dom_parent; |
69 | 0 | } |
70 | 0 | while (b1 != b2) { |
71 | 0 | b1 = ctx->cfg_blocks[b1].dom_parent; |
72 | 0 | b2 = ctx->cfg_blocks[b2].dom_parent; |
73 | 0 | } |
74 | 0 | return b2; |
75 | 0 | } |
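/*
 * Editor's note: a worked example, assuming a dominator tree 1 -> 2 -> {3, 4}
 * with dom_depth 0, 1, 2, 2: for b1 = 3 and b2 = 4 both equalizing loops are
 * skipped (the depths already match) and the final loop climbs both blocks
 * to their common dominator 2 in a single step.
 */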
76 | | |
77 | | static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) |
78 | 0 | { |
79 | 0 | ir_block *bb = &ctx->cfg_blocks[lca]; |
80 | 0 | uint32_t loop_depth = bb->loop_depth; |
81 | 0 | uint32_t flags, best, b; |
82 | |
83 | 0 | if (!loop_depth) { |
84 | 0 | return lca; |
85 | 0 | } |
86 | | |
87 | | #if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */ |
88 | | if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) { |
89 | | ir_use_list *use_list = &ctx->use_lists[ref]; |
90 | | |
91 | | if (use_list->count == 1) { |
92 | | ir_ref use = ctx->use_edges[use_list->refs]; |
93 | | ir_insn *insn = &ctx->ir_base[use]; |
94 | | if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) { |
95 | | /* Don't hoist invariant comparison */ |
96 | | return lca; |
97 | | } |
98 | | } |
99 | | } |
100 | | #endif |
101 | | |
102 | 0 | flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; |
103 | 0 | if ((flags & IR_BB_LOOP_WITH_ENTRY) |
104 | 0 | && !(ctx->binding && ir_binding_find(ctx, ref))) { |
105 | | /* Don't move loop invariant code across an OSR ENTRY if we can't restore it */ |
106 | 0 | return lca; |
107 | 0 | } |
108 | | |
109 | 0 | best = b = lca; |
110 | 0 | do { |
111 | 0 | b = bb->dom_parent; |
112 | 0 | bb = &ctx->cfg_blocks[b]; |
113 | 0 | if (bb->loop_depth < loop_depth) { |
114 | 0 | if (!bb->loop_depth) { |
115 | 0 | #if 1 |
116 | | /* Avoid LICM if LOOP doesn't have a pre-header block */ |
117 | 0 | ir_block *loop_bb = &ctx->cfg_blocks[best]; |
118 | |
119 | 0 | if (!(loop_bb->flags & IR_BB_LOOP_HEADER)) { |
120 | 0 | loop_bb = &ctx->cfg_blocks[loop_bb->loop_header]; |
121 | 0 | } |
122 | 0 | if (loop_bb->predecessors_count > 2) { |
123 | 0 | int n = loop_bb->predecessors_count; |
124 | 0 | uint32_t *p = ctx->cfg_edges + loop_bb->predecessors; |
125 | |
126 | 0 | while (n && *p != b) { |
127 | 0 | n--; p++; |
128 | 0 | } |
129 | 0 | if (!n) { |
130 | 0 | break; |
131 | 0 | } |
132 | 0 | } |
133 | 0 | #endif |
134 | 0 | best = b; |
135 | 0 | break; |
136 | 0 | } |
137 | 0 | flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; |
138 | 0 | if ((flags & IR_BB_LOOP_WITH_ENTRY) |
139 | 0 | && !(ctx->binding && ir_binding_find(ctx, ref))) { |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | loop_depth = bb->loop_depth; |
143 | 0 | best = b; |
144 | 0 | } |
145 | 0 | } while (b != ctx->cfg_map[ref]); |
146 | |
147 | 0 | return best; |
148 | 0 | } |
149 | | |
150 | | #if IR_GCM_SPLIT |
151 | | /* Partially Dead Code Elimination through splitting the node and sinking the clones |
152 | | * |
153 | | * This code is based on Benedikt Meurer's idea, first implemented in V8. |
154 | | * See: https://codereview.chromium.org/899433005 |
155 | | */ |
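/*
 * Editor's illustration (assumed example, not from the original source):
 *
 *         BB1  (x = a + b is scheduled here)
 *         /  \
 *       BB2  BB3  (x is never used on the BB3 path)
 *        |
 *      use(x)
 *
 * x is partially dead in BB1. The code below first computes the set of
 * blocks in which the node is TOTALLY_USEFUL (needed on every outgoing
 * path), then partitions the uses and emits one clone of the node per
 * partition, sinking each clone towards its uses.
 */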
156 | | |
157 | | typedef struct _ir_gcm_split_data { |
158 | | ir_sparse_set totally_useful; |
159 | | ir_list worklist; |
160 | | } ir_gcm_split_data; |
161 | | |
162 | | static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) |
163 | 0 | { |
164 | 0 | uint32_t *p, i, n = bb->predecessors_count; |
165 | |
166 | 0 | IR_ASSERT(n > 0); |
167 | 0 | p = ctx->cfg_edges + bb->predecessors; |
168 | 0 | do { |
169 | 0 | i = *p; |
170 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
171 | 0 | ir_list_push(&data->worklist, i); |
172 | 0 | } |
173 | 0 | p++; |
174 | 0 | n--; |
175 | 0 | } while (n > 0); |
176 | 0 | } |
177 | | |
178 | | static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) |
179 | 0 | { |
180 | 0 | uint32_t *p, i, n = bb->successors_count; |
181 | |
182 | 0 | if (n <= 1) { |
183 | 0 | IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors])); |
184 | 0 | return 1; |
185 | 0 | } |
186 | | |
187 | 0 | p = ctx->cfg_edges + bb->successors; |
188 | 0 | do { |
189 | 0 | i = *p; |
190 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
191 | 0 | return 0; |
192 | 0 | } |
193 | 0 | p++; |
194 | 0 | n--; |
195 | 0 | } while (n > 0); |
196 | | |
197 | 0 | return 1; |
198 | 0 | } |
199 | | |
200 | | static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) |
201 | 0 | { |
202 | 0 | ir_use_list *use_list; |
203 | 0 | ir_insn *insn; |
204 | 0 | ir_ref n, *p, use; |
205 | 0 | uint32_t i; |
206 | 0 | ir_gcm_split_data *data = ctx->data; |
207 | |
208 | 0 | IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count); |
209 | | |
210 | | /* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD) |
211 | | * 1.1. Collect the blocks where the node is really USED. |
212 | | */ |
213 | 0 | ir_sparse_set_clear(&data->totally_useful); |
214 | |
215 | 0 | use_list = &ctx->use_lists[ref]; |
216 | 0 | n = use_list->count; |
217 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
218 | 0 | use = *p; |
219 | 0 | insn = &ctx->ir_base[use]; |
220 | 0 | if (insn->op == IR_PHI) { |
221 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
222 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
223 | 0 | ir_ref n = insn->inputs_count - 1; |
224 | |
225 | 0 | for (;n > 0; p++, q++, n--) { |
226 | 0 | if (*p == ref) { |
227 | 0 | i = ctx->cfg_map[*q]; |
228 | 0 | IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); |
229 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
230 | 0 | if (i == b) return 0; /* node is totally-useful in the scheduled block */ |
231 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
232 | 0 | } |
233 | 0 | } |
234 | 0 | } |
235 | 0 | } else { |
236 | 0 | i = ctx->cfg_map[use]; |
237 | 0 | if (!i) { |
238 | 0 | continue; |
239 | 0 | } |
240 | 0 | IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); |
241 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
242 | 0 | if (i == b) return 0; /* node is totally-useful in the scheduled block */ |
243 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
244 | 0 | } |
245 | 0 | } |
246 | 0 | } |
247 | | |
248 | 0 | #ifdef IR_DEBUG |
249 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
250 | 0 | bool first = 1; |
251 | 0 | fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b); |
252 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
253 | 0 | if (first) { |
254 | 0 | fprintf(stderr, "\td_%d is USED in [BB%d", ref, i); |
255 | 0 | first = 0; |
256 | 0 | } else { |
257 | 0 | fprintf(stderr, ", BB%d", i); |
258 | 0 | } |
259 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
260 | 0 | fprintf(stderr, "]\n"); |
261 | 0 | } |
262 | 0 | #endif |
263 | | |
264 | | /* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and |
265 | | * add them to the TOTALLY_USEFUL set if all of their successors are already there. |
266 | | */ |
267 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
268 | 0 | _push_predecessors(ctx, &ctx->cfg_blocks[i], data); |
269 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
270 | |
271 | 0 | while (ir_list_len(&data->worklist)) { |
272 | 0 | i = ir_list_pop(&data->worklist); |
273 | 0 | if (!ir_sparse_set_in(&data->totally_useful, i)) { |
274 | 0 | ir_block *bb = &ctx->cfg_blocks[i]; |
275 | |
276 | 0 | if (_check_successors(ctx, bb, data)) { |
277 | 0 | if (i == b) { |
278 | | /* node is TOTALLY_USEFUL in the scheduled block */ |
279 | 0 | ir_list_clear(&data->worklist); |
280 | 0 | return 0; |
281 | 0 | } |
282 | 0 | ir_sparse_set_add(&data->totally_useful, i); |
283 | 0 | _push_predecessors(ctx, bb, data); |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | 0 | IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b)); |
289 | |
290 | 0 | #ifdef IR_DEBUG |
291 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
292 | 0 | bool first = 1; |
293 | 0 | IR_SPARSE_SET_FOREACH(&data->totally_useful, i) { |
294 | 0 | if (first) { |
295 | 0 | fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i); |
296 | 0 | first = 0; |
297 | 0 | } else { |
298 | 0 | fprintf(stderr, ", BB%d", i); |
299 | 0 | } |
300 | 0 | } IR_SPARSE_SET_FOREACH_END(); |
301 | 0 | fprintf(stderr, "]\n"); |
302 | 0 | } |
303 | 0 | #endif |
304 | | |
305 | | /* 2. Split the USEs into partitions */ |
306 | 0 | use_list = &ctx->use_lists[ref]; |
307 | 0 | ir_hashtab hash; |
308 | 0 | uint32_t j, clone, clones_count = 0, uses_count = 0; |
309 | 0 | struct { |
310 | 0 | ir_ref ref; |
311 | 0 | uint32_t block; |
312 | 0 | uint32_t use_count; |
313 | 0 | uint32_t use; |
314 | 0 | } *clones = ir_mem_malloc(sizeof(*clones) * use_list->count); |
315 | 0 | struct { |
316 | 0 | ir_ref ref; |
317 | 0 | uint32_t block; |
318 | 0 | uint32_t next; |
319 | 0 | } *uses = ir_mem_malloc(sizeof(*uses) * use_list->count); |
320 | |
321 | 0 | ir_hashtab_init(&hash, use_list->count); |
322 | 0 | n = use_list->count; |
323 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
324 | 0 | use = *p; |
325 | 0 | insn = &ctx->ir_base[use]; |
326 | 0 | if (insn->op == IR_PHI) { |
327 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
328 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
329 | 0 | ir_ref n = insn->inputs_count - 1; |
330 | | |
331 | | /* PHIs must be processed once */ |
332 | 0 | if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) { |
333 | 0 | continue; |
334 | 0 | } |
335 | 0 | ir_hashtab_add(&hash, -use, IR_NULL); |
336 | 0 | for (;n > 0; p++, q++, n--) { |
337 | 0 | if (*p == ref) { |
338 | 0 | j = i = ctx->cfg_map[*q]; |
339 | 0 | while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { |
340 | 0 | j = ctx->cfg_blocks[j].idom; |
341 | 0 | } |
342 | 0 | clone = ir_hashtab_find(&hash, j); |
343 | 0 | if (clone == IR_INVALID_VAL) { |
344 | 0 | clone = clones_count++; |
345 | 0 | ir_hashtab_add(&hash, j, clone); |
346 | 0 | clones[clone].block = j; |
347 | 0 | clones[clone].use_count = 0; |
348 | 0 | clones[clone].use = (uint32_t)-1; |
349 | 0 | } |
350 | 0 | uses[uses_count].ref = use; |
351 | 0 | uses[uses_count].block = i; |
352 | 0 | uses[uses_count].next = clones[clone].use; |
353 | 0 | clones[clone].use_count++; |
354 | 0 | clones[clone].use = uses_count++; |
355 | 0 | } |
356 | 0 | } |
357 | 0 | } else { |
358 | 0 | j = i = ctx->cfg_map[use]; |
359 | 0 | if (i) { |
360 | 0 | IR_ASSERT(i > 0); |
361 | 0 | while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { |
362 | 0 | j = ctx->cfg_blocks[j].idom; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | clone = ir_hashtab_find(&hash, j); |
366 | 0 | if (clone == IR_INVALID_VAL) { |
367 | 0 | clone = clones_count++; |
368 | 0 | ir_hashtab_add(&hash, j, clone); |
369 | 0 | clones[clone].block = j; |
370 | 0 | clones[clone].use_count = 0; |
371 | 0 | clones[clone].use = -1; |
372 | 0 | } |
373 | 0 | uses[uses_count].ref = use; |
374 | 0 | uses[uses_count].block = i; |
375 | 0 | uses[uses_count].next = clones[clone].use; |
376 | 0 | clones[clone].use_count++; |
377 | 0 | clones[clone].use = uses_count++; |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | 0 | #ifdef IR_DEBUG |
382 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
383 | 0 | for (i = 0; i < clones_count; i++) { |
384 | 0 | uint32_t u = clones[i].use; |
385 | |
386 | 0 | fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d", |
387 | 0 | i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block); |
388 | 0 | u = uses[u].next; |
389 | 0 | while (u != (uint32_t)-1) { |
390 | 0 | fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block); |
391 | 0 | u = uses[u].next; |
392 | 0 | } |
393 | 0 | fprintf(stderr, "]\n"); |
394 | 0 | } |
395 | 0 | } |
396 | 0 | #endif |
397 | | |
398 | | /* Create Clones */ |
399 | 0 | insn = &ctx->ir_base[ref]; |
400 | 0 | clones[0].ref = ref; |
401 | 0 | for (i = 1; i < clones_count; i++) { |
402 | 0 | clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3); |
403 | 0 | insn = &ctx->ir_base[ref]; |
404 | | /* Depending on the flags in IR_OPS, these can be references or data. */ |
405 | 0 | if (insn->op1 > 0 && insn->inputs_count >= 1) ir_use_list_add(ctx, insn->op1, clone); |
406 | 0 | if (insn->op2 > 0 && insn->inputs_count >= 2) ir_use_list_add(ctx, insn->op2, clone); |
407 | 0 | if (insn->op3 > 0 && insn->inputs_count >= 3) ir_use_list_add(ctx, insn->op3, clone); |
408 | 0 | } |
409 | | |
410 | | /* Reconstruct IR: update DEF->USE lists, CFG mapping, etc. */ |
411 | 0 | ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list)); |
412 | 0 | ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t)); |
413 | 0 | n = ctx->use_lists[ref].refs; |
414 | 0 | for (i = 0; i < clones_count; i++) { |
415 | 0 | clone = clones[i].ref; |
416 | 0 | if (clones[i].use_count == 1 |
417 | 0 | && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) { |
418 | | /* TOTALLY_USEFUL block may be a head of a diamond above the real usage. |
419 | | * Sink it down to the real usage block. |
420 | | * Clones with few uses will be sunk into the LCA block. |
421 | | */ |
422 | 0 | clones[i].block = uses[clones[i].use].block; |
423 | 0 | } |
424 | 0 | ctx->cfg_map[clone] = clones[i].block; |
425 | 0 | ctx->use_lists[clone].count = clones[i].use_count; |
426 | 0 | ctx->use_lists[clone].refs = n; |
427 | |
428 | 0 | uint32_t u = clones[i].use; |
429 | 0 | while (u != (uint32_t)-1) { |
430 | 0 | use = uses[u].ref; |
431 | 0 | ctx->use_edges[n++] = use; |
432 | 0 | u = uses[u].next; |
433 | 0 | if (i > 0) { |
434 | | /* replace inputs */ |
435 | 0 | ir_insn *insn = &ctx->ir_base[use]; |
436 | 0 | ir_ref k, l = insn->inputs_count; |
437 | |
438 | 0 | if (insn->op == IR_PHI) { |
439 | 0 | for (k = 1; k <= l; k++) { |
440 | 0 | if (ir_insn_op(insn, k) == ref) { |
441 | 0 | j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)]; |
442 | 0 | if (j != clones[i].block) { |
443 | 0 | uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth; |
444 | 0 | while (ctx->cfg_blocks[j].dom_depth > dom_depth) { |
445 | 0 | j = ctx->cfg_blocks[j].dom_parent; |
446 | 0 | } |
447 | 0 | if (j != clones[i].block) { |
448 | 0 | continue; |
449 | 0 | } |
450 | 0 | } |
451 | 0 | ir_insn_set_op(insn, k, clone); |
452 | 0 | break; |
453 | 0 | } |
454 | 0 | } |
455 | 0 | } else { |
456 | 0 | for (k = 1; k <= l; k++) { |
457 | 0 | if (ir_insn_op(insn, k) == ref) { |
458 | 0 | ir_insn_set_op(insn, k, clone); |
459 | 0 | break; |
460 | 0 | } |
461 | 0 | } |
462 | 0 | } |
463 | 0 | } |
464 | 0 | } |
465 | 0 | } |
466 | |
467 | 0 | ir_mem_free(uses); |
468 | 0 | ir_mem_free(clones); |
469 | 0 | ir_hashtab_free(&hash); |
470 | |
471 | 0 | #ifdef IR_DEBUG |
472 | 0 | if (ctx->flags & IR_DEBUG_GCM_SPLIT) { |
473 | 0 | ir_check(ctx); |
474 | 0 | } |
475 | 0 | #endif |
476 | |
477 | 0 | return 1; |
478 | 0 | } |
479 | | #endif |
480 | | |
481 | | #ifdef IR_DEBUG |
482 | | static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2) |
483 | 0 | { |
484 | 0 | uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth; |
485 | 0 | const ir_block *bb2 = &ctx->cfg_blocks[b2]; |
486 | |
487 | 0 | while (bb2->dom_depth > b1_depth) { |
488 | 0 | b2 = bb2->dom_parent; |
489 | 0 | bb2 = &ctx->cfg_blocks[b2]; |
490 | 0 | } |
491 | 0 | return b1 == b2; |
492 | 0 | } |
493 | | #endif |
494 | | |
495 | | static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b) |
496 | 0 | { |
497 | 0 | ir_ref n, use; |
498 | 0 | uint32_t lca = 0; |
499 | |
500 | 0 | IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR); |
501 | 0 | IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI); |
502 | |
503 | 0 | IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b)); |
504 | 0 | b = IR_GCM_EARLY_BLOCK(b); |
505 | 0 | ctx->cfg_map[ref] = b; |
506 | |
507 | 0 | for (n = 0; n < ctx->use_lists[ref].count; n++) { |
508 | 0 | use = ctx->use_edges[ctx->use_lists[ref].refs + n]; |
509 | 0 | b = ctx->cfg_map[use]; |
510 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
511 | 0 | ir_gcm_schedule_late(ctx, use, b); |
512 | 0 | b = ctx->cfg_map[use]; |
513 | 0 | IR_ASSERT(b != 0); |
514 | 0 | } else if (!b) { |
515 | 0 | continue; |
516 | 0 | } else if (ctx->ir_base[use].op == IR_PHI) { |
517 | 0 | ir_insn *insn = &ctx->ir_base[use]; |
518 | 0 | ir_ref *p = insn->ops + 2; /* PHI data inputs */ |
519 | 0 | ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ |
520 | 0 | ir_ref n = insn->inputs_count - 1; |
521 | |
522 | 0 | for (;n > 0; p++, q++, n--) { |
523 | 0 | if (*p == ref) { |
524 | 0 | b = ctx->cfg_map[*q]; |
525 | 0 | lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); |
526 | 0 | } |
527 | 0 | } |
528 | 0 | continue; |
529 | 0 | } |
530 | 0 | lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); |
531 | 0 | } |
532 | | |
533 | 0 | IR_ASSERT(lca != 0 && "No Common Ancestor"); |
534 | 0 | IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late"); |
535 | |
536 | 0 | #if IR_GCM_SPLIT |
537 | 0 | if (ctx->use_lists[ref].count > 1 |
538 | 0 | && ir_split_partially_dead_node(ctx, ref, lca)) { |
539 | 0 | return; |
540 | 0 | } |
541 | 0 | #endif |
542 | | |
543 | 0 | if (lca != ctx->cfg_map[ref]) { |
544 | 0 | b = ir_gcm_select_best_block(ctx, ref, lca); |
545 | |
546 | 0 | ctx->cfg_map[ref] = b; |
547 | | |
548 | | /* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */ |
549 | 0 | if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) { |
550 | 0 | ir_use_list *use_list = &ctx->use_lists[ref]; |
551 | 0 | ir_ref n, *p, use; |
552 | |
553 | 0 | for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { |
554 | 0 | use = *p; |
555 | 0 | if (ctx->ir_base[use].op == IR_OVERFLOW) { |
556 | 0 | ctx->cfg_map[use] = b; |
557 | 0 | break; |
558 | 0 | } |
559 | 0 | } |
560 | 0 | } |
561 | 0 | } |
562 | 0 | } |
563 | | |
564 | | int ir_gcm(ir_ctx *ctx) |
565 | 0 | { |
566 | 0 | ir_ref k, n, *p, ref; |
567 | 0 | ir_block *bb; |
568 | 0 | ir_list queue_early; |
569 | 0 | ir_list queue_late; |
570 | 0 | uint32_t *_blocks, b; |
571 | 0 | ir_insn *insn, *use_insn; |
572 | 0 | ir_use_list *use_list; |
573 | |
574 | 0 | IR_ASSERT(ctx->cfg_map); |
575 | 0 | _blocks = ctx->cfg_map; |
576 | |
577 | 0 | ir_list_init(&queue_early, ctx->insns_count); |
578 | |
579 | 0 | if (ctx->cfg_blocks_count == 1) { |
580 | 0 | ref = ctx->cfg_blocks[1].end; |
581 | 0 | do { |
582 | 0 | insn = &ctx->ir_base[ref]; |
583 | 0 | _blocks[ref] = 1; /* pin to block */ |
584 | 0 | if (insn->inputs_count > 1) { |
585 | | /* insn has input data edges */ |
586 | 0 | ir_list_push_unchecked(&queue_early, ref); |
587 | 0 | } |
588 | 0 | ref = insn->op1; /* control predecessor */ |
589 | 0 | } while (ref != 1); /* IR_START */ |
590 | 0 | _blocks[1] = 1; /* pin to block */ |
591 | |
592 | 0 | use_list = &ctx->use_lists[1]; |
593 | 0 | n = use_list->count; |
594 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { |
595 | 0 | ref = *p; |
596 | 0 | use_insn = &ctx->ir_base[ref]; |
597 | 0 | if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) { |
598 | 0 | ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR; |
599 | 0 | _blocks[ref] = 1; /* pin to block */ |
600 | 0 | } |
601 | 0 | } |
602 | | |
603 | | /* Place all live nodes into the first block */ |
604 | 0 | while (ir_list_len(&queue_early)) { |
605 | 0 | ref = ir_list_pop(&queue_early); |
606 | 0 | insn = &ctx->ir_base[ref]; |
607 | 0 | n = insn->inputs_count; |
608 | 0 | for (p = insn->ops + 1; n > 0; p++, n--) { |
609 | 0 | ref = *p; |
610 | 0 | if (ref > 0 && _blocks[ref] == 0) { |
611 | 0 | _blocks[ref] = 1; |
612 | 0 | ir_list_push_unchecked(&queue_early, ref); |
613 | 0 | } |
614 | 0 | } |
615 | 0 | } |
616 | |
617 | 0 | ir_list_free(&queue_early); |
618 | |
619 | 0 | return 1; |
620 | 0 | } |
621 | | |
622 | 0 | ir_list_init(&queue_late, ctx->insns_count); |
623 | | |
624 | | /* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */ |
625 | 0 | b = ctx->cfg_blocks_count; |
626 | 0 | for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) { |
627 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
628 | 0 | ref = bb->end; |
629 | | |
630 | | /* process the last instruction of the block */ |
631 | 0 | insn = &ctx->ir_base[ref]; |
632 | 0 | _blocks[ref] = b; /* pin to block */ |
633 | 0 | if (insn->inputs_count > 1) { |
634 | | /* insn has input data edges */ |
635 | 0 | ir_list_push_unchecked(&queue_early, ref); |
636 | 0 | } |
637 | 0 | ref = insn->op1; /* control predecessor */ |
638 | |
639 | 0 | while (ref != bb->start) { |
640 | 0 | insn = &ctx->ir_base[ref]; |
641 | 0 | _blocks[ref] = b; /* pin to block */ |
642 | 0 | if (insn->inputs_count > 1) { |
643 | | /* insn has input data edges */ |
644 | 0 | ir_list_push_unchecked(&queue_early, ref); |
645 | 0 | } |
646 | 0 | if (insn->type != IR_VOID) { |
647 | 0 | IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM); |
648 | 0 | } |
649 | 0 | ref = insn->op1; /* control predecessor */ |
650 | 0 | } |
651 | | |
652 | | /* process the first instruction of the block */ |
653 | 0 | _blocks[ref] = b; /* pin to block */ |
654 | |
655 | 0 | use_list = &ctx->use_lists[ref]; |
656 | 0 | n = use_list->count; |
657 | 0 | if (n > 1) { |
658 | 0 | for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { |
659 | 0 | ref = *p; |
660 | 0 | use_insn = &ctx->ir_base[ref]; |
661 | 0 | if (use_insn->op == IR_PHI || use_insn->op == IR_PI) { |
662 | 0 | bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI; |
663 | 0 | if (EXPECTED(ctx->use_lists[ref].count != 0)) { |
664 | 0 | _blocks[ref] = b; /* pin to block */ |
665 | 0 | ir_list_push_unchecked(&queue_early, ref); |
666 | 0 | } |
667 | 0 | } else if (use_insn->op == IR_PARAM) { |
668 | 0 | bb->flags |= IR_BB_HAS_PARAM; |
669 | 0 | _blocks[ref] = b; /* pin to block */ |
670 | 0 | } else if (use_insn->op == IR_VAR) { |
671 | 0 | bb->flags |= IR_BB_HAS_VAR; |
672 | 0 | _blocks[ref] = b; /* pin to block */ |
673 | 0 | } |
674 | 0 | } |
675 | 0 | } |
676 | 0 | } |
677 | |
678 | 0 | n = ir_list_len(&queue_early); |
679 | 0 | while (n > 0) { |
680 | 0 | n--; |
681 | 0 | ref = ir_list_at(&queue_early, n); |
682 | 0 | insn = &ctx->ir_base[ref]; |
683 | 0 | k = insn->inputs_count - 1; |
684 | 0 | for (p = insn->ops + 2; k > 0; p++, k--) { |
685 | 0 | ref = *p; |
686 | 0 | if (ref > 0 && _blocks[ref] == 0) { |
687 | 0 | ir_gcm_schedule_early(ctx, ref, &queue_late); |
688 | 0 | } |
689 | 0 | } |
690 | 0 | } |
691 | |
692 | 0 | #ifdef IR_DEBUG |
693 | 0 | if (ctx->flags & IR_DEBUG_GCM) { |
694 | 0 | fprintf(stderr, "GCM Schedule Early\n"); |
695 | 0 | for (n = 1; n < ctx->insns_count; n++) { |
696 | 0 | fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); |
697 | 0 | } |
698 | 0 | } |
699 | 0 | #endif |
700 | |
701 | 0 | #if IR_GCM_SPLIT |
702 | 0 | ir_gcm_split_data data; |
703 | |
704 | 0 | ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1); |
705 | 0 | ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1); |
706 | 0 | ctx->data = &data; |
707 | 0 | #endif |
708 | |
709 | 0 | n = ir_list_len(&queue_late); |
710 | 0 | while (n > 0) { |
711 | 0 | n--; |
712 | 0 | ref = ir_list_at(&queue_late, n); |
713 | 0 | b = ctx->cfg_map[ref]; |
714 | 0 | if (IR_GCM_IS_SCHEDULED_EARLY(b)) { |
715 | 0 | ir_gcm_schedule_late(ctx, ref, b); |
716 | 0 | } |
717 | 0 | } |
718 | |
719 | 0 | #if IR_GCM_SPLIT |
720 | 0 | ir_list_free(&data.worklist); |
721 | 0 | ir_sparse_set_free(&data.totally_useful); |
722 | 0 | ctx->data = NULL; |
723 | 0 | #endif |
724 | |
725 | 0 | ir_list_free(&queue_early); |
726 | 0 | ir_list_free(&queue_late); |
727 | |
728 | 0 | #ifdef IR_DEBUG |
729 | 0 | if (ctx->flags & IR_DEBUG_GCM) { |
730 | 0 | fprintf(stderr, "GCM Schedule Late\n"); |
731 | 0 | for (n = 1; n < ctx->insns_count; n++) { |
732 | 0 | fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); |
733 | 0 | } |
734 | 0 | } |
735 | 0 | #endif |
736 | |
737 | 0 | return 1; |
738 | 0 | } |
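/*
 * Editor's sketch: a typical pass ordering around ir_gcm()/ir_schedule(),
 * assuming the embedder has already built the CFG, dominator tree and loop
 * info with the corresponding ir_* passes (the exact driver code varies):
 */
#if 0
ir_build_cfg(ctx);             /* basic blocks and edges */
ir_build_dominators_tree(ctx); /* dom_parent / dom_depth used above */
ir_find_loops(ctx);            /* loop_header / loop_depth used above */
ir_gcm(ctx);                   /* assign every node a block in ctx->cfg_map */
ir_schedule(ctx);              /* renumber nodes into linear block order */
#endif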
739 | | |
740 | | static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) |
741 | 0 | { |
742 | 0 | uint32_t n1, n2, pos; |
743 | 0 | ir_ref key; |
744 | 0 | ir_hashtab_bucket *b1, *b2; |
745 | 0 | ir_hashtab *binding = ctx->binding; |
746 | 0 | uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask); |
747 | |
748 | 0 | memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t)); |
749 | 0 | n1 = binding->count; |
750 | 0 | n2 = 0; |
751 | 0 | pos = 0; |
752 | 0 | b1 = binding->data; |
753 | 0 | b2 = binding->data; |
754 | 0 | while (n1 > 0) { |
755 | 0 | key = b1->key; |
756 | 0 | IR_ASSERT(key < ctx->insns_count); |
757 | 0 | if (_xlat[key]) { |
758 | 0 | key = _xlat[key]; |
759 | 0 | b2->key = key; |
760 | 0 | if (b1->val > 0) { |
761 | 0 | IR_ASSERT(_xlat[b1->val]); |
762 | 0 | b2->val = _xlat[b1->val]; |
763 | 0 | } else { |
764 | 0 | b2->val = b1->val; |
765 | 0 | } |
766 | 0 | key |= binding->mask; |
767 | 0 | b2->next = ((uint32_t*)binding->data)[key]; |
768 | 0 | ((uint32_t*)binding->data)[key] = pos; |
769 | 0 | pos += sizeof(ir_hashtab_bucket); |
770 | 0 | b2++; |
771 | 0 | n2++; |
772 | 0 | } |
773 | 0 | b1++; |
774 | 0 | n1--; |
775 | 0 | } |
776 | 0 | binding->count = n2; |
777 | 0 | } |
778 | | |
779 | | IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref) |
780 | 0 | { |
781 | 0 | if (!_xlat[ref]) { |
782 | 0 | _xlat[ref] = ref; /* this is only a "used constant" marker */ |
783 | 0 | return 1; |
784 | 0 | } |
785 | 0 | return 0; |
786 | 0 | } |
787 | | |
788 | | IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref start) |
789 | 0 | { |
790 | 0 | ir_insn *insn = &ctx->ir_base[start]; |
791 | 0 | uint32_t n = insn->inputs_count; |
792 | 0 | ir_ref *p = insn->ops + 1; |
793 | |
794 | 0 | if (n == 1) { |
795 | 0 | return ctx->cfg_map[*p] < b; |
796 | 0 | } else { |
797 | 0 | IR_ASSERT(n > 1); |
798 | 0 | for (; n > 0; p++, n--) { |
799 | 0 | ir_ref input = *p; |
800 | |
801 | 0 | if (!IR_IS_CONST_REF(input)) { |
802 | 0 | uint32_t input_b = ctx->cfg_map[input]; |
803 | |
804 | 0 | if (input_b < b) { |
805 | | /* ordered */ |
806 | 0 | } else if ((bb->flags & IR_BB_LOOP_HEADER) |
807 | 0 | && (input_b == b || ctx->cfg_blocks[input_b].loop_header == b)) { |
808 | | /* back-edge of reducible loop */ |
809 | 0 | } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) |
810 | 0 | && (ctx->cfg_blocks[input_b].loop_header == bb->loop_header)) { |
811 | | /* closing edge of irreducible loop */ |
812 | 0 | } else { |
813 | 0 | return 0; |
814 | 0 | } |
815 | 0 | } |
816 | 0 | } |
817 | 0 | return 1; |
818 | 0 | } |
819 | 0 | } |
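/*
 * Editor's note: a "good" order means the existing block numbering is
 * already topological -- every non-constant input of a block's start node is
 * placed in an earlier block, or reaches it over the back-edge of a
 * reducible loop (or a closing edge of an irreducible one). If any block
 * fails this test, ir_schedule() falls back to ir_fix_bb_order() below.
 */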
820 | | |
821 | | static IR_NEVER_INLINE void ir_fix_bb_order(ir_ctx *ctx, ir_ref *_prev, ir_ref *_next) |
822 | 0 | { |
823 | 0 | uint32_t b, succ, count, *q, *xlat; |
824 | 0 | ir_block *bb; |
825 | 0 | ir_ref ref, n, prev; |
826 | 0 | ir_worklist worklist; |
827 | 0 | ir_block *new_blocks; |
828 | |
829 | | #if 0 |
830 | | for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
831 | | if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) { |
832 | | goto fix; |
833 | | } |
834 | | } |
835 | | return; |
836 | | |
837 | | fix: |
838 | | #endif |
839 | 0 | count = ctx->cfg_blocks_count + 1; |
840 | 0 | new_blocks = ir_mem_malloc(count * sizeof(ir_block)); |
841 | 0 | xlat = ir_mem_malloc(count * sizeof(uint32_t)); |
842 | 0 | ir_worklist_init(&worklist, count); |
843 | 0 | ir_worklist_push(&worklist, 1); |
844 | 0 | while (ir_worklist_len(&worklist) != 0) { |
845 | 0 | next: |
846 | 0 | b = ir_worklist_peek(&worklist); |
847 | 0 | bb = &ctx->cfg_blocks[b]; |
848 | 0 | n = bb->successors_count; |
849 | 0 | if (n == 1) { |
850 | 0 | succ = ctx->cfg_edges[bb->successors]; |
851 | 0 | if (ir_worklist_push(&worklist, succ)) { |
852 | 0 | goto next; |
853 | 0 | } |
854 | 0 | } else if (n > 1) { |
855 | 0 | uint32_t best = 0; |
856 | 0 | uint32_t best_loop_depth = 0; |
857 | |
|
858 | 0 | q = ctx->cfg_edges + bb->successors + n; |
859 | 0 | do { |
860 | 0 | q--; |
861 | 0 | succ = *q; |
862 | 0 | if (ir_bitset_in(worklist.visited, succ)) { |
863 | | /* already processed */ |
864 | 0 | } else if ((ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER) |
865 | 0 | && (succ == b || ctx->cfg_blocks[b].loop_header == succ)) { |
866 | | /* back-edge of reducible loop */ |
867 | 0 | } else if ((ctx->cfg_blocks[succ].flags & IR_BB_IRREDUCIBLE_LOOP) |
868 | 0 | && (ctx->cfg_blocks[succ].loop_header == ctx->cfg_blocks[b].loop_header)) { |
869 | | /* closing edge of irreducible loop */ |
870 | 0 | } else if (!best) { |
871 | 0 | best = succ; |
872 | 0 | best_loop_depth = ctx->cfg_blocks[best].loop_depth; |
873 | 0 | } else if (ctx->cfg_blocks[succ].loop_depth < best_loop_depth) { |
874 | | /* prefer deeper loop */ |
875 | 0 | best = succ; |
876 | 0 | best_loop_depth = ctx->cfg_blocks[best].loop_depth; |
877 | 0 | } |
878 | 0 | n--; |
879 | 0 | } while (n > 0); |
880 | 0 | if (best) { |
881 | 0 | ir_worklist_push(&worklist, best); |
882 | 0 | goto next; |
883 | 0 | } |
884 | 0 | } |
885 | 0 | ir_worklist_pop(&worklist); |
886 | 0 | count--; |
887 | 0 | new_blocks[count] = *bb; |
888 | 0 | xlat[b] = count; |
889 | 0 | } |
890 | 0 | IR_ASSERT(count == 1); |
891 | 0 | xlat[0] = 0; |
892 | 0 | ir_worklist_free(&worklist); |
893 | |
894 | 0 | prev = 0; |
895 | 0 | for (b = 1, bb = new_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
896 | 0 | bb->idom = xlat[bb->idom]; |
897 | 0 | bb->loop_header = xlat[bb->loop_header]; |
898 | 0 | n = bb->successors_count; |
899 | 0 | if (n > 0) { |
900 | 0 | for (q = ctx->cfg_edges + bb->successors; n > 0; q++, n--) { |
901 | 0 | *q = xlat[*q]; |
902 | 0 | } |
903 | 0 | } |
904 | 0 | n = bb->predecessors_count; |
905 | 0 | if (n > 0) { |
906 | 0 | for (q = ctx->cfg_edges + bb->predecessors; n > 0; q++, n--) { |
907 | 0 | *q = xlat[*q]; |
908 | 0 | } |
909 | 0 | } |
910 | 0 | _next[prev] = bb->start; |
911 | 0 | _prev[bb->start] = prev; |
912 | 0 | prev = bb->end; |
913 | 0 | } |
914 | 0 | _next[0] = 0; |
915 | 0 | _next[prev] = 0; |
916 | |
917 | 0 | for (ref = 2; ref < ctx->insns_count; ref++) { |
918 | 0 | ctx->cfg_map[ref] = xlat[ctx->cfg_map[ref]]; |
919 | 0 | } |
920 | 0 | ir_mem_free(xlat); |
921 | |
922 | 0 | ir_mem_free(ctx->cfg_blocks); |
923 | 0 | ctx->cfg_blocks = new_blocks; |
924 | 0 | } |
925 | | |
926 | | int ir_schedule(ir_ctx *ctx) |
927 | 0 | { |
928 | 0 | ir_ctx new_ctx; |
929 | 0 | ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count; |
930 | 0 | ir_ref *_xlat; |
931 | 0 | ir_ref *edges; |
932 | 0 | ir_ref prev_b_end; |
933 | 0 | uint32_t b; |
934 | 0 | uint32_t *_blocks = ctx->cfg_map; |
935 | 0 | ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
936 | 0 | ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
937 | 0 | ir_block *bb; |
938 | 0 | ir_insn *insn, *new_insn; |
939 | 0 | ir_use_list *lists, *use_list, *new_list; |
940 | 0 | bool bad_bb_order = 0; |
941 | | |
942 | | |
943 | | /* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */ |
944 | 0 | IR_ASSERT(_blocks[1] == 1); |
945 | | |
946 | | /* link BB boundaries */ |
947 | 0 | _prev[1] = 0; |
948 | 0 | prev_b_end = ctx->cfg_blocks[1].end; |
949 | 0 | _next[1] = prev_b_end; |
950 | 0 | _prev[prev_b_end] = 1; |
951 | 0 | for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) { |
952 | 0 | _next[prev_b_end] = bb->start; |
953 | 0 | _prev[bb->start] = prev_b_end; |
954 | 0 | _next[bb->start] = bb->end; |
955 | 0 | _prev[bb->end] = bb->start; |
956 | 0 | prev_b_end = bb->end; |
957 | 0 | if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) { |
958 | 0 | bad_bb_order = 1; |
959 | 0 | } |
960 | 0 | } |
961 | 0 | _next[prev_b_end] = 0; |
962 | | |
963 | | /* insert intermediate BB nodes */ |
964 | 0 | for (i = 2, j = 1; i < ctx->insns_count; i++) { |
965 | 0 | b = _blocks[i]; |
966 | 0 | if (!b) continue; |
967 | 0 | bb = &ctx->cfg_blocks[b]; |
968 | 0 | if (i != bb->start && i != bb->end) { |
969 | | /* insert before "end" */ |
970 | 0 | ir_ref n = bb->end; |
971 | 0 | ir_ref p = _prev[n]; |
972 | 0 | _prev[i] = p; |
973 | 0 | _next[i] = n; |
974 | 0 | _next[p] = i; |
975 | 0 | _prev[n] = i; |
976 | 0 | } |
977 | 0 | } |
978 | |
979 | 0 | if (bad_bb_order) { |
980 | 0 | ir_fix_bb_order(ctx, _prev, _next); |
981 | 0 | } |
982 | |
983 | 0 | #ifdef IR_DEBUG |
984 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
985 | 0 | fprintf(stderr, "Before Schedule\n"); |
986 | 0 | for (i = 1; i != 0; i = _next[i]) { |
987 | 0 | fprintf(stderr, "%d -> %d\n", i, _blocks[i]); |
988 | 0 | } |
989 | 0 | } |
990 | 0 | #endif |
991 | |
992 | 0 | _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); |
993 | 0 | _xlat += ctx->consts_count; |
994 | 0 | _xlat[IR_TRUE] = IR_TRUE; |
995 | 0 | _xlat[IR_FALSE] = IR_FALSE; |
996 | 0 | _xlat[IR_NULL] = IR_NULL; |
997 | 0 | _xlat[IR_UNUSED] = IR_UNUSED; |
998 | 0 | insns_count = 1; |
999 | 0 | consts_count = -(IR_TRUE - 1); |
1000 | | |
1001 | | /* Topological sort according to dependencies inside each basic block */ |
1002 | 0 | for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { |
1003 | 0 | ir_ref start; |
1004 | |
1005 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
1006 | | /* Schedule BB start */ |
1007 | 0 | start = i = bb->start; |
1008 | 0 | _xlat[i] = bb->start = insns_count; |
1009 | 0 | insn = &ctx->ir_base[i]; |
1010 | 0 | if (insn->op == IR_CASE_VAL) { |
1011 | 0 | IR_ASSERT(insn->op2 < IR_TRUE); |
1012 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1013 | 0 | } else if (insn->op == IR_CASE_RANGE) { |
1014 | 0 | IR_ASSERT(insn->op2 < IR_TRUE); |
1015 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1016 | 0 | IR_ASSERT(insn->op3 < IR_TRUE); |
1017 | 0 | consts_count += ir_count_constant(_xlat, insn->op3); |
1018 | 0 | } |
1019 | 0 | n = insn->inputs_count; |
1020 | 0 | insns_count += ir_insn_inputs_to_len(n); |
1021 | 0 | i = _next[i]; |
1022 | 0 | insn = &ctx->ir_base[i]; |
1023 | 0 | if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { |
1024 | 0 | int count = 0; |
1025 | | |
1026 | | /* Schedule PARAM, VAR, PI */ |
1027 | 0 | while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { |
1028 | 0 | _xlat[i] = insns_count; |
1029 | 0 | insns_count += 1; |
1030 | 0 | i = _next[i]; |
1031 | 0 | insn = &ctx->ir_base[i]; |
1032 | 0 | count++; |
1033 | 0 | } |
1034 | | /* Schedule PHIs */ |
1035 | 0 | while (insn->op == IR_PHI) { |
1036 | 0 | ir_ref j, *p, input; |
1037 | |
1038 | 0 | _xlat[i] = insns_count; |
1039 | | /* Reuse "n" from MERGE and skip first input */ |
1040 | 0 | insns_count += ir_insn_inputs_to_len(n + 1); |
1041 | 0 | for (j = n, p = insn->ops + 2; j > 0; p++, j--) { |
1042 | 0 | input = *p; |
1043 | 0 | if (input < IR_TRUE) { |
1044 | 0 | consts_count += ir_count_constant(_xlat, input); |
1045 | 0 | } |
1046 | 0 | } |
1047 | 0 | i = _next[i]; |
1048 | 0 | insn = &ctx->ir_base[i]; |
1049 | 0 | count++; |
1050 | 0 | } |
1051 | | /* Schedule remaining PHIs */ |
1052 | 0 | if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) { |
1053 | 0 | ir_use_list *use_list = &ctx->use_lists[start]; |
1054 | 0 | ir_ref *p, count = use_list->count; |
1055 | 0 | ir_ref phis = _prev[i]; |
1056 | |
1057 | 0 | for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) { |
1058 | 0 | ir_ref use = *p; |
1059 | 0 | ir_insn *use_insn = &ctx->ir_base[use]; |
1060 | 0 | if (!_xlat[use] && (_blocks[use] || use_insn->op == IR_PARAM)) { |
1061 | 0 | IR_ASSERT(_blocks[use] == b || use_insn->op == IR_PARAM); |
1062 | 0 | if (use_insn->op == IR_PARAM |
1063 | 0 | || use_insn->op == IR_VAR |
1064 | 0 | || use_insn->op == IR_PI |
1065 | 0 | || use_insn->op == IR_PHI) { |
1066 | 0 | if (_prev[use] != phis) { |
1067 | | /* remove "use" */ |
1068 | 0 | _prev[_next[use]] = _prev[use]; |
1069 | 0 | _next[_prev[use]] = _next[use]; |
1070 | | /* insert "use" after "phis" */ |
1071 | 0 | _prev[use] = phis; |
1072 | 0 | _next[use] = _next[phis]; |
1073 | 0 | _prev[_next[phis]] = use; |
1074 | 0 | _next[phis] = use; |
1075 | 0 | } |
1076 | 0 | phis = use; |
1077 | 0 | _xlat[use] = insns_count; |
1078 | 0 | if (use_insn->op == IR_PHI) { |
1079 | 0 | ir_ref *q; |
1080 | | /* Reuse "n" from MERGE and skip first input */ |
1081 | 0 | insns_count += ir_insn_inputs_to_len(n + 1); |
1082 | 0 | for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) { |
1083 | 0 | ir_ref input = *q; |
1084 | 0 | if (input < IR_TRUE) { |
1085 | 0 | consts_count += ir_count_constant(_xlat, input); |
1086 | 0 | } |
1087 | 0 | } |
1088 | 0 | } else { |
1089 | 0 | insns_count += 1; |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | i = _next[phis]; |
1095 | 0 | insn = &ctx->ir_base[i]; |
1096 | 0 | } |
1097 | 0 | } |
1098 | 0 | if (bb->successors_count > 1) { |
1099 | 0 | ir_ref input, j = bb->end; |
1100 | 0 | ir_insn *end = &ctx->ir_base[j]; |
1101 | |
1102 | 0 | if (end->op == IR_IF) { |
1103 | | /* Move condition closer to IF */ |
1104 | 0 | input = end->op2; |
1105 | 0 | if (input > 0 |
1106 | 0 | && _blocks[input] == b |
1107 | 0 | && !_xlat[input] |
1108 | 0 | && _prev[j] != input |
1109 | 0 | && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) { |
1110 | 0 | if (input == i) { |
1111 | 0 | i = _next[i]; |
1112 | 0 | insn = &ctx->ir_base[i]; |
1113 | 0 | } |
1114 | | /* remove "input" */ |
1115 | 0 | _prev[_next[input]] = _prev[input]; |
1116 | 0 | _next[_prev[input]] = _next[input]; |
1117 | | /* insert before "j" */ |
1118 | 0 | _prev[input] = _prev[j]; |
1119 | 0 | _next[input] = j; |
1120 | 0 | _next[_prev[j]] = input; |
1121 | 0 | _prev[j] = input; |
1122 | 0 | } |
1123 | 0 | } |
1124 | 0 | } |
1125 | 0 | while (i != bb->end) { |
1126 | 0 | ir_ref n, j, *p, input; |
1127 | |
1128 | 0 | restart: |
1129 | 0 | IR_ASSERT(_blocks[i] == b); |
1130 | 0 | n = insn->inputs_count; |
1131 | 0 | for (j = n, p = insn->ops + 1; j > 0; p++, j--) { |
1132 | 0 | input = *p; |
1133 | 0 | if (!_xlat[input]) { |
1134 | | /* input is not scheduled yet */ |
1135 | 0 | if (input > 0) { |
1136 | 0 | if (_blocks[input] == b) { |
1137 | | /* "input" should be before "i" to satisfy dependency */ |
1138 | 0 | #ifdef IR_DEBUG |
1139 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
1140 | 0 | fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i); |
1141 | 0 | } |
1142 | 0 | #endif |
1143 | | /* remove "input" */ |
1144 | 0 | _prev[_next[input]] = _prev[input]; |
1145 | 0 | _next[_prev[input]] = _next[input]; |
1146 | | /* insert before "i" */ |
1147 | 0 | _prev[input] = _prev[i]; |
1148 | 0 | _next[input] = i; |
1149 | 0 | _next[_prev[i]] = input; |
1150 | 0 | _prev[i] = input; |
1151 | | /* restart from "input" */ |
1152 | 0 | i = input; |
1153 | 0 | insn = &ctx->ir_base[i]; |
1154 | 0 | goto restart; |
1155 | 0 | } |
1156 | 0 | } else if (input < IR_TRUE) { |
1157 | 0 | consts_count += ir_count_constant(_xlat, input); |
1158 | 0 | } |
1159 | 0 | } |
1160 | 0 | } |
1161 | 0 | _xlat[i] = insns_count; |
1162 | 0 | insns_count += ir_insn_inputs_to_len(n); |
1163 | 0 | IR_ASSERT(_next[i] != IR_UNUSED); |
1164 | 0 | i = _next[i]; |
1165 | 0 | insn = &ctx->ir_base[i]; |
1166 | 0 | } |
1167 | | /* Schedule BB end */ |
1168 | 0 | _xlat[i] = bb->end = insns_count; |
1169 | 0 | insns_count++; |
1170 | 0 | if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) { |
1171 | 0 | if (insn->op2 < IR_TRUE) { |
1172 | 0 | consts_count += ir_count_constant(_xlat, insn->op2); |
1173 | 0 | } |
1174 | 0 | } |
1175 | 0 | } |
1176 | | |
1177 | 0 | #ifdef IR_DEBUG |
1178 | 0 | if (ctx->flags & IR_DEBUG_SCHEDULE) { |
1179 | 0 | fprintf(stderr, "After Schedule\n"); |
1180 | 0 | for (i = 1; i != 0; i = _next[i]) { |
1181 | 0 | fprintf(stderr, "%d -> %d (%d)\n", i, _blocks[i], _xlat[i]); |
1182 | 0 | } |
1183 | 0 | } |
1184 | 0 | #endif |
1185 | |
1186 | 0 | #if 1 |
1187 | | /* Check if scheduling didn't make any modifications */ |
1188 | 0 | if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { |
1189 | 0 | bool changed = 0; |
1190 | |
1191 | 0 | for (i = 1; i != 0; i = _next[i]) { |
1192 | 0 | if (_xlat[i] != i) { |
1193 | 0 | changed = 1; |
1194 | 0 | break; |
1195 | 0 | } |
1196 | 0 | } |
1197 | 0 | if (!changed) { |
1198 | 0 | _xlat -= ctx->consts_count; |
1199 | 0 | ir_mem_free(_xlat); |
1200 | 0 | ir_mem_free(_next); |
1201 | |
1202 | 0 | ctx->prev_ref = _prev; |
1203 | 0 | ctx->flags2 |= IR_LINEAR; |
1204 | 0 | ir_truncate(ctx); |
1205 | |
1206 | 0 | return 1; |
1207 | 0 | } |
1208 | 0 | } |
1209 | 0 | #endif |
1210 | | |
1211 | 0 | ir_mem_free(_prev); |
1212 | |
1213 | 0 | ir_init(&new_ctx, ctx->flags, consts_count, insns_count); |
1214 | 0 | new_ctx.insns_count = insns_count; |
1215 | 0 | new_ctx.flags2 = ctx->flags2; |
1216 | 0 | new_ctx.ret_type = ctx->ret_type; |
1217 | 0 | new_ctx.value_params = ctx->value_params; |
1218 | 0 | new_ctx.mflags = ctx->mflags; |
1219 | 0 | new_ctx.spill_base = ctx->spill_base; |
1220 | 0 | new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; |
1221 | 0 | new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size; |
1222 | 0 | new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size; |
1223 | 0 | new_ctx.fixed_regset = ctx->fixed_regset; |
1224 | 0 | new_ctx.fixed_save_regset = ctx->fixed_save_regset; |
1225 | 0 | new_ctx.entries_count = ctx->entries_count; |
1226 | | #if defined(IR_TARGET_AARCH64) |
1227 | | new_ctx.deoptimization_exits = ctx->deoptimization_exits; |
1228 | | new_ctx.get_exit_addr = ctx->get_exit_addr; |
1229 | | new_ctx.get_veneer = ctx->get_veneer; |
1230 | | new_ctx.set_veneer = ctx->set_veneer; |
1231 | | #endif |
1232 | 0 | new_ctx.loader = ctx->loader; |
1233 | | |
1234 | | /* Copy constants */ |
1235 | 0 | if (consts_count == ctx->consts_count) { |
1236 | 0 | new_ctx.consts_count = consts_count; |
1237 | 0 | ref = 1 - consts_count; |
1238 | 0 | insn = &ctx->ir_base[ref]; |
1239 | 0 | new_insn = &new_ctx.ir_base[ref]; |
1240 | |
1241 | 0 | memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref)); |
1242 | 0 | if (ctx->strtab.data) { |
1243 | 0 | while (ref != IR_TRUE) { |
1244 | 0 | if (new_insn->op == IR_FUNC_ADDR) { |
1245 | 0 | if (new_insn->proto) { |
1246 | 0 | size_t len; |
1247 | 0 | const char *proto = ir_get_strl(ctx, new_insn->proto, &len); |
1248 | 0 | new_insn->proto = ir_strl(&new_ctx, proto, len); |
1249 | 0 | } |
1250 | 0 | } else if (new_insn->op == IR_FUNC) { |
1251 | 0 | size_t len; |
1252 | 0 | const char *name = ir_get_strl(ctx, new_insn->val.name, &len); |
1253 | 0 | new_insn->val.u64 = ir_strl(&new_ctx, name, len); |
1254 | 0 | if (new_insn->proto) { |
1255 | 0 | const char *proto = ir_get_strl(ctx, new_insn->proto, &len); |
1256 | 0 | new_insn->proto = ir_strl(&new_ctx, proto, len); |
1257 | 0 | } |
1258 | 0 | } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) { |
1259 | 0 | size_t len; |
1260 | 0 | const char *str = ir_get_strl(ctx, new_insn->val.name, &len); |
1261 | 0 | new_insn->val.u64 = ir_strl(&new_ctx, str, len); |
1262 | 0 | } |
1263 | 0 | new_insn++; |
1264 | 0 | ref++; |
1265 | 0 | } |
1266 | 0 | } |
1267 | 0 | } else { |
1268 | 0 | new_ref = -new_ctx.consts_count; |
1269 | 0 | new_insn = &new_ctx.ir_base[new_ref]; |
1270 | 0 | for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) { |
1271 | 0 | if (!_xlat[ref]) { |
1272 | 0 | continue; |
1273 | 0 | } |
1274 | 0 | new_insn->optx = insn->optx; |
1275 | 0 | new_insn->prev_const = 0; |
1276 | 0 | if (insn->op == IR_FUNC_ADDR) { |
1277 | 0 | new_insn->val.u64 = insn->val.u64; |
1278 | 0 | if (insn->proto) { |
1279 | 0 | size_t len; |
1280 | 0 | const char *proto = ir_get_strl(ctx, insn->proto, &len); |
1281 | 0 | new_insn->proto = ir_strl(&new_ctx, proto, len); |
1282 | 0 | } else { |
1283 | 0 | new_insn->proto = 0; |
1284 | 0 | } |
1285 | 0 | } else if (insn->op == IR_FUNC) { |
1286 | 0 | size_t len; |
1287 | 0 | const char *name = ir_get_strl(ctx, insn->val.name, &len); |
1288 | 0 | new_insn->val.u64 = ir_strl(&new_ctx, name, len); |
1289 | 0 | if (insn->proto) { |
1290 | 0 | const char *proto = ir_get_strl(ctx, insn->proto, &len); |
1291 | 0 | new_insn->proto = ir_strl(&new_ctx, proto, len); |
1292 | 0 | } else { |
1293 | 0 | new_insn->proto = 0; |
1294 | 0 | } |
1295 | 0 | } else if (insn->op == IR_SYM || insn->op == IR_STR) { |
1296 | 0 | size_t len; |
1297 | 0 | const char *str = ir_get_strl(ctx, insn->val.name, &len); |
1298 | 0 | new_insn->val.u64 = ir_strl(&new_ctx, str, len); |
1299 | 0 | } else { |
1300 | 0 | new_insn->val.u64 = insn->val.u64; |
1301 | 0 | } |
1302 | 0 | _xlat[ref] = new_ref; |
1303 | 0 | new_ref--; |
1304 | 0 | new_insn--; |
1305 | 0 | } |
1306 | 0 | new_ctx.consts_count = -new_ref; |
1307 | 0 | } |
1308 | |
1309 | 0 | new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); |
1310 | 0 | new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref)); |
1311 | 0 | new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list)); |
1312 | 0 | new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref)); |
1313 | | |
1314 | | /* Copy instructions, use lists and use edges */ |
1315 | 0 | prev_ref = 0; |
1316 | 0 | use_edges_count = 0; |
1317 | 0 | for (i = 1; i != 0; i = _next[i]) { |
1318 | 0 | new_ref = _xlat[i]; |
1319 | 0 | new_ctx.cfg_map[new_ref] = _blocks[i]; |
1320 | 0 | _prev[new_ref] = prev_ref; |
1321 | 0 | prev_ref = new_ref; |
1322 | |
1323 | 0 | use_list = &ctx->use_lists[i]; |
1324 | 0 | n = use_list->count; |
1325 | 0 | k = 0; |
1326 | 0 | if (n == 1) { |
1327 | 0 | ref = ctx->use_edges[use_list->refs]; |
1328 | 0 | if (_xlat[ref]) { |
1329 | 0 | *edges = _xlat[ref]; |
1330 | 0 | edges++; |
1331 | 0 | k = 1; |
1332 | 0 | } |
1333 | 0 | } else { |
1334 | 0 | p = &ctx->use_edges[use_list->refs]; |
1335 | 0 | while (n--) { |
1336 | 0 | ref = *p; |
1337 | 0 | if (_xlat[ref]) { |
1338 | 0 | *edges = _xlat[ref]; |
1339 | 0 | edges++; |
1340 | 0 | k++; |
1341 | 0 | } |
1342 | 0 | p++; |
1343 | 0 | } |
1344 | 0 | } |
1345 | 0 | new_list = &lists[new_ref]; |
1346 | 0 | new_list->refs = use_edges_count; |
1347 | 0 | use_edges_count += k; |
1348 | 0 | new_list->count = k; |
1349 | |
1350 | 0 | insn = &ctx->ir_base[i]; |
1351 | 0 | new_insn = &new_ctx.ir_base[new_ref]; |
1352 | |
1353 | 0 | new_insn->optx = insn->optx; |
1354 | 0 | n = new_insn->inputs_count; |
1355 | 0 | switch (n) { |
1356 | 0 | case 0: |
1357 | 0 | new_insn->op1 = insn->op1; |
1358 | 0 | new_insn->op2 = insn->op2; |
1359 | 0 | new_insn->op3 = insn->op3; |
1360 | 0 | break; |
1361 | 0 | case 1: |
1362 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1363 | 0 | if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR || new_insn->op == IR_PROTO) { |
1364 | 0 | size_t len; |
1365 | 0 | const char *str = ir_get_strl(ctx, insn->op2, &len); |
1366 | 0 | new_insn->op2 = ir_strl(&new_ctx, str, len); |
1367 | 0 | } else { |
1368 | 0 | new_insn->op2 = insn->op2; |
1369 | 0 | } |
1370 | 0 | new_insn->op3 = insn->op3; |
1371 | 0 | break; |
1372 | 0 | case 2: |
1373 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1374 | 0 | new_insn->op2 = _xlat[insn->op2]; |
1375 | 0 | new_insn->op3 = insn->op3; |
1376 | 0 | #if IR_SCHEDULE_SWAP_OPS |
1377 | | /* Swap operands according to folding rules */ |
1378 | 0 | if (new_insn->op1 < new_insn->op2) { |
1379 | 0 | switch (new_insn->op) { |
1380 | 0 | case IR_EQ: |
1381 | 0 | case IR_NE: |
1382 | 0 | case IR_ADD: |
1383 | 0 | case IR_MUL: |
1384 | 0 | case IR_ADD_OV: |
1385 | 0 | case IR_MUL_OV: |
1386 | 0 | case IR_OR: |
1387 | 0 | case IR_AND: |
1388 | 0 | case IR_XOR: |
1389 | 0 | case IR_MIN: |
1390 | 0 | case IR_MAX: |
1391 | 0 | SWAP_REFS(new_insn->op1, new_insn->op2); |
1392 | 0 | break; |
1393 | 0 | case IR_LT: |
1394 | 0 | case IR_GE: |
1395 | 0 | case IR_LE: |
1396 | 0 | case IR_GT: |
1397 | 0 | case IR_ULT: |
1398 | 0 | case IR_UGE: |
1399 | 0 | case IR_ULE: |
1400 | 0 | case IR_UGT: |
1401 | 0 | SWAP_REFS(new_insn->op1, new_insn->op2); |
1402 | 0 | new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */ |
1403 | 0 | break; |
1404 | 0 | } |
1405 | 0 | } |
1406 | 0 | #endif |
1407 | 0 | break; |
1408 | 0 | case 3: |
1409 | 0 | new_insn->op1 = _xlat[insn->op1]; |
1410 | 0 | new_insn->op2 = _xlat[insn->op2]; |
1411 | 0 | new_insn->op3 = _xlat[insn->op3]; |
1412 | 0 | break; |
1413 | 0 | default: |
1414 | 0 | for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) { |
1415 | 0 | *q = _xlat[*p]; |
1416 | 0 | } |
1417 | 0 | break; |
1418 | 0 | } |
1419 | 0 | } |
1420 | | |
1421 | | /* Update list of terminators (IR_OPND_CONTROL_REF) */ |
1422 | 0 | insn = &new_ctx.ir_base[1]; |
1423 | 0 | ref = insn->op1; |
1424 | 0 | if (ref) { |
1425 | 0 | insn->op1 = ref = _xlat[ref]; |
1426 | 0 | while (1) { |
1427 | 0 | insn = &new_ctx.ir_base[ref]; |
1428 | 0 | ref = insn->op3; |
1429 | 0 | if (!ref) { |
1430 | 0 | break; |
1431 | 0 | } |
1432 | 0 | insn->op3 = ref = _xlat[ref]; |
1433 | 0 | } |
1434 | 0 | } |
1435 | |
1436 | 0 | IR_ASSERT(ctx->use_edges_count >= use_edges_count); |
1437 | 0 | new_ctx.use_edges_count = use_edges_count; |
1438 | 0 | new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref)); |
1439 | |
1440 | 0 | if (ctx->binding) { |
1441 | 0 | ir_xlat_binding(ctx, _xlat); |
1442 | 0 | new_ctx.binding = ctx->binding; |
1443 | 0 | ctx->binding = NULL; |
1444 | 0 | } |
1445 | |
1446 | 0 | _xlat -= ctx->consts_count; |
1447 | 0 | ir_mem_free(_xlat); |
1448 | |
1449 | 0 | new_ctx.cfg_blocks_count = ctx->cfg_blocks_count; |
1450 | 0 | new_ctx.cfg_edges_count = ctx->cfg_edges_count; |
1451 | 0 | new_ctx.cfg_blocks = ctx->cfg_blocks; |
1452 | 0 | new_ctx.cfg_edges = ctx->cfg_edges; |
1453 | 0 | ctx->cfg_blocks = NULL; |
1454 | 0 | ctx->cfg_edges = NULL; |
1455 | 0 | ctx->value_params = NULL; |
1456 | 0 | ir_code_buffer *saved_code_buffer = ctx->code_buffer; |
1457 | |
1458 | 0 | ir_free(ctx); |
1459 | 0 | IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit); |
1460 | 0 | IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit); |
1461 | 0 | memcpy(ctx, &new_ctx, sizeof(ir_ctx)); |
1462 | 0 | ctx->code_buffer = saved_code_buffer; |
1463 | 0 | ctx->flags2 |= IR_LINEAR; |
1464 | |
1465 | 0 | ir_mem_free(_next); |
1466 | |
1467 | 0 | return 1; |
1468 | 0 | } |
1469 | | |
1470 | | void ir_build_prev_refs(ir_ctx *ctx) |
1471 | 0 | { |
1472 | 0 | uint32_t b; |
1473 | 0 | ir_block *bb; |
1474 | 0 | ir_ref i, n, prev; |
1475 | 0 | ir_insn *insn; |
1476 | |
1477 | 0 | ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); |
1478 | 0 | prev = 0; |
1479 | 0 | for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { |
1480 | 0 | IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); |
1481 | 0 | for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { |
1482 | 0 | ctx->prev_ref[i] = prev; |
1483 | 0 | n = ir_insn_len(insn); |
1484 | 0 | prev = i; |
1485 | 0 | i += n; |
1486 | 0 | insn += n; |
1487 | 0 | } |
1488 | 0 | ctx->prev_ref[i] = prev; |
1489 | 0 | } |
1490 | 0 | } |
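/*
 * Editor's note: this runs on an already linearized context, where the
 * instructions of each block occupy consecutive slots, so a single forward
 * sweep per block suffices; ir_insn_len() accounts for instructions whose
 * operands spill into additional ir_insn slots.
 */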