/src/php-src/ext/opcache/jit/ir/ir_emit.c
Line | Count | Source |
1 | | /* |
2 | | * IR - Lightweight JIT Compilation Framework |
3 | | * (Native code generator based on DynAsm) |
4 | | * Copyright (C) 2022 Zend by Perforce. |
5 | | * Authors: Dmitry Stogov <dmitry@php.net> |
6 | | */ |
7 | | |
8 | | #include "ir.h" |
9 | | |
10 | | #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) |
11 | | # include "ir_x86.h" |
12 | | #elif defined(IR_TARGET_AARCH64) |
13 | | # include "ir_aarch64.h" |
14 | | #else |
15 | | # error "Unknown IR target" |
16 | | #endif |
17 | | |
18 | | #include "ir_private.h" |
19 | | #ifndef _WIN32 |
20 | | # include <dlfcn.h> |
21 | | #else |
22 | | # define WIN32_LEAN_AND_MEAN |
23 | | # include <windows.h> |
24 | | # include <psapi.h> |
25 | | #endif |
26 | | |
27 | | #if defined(__linux__) || defined(__sun) |
28 | | # include <alloca.h> |
29 | | #endif |
30 | | |
/*
 * DynAsm memory-management hooks: route DynAsm buffer growth/release through
 * the IR allocator.  Growth doubles the current size (minimum 16 bytes) and
 * is capped at one DynAsm section (sizeof(t) * DASM_SEC2POS(1)); when the
 * request exceeds the cap, DASM_S_NOMEM is recorded on the dasm_State and
 * the macro returns from the *enclosing* (void) function - note the bare
 * `return;`.
 */
#define DASM_M_GROW(ctx, t, p, sz, need) \
    do { \
      size_t _sz = (sz), _need = (need); \
      if (_sz < _need) { \
        size_t _limit = sizeof(t) * DASM_SEC2POS(1); \
        if (_need > _limit) { \
          Dst_REF->status = DASM_S_NOMEM; \
          return; \
        } \
        if (_sz < 16) _sz = 16; \
        while (_sz < _need) _sz += _sz; \
        if (_sz > _limit) _sz = _limit; \
        (p) = (t *)ir_mem_realloc((p), _sz); \
        (sz) = _sz; \
      } \
    } while(0)

#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)
49 | | |
50 | | #ifdef IR_DEBUG |
51 | | # define DASM_CHECKS |
52 | | #endif |
53 | | |
/* A single register-to-register move scheduled by the parallel copy
 * sequencer (ir_parallel_copy). */
typedef struct _ir_copy {
	ir_type type; /* type of the moved value (selects int vs FP move) */
	ir_reg from;
	ir_reg to;
} ir_copy;

/* A single move of the out-of-SSA (DeSSA) parallel copy; locations may be
 * CPU registers, spill slots of virtual registers, or constants. */
typedef struct _ir_dessa_copy {
	ir_type type;
	int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
	int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
65 | | |
/* Tables of CPU registers used for passing integer/FP arguments.
 * NOTE(review): declared without initializers here - presumably the actual
 * contents come from the target-specific emit header included below
 * (ir_emit_x86.h / ir_emit_aarch64.h); confirm against those files.
 * When a target passes no arguments of a kind in registers, a plain NULL
 * pointer stands in for the (empty) table. */
#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif
76 | | |
77 | | static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn) |
78 | 0 | { |
79 | 0 | if (IR_IS_CONST_REF(insn->op2)) { |
80 | 0 | const ir_insn *func = &ctx->ir_base[insn->op2]; |
81 | |
|
82 | 0 | if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) { |
83 | 0 | if (func->proto) { |
84 | 0 | return (const ir_proto_t *)ir_get_str(ctx, func->proto); |
85 | 0 | } |
86 | 0 | } |
87 | 0 | } else if (ctx->ir_base[insn->op2].op == IR_PROTO) { |
88 | 0 | return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2); |
89 | 0 | } |
90 | 0 | return NULL; |
91 | 0 | } |
92 | | |
#ifdef IR_HAVE_FASTCALL
/* Register tables for the 32-bit fastcall convention (contents defined by
 * the target backend); fastcall passes no FP arguments in registers, hence
 * the NULL stand-in pointer. */
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

/* Check whether the callee of a call instruction is declared fastcall.
 * Fastcall is only meaningful on 32-bit targets, so everything collapses
 * to 0 when sizeof(void*) != 4. */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	if (sizeof(void*) == 4) {
		if (IR_IS_CONST_REF(insn->op2)) {
			/* direct call: check the function constant's prototype */
			const ir_insn *func = &ctx->ir_base[insn->op2];

			if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
				if (func->proto) {
					const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

					return (proto->flags & IR_FASTCALL_FUNC) != 0;
				}
			}
		} else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
			/* indirect call with an explicit prototype annotation */
			const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);

			return (proto->flags & IR_FASTCALL_FUNC) != 0;
		}
		return 0;
	}
	return 0;
}
#else
/* This target has no fastcall calling convention. */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	return 0;
}
#endif
125 | | |
126 | | bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn) |
127 | 0 | { |
128 | 0 | const ir_proto_t *proto = ir_call_proto(ctx, insn); |
129 | |
|
130 | 0 | if (proto) { |
131 | 0 | return (proto->flags & IR_VARARG_FUNC) != 0; |
132 | 0 | } |
133 | 0 | return 0; |
134 | 0 | } |
135 | | |
136 | | IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref) |
137 | 0 | { |
138 | 0 | IR_ASSERT(!IR_IS_CONST_REF(ref)); |
139 | 0 | return ctx->rules[ref]; |
140 | 0 | } |
141 | | |
142 | | IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref) |
143 | 0 | { |
144 | 0 | return ref > ctx->bb_start; |
145 | 0 | } |
146 | | |
147 | | |
/* Return the CPU register in which the PARAM instruction `ref` arrives,
 * or IR_REG_NONE when it is passed on the stack.
 *
 * Walks all uses of the START instruction (ref 1) in order, counting the
 * int/FP parameters that precede `ref` to determine its position in the
 * calling convention. */
static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	int i;
	ir_ref use, *p;
	ir_insn *insn;
	int int_param = 0;
	int fp_param = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	/* 32-bit fastcall functions use their own register-argument tables */
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (use == ref) {
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
					/* aggregates passed by value never arrive in a register;
					 * value_params is indexed by the PARAM position (op3 - 1) */
					if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
						/* struct passed by value on stack */
						return IR_REG_NONE;
					} else
#endif
					if (int_param < int_reg_params_count) {
						return int_reg_params[int_param];
					} else {
						return IR_REG_NONE;
					}
#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86)
				} else {
					/* by-value aggregates don't consume an argument register,
					 * so they must not advance the counters below */
					if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
						/* struct passed by value on stack */
						continue;
					}
#endif
				}
				int_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param++;
#endif
			} else {
				IR_ASSERT(IR_IS_TYPE_FP(insn->type));
				if (use == ref) {
					if (fp_param < fp_reg_params_count) {
						return fp_reg_params[fp_param];
					} else {
						return IR_REG_NONE;
					}
				}
				fp_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param++;
#endif
			}
		}
	}
	return IR_REG_NONE;
}
219 | | |
/* Fill regs[3..] with the CPU register carrying each argument of a call
 * instruction (arguments start at operand 3); stack-passed arguments get
 * IR_REG_NONE.  Returns one past the index of the last register-passed
 * argument (0 when every argument goes on the stack). */
static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
	int j, n;
	ir_type type;
	int int_param = 0;
	int fp_param = 0;
	int count = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	/* 32-bit fastcall callees use their own register-argument tables */
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	n = insn->inputs_count;
	n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
	for (j = 3; j <= n; j++) {
		ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
		type = arg->type;
		if (IR_IS_TYPE_INT(type)) {
			/* IR_ARGVAL arguments (by-value aggregates) are never passed in
			 * a register */
			if (int_param < int_reg_params_count && arg->op != IR_ARGVAL) {
				regs[j] = int_reg_params[int_param];
				count = j + 1;
				int_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param++;
#endif
			} else {
				regs[j] = IR_REG_NONE;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param < fp_reg_params_count) {
				regs[j] = fp_reg_params[fp_param];
				count = j + 1;
				fp_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param++;
#endif
			} else {
				regs[j] = IR_REG_NONE;
			}
		}
	}
	return count;
}
275 | | |
276 | | static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset) |
277 | 0 | { |
278 | 0 | ir_live_interval *ival1; |
279 | 0 | int32_t o1; |
280 | |
|
281 | 0 | if (IR_IS_CONST_REF(r1)) { |
282 | 0 | return 0; |
283 | 0 | } |
284 | | |
285 | 0 | IR_ASSERT(ctx->vregs[r1]); |
286 | 0 | ival1 = ctx->live_intervals[ctx->vregs[r1]]; |
287 | 0 | IR_ASSERT(ival1); |
288 | 0 | o1 = ival1->stack_spill_pos; |
289 | 0 | IR_ASSERT(o1 != -1); |
290 | 0 | return o1 == offset; |
291 | 0 | } |
292 | | |
/* Resolve a symbol name to an address using the process's own symbol
 * tables.  Returns NULL when the symbol cannot be found.
 * On POSIX this is a dlsym() over the global namespace; on Windows it
 * scans the loaded modules and asks each one via GetProcAddress(). */
void *ir_resolve_sym_name(const char *name)
{
	void *addr;

#ifndef _WIN32
	void *handle = NULL;
# ifdef RTLD_DEFAULT
	/* search the global scope of the main program and all loaded objects */
	handle = RTLD_DEFAULT;
# endif
	addr = dlsym(handle, name);
#else
	/* NOTE(review): a fixed 256-module array - processes with more modules
	 * are silently truncated (cbNeeded may exceed sizeof(mods)); the return
	 * value of EnumProcessModules is also unchecked.  Confirm acceptable. */
	HMODULE mods[256];
	DWORD cbNeeded;
	uint32_t i = 0;

	addr = NULL;

	EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);

	while(i < (cbNeeded / sizeof(HMODULE))) {
		addr = GetProcAddress(mods[i], name);
		if (addr) {
			return addr;
		}
		i++;
	}
#endif
	return addr;
}
322 | | |
323 | | #ifdef IR_SNAPSHOT_HANDLER_DCL |
324 | | IR_SNAPSHOT_HANDLER_DCL(); |
325 | | #endif |
326 | | |
327 | | #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) |
328 | | static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn) |
329 | 0 | { |
330 | 0 | const char *name = ir_get_str(ctx, addr_insn->val.name); |
331 | 0 | void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? |
332 | 0 | ctx->loader->resolve_sym_name(ctx->loader, name, IR_RESOLVE_SYM_SILENT) : |
333 | 0 | ir_resolve_sym_name(name); |
334 | |
|
335 | 0 | return addr; |
336 | 0 | } |
337 | | #endif |
338 | | |
339 | | static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn) |
340 | 0 | { |
341 | 0 | const char *name = ir_get_str(ctx, addr_insn->val.name); |
342 | 0 | void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ? |
343 | 0 | ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC ? IR_RESOLVE_SYM_ADD_THUNK : 0) : |
344 | 0 | ir_resolve_sym_name(name); |
345 | |
|
346 | 0 | IR_ASSERT(addr); |
347 | 0 | return addr; |
348 | 0 | } |
349 | | |
350 | | static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn) |
351 | 0 | { |
352 | 0 | void *addr; |
353 | |
|
354 | 0 | IR_ASSERT(addr_insn->type == IR_ADDR); |
355 | 0 | if (addr_insn->op == IR_FUNC) { |
356 | 0 | addr = ir_sym_val(ctx, addr_insn); |
357 | 0 | } else { |
358 | 0 | IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR); |
359 | 0 | addr = (void*)addr_insn->val.addr; |
360 | 0 | } |
361 | 0 | return addr; |
362 | 0 | } |
363 | | |
/* Return the target address of a tail-call/jump.  Same resolution as
 * ir_call_addr(), but when the jump follows a SNAPSHOT instruction the
 * embedder's IR_SNAPSHOT_HANDLER may substitute a different address
 * (e.g. a deoptimization stub). */
static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
	void *addr = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
	if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
		addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
	}
#endif
	return addr;
}
375 | | |
376 | | static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op) |
377 | 0 | { |
378 | 0 | if (ctx->fused_regs) { |
379 | 0 | char key[10]; |
380 | 0 | ir_ref val; |
381 | |
|
382 | 0 | memcpy(key, &root, sizeof(ir_ref)); |
383 | 0 | memcpy(key + 4, &ref_and_op, sizeof(ir_ref)); |
384 | |
|
385 | 0 | val = ir_strtab_find(ctx->fused_regs, key, 8); |
386 | 0 | if (val) { |
387 | 0 | return val; |
388 | 0 | } |
389 | 0 | } |
390 | 0 | return ((int8_t*)ctx->regs)[ref_and_op]; |
391 | 0 | } |
392 | | |
393 | | #if defined(__GNUC__) |
394 | | # pragma GCC diagnostic push |
395 | | # pragma GCC diagnostic ignored "-Warray-bounds" |
396 | | # pragma GCC diagnostic ignored "-Wimplicit-fallthrough" |
397 | | #endif |
398 | | |
399 | | #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) |
400 | | # include "dynasm/dasm_proto.h" |
401 | | # include "dynasm/dasm_x86.h" |
402 | | #elif defined(IR_TARGET_AARCH64) |
403 | | # include "dynasm/dasm_proto.h" |
404 | | static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset); |
405 | | # define DASM_ADD_VENEER ir_add_veneer |
406 | | # include "dynasm/dasm_arm64.h" |
407 | | #else |
408 | | # error "Unknown IR target" |
409 | | #endif |
410 | | |
411 | | #if defined(__GNUC__) |
412 | | # pragma GCC diagnostic pop |
413 | | #endif |
414 | | |
415 | | /* Forward Declarations */ |
416 | | static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb); |
417 | | static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg); |
418 | | static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb); |
419 | | |
/* Per-compilation state shared by all target backends (stored in ctx->data
 * during code emission). */
typedef struct _ir_common_backend_data {
	ir_reg_alloc_data ra_data;
	uint32_t dessa_from_block; /* presumably the source block of the DeSSA moves being emitted - confirm against backend */
	dasm_State *dasm_state;
	ir_bitset emit_constants;  /* constants whose data must be emitted at the end (see ir_get_const_label) */
} ir_common_backend_data;
426 | | |
427 | | static int ir_get_const_label(ir_ctx *ctx, ir_ref ref) |
428 | 0 | { |
429 | 0 | ir_common_backend_data *data = ctx->data; |
430 | 0 | int label = ctx->cfg_blocks_count - ref; |
431 | |
|
432 | 0 | IR_ASSERT(IR_IS_CONST_REF(ref)); |
433 | 0 | ir_bitset_incl(data->emit_constants, -ref); |
434 | 0 | return label; |
435 | 0 | } |
436 | | |
437 | | #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) |
438 | | # include <ir_emit_x86.h> |
439 | | #elif defined(IR_TARGET_AARCH64) |
440 | | # include <ir_emit_aarch64.h> |
441 | | #else |
442 | | # error "Unknown IR target" |
443 | | #endif |
444 | | |
/* Emit the register loads required at an OSR (on-stack replacement) entry
 * block: values bound to stack slots of the interpreted frame are loaded
 * into the registers the register allocator assigned to them.
 *
 * ctx->osr_entry_loads is a flat list of records
 *     [block, count, ref0 .. refN-1] ... 0
 * terminated by a 0 block number; the loop below scans for the record of
 * block `b`. */
static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	int pos = 0, count, i;
	ir_ref ref;

	IR_ASSERT(ctx->binding);
	IR_ASSERT(list);
	while (1) {
		i = ir_list_at(list, pos);
		if (b == i) {
			break;
		}
		IR_ASSERT(i != 0); /* end marker */
		pos++;
		count = ir_list_at(list, pos);
		pos += count + 1; /* skip this record's refs */
	}
	pos++;
	count = ir_list_at(list, pos);
	pos++;

	for (i = 0; i < count; i++, pos++) {
		ref = ir_list_at(list, pos);
		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
		if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
			/* not spilled */
			ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
			ir_type type = ctx->ir_base[ref].type;
			/* the binding records the (negated) offset of the value in the
			 * caller-provided spill area */
			int32_t offset = -ir_binding_find(ctx, ref);

			IR_ASSERT(offset > 0);
			ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
		} else {
			/* spilled values already live in their bound slot - nothing to load */
			IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
		}
	}
}
483 | | |
484 | | /* |
485 | | * Parallel copy sequentialization algorithm |
486 | | * |
 * The implementation is based on algorithm 1 described in
488 | | * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency", |
489 | | * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon. |
490 | | * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009, |
491 | | * pp. 114-125, doi: 10.1109/CGO.2009.19. |
492 | | */ |
/* Sequentialize a parallel copy between CPU registers.
 *
 * copies[] holds `count` register-to-register moves that must appear to
 * happen simultaneously.  tmp_reg / tmp_fp_reg are scratch registers (or
 * IR_REG_NONE) used to break copy cycles that the target cannot resolve
 * with swap instructions.  Always returns 1. */
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	int i;
	int8_t *pred, *loc, *types;
	ir_reg to, from;
	ir_type type;
	ir_regset todo, ready, srcs;

	/* a single move can never conflict - emit it directly */
	if (count == 1) {
		to = copies[0].to;
		from = copies[0].from;
		IR_ASSERT(from != to);
		type = copies[0].type;
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_mov(ctx, type, to, from);
		} else {
			ir_emit_fp_mov(ctx, type, to, from);
		}
		return 1;
	}

	/* per-register worklists indexed by register number:
	 * loc[r]   - where r's original value currently lives
	 * pred[r]  - the source register of the move targeting r
	 * types[r] - type of the value held in source register r */
	loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
	pred = loc + IR_REG_NUM;
	types = pred + IR_REG_NUM;
	todo = IR_REGSET_EMPTY;
	srcs = IR_REGSET_EMPTY;

	for (i = 0; i < count; i++) {
		from = copies[i].from;
		to = copies[i].to;
		IR_ASSERT(from != to);
		IR_REGSET_INCL(srcs, from);
		loc[from] = from;
		pred[to] = from;
		types[from] = copies[i].type;
		IR_ASSERT(!IR_REGSET_IN(todo, to));
		IR_REGSET_INCL(todo, to);
	}

	/* destinations that are not also sources can be written immediately */
	ready = IR_REGSET_DIFFERENCE(todo, srcs);

	/* fast path: no destination is also a source - plain sequence of moves */
	if (ready == todo) {
		for (i = 0; i < count; i++) {
			from = copies[i].from;
			to = copies[i].to;
			IR_ASSERT(from != to);
			type = copies[i].type;
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov(ctx, type, to, from);
			} else {
				ir_emit_fp_mov(ctx, type, to, from);
			}
		}
		return 1;
	}

	/* temporary registers can't be the same as some of the destinations */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));

	/* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
	while (ready != IR_REGSET_EMPTY) {
		ir_reg r;

		to = ir_regset_pop_first(&ready);
		from = pred[to];
		r = loc[from];
		type = types[from];
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_mov_ext(ctx, type, to, r);
		} else {
			ir_emit_fp_mov(ctx, type, to, r);
		}
		IR_REGSET_EXCL(todo, to);
		loc[from] = to;
		/* moving `from`'s value out may unblock the move targeting `from` */
		if (from == r && IR_REGSET_IN(todo, from)) {
			IR_REGSET_INCL(ready, from);
		}
	}
	if (todo == IR_REGSET_EMPTY) {
		return 1;
	}

	/* at this point the sources that are the same as temporaries are already moved */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);

	/* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
	while (todo != IR_REGSET_EMPTY) {
		to = ir_regset_pop_first(&todo);
		from = pred[to];
		IR_ASSERT(to != loc[from]);
		type = types[from];
		if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
			/* a 2-element cycle can be resolved with a single swap */
			if (pred[from] == to) {
				if (ir_type_size[types[to]] > ir_type_size[type]) {
					type = types[to];
				}
				ir_emit_swap(ctx, type, to, from);
				IR_REGSET_EXCL(todo, from);
				loc[to] = from;
				loc[from] = to;
				continue;
			}
#endif
			/* break the cycle: stash `to`'s value in the scratch register */
			IR_ASSERT(tmp_reg != IR_REG_NONE);
			IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
			ir_emit_mov(ctx, type, tmp_reg, to);
			loc[to] = tmp_reg;
		} else {
#ifdef IR_HAVE_SWAP_FP
			if (pred[from] == to && types[to] == type) {
				ir_emit_swap_fp(ctx, type, to, from);
				IR_REGSET_EXCL(todo, from);
				loc[to] = from;
				loc[from] = to;
				continue;
			}
#endif
			IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
			IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
			ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
			loc[to] = tmp_fp_reg;
		}
		/* shift the remaining values along the (now broken) cycle */
		while (1) {
			ir_reg r;

			from = pred[to];
			r = loc[from];
			type = types[from];
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov_ext(ctx, type, to, r);
			} else {
				ir_emit_fp_mov(ctx, type, to, r);
			}
			IR_REGSET_EXCL(todo, to);
			loc[from] = to;
			if (from == r && IR_REGSET_IN(todo, from)) {
				to = from;
			} else {
				break;
			}
		}
	}

	return 1;
}
641 | | |
/* Emit a single DeSSA move between two locations.
 *
 * Location encoding (see ir_dessa_copy): negative `from` is a constant
 * reference, or - when beyond -consts_count - a local variable address;
 * values in [0..IR_REG_NUM) are CPU registers; IR_REG_NUM + n is the spill
 * slot of virtual register n.  tmp_reg / tmp_fp_reg are scratch registers
 * needed for constant->memory and memory->memory moves. */
static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	ir_mem mem_from, mem_to;

	IR_ASSERT(from != to);
	if (to < IR_REG_NUM) {
		/* destination is a CPU register */
		if (IR_IS_CONST_REF(from)) {
			if (-from < ctx->consts_count) {
				/* constant reference */
				ir_emit_load(ctx, type, to, from);
			} else {
				/* local variable address */
				ir_load_local_addr(ctx, to, -from - ctx->consts_count);
			}
		} else if (from < IR_REG_NUM) {
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov(ctx, type, to, from);
			} else {
				ir_emit_fp_mov(ctx, type, to, from);
			}
		} else {
			mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
			ir_emit_load_mem(ctx, type, to, mem_from);
		}
	} else {
		/* destination is a spill slot */
		mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
		if (IR_IS_CONST_REF(from)) {
			if (-from < ctx->consts_count) {
				/* constant reference */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
				/* small integer constants can be stored directly as an
				 * immediate, without a scratch register */
				if (IR_IS_TYPE_INT(type)
				 && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
				 && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
					ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
					return;
				}
#endif
				ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
				IR_ASSERT(tmp != IR_REG_NONE);
				ir_emit_load(ctx, type, tmp, from);
				ir_emit_store_mem(ctx, type, mem_to, tmp);
			} else {
				/* local variable address */
				IR_ASSERT(IR_IS_TYPE_INT(type));
				IR_ASSERT(tmp_reg != IR_REG_NONE);
				ir_load_local_addr(ctx, tmp_reg, -from - ctx->consts_count);
				ir_emit_store_mem(ctx, type, mem_to, tmp_reg);
			}
		} else if (from < IR_REG_NUM) {
			ir_emit_store_mem(ctx, type, mem_to, from);
		} else {
			/* memory-to-memory move goes through a scratch register */
			mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
			IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
			ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
			IR_ASSERT(tmp != IR_REG_NONE);
			ir_emit_load_mem(ctx, type, tmp, mem_from);
			ir_emit_store_mem(ctx, type, mem_to, tmp);
		}
	}
}
702 | | |
703 | | IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg) |
704 | 0 | { |
705 | 0 | ir_ref from; |
706 | 0 | ir_mem tmp_spill_slot; |
707 | 0 | ir_type type; |
708 | |
|
709 | 0 | IR_MEM_VAL(tmp_spill_slot) = 0; |
710 | 0 | IR_ASSERT(!IR_IS_CONST_REF(to)); |
711 | 0 | from = pred[to]; |
712 | 0 | type = types[from]; |
713 | 0 | IR_ASSERT(!IR_IS_CONST_REF(from)); |
714 | 0 | IR_ASSERT(from != to); |
715 | 0 | IR_ASSERT(loc[from] == from); |
716 | |
|
717 | 0 | if (IR_IS_TYPE_INT(type)) { |
718 | 0 | #ifdef IR_HAVE_SWAP_INT |
719 | 0 | if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) { |
720 | | /* a simple cycle from 2 elements */ |
721 | 0 | if (ir_type_size[types[to]] > ir_type_size[type]) { |
722 | 0 | type = types[to]; |
723 | 0 | } |
724 | 0 | ir_emit_swap(ctx, type, to, from); |
725 | 0 | ir_bitset_excl(todo, from); |
726 | 0 | ir_bitset_excl(todo, to); |
727 | 0 | loc[to] = from; |
728 | 0 | loc[from] = to; |
729 | 0 | return; |
730 | 0 | } |
731 | 0 | #endif |
732 | 0 | IR_ASSERT(tmp_reg != IR_REG_NONE); |
733 | 0 | IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST); |
734 | 0 | loc[to] = tmp_reg; |
735 | 0 | if (to < IR_REG_NUM) { |
736 | 0 | ir_emit_mov(ctx, type, tmp_reg, to); |
737 | 0 | } else { |
738 | 0 | ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM)); |
739 | 0 | } |
740 | 0 | } else { |
741 | | #ifdef IR_HAVE_SWAP_FP |
742 | | if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM && types[to] == type) { |
743 | | /* a simple cycle from 2 elements */ |
744 | | ir_emit_swap_fp(ctx, type, to, from); |
745 | | IR_REGSET_EXCL(todo, from); |
746 | | IR_REGSET_EXCL(todo, to); |
747 | | loc[to] = from; |
748 | | loc[from] = to; |
749 | | return; |
750 | | } |
751 | | #endif |
752 | 0 | IR_ASSERT(tmp_fp_reg != IR_REG_NONE); |
753 | 0 | IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST); |
754 | 0 | loc[to] = tmp_fp_reg; |
755 | 0 | if (to < IR_REG_NUM) { |
756 | 0 | ir_emit_fp_mov(ctx, type, tmp_fp_reg, to); |
757 | 0 | } else { |
758 | 0 | ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM)); |
759 | 0 | } |
760 | 0 | } |
761 | | |
762 | 0 | while (1) { |
763 | 0 | int32_t r; |
764 | |
|
765 | 0 | from = pred[to]; |
766 | 0 | r = loc[from]; |
767 | 0 | type = types[to]; |
768 | |
|
769 | 0 | if (from == r && ir_bitset_in(todo, from)) { |
770 | | /* Memory to memory move inside an isolated or "blocked" cycle requres an additional temporary register */ |
771 | 0 | if (to >= IR_REG_NUM && r >= IR_REG_NUM) { |
772 | 0 | ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg; |
773 | |
|
774 | 0 | if (!IR_MEM_VAL(tmp_spill_slot)) { |
775 | | /* Free a register, saving it in a temporary spill slot */ |
776 | 0 | tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16); |
777 | 0 | ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp); |
778 | 0 | } |
779 | 0 | ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg); |
780 | 0 | } else { |
781 | 0 | ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE); |
782 | 0 | } |
783 | 0 | ir_bitset_excl(todo, to); |
784 | 0 | loc[from] = to; |
785 | 0 | to = from; |
786 | 0 | } else { |
787 | 0 | break; |
788 | 0 | } |
789 | 0 | } |
790 | |
|
791 | 0 | type = types[to]; |
792 | 0 | if (IR_MEM_VAL(tmp_spill_slot)) { |
793 | 0 | ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot); |
794 | 0 | } |
795 | 0 | ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE); |
796 | 0 | ir_bitset_excl(todo, to); |
797 | 0 | loc[from] = to; |
798 | 0 | } |
799 | | |
/* Sequentialize the DeSSA parallel copy of a basic-block edge.
 *
 * Unlike ir_parallel_copy(), locations here may be CPU registers, spill
 * slots of virtual registers (IR_REG_NUM + vreg), or constants (negative).
 * The algorithm is the same three-phase scheme: emit tree moves first, then
 * break/emit cycles, then emit the remaining trees that reuse the scratch
 * registers.  Always returns 1. */
static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	int i;
	int32_t *pred, *loc, to, from;
	int8_t *types;
	ir_type type;
	uint32_t len;
	ir_bitset todo, ready, srcs, visited;

	/* a single move can never conflict - emit it directly */
	if (count == 1) {
		to = copies[0].to;
		from = copies[0].from;
		IR_ASSERT(from != to);
		type = copies[0].type;
		ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		return 1;
	}

	/* worklists indexed by location (register or IR_REG_NUM + vreg);
	 * NOTE(review): the ir_bitset_in() assert below relies on
	 * ir_bitset_malloc() returning a zeroed set - confirm */
	len = IR_REG_NUM + ctx->vregs_count + 1;
	todo = ir_bitset_malloc(len);
	srcs = ir_bitset_malloc(len);
	loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
	pred = loc + len;
	types = (int8_t*)(pred + len);

	for (i = 0; i < count; i++) {
		from = copies[i].from;
		to = copies[i].to;
		IR_ASSERT(from != to);
		if (!IR_IS_CONST_REF(from)) {
			ir_bitset_incl(srcs, from);
			loc[from] = from;
		}
		pred[to] = from;
		types[to] = copies[i].type;
		IR_ASSERT(!ir_bitset_in(todo, to));
		ir_bitset_incl(todo, to);
	}

	/* temporary registers can't be the same as some of the sources */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

	/* first we resolve all "windmill blades" - trees, that don't set temporary registers */
	ready = ir_bitset_malloc(len);
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_difference(ready, srcs, ir_bitset_len(len));
	/* moves into the scratch registers are postponed to the last phase */
	if (tmp_reg != IR_REG_NONE) {
		ir_bitset_excl(ready, tmp_reg);
	}
	if (tmp_fp_reg != IR_REG_NONE) {
		ir_bitset_excl(ready, tmp_fp_reg);
	}
	while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
		ir_bitset_excl(todo, to);
		type = types[to];
		from = pred[to];
		if (IR_IS_CONST_REF(from)) {
			ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		} else {
			int32_t r = loc[from];
			ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
			loc[from] = to;
			/* moving `from`'s value out may unblock the move targeting it */
			if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
				ir_bitset_incl(ready, from);
			}
		}
	}

	/* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
	visited = ir_bitset_malloc(len);
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
	while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
		/* walk pred[] edges from `to` until we revisit a location (a cycle)
		 * or fall off the remaining work */
		ir_bitset_clear(visited, ir_bitset_len(len));
		ir_bitset_incl(visited, to);
		to = pred[to];
		while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
			to = pred[to];
			if (IR_IS_CONST_REF(to)) {
				break;
			} else if (ir_bitset_in(visited, to)) {
				/* We found a cycle. Resolve it. */
				ir_bitset_incl(visited, to);
				ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg);
				break;
			}
			ir_bitset_incl(visited, to);
		}
		ir_bitset_difference(ready, visited, ir_bitset_len(len));
	}

	/* finally we resolve remaining "windmill blades" - trees that set temporary registers */
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_difference(ready, srcs, ir_bitset_len(len));
	while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
		ir_bitset_excl(todo, to);
		type = types[to];
		from = pred[to];
		if (IR_IS_CONST_REF(from)) {
			ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		} else {
			int32_t r = loc[from];
			ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
			loc[from] = to;
			if (from == r && ir_bitset_in(todo, from)) {
				ir_bitset_incl(ready, from);
			}
		}
	}

	IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));

	ir_mem_free(visited);
	ir_mem_free(ready);
	ir_mem_free(loc);
	ir_mem_free(srcs);
	ir_mem_free(todo);
	return 1;
}
920 | | |
/* Emit the parallel copies that realize the successor's PHI inputs along the
 * single outgoing edge of block `b`.  `bb` must have exactly one successor,
 * and that successor must be a merge point (more than one predecessor).
 *
 * For every PHI use of the successor's start (merge) instruction one
 * ir_dessa_copy is collected, with source/destination encoded as:
 *   from < 0                - constant reference (values below -consts_count
 *                             encode the address of a STATIC_ALLOCA local)
 *   [0 .. IR_REG_NUM)       - physical CPU register
 *   [IR_REG_NUM .. )        - IR_REG_NUM + virtual register number
 * No-op copies (to == from) are dropped, as are vreg->vreg copies whose
 * spill slots alias.  The remaining set is resolved as one parallel copy by
 * ir_dessa_parallel_copy() using the two scratch registers reserved at the
 * block-end instruction. */
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
	uint32_t succ, k, n = 0;
	ir_block *succ_bb;
	ir_use_list *use_list;
	ir_ref i, *p;
	ir_dessa_copy *copies;
	/* scratch GP/FP registers pre-reserved on the block-end instruction,
	 * used by the parallel-copy resolver to break copy cycles */
	ir_reg tmp_reg = ctx->regs[bb->end][0];
	ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

	IR_ASSERT(bb->successors_count == 1);
	succ = ctx->cfg_edges[bb->successors];
	succ_bb = &ctx->cfg_blocks[succ];
	IR_ASSERT(succ_bb->predecessors_count > 1);
	use_list = &ctx->use_lists[succ_bb->start];
	/* operand position of the edge b -> succ among the successor's PHIs */
	k = ir_phi_input_number(ctx, succ_bb, b);

	/* worst case one copy per use of the merge instruction; stack-allocated
	 * since the count is bounded by the PHI uses of a single block */
	copies = alloca(use_list->count * sizeof(ir_dessa_copy));

	for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) {
		ir_ref ref = *p;
		ir_insn *insn = &ctx->ir_base[ref];

		if (insn->op == IR_PHI) {
			ir_ref input = ir_insn_op(insn, k);
			ir_reg src = ir_get_alocated_reg(ctx, ref, k);
			ir_reg dst = ctx->regs[ref][0];
			ir_ref from, to;

			/* PHI results are never materialized through a spilled reg here */
			IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
			if (IR_IS_CONST_REF(input)) {
				from = input;
			} else if (ir_rule(ctx, input) == IR_STATIC_ALLOCA) {
				/* encode local variable address */
				from = -(ctx->consts_count + input);
			} else {
				/* prefer the register the input lives in; otherwise fall
				 * back to its virtual register (spill slot) */
				from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
					(ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
			}
			to = (dst != IR_REG_NONE) ?
				(ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
			if (to != from) {
				if (to >= IR_REG_NUM
				 && from >= IR_REG_NUM
				 && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
						IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
					/* It's possible that different virtual registers share the same special spill slot */
					// TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
					continue;
				}
				copies[n].type = insn->type;
				copies[n].from = from;
				copies[n].to = to;
				n++;
			}
		}
	}

	if (n > 0) {
		ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
	}
}
983 | | |
/* Instruction-selection pass: assign a matcher "rule" to every instruction
 * of every reachable block, walking each block bottom-up (end -> start) so
 * that a matched instruction may pre-assign rules to operands it consumes
 * (the `if (!rule)` check below respects such pre-assigned rules).
 *
 * Side effects:
 *  - allocates and fills ctx->rules (one uint32_t per instruction);
 *  - builds ctx->prev_ref via ir_build_prev_refs() when missing;
 *  - records the block of each ENTRY in ctx->entries and stores the entry's
 *    index into the ENTRY instruction's op3, then trims ctx->entries_count
 *    to the number actually found;
 *  - sets IR_HAS_BLOCK_ADDR when a BEGIN has a non-zero op2
 *    (NOTE(review): presumably marks an address-taken block - confirm);
 *  - marks blocks containing only their boundary instructions IR_BB_EMPTY
 *    (and IR_BB_PREV_EMPTY_ENTRY on the textual follower of an empty,
 *    mergeable ENTRY block).
 * Always returns 1. */
int ir_match(ir_ctx *ctx)
{
	uint32_t b;
	ir_ref start, ref, *prev_ref;
	ir_block *bb;
	ir_insn *insn;
	uint32_t entries_count = 0;

	ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

	prev_ref = ctx->prev_ref;
	if (!prev_ref) {
		ir_build_prev_refs(ctx);
		prev_ref = ctx->prev_ref;
	}

	if (ctx->entries_count) {
		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
	}

	/* iterate blocks in reverse order; each block is matched independently */
	for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		start = bb->start;
		if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
			/* number this ENTRY and remember its block */
			IR_ASSERT(entries_count < ctx->entries_count);
			insn = &ctx->ir_base[start];
			IR_ASSERT(insn->op == IR_ENTRY);
			insn->op3 = entries_count;
			ctx->entries[entries_count] = b;
			entries_count++;
		}
		/* block-start control instruction emits no code by itself */
		ctx->rules[start] = IR_SKIPPED | IR_NOP;
		if (ctx->ir_base[start].op == IR_BEGIN && ctx->ir_base[start].op2) {
			ctx->flags2 |= IR_HAS_BLOCK_ADDR;
		}
		ref = bb->end;
		if (bb->successors_count == 1) {
			insn = &ctx->ir_base[ref];
			if (insn->op == IR_END || insn->op == IR_LOOP_END) {
				if (!ctx->rules[ref]) {
					ctx->rules[ref] = insn->op;
				}
				ref = prev_ref[ref];
				/* block holds only start+end insns and is not a self-loop:
				 * flag it empty so later passes can fall through it */
				if (ref == start && ctx->cfg_edges[bb->successors] != b) {
					if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
						bb->flags |= IR_BB_EMPTY;
					} else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
						bb->flags |= IR_BB_EMPTY;
						if (ctx->cfg_edges[bb->successors] == b + 1) {
							(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
						}
					}
					continue;
				}
			}
		}

		ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */

		/* match remaining instructions bottom-up; instructions whose rule
		 * was already set (e.g. by a consuming match) are not re-matched */
		while (ref != start) {
			uint32_t rule = ctx->rules[ref];

			if (!rule) {
				ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
			}
			ir_match_insn2(ctx, ref, rule);
			ref = prev_ref[ref];
		}
	}

	if (ctx->entries_count) {
		/* shrink to the ENTRYs actually seen; free the array if none */
		ctx->entries_count = entries_count;
		if (!entries_count) {
			ir_mem_free(ctx->entries);
			ctx->entries = NULL;
		}
	}

	return 1;
}
1064 | | |
1065 | | int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref) |
1066 | 0 | { |
1067 | 0 | int32_t offset; |
1068 | |
|
1069 | 0 | IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); |
1070 | 0 | offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; |
1071 | 0 | IR_ASSERT(offset != -1); |
1072 | 0 | return IR_SPILL_POS_TO_OFFSET(offset); |
1073 | 0 | } |