Line | Count | Source (jump to first uncovered line) |
1 | | #include <assert.h> |
2 | | #include <string.h> |
3 | | #include <stdlib.h> |
4 | | #include <unistd.h> |
5 | | #include "compile.h" |
6 | | #include "bytecode.h" |
7 | | #include "locfile.h" |
8 | | #include "jv_alloc.h" |
9 | | #include "util.h" |
10 | | |
11 | | /* |
12 | | The intermediate representation for jq filters is as a sequence of |
13 | | struct inst, which form a doubly-linked list via the next and prev |
14 | | pointers. |
15 | | |
16 | | A "block" represents a sequence of "struct inst", which may be |
17 | | empty. |
18 | | |
19 | | Blocks are generated by the parser bottom-up, so may have free |
20 | | variables (refer to things not defined). See inst.bound_by and |
21 | | inst.symbol. |
22 | | */ |
23 | | struct inst { |
24 | | struct inst* next; |
25 | | struct inst* prev; |
26 | | |
27 | | opcode op; |
28 | | |
29 | | struct { |
30 | | uint16_t intval; |
31 | | struct inst* target; |
32 | | jv constant; |
33 | | const struct cfunction* cfunc; |
34 | | } imm; |
35 | | |
36 | | struct locfile* locfile; |
37 | | location source; |
38 | | |
39 | | // Binding |
40 | | // An instruction requiring binding (for parameters/variables/functions) |
41 | | // is in one of three states: |
42 | | // inst->bound_by = NULL - Unbound free variable |
43 | | // inst->bound_by = inst - This instruction binds a variable |
44 | | // inst->bound_by = other - Uses variable bound by other instruction |
45 | | // Unbound instructions (references to other things that may or may not |
46 | | // exist) are created by "gen_foo_unbound", and bindings are created by |
47 | | // block_bind(definition, body), which binds all instructions in |
48 | | // body which are unbound and refer to "definition" by name. |
49 | | struct inst* bound_by; |
50 | | char* symbol; |
51 | | int any_unbound; |
52 | | int referenced; |
53 | | |
54 | | int nformals; |
55 | | int nactuals; |
56 | | |
57 | | block subfn; // used by CLOSURE_CREATE (body of function) |
58 | | block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments) |
59 | | |
60 | | // This instruction is compiled as part of which function? |
61 | | // (only used during block_compile) |
62 | | struct bytecode* compiled; |
63 | | |
64 | | int bytecode_pos; // position just after this insn |
65 | | }; |
66 | | |
67 | 16.7M | static inst* inst_new(opcode op) { |
68 | 16.7M | inst* i = jv_mem_alloc(sizeof(inst)); |
69 | 16.7M | i->next = i->prev = 0; |
70 | 16.7M | i->op = op; |
71 | 16.7M | i->bytecode_pos = -1; |
72 | 16.7M | i->bound_by = 0; |
73 | 16.7M | i->symbol = 0; |
74 | 16.7M | i->any_unbound = 0; |
75 | 16.7M | i->referenced = 0; |
76 | 16.7M | i->nformals = -1; |
77 | 16.7M | i->nactuals = -1; |
78 | 16.7M | i->subfn = gen_noop(); |
79 | 16.7M | i->arglist = gen_noop(); |
80 | 16.7M | i->source = UNKNOWN_LOCATION; |
81 | 16.7M | i->locfile = 0; |
82 | 16.7M | return i; |
83 | 16.7M | } |
84 | | |
85 | 16.7M | static void inst_free(struct inst* i) { |
86 | 16.7M | jv_mem_free(i->symbol); |
87 | 16.7M | block_free(i->subfn); |
88 | 16.7M | block_free(i->arglist); |
89 | 16.7M | if (i->locfile) |
90 | 2.40M | locfile_free(i->locfile); |
91 | 16.7M | if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) { |
92 | 3.00M | jv_free(i->imm.constant); |
93 | 3.00M | } |
94 | 16.7M | jv_mem_free(i); |
95 | 16.7M | } |
96 | | |
97 | 19.7M | static block inst_block(inst* i) { |
98 | 19.7M | block b = {i,i}; |
99 | 19.7M | return b; |
100 | 19.7M | } |
101 | | |
102 | 223M | int block_is_single(block b) { |
103 | 223M | return b.first && b.first == b.last; |
104 | 223M | } |
105 | | |
106 | 1.76M | static inst* block_take(block* b) { |
107 | 1.76M | if (b->first == 0) return 0; |
108 | 1.32M | inst* i = b->first; |
109 | 1.32M | if (i->next) { |
110 | 1.02M | i->next->prev = 0; |
111 | 1.02M | b->first = i->next; |
112 | 1.02M | i->next = 0; |
113 | 1.02M | } else { |
114 | 301k | b->first = 0; |
115 | 301k | b->last = 0; |
116 | 301k | } |
117 | 1.32M | return i; |
118 | 1.76M | } |
119 | | |
120 | 1.99M | block gen_location(location loc, struct locfile* l, block b) { |
121 | 4.50M | for (inst* i = b.first; i; i = i->next) { |
122 | 2.51M | if (i->source.start == UNKNOWN_LOCATION.start && |
123 | 2.51M | i->source.end == UNKNOWN_LOCATION.end) { |
124 | 2.40M | i->source = loc; |
125 | 2.40M | i->locfile = locfile_retain(l); |
126 | 2.40M | } |
127 | 2.51M | } |
128 | 1.99M | return b; |
129 | 1.99M | } |
130 | | |
131 | 39.6M | block gen_noop(void) { |
132 | 39.6M | block b = {0,0}; |
133 | 39.6M | return b; |
134 | 39.6M | } |
135 | | |
136 | 2.16M | int block_is_noop(block b) { |
137 | 2.16M | return (b.first == 0 && b.last == 0); |
138 | 2.16M | } |
139 | | |
140 | 4.65M | block gen_op_simple(opcode op) { |
141 | 4.65M | assert(opcode_describe(op)->length == 1); |
142 | 4.65M | return inst_block(inst_new(op)); |
143 | 4.65M | } |
144 | | |
145 | | |
146 | 16.1k | block gen_error(jv constant) { |
147 | 16.1k | assert(opcode_describe(ERRORK)->flags & OP_HAS_CONSTANT); |
148 | 16.1k | inst *i = inst_new(ERRORK); |
149 | 16.1k | i->imm.constant = constant; |
150 | 16.1k | return inst_block(i); |
151 | 16.1k | } |
152 | | |
153 | 2.43M | block gen_const(jv constant) { |
154 | 2.43M | assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT); |
155 | 2.43M | inst* i = inst_new(LOADK); |
156 | 2.43M | i->imm.constant = constant; |
157 | 2.43M | return inst_block(i); |
158 | 2.43M | } |
159 | | |
160 | 0 | block gen_const_global(jv constant, const char *name) { |
161 | 0 | assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) == |
162 | 0 | (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)); |
163 | 0 | inst* i = inst_new(STORE_GLOBAL); |
164 | 0 | i->imm.constant = constant; |
165 | 0 | i->symbol = strdup(name); |
166 | 0 | i->any_unbound = 0; |
167 | 0 | return inst_block(i); |
168 | 0 | } |
169 | | |
170 | 548k | block gen_op_pushk_under(jv constant) { |
171 | 548k | assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT); |
172 | 548k | inst* i = inst_new(PUSHK_UNDER); |
173 | 548k | i->imm.constant = constant; |
174 | 548k | return inst_block(i); |
175 | 548k | } |
176 | | |
177 | 2.22M | int block_is_const(block b) { |
178 | 2.22M | return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER)); |
179 | 2.22M | } |
180 | | |
181 | 8.91k | jv_kind block_const_kind(block b) { |
182 | 8.91k | assert(block_is_const(b)); |
183 | 8.91k | return jv_get_kind(b.first->imm.constant); |
184 | 8.91k | } |
185 | | |
186 | 1.21M | jv block_const(block b) { |
187 | 1.21M | assert(block_is_const(b)); |
188 | 1.21M | return jv_copy(b.first->imm.constant); |
189 | 1.21M | } |
190 | | |
191 | 1.31M | block gen_op_target(opcode op, block target) { |
192 | 1.31M | assert(opcode_describe(op)->flags & OP_HAS_BRANCH); |
193 | 1.31M | assert(target.last); |
194 | 1.31M | inst* i = inst_new(op); |
195 | 1.31M | i->imm.target = target.last; |
196 | 1.31M | return inst_block(i); |
197 | 1.31M | } |
198 | | |
199 | 100k | block gen_op_targetlater(opcode op) { |
200 | 100k | assert(opcode_describe(op)->flags & OP_HAS_BRANCH); |
201 | 100k | inst* i = inst_new(op); |
202 | 100k | i->imm.target = 0; |
203 | 100k | return inst_block(i); |
204 | 100k | } |
205 | 100k | void inst_set_target(block b, block target) { |
206 | 100k | assert(block_is_single(b)); |
207 | 100k | assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH); |
208 | 100k | assert(target.last); |
209 | 100k | b.first->imm.target = target.last; |
210 | 100k | } |
211 | | |
212 | 4.67M | block gen_op_unbound(opcode op, const char* name) { |
213 | 4.67M | assert(opcode_describe(op)->flags & OP_HAS_BINDING); |
214 | 4.67M | inst* i = inst_new(op); |
215 | 4.67M | i->symbol = strdup(name); |
216 | 4.67M | i->any_unbound = 1; |
217 | 4.67M | return inst_block(i); |
218 | 4.67M | } |
219 | | |
220 | 290k | block gen_op_var_fresh(opcode op, const char* name) { |
221 | 290k | assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); |
222 | 290k | block b = gen_op_unbound(op, name); |
223 | 290k | b.first->bound_by = b.first; |
224 | 290k | return b; |
225 | 290k | } |
226 | | |
227 | 659k | block gen_op_bound(opcode op, block binder) { |
228 | 659k | assert(block_is_single(binder)); |
229 | 659k | block b = gen_op_unbound(op, binder.first->symbol); |
230 | 659k | b.first->bound_by = binder.first; |
231 | 659k | b.first->any_unbound = 0; |
232 | 659k | return b; |
233 | 659k | } |
234 | | |
235 | 85.8k | block gen_dictpair(block k, block v) { |
236 | 85.8k | return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT)); |
237 | 85.8k | } |
238 | | |
239 | | |
240 | 12.7M | static void inst_join(inst* a, inst* b) { |
241 | 12.7M | assert(a && b); |
242 | 12.7M | assert(!a->next); |
243 | 12.7M | assert(!b->prev); |
244 | 12.7M | a->next = b; |
245 | 12.7M | b->prev = a; |
246 | 12.7M | } |
247 | | |
248 | 14.6M | void block_append(block* b, block b2) { |
249 | 14.6M | if (b2.first) { |
250 | 13.3M | if (b->last) { |
251 | 12.7M | inst_join(b->last, b2.first); |
252 | 12.7M | } else { |
253 | 633k | b->first = b2.first; |
254 | 633k | } |
255 | 13.3M | b->last = b2.last; |
256 | 13.3M | } |
257 | 14.6M | } |
258 | | |
259 | 14.5M | block block_join(block a, block b) { |
260 | 14.5M | block c = a; |
261 | 14.5M | block_append(&c, b); |
262 | 14.5M | return c; |
263 | 14.5M | } |
264 | | |
265 | 4.18k | int block_has_only_binders_and_imports(block binders, int bindflags) { |
266 | 4.18k | bindflags |= OP_HAS_BINDING; |
267 | 447k | for (inst* curr = binders.first; curr; curr = curr->next) { |
268 | 443k | if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) { |
269 | 0 | return 0; |
270 | 0 | } |
271 | 443k | } |
272 | 4.18k | return 1; |
273 | 4.18k | } |
274 | | |
275 | 0 | static int inst_is_binder(inst *i, int bindflags) { |
276 | 0 | return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA); |
277 | 0 | } |
278 | | |
279 | 52.7k | int block_has_only_binders(block binders, int bindflags) { |
280 | 52.7k | bindflags |= OP_HAS_BINDING; |
281 | 52.7k | bindflags &= ~OP_BIND_WILDCARD; |
282 | 1.13M | for (inst* curr = binders.first; curr; curr = curr->next) { |
283 | 1.08M | if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) { |
284 | 0 | return 0; |
285 | 0 | } |
286 | 1.08M | } |
287 | 52.7k | return 1; |
288 | 52.7k | } |
289 | | |
290 | | // Count a call site's actual params |
291 | 2.23M | static int block_count_actuals(block b) { |
292 | 2.23M | int args = 0; |
293 | 4.12M | for (inst* i = b.first; i; i = i->next) { |
294 | 1.88M | switch (i->op) { |
295 | 0 | default: assert(0 && "Unknown function type"); break; |
296 | 1.88M | case CLOSURE_CREATE: |
297 | 1.88M | case CLOSURE_PARAM: |
298 | 1.88M | case CLOSURE_CREATE_C: |
299 | 1.88M | args++; |
300 | 1.88M | break; |
301 | 1.88M | } |
302 | 1.88M | } |
303 | 2.23M | return args; |
304 | 2.23M | } |
305 | | |
306 | 217M | static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) { |
307 | 217M | assert(block_is_single(binder)); |
308 | 217M | assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); |
309 | 217M | assert(binder.first->symbol); |
310 | 217M | assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first); |
311 | 217M | assert(break_distance >= 0); |
312 | | |
313 | 217M | binder.first->bound_by = binder.first; |
314 | 217M | int nrefs = 0; |
315 | 482M | for (inst* i = body.first; i; i = i->next) { |
316 | 264M | if (i->any_unbound == 0) |
317 | 157M | continue; |
318 | | |
319 | 106M | int flags = opcode_describe(i->op)->flags; |
320 | 106M | if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 && |
321 | 106M | (!strcmp(i->symbol, binder.first->symbol) || |
322 | | // Check for break/break2/break3; see parser.y |
323 | 59.4M | ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' && |
324 | 57.9M | break_distance <= 3 && (i->symbol[1] == '1' + break_distance) && |
325 | 57.9M | i->symbol[2] == '\0'))) { |
326 | | // bind this instruction |
327 | 1.48M | if (i->nactuals == -1 || i->nactuals == binder.first->nformals) { |
328 | 1.38M | i->bound_by = binder.first; |
329 | 1.38M | nrefs++; |
330 | 1.38M | } |
331 | 105M | } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 && |
332 | 105M | !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) && |
333 | 105M | !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) { |
334 | | // Increment the break distance required for this binder to match |
335 | | // a break whenever we come across a STOREV of *anonlabel... |
336 | 0 | break_distance++; |
337 | 0 | } |
338 | | |
339 | 106M | i->any_unbound = (i->symbol && !i->bound_by); |
340 | | |
341 | | // binding recurses into closures |
342 | 106M | nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance); |
343 | | // binding recurses into argument list |
344 | 106M | nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance); |
345 | | |
346 | 106M | if (i->any_unbound) |
347 | 102M | *any_unbound = 1; |
348 | 106M | } |
349 | 217M | return nrefs; |
350 | 217M | } |
351 | | |
352 | 4.26M | static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { |
353 | 4.26M | int any_unbound; |
354 | 4.26M | return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance); |
355 | 4.26M | } |
356 | | |
357 | 13.7k | static int block_bind_each(block binder, block body, int bindflags) { |
358 | 13.7k | assert(block_has_only_binders(binder, bindflags)); |
359 | 13.7k | bindflags |= OP_HAS_BINDING; |
360 | 13.7k | int nrefs = 0; |
361 | 27.5k | for (inst* curr = binder.first; curr; curr = curr->next) { |
362 | 13.7k | nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0); |
363 | 13.7k | } |
364 | 13.7k | return nrefs; |
365 | 13.7k | } |
366 | | |
367 | 13.7k | static block block_bind(block binder, block body, int bindflags) { |
368 | 13.7k | block_bind_each(binder, body, bindflags); |
369 | 13.7k | return block_join(binder, body); |
370 | 13.7k | } |
371 | | |
372 | 0 | block block_bind_library(block binder, block body, int bindflags, const char *libname) { |
373 | 0 | bindflags |= OP_HAS_BINDING; |
374 | 0 | int matchlen = (libname == NULL) ? 0 : strlen(libname); |
375 | 0 | char *matchname = jv_mem_alloc(matchlen+2+1); |
376 | 0 | matchname[0] = '\0'; |
377 | 0 | if (libname != NULL && libname[0] != '\0') { |
378 | 0 | strcpy(matchname,libname); |
379 | 0 | strcpy(matchname+matchlen, "::"); |
380 | 0 | matchlen += 2; |
381 | 0 | } |
382 | 0 | assert(block_has_only_binders(binder, bindflags)); |
383 | 0 | for (inst *curr = binder.last; curr; curr = curr->prev) { |
384 | 0 | int bindflags2 = bindflags; |
385 | 0 | char* cname = curr->symbol; |
386 | 0 | char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1); |
387 | 0 | strcpy(tname, matchname); |
388 | 0 | strcpy(tname+matchlen, curr->symbol); |
389 | | |
390 | | // Ew |
391 | 0 | if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT))) |
392 | 0 | bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING; |
393 | | |
394 | | // This mutation is ugly, even if we undo it |
395 | 0 | curr->symbol = tname; |
396 | 0 | block_bind_subblock(inst_block(curr), body, bindflags2, 0); |
397 | 0 | curr->symbol = cname; |
398 | 0 | free(tname); |
399 | 0 | } |
400 | 0 | free(matchname); |
401 | 0 | return body; // We don't return a join because we don't want those sticking around... |
402 | 0 | } |
403 | | |
404 | 1.11M | static inst* block_take_last(block* b) { |
405 | 1.11M | inst* i = b->last; |
406 | 1.11M | if (i == 0) |
407 | 39.0k | return 0; |
408 | 1.07M | if (i->prev) { |
409 | 1.03M | i->prev->next = i->next; |
410 | 1.03M | b->last = i->prev; |
411 | 1.03M | i->prev = 0; |
412 | 1.03M | } else { |
413 | 39.0k | b->first = 0; |
414 | 39.0k | b->last = 0; |
415 | 39.0k | } |
416 | 1.07M | return i; |
417 | 1.11M | } |
418 | | |
419 | | // Binds a sequence of binders, which *must not* already be bound to each other, |
420 | | // to body, throwing away unreferenced defs |
421 | 39.0k | block block_bind_referenced(block binder, block body, int bindflags) { |
422 | 39.0k | assert(block_has_only_binders(binder, bindflags)); |
423 | 39.0k | bindflags |= OP_HAS_BINDING; |
424 | | |
425 | 39.0k | inst* curr; |
426 | 1.11M | while ((curr = block_take_last(&binder))) { |
427 | 1.07M | block b = inst_block(curr); |
428 | 1.07M | if (block_bind_subblock(b, body, bindflags, 0) == 0) { |
429 | 994k | block_free(b); |
430 | 994k | } else { |
431 | 78.0k | body = BLOCK(b, body); |
432 | 78.0k | } |
433 | 1.07M | } |
434 | 39.0k | return body; |
435 | 39.0k | } |
436 | | |
437 | 0 | block block_bind_self(block binder, int bindflags) { |
438 | 0 | assert(block_has_only_binders(binder, bindflags)); |
439 | 0 | bindflags |= OP_HAS_BINDING; |
440 | 0 | block body = gen_noop(); |
441 | |
|
442 | 0 | inst* curr; |
443 | 0 | while ((curr = block_take_last(&binder))) { |
444 | 0 | block b = inst_block(curr); |
445 | 0 | block_bind_subblock(b, body, bindflags, 0); |
446 | 0 | body = BLOCK(b, body); |
447 | 0 | } |
448 | 0 | return body; |
449 | 0 | } |
450 | | |
451 | 1.32M | static void block_mark_referenced(block body) { |
452 | 1.32M | int saw_top = 0; |
453 | 1.98M | for (inst* i = body.last; i; i = i->prev) { |
454 | 660k | if (saw_top && i->bound_by == i && !i->referenced) |
455 | 0 | continue; |
456 | 660k | if (i->op == TOP) { |
457 | 4.18k | saw_top = 1; |
458 | 4.18k | } |
459 | 660k | if (i->bound_by) { |
460 | 173k | i->bound_by->referenced = 1; |
461 | 173k | } |
462 | | |
463 | 660k | block_mark_referenced(i->arglist); |
464 | 660k | block_mark_referenced(i->subfn); |
465 | 660k | } |
466 | 1.32M | } |
467 | | |
468 | 4.18k | block block_drop_unreferenced(block body) { |
469 | 4.18k | block_mark_referenced(body); |
470 | | |
471 | 4.18k | block refd = gen_noop(); |
472 | 4.18k | inst* curr; |
473 | 164k | while ((curr = block_take(&body))) { |
474 | 160k | if (curr->bound_by == curr && !curr->referenced) { |
475 | 0 | inst_free(curr); |
476 | 160k | } else { |
477 | 160k | refd = BLOCK(refd, inst_block(curr)); |
478 | 160k | } |
479 | 160k | } |
480 | 4.18k | return refd; |
481 | 4.18k | } |
482 | | |
483 | 4.20k | jv block_take_imports(block* body) { |
484 | 4.20k | jv imports = jv_array(); |
485 | | |
486 | | /* Parser should never generate TOP before imports */ |
487 | 4.20k | assert(!(body->first && body->first->op == TOP && body->first->next && |
488 | 4.20k | (body->first->next->op == MODULEMETA || body->first->next->op == DEPS))); |
489 | | |
490 | 8.60k | while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) { |
491 | 4.40k | inst* dep = block_take(body); |
492 | 4.40k | if (dep->op == DEPS) { |
493 | 4.40k | imports = jv_array_append(imports, jv_copy(dep->imm.constant)); |
494 | 4.40k | } |
495 | 4.40k | inst_free(dep); |
496 | 4.40k | } |
497 | 4.20k | return imports; |
498 | 4.20k | } |
499 | | |
500 | 4.18k | jv block_list_funcs(block body, int omit_underscores) { |
501 | 4.18k | jv funcs = jv_object(); // Use the keys for set semantics. |
502 | 1.03M | for (inst *pos = body.first; pos != NULL; pos = pos->next) { |
503 | 1.03M | if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) { |
504 | 1.03M | if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) { |
505 | 941k | funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null()); |
506 | 941k | } |
507 | 1.03M | } |
508 | 1.03M | } |
509 | 4.18k | return jv_keys_unsorted(funcs); |
510 | 4.18k | } |
511 | | |
512 | 28 | block gen_module(block metadata) { |
513 | 28 | assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT); |
514 | 28 | inst* i = inst_new(MODULEMETA); |
515 | 28 | i->imm.constant = block_const(metadata); |
516 | 28 | if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT) |
517 | 0 | i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant); |
518 | 28 | block_free(metadata); |
519 | 28 | return inst_block(i); |
520 | 28 | } |
521 | | |
522 | 0 | jv block_module_meta(block b) { |
523 | 0 | if (b.first != NULL && b.first->op == MODULEMETA) |
524 | 0 | return jv_copy(b.first->imm.constant); |
525 | 0 | return jv_null(); |
526 | 0 | } |
527 | | |
528 | 8.75k | block gen_import(const char* name, const char* as, int is_data) { |
529 | 8.75k | inst* i = inst_new(DEPS); |
530 | 8.75k | jv meta = jv_object(); |
531 | 8.75k | if (as != NULL) |
532 | 4.55k | meta = jv_object_set(meta, jv_string("as"), jv_string(as)); |
533 | 8.75k | meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false()); |
534 | 8.75k | meta = jv_object_set(meta, jv_string("relpath"), jv_string(name)); |
535 | 8.75k | i->imm.constant = meta; |
536 | 8.75k | return inst_block(i); |
537 | 8.75k | } |
538 | | |
539 | 4.54k | block gen_import_meta(block import, block metadata) { |
540 | 4.54k | assert(block_is_single(import) && import.first->op == DEPS); |
541 | 4.54k | assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT); |
542 | 4.54k | inst *i = import.first; |
543 | 4.54k | i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant); |
544 | 4.54k | block_free(metadata); |
545 | 4.54k | return import; |
546 | 4.54k | } |
547 | | |
548 | 2.39M | block gen_function(const char* name, block formals, block body) { |
549 | 2.39M | inst* i = inst_new(CLOSURE_CREATE); |
550 | 2.39M | int nformals = 0; |
551 | 2.84M | for (inst* i = formals.last; i; i = i->prev) { |
552 | 450k | nformals++; |
553 | 450k | i->nformals = 0; |
554 | 450k | if (i->op == CLOSURE_PARAM_REGULAR) { |
555 | 155k | i->op = CLOSURE_PARAM; |
556 | 155k | body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body); |
557 | 155k | } |
558 | 450k | block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); |
559 | 450k | } |
560 | 2.39M | i->subfn = body; |
561 | 2.39M | i->symbol = strdup(name); |
562 | 2.39M | i->any_unbound = -1; |
563 | 2.39M | i->nformals = nformals; |
564 | 2.39M | i->arglist = formals; |
565 | 2.39M | block b = inst_block(i); |
566 | 2.39M | block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); |
567 | 2.39M | return b; |
568 | 2.39M | } |
569 | | |
570 | 155k | block gen_param_regular(const char* name) { |
571 | 155k | return gen_op_unbound(CLOSURE_PARAM_REGULAR, name); |
572 | 155k | } |
573 | | |
574 | 295k | block gen_param(const char* name) { |
575 | 295k | return gen_op_unbound(CLOSURE_PARAM, name); |
576 | 295k | } |
577 | | |
578 | 1.89M | block gen_lambda(block body) { |
579 | 1.89M | return gen_function("@lambda", gen_noop(), body); |
580 | 1.89M | } |
581 | | |
582 | 2.23M | block gen_call(const char* name, block args) { |
583 | 2.23M | block b = gen_op_unbound(CALL_JQ, name); |
584 | 2.23M | b.first->arglist = args; |
585 | 2.23M | b.first->nactuals = block_count_actuals(b.first->arglist); |
586 | 2.23M | return b; |
587 | 2.23M | } |
588 | | |
589 | 1.27M | block gen_subexp(block a) { |
590 | 1.27M | if (block_is_noop(a)) { |
591 | 45.9k | return gen_op_simple(DUP); |
592 | 45.9k | } |
593 | 1.22M | if (block_is_single(a) && a.first->op == LOADK) { |
594 | 548k | jv c = block_const(a); |
595 | 548k | block_free(a); |
596 | 548k | return gen_op_pushk_under(c); |
597 | 548k | } |
598 | 677k | return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END)); |
599 | 1.22M | } |
600 | | |
601 | 100k | block gen_both(block a, block b) { |
602 | 100k | block jump = gen_op_targetlater(JUMP); |
603 | 100k | block fork = gen_op_target(FORK, jump); |
604 | 100k | block c = BLOCK(fork, a, jump, b); |
605 | 100k | inst_set_target(jump, c); |
606 | 100k | return c; |
607 | 100k | } |
608 | | |
609 | 70.4k | block gen_const_object(block expr) { |
610 | 70.4k | int is_const = 1; |
611 | 70.4k | jv o = jv_object(); |
612 | 70.4k | jv k = jv_null(); |
613 | 70.4k | jv v = jv_null(); |
614 | 102k | for (inst *i = expr.first; i; i = i->next) { |
615 | 60.9k | if (i->op == PUSHK_UNDER) { |
616 | 45.1k | k = jv_copy(i->imm.constant); |
617 | 45.1k | i = i->next; |
618 | 45.1k | } else if (i->op != SUBEXP_BEGIN || |
619 | 15.8k | i->next == NULL || |
620 | 15.8k | i->next->op != LOADK || |
621 | 15.8k | i->next->next == NULL || |
622 | 15.8k | i->next->next->op != SUBEXP_END) { |
623 | 15.8k | is_const = 0; |
624 | 15.8k | break; |
625 | 15.8k | } else { |
626 | 0 | k = jv_copy(i->next->imm.constant); |
627 | 0 | i = i->next->next->next; |
628 | 0 | } |
629 | 45.1k | if (i != NULL && i->op == PUSHK_UNDER) { |
630 | 33.5k | v = jv_copy(i->imm.constant); |
631 | 33.5k | i = i->next; |
632 | 33.5k | } else if (i == NULL || |
633 | 11.5k | i->op != SUBEXP_BEGIN || |
634 | 11.5k | i->next == NULL || |
635 | 11.5k | i->next->op != LOADK || |
636 | 11.5k | i->next->next == NULL || |
637 | 11.5k | i->next->next->op != SUBEXP_END) { |
638 | 11.5k | is_const = 0; |
639 | 11.5k | break; |
640 | 11.5k | } else { |
641 | 0 | v = jv_copy(i->next->imm.constant); |
642 | 0 | i = i->next->next->next; |
643 | 0 | } |
644 | 33.5k | if (i == NULL || i->op != INSERT) { |
645 | 895 | is_const = 0; |
646 | 895 | break; |
647 | 895 | } |
648 | 32.6k | if (jv_get_kind(k) != JV_KIND_STRING) { |
649 | 251 | is_const = 0; |
650 | 251 | break; |
651 | 251 | } |
652 | 32.4k | o = jv_object_set(o, k, v); |
653 | 32.4k | k = jv_null(); |
654 | 32.4k | v = jv_null(); |
655 | 32.4k | } |
656 | 70.4k | if (!is_const) { |
657 | 28.5k | jv_free(o); |
658 | 28.5k | jv_free(k); |
659 | 28.5k | jv_free(v); |
660 | 28.5k | block b = {0,0}; |
661 | 28.5k | return b; |
662 | 28.5k | } |
663 | 41.9k | block_free(expr); |
664 | 41.9k | return gen_const(o); |
665 | 70.4k | } |
666 | | |
667 | 187k | static block gen_const_array(block expr) { |
668 | | /* |
669 | | * An expr of all constant elements looks like this: |
670 | | * |
671 | | * 0009 FORK 0027 |
672 | | * 0011 FORK 0023 |
673 | | * 0013 FORK 0019 |
674 | | * 0015 LOADK 1 |
675 | | * 0017 JUMP 0021 |
676 | | * 0019 LOADK 2 |
677 | | * 0021 JUMP 0025 |
678 | | * 0023 LOADK 3 |
679 | | * 0025 JUMP 0029 |
680 | | * 0027 LOADK 4 |
681 | | * |
682 | | * That's: N-1 commas for N elements, N LOADKs, and a JUMP between |
683 | | * every LOADK. The sequence ends in a LOADK. Any deviation and it's |
684 | | * not a list of constants. |
685 | | * |
686 | | * Here we check for this pattern almost exactly. We don't check that |
687 | | * the targets of the FORK and JUMP instructions are in the right |
688 | | * sequence. |
689 | | */ |
690 | 187k | int all_const = 1; |
691 | 187k | int commas = 0; |
692 | 187k | int normal = 1; |
693 | 187k | jv a = jv_array(); |
694 | 764k | for (inst *i = expr.first; i; i = i->next) { |
695 | 599k | if (i->op == FORK) { |
696 | 59.1k | commas++; |
697 | 59.1k | if (i->imm.target == NULL || i->imm.target->op != JUMP || |
698 | 59.1k | jv_array_length(jv_copy(a)) > 0) { |
699 | 22.4k | normal = 0; |
700 | 22.4k | break; |
701 | 22.4k | } |
702 | 540k | } else if (all_const && i->op == LOADK) { |
703 | 69.2k | if (i->next != NULL && i->next->op != JUMP) { |
704 | 477 | normal = 0; |
705 | 477 | break; |
706 | 477 | } |
707 | 68.7k | a = jv_array_append(a, jv_copy(i->imm.constant)); |
708 | 471k | } else if (i->op != JUMP || i->imm.target == NULL || |
709 | 471k | i->imm.target->op != LOADK) { |
710 | 458k | all_const = 0; |
711 | 458k | } |
712 | 599k | } |
713 | | |
714 | 187k | if (all_const && normal && |
715 | 187k | (expr.last == NULL || expr.last->op == LOADK) && |
716 | 187k | jv_array_length(jv_copy(a)) == commas + 1) { |
717 | 50.1k | block_free(expr); |
718 | 50.1k | return gen_const(a); |
719 | 50.1k | } |
720 | | |
721 | 137k | jv_free(a); |
722 | 137k | block b = {0,0}; |
723 | 137k | return b; |
724 | 187k | } |
725 | | |
726 | 187k | block gen_collect(block expr) { |
727 | 187k | block const_array = gen_const_array(expr); |
728 | 187k | if (const_array.first != NULL) |
729 | 50.1k | return const_array; |
730 | | |
731 | 137k | block array_var = gen_op_var_fresh(STOREV, "collect"); |
732 | 137k | block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var); |
733 | | |
734 | 137k | block tail = BLOCK(gen_op_bound(APPEND, array_var), |
735 | 137k | gen_op_simple(BACKTRACK)); |
736 | | |
737 | 137k | return BLOCK(c, |
738 | 187k | gen_op_target(FORK, tail), |
739 | 187k | expr, |
740 | 187k | tail, |
741 | 187k | gen_op_bound(LOADVN, array_var)); |
742 | 187k | } |
743 | | |
744 | 323k | static block bind_matcher(block matcher, block body) { |
745 | | // cannot call block_bind(matcher, body) because that requires |
746 | | // block_has_only_binders(matcher), which is not true here as matchers |
747 | | // may also contain code to extract the correct elements |
748 | 690k | for (inst* i = matcher.first; i; i = i->next) { |
749 | 367k | if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by) |
750 | 329k | block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0); |
751 | 367k | } |
752 | 323k | return BLOCK(matcher, body); |
753 | 323k | } |
754 | | |
755 | | |
756 | | // Extract destructuring var names from the block |
757 | | // *vars should be a jv_object (for set semantics) |
758 | 5.12k | static void block_get_unbound_vars(block b, jv *vars) { |
759 | 5.12k | assert(vars != NULL); |
760 | 5.12k | assert(jv_get_kind(*vars) == JV_KIND_OBJECT); |
761 | 20.9k | for (inst* i = b.first; i; i = i->next) { |
762 | 15.8k | if (i->subfn.first) { |
763 | 3.14k | block_get_unbound_vars(i->subfn, vars); |
764 | 3.14k | continue; |
765 | 3.14k | } |
766 | 12.6k | if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) { |
767 | 5.72k | *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true()); |
768 | 5.72k | } |
769 | 12.6k | } |
770 | 5.12k | } |
771 | | |
772 | | /* Build wrappers around destructuring matchers so that we can chain them |
773 | | * when we have errors. The approach is as follows: |
774 | | * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher) |
775 | | * existing_matcher_block |
776 | | * JUMP BODY |
777 | | */ |
778 | 323k | static block bind_alternation_matchers(block matchers, block body) { |
779 | 323k | block preamble = {0}; |
780 | 323k | block altmatchers = {0}; |
781 | 323k | block mb = {0}; |
782 | 323k | block final_matcher = matchers; |
783 | | |
784 | | // Pass through the matchers to find all destructured names. |
785 | 326k | while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) { |
786 | 3.14k | block_append(&altmatchers, inst_block(block_take(&final_matcher))); |
787 | 3.14k | } |
788 | | |
789 | | // We don't have any alternations here, so we can use the simplest case. |
790 | 323k | if (altmatchers.first == NULL) { |
791 | 322k | return bind_matcher(final_matcher, body); |
792 | 322k | } |
793 | | |
794 | | // Collect var names |
795 | 986 | jv all_vars = jv_object(); |
796 | 986 | block_get_unbound_vars(altmatchers, &all_vars); |
797 | 986 | block_get_unbound_vars(final_matcher, &all_vars); |
798 | | |
799 | | // We need a preamble of STOREVs to which to bind the matchers and the body. |
800 | 3.20k | jv_object_keys_foreach(all_vars, key) { |
801 | 3.20k | preamble = BLOCK(preamble, |
802 | 3.20k | gen_op_simple(DUP), |
803 | 3.20k | gen_const(jv_null()), |
804 | 3.20k | gen_op_unbound(STOREV, jv_string_value(key))); |
805 | 3.20k | jv_free(key); |
806 | 3.20k | } |
807 | 986 | jv_free(all_vars); |
808 | | |
809 | | // Now we build each matcher in turn |
810 | 4.13k | for (inst *i = altmatchers.first; i; i = i->next) { |
811 | 3.14k | block submatcher = i->subfn; |
812 | | |
813 | | // If we're successful, jump to the end of the matchers |
814 | 3.14k | submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher)); |
815 | | |
816 | | // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error |
817 | 3.14k | mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher); |
818 | | |
819 | | // We're done with this inst and we don't want it anymore |
820 | | // But we can't let it free the submatcher block. |
821 | 3.14k | i->subfn.first = i->subfn.last = NULL; |
822 | 3.14k | } |
823 | | // We're done with these insts now. |
824 | 986 | block_free(altmatchers); |
825 | | |
826 | 986 | return bind_matcher(preamble, BLOCK(mb, final_matcher, body)); |
827 | 323k | } |
828 | | |
829 | 53.0k | block gen_reduce(block source, block matcher, block init, block body) { |
830 | 53.0k | block res_var = gen_op_var_fresh(STOREV, "reduce"); |
831 | 53.0k | block loop = BLOCK(gen_op_simple(DUPN), |
832 | 53.0k | source, |
833 | 53.0k | bind_alternation_matchers(matcher, |
834 | 53.0k | BLOCK(gen_op_bound(LOADVN, res_var), |
835 | 53.0k | body, |
836 | 53.0k | gen_op_bound(STOREV, res_var))), |
837 | 53.0k | gen_op_simple(BACKTRACK)); |
838 | 53.0k | return BLOCK(gen_op_simple(DUP), |
839 | 53.0k | init, |
840 | 53.0k | res_var, |
841 | 53.0k | gen_op_target(FORK, loop), |
842 | 53.0k | loop, |
843 | 53.0k | gen_op_bound(LOADVN, res_var)); |
844 | 53.0k | } |
845 | | |
846 | 18.0k | block gen_foreach(block source, block matcher, block init, block update, block extract) { |
847 | 18.0k | block state_var = gen_op_var_fresh(STOREV, "foreach"); |
848 | 18.0k | return BLOCK(gen_op_simple(DUP), |
849 | 18.0k | init, |
850 | 18.0k | state_var, |
851 | 18.0k | gen_op_simple(DUP), |
852 | | // get a value from the source expression: |
853 | 18.0k | source, |
854 | | // destructure the value into variable(s) for all the code |
855 | | // in the body to see |
856 | 18.0k | bind_alternation_matchers(matcher, |
857 | | // load the loop state variable |
858 | 18.0k | BLOCK(gen_op_bound(LOADVN, state_var), |
859 | | // generate updated state |
860 | 18.0k | update, |
861 | | // save the updated state for value extraction |
862 | 18.0k | gen_op_simple(DUP), |
863 | | // save new state |
864 | 18.0k | gen_op_bound(STOREV, state_var), |
865 | | // extract an output... |
866 | 18.0k | extract))); |
867 | 18.0k | } |
868 | | |
869 | 49.2k | block gen_definedor(block a, block b) { |
870 | | // var found := false |
871 | 49.2k | block found_var = gen_op_var_fresh(STOREV, "found"); |
872 | 49.2k | block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var); |
873 | | |
874 | | // if found, backtrack. Otherwise execute b |
875 | 49.2k | block backtrack = gen_op_simple(BACKTRACK); |
876 | 49.2k | block tail = BLOCK(gen_op_simple(DUP), |
877 | 49.2k | gen_op_bound(LOADV, found_var), |
878 | 49.2k | gen_op_target(JUMP_F, backtrack), |
879 | 49.2k | backtrack, |
880 | 49.2k | gen_op_simple(POP), |
881 | 49.2k | b); |
882 | | |
883 | | // try again |
884 | 49.2k | block if_notfound = gen_op_simple(BACKTRACK); |
885 | | |
886 | | // found := true, produce result |
887 | 49.2k | block if_found = BLOCK(gen_op_simple(DUP), |
888 | 49.2k | gen_const(jv_true()), |
889 | 49.2k | gen_op_bound(STOREV, found_var), |
890 | 49.2k | gen_op_target(JUMP, tail)); |
891 | | |
892 | 49.2k | return BLOCK(init, |
893 | 49.2k | gen_op_target(FORK, if_notfound), |
894 | 49.2k | a, |
895 | 49.2k | gen_op_target(JUMP_F, if_found), |
896 | 49.2k | if_found, |
897 | 49.2k | if_notfound, |
898 | 49.2k | tail); |
899 | 49.2k | } |
900 | | |
901 | 8.39k | int block_has_main(block top) { |
902 | 452k | for (inst *c = top.first; c; c = c->next) { |
903 | 447k | if (c->op == TOP) |
904 | 4.20k | return 1; |
905 | 447k | } |
906 | 4.19k | return 0; |
907 | 8.39k | } |
908 | | |
909 | 0 | int block_is_funcdef(block b) { |
910 | 0 | if (b.first != NULL && b.first->op == CLOSURE_CREATE) |
911 | 0 | return 1; |
912 | 0 | return 0; |
913 | 0 | } |
914 | | |
915 | 354k | block gen_condbranch(block iftrue, block iffalse) { |
916 | 354k | iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse)); |
917 | 354k | return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse); |
918 | 354k | } |
919 | | |
920 | 61.3k | block gen_and(block a, block b) { |
921 | | // a and b = if a then (if b then true else false) else false |
922 | 61.3k | return BLOCK(gen_op_simple(DUP), a, |
923 | 61.3k | gen_condbranch(BLOCK(gen_op_simple(POP), |
924 | 61.3k | b, |
925 | 61.3k | gen_condbranch(gen_const(jv_true()), |
926 | 61.3k | gen_const(jv_false()))), |
927 | 61.3k | BLOCK(gen_op_simple(POP), gen_const(jv_false())))); |
928 | 61.3k | } |
929 | | |
930 | 13.8k | block gen_or(block a, block b) { |
931 | | // a or b = if a then true else (if b then true else false) |
932 | 13.8k | return BLOCK(gen_op_simple(DUP), a, |
933 | 13.8k | gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())), |
934 | 13.8k | BLOCK(gen_op_simple(POP), |
935 | 13.8k | b, |
936 | 13.8k | gen_condbranch(gen_const(jv_true()), |
937 | 13.8k | gen_const(jv_false()))))); |
938 | 13.8k | } |
939 | | |
940 | 5.34k | block gen_destructure_alt(block matcher) { |
941 | 25.3k | for (inst *i = matcher.first; i; i = i->next) { |
942 | 20.0k | if (i->op == STOREV) { |
943 | 7.67k | i->op = STOREVN; |
944 | 7.67k | } |
945 | 20.0k | } |
946 | 5.34k | inst* i = inst_new(DESTRUCTURE_ALT); |
947 | 5.34k | i->subfn = matcher; |
948 | 5.34k | return inst_block(i); |
949 | 5.34k | } |
950 | | |
951 | 155k | block gen_var_binding(block var, const char* name, block body) { |
952 | 155k | return gen_destructure(var, gen_op_unbound(STOREV, name), body); |
953 | 155k | } |
954 | | |
955 | 2.47k | block gen_array_matcher(block left, block curr) { |
956 | 2.47k | int index; |
957 | 2.47k | if (block_is_noop(left)) |
958 | 2.26k | index = 0; |
959 | 209 | else { |
960 | | // `left` was returned by this function, so the third inst is the |
961 | | // constant containing the previously used index |
962 | 209 | assert(left.first->op == DUP); |
963 | 209 | assert(left.first->next != NULL); |
964 | 209 | inst *i = NULL; |
965 | 209 | if (left.first->next->op == PUSHK_UNDER) { |
966 | 209 | i = left.first->next; |
967 | 209 | } else { |
968 | 0 | assert(left.first->next->op == SUBEXP_BEGIN); |
969 | 0 | assert(left.first->next->next->op == LOADK); |
970 | 0 | i = left.first->next->next; |
971 | 0 | } |
972 | 209 | index = 1 + (int) jv_number_value(i->imm.constant); |
973 | 209 | } |
974 | | |
975 | | // `left` goes at the end so that the const index is in a predictable place |
976 | 2.47k | return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))), |
977 | 2.47k | gen_op_simple(INDEX), curr, left); |
978 | 2.47k | } |
979 | | |
980 | 14.2k | block gen_object_matcher(block name, block curr) { |
981 | 14.2k | return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX), |
982 | 14.2k | curr); |
983 | 14.2k | } |
984 | | |
985 | 252k | block gen_destructure(block var, block matchers, block body) { |
986 | | // var bindings can be added after coding the program; leave the TOP first. |
987 | 252k | block top = gen_noop(); |
988 | 252k | if (body.first && body.first->op == TOP) |
989 | 0 | top = inst_block(block_take(&body)); |
990 | | |
991 | 252k | if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) { |
992 | 986 | block_append(&var, gen_op_simple(DUP)); |
993 | 251k | } else { |
994 | 251k | top = BLOCK(top, gen_op_simple(DUP)); |
995 | 251k | } |
996 | | |
997 | 252k | return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body)); |
998 | 252k | } |
999 | | |
1000 | | // Like gen_var_binding(), but bind `break`'s wildcard unbound variable |
1001 | 13.7k | static block gen_wildvar_binding(block var, const char* name, block body) { |
1002 | 13.7k | return BLOCK(gen_op_simple(DUP), var, |
1003 | 13.7k | block_bind(gen_op_unbound(STOREV, name), |
1004 | 13.7k | body, OP_HAS_VARIABLE | OP_BIND_WILDCARD)); |
1005 | 13.7k | } |
1006 | | |
1007 | 199k | block gen_cond(block cond, block iftrue, block iffalse) { |
1008 | 199k | return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)), |
1009 | 199k | gen_condbranch(BLOCK(gen_op_simple(POP), iftrue), |
1010 | 199k | BLOCK(gen_op_simple(POP), iffalse))); |
1011 | 199k | } |
1012 | | |
1013 | 50.1k | block gen_try(block exp, block handler) { |
1014 | | /* |
1015 | | * Produce: |
1016 | | * |
1017 | | * TRY_BEGIN handler |
1018 | | * <exp> |
1019 | | * TRY_END |
1020 | | * JUMP past_handler |
1021 | | * handler: <handler> |
1022 | | * past_handler: |
1023 | | * |
1024 | | * If <exp> backtracks then TRY_BEGIN will backtrack. |
1025 | | * |
1026 | | * If <exp> produces a value then we'll execute whatever bytecode follows |
1027 | | * this sequence. If that code raises an exception, then TRY_END will wrap |
1028 | | * and re-raise that exception, and TRY_BEGIN will unwrap and re-raise the |
1029 | | * exception (see jq_next()). |
1030 | | * |
1031 | | * If <exp> raises then the TRY_BEGIN will see a non-wrapped exception and |
1032 | | * will jump to the handler (note the TRY_END will not execute in this case), |
1033 | | * and if the handler produces any values, then we'll execute whatever |
1034 | | * bytecode follows this sequence. Note that TRY_END will not execute in |
1035 | | * this case, so if the handler raises an exception, or code past the handler |
1036 | | * raises an exception, then that exception won't be wrapped and re-raised, |
1037 | | * and the TRY_BEGIN will not catch it because it does not stack_save() when |
1038 | | * it branches to the handler. |
1039 | | */ |
1040 | | |
1041 | 50.1k | if (block_is_noop(handler)) |
1042 | 379 | handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP)); |
1043 | | |
1044 | 50.1k | block jump = gen_op_target(JUMP, handler); |
1045 | 50.1k | return BLOCK(gen_op_target(TRY_BEGIN, jump), exp, gen_op_simple(TRY_END), |
1046 | 50.1k | jump, handler); |
1047 | 50.1k | } |
1048 | | |
1049 | 13.7k | block gen_label(const char *label, block exp) { |
1050 | 13.7k | block cond = gen_call("_equal", |
1051 | 13.7k | BLOCK(gen_lambda(gen_noop()), |
1052 | 13.7k | gen_lambda(gen_op_unbound(LOADV, label)))); |
1053 | 13.7k | return gen_wildvar_binding(gen_op_simple(GENLABEL), label, |
1054 | 13.7k | BLOCK(gen_op_simple(POP), |
1055 | | // try exp catch if . == $label |
1056 | | // then empty |
1057 | | // else error end |
1058 | | // |
1059 | | // Can't use gen_binop(), as that's firmly |
1060 | | // stuck in parser.y as it refers to things |
1061 | | // like EQ. |
1062 | 13.7k | gen_try(exp, |
1063 | 13.7k | gen_cond(cond, |
1064 | 13.7k | gen_op_simple(BACKTRACK), |
1065 | 13.7k | gen_call("error", gen_noop()))))); |
1066 | 13.7k | } |
1067 | | |
1068 | 4.18k | block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) { |
1069 | 573k | for (int cfunc=0; cfunc<ncfunctions; cfunc++) { |
1070 | 569k | inst* i = inst_new(CLOSURE_CREATE_C); |
1071 | 569k | i->imm.cfunc = &cfunctions[cfunc]; |
1072 | 569k | i->symbol = strdup(cfunctions[cfunc].name); |
1073 | 569k | i->nformals = cfunctions[cfunc].nargs - 1; |
1074 | 569k | i->any_unbound = 0; |
1075 | 569k | code = BLOCK(inst_block(i), code); |
1076 | 569k | } |
1077 | 4.18k | return code; |
1078 | 4.18k | } |
1079 | | |
1080 | 252k | static uint16_t nesting_level(struct bytecode* bc, inst* target) { |
1081 | 252k | uint16_t level = 0; |
1082 | 252k | assert(bc && target && target->compiled); |
1083 | 317k | while (bc && target->compiled != bc) { |
1084 | 65.3k | level++; |
1085 | 65.3k | bc = bc->parent; |
1086 | 65.3k | } |
1087 | 252k | assert(bc && bc == target->compiled); |
1088 | 252k | return level; |
1089 | 252k | } |
1090 | | |
1091 | 545k | static int count_cfunctions(block b) { |
1092 | 545k | int n = 0; |
1093 | 1.08M | for (inst* i = b.first; i; i = i->next) { |
1094 | 541k | if (i->op == CLOSURE_CREATE_C) n++; |
1095 | 541k | n += count_cfunctions(i->subfn); |
1096 | 541k | } |
1097 | 545k | return n; |
1098 | 545k | } |
1099 | | |
1100 | | #ifndef WIN32 |
1101 | | extern char **environ; |
1102 | | #endif |
1103 | | |
1104 | | static jv |
1105 | | make_env(jv env) |
1106 | 50 | { |
1107 | 50 | if (jv_is_valid(env)) |
1108 | 48 | return jv_copy(env); |
1109 | 2 | jv r = jv_object(); |
1110 | 2 | if (environ == NULL) |
1111 | 0 | return r; |
1112 | 72 | for (size_t i = 0; environ[i] != NULL; i++) { |
1113 | 70 | const char *eq; |
1114 | | |
1115 | 70 | if ((eq = strchr(environ[i], '=')) == NULL) |
1116 | 0 | r = jv_object_delete(r, jv_string(environ[i])); |
1117 | 70 | else |
1118 | 70 | r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1)); |
1119 | 70 | } |
1120 | 2 | return jv_copy(r); |
1121 | 2 | } |
1122 | | |
1123 | | // Expands call instructions into a calling sequence |
1124 | 208k | static int expand_call_arglist(block* b, jv args, jv *env) { |
1125 | 208k | int errors = 0; |
1126 | 208k | block ret = gen_noop(); |
1127 | 1.18M | for (inst* curr; (curr = block_take(b));) { |
1128 | 975k | if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) { |
1129 | 414k | if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) { |
1130 | 50 | curr->op = LOADK; |
1131 | 50 | *env = curr->imm.constant = make_env(*env); |
1132 | 414k | } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) { |
1133 | 0 | curr->op = LOADK; |
1134 | 0 | curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol)); |
1135 | 414k | } else if (!curr->bound_by) { |
1136 | 4.61k | if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0') |
1137 | 0 | locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure"); |
1138 | 4.61k | else if (curr->op == LOADV) |
1139 | 490 | locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol); |
1140 | 4.12k | else |
1141 | 4.12k | locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals); |
1142 | 4.61k | errors++; |
1143 | | // don't process this instruction if it's not well-defined |
1144 | 4.61k | ret = BLOCK(ret, inst_block(curr)); |
1145 | 4.61k | continue; |
1146 | 4.61k | } |
1147 | 414k | } |
1148 | | |
1149 | 970k | block prelude = gen_noop(); |
1150 | 970k | if (curr->op == CALL_JQ) { |
1151 | 227k | int actual_args = 0, desired_args = 0; |
1152 | | // We expand the argument list as a series of instructions |
1153 | 227k | switch (curr->bound_by->op) { |
1154 | 0 | default: assert(0 && "Unknown function type"); break; |
1155 | 40.2k | case CLOSURE_CREATE: |
1156 | 72.0k | case CLOSURE_PARAM: { |
1157 | 72.0k | block callargs = gen_noop(); |
1158 | 121k | for (inst* i; (i = block_take(&curr->arglist));) { |
1159 | 49.8k | assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO); |
1160 | 49.8k | block b = inst_block(i); |
1161 | 49.8k | switch (i->op) { |
1162 | 0 | default: assert(0 && "Unknown type of parameter"); break; |
1163 | 0 | case CLOSURE_REF: |
1164 | 0 | block_append(&callargs, b); |
1165 | 0 | break; |
1166 | 49.8k | case CLOSURE_CREATE: |
1167 | 49.8k | block_append(&prelude, b); |
1168 | 49.8k | block_append(&callargs, gen_op_bound(CLOSURE_REF, b)); |
1169 | 49.8k | break; |
1170 | 49.8k | } |
1171 | 49.8k | actual_args++; |
1172 | 49.8k | } |
1173 | 72.0k | curr->imm.intval = actual_args; |
1174 | 72.0k | curr->arglist = callargs; |
1175 | | |
1176 | 72.0k | if (curr->bound_by->op == CLOSURE_CREATE) { |
1177 | 90.1k | for (inst* i = curr->bound_by->arglist.first; i; i = i->next) { |
1178 | 49.8k | assert(i->op == CLOSURE_PARAM); |
1179 | 49.8k | desired_args++; |
1180 | 49.8k | } |
1181 | 40.2k | } |
1182 | 72.0k | break; |
1183 | 72.0k | } |
1184 | | |
1185 | 155k | case CLOSURE_CREATE_C: { |
1186 | 289k | for (inst* i; (i = block_take(&curr->arglist)); ) { |
1187 | 134k | assert(i->op == CLOSURE_CREATE); // FIXME |
1188 | 134k | block body = i->subfn; |
1189 | 134k | i->subfn = gen_noop(); |
1190 | 134k | inst_free(i); |
1191 | | // arguments should be pushed in reverse order, prepend them to prelude |
1192 | 134k | errors += expand_call_arglist(&body, args, env); |
1193 | 134k | prelude = BLOCK(gen_subexp(body), prelude); |
1194 | 134k | actual_args++; |
1195 | 134k | } |
1196 | 155k | assert(curr->op == CALL_JQ); |
1197 | 155k | curr->op = CALL_BUILTIN; |
1198 | 155k | curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */; |
1199 | 155k | assert(curr->bound_by->op == CLOSURE_CREATE_C); |
1200 | 155k | desired_args = curr->bound_by->imm.cfunc->nargs - 1; |
1201 | 155k | assert(!curr->arglist.first); |
1202 | 155k | break; |
1203 | 155k | } |
1204 | 227k | } |
1205 | | |
1206 | 227k | assert(actual_args == desired_args); // because now handle this above |
1207 | 227k | } |
1208 | 970k | ret = BLOCK(ret, prelude, inst_block(curr)); |
1209 | 970k | } |
1210 | 208k | *b = ret; |
1211 | 208k | return errors; |
1212 | 208k | } |
1213 | | |
1214 | 74.0k | static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) { |
1215 | 74.0k | int errors = 0; |
1216 | 74.0k | int pos = 0; |
1217 | 74.0k | int var_frame_idx = 0; |
1218 | 74.0k | bc->nsubfunctions = 0; |
1219 | 74.0k | errors += expand_call_arglist(&b, args, env); |
1220 | 74.0k | b = BLOCK(b, gen_op_simple(RET)); |
1221 | 74.0k | jv localnames = jv_array(); |
1222 | 1.38M | for (inst* curr = b.first; curr; curr = curr->next) { |
1223 | 1.30M | if (!curr->next) assert(curr == b.last); |
1224 | 1.30M | int length = opcode_describe(curr->op)->length; |
1225 | 1.30M | if (curr->op == CALL_JQ) { |
1226 | 126k | for (inst* arg = curr->arglist.first; arg; arg = arg->next) { |
1227 | 49.8k | length += 2; |
1228 | 49.8k | } |
1229 | 76.1k | } |
1230 | 1.30M | pos += length; |
1231 | 1.30M | curr->bytecode_pos = pos; |
1232 | 1.30M | curr->compiled = bc; |
1233 | | |
1234 | 1.30M | assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM); |
1235 | | |
1236 | 1.30M | if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) && |
1237 | 1.30M | curr->bound_by == curr) { |
1238 | 53.3k | curr->imm.intval = var_frame_idx++; |
1239 | 53.3k | localnames = jv_array_append(localnames, jv_string(curr->symbol)); |
1240 | 53.3k | } |
1241 | | |
1242 | 1.30M | if (curr->op == CLOSURE_CREATE) { |
1243 | 70.5k | assert(curr->bound_by == curr); |
1244 | 70.5k | curr->imm.intval = bc->nsubfunctions++; |
1245 | 70.5k | } |
1246 | 1.30M | if (curr->op == CLOSURE_CREATE_C) { |
1247 | 27.8k | assert(curr->bound_by == curr); |
1248 | 27.8k | int idx = bc->globals->ncfunctions++; |
1249 | 27.8k | bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names, |
1250 | 27.8k | jv_string(curr->symbol)); |
1251 | 27.8k | bc->globals->cfunctions[idx] = *curr->imm.cfunc; |
1252 | 27.8k | curr->imm.intval = idx; |
1253 | 27.8k | } |
1254 | 1.30M | } |
1255 | 74.0k | if (pos > 0xFFFF) { |
1256 | | // too long for program counter to fit in uint16_t |
1257 | 1 | locfile_locate(lf, UNKNOWN_LOCATION, |
1258 | 1 | "function compiled to %d bytes which is too long", pos); |
1259 | 1 | errors++; |
1260 | 1 | } |
1261 | 74.0k | bc->codelen = pos; |
1262 | 74.0k | bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames); |
1263 | 74.0k | if (bc->nsubfunctions && !errors) { |
1264 | 24.6k | bc->subfunctions = jv_mem_calloc(bc->nsubfunctions, sizeof(struct bytecode*)); |
1265 | 775k | for (inst* curr = b.first; curr; curr = curr->next) { |
1266 | 750k | if (curr->op == CLOSURE_CREATE) { |
1267 | 69.8k | struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode)); |
1268 | 69.8k | bc->subfunctions[curr->imm.intval] = subfn; |
1269 | 69.8k | subfn->globals = bc->globals; |
1270 | 69.8k | subfn->parent = bc; |
1271 | 69.8k | subfn->nclosures = 0; |
1272 | 69.8k | subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol)); |
1273 | 69.8k | jv params = jv_array(); |
1274 | 101k | for (inst* param = curr->arglist.first; param; param = param->next) { |
1275 | 31.8k | assert(param->op == CLOSURE_PARAM); |
1276 | 31.8k | assert(param->bound_by == param); |
1277 | 31.8k | param->imm.intval = subfn->nclosures++; |
1278 | 31.8k | param->compiled = subfn; |
1279 | 31.8k | params = jv_array_append(params, jv_string(param->symbol)); |
1280 | 31.8k | } |
1281 | 69.8k | subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params); |
1282 | 69.8k | errors += compile(subfn, curr->subfn, lf, args, env); |
1283 | 69.8k | curr->subfn = gen_noop(); |
1284 | 69.8k | } |
1285 | 750k | } |
1286 | 49.3k | } else { |
1287 | 49.3k | bc->nsubfunctions = 0; |
1288 | 49.3k | bc->subfunctions = 0; |
1289 | 49.3k | } |
1290 | 74.0k | uint16_t* code = jv_mem_calloc(bc->codelen, sizeof(uint16_t)); |
1291 | 74.0k | bc->code = code; |
1292 | 74.0k | pos = 0; |
1293 | 74.0k | jv constant_pool = jv_array(); |
1294 | 74.0k | int maxvar = -1; |
1295 | 1.28M | if (!errors) for (inst* curr = b.first; curr; curr = curr->next) { |
1296 | 1.21M | const struct opcode_description* op = opcode_describe(curr->op); |
1297 | 1.21M | if (op->length == 0) |
1298 | 96.7k | continue; |
1299 | 1.11M | code[pos++] = curr->op; |
1300 | 1.11M | assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM); |
1301 | 1.11M | if (curr->op == CALL_BUILTIN) { |
1302 | 103k | assert(curr->bound_by->op == CLOSURE_CREATE_C); |
1303 | 103k | assert(!curr->arglist.first); |
1304 | 103k | code[pos++] = (uint16_t)curr->imm.intval; |
1305 | 103k | code[pos++] = curr->bound_by->imm.intval; |
1306 | 1.01M | } else if (curr->op == CALL_JQ) { |
1307 | 71.2k | assert(curr->bound_by->op == CLOSURE_CREATE || |
1308 | 71.2k | curr->bound_by->op == CLOSURE_PARAM); |
1309 | 71.2k | code[pos++] = (uint16_t)curr->imm.intval; |
1310 | 71.2k | code[pos++] = nesting_level(bc, curr->bound_by); |
1311 | 71.2k | code[pos++] = curr->bound_by->imm.intval | |
1312 | 71.2k | (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0); |
1313 | 119k | for (inst* arg = curr->arglist.first; arg; arg = arg->next) { |
1314 | 48.4k | assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE); |
1315 | 48.4k | code[pos++] = nesting_level(bc, arg->bound_by); |
1316 | 48.4k | code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE; |
1317 | 48.4k | } |
1318 | 941k | } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) { |
1319 | | // STORE_GLOBAL: constant global, basically |
1320 | 0 | code[pos++] = jv_array_length(jv_copy(constant_pool)); |
1321 | 0 | constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant)); |
1322 | 0 | code[pos++] = nesting_level(bc, curr->bound_by); |
1323 | 0 | uint16_t var = (uint16_t)curr->bound_by->imm.intval; |
1324 | 0 | code[pos++] = var; |
1325 | 0 | if (var > maxvar) maxvar = var; |
1326 | 941k | } else if (op->flags & OP_HAS_CONSTANT) { |
1327 | 121k | code[pos++] = jv_array_length(jv_copy(constant_pool)); |
1328 | 121k | constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant)); |
1329 | 820k | } else if (op->flags & OP_HAS_VARIABLE) { |
1330 | 132k | code[pos++] = nesting_level(bc, curr->bound_by); |
1331 | 132k | uint16_t var = (uint16_t)curr->bound_by->imm.intval; |
1332 | 132k | code[pos++] = var; |
1333 | 132k | if (var > maxvar) maxvar = var; |
1334 | 687k | } else if (op->flags & OP_HAS_BRANCH) { |
1335 | 119k | assert(curr->imm.target->bytecode_pos != -1); |
1336 | 119k | assert(curr->imm.target->bytecode_pos > pos); // only forward branches |
1337 | 119k | code[pos] = curr->imm.target->bytecode_pos - (pos + 1); |
1338 | 119k | pos++; |
1339 | 568k | } else if (op->length > 1) { |
1340 | 0 | assert(0 && "codegen not implemented for this operation"); |
1341 | 0 | } |
1342 | 1.11M | } |
1343 | 74.0k | bc->constants = constant_pool; |
1344 | 74.0k | bc->nlocals = maxvar + 2; // FIXME: frames of size zero? |
1345 | 74.0k | block_free(b); |
1346 | 74.0k | return errors; |
1347 | 74.0k | } |
1348 | | |
1349 | 4.18k | int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) { |
1350 | 4.18k | struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode)); |
1351 | 4.18k | bc->parent = 0; |
1352 | 4.18k | bc->nclosures = 0; |
1353 | 4.18k | bc->globals = jv_mem_alloc(sizeof(struct symbol_table)); |
1354 | 4.18k | int ncfunc = count_cfunctions(b); |
1355 | 4.18k | bc->globals->ncfunctions = 0; |
1356 | 4.18k | bc->globals->cfunctions = jv_mem_calloc(MAX(ncfunc, 1), sizeof(struct cfunction)); |
1357 | 4.18k | bc->globals->cfunc_names = jv_array(); |
1358 | 4.18k | bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null()); |
1359 | 4.18k | jv env = jv_invalid(); |
1360 | 4.18k | int nerrors = compile(bc, b, lf, args, &env); |
1361 | 4.18k | jv_free(args); |
1362 | 4.18k | jv_free(env); |
1363 | 4.18k | assert(bc->globals->ncfunctions == ncfunc); |
1364 | 4.18k | if (nerrors > 0) { |
1365 | 49 | bytecode_free(bc); |
1366 | 49 | *out = 0; |
1367 | 4.13k | } else { |
1368 | 4.13k | *out = bc; |
1369 | 4.13k | } |
1370 | 4.18k | return nerrors; |
1371 | 4.18k | } |
1372 | | |
1373 | 35.9M | void block_free(block b) { |
1374 | 35.9M | struct inst* next; |
1375 | 52.5M | for (struct inst* curr = b.first; curr; curr = next) { |
1376 | 16.5M | next = curr->next; |
1377 | 16.5M | inst_free(curr); |
1378 | 16.5M | } |
1379 | 35.9M | } |