Coverage Report

Created: 2025-07-12 06:30

/src/jq/src/compile.c
Line
Count
Source (jump to first uncovered line)
1
#include <assert.h>
2
#include <string.h>
3
#include <stdlib.h>
4
#include <unistd.h>
5
#include "compile.h"
6
#include "bytecode.h"
7
#include "locfile.h"
8
#include "jv_alloc.h"
9
#include "util.h"
10
11
/*
12
  The intermediate representation for jq filters is as a sequence of
13
  struct inst, which form a doubly-linked list via the next and prev
14
  pointers.
15
16
  A "block" represents a sequence of "struct inst", which may be
17
  empty.
18
19
  Blocks are generated by the parser bottom-up, so may have free
20
  variables (refer to things not defined). See inst.bound_by and
21
  inst.symbol.
22
 */
23
struct inst {
24
  struct inst* next;
25
  struct inst* prev;
26
27
  opcode op;
28
29
  struct {
30
    uint16_t intval;
31
    struct inst* target;
32
    jv constant;
33
    const struct cfunction* cfunc;
34
  } imm;
35
36
  struct locfile* locfile;
37
  location source;
38
39
  // Binding
40
  // An instruction requiring binding (for parameters/variables/functions)
41
  // is in one of three states:
42
  //   inst->bound_by = NULL  - Unbound free variable
43
  //   inst->bound_by = inst  - This instruction binds a variable
44
  //   inst->bound_by = other - Uses variable bound by other instruction
45
  // Unbound instructions (references to other things that may or may not
46
  // exist) are created by "gen_foo_unbound", and bindings are created by
47
  // block_bind(definition, body), which binds all instructions in
48
  // body which are unbound and refer to "definition" by name.
49
  struct inst* bound_by;
50
  char* symbol;
51
  int any_unbound;
52
  int referenced;
53
54
  int nformals;
55
  int nactuals;
56
57
  block subfn;   // used by CLOSURE_CREATE (body of function)
58
  block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
59
60
  // This instruction is compiled as part of which function?
61
  // (only used during block_compile)
62
  struct bytecode* compiled;
63
64
  int bytecode_pos; // position just after this insn
65
};
66
67
16.7M
static inst* inst_new(opcode op) {
68
16.7M
  inst* i = jv_mem_alloc(sizeof(inst));
69
16.7M
  i->next = i->prev = 0;
70
16.7M
  i->op = op;
71
16.7M
  i->bytecode_pos = -1;
72
16.7M
  i->bound_by = 0;
73
16.7M
  i->symbol = 0;
74
16.7M
  i->any_unbound = 0;
75
16.7M
  i->referenced = 0;
76
16.7M
  i->nformals = -1;
77
16.7M
  i->nactuals = -1;
78
16.7M
  i->subfn = gen_noop();
79
16.7M
  i->arglist = gen_noop();
80
16.7M
  i->source = UNKNOWN_LOCATION;
81
16.7M
  i->locfile = 0;
82
16.7M
  return i;
83
16.7M
}
84
85
16.7M
static void inst_free(struct inst* i) {
86
16.7M
  jv_mem_free(i->symbol);
87
16.7M
  block_free(i->subfn);
88
16.7M
  block_free(i->arglist);
89
16.7M
  if (i->locfile)
90
2.40M
    locfile_free(i->locfile);
91
16.7M
  if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
92
3.00M
    jv_free(i->imm.constant);
93
3.00M
  }
94
16.7M
  jv_mem_free(i);
95
16.7M
}
96
97
19.7M
static block inst_block(inst* i) {
98
19.7M
  block b = {i,i};
99
19.7M
  return b;
100
19.7M
}
101
102
223M
int block_is_single(block b) {
103
223M
  return b.first && b.first == b.last;
104
223M
}
105
106
1.76M
static inst* block_take(block* b) {
107
1.76M
  if (b->first == 0) return 0;
108
1.32M
  inst* i = b->first;
109
1.32M
  if (i->next) {
110
1.02M
    i->next->prev = 0;
111
1.02M
    b->first = i->next;
112
1.02M
    i->next = 0;
113
1.02M
  } else {
114
301k
    b->first = 0;
115
301k
    b->last = 0;
116
301k
  }
117
1.32M
  return i;
118
1.76M
}
119
120
1.99M
block gen_location(location loc, struct locfile* l, block b) {
121
4.50M
  for (inst* i = b.first; i; i = i->next) {
122
2.51M
    if (i->source.start == UNKNOWN_LOCATION.start &&
123
2.51M
        i->source.end == UNKNOWN_LOCATION.end) {
124
2.40M
      i->source = loc;
125
2.40M
      i->locfile = locfile_retain(l);
126
2.40M
    }
127
2.51M
  }
128
1.99M
  return b;
129
1.99M
}
130
131
39.6M
block gen_noop(void) {
132
39.6M
  block b = {0,0};
133
39.6M
  return b;
134
39.6M
}
135
136
2.16M
int block_is_noop(block b) {
137
2.16M
  return (b.first == 0 && b.last == 0);
138
2.16M
}
139
140
4.65M
block gen_op_simple(opcode op) {
141
4.65M
  assert(opcode_describe(op)->length == 1);
142
4.65M
  return inst_block(inst_new(op));
143
4.65M
}
144
145
146
16.1k
block gen_error(jv constant) {
147
16.1k
  assert(opcode_describe(ERRORK)->flags & OP_HAS_CONSTANT);
148
16.1k
  inst *i = inst_new(ERRORK);
149
16.1k
  i->imm.constant = constant;
150
16.1k
  return inst_block(i);
151
16.1k
}
152
153
2.43M
block gen_const(jv constant) {
154
2.43M
  assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
155
2.43M
  inst* i = inst_new(LOADK);
156
2.43M
  i->imm.constant = constant;
157
2.43M
  return inst_block(i);
158
2.43M
}
159
160
0
block gen_const_global(jv constant, const char *name) {
161
0
  assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
162
0
         (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
163
0
  inst* i = inst_new(STORE_GLOBAL);
164
0
  i->imm.constant = constant;
165
0
  i->symbol = strdup(name);
166
0
  i->any_unbound = 0;
167
0
  return inst_block(i);
168
0
}
169
170
548k
block gen_op_pushk_under(jv constant) {
171
548k
  assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
172
548k
  inst* i = inst_new(PUSHK_UNDER);
173
548k
  i->imm.constant = constant;
174
548k
  return inst_block(i);
175
548k
}
176
177
2.22M
int block_is_const(block b) {
178
2.22M
  return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
179
2.22M
}
180
181
8.91k
jv_kind block_const_kind(block b) {
182
8.91k
  assert(block_is_const(b));
183
8.91k
  return jv_get_kind(b.first->imm.constant);
184
8.91k
}
185
186
1.21M
jv block_const(block b) {
187
1.21M
  assert(block_is_const(b));
188
1.21M
  return jv_copy(b.first->imm.constant);
189
1.21M
}
190
191
1.31M
block gen_op_target(opcode op, block target) {
192
1.31M
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
193
1.31M
  assert(target.last);
194
1.31M
  inst* i = inst_new(op);
195
1.31M
  i->imm.target = target.last;
196
1.31M
  return inst_block(i);
197
1.31M
}
198
199
100k
block gen_op_targetlater(opcode op) {
200
100k
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
201
100k
  inst* i = inst_new(op);
202
100k
  i->imm.target = 0;
203
100k
  return inst_block(i);
204
100k
}
205
100k
void inst_set_target(block b, block target) {
206
100k
  assert(block_is_single(b));
207
100k
  assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
208
100k
  assert(target.last);
209
100k
  b.first->imm.target = target.last;
210
100k
}
211
212
4.67M
block gen_op_unbound(opcode op, const char* name) {
213
4.67M
  assert(opcode_describe(op)->flags & OP_HAS_BINDING);
214
4.67M
  inst* i = inst_new(op);
215
4.67M
  i->symbol = strdup(name);
216
4.67M
  i->any_unbound = 1;
217
4.67M
  return inst_block(i);
218
4.67M
}
219
220
290k
block gen_op_var_fresh(opcode op, const char* name) {
221
290k
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
222
290k
  block b = gen_op_unbound(op, name);
223
290k
  b.first->bound_by = b.first;
224
290k
  return b;
225
290k
}
226
227
659k
block gen_op_bound(opcode op, block binder) {
228
659k
  assert(block_is_single(binder));
229
659k
  block b = gen_op_unbound(op, binder.first->symbol);
230
659k
  b.first->bound_by = binder.first;
231
659k
  b.first->any_unbound = 0;
232
659k
  return b;
233
659k
}
234
235
85.8k
block gen_dictpair(block k, block v) {
236
85.8k
  return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
237
85.8k
}
238
239
240
12.7M
static void inst_join(inst* a, inst* b) {
241
12.7M
  assert(a && b);
242
12.7M
  assert(!a->next);
243
12.7M
  assert(!b->prev);
244
12.7M
  a->next = b;
245
12.7M
  b->prev = a;
246
12.7M
}
247
248
14.6M
void block_append(block* b, block b2) {
249
14.6M
  if (b2.first) {
250
13.3M
    if (b->last) {
251
12.7M
      inst_join(b->last, b2.first);
252
12.7M
    } else {
253
633k
      b->first = b2.first;
254
633k
    }
255
13.3M
    b->last = b2.last;
256
13.3M
  }
257
14.6M
}
258
259
14.5M
block block_join(block a, block b) {
260
14.5M
  block c = a;
261
14.5M
  block_append(&c, b);
262
14.5M
  return c;
263
14.5M
}
264
265
4.18k
int block_has_only_binders_and_imports(block binders, int bindflags) {
266
4.18k
  bindflags |= OP_HAS_BINDING;
267
447k
  for (inst* curr = binders.first; curr; curr = curr->next) {
268
443k
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
269
0
      return 0;
270
0
    }
271
443k
  }
272
4.18k
  return 1;
273
4.18k
}
274
275
0
static int inst_is_binder(inst *i, int bindflags) {
276
0
  return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
277
0
}
278
279
52.7k
int block_has_only_binders(block binders, int bindflags) {
280
52.7k
  bindflags |= OP_HAS_BINDING;
281
52.7k
  bindflags &= ~OP_BIND_WILDCARD;
282
1.13M
  for (inst* curr = binders.first; curr; curr = curr->next) {
283
1.08M
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
284
0
      return 0;
285
0
    }
286
1.08M
  }
287
52.7k
  return 1;
288
52.7k
}
289
290
// Count a call site's actual params
291
2.23M
static int block_count_actuals(block b) {
292
2.23M
  int args = 0;
293
4.12M
  for (inst* i = b.first; i; i = i->next) {
294
1.88M
    switch (i->op) {
295
0
    default: assert(0 && "Unknown function type"); break;
296
1.88M
    case CLOSURE_CREATE:
297
1.88M
    case CLOSURE_PARAM:
298
1.88M
    case CLOSURE_CREATE_C:
299
1.88M
      args++;
300
1.88M
      break;
301
1.88M
    }
302
1.88M
  }
303
2.23M
  return args;
304
2.23M
}
305
306
217M
static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
307
217M
  assert(block_is_single(binder));
308
217M
  assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
309
217M
  assert(binder.first->symbol);
310
217M
  assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
311
217M
  assert(break_distance >= 0);
312
313
217M
  binder.first->bound_by = binder.first;
314
217M
  int nrefs = 0;
315
482M
  for (inst* i = body.first; i; i = i->next) {
316
264M
    if (i->any_unbound == 0)
317
157M
      continue;
318
319
106M
    int flags = opcode_describe(i->op)->flags;
320
106M
    if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
321
106M
        (!strcmp(i->symbol, binder.first->symbol) ||
322
         // Check for break/break2/break3; see parser.y
323
59.4M
         ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
324
57.9M
          break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
325
57.9M
          i->symbol[2] == '\0'))) {
326
      // bind this instruction
327
1.48M
      if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
328
1.38M
        i->bound_by = binder.first;
329
1.38M
        nrefs++;
330
1.38M
      }
331
105M
    } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
332
105M
               !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
333
105M
               !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
334
      // Increment the break distance required for this binder to match
335
      // a break whenever we come across a STOREV of *anonlabel...
336
0
      break_distance++;
337
0
    }
338
339
106M
    i->any_unbound = (i->symbol && !i->bound_by);
340
341
    // binding recurses into closures
342
106M
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance);
343
    // binding recurses into argument list
344
106M
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance);
345
346
106M
    if (i->any_unbound)
347
102M
      *any_unbound = 1;
348
106M
  }
349
217M
  return nrefs;
350
217M
}
351
352
4.26M
static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
353
4.26M
  int any_unbound;
354
4.26M
  return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance);
355
4.26M
}
356
357
13.7k
static int block_bind_each(block binder, block body, int bindflags) {
358
13.7k
  assert(block_has_only_binders(binder, bindflags));
359
13.7k
  bindflags |= OP_HAS_BINDING;
360
13.7k
  int nrefs = 0;
361
27.5k
  for (inst* curr = binder.first; curr; curr = curr->next) {
362
13.7k
    nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
363
13.7k
  }
364
13.7k
  return nrefs;
365
13.7k
}
366
367
13.7k
static block block_bind(block binder, block body, int bindflags) {
368
13.7k
  block_bind_each(binder, body, bindflags);
369
13.7k
  return block_join(binder, body);
370
13.7k
}
371
372
0
block block_bind_library(block binder, block body, int bindflags, const char *libname) {
373
0
  bindflags |= OP_HAS_BINDING;
374
0
  int matchlen = (libname == NULL) ? 0 : strlen(libname);
375
0
  char *matchname = jv_mem_alloc(matchlen+2+1);
376
0
  matchname[0] = '\0';
377
0
  if (libname != NULL && libname[0] != '\0') {
378
0
    strcpy(matchname,libname);
379
0
    strcpy(matchname+matchlen, "::");
380
0
    matchlen += 2;
381
0
  }
382
0
  assert(block_has_only_binders(binder, bindflags));
383
0
  for (inst *curr = binder.last; curr; curr = curr->prev) {
384
0
    int bindflags2 = bindflags;
385
0
    char* cname = curr->symbol;
386
0
    char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
387
0
    strcpy(tname, matchname);
388
0
    strcpy(tname+matchlen, curr->symbol);
389
390
    // Ew
391
0
    if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
392
0
      bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
393
394
    // This mutation is ugly, even if we undo it
395
0
    curr->symbol = tname;
396
0
    block_bind_subblock(inst_block(curr), body, bindflags2, 0);
397
0
    curr->symbol = cname;
398
0
    free(tname);
399
0
  }
400
0
  free(matchname);
401
0
  return body; // We don't return a join because we don't want those sticking around...
402
0
}
403
404
1.11M
static inst* block_take_last(block* b) {
405
1.11M
  inst* i = b->last;
406
1.11M
  if (i == 0)
407
39.0k
    return 0;
408
1.07M
  if (i->prev) {
409
1.03M
    i->prev->next = i->next;
410
1.03M
    b->last = i->prev;
411
1.03M
    i->prev = 0;
412
1.03M
  } else {
413
39.0k
    b->first = 0;
414
39.0k
    b->last = 0;
415
39.0k
  }
416
1.07M
  return i;
417
1.11M
}
418
419
// Binds a sequence of binders, which *must not* already be bound to each other,
420
// to body, throwing away unreferenced defs
421
39.0k
block block_bind_referenced(block binder, block body, int bindflags) {
422
39.0k
  assert(block_has_only_binders(binder, bindflags));
423
39.0k
  bindflags |= OP_HAS_BINDING;
424
425
39.0k
  inst* curr;
426
1.11M
  while ((curr = block_take_last(&binder))) {
427
1.07M
    block b = inst_block(curr);
428
1.07M
    if (block_bind_subblock(b, body, bindflags, 0) == 0) {
429
994k
      block_free(b);
430
994k
    } else {
431
78.0k
      body = BLOCK(b, body);
432
78.0k
    }
433
1.07M
  }
434
39.0k
  return body;
435
39.0k
}
436
437
0
block block_bind_self(block binder, int bindflags) {
438
0
  assert(block_has_only_binders(binder, bindflags));
439
0
  bindflags |= OP_HAS_BINDING;
440
0
  block body = gen_noop();
441
442
0
  inst* curr;
443
0
  while ((curr = block_take_last(&binder))) {
444
0
    block b = inst_block(curr);
445
0
    block_bind_subblock(b, body, bindflags, 0);
446
0
    body = BLOCK(b, body);
447
0
  }
448
0
  return body;
449
0
}
450
451
1.32M
static void block_mark_referenced(block body) {
452
1.32M
  int saw_top = 0;
453
1.98M
  for (inst* i = body.last; i; i = i->prev) {
454
660k
    if (saw_top && i->bound_by == i && !i->referenced)
455
0
      continue;
456
660k
    if (i->op == TOP) {
457
4.18k
      saw_top = 1;
458
4.18k
    }
459
660k
    if (i->bound_by) {
460
173k
      i->bound_by->referenced = 1;
461
173k
    }
462
463
660k
    block_mark_referenced(i->arglist);
464
660k
    block_mark_referenced(i->subfn);
465
660k
  }
466
1.32M
}
467
468
4.18k
block block_drop_unreferenced(block body) {
469
4.18k
  block_mark_referenced(body);
470
471
4.18k
  block refd = gen_noop();
472
4.18k
  inst* curr;
473
164k
  while ((curr = block_take(&body))) {
474
160k
    if (curr->bound_by == curr && !curr->referenced) {
475
0
      inst_free(curr);
476
160k
    } else {
477
160k
      refd = BLOCK(refd, inst_block(curr));
478
160k
    }
479
160k
  }
480
4.18k
  return refd;
481
4.18k
}
482
483
4.20k
jv block_take_imports(block* body) {
484
4.20k
  jv imports = jv_array();
485
486
  /* Parser should never generate TOP before imports */
487
4.20k
  assert(!(body->first && body->first->op == TOP && body->first->next &&
488
4.20k
        (body->first->next->op == MODULEMETA || body->first->next->op == DEPS)));
489
490
8.60k
  while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
491
4.40k
    inst* dep = block_take(body);
492
4.40k
    if (dep->op == DEPS) {
493
4.40k
      imports = jv_array_append(imports, jv_copy(dep->imm.constant));
494
4.40k
    }
495
4.40k
    inst_free(dep);
496
4.40k
  }
497
4.20k
  return imports;
498
4.20k
}
499
500
4.18k
jv block_list_funcs(block body, int omit_underscores) {
501
4.18k
  jv funcs = jv_object(); // Use the keys for set semantics.
502
1.03M
  for (inst *pos = body.first; pos != NULL; pos = pos->next) {
503
1.03M
    if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
504
1.03M
      if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
505
941k
        funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
506
941k
      }
507
1.03M
    }
508
1.03M
  }
509
4.18k
  return jv_keys_unsorted(funcs);
510
4.18k
}
511
512
28
block gen_module(block metadata) {
513
28
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
514
28
  inst* i = inst_new(MODULEMETA);
515
28
  i->imm.constant = block_const(metadata);
516
28
  if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
517
0
    i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
518
28
  block_free(metadata);
519
28
  return inst_block(i);
520
28
}
521
522
0
jv block_module_meta(block b) {
523
0
  if (b.first != NULL && b.first->op == MODULEMETA)
524
0
    return jv_copy(b.first->imm.constant);
525
0
  return jv_null();
526
0
}
527
528
8.75k
block gen_import(const char* name, const char* as, int is_data) {
529
8.75k
  inst* i = inst_new(DEPS);
530
8.75k
  jv meta = jv_object();
531
8.75k
  if (as != NULL)
532
4.55k
    meta = jv_object_set(meta, jv_string("as"), jv_string(as));
533
8.75k
  meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
534
8.75k
  meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
535
8.75k
  i->imm.constant = meta;
536
8.75k
  return inst_block(i);
537
8.75k
}
538
539
4.54k
block gen_import_meta(block import, block metadata) {
540
4.54k
  assert(block_is_single(import) && import.first->op == DEPS);
541
4.54k
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
542
4.54k
  inst *i = import.first;
543
4.54k
  i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
544
4.54k
  block_free(metadata);
545
4.54k
  return import;
546
4.54k
}
547
548
2.39M
block gen_function(const char* name, block formals, block body) {
549
2.39M
  inst* i = inst_new(CLOSURE_CREATE);
550
2.39M
  int nformals = 0;
551
2.84M
  for (inst* i = formals.last; i; i = i->prev) {
552
450k
    nformals++;
553
450k
    i->nformals = 0;
554
450k
    if (i->op == CLOSURE_PARAM_REGULAR) {
555
155k
      i->op = CLOSURE_PARAM;
556
155k
      body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
557
155k
    }
558
450k
    block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
559
450k
  }
560
2.39M
  i->subfn = body;
561
2.39M
  i->symbol = strdup(name);
562
2.39M
  i->any_unbound = -1;
563
2.39M
  i->nformals = nformals;
564
2.39M
  i->arglist = formals;
565
2.39M
  block b = inst_block(i);
566
2.39M
  block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
567
2.39M
  return b;
568
2.39M
}
569
570
155k
block gen_param_regular(const char* name) {
571
155k
  return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
572
155k
}
573
574
295k
block gen_param(const char* name) {
575
295k
  return gen_op_unbound(CLOSURE_PARAM, name);
576
295k
}
577
578
1.89M
block gen_lambda(block body) {
579
1.89M
  return gen_function("@lambda", gen_noop(), body);
580
1.89M
}
581
582
2.23M
block gen_call(const char* name, block args) {
583
2.23M
  block b = gen_op_unbound(CALL_JQ, name);
584
2.23M
  b.first->arglist = args;
585
2.23M
  b.first->nactuals = block_count_actuals(b.first->arglist);
586
2.23M
  return b;
587
2.23M
}
588
589
1.27M
block gen_subexp(block a) {
590
1.27M
  if (block_is_noop(a)) {
591
45.9k
    return gen_op_simple(DUP);
592
45.9k
  }
593
1.22M
  if (block_is_single(a) && a.first->op == LOADK) {
594
548k
    jv c = block_const(a);
595
548k
    block_free(a);
596
548k
    return gen_op_pushk_under(c);
597
548k
  }
598
677k
  return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
599
1.22M
}
600
601
100k
block gen_both(block a, block b) {
602
100k
  block jump = gen_op_targetlater(JUMP);
603
100k
  block fork = gen_op_target(FORK, jump);
604
100k
  block c = BLOCK(fork, a, jump, b);
605
100k
  inst_set_target(jump, c);
606
100k
  return c;
607
100k
}
608
609
70.4k
block gen_const_object(block expr) {
610
70.4k
  int is_const = 1;
611
70.4k
  jv o = jv_object();
612
70.4k
  jv k = jv_null();
613
70.4k
  jv v = jv_null();
614
102k
  for (inst *i = expr.first; i; i = i->next) {
615
60.9k
    if (i->op == PUSHK_UNDER) {
616
45.1k
      k = jv_copy(i->imm.constant);
617
45.1k
      i = i->next;
618
45.1k
    } else if (i->op != SUBEXP_BEGIN ||
619
15.8k
        i->next == NULL ||
620
15.8k
        i->next->op != LOADK ||
621
15.8k
        i->next->next == NULL ||
622
15.8k
        i->next->next->op != SUBEXP_END) {
623
15.8k
      is_const = 0;
624
15.8k
      break;
625
15.8k
    } else {
626
0
      k = jv_copy(i->next->imm.constant);
627
0
      i = i->next->next->next;
628
0
    }
629
45.1k
    if (i != NULL && i->op == PUSHK_UNDER) {
630
33.5k
      v = jv_copy(i->imm.constant);
631
33.5k
      i = i->next;
632
33.5k
    } else if (i == NULL ||
633
11.5k
        i->op != SUBEXP_BEGIN ||
634
11.5k
        i->next == NULL ||
635
11.5k
        i->next->op != LOADK ||
636
11.5k
        i->next->next == NULL ||
637
11.5k
        i->next->next->op != SUBEXP_END) {
638
11.5k
      is_const = 0;
639
11.5k
      break;
640
11.5k
    } else {
641
0
      v = jv_copy(i->next->imm.constant);
642
0
      i = i->next->next->next;
643
0
    }
644
33.5k
    if (i == NULL || i->op != INSERT) {
645
895
      is_const = 0;
646
895
      break;
647
895
    }
648
32.6k
    if (jv_get_kind(k) != JV_KIND_STRING) {
649
251
      is_const = 0;
650
251
      break;
651
251
    }
652
32.4k
    o = jv_object_set(o, k, v);
653
32.4k
    k = jv_null();
654
32.4k
    v = jv_null();
655
32.4k
  }
656
70.4k
  if (!is_const) {
657
28.5k
    jv_free(o);
658
28.5k
    jv_free(k);
659
28.5k
    jv_free(v);
660
28.5k
    block b = {0,0};
661
28.5k
    return b;
662
28.5k
  }
663
41.9k
  block_free(expr);
664
41.9k
  return gen_const(o);
665
70.4k
}
666
667
187k
static block gen_const_array(block expr) {
668
  /*
669
   * An expr of all constant elements looks like this:
670
   *
671
   * 0009 FORK 0027
672
   * 0011 FORK 0023
673
   * 0013 FORK 0019
674
   * 0015 LOADK 1
675
   * 0017 JUMP 0021
676
   * 0019 LOADK 2
677
   * 0021 JUMP 0025
678
   * 0023 LOADK 3
679
   * 0025 JUMP 0029
680
   * 0027 LOADK 4
681
   *
682
   * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
683
   * every LOADK.  The sequence ends in a LOADK.  Any deviation and it's
684
   * not a list of constants.
685
   *
686
   * Here we check for this pattern almost exactly.  We don't check that
687
   * the targets of the FORK and JUMP instructions are in the right
688
   * sequence.
689
   */
690
187k
  int all_const = 1;
691
187k
  int commas = 0;
692
187k
  int normal = 1;
693
187k
  jv a = jv_array();
694
764k
  for (inst *i = expr.first; i; i = i->next) {
695
599k
    if (i->op == FORK) {
696
59.1k
      commas++;
697
59.1k
      if (i->imm.target == NULL || i->imm.target->op != JUMP ||
698
59.1k
          jv_array_length(jv_copy(a)) > 0) {
699
22.4k
        normal = 0;
700
22.4k
        break;
701
22.4k
      }
702
540k
    } else if (all_const && i->op == LOADK) {
703
69.2k
      if (i->next != NULL && i->next->op != JUMP) {
704
477
        normal = 0;
705
477
        break;
706
477
      }
707
68.7k
      a = jv_array_append(a, jv_copy(i->imm.constant));
708
471k
    } else if (i->op != JUMP || i->imm.target == NULL ||
709
471k
               i->imm.target->op != LOADK) {
710
458k
      all_const = 0;
711
458k
    }
712
599k
  }
713
714
187k
  if (all_const && normal &&
715
187k
      (expr.last == NULL || expr.last->op == LOADK) &&
716
187k
      jv_array_length(jv_copy(a)) == commas + 1) {
717
50.1k
    block_free(expr);
718
50.1k
    return gen_const(a);
719
50.1k
  }
720
721
137k
  jv_free(a);
722
137k
  block b = {0,0};
723
137k
  return b;
724
187k
}
725
726
187k
block gen_collect(block expr) {
727
187k
  block const_array = gen_const_array(expr);
728
187k
  if (const_array.first != NULL)
729
50.1k
    return const_array;
730
731
137k
  block array_var = gen_op_var_fresh(STOREV, "collect");
732
137k
  block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
733
734
137k
  block tail = BLOCK(gen_op_bound(APPEND, array_var),
735
137k
                     gen_op_simple(BACKTRACK));
736
737
137k
  return BLOCK(c,
738
187k
               gen_op_target(FORK, tail),
739
187k
               expr,
740
187k
               tail,
741
187k
               gen_op_bound(LOADVN, array_var));
742
187k
}
743
744
323k
static block bind_matcher(block matcher, block body) {
745
  // cannot call block_bind(matcher, body) because that requires
746
  // block_has_only_binders(matcher), which is not true here as matchers
747
  // may also contain code to extract the correct elements
748
690k
  for (inst* i = matcher.first; i; i = i->next) {
749
367k
    if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
750
329k
      block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
751
367k
  }
752
323k
  return BLOCK(matcher, body);
753
323k
}
754
755
756
// Extract destructuring var names from the block
757
// *vars should be a jv_object (for set semantics)
758
5.12k
static void block_get_unbound_vars(block b, jv *vars) {
759
5.12k
  assert(vars != NULL);
760
5.12k
  assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
761
20.9k
  for (inst* i = b.first; i; i = i->next) {
762
15.8k
    if (i->subfn.first) {
763
3.14k
      block_get_unbound_vars(i->subfn, vars);
764
3.14k
      continue;
765
3.14k
    }
766
12.6k
    if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
767
5.72k
      *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
768
5.72k
    }
769
12.6k
  }
770
5.12k
}
771
772
/* Build wrappers around destructuring matchers so that we can chain them
773
 * when we have errors.  The approach is as follows:
774
 * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
775
 * existing_matcher_block
776
 * JUMP BODY
777
 */
778
323k
static block bind_alternation_matchers(block matchers, block body) {
779
323k
  block preamble = {0};
780
323k
  block altmatchers = {0};
781
323k
  block mb = {0};
782
323k
  block final_matcher = matchers;
783
784
  // Pass through the matchers to find all destructured names.
785
326k
  while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
786
3.14k
    block_append(&altmatchers, inst_block(block_take(&final_matcher)));
787
3.14k
  }
788
789
  // We don't have any alternations here, so we can use the simplest case.
790
323k
  if (altmatchers.first == NULL) {
791
322k
    return bind_matcher(final_matcher, body);
792
322k
  }
793
794
  // Collect var names
795
986
  jv all_vars = jv_object();
796
986
  block_get_unbound_vars(altmatchers, &all_vars);
797
986
  block_get_unbound_vars(final_matcher, &all_vars);
798
799
  // We need a preamble of STOREVs to which to bind the matchers and the body.
800
3.20k
  jv_object_keys_foreach(all_vars, key) {
801
3.20k
    preamble = BLOCK(preamble,
802
3.20k
                     gen_op_simple(DUP),
803
3.20k
                     gen_const(jv_null()),
804
3.20k
                     gen_op_unbound(STOREV, jv_string_value(key)));
805
3.20k
    jv_free(key);
806
3.20k
  }
807
986
  jv_free(all_vars);
808
809
  // Now we build each matcher in turn
810
4.13k
  for (inst *i = altmatchers.first; i; i = i->next) {
811
3.14k
    block submatcher = i->subfn;
812
813
    // If we're successful, jump to the end of the matchers
814
3.14k
    submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));
815
816
    // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
817
3.14k
    mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);
818
819
    // We're done with this inst and we don't want it anymore
820
    // But we can't let it free the submatcher block.
821
3.14k
    i->subfn.first = i->subfn.last = NULL;
822
3.14k
  }
823
  // We're done with these insts now.
824
986
  block_free(altmatchers);
825
826
986
  return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
827
323k
}
828
829
53.0k
block gen_reduce(block source, block matcher, block init, block body) {
830
53.0k
  block res_var = gen_op_var_fresh(STOREV, "reduce");
831
53.0k
  block loop = BLOCK(gen_op_simple(DUPN),
832
53.0k
                     source,
833
53.0k
                     bind_alternation_matchers(matcher,
834
53.0k
                                  BLOCK(gen_op_bound(LOADVN, res_var),
835
53.0k
                                        body,
836
53.0k
                                        gen_op_bound(STOREV, res_var))),
837
53.0k
                     gen_op_simple(BACKTRACK));
838
53.0k
  return BLOCK(gen_op_simple(DUP),
839
53.0k
               init,
840
53.0k
               res_var,
841
53.0k
               gen_op_target(FORK, loop),
842
53.0k
               loop,
843
53.0k
               gen_op_bound(LOADVN, res_var));
844
53.0k
}
845
846
18.0k
block gen_foreach(block source, block matcher, block init, block update, block extract) {
847
18.0k
  block state_var = gen_op_var_fresh(STOREV, "foreach");
848
18.0k
  return BLOCK(gen_op_simple(DUP),
849
18.0k
               init,
850
18.0k
               state_var,
851
18.0k
               gen_op_simple(DUP),
852
               // get a value from the source expression:
853
18.0k
               source,
854
               // destructure the value into variable(s) for all the code
855
               // in the body to see
856
18.0k
               bind_alternation_matchers(matcher,
857
                            // load the loop state variable
858
18.0k
                            BLOCK(gen_op_bound(LOADVN, state_var),
859
                                  // generate updated state
860
18.0k
                                  update,
861
                                  // save the updated state for value extraction
862
18.0k
                                  gen_op_simple(DUP),
863
                                  // save new state
864
18.0k
                                  gen_op_bound(STOREV, state_var),
865
                                  // extract an output...
866
18.0k
                                  extract)));
867
18.0k
}
868
869
49.2k
block gen_definedor(block a, block b) {
870
  // var found := false
871
49.2k
  block found_var = gen_op_var_fresh(STOREV, "found");
872
49.2k
  block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
873
874
  // if found, backtrack. Otherwise execute b
875
49.2k
  block backtrack = gen_op_simple(BACKTRACK);
876
49.2k
  block tail = BLOCK(gen_op_simple(DUP),
877
49.2k
                     gen_op_bound(LOADV, found_var),
878
49.2k
                     gen_op_target(JUMP_F, backtrack),
879
49.2k
                     backtrack,
880
49.2k
                     gen_op_simple(POP),
881
49.2k
                     b);
882
883
  // try again
884
49.2k
  block if_notfound = gen_op_simple(BACKTRACK);
885
886
  // found := true, produce result
887
49.2k
  block if_found = BLOCK(gen_op_simple(DUP),
888
49.2k
                         gen_const(jv_true()),
889
49.2k
                         gen_op_bound(STOREV, found_var),
890
49.2k
                         gen_op_target(JUMP, tail));
891
892
49.2k
  return BLOCK(init,
893
49.2k
               gen_op_target(FORK, if_notfound),
894
49.2k
               a,
895
49.2k
               gen_op_target(JUMP_F, if_found),
896
49.2k
               if_found,
897
49.2k
               if_notfound,
898
49.2k
               tail);
899
49.2k
}
900
901
8.39k
int block_has_main(block top) {
902
452k
  for (inst *c = top.first; c; c = c->next) {
903
447k
    if (c->op == TOP)
904
4.20k
      return 1;
905
447k
  }
906
4.19k
  return 0;
907
8.39k
}
908
909
0
int block_is_funcdef(block b) {
910
0
  if (b.first != NULL && b.first->op == CLOSURE_CREATE)
911
0
    return 1;
912
0
  return 0;
913
0
}
914
915
354k
block gen_condbranch(block iftrue, block iffalse) {
916
354k
  iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
917
354k
  return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
918
354k
}
919
920
61.3k
block gen_and(block a, block b) {
921
  // a and b = if a then (if b then true else false) else false
922
61.3k
  return BLOCK(gen_op_simple(DUP), a,
923
61.3k
               gen_condbranch(BLOCK(gen_op_simple(POP),
924
61.3k
                                    b,
925
61.3k
                                    gen_condbranch(gen_const(jv_true()),
926
61.3k
                                                   gen_const(jv_false()))),
927
61.3k
                              BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
928
61.3k
}
929
930
13.8k
block gen_or(block a, block b) {
931
  // a or b = if a then true else (if b then true else false)
932
13.8k
  return BLOCK(gen_op_simple(DUP), a,
933
13.8k
               gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
934
13.8k
                              BLOCK(gen_op_simple(POP),
935
13.8k
                                    b,
936
13.8k
                                    gen_condbranch(gen_const(jv_true()),
937
13.8k
                                                   gen_const(jv_false())))));
938
13.8k
}
939
940
5.34k
block gen_destructure_alt(block matcher) {
941
25.3k
  for (inst *i = matcher.first; i; i = i->next) {
942
20.0k
    if (i->op == STOREV) {
943
7.67k
      i->op = STOREVN;
944
7.67k
    }
945
20.0k
  }
946
5.34k
  inst* i = inst_new(DESTRUCTURE_ALT);
947
5.34k
  i->subfn = matcher;
948
5.34k
  return inst_block(i);
949
5.34k
}
950
951
155k
block gen_var_binding(block var, const char* name, block body) {
952
155k
  return gen_destructure(var, gen_op_unbound(STOREV, name), body);
953
155k
}
954
955
2.47k
block gen_array_matcher(block left, block curr) {
956
2.47k
  int index;
957
2.47k
  if (block_is_noop(left))
958
2.26k
    index = 0;
959
209
  else {
960
    // `left` was returned by this function, so the third inst is the
961
    // constant containing the previously used index
962
209
    assert(left.first->op == DUP);
963
209
    assert(left.first->next != NULL);
964
209
    inst *i = NULL;
965
209
    if (left.first->next->op == PUSHK_UNDER) {
966
209
      i = left.first->next;
967
209
    } else {
968
0
      assert(left.first->next->op == SUBEXP_BEGIN);
969
0
      assert(left.first->next->next->op == LOADK);
970
0
      i = left.first->next->next;
971
0
    }
972
209
    index = 1 + (int) jv_number_value(i->imm.constant);
973
209
  }
974
975
  // `left` goes at the end so that the const index is in a predictable place
976
2.47k
  return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
977
2.47k
               gen_op_simple(INDEX), curr, left);
978
2.47k
}
979
980
14.2k
block gen_object_matcher(block name, block curr) {
981
14.2k
  return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
982
14.2k
               curr);
983
14.2k
}
984
985
252k
block gen_destructure(block var, block matchers, block body) {
986
  // var bindings can be added after coding the program; leave the TOP first.
987
252k
  block top = gen_noop();
988
252k
  if (body.first && body.first->op == TOP)
989
0
    top = inst_block(block_take(&body));
990
991
252k
  if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
992
986
    block_append(&var, gen_op_simple(DUP));
993
251k
  } else {
994
251k
    top = BLOCK(top, gen_op_simple(DUP));
995
251k
  }
996
997
252k
  return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
998
252k
}
999
1000
// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
1001
13.7k
static block gen_wildvar_binding(block var, const char* name, block body) {
1002
13.7k
  return BLOCK(gen_op_simple(DUP), var,
1003
13.7k
               block_bind(gen_op_unbound(STOREV, name),
1004
13.7k
                          body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
1005
13.7k
}
1006
1007
199k
block gen_cond(block cond, block iftrue, block iffalse) {
1008
199k
  return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
1009
199k
               gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
1010
199k
                              BLOCK(gen_op_simple(POP), iffalse)));
1011
199k
}
1012
1013
50.1k
block gen_try(block exp, block handler) {
1014
  /*
1015
   * Produce:
1016
   *
1017
   *  TRY_BEGIN handler
1018
   *  <exp>
1019
   *  TRY_END
1020
   *  JUMP past_handler
1021
   *  handler: <handler>
1022
   *  past_handler:
1023
   *
1024
   * If <exp> backtracks then TRY_BEGIN will backtrack.
1025
   *
1026
   * If <exp> produces a value then we'll execute whatever bytecode follows
1027
   * this sequence.  If that code raises an exception, then TRY_END will wrap
1028
   * and re-raise that exception, and TRY_BEGIN will unwrap and re-raise the
1029
   * exception (see jq_next()).
1030
   *
1031
   * If <exp> raises then the TRY_BEGIN will see a non-wrapped exception and
1032
   * will jump to the handler (note the TRY_END will not execute in this case),
1033
   * and if the handler produces any values, then we'll execute whatever
1034
   * bytecode follows this sequence.  Note that TRY_END will not execute in
1035
   * this case, so if the handler raises an exception, or code past the handler
1036
   * raises an exception, then that exception won't be wrapped and re-raised,
1037
   * and the TRY_BEGIN will not catch it because it does not stack_save() when
1038
   * it branches to the handler.
1039
   */
1040
1041
50.1k
  if (block_is_noop(handler))
1042
379
    handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP));
1043
1044
50.1k
  block jump = gen_op_target(JUMP, handler);
1045
50.1k
  return BLOCK(gen_op_target(TRY_BEGIN, jump), exp, gen_op_simple(TRY_END),
1046
50.1k
               jump, handler);
1047
50.1k
}
1048
1049
13.7k
block gen_label(const char *label, block exp) {
1050
13.7k
  block cond = gen_call("_equal",
1051
13.7k
                        BLOCK(gen_lambda(gen_noop()),
1052
13.7k
                              gen_lambda(gen_op_unbound(LOADV, label))));
1053
13.7k
  return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
1054
13.7k
                             BLOCK(gen_op_simple(POP),
1055
                                   // try exp catch if . == $label
1056
                                   //               then empty
1057
                                   //               else error end
1058
                                   //
1059
                                   // Can't use gen_binop(), as that's firmly
1060
                                   // stuck in parser.y as it refers to things
1061
                                   // like EQ.
1062
13.7k
                                   gen_try(exp,
1063
13.7k
                                           gen_cond(cond,
1064
13.7k
                                                    gen_op_simple(BACKTRACK),
1065
13.7k
                                                    gen_call("error", gen_noop())))));
1066
13.7k
}
1067
1068
4.18k
block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
1069
573k
  for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
1070
569k
    inst* i = inst_new(CLOSURE_CREATE_C);
1071
569k
    i->imm.cfunc = &cfunctions[cfunc];
1072
569k
    i->symbol = strdup(cfunctions[cfunc].name);
1073
569k
    i->nformals = cfunctions[cfunc].nargs - 1;
1074
569k
    i->any_unbound = 0;
1075
569k
    code = BLOCK(inst_block(i), code);
1076
569k
  }
1077
4.18k
  return code;
1078
4.18k
}
1079
1080
252k
static uint16_t nesting_level(struct bytecode* bc, inst* target) {
1081
252k
  uint16_t level = 0;
1082
252k
  assert(bc && target && target->compiled);
1083
317k
  while (bc && target->compiled != bc) {
1084
65.3k
    level++;
1085
65.3k
    bc = bc->parent;
1086
65.3k
  }
1087
252k
  assert(bc && bc == target->compiled);
1088
252k
  return level;
1089
252k
}
1090
1091
545k
static int count_cfunctions(block b) {
1092
545k
  int n = 0;
1093
1.08M
  for (inst* i = b.first; i; i = i->next) {
1094
541k
    if (i->op == CLOSURE_CREATE_C) n++;
1095
541k
    n += count_cfunctions(i->subfn);
1096
541k
  }
1097
545k
  return n;
1098
545k
}
1099
1100
#ifndef WIN32
1101
extern char **environ;
1102
#endif
1103
1104
static jv
1105
make_env(jv env)
1106
50
{
1107
50
  if (jv_is_valid(env))
1108
48
    return jv_copy(env);
1109
2
  jv r = jv_object();
1110
2
  if (environ == NULL)
1111
0
    return r;
1112
72
  for (size_t i = 0; environ[i] != NULL; i++) {
1113
70
    const char *eq;
1114
1115
70
    if ((eq = strchr(environ[i], '=')) == NULL)
1116
0
      r = jv_object_delete(r, jv_string(environ[i]));
1117
70
    else
1118
70
      r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
1119
70
  }
1120
2
  return jv_copy(r);
1121
2
}
1122
1123
// Expands call instructions into a calling sequence
1124
208k
static int expand_call_arglist(block* b, jv args, jv *env) {
1125
208k
  int errors = 0;
1126
208k
  block ret = gen_noop();
1127
1.18M
  for (inst* curr; (curr = block_take(b));) {
1128
975k
    if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
1129
414k
      if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
1130
50
        curr->op = LOADK;
1131
50
        *env = curr->imm.constant = make_env(*env);
1132
414k
      } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
1133
0
        curr->op = LOADK;
1134
0
        curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
1135
414k
      } else if (!curr->bound_by) {
1136
4.61k
        if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
1137
0
          locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
1138
4.61k
        else if (curr->op == LOADV)
1139
490
          locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
1140
4.12k
        else
1141
4.12k
          locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals);
1142
4.61k
        errors++;
1143
        // don't process this instruction if it's not well-defined
1144
4.61k
        ret = BLOCK(ret, inst_block(curr));
1145
4.61k
        continue;
1146
4.61k
      }
1147
414k
    }
1148
1149
970k
    block prelude = gen_noop();
1150
970k
    if (curr->op == CALL_JQ) {
1151
227k
      int actual_args = 0, desired_args = 0;
1152
      // We expand the argument list as a series of instructions
1153
227k
      switch (curr->bound_by->op) {
1154
0
      default: assert(0 && "Unknown function type"); break;
1155
40.2k
      case CLOSURE_CREATE:
1156
72.0k
      case CLOSURE_PARAM: {
1157
72.0k
        block callargs = gen_noop();
1158
121k
        for (inst* i; (i = block_take(&curr->arglist));) {
1159
49.8k
          assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
1160
49.8k
          block b = inst_block(i);
1161
49.8k
          switch (i->op) {
1162
0
          default: assert(0 && "Unknown type of parameter"); break;
1163
0
          case CLOSURE_REF:
1164
0
            block_append(&callargs, b);
1165
0
            break;
1166
49.8k
          case CLOSURE_CREATE:
1167
49.8k
            block_append(&prelude, b);
1168
49.8k
            block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
1169
49.8k
            break;
1170
49.8k
          }
1171
49.8k
          actual_args++;
1172
49.8k
        }
1173
72.0k
        curr->imm.intval = actual_args;
1174
72.0k
        curr->arglist = callargs;
1175
1176
72.0k
        if (curr->bound_by->op == CLOSURE_CREATE) {
1177
90.1k
          for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
1178
49.8k
            assert(i->op == CLOSURE_PARAM);
1179
49.8k
            desired_args++;
1180
49.8k
          }
1181
40.2k
        }
1182
72.0k
        break;
1183
72.0k
      }
1184
1185
155k
      case CLOSURE_CREATE_C: {
1186
289k
        for (inst* i; (i = block_take(&curr->arglist)); ) {
1187
134k
          assert(i->op == CLOSURE_CREATE); // FIXME
1188
134k
          block body = i->subfn;
1189
134k
          i->subfn = gen_noop();
1190
134k
          inst_free(i);
1191
          // arguments should be pushed in reverse order, prepend them to prelude
1192
134k
          errors += expand_call_arglist(&body, args, env);
1193
134k
          prelude = BLOCK(gen_subexp(body), prelude);
1194
134k
          actual_args++;
1195
134k
        }
1196
155k
        assert(curr->op == CALL_JQ);
1197
155k
        curr->op = CALL_BUILTIN;
1198
155k
        curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
1199
155k
        assert(curr->bound_by->op == CLOSURE_CREATE_C);
1200
155k
        desired_args = curr->bound_by->imm.cfunc->nargs - 1;
1201
155k
        assert(!curr->arglist.first);
1202
155k
        break;
1203
155k
      }
1204
227k
      }
1205
1206
227k
      assert(actual_args == desired_args); // because now handle this above
1207
227k
    }
1208
970k
    ret = BLOCK(ret, prelude, inst_block(curr));
1209
970k
  }
1210
208k
  *b = ret;
1211
208k
  return errors;
1212
208k
}
1213
1214
74.0k
static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
1215
74.0k
  int errors = 0;
1216
74.0k
  int pos = 0;
1217
74.0k
  int var_frame_idx = 0;
1218
74.0k
  bc->nsubfunctions = 0;
1219
74.0k
  errors += expand_call_arglist(&b, args, env);
1220
74.0k
  b = BLOCK(b, gen_op_simple(RET));
1221
74.0k
  jv localnames = jv_array();
1222
1.38M
  for (inst* curr = b.first; curr; curr = curr->next) {
1223
1.30M
    if (!curr->next) assert(curr == b.last);
1224
1.30M
    int length = opcode_describe(curr->op)->length;
1225
1.30M
    if (curr->op == CALL_JQ) {
1226
126k
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1227
49.8k
        length += 2;
1228
49.8k
      }
1229
76.1k
    }
1230
1.30M
    pos += length;
1231
1.30M
    curr->bytecode_pos = pos;
1232
1.30M
    curr->compiled = bc;
1233
1234
1.30M
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1235
1236
1.30M
    if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
1237
1.30M
        curr->bound_by == curr) {
1238
53.3k
      curr->imm.intval = var_frame_idx++;
1239
53.3k
      localnames = jv_array_append(localnames, jv_string(curr->symbol));
1240
53.3k
    }
1241
1242
1.30M
    if (curr->op == CLOSURE_CREATE) {
1243
70.5k
      assert(curr->bound_by == curr);
1244
70.5k
      curr->imm.intval = bc->nsubfunctions++;
1245
70.5k
    }
1246
1.30M
    if (curr->op == CLOSURE_CREATE_C) {
1247
27.8k
      assert(curr->bound_by == curr);
1248
27.8k
      int idx = bc->globals->ncfunctions++;
1249
27.8k
      bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
1250
27.8k
                                                 jv_string(curr->symbol));
1251
27.8k
      bc->globals->cfunctions[idx] = *curr->imm.cfunc;
1252
27.8k
      curr->imm.intval = idx;
1253
27.8k
    }
1254
1.30M
  }
1255
74.0k
  if (pos > 0xFFFF) {
1256
    // too long for program counter to fit in uint16_t
1257
1
    locfile_locate(lf, UNKNOWN_LOCATION,
1258
1
        "function compiled to %d bytes which is too long", pos);
1259
1
    errors++;
1260
1
  }
1261
74.0k
  bc->codelen = pos;
1262
74.0k
  bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
1263
74.0k
  if (bc->nsubfunctions && !errors) {
1264
24.6k
    bc->subfunctions = jv_mem_calloc(bc->nsubfunctions, sizeof(struct bytecode*));
1265
775k
    for (inst* curr = b.first; curr; curr = curr->next) {
1266
750k
      if (curr->op == CLOSURE_CREATE) {
1267
69.8k
        struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
1268
69.8k
        bc->subfunctions[curr->imm.intval] = subfn;
1269
69.8k
        subfn->globals = bc->globals;
1270
69.8k
        subfn->parent = bc;
1271
69.8k
        subfn->nclosures = 0;
1272
69.8k
        subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
1273
69.8k
        jv params = jv_array();
1274
101k
        for (inst* param = curr->arglist.first; param; param = param->next) {
1275
31.8k
          assert(param->op == CLOSURE_PARAM);
1276
31.8k
          assert(param->bound_by == param);
1277
31.8k
          param->imm.intval = subfn->nclosures++;
1278
31.8k
          param->compiled = subfn;
1279
31.8k
          params = jv_array_append(params, jv_string(param->symbol));
1280
31.8k
        }
1281
69.8k
        subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
1282
69.8k
        errors += compile(subfn, curr->subfn, lf, args, env);
1283
69.8k
        curr->subfn = gen_noop();
1284
69.8k
      }
1285
750k
    }
1286
49.3k
  } else {
1287
49.3k
    bc->nsubfunctions = 0;
1288
49.3k
    bc->subfunctions = 0;
1289
49.3k
  }
1290
74.0k
  uint16_t* code = jv_mem_calloc(bc->codelen, sizeof(uint16_t));
1291
74.0k
  bc->code = code;
1292
74.0k
  pos = 0;
1293
74.0k
  jv constant_pool = jv_array();
1294
74.0k
  int maxvar = -1;
1295
1.28M
  if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
1296
1.21M
    const struct opcode_description* op = opcode_describe(curr->op);
1297
1.21M
    if (op->length == 0)
1298
96.7k
      continue;
1299
1.11M
    code[pos++] = curr->op;
1300
1.11M
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1301
1.11M
    if (curr->op == CALL_BUILTIN) {
1302
103k
      assert(curr->bound_by->op == CLOSURE_CREATE_C);
1303
103k
      assert(!curr->arglist.first);
1304
103k
      code[pos++] = (uint16_t)curr->imm.intval;
1305
103k
      code[pos++] = curr->bound_by->imm.intval;
1306
1.01M
    } else if (curr->op == CALL_JQ) {
1307
71.2k
      assert(curr->bound_by->op == CLOSURE_CREATE ||
1308
71.2k
             curr->bound_by->op == CLOSURE_PARAM);
1309
71.2k
      code[pos++] = (uint16_t)curr->imm.intval;
1310
71.2k
      code[pos++] = nesting_level(bc, curr->bound_by);
1311
71.2k
      code[pos++] = curr->bound_by->imm.intval |
1312
71.2k
        (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
1313
119k
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1314
48.4k
        assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
1315
48.4k
        code[pos++] = nesting_level(bc, arg->bound_by);
1316
48.4k
        code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
1317
48.4k
      }
1318
941k
    } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
1319
      // STORE_GLOBAL: constant global, basically
1320
0
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1321
0
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1322
0
      code[pos++] = nesting_level(bc, curr->bound_by);
1323
0
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1324
0
      code[pos++] = var;
1325
0
      if (var > maxvar) maxvar = var;
1326
941k
    } else if (op->flags & OP_HAS_CONSTANT) {
1327
121k
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1328
121k
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1329
820k
    } else if (op->flags & OP_HAS_VARIABLE) {
1330
132k
      code[pos++] = nesting_level(bc, curr->bound_by);
1331
132k
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1332
132k
      code[pos++] = var;
1333
132k
      if (var > maxvar) maxvar = var;
1334
687k
    } else if (op->flags & OP_HAS_BRANCH) {
1335
119k
      assert(curr->imm.target->bytecode_pos != -1);
1336
119k
      assert(curr->imm.target->bytecode_pos > pos); // only forward branches
1337
119k
      code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
1338
119k
      pos++;
1339
568k
    } else if (op->length > 1) {
1340
0
      assert(0 && "codegen not implemented for this operation");
1341
0
    }
1342
1.11M
  }
1343
74.0k
  bc->constants = constant_pool;
1344
74.0k
  bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
1345
74.0k
  block_free(b);
1346
74.0k
  return errors;
1347
74.0k
}
1348
1349
4.18k
int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
1350
4.18k
  struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
1351
4.18k
  bc->parent = 0;
1352
4.18k
  bc->nclosures = 0;
1353
4.18k
  bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
1354
4.18k
  int ncfunc = count_cfunctions(b);
1355
4.18k
  bc->globals->ncfunctions = 0;
1356
4.18k
  bc->globals->cfunctions = jv_mem_calloc(MAX(ncfunc, 1), sizeof(struct cfunction));
1357
4.18k
  bc->globals->cfunc_names = jv_array();
1358
4.18k
  bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
1359
4.18k
  jv env = jv_invalid();
1360
4.18k
  int nerrors = compile(bc, b, lf, args, &env);
1361
4.18k
  jv_free(args);
1362
4.18k
  jv_free(env);
1363
4.18k
  assert(bc->globals->ncfunctions == ncfunc);
1364
4.18k
  if (nerrors > 0) {
1365
49
    bytecode_free(bc);
1366
49
    *out = 0;
1367
4.13k
  } else {
1368
4.13k
    *out = bc;
1369
4.13k
  }
1370
4.18k
  return nerrors;
1371
4.18k
}
1372
1373
35.9M
void block_free(block b) {
1374
35.9M
  struct inst* next;
1375
52.5M
  for (struct inst* curr = b.first; curr; curr = next) {
1376
16.5M
    next = curr->next;
1377
16.5M
    inst_free(curr);
1378
16.5M
  }
1379
35.9M
}