Coverage Report

Created: 2025-10-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jq/src/compile.c
Line
Count
Source
1
#include <assert.h>
2
#include <string.h>
3
#include <stdlib.h>
4
#include <unistd.h>
5
#include "compile.h"
6
#include "bytecode.h"
7
#include "locfile.h"
8
#include "jv_alloc.h"
9
#include "util.h"
10
11
/*
12
  The intermediate representation for jq filters is as a sequence of
13
  struct inst, which form a doubly-linked list via the next and prev
14
  pointers.
15
16
  A "block" represents a sequence of "struct inst", which may be
17
  empty.
18
19
  Blocks are generated by the parser bottom-up, so may have free
20
  variables (refer to things not defined). See inst.bound_by and
21
  inst.symbol.
22
 */
23
struct inst {
24
  struct inst* next;
25
  struct inst* prev;
26
27
  opcode op;
28
29
  struct {
30
    uint16_t intval;
31
    struct inst* target;
32
    jv constant;
33
    const struct cfunction* cfunc;
34
  } imm;
35
36
  struct locfile* locfile;
37
  location source;
38
39
  // Binding
40
  // An instruction requiring binding (for parameters/variables/functions)
41
  // is in one of three states:
42
  //   inst->bound_by = NULL  - Unbound free variable
43
  //   inst->bound_by = inst  - This instruction binds a variable
44
  //   inst->bound_by = other - Uses variable bound by other instruction
45
  // Unbound instructions (references to other things that may or may not
46
  // exist) are created by "gen_foo_unbound", and bindings are created by
47
  // block_bind(definition, body), which binds all instructions in
48
  // body which are unbound and refer to "definition" by name.
49
  struct inst* bound_by;
50
  char* symbol;
51
  int any_unbound;
52
  int referenced;
53
54
  int nformals;
55
  int nactuals;
56
57
  block subfn;   // used by CLOSURE_CREATE (body of function)
58
  block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
59
60
  // This instruction is compiled as part of which function?
61
  // (only used during block_compile)
62
  struct bytecode* compiled;
63
64
  int bytecode_pos; // position just after this insn
65
};
66
67
87.5M
static inst* inst_new(opcode op) {
68
87.5M
  inst* i = jv_mem_alloc(sizeof(inst));
69
87.5M
  i->next = i->prev = 0;
70
87.5M
  i->op = op;
71
87.5M
  i->bytecode_pos = -1;
72
87.5M
  i->bound_by = 0;
73
87.5M
  i->symbol = 0;
74
87.5M
  i->any_unbound = 0;
75
87.5M
  i->referenced = 0;
76
87.5M
  i->nformals = -1;
77
87.5M
  i->nactuals = -1;
78
87.5M
  i->subfn = gen_noop();
79
87.5M
  i->arglist = gen_noop();
80
87.5M
  i->source = UNKNOWN_LOCATION;
81
87.5M
  i->locfile = 0;
82
87.5M
  return i;
83
87.5M
}
84
85
87.5M
static void inst_free(struct inst* i) {
86
87.5M
  jv_mem_free(i->symbol);
87
87.5M
  block_free(i->subfn);
88
87.5M
  block_free(i->arglist);
89
87.5M
  if (i->locfile)
90
12.9M
    locfile_free(i->locfile);
91
87.5M
  if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
92
15.0M
    jv_free(i->imm.constant);
93
15.0M
  }
94
87.5M
  jv_mem_free(i);
95
87.5M
}
96
97
103M
static block inst_block(inst* i) {
98
103M
  block b = {i,i};
99
103M
  return b;
100
103M
}
101
102
1.02G
int block_is_single(block b) {
103
1.02G
  return b.first && b.first == b.last;
104
1.02G
}
105
106
7.09M
static inst* block_take(block* b) {
107
7.09M
  if (b->first == 0) return 0;
108
5.65M
  inst* i = b->first;
109
5.65M
  if (i->next) {
110
4.58M
    i->next->prev = 0;
111
4.58M
    b->first = i->next;
112
4.58M
    i->next = 0;
113
4.58M
  } else {
114
1.06M
    b->first = 0;
115
1.06M
    b->last = 0;
116
1.06M
  }
117
5.65M
  return i;
118
7.09M
}
119
120
10.7M
block gen_location(location loc, struct locfile* l, block b) {
121
24.2M
  for (inst* i = b.first; i; i = i->next) {
122
13.5M
    if (i->source.start == UNKNOWN_LOCATION.start &&
123
12.9M
        i->source.end == UNKNOWN_LOCATION.end) {
124
12.9M
      i->source = loc;
125
12.9M
      i->locfile = locfile_retain(l);
126
12.9M
    }
127
13.5M
  }
128
10.7M
  return b;
129
10.7M
}
130
131
205M
block gen_noop(void) {
132
205M
  block b = {0,0};
133
205M
  return b;
134
205M
}
135
136
10.9M
int block_is_noop(block b) {
137
10.9M
  return (b.first == 0 && b.last == 0);
138
10.9M
}
139
140
24.2M
block gen_op_simple(opcode op) {
141
24.2M
  assert(opcode_describe(op)->length == 1);
142
24.2M
  return inst_block(inst_new(op));
143
24.2M
}
144
145
146
23.2k
block gen_error(jv constant) {
147
23.2k
  assert(opcode_describe(ERRORK)->flags & OP_HAS_CONSTANT);
148
23.2k
  inst *i = inst_new(ERRORK);
149
23.2k
  i->imm.constant = constant;
150
23.2k
  return inst_block(i);
151
23.2k
}
152
153
12.1M
block gen_const(jv constant) {
154
12.1M
  assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
155
12.1M
  inst* i = inst_new(LOADK);
156
12.1M
  i->imm.constant = constant;
157
12.1M
  return inst_block(i);
158
12.1M
}
159
160
0
block gen_const_global(jv constant, const char *name) {
161
0
  assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
162
0
         (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
163
0
  inst* i = inst_new(STORE_GLOBAL);
164
0
  i->imm.constant = constant;
165
0
  i->symbol = strdup(name);
166
0
  i->any_unbound = 0;
167
0
  return inst_block(i);
168
0
}
169
170
2.84M
block gen_op_pushk_under(jv constant) {
171
2.84M
  assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
172
2.84M
  inst* i = inst_new(PUSHK_UNDER);
173
2.84M
  i->imm.constant = constant;
174
2.84M
  return inst_block(i);
175
2.84M
}
176
177
10.6M
int block_is_const(block b) {
178
10.6M
  return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
179
10.6M
}
180
181
28.2k
jv_kind block_const_kind(block b) {
182
28.2k
  assert(block_is_const(b));
183
28.2k
  return jv_get_kind(b.first->imm.constant);
184
28.2k
}
185
186
5.83M
jv block_const(block b) {
187
5.83M
  assert(block_is_const(b));
188
5.83M
  return jv_copy(b.first->imm.constant);
189
5.83M
}
190
191
6.83M
block gen_op_target(opcode op, block target) {
192
6.83M
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
193
6.83M
  assert(target.last);
194
6.83M
  inst* i = inst_new(op);
195
6.83M
  i->imm.target = target.last;
196
6.83M
  return inst_block(i);
197
6.83M
}
198
199
599k
block gen_op_targetlater(opcode op) {
200
599k
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
201
599k
  inst* i = inst_new(op);
202
599k
  i->imm.target = 0;
203
599k
  return inst_block(i);
204
599k
}
205
599k
void inst_set_target(block b, block target) {
206
599k
  assert(block_is_single(b));
207
599k
  assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
208
599k
  assert(target.last);
209
599k
  b.first->imm.target = target.last;
210
599k
}
211
212
24.5M
block gen_op_unbound(opcode op, const char* name) {
213
24.5M
  assert(opcode_describe(op)->flags & OP_HAS_BINDING);
214
24.5M
  inst* i = inst_new(op);
215
24.5M
  i->symbol = strdup(name);
216
24.5M
  i->any_unbound = 1;
217
24.5M
  return inst_block(i);
218
24.5M
}
219
220
1.48M
block gen_op_var_fresh(opcode op, const char* name) {
221
1.48M
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
222
1.48M
  block b = gen_op_unbound(op, name);
223
1.48M
  b.first->bound_by = b.first;
224
1.48M
  return b;
225
1.48M
}
226
227
3.36M
block gen_op_bound(opcode op, block binder) {
228
3.36M
  assert(block_is_single(binder));
229
3.36M
  block b = gen_op_unbound(op, binder.first->symbol);
230
3.36M
  b.first->bound_by = binder.first;
231
3.36M
  b.first->any_unbound = 0;
232
3.36M
  return b;
233
3.36M
}
234
235
415k
block gen_dictpair(block k, block v) {
236
415k
  return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
237
415k
}
238
239
240
66.3M
static void inst_join(inst* a, inst* b) {
241
66.3M
  assert(a && b);
242
66.3M
  assert(!a->next);
243
66.3M
  assert(!b->prev);
244
66.3M
  a->next = b;
245
66.3M
  b->prev = a;
246
66.3M
}
247
248
75.0M
void block_append(block* b, block b2) {
249
75.0M
  if (b2.first) {
250
69.3M
    if (b->last) {
251
66.3M
      inst_join(b->last, b2.first);
252
66.3M
    } else {
253
3.01M
      b->first = b2.first;
254
3.01M
    }
255
69.3M
    b->last = b2.last;
256
69.3M
  }
257
75.0M
}
258
259
74.5M
block block_join(block a, block b) {
260
74.5M
  block c = a;
261
74.5M
  block_append(&c, b);
262
74.5M
  return c;
263
74.5M
}
264
265
23.9k
int block_has_only_binders_and_imports(block binders, int bindflags) {
266
23.9k
  bindflags |= OP_HAS_BINDING;
267
2.55M
  for (inst* curr = binders.first; curr; curr = curr->next) {
268
2.53M
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
269
0
      return 0;
270
0
    }
271
2.53M
  }
272
23.9k
  return 1;
273
23.9k
}
274
275
0
static int inst_is_binder(inst *i, int bindflags) {
276
0
  return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
277
0
}
278
279
274k
int block_has_only_binders(block binders, int bindflags) {
280
274k
  bindflags |= OP_HAS_BINDING;
281
274k
  bindflags &= ~OP_BIND_WILDCARD;
282
6.45M
  for (inst* curr = binders.first; curr; curr = curr->next) {
283
6.17M
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
284
0
      return 0;
285
0
    }
286
6.17M
  }
287
274k
  return 1;
288
274k
}
289
290
// Count a call site's actual params
291
11.4M
static int block_count_actuals(block b) {
292
11.4M
  int args = 0;
293
21.6M
  for (inst* i = b.first; i; i = i->next) {
294
10.1M
    switch (i->op) {
295
0
    default: assert(0 && "Unknown function type"); break;
296
10.1M
    case CLOSURE_CREATE:
297
10.1M
    case CLOSURE_PARAM:
298
10.1M
    case CLOSURE_CREATE_C:
299
10.1M
      args++;
300
10.1M
      break;
301
10.1M
    }
302
10.1M
  }
303
11.4M
  return args;
304
11.4M
}
305
306
998M
static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
307
998M
  assert(block_is_single(binder));
308
998M
  assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
309
998M
  assert(binder.first->symbol);
310
998M
  assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
311
998M
  assert(break_distance >= 0);
312
313
998M
  binder.first->bound_by = binder.first;
314
998M
  int nrefs = 0;
315
2.29G
  for (inst* i = body.first; i; i = i->next) {
316
1.29G
    if (i->any_unbound == 0)
317
811M
      continue;
318
319
487M
    int flags = opcode_describe(i->op)->flags;
320
487M
    if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
321
247M
        (!strcmp(i->symbol, binder.first->symbol) ||
322
         // Check for break/break2/break3; see parser.y
323
240M
         ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
324
745
          break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
325
7.93M
          i->symbol[2] == '\0'))) {
326
      // bind this instruction
327
7.93M
      if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
328
7.34M
        i->bound_by = binder.first;
329
7.34M
        nrefs++;
330
7.34M
      }
331
479M
    } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
332
217M
               !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
333
0
               !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
334
      // Increment the break distance required for this binder to match
335
      // a break whenever we come across a STOREV of *anonlabel...
336
0
      break_distance++;
337
0
    }
338
339
487M
    i->any_unbound = (i->symbol && !i->bound_by);
340
341
    // binding recurses into closures
342
487M
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance);
343
    // binding recurses into argument list
344
487M
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance);
345
346
487M
    if (i->any_unbound)
347
466M
      *any_unbound = 1;
348
487M
  }
349
998M
  return nrefs;
350
998M
}
351
352
23.5M
static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
353
23.5M
  int any_unbound;
354
23.5M
  return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance);
355
23.5M
}
356
357
74.2k
static int block_bind_each(block binder, block body, int bindflags) {
358
74.2k
  assert(block_has_only_binders(binder, bindflags));
359
74.2k
  bindflags |= OP_HAS_BINDING;
360
74.2k
  int nrefs = 0;
361
148k
  for (inst* curr = binder.first; curr; curr = curr->next) {
362
74.2k
    nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
363
74.2k
  }
364
74.2k
  return nrefs;
365
74.2k
}
366
367
74.2k
static block block_bind(block binder, block body, int bindflags) {
368
74.2k
  block_bind_each(binder, body, bindflags);
369
74.2k
  return block_join(binder, body);
370
74.2k
}
371
372
0
block block_bind_library(block binder, block body, int bindflags, const char *libname) {
373
0
  bindflags |= OP_HAS_BINDING;
374
0
  int matchlen = (libname == NULL) ? 0 : strlen(libname);
375
0
  char *matchname = jv_mem_alloc(matchlen+2+1);
376
0
  matchname[0] = '\0';
377
0
  if (libname != NULL && libname[0] != '\0') {
378
0
    strcpy(matchname,libname);
379
0
    strcpy(matchname+matchlen, "::");
380
0
    matchlen += 2;
381
0
  }
382
0
  assert(block_has_only_binders(binder, bindflags));
383
0
  for (inst *curr = binder.last; curr; curr = curr->prev) {
384
0
    int bindflags2 = bindflags;
385
0
    char* cname = curr->symbol;
386
0
    char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
387
0
    strcpy(tname, matchname);
388
0
    strcpy(tname+matchlen, curr->symbol);
389
390
    // Ew
391
0
    if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
392
0
      bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
393
394
    // This mutation is ugly, even if we undo it
395
0
    curr->symbol = tname;
396
0
    block_bind_subblock(inst_block(curr), body, bindflags2, 0);
397
0
    curr->symbol = cname;
398
0
    free(tname);
399
0
  }
400
0
  free(matchname);
401
0
  return body; // We don't return a join because we don't want those sticking around...
402
0
}
403
404
6.30M
static inst* block_take_last(block* b) {
405
6.30M
  inst* i = b->last;
406
6.30M
  if (i == 0)
407
200k
    return 0;
408
6.10M
  if (i->prev) {
409
5.90M
    i->prev->next = i->next;
410
5.90M
    b->last = i->prev;
411
5.90M
    i->prev = 0;
412
5.90M
  } else {
413
200k
    b->first = 0;
414
200k
    b->last = 0;
415
200k
  }
416
6.10M
  return i;
417
6.30M
}
418
419
// Binds a sequence of binders, which *must not* already be bound to each other,
420
// to body, throwing away unreferenced defs
421
200k
block block_bind_referenced(block binder, block body, int bindflags) {
422
200k
  assert(block_has_only_binders(binder, bindflags));
423
200k
  bindflags |= OP_HAS_BINDING;
424
425
200k
  inst* curr;
426
6.30M
  while ((curr = block_take_last(&binder))) {
427
6.10M
    block b = inst_block(curr);
428
6.10M
    if (block_bind_subblock(b, body, bindflags, 0) == 0) {
429
5.74M
      block_free(b);
430
5.74M
    } else {
431
360k
      body = BLOCK(b, body);
432
360k
    }
433
6.10M
  }
434
200k
  return body;
435
200k
}
436
437
0
block block_bind_self(block binder, int bindflags) {
438
0
  assert(block_has_only_binders(binder, bindflags));
439
0
  bindflags |= OP_HAS_BINDING;
440
0
  block body = gen_noop();
441
442
0
  inst* curr;
443
0
  while ((curr = block_take_last(&binder))) {
444
0
    block b = inst_block(curr);
445
0
    block_bind_subblock(b, body, bindflags, 0);
446
0
    body = BLOCK(b, body);
447
0
  }
448
0
  return body;
449
0
}
450
451
5.74M
static void block_mark_referenced(block body) {
452
5.74M
  int saw_top = 0;
453
8.60M
  for (inst* i = body.last; i; i = i->prev) {
454
2.86M
    if (saw_top && i->bound_by == i && !i->referenced)
455
0
      continue;
456
2.86M
    if (i->op == TOP) {
457
23.9k
      saw_top = 1;
458
23.9k
    }
459
2.86M
    if (i->bound_by) {
460
818k
      i->bound_by->referenced = 1;
461
818k
    }
462
463
2.86M
    block_mark_referenced(i->arglist);
464
2.86M
    block_mark_referenced(i->subfn);
465
2.86M
  }
466
5.74M
}
467
468
23.9k
block block_drop_unreferenced(block body) {
469
23.9k
  block_mark_referenced(body);
470
471
23.9k
  block refd = gen_noop();
472
23.9k
  inst* curr;
473
1.06M
  while ((curr = block_take(&body))) {
474
1.03M
    if (curr->bound_by == curr && !curr->referenced) {
475
0
      inst_free(curr);
476
1.03M
    } else {
477
1.03M
      refd = BLOCK(refd, inst_block(curr));
478
1.03M
    }
479
1.03M
  }
480
23.9k
  return refd;
481
23.9k
}
482
483
23.9k
jv block_take_imports(block* body) {
484
23.9k
  jv imports = jv_array();
485
486
  /* Parser should never generate TOP before imports */
487
23.9k
  assert(!(body->first && body->first->op == TOP && body->first->next &&
488
23.9k
        (body->first->next->op == MODULEMETA || body->first->next->op == DEPS)));
489
490
48.5k
  while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
491
24.5k
    inst* dep = block_take(body);
492
24.5k
    if (dep->op == DEPS) {
493
24.5k
      imports = jv_array_append(imports, jv_copy(dep->imm.constant));
494
24.5k
    }
495
24.5k
    inst_free(dep);
496
24.5k
  }
497
23.9k
  return imports;
498
23.9k
}
499
500
23.9k
jv block_list_funcs(block body, int omit_underscores) {
501
23.9k
  jv funcs = jv_object(); // Use the keys for set semantics.
502
5.92M
  for (inst *pos = body.first; pos != NULL; pos = pos->next) {
503
5.90M
    if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
504
5.90M
      if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
505
5.37M
        funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
506
5.37M
      }
507
5.90M
    }
508
5.90M
  }
509
23.9k
  return jv_keys_unsorted(funcs);
510
23.9k
}
511
512
34
block gen_module(block metadata) {
513
34
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
514
34
  inst* i = inst_new(MODULEMETA);
515
34
  i->imm.constant = block_const(metadata);
516
34
  if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
517
0
    i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
518
34
  block_free(metadata);
519
34
  return inst_block(i);
520
34
}
521
522
0
jv block_module_meta(block b) {
523
0
  if (b.first != NULL && b.first->op == MODULEMETA)
524
0
    return jv_copy(b.first->imm.constant);
525
0
  return jv_null();
526
0
}
527
528
29.1k
block gen_import(const char* name, const char* as, int is_data) {
529
29.1k
  inst* i = inst_new(DEPS);
530
29.1k
  jv meta = jv_object();
531
29.1k
  if (as != NULL)
532
3.76k
    meta = jv_object_set(meta, jv_string("as"), jv_string(as));
533
29.1k
  meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
534
29.1k
  meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
535
29.1k
  i->imm.constant = meta;
536
29.1k
  return inst_block(i);
537
29.1k
}
538
539
24.5k
block gen_import_meta(block import, block metadata) {
540
24.5k
  assert(block_is_single(import) && import.first->op == DEPS);
541
24.5k
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
542
24.5k
  inst *i = import.first;
543
24.5k
  i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
544
24.5k
  block_free(metadata);
545
24.5k
  return import;
546
24.5k
}
547
548
13.0M
block gen_function(const char* name, block formals, block body) {
549
13.0M
  inst* i = inst_new(CLOSURE_CREATE);
550
13.0M
  int nformals = 0;
551
15.5M
  for (inst* i = formals.last; i; i = i->prev) {
552
2.56M
    nformals++;
553
2.56M
    i->nformals = 0;
554
2.56M
    if (i->op == CLOSURE_PARAM_REGULAR) {
555
885k
      i->op = CLOSURE_PARAM;
556
885k
      body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
557
885k
    }
558
2.56M
    block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
559
2.56M
  }
560
13.0M
  i->subfn = body;
561
13.0M
  i->symbol = strdup(name);
562
13.0M
  i->any_unbound = -1;
563
13.0M
  i->nformals = nformals;
564
13.0M
  i->arglist = formals;
565
13.0M
  block b = inst_block(i);
566
13.0M
  block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
567
13.0M
  return b;
568
13.0M
}
569
570
886k
block gen_param_regular(const char* name) {
571
886k
  return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
572
886k
}
573
574
1.67M
block gen_param(const char* name) {
575
1.67M
  return gen_op_unbound(CLOSURE_PARAM, name);
576
1.67M
}
577
578
10.1M
block gen_lambda(block body) {
579
10.1M
  return gen_function("@lambda", gen_noop(), body);
580
10.1M
}
581
582
11.4M
block gen_call(const char* name, block args) {
583
11.4M
  block b = gen_op_unbound(CALL_JQ, name);
584
11.4M
  b.first->arglist = args;
585
11.4M
  b.first->nactuals = block_count_actuals(b.first->arglist);
586
11.4M
  return b;
587
11.4M
}
588
589
6.56M
block gen_subexp(block a) {
590
6.56M
  if (block_is_noop(a)) {
591
244k
    return gen_op_simple(DUP);
592
244k
  }
593
6.32M
  if (block_is_single(a) && a.first->op == LOADK) {
594
2.84M
    jv c = block_const(a);
595
2.84M
    block_free(a);
596
2.84M
    return gen_op_pushk_under(c);
597
2.84M
  }
598
3.47M
  return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
599
6.32M
}
600
601
599k
block gen_both(block a, block b) {
602
599k
  block jump = gen_op_targetlater(JUMP);
603
599k
  block fork = gen_op_target(FORK, jump);
604
599k
  block c = BLOCK(fork, a, jump, b);
605
599k
  inst_set_target(jump, c);
606
599k
  return c;
607
599k
}
608
609
359k
block gen_const_object(block expr) {
610
359k
  int is_const = 1;
611
359k
  jv o = jv_object();
612
359k
  jv k = jv_null();
613
359k
  jv v = jv_null();
614
523k
  for (inst *i = expr.first; i; i = i->next) {
615
302k
    if (i->op == PUSHK_UNDER) {
616
223k
      k = jv_copy(i->imm.constant);
617
223k
      i = i->next;
618
223k
    } else if (i->op != SUBEXP_BEGIN ||
619
72.9k
        i->next == NULL ||
620
72.9k
        i->next->op != LOADK ||
621
292
        i->next->next == NULL ||
622
78.6k
        i->next->next->op != SUBEXP_END) {
623
78.6k
      is_const = 0;
624
78.6k
      break;
625
78.6k
    } else {
626
0
      k = jv_copy(i->next->imm.constant);
627
0
      i = i->next->next->next;
628
0
    }
629
223k
    if (i != NULL && i->op == PUSHK_UNDER) {
630
166k
      v = jv_copy(i->imm.constant);
631
166k
      i = i->next;
632
166k
    } else if (i == NULL ||
633
57.2k
        i->op != SUBEXP_BEGIN ||
634
55.8k
        i->next == NULL ||
635
55.8k
        i->next->op != LOADK ||
636
1.35k
        i->next->next == NULL ||
637
57.2k
        i->next->next->op != SUBEXP_END) {
638
57.2k
      is_const = 0;
639
57.2k
      break;
640
57.2k
    } else {
641
0
      v = jv_copy(i->next->imm.constant);
642
0
      i = i->next->next->next;
643
0
    }
644
166k
    if (i == NULL || i->op != INSERT) {
645
2.08k
      is_const = 0;
646
2.08k
      break;
647
2.08k
    }
648
164k
    if (jv_get_kind(k) != JV_KIND_STRING) {
649
646
      is_const = 0;
650
646
      break;
651
646
    }
652
163k
    o = jv_object_set(o, k, v);
653
163k
    k = jv_null();
654
163k
    v = jv_null();
655
163k
  }
656
359k
  if (!is_const) {
657
138k
    jv_free(o);
658
138k
    jv_free(k);
659
138k
    jv_free(v);
660
138k
    block b = {0,0};
661
138k
    return b;
662
138k
  }
663
221k
  block_free(expr);
664
221k
  return gen_const(o);
665
359k
}
666
667
894k
static block gen_const_array(block expr) {
668
  /*
669
   * An expr of all constant elements looks like this:
670
   *
671
   * 0009 FORK 0027
672
   * 0011 FORK 0023
673
   * 0013 FORK 0019
674
   * 0015 LOADK 1
675
   * 0017 JUMP 0021
676
   * 0019 LOADK 2
677
   * 0021 JUMP 0025
678
   * 0023 LOADK 3
679
   * 0025 JUMP 0029
680
   * 0027 LOADK 4
681
   *
682
   * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
683
   * every LOADK.  The sequence ends in a LOADK.  Any deviation and it's
684
   * not a list of constants.
685
   *
686
   * Here we check for this pattern almost exactly.  We don't check that
687
   * the targets of the FORK and JUMP instructions are in the right
688
   * sequence.
689
   */
690
894k
  int all_const = 1;
691
894k
  int commas = 0;
692
894k
  int normal = 1;
693
894k
  jv a = jv_array();
694
3.82M
  for (inst *i = expr.first; i; i = i->next) {
695
3.03M
    if (i->op == FORK) {
696
289k
      commas++;
697
289k
      if (i->imm.target == NULL || i->imm.target->op != JUMP ||
698
178k
          jv_array_length(jv_copy(a)) > 0) {
699
111k
        normal = 0;
700
111k
        break;
701
111k
      }
702
2.75M
    } else if (all_const && i->op == LOADK) {
703
291k
      if (i->next != NULL && i->next->op != JUMP) {
704
1.78k
        normal = 0;
705
1.78k
        break;
706
1.78k
      }
707
289k
      a = jv_array_append(a, jv_copy(i->imm.constant));
708
2.45M
    } else if (i->op != JUMP || i->imm.target == NULL ||
709
2.40M
               i->imm.target->op != LOADK) {
710
2.40M
      all_const = 0;
711
2.40M
    }
712
3.03M
  }
713
714
894k
  if (all_const && normal &&
715
241k
      (expr.last == NULL || expr.last->op == LOADK) &&
716
241k
      jv_array_length(jv_copy(a)) == commas + 1) {
717
215k
    block_free(expr);
718
215k
    return gen_const(a);
719
215k
  }
720
721
678k
  jv_free(a);
722
678k
  block b = {0,0};
723
678k
  return b;
724
894k
}
725
726
894k
block gen_collect(block expr) {
727
894k
  block const_array = gen_const_array(expr);
728
894k
  if (const_array.first != NULL)
729
215k
    return const_array;
730
731
678k
  block array_var = gen_op_var_fresh(STOREV, "collect");
732
678k
  block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
733
734
678k
  block tail = BLOCK(gen_op_bound(APPEND, array_var),
735
678k
                     gen_op_simple(BACKTRACK));
736
737
678k
  return BLOCK(c,
738
894k
               gen_op_target(FORK, tail),
739
894k
               expr,
740
894k
               tail,
741
894k
               gen_op_bound(LOADVN, array_var));
742
894k
}
743
744
1.80M
static block bind_matcher(block matcher, block body) {
745
  // cannot call block_bind(matcher, body) because that requires
746
  // block_has_only_binders(matcher), which is not true here as matchers
747
  // may also contain code to extract the correct elements
748
3.82M
  for (inst* i = matcher.first; i; i = i->next) {
749
2.02M
    if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
750
1.83M
      block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
751
2.02M
  }
752
1.80M
  return BLOCK(matcher, body);
753
1.80M
}
754
755
756
// Extract destructuring var names from the block
757
// *vars should be a jv_object (for set semantics)
758
12.2k
static void block_get_unbound_vars(block b, jv *vars) {
759
12.2k
  assert(vars != NULL);
760
12.2k
  assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
761
35.3k
  for (inst* i = b.first; i; i = i->next) {
762
23.1k
    if (i->subfn.first) {
763
5.80k
      block_get_unbound_vars(i->subfn, vars);
764
5.80k
      continue;
765
5.80k
    }
766
17.3k
    if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
767
10.4k
      *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
768
10.4k
    }
769
17.3k
  }
770
12.2k
}
771
772
/* Build wrappers around destructuring matchers so that we can chain them
773
 * when we have errors.  The approach is as follows:
774
 * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
775
 * existing_matcher_block
776
 * JUMP BODY
777
 */
778
1.80M
static block bind_alternation_matchers(block matchers, block body) {
779
1.80M
  block preamble = {0};
780
1.80M
  block altmatchers = {0};
781
1.80M
  block mb = {0};
782
1.80M
  block final_matcher = matchers;
783
784
  // Pass through the matchers to find all destructured names.
785
1.81M
  while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
786
5.80k
    block_append(&altmatchers, inst_block(block_take(&final_matcher)));
787
5.80k
  }
788
789
  // We don't have any alternations here, so we can use the simplest case.
790
1.80M
  if (altmatchers.first == NULL) {
791
1.80M
    return bind_matcher(final_matcher, body);
792
1.80M
  }
793
794
  // Collect var names
795
3.23k
  jv all_vars = jv_object();
796
3.23k
  block_get_unbound_vars(altmatchers, &all_vars);
797
3.23k
  block_get_unbound_vars(final_matcher, &all_vars);
798
799
  // We need a preamble of STOREVs to which to bind the matchers and the body.
800
7.57k
  jv_object_keys_foreach(all_vars, key) {
801
7.57k
    preamble = BLOCK(preamble,
802
7.57k
                     gen_op_simple(DUP),
803
7.57k
                     gen_const(jv_null()),
804
7.57k
                     gen_op_unbound(STOREV, jv_string_value(key)));
805
7.57k
    jv_free(key);
806
7.57k
  }
807
3.23k
  jv_free(all_vars);
808
809
  // Now we build each matcher in turn
810
9.04k
  for (inst *i = altmatchers.first; i; i = i->next) {
811
5.80k
    block submatcher = i->subfn;
812
813
    // If we're successful, jump to the end of the matchers
814
5.80k
    submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));
815
816
    // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
817
5.80k
    mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);
818
819
    // We're done with this inst and we don't want it anymore
820
    // But we can't let it free the submatcher block.
821
5.80k
    i->subfn.first = i->subfn.last = NULL;
822
5.80k
  }
823
  // We're done with these insts now.
824
3.23k
  block_free(altmatchers);
825
826
3.23k
  return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
827
1.80M
}
828
829
289k
block gen_reduce(block source, block matcher, block init, block body) {
830
289k
  block res_var = gen_op_var_fresh(STOREV, "reduce");
831
289k
  block loop = BLOCK(gen_op_simple(DUPN),
832
289k
                     source,
833
289k
                     bind_alternation_matchers(matcher,
834
289k
                                  BLOCK(gen_op_bound(LOADVN, res_var),
835
289k
                                        body,
836
289k
                                        gen_op_bound(STOREV, res_var))),
837
289k
                     gen_op_simple(BACKTRACK));
838
289k
  return BLOCK(gen_op_simple(DUP),
839
289k
               init,
840
289k
               res_var,
841
289k
               gen_op_target(FORK, loop),
842
289k
               loop,
843
289k
               gen_op_bound(LOADVN, res_var));
844
289k
}
845
846
97.8k
block gen_foreach(block source, block matcher, block init, block update, block extract) {
847
97.8k
  block state_var = gen_op_var_fresh(STOREV, "foreach");
848
97.8k
  return BLOCK(gen_op_simple(DUP),
849
97.8k
               init,
850
97.8k
               state_var,
851
97.8k
               gen_op_simple(DUP),
852
               // get a value from the source expression:
853
97.8k
               source,
854
               // destructure the value into variable(s) for all the code
855
               // in the body to see
856
97.8k
               bind_alternation_matchers(matcher,
857
                            // load the loop state variable
858
97.8k
                            BLOCK(gen_op_bound(LOADVN, state_var),
859
                                  // generate updated state
860
97.8k
                                  update,
861
                                  // save the updated state for value extraction
862
97.8k
                                  gen_op_simple(DUP),
863
                                  // save new state
864
97.8k
                                  gen_op_bound(STOREV, state_var),
865
                                  // extract an output...
866
97.8k
                                  extract)));
867
97.8k
}
868
869
230k
block gen_definedor(block a, block b) {
870
  // var found := false
871
230k
  block found_var = gen_op_var_fresh(STOREV, "found");
872
230k
  block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
873
874
  // if found, backtrack. Otherwise execute b
875
230k
  block backtrack = gen_op_simple(BACKTRACK);
876
230k
  block tail = BLOCK(gen_op_simple(DUP),
877
230k
                     gen_op_bound(LOADV, found_var),
878
230k
                     gen_op_target(JUMP_F, backtrack),
879
230k
                     backtrack,
880
230k
                     gen_op_simple(POP),
881
230k
                     b);
882
883
  // try again
884
230k
  block if_notfound = gen_op_simple(BACKTRACK);
885
886
  // found := true, produce result
887
230k
  block if_found = BLOCK(gen_op_simple(DUP),
888
230k
                         gen_const(jv_true()),
889
230k
                         gen_op_bound(STOREV, found_var),
890
230k
                         gen_op_target(JUMP, tail));
891
892
230k
  return BLOCK(init,
893
230k
               gen_op_target(FORK, if_notfound),
894
230k
               a,
895
230k
               gen_op_target(JUMP_F, if_found),
896
230k
               if_found,
897
230k
               if_notfound,
898
230k
               tail);
899
230k
}
900
901
47.9k
int block_has_main(block top) {
902
2.58M
  for (inst *c = top.first; c; c = c->next) {
903
2.55M
    if (c->op == TOP)
904
23.9k
      return 1;
905
2.55M
  }
906
23.9k
  return 0;
907
47.9k
}
908
909
0
int block_is_funcdef(block b) {
910
0
  if (b.first != NULL && b.first->op == CLOSURE_CREATE)
911
0
    return 1;
912
0
  return 0;
913
0
}
914
915
1.95M
block gen_condbranch(block iftrue, block iffalse) {
916
1.95M
  iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
917
1.95M
  return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
918
1.95M
}
919
920
337k
block gen_and(block a, block b) {
921
  // a and b = if a then (if b then true else false) else false
922
337k
  return BLOCK(gen_op_simple(DUP), a,
923
337k
               gen_condbranch(BLOCK(gen_op_simple(POP),
924
337k
                                    b,
925
337k
                                    gen_condbranch(gen_const(jv_true()),
926
337k
                                                   gen_const(jv_false()))),
927
337k
                              BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
928
337k
}
929
930
73.3k
block gen_or(block a, block b) {
931
  // a or b = if a then true else (if b then true else false)
932
73.3k
  return BLOCK(gen_op_simple(DUP), a,
933
73.3k
               gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
934
73.3k
                              BLOCK(gen_op_simple(POP),
935
73.3k
                                    b,
936
73.3k
                                    gen_condbranch(gen_const(jv_true()),
937
73.3k
                                                   gen_const(jv_false())))));
938
73.3k
}
939
940
9.24k
block gen_destructure_alt(block matcher) {
941
33.9k
  for (inst *i = matcher.first; i; i = i->next) {
942
24.6k
    if (i->op == STOREV) {
943
11.4k
      i->op = STOREVN;
944
11.4k
    }
945
24.6k
  }
946
9.24k
  inst* i = inst_new(DESTRUCTURE_ALT);
947
9.24k
  i->subfn = matcher;
948
9.24k
  return inst_block(i);
949
9.24k
}
950
951
885k
block gen_var_binding(block var, const char* name, block body) {
952
885k
  return gen_destructure(var, gen_op_unbound(STOREV, name), body);
953
885k
}
954
955
1.86k
block gen_array_matcher(block left, block curr) {
956
1.86k
  int index;
957
1.86k
  if (block_is_noop(left))
958
1.60k
    index = 0;
959
258
  else {
960
    // `left` was returned by this function, so the third inst is the
961
    // constant containing the previously used index
962
258
    assert(left.first->op == DUP);
963
258
    assert(left.first->next != NULL);
964
258
    inst *i = NULL;
965
258
    if (left.first->next->op == PUSHK_UNDER) {
966
258
      i = left.first->next;
967
258
    } else {
968
0
      assert(left.first->next->op == SUBEXP_BEGIN);
969
0
      assert(left.first->next->next->op == LOADK);
970
0
      i = left.first->next->next;
971
0
    }
972
258
    index = 1 + (int) jv_number_value(i->imm.constant);
973
258
  }
974
975
  // `left` goes at the end so that the const index is in a predictable place
976
1.86k
  return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
977
1.86k
               gen_op_simple(INDEX), curr, left);
978
1.86k
}
979
980
56.1k
block gen_object_matcher(block name, block curr) {
981
56.1k
  return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
982
56.1k
               curr);
983
56.1k
}
984
985
1.41M
block gen_destructure(block var, block matchers, block body) {
986
  // var bindings can be added after coding the program; leave the TOP first.
987
1.41M
  block top = gen_noop();
988
1.41M
  if (body.first && body.first->op == TOP)
989
0
    top = inst_block(block_take(&body));
990
991
1.41M
  if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
992
3.23k
    block_append(&var, gen_op_simple(DUP));
993
1.41M
  } else {
994
1.41M
    top = BLOCK(top, gen_op_simple(DUP));
995
1.41M
  }
996
997
1.41M
  return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
998
1.41M
}
999
1000
// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
1001
74.2k
static block gen_wildvar_binding(block var, const char* name, block body) {
1002
74.2k
  return BLOCK(gen_op_simple(DUP), var,
1003
74.2k
               block_bind(gen_op_unbound(STOREV, name),
1004
74.2k
                          body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
1005
74.2k
}
1006
1007
1.11M
block gen_cond(block cond, block iftrue, block iffalse) {
1008
1.11M
  return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
1009
1.11M
               gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
1010
1.11M
                              BLOCK(gen_op_simple(POP), iffalse)));
1011
1.11M
}
1012
1013
187k
block gen_try(block exp, block handler) {
1014
  /*
1015
   * Produce:
1016
   *
1017
   *  TRY_BEGIN handler
1018
   *  <exp>
1019
   *  TRY_END
1020
   *  JUMP past_handler
1021
   *  handler: <handler>
1022
   *  past_handler:
1023
   *
1024
   * If <exp> backtracks then TRY_BEGIN will backtrack.
1025
   *
1026
   * If <exp> produces a value then we'll execute whatever bytecode follows
1027
   * this sequence.  If that code raises an exception, then TRY_END will wrap
1028
   * and re-raise that exception, and TRY_BEGIN will unwrap and re-raise the
1029
   * exception (see jq_next()).
1030
   *
1031
   * If <exp> raises then the TRY_BEGIN will see a non-wrapped exception and
1032
   * will jump to the handler (note the TRY_END will not execute in this case),
1033
   * and if the handler produces any values, then we'll execute whatever
1034
   * bytecode follows this sequence.  Note that TRY_END will not execute in
1035
   * this case, so if the handler raises an exception, or code past the handler
1036
   * raises an exception, then that exception won't be wrapped and re-raised,
1037
   * and the TRY_BEGIN will not catch it because it does not stack_save() when
1038
   * it branches to the handler.
1039
   */
1040
1041
187k
  if (block_is_noop(handler))
1042
1.90k
    handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP));
1043
1044
187k
  block jump = gen_op_target(JUMP, handler);
1045
187k
  return BLOCK(gen_op_target(TRY_BEGIN, jump), exp, gen_op_simple(TRY_END),
1046
187k
               jump, handler);
1047
187k
}
1048
1049
74.2k
block gen_label(const char *label, block exp) {
1050
74.2k
  block cond = gen_call("_equal",
1051
74.2k
                        BLOCK(gen_lambda(gen_noop()),
1052
74.2k
                              gen_lambda(gen_op_unbound(LOADV, label))));
1053
74.2k
  return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
1054
74.2k
                             BLOCK(gen_op_simple(POP),
1055
                                   // try exp catch if . == $label
1056
                                   //               then empty
1057
                                   //               else error end
1058
                                   //
1059
                                   // Can't use gen_binop(), as that's firmly
1060
                                   // stuck in parser.y as it refers to things
1061
                                   // like EQ.
1062
74.2k
                                   gen_try(exp,
1063
74.2k
                                           gen_cond(cond,
1064
74.2k
                                                    gen_op_simple(BACKTRACK),
1065
74.2k
                                                    gen_call("error", gen_noop())))));
1066
74.2k
}
1067
1068
23.9k
block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
1069
3.27M
  for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
1070
3.25M
    inst* i = inst_new(CLOSURE_CREATE_C);
1071
3.25M
    i->imm.cfunc = &cfunctions[cfunc];
1072
3.25M
    i->symbol = strdup(cfunctions[cfunc].name);
1073
3.25M
    i->nformals = cfunctions[cfunc].nargs - 1;
1074
3.25M
    i->any_unbound = 0;
1075
3.25M
    code = BLOCK(inst_block(i), code);
1076
3.25M
  }
1077
23.9k
  return code;
1078
23.9k
}
1079
1080
881k
static uint16_t nesting_level(struct bytecode* bc, inst* target) {
1081
881k
  uint16_t level = 0;
1082
881k
  assert(bc && target && target->compiled);
1083
1.10M
  while (bc && target->compiled != bc) {
1084
228k
    level++;
1085
228k
    bc = bc->parent;
1086
228k
  }
1087
881k
  assert(bc && bc == target->compiled);
1088
881k
  return level;
1089
881k
}
1090
1091
2.51M
static int count_cfunctions(block b) {
1092
2.51M
  int n = 0;
1093
5.00M
  for (inst* i = b.first; i; i = i->next) {
1094
2.49M
    if (i->op == CLOSURE_CREATE_C) n++;
1095
2.49M
    n += count_cfunctions(i->subfn);
1096
2.49M
  }
1097
2.51M
  return n;
1098
2.51M
}
1099
1100
#ifndef WIN32
1101
extern char **environ;
1102
#endif
1103
1104
static jv
1105
make_env(jv env)
1106
34
{
1107
34
  if (jv_is_valid(env))
1108
21
    return jv_copy(env);
1109
13
  jv r = jv_object();
1110
13
  if (environ == NULL)
1111
0
    return r;
1112
468
  for (size_t i = 0; environ[i] != NULL; i++) {
1113
455
    const char *eq;
1114
1115
455
    if ((eq = strchr(environ[i], '=')) == NULL)
1116
0
      r = jv_object_delete(r, jv_string(environ[i]));
1117
455
    else
1118
455
      r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
1119
455
  }
1120
13
  return jv_copy(r);
1121
13
}
1122
1123
// Expands call instructions into a calling sequence
1124
705k
static int expand_call_arglist(block* b, jv args, jv *env) {
1125
705k
  int errors = 0;
1126
705k
  block ret = gen_noop();
1127
4.59M
  for (inst* curr; (curr = block_take(b));) {
1128
3.88M
    if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
1129
1.48M
      if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
1130
34
        curr->op = LOADK;
1131
34
        *env = curr->imm.constant = make_env(*env);
1132
1.48M
      } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
1133
0
        curr->op = LOADK;
1134
0
        curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
1135
1.48M
      } else if (!curr->bound_by) {
1136
16.0k
        if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
1137
0
          locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
1138
16.0k
        else if (curr->op == LOADV)
1139
1.26k
          locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
1140
14.8k
        else
1141
14.8k
          locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals);
1142
16.0k
        errors++;
1143
        // don't process this instruction if it's not well-defined
1144
16.0k
        ret = BLOCK(ret, inst_block(curr));
1145
16.0k
        continue;
1146
16.0k
      }
1147
1.48M
    }
1148
1149
3.87M
    block prelude = gen_noop();
1150
3.87M
    if (curr->op == CALL_JQ) {
1151
711k
      int actual_args = 0, desired_args = 0;
1152
      // We expand the argument list as a series of instructions
1153
711k
      switch (curr->bound_by->op) {
1154
0
      default: assert(0 && "Unknown function type"); break;
1155
172k
      case CLOSURE_CREATE:
1156
281k
      case CLOSURE_PARAM: {
1157
281k
        block callargs = gen_noop();
1158
527k
        for (inst* i; (i = block_take(&curr->arglist));) {
1159
245k
          assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
1160
245k
          block b = inst_block(i);
1161
245k
          switch (i->op) {
1162
0
          default: assert(0 && "Unknown type of parameter"); break;
1163
0
          case CLOSURE_REF:
1164
0
            block_append(&callargs, b);
1165
0
            break;
1166
245k
          case CLOSURE_CREATE:
1167
245k
            block_append(&prelude, b);
1168
245k
            block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
1169
245k
            break;
1170
245k
          }
1171
245k
          actual_args++;
1172
245k
        }
1173
281k
        curr->imm.intval = actual_args;
1174
281k
        curr->arglist = callargs;
1175
1176
281k
        if (curr->bound_by->op == CLOSURE_CREATE) {
1177
418k
          for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
1178
245k
            assert(i->op == CLOSURE_PARAM);
1179
245k
            desired_args++;
1180
245k
          }
1181
172k
        }
1182
281k
        break;
1183
281k
      }
1184
1185
429k
      case CLOSURE_CREATE_C: {
1186
882k
        for (inst* i; (i = block_take(&curr->arglist)); ) {
1187
452k
          assert(i->op == CLOSURE_CREATE); // FIXME
1188
452k
          block body = i->subfn;
1189
452k
          i->subfn = gen_noop();
1190
452k
          inst_free(i);
1191
          // arguments should be pushed in reverse order, prepend them to prelude
1192
452k
          errors += expand_call_arglist(&body, args, env);
1193
452k
          prelude = BLOCK(gen_subexp(body), prelude);
1194
452k
          actual_args++;
1195
452k
        }
1196
429k
        assert(curr->op == CALL_JQ);
1197
429k
        curr->op = CALL_BUILTIN;
1198
429k
        curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
1199
429k
        assert(curr->bound_by->op == CLOSURE_CREATE_C);
1200
429k
        desired_args = curr->bound_by->imm.cfunc->nargs - 1;
1201
429k
        assert(!curr->arglist.first);
1202
429k
        break;
1203
429k
      }
1204
711k
      }
1205
1206
711k
      assert(actual_args == desired_args); // because now handle this above
1207
711k
    }
1208
3.87M
    ret = BLOCK(ret, prelude, inst_block(curr));
1209
3.87M
  }
1210
705k
  *b = ret;
1211
705k
  return errors;
1212
705k
}
1213
1214
252k
static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
1215
252k
  int errors = 0;
1216
252k
  int pos = 0;
1217
252k
  int var_frame_idx = 0;
1218
252k
  bc->nsubfunctions = 0;
1219
252k
  errors += expand_call_arglist(&b, args, env);
1220
252k
  b = BLOCK(b, gen_op_simple(RET));
1221
252k
  jv localnames = jv_array();
1222
5.31M
  for (inst* curr = b.first; curr; curr = curr->next) {
1223
5.06M
    if (!curr->next) assert(curr == b.last);
1224
5.06M
    int length = opcode_describe(curr->op)->length;
1225
5.06M
    if (curr->op == CALL_JQ) {
1226
548k
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1227
251k
        length += 2;
1228
251k
      }
1229
296k
    }
1230
5.06M
    pos += length;
1231
5.06M
    curr->bytecode_pos = pos;
1232
5.06M
    curr->compiled = bc;
1233
1234
5.06M
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1235
1236
5.06M
    if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
1237
565k
        curr->bound_by == curr) {
1238
236k
      curr->imm.intval = var_frame_idx++;
1239
236k
      localnames = jv_array_append(localnames, jv_string(curr->symbol));
1240
236k
    }
1241
1242
5.06M
    if (curr->op == CLOSURE_CREATE) {
1243
321k
      assert(curr->bound_by == curr);
1244
321k
      curr->imm.intval = bc->nsubfunctions++;
1245
321k
    }
1246
5.06M
    if (curr->op == CLOSURE_CREATE_C) {
1247
118k
      assert(curr->bound_by == curr);
1248
118k
      int idx = bc->globals->ncfunctions++;
1249
118k
      bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
1250
118k
                                                 jv_string(curr->symbol));
1251
118k
      bc->globals->cfunctions[idx] = *curr->imm.cfunc;
1252
118k
      curr->imm.intval = idx;
1253
118k
    }
1254
5.06M
  }
1255
252k
  if (pos > 0xFFFF) {
1256
    // too long for program counter to fit in uint16_t
1257
4
    locfile_locate(lf, UNKNOWN_LOCATION,
1258
4
        "function compiled to %d bytes which is too long", pos);
1259
4
    errors++;
1260
4
  }
1261
252k
  bc->codelen = pos;
1262
252k
  bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
1263
252k
  if (bc->nsubfunctions && !errors) {
1264
77.8k
    bc->subfunctions = jv_mem_calloc(bc->nsubfunctions, sizeof(struct bytecode*));
1265
2.77M
    for (inst* curr = b.first; curr; curr = curr->next) {
1266
2.69M
      if (curr->op == CLOSURE_CREATE) {
1267
229k
        struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
1268
229k
        bc->subfunctions[curr->imm.intval] = subfn;
1269
229k
        subfn->globals = bc->globals;
1270
229k
        subfn->parent = bc;
1271
229k
        subfn->nclosures = 0;
1272
229k
        subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
1273
229k
        jv params = jv_array();
1274
338k
        for (inst* param = curr->arglist.first; param; param = param->next) {
1275
109k
          assert(param->op == CLOSURE_PARAM);
1276
109k
          assert(param->bound_by == param);
1277
109k
          param->imm.intval = subfn->nclosures++;
1278
109k
          param->compiled = subfn;
1279
109k
          params = jv_array_append(params, jv_string(param->symbol));
1280
109k
        }
1281
229k
        subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
1282
229k
        errors += compile(subfn, curr->subfn, lf, args, env);
1283
229k
        curr->subfn = gen_noop();
1284
229k
      }
1285
2.69M
    }
1286
175k
  } else {
1287
175k
    bc->nsubfunctions = 0;
1288
175k
    bc->subfunctions = 0;
1289
175k
  }
1290
252k
  uint16_t* code = jv_mem_calloc(bc->codelen, sizeof(uint16_t));
1291
252k
  bc->code = code;
1292
252k
  pos = 0;
1293
252k
  jv constant_pool = jv_array();
1294
252k
  int maxvar = -1;
1295
4.76M
  if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
1296
4.51M
    const struct opcode_description* op = opcode_describe(curr->op);
1297
4.51M
    if (op->length == 0)
1298
344k
      continue;
1299
4.17M
    code[pos++] = curr->op;
1300
4.17M
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1301
4.17M
    if (curr->op == CALL_BUILTIN) {
1302
418k
      assert(curr->bound_by->op == CLOSURE_CREATE_C);
1303
418k
      assert(!curr->arglist.first);
1304
418k
      code[pos++] = (uint16_t)curr->imm.intval;
1305
418k
      code[pos++] = curr->bound_by->imm.intval;
1306
3.75M
    } else if (curr->op == CALL_JQ) {
1307
234k
      assert(curr->bound_by->op == CLOSURE_CREATE ||
1308
234k
             curr->bound_by->op == CLOSURE_PARAM);
1309
234k
      code[pos++] = (uint16_t)curr->imm.intval;
1310
234k
      code[pos++] = nesting_level(bc, curr->bound_by);
1311
234k
      code[pos++] = curr->bound_by->imm.intval |
1312
234k
        (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
1313
386k
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1314
151k
        assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
1315
151k
        code[pos++] = nesting_level(bc, arg->bound_by);
1316
151k
        code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
1317
151k
      }
1318
3.51M
    } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
1319
      // STORE_GLOBAL: constant global, basically
1320
0
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1321
0
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1322
0
      code[pos++] = nesting_level(bc, curr->bound_by);
1323
0
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1324
0
      code[pos++] = var;
1325
0
      if (var > maxvar) maxvar = var;
1326
3.51M
    } else if (op->flags & OP_HAS_CONSTANT) {
1327
502k
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1328
502k
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1329
3.01M
    } else if (op->flags & OP_HAS_VARIABLE) {
1330
494k
      code[pos++] = nesting_level(bc, curr->bound_by);
1331
494k
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1332
494k
      code[pos++] = var;
1333
494k
      if (var > maxvar) maxvar = var;
1334
2.52M
    } else if (op->flags & OP_HAS_BRANCH) {
1335
521k
      assert(curr->imm.target->bytecode_pos != -1);
1336
521k
      assert(curr->imm.target->bytecode_pos > pos); // only forward branches
1337
521k
      code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
1338
521k
      pos++;
1339
2.00M
    } else if (op->length > 1) {
1340
0
      assert(0 && "codegen not implemented for this operation");
1341
0
    }
1342
4.17M
  }
1343
252k
  bc->constants = constant_pool;
1344
252k
  bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
1345
252k
  block_free(b);
1346
252k
  return errors;
1347
252k
}
1348
1349
23.9k
int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
1350
23.9k
  struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
1351
23.9k
  bc->parent = 0;
1352
23.9k
  bc->nclosures = 0;
1353
23.9k
  bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
1354
23.9k
  int ncfunc = count_cfunctions(b);
1355
23.9k
  bc->globals->ncfunctions = 0;
1356
23.9k
  bc->globals->cfunctions = jv_mem_calloc(MAX(ncfunc, 1), sizeof(struct cfunction));
1357
23.9k
  bc->globals->cfunc_names = jv_array();
1358
23.9k
  bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
1359
23.9k
  jv env = jv_invalid();
1360
23.9k
  int nerrors = compile(bc, b, lf, args, &env);
1361
23.9k
  jv_free(args);
1362
23.9k
  jv_free(env);
1363
23.9k
  assert(bc->globals->ncfunctions == ncfunc);
1364
23.9k
  if (nerrors > 0) {
1365
443
    bytecode_free(bc);
1366
443
    *out = 0;
1367
23.4k
  } else {
1368
23.4k
    *out = bc;
1369
23.4k
  }
1370
23.9k
  return nerrors;
1371
23.9k
}
1372
1373
187M
void block_free(block b) {
1374
187M
  struct inst* next;
1375
274M
  for (struct inst* curr = b.first; curr; curr = next) {
1376
87.0M
    next = curr->next;
1377
87.0M
    inst_free(curr);
1378
87.0M
  }
1379
187M
}