Coverage Report

Created: 2025-12-03 06:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/jq/src/compile.c
Line
Count
Source
1
#include <assert.h>
2
#include <string.h>
3
#include <stdlib.h>
4
#include <unistd.h>
5
#include "compile.h"
6
#include "bytecode.h"
7
#include "locfile.h"
8
#include "jv_alloc.h"
9
#include "util.h"
10
11
/*
12
  The intermediate representation for jq filters is as a sequence of
13
  struct inst, which form a doubly-linked list via the next and prev
14
  pointers.
15
16
  A "block" represents a sequence of "struct inst", which may be
17
  empty.
18
19
  Blocks are generated by the parser bottom-up, so may have free
20
  variables (refer to things not defined). See inst.bound_by and
21
  inst.symbol.
22
 */
23
struct inst {
24
  struct inst* next;
25
  struct inst* prev;
26
27
  opcode op;
28
29
  struct {
30
    uint16_t intval;
31
    struct inst* target;
32
    jv constant;
33
    const struct cfunction* cfunc;
34
  } imm;
35
36
  struct locfile* locfile;
37
  location source;
38
39
  // Binding
40
  // An instruction requiring binding (for parameters/variables/functions)
41
  // is in one of three states:
42
  //   inst->bound_by = NULL  - Unbound free variable
43
  //   inst->bound_by = inst  - This instruction binds a variable
44
  //   inst->bound_by = other - Uses variable bound by other instruction
45
  // Unbound instructions (references to other things that may or may not
46
  // exist) are created by "gen_foo_unbound", and bindings are created by
47
  // block_bind(definition, body), which binds all instructions in
48
  // body which are unbound and refer to "definition" by name.
49
  struct inst* bound_by;
50
  char* symbol;
51
  int any_unbound;
52
  int referenced;
53
54
  int nformals;
55
  int nactuals;
56
57
  block subfn;   // used by CLOSURE_CREATE (body of function)
58
  block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
59
60
  // This instruction is compiled as part of which function?
61
  // (only used during block_compile)
62
  struct bytecode* compiled;
63
64
  int bytecode_pos; // position just after this insn
65
};
66
67
0
static inst* inst_new(opcode op) {
68
0
  inst* i = jv_mem_alloc(sizeof(inst));
69
0
  i->next = i->prev = 0;
70
0
  i->op = op;
71
0
  i->bytecode_pos = -1;
72
0
  i->bound_by = 0;
73
0
  i->symbol = 0;
74
0
  i->any_unbound = 0;
75
0
  i->referenced = 0;
76
0
  i->nformals = -1;
77
0
  i->nactuals = -1;
78
0
  i->subfn = gen_noop();
79
0
  i->arglist = gen_noop();
80
0
  i->source = UNKNOWN_LOCATION;
81
0
  i->locfile = 0;
82
0
  return i;
83
0
}
84
85
0
static void inst_free(struct inst* i) {
86
0
  jv_mem_free(i->symbol);
87
0
  block_free(i->subfn);
88
0
  block_free(i->arglist);
89
0
  if (i->locfile)
90
0
    locfile_free(i->locfile);
91
0
  if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
92
0
    jv_free(i->imm.constant);
93
0
  }
94
0
  jv_mem_free(i);
95
0
}
96
97
0
static block inst_block(inst* i) {
98
0
  block b = {i,i};
99
0
  return b;
100
0
}
101
102
0
int block_is_single(block b) {
103
0
  return b.first && b.first == b.last;
104
0
}
105
106
0
static inst* block_take(block* b) {
107
0
  if (b->first == 0) return 0;
108
0
  inst* i = b->first;
109
0
  if (i->next) {
110
0
    i->next->prev = 0;
111
0
    b->first = i->next;
112
0
    i->next = 0;
113
0
  } else {
114
0
    b->first = 0;
115
0
    b->last = 0;
116
0
  }
117
0
  return i;
118
0
}
119
120
0
block gen_location(location loc, struct locfile* l, block b) {
121
0
  for (inst* i = b.first; i; i = i->next) {
122
0
    if (i->source.start == UNKNOWN_LOCATION.start &&
123
0
        i->source.end == UNKNOWN_LOCATION.end) {
124
0
      i->source = loc;
125
0
      i->locfile = locfile_retain(l);
126
0
    }
127
0
  }
128
0
  return b;
129
0
}
130
131
0
block gen_noop(void) {
132
0
  block b = {0,0};
133
0
  return b;
134
0
}
135
136
0
int block_is_noop(block b) {
137
0
  return (b.first == 0 && b.last == 0);
138
0
}
139
140
0
block gen_op_simple(opcode op) {
141
0
  assert(opcode_describe(op)->length == 1);
142
0
  return inst_block(inst_new(op));
143
0
}
144
145
146
0
block gen_error(jv constant) {
147
0
  assert(opcode_describe(ERRORK)->flags & OP_HAS_CONSTANT);
148
0
  inst *i = inst_new(ERRORK);
149
0
  i->imm.constant = constant;
150
0
  return inst_block(i);
151
0
}
152
153
0
block gen_const(jv constant) {
154
0
  assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
155
0
  inst* i = inst_new(LOADK);
156
0
  i->imm.constant = constant;
157
0
  return inst_block(i);
158
0
}
159
160
0
block gen_const_global(jv constant, const char *name) {
161
0
  assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
162
0
         (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
163
0
  inst* i = inst_new(STORE_GLOBAL);
164
0
  i->imm.constant = constant;
165
0
  i->symbol = strdup(name);
166
0
  i->any_unbound = 0;
167
0
  return inst_block(i);
168
0
}
169
170
0
block gen_op_pushk_under(jv constant) {
171
0
  assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
172
0
  inst* i = inst_new(PUSHK_UNDER);
173
0
  i->imm.constant = constant;
174
0
  return inst_block(i);
175
0
}
176
177
0
int block_is_const(block b) {
178
0
  return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
179
0
}
180
181
0
jv_kind block_const_kind(block b) {
182
0
  assert(block_is_const(b));
183
0
  return jv_get_kind(b.first->imm.constant);
184
0
}
185
186
0
jv block_const(block b) {
187
0
  assert(block_is_const(b));
188
0
  return jv_copy(b.first->imm.constant);
189
0
}
190
191
0
block gen_op_target(opcode op, block target) {
192
0
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
193
0
  assert(target.last);
194
0
  inst* i = inst_new(op);
195
0
  i->imm.target = target.last;
196
0
  return inst_block(i);
197
0
}
198
199
0
block gen_op_targetlater(opcode op) {
200
0
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
201
0
  inst* i = inst_new(op);
202
0
  i->imm.target = 0;
203
0
  return inst_block(i);
204
0
}
205
0
void inst_set_target(block b, block target) {
206
0
  assert(block_is_single(b));
207
0
  assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
208
0
  assert(target.last);
209
0
  b.first->imm.target = target.last;
210
0
}
211
212
0
block gen_op_unbound(opcode op, const char* name) {
213
0
  assert(opcode_describe(op)->flags & OP_HAS_BINDING);
214
0
  inst* i = inst_new(op);
215
0
  i->symbol = strdup(name);
216
0
  i->any_unbound = 1;
217
0
  return inst_block(i);
218
0
}
219
220
0
block gen_op_var_fresh(opcode op, const char* name) {
221
0
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
222
0
  block b = gen_op_unbound(op, name);
223
0
  b.first->bound_by = b.first;
224
0
  return b;
225
0
}
226
227
0
block gen_op_bound(opcode op, block binder) {
228
0
  assert(block_is_single(binder));
229
0
  block b = gen_op_unbound(op, binder.first->symbol);
230
0
  b.first->bound_by = binder.first;
231
0
  b.first->any_unbound = 0;
232
0
  return b;
233
0
}
234
235
0
block gen_dictpair(block k, block v) {
236
0
  return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
237
0
}
238
239
240
0
static void inst_join(inst* a, inst* b) {
241
0
  assert(a && b);
242
0
  assert(!a->next);
243
0
  assert(!b->prev);
244
0
  a->next = b;
245
0
  b->prev = a;
246
0
}
247
248
0
void block_append(block* b, block b2) {
249
0
  if (b2.first) {
250
0
    if (b->last) {
251
0
      inst_join(b->last, b2.first);
252
0
    } else {
253
0
      b->first = b2.first;
254
0
    }
255
0
    b->last = b2.last;
256
0
  }
257
0
}
258
259
0
block block_join(block a, block b) {
260
0
  block c = a;
261
0
  block_append(&c, b);
262
0
  return c;
263
0
}
264
265
0
int block_has_only_binders_and_imports(block binders, int bindflags) {
266
0
  bindflags |= OP_HAS_BINDING;
267
0
  for (inst* curr = binders.first; curr; curr = curr->next) {
268
0
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
269
0
      return 0;
270
0
    }
271
0
  }
272
0
  return 1;
273
0
}
274
275
0
static int inst_is_binder(inst *i, int bindflags) {
276
0
  return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
277
0
}
278
279
0
int block_has_only_binders(block binders, int bindflags) {
280
0
  bindflags |= OP_HAS_BINDING;
281
0
  bindflags &= ~OP_BIND_WILDCARD;
282
0
  for (inst* curr = binders.first; curr; curr = curr->next) {
283
0
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
284
0
      return 0;
285
0
    }
286
0
  }
287
0
  return 1;
288
0
}
289
290
// Count a call site's actual params
291
0
static int block_count_actuals(block b) {
292
0
  int args = 0;
293
0
  for (inst* i = b.first; i; i = i->next) {
294
0
    switch (i->op) {
295
0
    default: assert(0 && "Unknown function type"); break;
296
0
    case CLOSURE_CREATE:
297
0
    case CLOSURE_PARAM:
298
0
    case CLOSURE_CREATE_C:
299
0
      args++;
300
0
      break;
301
0
    }
302
0
  }
303
0
  return args;
304
0
}
305
306
0
static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
307
0
  assert(block_is_single(binder));
308
0
  assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
309
0
  assert(binder.first->symbol);
310
0
  assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
311
0
  assert(break_distance >= 0);
312
313
0
  binder.first->bound_by = binder.first;
314
0
  int nrefs = 0;
315
0
  for (inst* i = body.first; i; i = i->next) {
316
0
    if (i->any_unbound == 0)
317
0
      continue;
318
319
0
    int flags = opcode_describe(i->op)->flags;
320
0
    if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
321
0
        (!strcmp(i->symbol, binder.first->symbol) ||
322
         // Check for break/break2/break3; see parser.y
323
0
         ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
324
0
          break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
325
0
          i->symbol[2] == '\0'))) {
326
      // bind this instruction
327
0
      if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
328
0
        i->bound_by = binder.first;
329
0
        nrefs++;
330
0
      }
331
0
    } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
332
0
               !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
333
0
               !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
334
      // Increment the break distance required for this binder to match
335
      // a break whenever we come across a STOREV of *anonlabel...
336
0
      break_distance++;
337
0
    }
338
339
0
    i->any_unbound = (i->symbol && !i->bound_by);
340
341
    // binding recurses into closures
342
0
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance);
343
    // binding recurses into argument list
344
0
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance);
345
346
0
    if (i->any_unbound)
347
0
      *any_unbound = 1;
348
0
  }
349
0
  return nrefs;
350
0
}
351
352
0
static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
353
0
  int any_unbound;
354
0
  return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance);
355
0
}
356
357
0
static int block_bind_each(block binder, block body, int bindflags) {
358
0
  assert(block_has_only_binders(binder, bindflags));
359
0
  bindflags |= OP_HAS_BINDING;
360
0
  int nrefs = 0;
361
0
  for (inst* curr = binder.first; curr; curr = curr->next) {
362
0
    nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
363
0
  }
364
0
  return nrefs;
365
0
}
366
367
0
static block block_bind(block binder, block body, int bindflags) {
368
0
  block_bind_each(binder, body, bindflags);
369
0
  return block_join(binder, body);
370
0
}
371
372
0
block block_bind_library(block binder, block body, int bindflags, const char *libname) {
373
0
  bindflags |= OP_HAS_BINDING;
374
0
  int matchlen = (libname == NULL) ? 0 : strlen(libname);
375
0
  char *matchname = jv_mem_alloc(matchlen+2+1);
376
0
  matchname[0] = '\0';
377
0
  if (libname != NULL && libname[0] != '\0') {
378
0
    strcpy(matchname,libname);
379
0
    strcpy(matchname+matchlen, "::");
380
0
    matchlen += 2;
381
0
  }
382
0
  assert(block_has_only_binders(binder, bindflags));
383
0
  for (inst *curr = binder.last; curr; curr = curr->prev) {
384
0
    int bindflags2 = bindflags;
385
0
    char* cname = curr->symbol;
386
0
    char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
387
0
    strcpy(tname, matchname);
388
0
    strcpy(tname+matchlen, curr->symbol);
389
390
    // Ew
391
0
    if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
392
0
      bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
393
394
    // This mutation is ugly, even if we undo it
395
0
    curr->symbol = tname;
396
0
    block_bind_subblock(inst_block(curr), body, bindflags2, 0);
397
0
    curr->symbol = cname;
398
0
    free(tname);
399
0
  }
400
0
  free(matchname);
401
0
  return body; // We don't return a join because we don't want those sticking around...
402
0
}
403
404
0
static inst* block_take_last(block* b) {
405
0
  inst* i = b->last;
406
0
  if (i == 0)
407
0
    return 0;
408
0
  if (i->prev) {
409
0
    i->prev->next = i->next;
410
0
    b->last = i->prev;
411
0
    i->prev = 0;
412
0
  } else {
413
0
    b->first = 0;
414
0
    b->last = 0;
415
0
  }
416
0
  return i;
417
0
}
418
419
// Binds a sequence of binders, which *must not* already be bound to each other,
420
// to body, throwing away unreferenced defs
421
0
block block_bind_referenced(block binder, block body, int bindflags) {
422
0
  assert(block_has_only_binders(binder, bindflags));
423
0
  bindflags |= OP_HAS_BINDING;
424
425
0
  inst* curr;
426
0
  while ((curr = block_take_last(&binder))) {
427
0
    block b = inst_block(curr);
428
0
    if (block_bind_subblock(b, body, bindflags, 0) == 0) {
429
0
      block_free(b);
430
0
    } else {
431
0
      body = BLOCK(b, body);
432
0
    }
433
0
  }
434
0
  return body;
435
0
}
436
437
0
block block_bind_self(block binder, int bindflags) {
438
0
  assert(block_has_only_binders(binder, bindflags));
439
0
  bindflags |= OP_HAS_BINDING;
440
0
  block body = gen_noop();
441
442
0
  inst* curr;
443
0
  while ((curr = block_take_last(&binder))) {
444
0
    block b = inst_block(curr);
445
0
    block_bind_subblock(b, body, bindflags, 0);
446
0
    body = BLOCK(b, body);
447
0
  }
448
0
  return body;
449
0
}
450
451
0
static void block_mark_referenced(block body) {
452
0
  int saw_top = 0;
453
0
  for (inst* i = body.last; i; i = i->prev) {
454
0
    if (saw_top && i->bound_by == i && !i->referenced)
455
0
      continue;
456
0
    if (i->op == TOP) {
457
0
      saw_top = 1;
458
0
    }
459
0
    if (i->bound_by) {
460
0
      i->bound_by->referenced = 1;
461
0
    }
462
463
0
    block_mark_referenced(i->arglist);
464
0
    block_mark_referenced(i->subfn);
465
0
  }
466
0
}
467
468
0
block block_drop_unreferenced(block body) {
469
0
  block_mark_referenced(body);
470
471
0
  block refd = gen_noop();
472
0
  inst* curr;
473
0
  while ((curr = block_take(&body))) {
474
0
    if (curr->bound_by == curr && !curr->referenced) {
475
0
      inst_free(curr);
476
0
    } else {
477
0
      refd = BLOCK(refd, inst_block(curr));
478
0
    }
479
0
  }
480
0
  return refd;
481
0
}
482
483
0
jv block_take_imports(block* body) {
484
0
  jv imports = jv_array();
485
486
  /* Parser should never generate TOP before imports */
487
0
  assert(!(body->first && body->first->op == TOP && body->first->next &&
488
0
        (body->first->next->op == MODULEMETA || body->first->next->op == DEPS)));
489
490
0
  while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
491
0
    inst* dep = block_take(body);
492
0
    if (dep->op == DEPS) {
493
0
      imports = jv_array_append(imports, jv_copy(dep->imm.constant));
494
0
    }
495
0
    inst_free(dep);
496
0
  }
497
0
  return imports;
498
0
}
499
500
0
jv block_list_funcs(block body, int omit_underscores) {
501
0
  jv funcs = jv_object(); // Use the keys for set semantics.
502
0
  for (inst *pos = body.first; pos != NULL; pos = pos->next) {
503
0
    if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
504
0
      if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
505
0
        funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
506
0
      }
507
0
    }
508
0
  }
509
0
  return jv_keys_unsorted(funcs);
510
0
}
511
512
0
block gen_module(block metadata) {
513
0
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
514
0
  inst* i = inst_new(MODULEMETA);
515
0
  i->imm.constant = block_const(metadata);
516
0
  if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
517
0
    i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
518
0
  block_free(metadata);
519
0
  return inst_block(i);
520
0
}
521
522
0
jv block_module_meta(block b) {
523
0
  if (b.first != NULL && b.first->op == MODULEMETA)
524
0
    return jv_copy(b.first->imm.constant);
525
0
  return jv_null();
526
0
}
527
528
0
block gen_import(const char* name, const char* as, int is_data) {
529
0
  inst* i = inst_new(DEPS);
530
0
  jv meta = jv_object();
531
0
  if (as != NULL)
532
0
    meta = jv_object_set(meta, jv_string("as"), jv_string(as));
533
0
  meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
534
0
  meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
535
0
  i->imm.constant = meta;
536
0
  return inst_block(i);
537
0
}
538
539
0
block gen_import_meta(block import, block metadata) {
540
0
  assert(block_is_single(import) && import.first->op == DEPS);
541
0
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
542
0
  inst *i = import.first;
543
0
  i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
544
0
  block_free(metadata);
545
0
  return import;
546
0
}
547
548
0
block gen_function(const char* name, block formals, block body) {
549
0
  inst* i = inst_new(CLOSURE_CREATE);
550
0
  int nformals = 0;
551
0
  for (inst* i = formals.last; i; i = i->prev) {
552
0
    nformals++;
553
0
    i->nformals = 0;
554
0
    if (i->op == CLOSURE_PARAM_REGULAR) {
555
0
      i->op = CLOSURE_PARAM;
556
0
      body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
557
0
    }
558
0
    block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
559
0
  }
560
0
  i->subfn = body;
561
0
  i->symbol = strdup(name);
562
0
  i->any_unbound = -1;
563
0
  i->nformals = nformals;
564
0
  i->arglist = formals;
565
0
  block b = inst_block(i);
566
0
  block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
567
0
  return b;
568
0
}
569
570
0
block gen_param_regular(const char* name) {
571
0
  return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
572
0
}
573
574
0
block gen_param(const char* name) {
575
0
  return gen_op_unbound(CLOSURE_PARAM, name);
576
0
}
577
578
0
block gen_lambda(block body) {
579
0
  return gen_function("@lambda", gen_noop(), body);
580
0
}
581
582
0
block gen_call(const char* name, block args) {
583
0
  block b = gen_op_unbound(CALL_JQ, name);
584
0
  b.first->arglist = args;
585
0
  b.first->nactuals = block_count_actuals(b.first->arglist);
586
0
  return b;
587
0
}
588
589
0
block gen_subexp(block a) {
590
0
  if (block_is_noop(a)) {
591
0
    return gen_op_simple(DUP);
592
0
  }
593
0
  if (block_is_single(a) && a.first->op == LOADK) {
594
0
    jv c = block_const(a);
595
0
    block_free(a);
596
0
    return gen_op_pushk_under(c);
597
0
  }
598
0
  return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
599
0
}
600
601
0
block gen_both(block a, block b) {
602
0
  block jump = gen_op_targetlater(JUMP);
603
0
  block fork = gen_op_target(FORK, jump);
604
0
  block c = BLOCK(fork, a, jump, b);
605
0
  inst_set_target(jump, c);
606
0
  return c;
607
0
}
608
609
0
block gen_const_object(block expr) {
610
0
  int is_const = 1;
611
0
  jv o = jv_object();
612
0
  jv k = jv_null();
613
0
  jv v = jv_null();
614
0
  for (inst *i = expr.first; i; i = i->next) {
615
0
    if (i->op == PUSHK_UNDER) {
616
0
      k = jv_copy(i->imm.constant);
617
0
      i = i->next;
618
0
    } else if (i->op != SUBEXP_BEGIN ||
619
0
        i->next == NULL ||
620
0
        i->next->op != LOADK ||
621
0
        i->next->next == NULL ||
622
0
        i->next->next->op != SUBEXP_END) {
623
0
      is_const = 0;
624
0
      break;
625
0
    } else {
626
0
      k = jv_copy(i->next->imm.constant);
627
0
      i = i->next->next->next;
628
0
    }
629
0
    if (i != NULL && i->op == PUSHK_UNDER) {
630
0
      v = jv_copy(i->imm.constant);
631
0
      i = i->next;
632
0
    } else if (i == NULL ||
633
0
        i->op != SUBEXP_BEGIN ||
634
0
        i->next == NULL ||
635
0
        i->next->op != LOADK ||
636
0
        i->next->next == NULL ||
637
0
        i->next->next->op != SUBEXP_END) {
638
0
      is_const = 0;
639
0
      break;
640
0
    } else {
641
0
      v = jv_copy(i->next->imm.constant);
642
0
      i = i->next->next->next;
643
0
    }
644
0
    if (i == NULL || i->op != INSERT) {
645
0
      is_const = 0;
646
0
      break;
647
0
    }
648
0
    if (jv_get_kind(k) != JV_KIND_STRING) {
649
0
      is_const = 0;
650
0
      break;
651
0
    }
652
0
    o = jv_object_set(o, k, v);
653
0
    k = jv_null();
654
0
    v = jv_null();
655
0
  }
656
0
  if (!is_const) {
657
0
    jv_free(o);
658
0
    jv_free(k);
659
0
    jv_free(v);
660
0
    block b = {0,0};
661
0
    return b;
662
0
  }
663
0
  block_free(expr);
664
0
  return gen_const(o);
665
0
}
666
667
0
static block gen_const_array(block expr) {
668
  /*
669
   * An expr of all constant elements looks like this:
670
   *
671
   * 0009 FORK 0027
672
   * 0011 FORK 0023
673
   * 0013 FORK 0019
674
   * 0015 LOADK 1
675
   * 0017 JUMP 0021
676
   * 0019 LOADK 2
677
   * 0021 JUMP 0025
678
   * 0023 LOADK 3
679
   * 0025 JUMP 0029
680
   * 0027 LOADK 4
681
   *
682
   * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
683
   * every LOADK.  The sequence ends in a LOADK.  Any deviation and it's
684
   * not a list of constants.
685
   *
686
   * Here we check for this pattern almost exactly.  We don't check that
687
   * the targets of the FORK and JUMP instructions are in the right
688
   * sequence.
689
   */
690
0
  int all_const = 1;
691
0
  int commas = 0;
692
0
  int normal = 1;
693
0
  jv a = jv_array();
694
0
  for (inst *i = expr.first; i; i = i->next) {
695
0
    if (i->op == FORK) {
696
0
      commas++;
697
0
      if (i->imm.target == NULL || i->imm.target->op != JUMP ||
698
0
          jv_array_length(jv_copy(a)) > 0) {
699
0
        normal = 0;
700
0
        break;
701
0
      }
702
0
    } else if (all_const && i->op == LOADK) {
703
0
      if (i->next != NULL && i->next->op != JUMP) {
704
0
        normal = 0;
705
0
        break;
706
0
      }
707
0
      a = jv_array_append(a, jv_copy(i->imm.constant));
708
0
    } else if (i->op != JUMP || i->imm.target == NULL ||
709
0
               i->imm.target->op != LOADK) {
710
0
      all_const = 0;
711
0
    }
712
0
  }
713
714
0
  if (all_const && normal &&
715
0
      (expr.last == NULL || expr.last->op == LOADK) &&
716
0
      jv_array_length(jv_copy(a)) == commas + 1) {
717
0
    block_free(expr);
718
0
    return gen_const(a);
719
0
  }
720
721
0
  jv_free(a);
722
0
  block b = {0,0};
723
0
  return b;
724
0
}
725
726
0
block gen_collect(block expr) {
727
0
  block const_array = gen_const_array(expr);
728
0
  if (const_array.first != NULL)
729
0
    return const_array;
730
731
0
  block array_var = gen_op_var_fresh(STOREV, "collect");
732
0
  block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
733
734
0
  block tail = BLOCK(gen_op_bound(APPEND, array_var),
735
0
                     gen_op_simple(BACKTRACK));
736
737
0
  return BLOCK(c,
738
0
               gen_op_target(FORK, tail),
739
0
               expr,
740
0
               tail,
741
0
               gen_op_bound(LOADVN, array_var));
742
0
}
743
744
0
static block bind_matcher(block matcher, block body) {
745
  // cannot call block_bind(matcher, body) because that requires
746
  // block_has_only_binders(matcher), which is not true here as matchers
747
  // may also contain code to extract the correct elements
748
0
  for (inst* i = matcher.first; i; i = i->next) {
749
0
    if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
750
0
      block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
751
0
  }
752
0
  return BLOCK(matcher, body);
753
0
}
754
755
756
// Extract destructuring var names from the block
757
// *vars should be a jv_object (for set semantics)
758
0
static void block_get_unbound_vars(block b, jv *vars) {
759
0
  assert(vars != NULL);
760
0
  assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
761
0
  for (inst* i = b.first; i; i = i->next) {
762
0
    if (i->subfn.first) {
763
0
      block_get_unbound_vars(i->subfn, vars);
764
0
      continue;
765
0
    }
766
0
    if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
767
0
      *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
768
0
    }
769
0
  }
770
0
}
771
772
/* Build wrappers around destructuring matchers so that we can chain them
773
 * when we have errors.  The approach is as follows:
774
 * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
775
 * existing_matcher_block
776
 * JUMP BODY
777
 */
778
0
static block bind_alternation_matchers(block matchers, block body) {
779
0
  block preamble = {0};
780
0
  block altmatchers = {0};
781
0
  block mb = {0};
782
0
  block final_matcher = matchers;
783
784
  // Pass through the matchers to find all destructured names.
785
0
  while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
786
0
    block_append(&altmatchers, inst_block(block_take(&final_matcher)));
787
0
  }
788
789
  // We don't have any alternations here, so we can use the simplest case.
790
0
  if (altmatchers.first == NULL) {
791
0
    return bind_matcher(final_matcher, body);
792
0
  }
793
794
  // Collect var names
795
0
  jv all_vars = jv_object();
796
0
  block_get_unbound_vars(altmatchers, &all_vars);
797
0
  block_get_unbound_vars(final_matcher, &all_vars);
798
799
  // We need a preamble of STOREVs to which to bind the matchers and the body.
800
0
  jv_object_keys_foreach(all_vars, key) {
801
0
    preamble = BLOCK(preamble,
802
0
                     gen_op_simple(DUP),
803
0
                     gen_const(jv_null()),
804
0
                     gen_op_unbound(STOREV, jv_string_value(key)));
805
0
    jv_free(key);
806
0
  }
807
0
  jv_free(all_vars);
808
809
  // Now we build each matcher in turn
810
0
  for (inst *i = altmatchers.first; i; i = i->next) {
811
0
    block submatcher = i->subfn;
812
813
    // If we're successful, jump to the end of the matchers
814
0
    submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));
815
816
    // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
817
0
    mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);
818
819
    // We're done with this inst and we don't want it anymore
820
    // But we can't let it free the submatcher block.
821
0
    i->subfn.first = i->subfn.last = NULL;
822
0
  }
823
  // We're done with these insts now.
824
0
  block_free(altmatchers);
825
826
0
  return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
827
0
}
828
829
0
block gen_reduce(block source, block matcher, block init, block body) {
830
0
  block res_var = gen_op_var_fresh(STOREV, "reduce");
831
0
  block loop = BLOCK(gen_op_simple(DUPN),
832
0
                     source,
833
0
                     bind_alternation_matchers(matcher,
834
0
                                  BLOCK(gen_op_bound(LOADVN, res_var),
835
0
                                        body,
836
0
                                        gen_op_bound(STOREV, res_var))),
837
0
                     gen_op_simple(BACKTRACK));
838
0
  return BLOCK(gen_op_simple(DUP),
839
0
               init,
840
0
               res_var,
841
0
               gen_op_target(FORK, loop),
842
0
               loop,
843
0
               gen_op_bound(LOADVN, res_var));
844
0
}
845
846
0
block gen_foreach(block source, block matcher, block init, block update, block extract) {
847
0
  block state_var = gen_op_var_fresh(STOREV, "foreach");
848
0
  return BLOCK(gen_op_simple(DUP),
849
0
               init,
850
0
               state_var,
851
0
               gen_op_simple(DUP),
852
               // get a value from the source expression:
853
0
               source,
854
               // destructure the value into variable(s) for all the code
855
               // in the body to see
856
0
               bind_alternation_matchers(matcher,
857
                            // load the loop state variable
858
0
                            BLOCK(gen_op_bound(LOADVN, state_var),
859
                                  // generate updated state
860
0
                                  update,
861
                                  // save the updated state for value extraction
862
0
                                  gen_op_simple(DUP),
863
                                  // save new state
864
0
                                  gen_op_bound(STOREV, state_var),
865
                                  // extract an output...
866
0
                                  extract)));
867
0
}
868
869
0
block gen_definedor(block a, block b) {
870
  // var found := false
871
0
  block found_var = gen_op_var_fresh(STOREV, "found");
872
0
  block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
873
874
  // if found, backtrack. Otherwise execute b
875
0
  block backtrack = gen_op_simple(BACKTRACK);
876
0
  block tail = BLOCK(gen_op_simple(DUP),
877
0
                     gen_op_bound(LOADV, found_var),
878
0
                     gen_op_target(JUMP_F, backtrack),
879
0
                     backtrack,
880
0
                     gen_op_simple(POP),
881
0
                     b);
882
883
  // try again
884
0
  block if_notfound = gen_op_simple(BACKTRACK);
885
886
  // found := true, produce result
887
0
  block if_found = BLOCK(gen_op_simple(DUP),
888
0
                         gen_const(jv_true()),
889
0
                         gen_op_bound(STOREV, found_var),
890
0
                         gen_op_target(JUMP, tail));
891
892
0
  return BLOCK(init,
893
0
               gen_op_target(FORK, if_notfound),
894
0
               a,
895
0
               gen_op_target(JUMP_F, if_found),
896
0
               if_found,
897
0
               if_notfound,
898
0
               tail);
899
0
}
900
901
0
int block_has_main(block top) {
902
0
  for (inst *c = top.first; c; c = c->next) {
903
0
    if (c->op == TOP)
904
0
      return 1;
905
0
  }
906
0
  return 0;
907
0
}
908
909
0
int block_is_funcdef(block b) {
910
0
  if (b.first != NULL && b.first->op == CLOSURE_CREATE)
911
0
    return 1;
912
0
  return 0;
913
0
}
914
915
0
block gen_condbranch(block iftrue, block iffalse) {
916
0
  iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
917
0
  return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
918
0
}
919
920
0
block gen_and(block a, block b) {
921
  // a and b = if a then (if b then true else false) else false
922
0
  return BLOCK(gen_op_simple(DUP), a,
923
0
               gen_condbranch(BLOCK(gen_op_simple(POP),
924
0
                                    b,
925
0
                                    gen_condbranch(gen_const(jv_true()),
926
0
                                                   gen_const(jv_false()))),
927
0
                              BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
928
0
}
929
930
0
block gen_or(block a, block b) {
931
  // a or b = if a then true else (if b then true else false)
932
0
  return BLOCK(gen_op_simple(DUP), a,
933
0
               gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
934
0
                              BLOCK(gen_op_simple(POP),
935
0
                                    b,
936
0
                                    gen_condbranch(gen_const(jv_true()),
937
0
                                                   gen_const(jv_false())))));
938
0
}
939
940
0
block gen_destructure_alt(block matcher) {
941
0
  for (inst *i = matcher.first; i; i = i->next) {
942
0
    if (i->op == STOREV) {
943
0
      i->op = STOREVN;
944
0
    }
945
0
  }
946
0
  inst* i = inst_new(DESTRUCTURE_ALT);
947
0
  i->subfn = matcher;
948
0
  return inst_block(i);
949
0
}
950
951
0
block gen_var_binding(block var, const char* name, block body) {
952
0
  return gen_destructure(var, gen_op_unbound(STOREV, name), body);
953
0
}
954
955
0
block gen_array_matcher(block left, block curr) {
956
0
  int index;
957
0
  if (block_is_noop(left))
958
0
    index = 0;
959
0
  else {
960
    // `left` was returned by this function, so the third inst is the
961
    // constant containing the previously used index
962
0
    assert(left.first->op == DUP);
963
0
    assert(left.first->next != NULL);
964
0
    inst *i = NULL;
965
0
    if (left.first->next->op == PUSHK_UNDER) {
966
0
      i = left.first->next;
967
0
    } else {
968
0
      assert(left.first->next->op == SUBEXP_BEGIN);
969
0
      assert(left.first->next->next->op == LOADK);
970
0
      i = left.first->next->next;
971
0
    }
972
0
    index = 1 + (int) jv_number_value(i->imm.constant);
973
0
  }
974
975
  // `left` goes at the end so that the const index is in a predictable place
976
0
  return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
977
0
               gen_op_simple(INDEX), curr, left);
978
0
}
979
980
0
block gen_object_matcher(block name, block curr) {
981
0
  return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
982
0
               curr);
983
0
}
984
985
0
block gen_destructure(block var, block matchers, block body) {
986
  // var bindings can be added after coding the program; leave the TOP first.
987
0
  block top = gen_noop();
988
0
  if (body.first && body.first->op == TOP)
989
0
    top = inst_block(block_take(&body));
990
991
0
  if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
992
0
    block_append(&var, gen_op_simple(DUP));
993
0
  } else {
994
0
    top = BLOCK(top, gen_op_simple(DUP));
995
0
  }
996
997
0
  return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
998
0
}
999
1000
// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
1001
0
static block gen_wildvar_binding(block var, const char* name, block body) {
1002
0
  return BLOCK(gen_op_simple(DUP), var,
1003
0
               block_bind(gen_op_unbound(STOREV, name),
1004
0
                          body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
1005
0
}
1006
1007
0
block gen_cond(block cond, block iftrue, block iffalse) {
1008
0
  return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
1009
0
               gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
1010
0
                              BLOCK(gen_op_simple(POP), iffalse)));
1011
0
}
1012
1013
0
block gen_try(block exp, block handler) {
1014
  /*
1015
   * Produce:
1016
   *
1017
   *  TRY_BEGIN handler
1018
   *  <exp>
1019
   *  TRY_END
1020
   *  JUMP past_handler
1021
   *  handler: <handler>
1022
   *  past_handler:
1023
   *
1024
   * If <exp> backtracks then TRY_BEGIN will backtrack.
1025
   *
1026
   * If <exp> produces a value then we'll execute whatever bytecode follows
1027
   * this sequence.  If that code raises an exception, then TRY_END will wrap
1028
   * and re-raise that exception, and TRY_BEGIN will unwrap and re-raise the
1029
   * exception (see jq_next()).
1030
   *
1031
   * If <exp> raises then the TRY_BEGIN will see a non-wrapped exception and
1032
   * will jump to the handler (note the TRY_END will not execute in this case),
1033
   * and if the handler produces any values, then we'll execute whatever
1034
   * bytecode follows this sequence.  Note that TRY_END will not execute in
1035
   * this case, so if the handler raises an exception, or code past the handler
1036
   * raises an exception, then that exception won't be wrapped and re-raised,
1037
   * and the TRY_BEGIN will not catch it because it does not stack_save() when
1038
   * it branches to the handler.
1039
   */
1040
1041
0
  if (block_is_noop(handler))
1042
0
    handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP));
1043
1044
0
  block jump = gen_op_target(JUMP, handler);
1045
0
  return BLOCK(gen_op_target(TRY_BEGIN, jump), exp, gen_op_simple(TRY_END),
1046
0
               jump, handler);
1047
0
}
1048
1049
0
block gen_label(const char *label, block exp) {
1050
0
  block cond = gen_call("_equal",
1051
0
                        BLOCK(gen_lambda(gen_noop()),
1052
0
                              gen_lambda(gen_op_unbound(LOADV, label))));
1053
0
  return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
1054
0
                             BLOCK(gen_op_simple(POP),
1055
                                   // try exp catch if . == $label
1056
                                   //               then empty
1057
                                   //               else error end
1058
                                   //
1059
                                   // Can't use gen_binop(), as that's firmly
1060
                                   // stuck in parser.y as it refers to things
1061
                                   // like EQ.
1062
0
                                   gen_try(exp,
1063
0
                                           gen_cond(cond,
1064
0
                                                    gen_op_simple(BACKTRACK),
1065
0
                                                    gen_call("error", gen_noop())))));
1066
0
}
1067
1068
0
block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
1069
0
  for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
1070
0
    inst* i = inst_new(CLOSURE_CREATE_C);
1071
0
    i->imm.cfunc = &cfunctions[cfunc];
1072
0
    i->symbol = strdup(cfunctions[cfunc].name);
1073
0
    i->nformals = cfunctions[cfunc].nargs - 1;
1074
0
    i->any_unbound = 0;
1075
0
    code = BLOCK(inst_block(i), code);
1076
0
  }
1077
0
  return code;
1078
0
}
1079
1080
0
static uint16_t nesting_level(struct bytecode* bc, inst* target) {
1081
0
  uint16_t level = 0;
1082
0
  assert(bc && target && target->compiled);
1083
0
  while (bc && target->compiled != bc) {
1084
0
    level++;
1085
0
    bc = bc->parent;
1086
0
  }
1087
0
  assert(bc && bc == target->compiled);
1088
0
  return level;
1089
0
}
1090
1091
0
static int count_cfunctions(block b) {
1092
0
  int n = 0;
1093
0
  for (inst* i = b.first; i; i = i->next) {
1094
0
    if (i->op == CLOSURE_CREATE_C) n++;
1095
0
    n += count_cfunctions(i->subfn);
1096
0
  }
1097
0
  return n;
1098
0
}
1099
1100
#ifndef WIN32
1101
extern char **environ;
1102
#endif
1103
1104
static jv
1105
make_env(jv env)
1106
0
{
1107
0
  if (jv_is_valid(env))
1108
0
    return jv_copy(env);
1109
0
  jv r = jv_object();
1110
0
  if (environ == NULL)
1111
0
    return r;
1112
0
  for (size_t i = 0; environ[i] != NULL; i++) {
1113
0
    const char *eq;
1114
1115
0
    if ((eq = strchr(environ[i], '=')) == NULL)
1116
0
      r = jv_object_delete(r, jv_string(environ[i]));
1117
0
    else
1118
0
      r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
1119
0
  }
1120
0
  return jv_copy(r);
1121
0
}
1122
1123
// Expands call instructions into a calling sequence
1124
0
static int expand_call_arglist(block* b, jv args, jv *env) {
1125
0
  int errors = 0;
1126
0
  block ret = gen_noop();
1127
0
  for (inst* curr; (curr = block_take(b));) {
1128
0
    if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
1129
0
      if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
1130
0
        curr->op = LOADK;
1131
0
        *env = curr->imm.constant = make_env(*env);
1132
0
      } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
1133
0
        curr->op = LOADK;
1134
0
        curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
1135
0
      } else if (!curr->bound_by) {
1136
0
        if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
1137
0
          locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
1138
0
        else if (curr->op == LOADV)
1139
0
          locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
1140
0
        else
1141
0
          locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals);
1142
0
        errors++;
1143
        // don't process this instruction if it's not well-defined
1144
0
        ret = BLOCK(ret, inst_block(curr));
1145
0
        continue;
1146
0
      }
1147
0
    }
1148
1149
0
    block prelude = gen_noop();
1150
0
    if (curr->op == CALL_JQ) {
1151
0
      int actual_args = 0, desired_args = 0;
1152
      // We expand the argument list as a series of instructions
1153
0
      switch (curr->bound_by->op) {
1154
0
      default: assert(0 && "Unknown function type"); break;
1155
0
      case CLOSURE_CREATE:
1156
0
      case CLOSURE_PARAM: {
1157
0
        block callargs = gen_noop();
1158
0
        for (inst* i; (i = block_take(&curr->arglist));) {
1159
0
          assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
1160
0
          block b = inst_block(i);
1161
0
          switch (i->op) {
1162
0
          default: assert(0 && "Unknown type of parameter"); break;
1163
0
          case CLOSURE_REF:
1164
0
            block_append(&callargs, b);
1165
0
            break;
1166
0
          case CLOSURE_CREATE:
1167
0
            block_append(&prelude, b);
1168
0
            block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
1169
0
            break;
1170
0
          }
1171
0
          actual_args++;
1172
0
        }
1173
0
        curr->imm.intval = actual_args;
1174
0
        curr->arglist = callargs;
1175
1176
0
        if (curr->bound_by->op == CLOSURE_CREATE) {
1177
0
          for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
1178
0
            assert(i->op == CLOSURE_PARAM);
1179
0
            desired_args++;
1180
0
          }
1181
0
        }
1182
0
        break;
1183
0
      }
1184
1185
0
      case CLOSURE_CREATE_C: {
1186
0
        for (inst* i; (i = block_take(&curr->arglist)); ) {
1187
0
          assert(i->op == CLOSURE_CREATE); // FIXME
1188
0
          block body = i->subfn;
1189
0
          i->subfn = gen_noop();
1190
0
          inst_free(i);
1191
          // arguments should be pushed in reverse order, prepend them to prelude
1192
0
          errors += expand_call_arglist(&body, args, env);
1193
0
          prelude = BLOCK(gen_subexp(body), prelude);
1194
0
          actual_args++;
1195
0
        }
1196
0
        assert(curr->op == CALL_JQ);
1197
0
        curr->op = CALL_BUILTIN;
1198
0
        curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
1199
0
        assert(curr->bound_by->op == CLOSURE_CREATE_C);
1200
0
        desired_args = curr->bound_by->imm.cfunc->nargs - 1;
1201
0
        assert(!curr->arglist.first);
1202
0
        break;
1203
0
      }
1204
0
      }
1205
1206
0
      assert(actual_args == desired_args); // because now handle this above
1207
0
    }
1208
0
    ret = BLOCK(ret, prelude, inst_block(curr));
1209
0
  }
1210
0
  *b = ret;
1211
0
  return errors;
1212
0
}
1213
1214
0
static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
1215
0
  int errors = 0;
1216
0
  int pos = 0;
1217
0
  int var_frame_idx = 0;
1218
0
  bc->nsubfunctions = 0;
1219
0
  errors += expand_call_arglist(&b, args, env);
1220
0
  b = BLOCK(b, gen_op_simple(RET));
1221
0
  jv localnames = jv_array();
1222
0
  for (inst* curr = b.first; curr; curr = curr->next) {
1223
0
    if (!curr->next) assert(curr == b.last);
1224
0
    int length = opcode_describe(curr->op)->length;
1225
0
    if (curr->op == CALL_JQ) {
1226
0
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1227
0
        length += 2;
1228
0
      }
1229
0
    }
1230
0
    pos += length;
1231
0
    curr->bytecode_pos = pos;
1232
0
    curr->compiled = bc;
1233
1234
0
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1235
1236
0
    if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
1237
0
        curr->bound_by == curr) {
1238
0
      curr->imm.intval = var_frame_idx++;
1239
0
      localnames = jv_array_append(localnames, jv_string(curr->symbol));
1240
0
    }
1241
1242
0
    if (curr->op == CLOSURE_CREATE) {
1243
0
      assert(curr->bound_by == curr);
1244
0
      curr->imm.intval = bc->nsubfunctions++;
1245
0
    }
1246
0
    if (curr->op == CLOSURE_CREATE_C) {
1247
0
      assert(curr->bound_by == curr);
1248
0
      int idx = bc->globals->ncfunctions++;
1249
0
      bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
1250
0
                                                 jv_string(curr->symbol));
1251
0
      bc->globals->cfunctions[idx] = *curr->imm.cfunc;
1252
0
      curr->imm.intval = idx;
1253
0
    }
1254
0
  }
1255
0
  if (pos > 0xFFFF) {
1256
    // too long for program counter to fit in uint16_t
1257
0
    locfile_locate(lf, UNKNOWN_LOCATION,
1258
0
        "function compiled to %d bytes which is too long", pos);
1259
0
    errors++;
1260
0
  }
1261
0
  bc->codelen = pos;
1262
0
  bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
1263
0
  if (bc->nsubfunctions && !errors) {
1264
0
    bc->subfunctions = jv_mem_calloc(bc->nsubfunctions, sizeof(struct bytecode*));
1265
0
    for (inst* curr = b.first; curr; curr = curr->next) {
1266
0
      if (curr->op == CLOSURE_CREATE) {
1267
0
        struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
1268
0
        bc->subfunctions[curr->imm.intval] = subfn;
1269
0
        subfn->globals = bc->globals;
1270
0
        subfn->parent = bc;
1271
0
        subfn->nclosures = 0;
1272
0
        subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
1273
0
        jv params = jv_array();
1274
0
        for (inst* param = curr->arglist.first; param; param = param->next) {
1275
0
          assert(param->op == CLOSURE_PARAM);
1276
0
          assert(param->bound_by == param);
1277
0
          param->imm.intval = subfn->nclosures++;
1278
0
          param->compiled = subfn;
1279
0
          params = jv_array_append(params, jv_string(param->symbol));
1280
0
        }
1281
0
        subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
1282
0
        errors += compile(subfn, curr->subfn, lf, args, env);
1283
0
        curr->subfn = gen_noop();
1284
0
      }
1285
0
    }
1286
0
  } else {
1287
0
    bc->nsubfunctions = 0;
1288
0
    bc->subfunctions = 0;
1289
0
  }
1290
0
  uint16_t* code = jv_mem_calloc(bc->codelen, sizeof(uint16_t));
1291
0
  bc->code = code;
1292
0
  pos = 0;
1293
0
  jv constant_pool = jv_array();
1294
0
  int maxvar = -1;
1295
0
  if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
1296
0
    const struct opcode_description* op = opcode_describe(curr->op);
1297
0
    if (op->length == 0)
1298
0
      continue;
1299
0
    code[pos++] = curr->op;
1300
0
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
1301
0
    if (curr->op == CALL_BUILTIN) {
1302
0
      assert(curr->bound_by->op == CLOSURE_CREATE_C);
1303
0
      assert(!curr->arglist.first);
1304
0
      code[pos++] = (uint16_t)curr->imm.intval;
1305
0
      code[pos++] = curr->bound_by->imm.intval;
1306
0
    } else if (curr->op == CALL_JQ) {
1307
0
      assert(curr->bound_by->op == CLOSURE_CREATE ||
1308
0
             curr->bound_by->op == CLOSURE_PARAM);
1309
0
      code[pos++] = (uint16_t)curr->imm.intval;
1310
0
      code[pos++] = nesting_level(bc, curr->bound_by);
1311
0
      code[pos++] = curr->bound_by->imm.intval |
1312
0
        (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
1313
0
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
1314
0
        assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
1315
0
        code[pos++] = nesting_level(bc, arg->bound_by);
1316
0
        code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
1317
0
      }
1318
0
    } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
1319
      // STORE_GLOBAL: constant global, basically
1320
0
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1321
0
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1322
0
      code[pos++] = nesting_level(bc, curr->bound_by);
1323
0
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1324
0
      code[pos++] = var;
1325
0
      if (var > maxvar) maxvar = var;
1326
0
    } else if (op->flags & OP_HAS_CONSTANT) {
1327
0
      code[pos++] = jv_array_length(jv_copy(constant_pool));
1328
0
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
1329
0
    } else if (op->flags & OP_HAS_VARIABLE) {
1330
0
      code[pos++] = nesting_level(bc, curr->bound_by);
1331
0
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
1332
0
      code[pos++] = var;
1333
0
      if (var > maxvar) maxvar = var;
1334
0
    } else if (op->flags & OP_HAS_BRANCH) {
1335
0
      assert(curr->imm.target->bytecode_pos != -1);
1336
0
      assert(curr->imm.target->bytecode_pos > pos); // only forward branches
1337
0
      code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
1338
0
      pos++;
1339
0
    } else if (op->length > 1) {
1340
0
      assert(0 && "codegen not implemented for this operation");
1341
0
    }
1342
0
  }
1343
0
  bc->constants = constant_pool;
1344
0
  bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
1345
0
  block_free(b);
1346
0
  return errors;
1347
0
}
1348
1349
0
int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
1350
0
  struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
1351
0
  bc->parent = 0;
1352
0
  bc->nclosures = 0;
1353
0
  bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
1354
0
  int ncfunc = count_cfunctions(b);
1355
0
  bc->globals->ncfunctions = 0;
1356
0
  bc->globals->cfunctions = jv_mem_calloc(MAX(ncfunc, 1), sizeof(struct cfunction));
1357
0
  bc->globals->cfunc_names = jv_array();
1358
0
  bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
1359
0
  jv env = jv_invalid();
1360
0
  int nerrors = compile(bc, b, lf, args, &env);
1361
0
  jv_free(args);
1362
0
  jv_free(env);
1363
0
  assert(bc->globals->ncfunctions == ncfunc);
1364
0
  if (nerrors > 0) {
1365
0
    bytecode_free(bc);
1366
0
    *out = 0;
1367
0
  } else {
1368
0
    *out = bc;
1369
0
  }
1370
0
  return nerrors;
1371
0
}
1372
1373
0
void block_free(block b) {
1374
0
  struct inst* next;
1375
0
  for (struct inst* curr = b.first; curr; curr = next) {
1376
0
    next = curr->next;
1377
0
    inst_free(curr);
1378
0
  }
1379
0
}