Coverage Report

Created: 2023-09-23 08:17

/src/augeas/src/jmt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * jmt.c: Earley parser for lenses based on Jim/Mandelbaum transducers
3
 *
4
 * Copyright (C) 2009-2016 David Lutterkort
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
19
 *
20
 * Author: David Lutterkort <lutter@redhat.com>
21
 */
22
23
#include <config.h>
24
25
#include "jmt.h"
26
#include "internal.h"
27
#include "memory.h"
28
#include "errcode.h"
29
30
/* This is an implementation of the Earley parser described in the paper
31
 * "Efficient Earley Parsing with Regular Right-hand Sides" by Trever Jim
32
 * and Yitzhak Mandelbaum.
33
 *
34
 * Since we only deal in lenses, they are both terminals and nonterminals:
35
 * recursive lenses (those with lens->recursive set) are nonterminals, and
36
 * non-recursive lenses are terminals, with the exception of non-recursive
37
 * lenses that match the empty word. Lenses are also the semantic actions
38
 * attached to a SCAN or COMPLETE during recosntruction of the parse
39
 * tree. Our SCAN makes sure that we only ever scan nonempty words - that
40
 * means that for nonrecursive lenses which match the empty word (e.g., del
41
 * [ \t]* "") we need to make sure we get a COMPLETE when the lens matched
42
 * the empty word.
43
 *
44
 * That is achieved by treating such a lens t as the construct (t|T) with
45
 * T := eps.
46
 */
47
48
/*
49
 * Data structures for the Jim/Mandelbaum parser
50
 */
51
52
0
#define IND_MAX UINT32_MAX
53
54
struct array {
55
    size_t elem_size;
56
    ind_t used;
57
    ind_t size;
58
    void *data;
59
};
60
61
#define array_elem(arr, ind, typ)                               \
62
0
    (((typ *) (arr).data) + ind)
63
64
#define array_for_each(i, arr)                                  \
65
0
    for(ind_t i = 0; i < (arr).used; i++)
66
67
#define array_each_elem(elt, arr, typ)                          \
68
0
    for(typ *elt = (typ *) (arr).data + 0;                      \
69
0
        elt - (typ *) (arr).data < (arr).used;                  \
70
0
        elt++)
71
72
0
static void array_init(struct array *arr, size_t elem_size) {
73
0
    MEMZERO(arr, 1);
74
0
    arr->elem_size = elem_size;
75
0
}
76
77
ATTRIBUTE_RETURN_CHECK
78
0
static int array_add(struct array *arr, ind_t *ind) {
79
0
    if (arr->used >= arr->size) {
80
0
        int r;
81
0
        ind_t expand = arr->size;
82
0
        if (expand < 8)
83
0
            expand = 8;
84
0
        r = mem_realloc_n(&(arr->data), arr->elem_size, arr->size + expand);
85
0
        if (r < 0)
86
0
            return -1;
87
0
        memset((char *) arr->data + arr->elem_size*arr->size, 0,
88
0
               arr->elem_size * expand);
89
0
        arr->size += expand;
90
0
    }
91
0
    *ind = arr->used;
92
0
    arr->used += 1;
93
0
    return 0;
94
0
}
95
96
/* Insert a new entry into the array at index IND. Shift all entries from
97
 * IND on back by one. */
98
ATTRIBUTE_RETURN_CHECK
99
0
static int array_insert(struct array *arr, ind_t ind) {
100
0
    ind_t last;
101
102
0
    if (array_add(arr, &last) < 0)
103
0
        return -1;
104
105
0
    if (ind >= last)
106
0
        return 0;
107
108
0
    memmove((char *) arr->data + arr->elem_size*(ind+1),
109
0
            (char *) arr->data + arr->elem_size*ind,
110
0
            arr->elem_size * (arr->used - ind - 1));
111
0
    memset((char *) arr->data + arr->elem_size*ind, 0, arr->elem_size);
112
0
    return 0;
113
0
}
114
115
0
static void array_release(struct array *arr) {
116
0
    if (arr != NULL) {
117
0
        free(arr->data);
118
0
        arr->used = arr->size = 0;
119
0
    }
120
0
}
121
122
0
static void array_remove(struct array *arr, ind_t ind) {
123
0
    char *data = arr->data;
124
0
    memmove(data + ind * arr->elem_size,
125
0
            data + (ind + 1) * arr->elem_size,
126
0
            (arr->used - ind - 1) * arr->elem_size);
127
0
    arr->used -= 1;
128
0
}
129
130
ATTRIBUTE_RETURN_CHECK
131
0
static int array_join(struct array *dst, struct array *src) {
132
0
    int r;
133
134
0
    if (dst->elem_size != src->elem_size)
135
0
        return -1;
136
137
0
    r = mem_realloc_n(&(dst->data), dst->elem_size,
138
0
                      dst->used + src->used);
139
0
    if (r < 0)
140
0
        return -1;
141
142
0
    memcpy(((char *) dst->data) + dst->used * dst->elem_size,
143
0
           src->data,
144
0
           src->used * src->elem_size);
145
0
    dst->used += src->used;
146
0
    dst->size = dst->used;
147
0
    return 0;
148
0
}
149
150
/* Special lens indices - these don't reference lenses, but
151
 * some pseudo actions. We stash them at the top of the range
152
 * of IND_T, with LENS_MAX giving us the maximal lens index for
153
 * a real lens
154
 */
155
enum trans_op {
156
    EPS = IND_MAX,
157
    CALL = EPS - 1,
158
    LENS_MAX = CALL - 1
159
};
160
161
struct trans {
162
    struct state *to;
163
    ind_t lens;
164
};
165
166
struct state {
167
    struct state *next;      /* Linked list for memory management */
168
    struct array trans;      /* Array of struct trans */
169
    ind_t  nret;             /* Number of returned lenses */
170
    ind_t  *ret;             /* The returned lenses */
171
    ind_t  num;              /* Counter for num of new states */
172
    unsigned int reachable : 1;
173
    unsigned int live : 1;
174
};
175
176
/* Sets of states used in determinizing the NFA to a DFA */
177
struct nfa_state {
178
    struct state *state;  /* The new state in the DFA */
179
    struct array     set; /* Set of (struct state *) in the NFA, sorted */
180
};
181
182
/* For recursive lenses (nonterminals), the mapping of nonterminal to
183
 * state. We also store nonrecursive lenses here; in that case, the state
184
 * will be NULL */
185
struct jmt_lens {
186
    struct lens  *lens;
187
    struct state *state;
188
};
189
190
/* A Jim/Mandelbaum transducer */
191
struct jmt {
192
    struct error *error;
193
    struct array lenses;       /* Array of struct jmt_lens */
194
    struct state *start;
195
    ind_t  lens;               /* The start symbol of the grammar */
196
    ind_t  state_count;
197
};
198
199
enum item_reason {
200
    R_ROOT = 1,
201
    R_COMPLETE = R_ROOT << 1,
202
    R_PREDICT = R_COMPLETE << 1,
203
    R_SCAN = R_PREDICT << 1
204
};
205
206
/* The reason an item was added for parse reconstruction; can be R_SCAN,
207
 * R_COMPLETE, R_PREDICT or R_COMPLETE|R_PREDICT */
208
struct link {
209
    enum item_reason reason;
210
    ind_t            lens;       /* R_COMPLETE, R_SCAN */
211
    ind_t            from_set;   /* R_COMPLETE, R_PREDICT, R_SCAN */
212
    ind_t            from_item;  /* R_COMPLETE, R_PREDICT, R_SCAN */
213
    ind_t            to_item;    /* R_COMPLETE */
214
    ind_t            caller;     /* number of a state */
215
};
216
217
struct item {
218
    /* The 'classical' Earley item (state, parent) */
219
    struct state    *state;
220
    ind_t            parent;
221
    /* Backlinks to why item was added */
222
    ind_t            nlinks;
223
    struct link     *links;
224
};
225
226
struct item_set {
227
    struct array items;
228
};
229
230
struct jmt_parse {
231
    struct jmt       *jmt;
232
    struct error     *error;
233
    const char       *text;
234
    ind_t             nsets;
235
    struct item_set **sets;
236
};
237
238
#define for_each_item(it, set)                                  \
239
0
    array_each_elem(it, (set)->items, struct item)
240
241
#define for_each_trans(t, s)                                    \
242
0
    array_each_elem(t, (s)->trans, struct trans)
243
244
#define parse_state(parse, ind)                                 \
245
    array_elem(parse->states, ind, struct state)
246
247
static struct item *set_item(struct jmt_parse *parse, ind_t set,
248
0
                                 ind_t item) {
249
0
    ensure(parse->sets[set] != NULL, parse);
250
0
    ensure(item < parse->sets[set]->items.used, parse);
251
0
    return array_elem(parse->sets[set]->items, item, struct item);
252
0
 error:
253
0
    return NULL;
254
0
}
255
256
static struct state *item_state(struct jmt_parse *parse, ind_t set,
257
0
                                    ind_t item) {
258
0
    return set_item(parse, set, item)->state;
259
0
}
260
261
0
static struct trans *state_trans(struct state *state, ind_t t) {
262
0
    return array_elem(state->trans, t, struct trans);
263
0
}
264
265
0
static ind_t item_parent(struct jmt_parse *parse, ind_t set, ind_t item) {
266
0
    return set_item(parse, set, item)->parent;
267
0
}
268
269
0
static bool is_return(const struct state *s) {
270
0
    return s->nret > 0;
271
0
}
272
273
0
static struct lens *lens_of_parse(struct jmt_parse *parse, ind_t lens) {
274
0
    return array_elem(parse->jmt->lenses, lens, struct jmt_lens)->lens;
275
0
}
276
277
/*
278
 * The parser
279
 */
280
281
/*
282
 * Manipulate the Earley graph. We denote edges in the graph as
283
 *   [j, (s,i)] -> [k, item_k] => [l, item_l]
284
 * to indicate that we are adding item (s,i) to E_j and record that its
285
 * child is the item with index item_k in E_k and its sibling is item
286
 * item_l in E_l.
287
 */
288
289
/* Add item (s, k) to E_j. Note that the item was caused by action reason
290
 * using lens starting at from_item in E_{from_set}
291
 *
292
 * [j, (s,k)] -> [from_set, from_item] => [j, to_item]
293
 */
294
static ind_t parse_add_item(struct jmt_parse *parse, ind_t j,
295
                            struct state *s, ind_t k,
296
                            enum item_reason reason, ind_t lens,
297
                            ind_t from_set,
298
                            ind_t from_item, ind_t to_item,
299
0
                            ind_t caller) {
300
301
0
    int r;
302
0
    struct item_set *set = parse->sets[j];
303
0
    struct item *item = NULL;
304
0
    ind_t result = IND_MAX;
305
306
0
    ensure(from_item == EPS || from_item < parse->sets[from_set]->items.used,
307
0
           parse);
308
0
    ensure(to_item == EPS || to_item < parse->sets[j]->items.used,
309
0
           parse);
310
311
0
    if (set == NULL) {
312
0
        r = ALLOC(parse->sets[j]);
313
0
        ERR_NOMEM(r < 0, parse);
314
0
        array_init(&parse->sets[j]->items, sizeof(struct item));
315
0
        set = parse->sets[j];
316
0
    }
317
318
0
    for (ind_t i=0; i < set->items.used; i++) {
319
0
        if (item_state(parse, j, i) == s
320
0
            && item_parent(parse, j, i) == k) {
321
0
            result = i;
322
0
            item = set_item(parse, j, i);
323
0
            break;
324
0
        }
325
0
    }
326
327
0
    if (result == IND_MAX) {
328
0
        r = array_add(&set->items, &result);
329
0
        ERR_NOMEM(r < 0, parse);
330
331
0
        item = set_item(parse, j, result);
332
0
        item->state = s;
333
0
        item->parent = k;
334
0
    }
335
336
0
    for (ind_t i = 0; i < item->nlinks; i++) {
337
0
        struct link *lnk = item->links + i;
338
0
        if (lnk->reason == reason && lnk->lens == lens
339
0
            && lnk->from_set == from_set && lnk->from_item == from_item
340
0
            && lnk->to_item == to_item && lnk->caller == caller)
341
0
            return result;
342
0
    }
343
344
0
    r = REALLOC_N(item->links, item->nlinks + 1);
345
0
    ERR_NOMEM(r < 0, parse);
346
347
0
    struct link *lnk = item->links+ item->nlinks;
348
0
    item->nlinks += 1;
349
350
0
    lnk->reason = reason;
351
0
    lnk->lens = lens;
352
0
    lnk->from_set = from_set;
353
0
    lnk->from_item = from_item;
354
0
    lnk->to_item = to_item;
355
0
    lnk->caller = caller;
356
0
 error:
357
0
    return result;
358
0
}
359
360
/* Add item (s, i) to set E_j and record that it was added because
361
 * a parse of nonterminal lens, starting at itemk in E_k, was completed
362
 * by item item in E_j
363
 *
364
 * [j, (s,i)] -> [j, item] => [k, itemk]
365
 */
366
static void parse_add_complete(struct jmt_parse *parse, ind_t j,
367
                               struct state *s, ind_t i,
368
                               ind_t k, ind_t itemk,
369
                               ind_t lens,
370
0
                               ind_t item) {
371
0
    parse_add_item(parse, j, s, i, R_COMPLETE, lens, k, itemk, item, IND_MAX);
372
0
}
373
374
/* Same as parse_add_complete, but mark the item also as a predict
375
 *
376
 * [j, (s,i)] -> [j, item] => [k, itemk]
377
 */
378
static void parse_add_predict_complete(struct jmt_parse *parse, ind_t j,
379
                                       struct state *s, ind_t i,
380
                                       ind_t k, ind_t itemk,
381
                                       ind_t lens,
382
0
                                       ind_t item, ind_t caller) {
383
0
    parse_add_item(parse, j, s, i, R_COMPLETE|R_PREDICT,
384
0
                   lens, k, itemk, item, caller);
385
0
}
386
387
/* Add item (s, j) to E_j and record that it was added because of a
388
 * prediction from item from in E_j
389
 */
390
static ind_t parse_add_predict(struct jmt_parse *parse, ind_t j,
391
0
                               struct state *s, ind_t from) {
392
393
0
    ensure(from < parse->sets[j]->items.used, parse);
394
0
    struct state *t = item_state(parse, j, from);
395
396
0
    return parse_add_item(parse, j, s, j, R_PREDICT, EPS, j, from, EPS,
397
0
                          t->num);
398
0
 error:
399
0
    return IND_MAX;
400
0
}
401
402
/* Add item (s,i) to E_j and record that it was added because of scanning
403
 * with lens starting from item item in E_k.
404
 *
405
 * [j, (s,i)] -> [k, item]
406
 */
407
static void parse_add_scan(struct jmt_parse *parse, ind_t j,
408
                           struct state *s, ind_t i,
409
0
                           ind_t lens, ind_t k, ind_t item) {
410
0
    ensure(item < parse->sets[k]->items.used, parse);
411
412
0
    parse_add_item(parse, j, s, i, R_SCAN, lens, k, item, EPS, IND_MAX);
413
0
 error:
414
0
    return;
415
0
}
416
417
ATTRIBUTE_PURE
418
0
static bool is_complete(const struct link *lnk) {
419
0
    return lnk->reason & R_COMPLETE;
420
0
}
421
422
ATTRIBUTE_PURE
423
0
static bool is_predict(const struct link *lnk) {
424
0
    return lnk->reason & R_PREDICT;
425
0
}
426
427
ATTRIBUTE_PURE
428
0
static bool is_scan(const struct link *lnk) {
429
0
    return lnk->reason & R_SCAN;
430
0
}
431
432
ATTRIBUTE_PURE
433
0
static bool is_last_sibling(const struct link *lnk) {
434
0
    if (is_complete(lnk))
435
0
        return false;
436
0
    return lnk->reason & (R_PREDICT|R_ROOT);
437
0
}
438
439
ATTRIBUTE_PURE
440
0
static bool returns(const struct state *s, ind_t l) {
441
0
    for (ind_t i = 0; i < s->nret; i++)
442
0
        if (s->ret[i] == l)
443
0
            return true;
444
0
    return false;
445
0
}
446
447
0
static void state_add_return(struct jmt *jmt, struct state *s, ind_t l) {
448
0
    int r;
449
450
0
    if (s == NULL || returns(s, l))
451
0
        return;
452
453
0
    r = REALLOC_N(s->ret, s->nret + 1);
454
0
    ERR_NOMEM(r < 0, jmt);
455
0
    s->ret[s->nret] = l;
456
0
    s->nret += 1;
457
0
 error:
458
0
    return;
459
0
}
460
461
static void state_merge_returns(struct jmt *jmt, struct state *dst,
462
0
                                const struct state *src) {
463
0
    for (ind_t l = 0; l < src->nret; l++)
464
0
        state_add_return(jmt, dst, src->ret[l]);
465
0
}
466
467
static void nncomplete(struct jmt_parse *parse, ind_t j,
468
0
                       struct state *t, ind_t k, ind_t item) {
469
470
0
    for (ind_t itemk = 0; itemk < parse->sets[k]->items.used; itemk++) {
471
0
        struct state *u = item_state(parse, k, itemk);
472
0
        for_each_trans(y, u) {
473
0
            if (returns(t, y->lens)) {
474
0
                ind_t parent = item_parent(parse, k, itemk);
475
0
                parse_add_complete(parse, j,
476
0
                                   y->to, parent,
477
0
                                   k, itemk, y->lens, item);
478
0
            }
479
0
        }
480
0
    }
481
0
}
482
483
/* NCALLER for (t, i) in E_j, which has index item in E_j, and t -> s a
484
 * call in the transducer and s a return. The item (s,j) has index pred in
485
 * E_j
486
 */
487
static void ncaller(struct jmt_parse *parse, ind_t j, ind_t item,
488
0
                    struct state *t, ind_t i, struct state *s, ind_t pred) {
489
0
    for_each_trans(u, t) {
490
0
        if (returns(s, u->lens)) {
491
            /* [j, (u->to, i)] -> [j, pred] => [j, item] */
492
0
            parse_add_predict_complete(parse, j,
493
0
                                       u->to, i,
494
0
                                       j, item, u->lens,
495
0
                                       pred, t->num);
496
0
        }
497
0
    }
498
0
}
499
500
/* NCALLEE for (t, parent) in E_j, which has index item in E_j, and t -> s
501
 * a call in the transducer and s a return. The item (s,j) has index pred
502
 * in E_j
503
 */
504
static void ncallee(struct jmt_parse *parse, ind_t j, ATTRIBUTE_UNUSED ind_t item,
505
0
                    ATTRIBUTE_UNUSED struct state *t, ATTRIBUTE_UNUSED ind_t parent, struct state *s, ind_t pred) {
506
0
    for_each_trans(u, s) {
507
0
        if (returns(s, u->lens)) {
508
            /* [j, (u->to, j)] -> [j, item] => [j, item] */
509
0
            parse_add_predict_complete(parse, j,
510
0
                                       u->to, j,
511
0
                                       j, pred, u->lens,
512
0
                                       pred, t->num);
513
0
        }
514
0
    }
515
0
}
516
517
static struct jmt_parse *parse_init(struct jmt *jmt,
518
0
                                    const char *text, size_t text_len) {
519
0
    int r;
520
0
    struct jmt_parse *parse;
521
522
0
    r = ALLOC(parse);
523
0
    ERR_NOMEM(r < 0, jmt);
524
525
0
    parse->jmt = jmt;
526
0
    parse->error = jmt->error;
527
0
    parse->text = text;
528
0
    parse->nsets = text_len + 1;
529
0
    r = ALLOC_N(parse->sets, parse->nsets);
530
0
    ERR_NOMEM(r < 0, jmt);
531
0
    return parse;
532
0
 error:
533
0
    if (parse != NULL)
534
0
        free(parse->sets);
535
0
    free(parse);
536
0
    return NULL;
537
0
}
538
539
0
void jmt_free_parse(struct jmt_parse *parse) {
540
0
    if (parse == NULL)
541
0
        return;
542
0
    for (int i=0; i < parse->nsets; i++) {
543
0
        struct item_set *set = parse->sets[i];
544
0
        if (set != NULL) {
545
0
            array_each_elem(x, set->items, struct item)
546
0
                free(x->links);
547
0
            array_release(&set->items);
548
0
            free(set);
549
0
        }
550
0
    }
551
0
    free(parse->sets);
552
0
    free(parse);
553
0
}
554
555
static struct state *lens_state(struct jmt *jmt, ind_t l);
556
557
0
static void flens(FILE *fp, ind_t l) {
558
0
    if (l == 0)
559
0
        fprintf(fp, "%c", 'S');
560
0
    else if (l < 'S' - 'A')
561
0
        fprintf(fp, "%c", 'A' + l - 1);
562
0
    else if (l <= 'Z' - 'A')
563
0
        fprintf(fp, "%c", 'A' + l);
564
0
    else
565
0
        fprintf(fp, "%u", l);
566
0
}
567
568
static void parse_dot_link(FILE *fp, struct jmt_parse *parse,
569
0
                           ind_t k, struct item *x, struct link *lnk) {
570
0
    char *lens_label = NULL;
571
0
    if (is_complete(lnk) || is_scan(lnk)) {
572
0
        struct state *sA = lens_state(parse->jmt, lnk->lens);
573
0
        int r;
574
575
0
        if (sA == NULL)
576
0
            r = xasprintf(&lens_label, "<%d>", lnk->lens);
577
0
        else
578
0
            r = xasprintf(&lens_label, "%d", lnk->lens);
579
0
        if (r < 0) {
580
0
            fprintf(fp, "// Internal error generating lens_label\n");
581
0
            return;
582
0
        }
583
0
    }
584
0
    fprintf(fp, "    n%d_%d_%d [ label = \"(%d, %d)\"];\n",
585
0
            k, x->state->num, x->parent, x->state->num, x->parent);
586
0
    if (is_complete(lnk)) {
587
0
        struct item *y = set_item(parse, k, lnk->to_item);
588
0
        const char *pred = is_predict(lnk) ? "p" : "";
589
0
        fprintf(fp, "    n%d_%d_%d -> n%s%d_%d_%d [ style = dashed ];\n",
590
0
                k, x->state->num, x->parent,
591
0
                pred, k, y->state->num, y->parent);
592
0
        if (is_predict(lnk)) {
593
0
            fprintf(fp, "    n%s%d_%d_%d [ label = \"\" ];\n",
594
0
                    pred, k, y->state->num, y->parent);
595
0
            fprintf(fp, "    n%s%d_%d_%d -> n%d_%d_%d [ style = bold ];\n",
596
0
                    pred, k, y->state->num, y->parent,
597
0
                    k, y->state->num, y->parent);
598
0
        }
599
0
        y = set_item(parse, lnk->from_set, lnk->from_item);
600
0
        fprintf(fp,
601
0
                "    n%d_%d_%d -> n%d_%d_%d [ style = dashed, label = \"",
602
0
                k, x->state->num, x->parent,
603
0
                lnk->from_set, y->state->num, y->parent);
604
0
        flens(fp, lnk->lens);
605
0
        fprintf(fp, "\" ];\n");
606
0
    } else if (is_scan(lnk)) {
607
0
        struct item *y =
608
0
            set_item(parse, lnk->from_set, lnk->from_item);
609
0
        fprintf(fp,
610
0
                "    n%d_%d_%d -> n%d_%d_%d [ label = \"",
611
0
                k, x->state->num, x->parent,
612
0
                lnk->from_set, y->state->num, y->parent);
613
0
        for (ind_t i=lnk->from_set; i < k; i++)
614
0
            fprintf(fp, "%c", parse->text[i]);
615
0
        fprintf(fp, "\" ];\n");
616
617
0
    } else if (is_predict(lnk)) {
618
0
        struct item *y =
619
0
            set_item(parse, lnk->from_set, lnk->from_item);
620
0
        fprintf(fp,
621
0
                "    n%d_%d_%d -> n%d_%d_%d [ style = bold ];\n",
622
0
                k, x->state->num, x->parent,
623
0
                lnk->from_set, y->state->num, y->parent);
624
625
0
    }
626
0
    free(lens_label);
627
0
}
628
629
0
static void parse_dot(struct jmt_parse *parse, const char *fname) {
630
0
    FILE *fp = debug_fopen("%s", fname);
631
0
    if (fp == NULL)
632
0
        return;
633
634
0
    fprintf(fp, "digraph \"jmt_parse\" {\n");
635
0
    fprintf(fp, "  rankdir = RL;\n");
636
0
    for (int k=0; k < parse->nsets; k++) {
637
0
        struct item_set *set = parse->sets[k];
638
639
0
        if (set == NULL)
640
0
            continue;
641
642
0
        fprintf(fp, "  subgraph \"cluster_E_%d\" {\n", k);
643
0
        fprintf(fp, "    rankdir=RL;\n    rank=same;\n");
644
0
        fprintf(fp, "    title%d [ label=\"E%d\", shape=plaintext ]\n", k, k);
645
0
        for_each_item(x, set) {
646
0
            for (int i=0; i < x->nlinks; i++) {
647
0
                struct link *lnk = x->links + i;
648
0
                parse_dot_link(fp, parse, k, x, lnk);
649
0
            }
650
0
        }
651
0
        fprintf(fp, "}\n");
652
0
    }
653
0
    fprintf(fp, "}\n");
654
0
    fclose(fp);
655
0
}
656
657
struct jmt_parse *
658
jmt_parse(struct jmt *jmt, const char *text, size_t text_len)
659
0
{
660
0
    struct jmt_parse *parse = NULL;
661
662
0
    parse = parse_init(jmt, text, text_len);
663
0
    ERR_BAIL(jmt);
664
665
    /* INIT */
666
0
    parse_add_item(parse, 0, jmt->start, 0, R_ROOT, EPS, EPS, EPS, EPS,
667
0
                   jmt->lens);
668
    /* NINIT */
669
0
    if (is_return(jmt->start)) {
670
0
        for_each_trans(x, jmt->start) {
671
0
            if (returns(jmt->start, x->lens))
672
0
                parse_add_predict_complete(parse, 0, x->to, 0,
673
0
                                           0, 0, x->lens, 0, 0);
674
0
        }
675
0
    }
676
677
0
    for (int j=0; j <= text_len; j++) {
678
0
        struct item_set *set = parse->sets[j];
679
0
        if (set == NULL)
680
0
            continue;
681
682
0
        for (int item=0; item < set->items.used; item++) {
683
0
            struct state *t = item_state(parse, j, item);
684
0
            ind_t i = item_parent(parse, j, item);
685
686
0
            if (is_return(t) && i != j) {
687
                /* NNCOMPLETE */
688
0
                nncomplete(parse, j, t, i, item);
689
0
            }
690
691
0
            for_each_trans(x, t) {
692
0
                if (x->lens == CALL) {
693
                    /* PREDICT */
694
0
                    ind_t pred = parse_add_predict(parse, j, x->to, item);
695
0
                    ERR_BAIL(parse);
696
0
                    if (is_return(x->to)) {
697
                        /* NCALLER */
698
0
                        ncaller(parse, j, item, t, i, x->to, pred);
699
                        /* NCALLEE */
700
0
                        ncallee(parse, j, item, t, i, x->to, pred);
701
0
                    }
702
0
                } else {
703
0
                    int count;
704
0
                    struct lens *lens = lens_of_parse(parse, x->lens);
705
0
                    struct state *sA = lens_state(parse->jmt, x->lens);
706
0
                    if (! lens->recursive && sA == NULL) {
707
                        /* SCAN, terminal */
708
                        // FIXME: We really need to find every k so that
709
                        // text[j..k] matches lens->ctype, not just one
710
0
                        count = regexp_match(lens->ctype, text, text_len, j, NULL);
711
0
                        if (count > 0) {
712
0
                            parse_add_scan(parse, j+count,
713
0
                                           x->to, i,
714
0
                                           x->lens, j, item);
715
0
                        }
716
0
                    }
717
0
                }
718
0
            }
719
0
        }
720
0
    }
721
0
    if (debugging("cf.jmt.parse"))
722
0
        parse_dot(parse, "jmt_parse.dot");
723
0
    return parse;
724
0
 error:
725
0
    jmt_free_parse(parse);
726
0
    return NULL;
727
0
}
728
729
/*
730
 * Reconstruction of the parse tree
731
 */
732
733
static void
734
build_nullable(struct jmt_parse *parse, ind_t pos,
735
0
               struct jmt_visitor *visitor, struct lens *lens, int lvl) {
736
0
    if (! lens->recursive) {
737
0
        if (visitor->terminal != NULL) {
738
0
            (*visitor->terminal)(lens, pos, pos, visitor->data);
739
0
            ERR_BAIL(parse);
740
0
        }
741
0
    } else {
742
0
        if (visitor->enter != NULL) {
743
0
            (*visitor->enter)(lens, pos, pos, visitor->data);
744
0
            ERR_BAIL(parse);
745
0
        }
746
747
0
        switch(lens->tag) {
748
0
        case L_REC:
749
0
            build_nullable(parse, pos, visitor, lens->body, lvl+1);
750
0
            break;
751
0
        case L_CONCAT:
752
0
            for (int i=0; i < lens->nchildren; i++)
753
0
                build_nullable(parse, pos, visitor, lens->children[i], lvl+1);
754
0
            break;
755
0
        case L_UNION:
756
0
            for (int i=0; i < lens->nchildren; i++)
757
0
                if (lens->children[i]->ctype_nullable)
758
0
                    build_nullable(parse, pos, visitor,
759
0
                                   lens->children[i], lvl+1);
760
0
            break;
761
0
        case L_SUBTREE:
762
0
        case L_SQUARE:
763
0
            build_nullable(parse, pos, visitor, lens->child, lvl+1);
764
0
            break;
765
0
        case L_STAR:
766
0
        case L_MAYBE:
767
0
            break;
768
0
        default:
769
0
            BUG_ON(true, parse, "Unexpected lens tag %d", lens->tag);
770
0
        }
771
772
0
        if (visitor->exit != NULL) {
773
0
            (*visitor->exit)(lens, pos, pos, visitor->data);
774
0
            ERR_BAIL(parse);
775
0
        }
776
0
    }
777
0
 error:
778
0
    return;
779
0
}
780
781
static void build_trace(const char *msg, ind_t start, ind_t end,
782
0
                        struct item *x, int lvl) {
783
0
    for (int i=0; i < lvl; i++) putc(' ', stderr);
784
0
    if (x != NULL) {
785
0
        printf("%s %d..%d: (%d, %d) %d %s%s%s\n", msg,
786
0
               start, end, x->state->num,
787
0
               x->parent, x->links->lens,
788
0
               is_complete(x->links) ? "c" : "",
789
0
               is_predict(x->links) ? "p" : "",
790
0
               is_scan(x->links) ? "s" : "");
791
0
    } else {
792
0
        printf("%s %d..%d\n", msg, start, end);
793
0
    }
794
0
}
795
796
0
static int add_sibling(struct array *siblings, ind_t lnk) {
797
0
    int r;
798
0
    ind_t ind;
799
800
0
    r = array_add(siblings, &ind);
801
0
    if (r < 0)
802
0
        return -1;
803
0
    *array_elem(*siblings, ind, ind_t) = lnk;
804
0
    return 0;
805
0
}
806
807
/* Return true if CALLER is a possible caller for the link LNK which starts
808
 * at item X.
809
 *
810
 * FIXME: We can get rid of the caller field on a link if we distinguish
811
 * between NCALLER and NCALLEE in the Earley graph, rather than collapse
812
 * them both into links with reason PREDICT|COMPLETE
813
 */
814
0
static bool is_caller(struct item *x, struct link *lnk, ind_t caller) {
815
0
    if (lnk->reason & R_ROOT)
816
0
        return caller == lnk->caller;
817
818
0
    if (! is_predict(lnk))
819
0
        return false;
820
821
0
    if (is_complete(lnk)) {
822
        /* NCALLER: caller == t
823
         * NCALLEE: caller == s */
824
0
        return caller == lnk->caller;
825
0
    }
826
    /* PREDICT: caller == t || caller == s */
827
0
    return caller == lnk->caller || caller == x->state->num;
828
0
}
829
830
/* Traverse the siblings of x and check that the callee's set of callers
831
 * contains CALLER. When a path ending with a call from CALLER exists,
832
 * record the number of the corresponding link for each item in
833
 * SIBLINGS. The links are recorded in left-to-right order, i.e. the number
834
 * of the first link to follow is in the last entry in SIBLINGS.
835
 *
836
 * Returns 0 if there is a path ending in a callee of CALLER. Return -1 if
837
 * there is none. Return -2 if there are multiple such paths. Return -3 for
838
 * any other error.
839
 */
840
static int filter_siblings(struct jmt_visitor *visitor, struct lens *lens,
841
                          ind_t k, ind_t item, ind_t caller,
842
0
                          struct array *siblings) {
843
0
    struct jmt_parse *parse = visitor->parse;
844
0
    struct item *x = set_item(parse, k, item);
845
0
    ind_t nlast = 0;
846
0
    int r;
847
848
0
    for (ind_t lnk = 0; lnk < x->nlinks; lnk++)
849
0
        if (is_last_sibling(x->links + lnk))
850
0
            nlast += 1;
851
852
0
    if (nlast > 0 && nlast < x->nlinks)
853
0
        goto ambig;
854
855
0
    if (nlast == x->nlinks) {
856
0
        for (ind_t lnk = 0; lnk < x->nlinks; lnk++) {
857
0
            if (is_caller(x, x->links + lnk, caller)) {
858
0
                siblings->used = 0;
859
0
                r = add_sibling(siblings, lnk);
860
0
                if (r < 0) {
861
0
                    ERR_REPORT(parse, AUG_ENOMEM, NULL);
862
0
                    return -3;
863
0
                }
864
0
                return 0;
865
0
            }
866
0
        }
867
0
        return -1;
868
0
    } else {
869
        /* nlast == 0 */
870
0
        ind_t found = IND_MAX;
871
0
        for (ind_t lnk = 0; lnk < x->nlinks; lnk++) {
872
0
            struct link *l = x->links + lnk;
873
0
            r = filter_siblings(visitor, lens,
874
0
                                l->from_set, l->from_item, caller,
875
0
                                siblings);
876
0
            if (r == -1)
877
0
                continue;
878
0
            if (r == 0) {
879
0
                if (found != IND_MAX)
880
0
                    goto ambig;
881
0
                else
882
0
                    found = lnk;
883
0
            } else {
884
0
                return r;
885
0
            }
886
0
        }
887
0
        if (found == IND_MAX) {
888
0
            return -1;
889
0
        } else {
890
0
            r = add_sibling(siblings, found);
891
0
            if (r < 0) {
892
0
                ERR_REPORT(parse, AUG_ENOMEM, NULL);
893
0
                return -3;
894
0
            }
895
0
            return 0;
896
0
        }
897
0
    }
898
0
 ambig:
899
0
    (*visitor->error)(lens, visitor->data, k,
900
0
                      "Ambiguous parse: %d links in state (%d, %d) in E_%d",
901
0
                      x->nlinks, x->state->num, x->parent, k);
902
0
    return -2;
903
0
}
904
905
static void visit_enter(struct jmt_visitor *visitor, struct lens *lens,
906
                        size_t start, size_t end,
907
0
                        struct item *x, int lvl) {
908
0
    if (debugging("cf.jmt.visit"))
909
0
        build_trace("{", start, end, x, lvl);
910
0
    if (visitor->enter != NULL)
911
0
        (*visitor->enter)(lens, start, end, visitor->data);
912
0
}
913
914
static void visit_exit(struct jmt_visitor *visitor, struct lens *lens,
915
                       size_t start, size_t end,
916
0
                       struct item *x, int lvl) {
917
0
    if (debugging("cf.jmt.visit"))
918
0
        build_trace("}", start, end, x, lvl);
919
0
    if (visitor->exit != NULL)
920
0
        (*visitor->exit)(lens, start, end, visitor->data);
921
0
}
922
923
static int
924
build_children(struct jmt_parse *parse, ind_t k, ind_t item,
925
               struct jmt_visitor *visitor, int lvl, ind_t caller);
926
927
static int
928
build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
929
0
           struct jmt_visitor *visitor, int lvl) {
930
0
    struct item *x = set_item(parse, k, item);
931
0
    ind_t start = x->links->from_set;
932
0
    ind_t end = k;
933
0
    struct item *old_x = x;
934
935
0
    if (start == end) {
936
        /* This completion corresponds to a nullable nonterminal
937
         * that match epsilon. Reconstruct the full parse tree
938
         * for matching epsilon */
939
0
        if (debugging("cf.jmt.visit"))
940
0
            build_trace("N", x->links->from_set, k, x, lvl);
941
0
        build_nullable(parse, start, visitor, lens, lvl);
942
0
        return end;
943
0
    }
944
945
0
    ensure(is_complete(x->links), parse);
946
947
0
    visit_enter(visitor, lens, start, end, x, lvl);
948
0
    ERR_BAIL(parse);
949
950
    /* x is a completion item. (k, x->to_item) is its first child in the
951
     * parse tree. */
952
0
    if (! is_predict(x->links)) {
953
0
        struct link *lnk = x->links;
954
0
        struct item *sib = set_item(parse, lnk->from_set, lnk->from_item);
955
0
        ind_t caller = sib->state->num;
956
957
0
        item = lnk->to_item;
958
0
        x = set_item(parse, k, item);
959
0
        build_children(parse, k, item, visitor, lvl, caller);
960
0
        ERR_BAIL(parse);
961
0
    }
962
963
0
    visit_exit(visitor, lens, start, end, old_x, lvl);
964
0
    ERR_BAIL(parse);
965
0
 error:
966
0
    return end;
967
0
}
968
969
static int
970
build_children(struct jmt_parse *parse, ind_t k, ind_t item,
971
0
               struct jmt_visitor *visitor, int lvl, ind_t caller) {
972
0
    struct item *x = set_item(parse, k, item);
973
0
    struct lens *lens = lens_of_parse(parse, x->links->lens);
974
0
    struct array siblings;
975
0
    ind_t end = k;
976
0
    int r;
977
978
0
    array_init(&siblings, sizeof(ind_t));
979
0
    r = filter_siblings(visitor, lens, k, item, caller, &siblings);
980
0
    if (r < 0)
981
0
        goto error;
982
983
    /* x the first item in a list of siblings; visit items (x->from_set,
984
     * x->from_item) in order, which will visit x and its siblings in the
985
     * parse tree from right to left */
986
0
    for (ind_t i = siblings.used - 1; i > 0; i--) {
987
0
        ind_t lnk = *array_elem(siblings, i, ind_t);
988
0
        struct lens *sub = lens_of_parse(parse, x->links[lnk].lens);
989
0
        if (sub->recursive) {
990
0
            build_tree(parse, k, item, sub, visitor, lvl+1);
991
0
            ERR_BAIL(parse);
992
0
        } else {
993
0
            if (debugging("cf.jmt.visit"))
994
0
                build_trace("T", x->links->from_set, k, x, lvl+1);
995
0
            if (visitor->terminal != NULL) {
996
0
                (*visitor->terminal)(sub,
997
0
                                     x->links->from_set, k, visitor->data);
998
0
                ERR_BAIL(parse);
999
0
            }
1000
0
        }
1001
0
        k = x->links[lnk].from_set;
1002
0
        item = x->links[lnk].from_item;
1003
0
        x = set_item(parse, k, item);
1004
0
    }
1005
0
 error:
1006
0
    array_release(&siblings);
1007
0
    return end;
1008
0
}
1009
1010
0
int jmt_visit(struct jmt_visitor *visitor, size_t *len) {
1011
0
    struct jmt_parse *parse = visitor->parse;
1012
0
    ind_t k = parse->nsets - 1;     /* Current Earley set */
1013
0
    ind_t item;
1014
0
    struct item_set *set = parse->sets[k];
1015
1016
0
    if (set == NULL)
1017
0
        goto noparse;
1018
1019
0
    for (item = 0; item < set->items.used; item++) {
1020
0
        struct item *x = set_item(parse, k, item);
1021
0
        if (x->parent == 0 && returns(x->state, parse->jmt->lens)) {
1022
0
            for (ind_t i = 0; i < x->nlinks; i++) {
1023
0
                if (is_complete(x->links + i) || is_scan(x->links + i)) {
1024
0
                    if (debugging("cf.jmt.visit"))
1025
0
                        printf("visit: found (%d, %d) in E_%d\n",
1026
0
                               x->state->num, x->parent, k);
1027
0
                    goto found;
1028
0
                }
1029
0
            }
1030
0
        }
1031
0
    }
1032
0
 found:
1033
0
    if (item >= parse->sets[k]->items.used)
1034
0
        goto noparse;
1035
0
    struct lens *lens = lens_of_parse(parse, parse->jmt->lens);
1036
1037
0
    visit_enter(visitor, lens, 0, k, NULL, 0);
1038
0
    ERR_BAIL(parse);
1039
1040
0
    *len = build_children(parse, k, item, visitor, 0,
1041
0
                          parse->jmt->start->num);
1042
0
    ERR_BAIL(parse);
1043
1044
0
    visit_exit(visitor, lens, 0, k, NULL, 0);
1045
0
    ERR_BAIL(parse);
1046
0
    return 1;
1047
0
 error:
1048
0
    return -1;
1049
0
 noparse:
1050
0
    for (; k > 0; k--)
1051
0
        if (parse->sets[k] != NULL) break;
1052
0
    *len = k;
1053
0
    return 0;
1054
0
}
1055
1056
/*
1057
 * Build the automaton
1058
 */
1059
1060
1061
0
static struct state *make_state(struct jmt *jmt) {
1062
0
    struct state *s;
1063
0
    int r;
1064
1065
0
    r = ALLOC(s);
1066
0
    ERR_NOMEM(r < 0, jmt);
1067
0
    s->num = jmt->state_count++;
1068
0
    array_init(&s->trans, sizeof(struct trans));
1069
0
    if (jmt->start != NULL)
1070
0
        list_cons(jmt->start->next, s);
1071
0
    else
1072
0
        jmt->start = s;
1073
0
    return s;
1074
0
 error:
1075
0
    return NULL;
1076
0
}
1077
1078
0
static ind_t add_lens(struct jmt *jmt, struct lens *lens) {
1079
0
    int r;
1080
0
    ind_t l;
1081
0
    struct state *sA = NULL;
1082
0
    int nullable = 0;
1083
1084
0
    r = array_add(&jmt->lenses, &l);
1085
0
    ERR_NOMEM(r < 0, jmt);
1086
0
    ERR_NOMEM(l == IND_MAX, jmt);
1087
1088
0
    if (! lens->recursive)
1089
0
        nullable = regexp_matches_empty(lens->ctype);
1090
1091
0
    array_elem(jmt->lenses, l, struct jmt_lens)->lens = lens;
1092
    /* A nonrecursive lens that matches epsilon is both a terminal
1093
     * and a nonterminal */
1094
0
    if (lens->recursive || nullable) {
1095
0
        sA = make_state(jmt);
1096
0
        ERR_NOMEM(sA == NULL, jmt);
1097
0
        array_elem(jmt->lenses, l, struct jmt_lens)->state = sA;
1098
0
        if (! lens->recursive) {
1099
            /* Add lens again, so that l refers to the nonterminal T
1100
             * for the lens, and l+1 refers to the terminal t for it */
1101
0
            ind_t m;
1102
0
            r = array_add(&jmt->lenses, &m);
1103
0
            ERR_NOMEM(r < 0, jmt);
1104
0
            ERR_NOMEM(m == IND_MAX, jmt);
1105
1106
0
            array_elem(jmt->lenses, m, struct jmt_lens)->lens = lens;
1107
0
        }
1108
0
    }
1109
1110
0
    if (debugging("cf.jmt")) {
1111
0
        if (sA == NULL) {
1112
0
            char *s = format_lens(lens);
1113
0
            printf("add_lens: ");
1114
0
            print_regexp(stdout, lens->ctype);
1115
0
            printf(" %s\n", s);
1116
0
            free(s);
1117
0
        } else {
1118
0
            char *s = format_lens(lens);
1119
0
            printf("add_lens: ");
1120
0
            flens(stdout, l);
1121
0
            printf(" %u %s\n", sA->num, s);
1122
0
            if (nullable) {
1123
0
                printf("add_lens: // %s\n", s);
1124
0
            }
1125
0
            free(s);
1126
0
        }
1127
0
    }
1128
1129
0
    return l;
1130
0
 error:
1131
0
    return IND_MAX;
1132
0
}
1133
1134
static struct trans *
1135
add_new_trans(struct jmt *jmt,
1136
0
              struct state *from, struct state *to, ind_t lens) {
1137
0
    struct trans *t;
1138
0
    ind_t i;
1139
0
    int r;
1140
1141
0
    if (from == NULL || to == NULL)
1142
0
        return NULL;
1143
1144
0
    r = array_add(&from->trans, &i);
1145
0
    ERR_NOMEM(r < 0, jmt);
1146
0
    t = array_elem(from->trans, i, struct trans);
1147
0
    t->to = to;
1148
0
    t->lens = lens;
1149
0
    return t;
1150
0
 error:
1151
0
    return NULL;
1152
0
}
1153
1154
static struct trans *
1155
0
add_eps_trans(struct jmt *jmt, struct state *from, struct state *to) {
1156
0
    return add_new_trans(jmt, from, to, EPS);
1157
0
}
1158
1159
0
static struct lens *lens_of_jmt(struct jmt *jmt, ind_t l) {
1160
0
    return array_elem(jmt->lenses, l, struct jmt_lens)->lens;
1161
0
}
1162
1163
0
static ind_t lens_index(struct jmt *jmt, struct lens *lens) {
1164
0
    array_for_each(i, jmt->lenses)
1165
0
        if (lens_of_jmt(jmt, i) == lens)
1166
0
            return i;
1167
0
    return IND_MAX;
1168
0
}
1169
1170
0
static struct state *lens_state(struct jmt *jmt, ind_t l) {
1171
0
    return array_elem(jmt->lenses, l, struct jmt_lens)->state;
1172
0
}
1173
1174
0
static void print_lens_symbol(FILE *fp, struct jmt *jmt, struct lens *lens) {
1175
0
    ind_t l = lens_index(jmt, lens);
1176
0
    struct state *sA = lens_state(jmt, l);
1177
1178
0
    if (sA == NULL)
1179
0
        print_regexp(fp, lens->ctype);
1180
0
    else
1181
0
        flens(fp, l);
1182
0
}
1183
1184
0
static void print_grammar(struct jmt *jmt, struct lens *lens) {
1185
0
    ind_t l = lens_index(jmt, lens);
1186
0
    struct state *sA = lens_state(jmt, l);
1187
1188
0
    if (sA == NULL || (lens->tag == L_REC && lens->rec_internal))
1189
0
        return;
1190
1191
0
    printf("  ");
1192
0
    print_lens_symbol(stdout, jmt, lens);
1193
0
    printf(" := ");
1194
1195
0
    if (! lens->recursive) {
1196
        /* Nullable regexps */
1197
0
        print_regexp(stdout, lens->ctype);
1198
0
        printf("\n");
1199
0
        return;
1200
0
    }
1201
1202
0
    switch (lens->tag) {
1203
0
    case L_CONCAT:
1204
0
        print_lens_symbol(stdout, jmt, lens->children[0]);
1205
0
        for (int i=1; i < lens->nchildren; i++) {
1206
0
            printf(" . ");
1207
0
            print_lens_symbol(stdout, jmt, lens->children[i]);
1208
0
        }
1209
0
        printf("\n");
1210
0
        for (int i=0; i < lens->nchildren; i++)
1211
0
            print_grammar(jmt, lens->children[i]);
1212
0
        break;
1213
0
    case L_UNION:
1214
0
        print_lens_symbol(stdout, jmt, lens->children[0]);
1215
0
        for (int i=1; i < lens->nchildren; i++) {
1216
0
            printf(" | ");
1217
0
            print_lens_symbol(stdout, jmt, lens->children[i]);
1218
0
        }
1219
0
        printf("\n");
1220
0
        for (int i=0; i < lens->nchildren; i++)
1221
0
            print_grammar(jmt, lens->children[i]);
1222
0
        break;
1223
0
    case L_SUBTREE:
1224
0
        print_lens_symbol(stdout, jmt, lens->child);
1225
0
        printf("\n");
1226
0
        print_grammar(jmt, lens->child);
1227
0
        break;
1228
0
    case L_STAR:
1229
0
        print_lens_symbol(stdout, jmt, lens->child);
1230
0
        printf("*\n");
1231
0
        print_grammar(jmt, lens->child);
1232
0
        break;
1233
0
    case L_MAYBE:
1234
0
        print_lens_symbol(stdout, jmt, lens->child);
1235
0
        printf("?\n");
1236
0
        print_grammar(jmt, lens->child);
1237
0
        break;
1238
0
    case L_REC:
1239
0
        print_lens_symbol(stdout, jmt, lens->body);
1240
0
        printf("\n");
1241
0
        print_grammar(jmt, lens->body);
1242
0
        break;
1243
0
    case L_SQUARE:
1244
0
       print_lens_symbol(stdout, jmt, lens->child);
1245
0
       printf("\n");
1246
0
       print_grammar(jmt, lens->child);
1247
0
       break;
1248
0
    default:
1249
0
        BUG_ON(true, jmt, "Unexpected lens tag %d", lens->tag);
1250
0
        break;
1251
0
    }
1252
0
 error:
1253
0
    return;
1254
0
}
1255
1256
0
static void print_grammar_top(struct jmt *jmt, struct lens *lens) {
1257
0
    printf("Grammar:\n");
1258
0
    print_grammar(jmt, lens);
1259
0
    if (lens->tag == L_REC) {
1260
0
        printf("  ");
1261
0
        print_lens_symbol(stdout, jmt, lens->alias);
1262
0
        printf(" := ");
1263
0
        print_lens_symbol(stdout, jmt, lens->alias->body);
1264
0
        printf("\n");
1265
0
    }
1266
0
}
1267
1268
0
static void index_lenses(struct jmt *jmt, struct lens *lens) {
1269
0
    ind_t l;
1270
1271
0
    l = lens_index(jmt, lens);
1272
0
    if (l == IND_MAX) {
1273
0
        l = add_lens(jmt, lens);
1274
0
        ERR_BAIL(jmt);
1275
0
    }
1276
1277
0
    if (! lens->recursive)
1278
0
        return;
1279
1280
0
    switch (lens->tag) {
1281
0
    case L_CONCAT:
1282
0
    case L_UNION:
1283
0
        for (int i=0; i < lens->nchildren; i++)
1284
0
            index_lenses(jmt, lens->children[i]);
1285
0
        break;
1286
0
    case L_SUBTREE:
1287
0
    case L_STAR:
1288
0
    case L_MAYBE:
1289
0
    case L_SQUARE:
1290
0
        index_lenses(jmt, lens->child);
1291
0
        break;
1292
0
    case L_REC:
1293
0
        if (! lens->rec_internal)
1294
0
            index_lenses(jmt, lens->body);
1295
0
        break;
1296
0
    default:
1297
0
        BUG_ON(true, jmt, "Unexpected lens tag %d", lens->tag);
1298
0
        break;
1299
0
    }
1300
0
 error:
1301
0
    return;
1302
0
}
1303
1304
static void thompson(struct jmt *jmt, struct lens *lens,
1305
0
                     struct state **s, struct state **f) {
1306
0
    ind_t l = lens_index(jmt, lens);
1307
0
    struct state *sA = lens_state(jmt, l);
1308
0
    ensure(l < jmt->lenses.used, jmt);
1309
1310
0
    *s = make_state(jmt);
1311
0
    *f = make_state(jmt);
1312
0
    ERR_BAIL(jmt);
1313
1314
0
    if (lens->recursive) {
1315
        /* A nonterminal */
1316
0
        add_new_trans(jmt, *s, *f, l);
1317
0
        add_new_trans(jmt, *s, sA, CALL);
1318
0
    } else if (sA == NULL) {
1319
        /* A terminal that never matches epsilon */
1320
0
        add_new_trans(jmt, *s, *f, l);
1321
0
    } else {
1322
        /* A terminal that matches epsilon */
1323
0
        add_new_trans(jmt, *s, *f, l);
1324
0
        add_new_trans(jmt, *s, sA, CALL);
1325
0
        add_new_trans(jmt, *s, *f, l+1);
1326
0
    }
1327
0
 error:
1328
0
    return;
1329
0
}
1330
1331
static void conv(struct jmt *jmt, struct lens *lens,
1332
                 struct state **s, struct state **e,
1333
0
                 struct state **f) {
1334
0
    ind_t l = lens_index(jmt, lens);
1335
0
    ensure(l < jmt->lenses.used, jmt);
1336
0
    struct state *sA = lens_state(jmt, l);
1337
1338
0
    *s = NULL;
1339
0
    *e = NULL;
1340
0
    *f = NULL;
1341
1342
0
    if (lens->recursive) {
1343
        /* A nonterminal */
1344
0
        *s = make_state(jmt);
1345
0
        *f = make_state(jmt);
1346
0
        ERR_BAIL(jmt);
1347
0
        add_new_trans(jmt, *s, *f, l);
1348
0
        ERR_BAIL(jmt);
1349
0
        ensure(sA != NULL, jmt);
1350
0
        add_new_trans(jmt, *s, sA, EPS);
1351
0
        ERR_BAIL(jmt);
1352
0
    } else if (sA == NULL) {
1353
        /* A terminal that never matches epsilon */
1354
0
        *s = make_state(jmt);
1355
0
        *f = make_state(jmt);
1356
0
        ERR_BAIL(jmt);
1357
0
        add_new_trans(jmt, *s, *f, l);
1358
0
        ERR_BAIL(jmt);
1359
0
    } else {
1360
        /* A terminal that matches epsilon */
1361
0
        *s = make_state(jmt);
1362
0
        *f = make_state(jmt);
1363
0
        ERR_BAIL(jmt);
1364
0
        add_new_trans(jmt, *s, *f, l);
1365
0
        add_new_trans(jmt, *s, *f, l+1);
1366
0
        add_new_trans(jmt, *s, sA, EPS);
1367
0
        ERR_BAIL(jmt);
1368
0
    }
1369
0
 error:
1370
0
    return;
1371
0
}
1372
1373
static void conv_concat(struct jmt *jmt, struct lens *lens,
1374
                        struct state **s, struct state **e,
1375
0
                        struct state **f) {
1376
0
    struct state *s2, *f2, *e2;
1377
1378
0
    conv(jmt, lens->children[0], &s2, &e2, &f2);
1379
0
    *s = make_state(jmt);
1380
0
    add_new_trans(jmt, *s, s2, EPS);
1381
1382
0
    for (int i=1; i < lens->nchildren; i++) {
1383
0
        struct state *s3, *e3, *f3, *scall, *fcall;
1384
0
        conv(jmt, lens->children[i], &s3, &e3, &f3);
1385
0
        thompson(jmt, lens->children[i], &scall, &fcall);
1386
0
        ERR_BAIL(jmt);
1387
0
        add_eps_trans(jmt, f2, scall);
1388
0
        add_eps_trans(jmt, e2, s3);
1389
0
        *f = make_state(jmt);
1390
0
        add_eps_trans(jmt, f3, *f);
1391
0
        add_eps_trans(jmt, fcall, *f);
1392
0
        *e = make_state(jmt);
1393
0
        add_eps_trans(jmt, e3, *e);
1394
0
        f2 = *f;
1395
0
        e2 = *e;
1396
0
    }
1397
0
 error:
1398
0
    return;
1399
0
}
1400
1401
static void conv_union(struct jmt *jmt, struct lens *lens,
1402
                        struct state **s, struct state **e,
1403
0
                        struct state **f) {
1404
1405
0
    *s = make_state(jmt);
1406
0
    *e = make_state(jmt);
1407
0
    *f = make_state(jmt);
1408
0
    ERR_BAIL(jmt);
1409
1410
0
    for (int i = 0; i < lens->nchildren; i++) {
1411
0
        struct state *s2, *e2, *f2;
1412
1413
0
        conv(jmt, lens->children[i], &s2, &e2, &f2);
1414
0
        ERR_BAIL(jmt);
1415
1416
0
        add_eps_trans(jmt, *s, s2);
1417
0
        add_eps_trans(jmt, e2, *e);
1418
0
        add_eps_trans(jmt, f2, *f);
1419
0
    }
1420
1421
0
 error:
1422
0
    return;
1423
0
}
1424
1425
static void conv_star(struct jmt *jmt, struct lens *lens,
1426
                      struct state **s, struct state **e,
1427
0
                      struct state **f) {
1428
1429
0
    *s = make_state(jmt);
1430
0
    *e = make_state(jmt);
1431
0
    *f = make_state(jmt);
1432
0
    ERR_BAIL(jmt);
1433
1434
0
    struct state *si, *ei, *fi, *scall, *fcall;
1435
0
    conv(jmt, lens->child, &si, &ei, &fi);
1436
0
    thompson(jmt, lens->child, &scall, &fcall);
1437
0
    ERR_BAIL(jmt);
1438
1439
0
    add_eps_trans(jmt, *s, si);
1440
0
    add_eps_trans(jmt, ei, si);
1441
0
    add_eps_trans(jmt, *s, *e);
1442
0
    add_eps_trans(jmt, ei, *e);
1443
0
    add_eps_trans(jmt, fi, scall);
1444
0
    add_eps_trans(jmt, fcall, scall);
1445
0
    add_eps_trans(jmt, fi, *f);
1446
0
    add_eps_trans(jmt, fcall, *f);
1447
0
    ERR_BAIL(jmt);
1448
1449
0
 error:
1450
0
    return;
1451
0
}
1452
1453
0
static void conv_rhs(struct jmt *jmt, ind_t l) {
1454
0
    struct lens *lens = lens_of_jmt(jmt, l);
1455
0
    struct state *s = NULL, *e = NULL, *f = NULL;
1456
0
    struct state *sA = lens_state(jmt, l);
1457
1458
0
    if (! lens->recursive) {
1459
        /* Nothing to do for terminals that do not match epsilon */
1460
0
        if (sA != NULL)
1461
0
            state_add_return(jmt, sA, l);
1462
0
        return;
1463
0
    }
1464
1465
    /* All other nonterminals/recursive lenses */
1466
1467
    /* Maintain P1 */
1468
0
    if (lens->ctype_nullable)
1469
0
        state_add_return(jmt, sA, l);
1470
1471
0
    switch (lens->tag) {
1472
0
    case L_REC:
1473
0
        conv(jmt, lens->body, &s, &e, &f);
1474
0
        break;
1475
0
    case L_CONCAT:
1476
0
        conv_concat(jmt, lens, &s, &e, &f);
1477
0
        break;
1478
0
    case L_UNION:
1479
0
        conv_union(jmt, lens, &s, &e, &f);
1480
0
        break;
1481
0
    case L_SUBTREE:
1482
0
        conv(jmt, lens->child, &s, &e, &f);
1483
0
        break;
1484
0
    case L_STAR:
1485
0
        conv_star(jmt, lens, &s, &e, &f);
1486
0
        break;
1487
0
    case L_MAYBE:
1488
0
        conv(jmt, lens->child, &s, &e, &f);
1489
0
        add_new_trans(jmt, s, e, EPS);
1490
0
        break;
1491
0
    case L_SQUARE:
1492
0
       conv(jmt, lens->child, &s, &e, &f);
1493
0
       break;
1494
0
    default:
1495
0
        BUG_ON(true, jmt, "Unexpected lens tag %d", lens->tag);
1496
0
    }
1497
1498
0
    ensure(sA != NULL, jmt);
1499
1500
0
    add_eps_trans(jmt, sA, s);
1501
0
    state_add_return(jmt, e, l);
1502
0
    state_add_return(jmt, f, l);
1503
1504
0
 error:
1505
0
    return;
1506
0
}
1507
1508
ATTRIBUTE_RETURN_CHECK
1509
0
static int push_state(struct array *worklist, struct state *s) {
1510
0
    int r;
1511
0
    ind_t ind;
1512
1513
0
    r = array_add(worklist, &ind);
1514
0
    if (r < 0)
1515
0
        return -1;
1516
0
    *array_elem(*worklist, ind, struct state *) = s;
1517
0
    return 0;
1518
0
}
1519
1520
0
static struct state *pop_state(struct array *worklist) {
1521
0
    if (worklist->used > 0) {
1522
0
        worklist->used -= 1;
1523
0
        return *array_elem(*worklist, worklist->used, struct state *);
1524
0
    } else {
1525
0
        return NULL;
1526
0
    }
1527
0
}
1528
1529
0
static void free_state(struct state *s) {
1530
0
    if (s == NULL)
1531
0
        return;
1532
0
    free(s->ret);
1533
0
    array_release(&s->trans);
1534
0
    free(s);
1535
0
}
1536
1537
0
static void collect(struct jmt *jmt) {
1538
0
    struct array worklist;
1539
0
    size_t count, removed;
1540
0
    int r;
1541
1542
0
    count = 0;
1543
0
    list_for_each(s, jmt->start) {
1544
0
        s->live = 0;
1545
0
        s->reachable = 0;
1546
0
        count += 1;
1547
0
    }
1548
1549
0
    array_init(&worklist, sizeof(struct state *));
1550
0
    jmt->start->reachable = 1;
1551
0
    for (struct state *s = jmt->start;
1552
0
         s != NULL;
1553
0
         s = pop_state(&worklist)) {
1554
0
        for_each_trans(t, s) {
1555
0
            if (! t->to->reachable) {
1556
0
                t->to->reachable = 1;
1557
0
                r = push_state(&worklist, t->to);
1558
0
                ERR_NOMEM(r < 0, jmt);
1559
0
            }
1560
0
        }
1561
0
    }
1562
1563
0
    list_for_each(s, jmt->start)
1564
0
        if (s->reachable && is_return(s))
1565
0
            s->live = 1;
1566
1567
0
    bool changed;
1568
0
    do {
1569
0
        changed = false;
1570
0
        list_for_each(s, jmt->start) {
1571
0
            if (! s->live && s->reachable) {
1572
0
                for_each_trans(t, s) {
1573
0
                    if (t->lens != CALL && t->to->live) {
1574
0
                        s->live = 1;
1575
0
                        changed = true;
1576
0
                        break;
1577
0
                    }
1578
0
                }
1579
0
            }
1580
0
        }
1581
0
    } while (changed);
1582
1583
0
    list_for_each(s, jmt->start) {
1584
0
        if (s->live && s->reachable) {
1585
0
            for (ind_t i = 0; i < s->trans.used; ) {
1586
0
                struct trans *t = state_trans(s, i);
1587
0
                if (! (t->to->live && t->to->reachable))
1588
0
                    array_remove(&s->trans, i);
1589
0
                else
1590
0
                    i += 1;
1591
0
            }
1592
0
        }
1593
0
    }
1594
1595
0
    removed = 0;
1596
0
    for (struct state *s = jmt->start;
1597
0
         s->next != NULL; ) {
1598
0
        struct state *p = s->next;
1599
0
        if (p->live && p->reachable) {
1600
0
            s = p;
1601
0
        } else {
1602
0
            s->next = p->next;
1603
0
            free_state(p);
1604
0
            removed += 1;
1605
0
        }
1606
0
    }
1607
1608
0
 error:
1609
0
    array_release(&worklist);
1610
0
    return;
1611
0
}
1612
1613
0
static void dedup(struct state *s) {
1614
0
    array_for_each(i, s->trans) {
1615
0
        struct trans *t = state_trans(s, i);
1616
0
        for (ind_t j = i+1; j < s->trans.used;) {
1617
0
            struct trans *u = state_trans(s, j);
1618
0
            if (t->to == u->to && t->lens == u->lens)
1619
0
                array_remove(&s->trans, j);
1620
0
            else
1621
0
                j += 1;
1622
0
        }
1623
0
    }
1624
0
}
1625
1626
0
static void unepsilon(struct jmt *jmt) {
1627
0
    int r;
1628
1629
0
    if (debugging("cf.jmt.build"))
1630
0
        jmt_dot(jmt, "jmt_10_raw.dot");
1631
0
    collect(jmt);
1632
1633
    /* Get rid of epsilon transitions */
1634
0
    bool changed;
1635
0
    do {
1636
0
        changed = false;
1637
0
        list_for_each(s, jmt->start) {
1638
0
            array_for_each(i , s->trans) {
1639
0
                struct trans *t = state_trans(s, i);
1640
0
                if (t->lens == EPS) {
1641
0
                    struct state *to = t->to;
1642
0
                    array_remove(&s->trans, i);
1643
0
                    r = array_join(&s->trans, &to->trans);
1644
0
                    ERR_NOMEM(r < 0, jmt);
1645
0
                    state_merge_returns(jmt, s, to);
1646
0
                    dedup(s);
1647
0
                    changed = true;
1648
0
                }
1649
0
            }
1650
0
        }
1651
0
    } while (changed);
1652
1653
0
    collect(jmt);
1654
0
    if (debugging("cf.jmt.build"))
1655
0
        jmt_dot(jmt, "jmt_20_uneps.dot");
1656
0
 error:
1657
0
    return;
1658
0
}
1659
1660
0
static bool is_deterministic(struct jmt *jmt) {
1661
0
    list_for_each(s, jmt->start) {
1662
0
        array_for_each(i, s->trans) {
1663
0
            struct trans *t = state_trans(s, i);
1664
0
            for (ind_t j = i+1; j < s->trans.used; j++) {
1665
0
                struct trans *u = state_trans(s, j);
1666
0
                if (t->lens == u->lens)
1667
0
                    return false;
1668
0
            }
1669
0
        }
1670
0
    }
1671
0
    return true;
1672
0
}
1673
1674
0
static void free_nfa_state(struct nfa_state *s) {
1675
0
    if (s == NULL)
1676
0
        return;
1677
0
    array_release(&s->set);
1678
0
    free(s);
1679
0
}
1680
1681
0
static struct nfa_state *make_nfa_state(struct jmt *jmt) {
1682
0
    struct nfa_state *result = NULL;
1683
0
    int r;
1684
1685
0
    r = ALLOC(result);
1686
0
    ERR_NOMEM(r < 0, jmt);
1687
1688
0
    array_init(&result->set, sizeof(struct state *));
1689
1690
0
    return result;
1691
0
 error:
1692
0
    FREE(result);
1693
0
    return NULL;
1694
0
}
1695
1696
static ind_t nfa_state_add(struct jmt *jmt, struct nfa_state *nfa,
1697
0
                           struct state *s) {
1698
0
    ind_t i;
1699
0
    int r;
1700
1701
0
    array_for_each(j, nfa->set) {
1702
0
        struct state *q = *array_elem(nfa->set, j, struct state *);
1703
0
        if (q == s)
1704
0
            return j;
1705
0
    }
1706
1707
    /* Keep the list of states sorted */
1708
0
    i = nfa->set.used;
1709
0
    for (int j=0; j + 1 < nfa->set.used; j++) {
1710
0
        if (s < *array_elem(nfa->set, j, struct state *)) {
1711
0
            i = j;
1712
0
            break;
1713
0
        }
1714
0
    }
1715
0
    r = array_insert(&nfa->set, i);
1716
0
    ERR_NOMEM(r < 0, jmt);
1717
1718
0
    *array_elem(nfa->set, i, struct state *) = s;
1719
0
    return i;
1720
0
 error:
1721
0
    return IND_MAX;
1722
0
}
1723
1724
0
static bool nfa_same_set(struct nfa_state *s1, struct nfa_state *s2) {
1725
0
    if (s1->set.used != s2->set.used)
1726
0
        return false;
1727
1728
0
    array_for_each(i, s1->set) {
1729
0
        struct state *q1 = *array_elem(s1->set, i, struct state *);
1730
0
        struct state *q2 = *array_elem(s2->set, i, struct state *);
1731
0
        if (q1 != q2)
1732
0
            return false;
1733
0
    }
1734
1735
0
    return true;
1736
0
}
1737
1738
static struct nfa_state *nfa_uniq(struct jmt *jmt, struct array *newstates,
1739
0
                                  struct nfa_state *s) {
1740
0
    ind_t ind;
1741
0
    int r;
1742
1743
0
    array_each_elem(q, *newstates, struct nfa_state *) {
1744
0
        if (nfa_same_set(s, *q)) {
1745
0
            if (s == *q)
1746
0
                return s;
1747
0
            free_nfa_state(s);
1748
0
            return *q;
1749
0
        }
1750
0
    }
1751
1752
0
    r = array_add(newstates, &ind);
1753
0
    ERR_NOMEM(r < 0, jmt);
1754
0
    *array_elem(*newstates, ind, struct nfa_state *) = s;
1755
1756
0
    if (s->state == NULL) {
1757
0
        s->state = make_state(jmt);
1758
0
        ERR_BAIL(jmt);
1759
0
    }
1760
1761
    /* This makes looking at pictures easier */
1762
0
    if (s->set.used == 1)
1763
0
        s->state->num = (*array_elem(s->set, 0, struct state *))->num;
1764
1765
0
    return s;
1766
1767
0
 error:
1768
0
    return NULL;
1769
0
}
1770
1771
static void det_target(struct jmt *jmt, struct array *newstates,
1772
0
                       struct nfa_state *nfas, ind_t l) {
1773
0
    struct nfa_state *to = NULL;
1774
1775
0
    array_each_elem(s, nfas->set, struct state *) {
1776
0
        for_each_trans(t, *s) {
1777
0
            if (t->lens == l) {
1778
0
                if (to == NULL) {
1779
0
                    to = make_nfa_state(jmt);
1780
0
                    ERR_RET(jmt);
1781
0
                }
1782
0
                nfa_state_add(jmt, to, t->to);
1783
0
                ERR_RET(jmt);
1784
0
            }
1785
0
        }
1786
0
    }
1787
1788
0
    if (to != NULL) {
1789
0
        to = nfa_uniq(jmt, newstates, to);
1790
0
        ERR_RET(jmt);
1791
0
        add_new_trans(jmt, nfas->state, to->state, l);
1792
0
    }
1793
0
}
1794
1795
0
static void determinize(struct jmt *jmt) {
1796
0
    struct nfa_state *ini = NULL;
1797
0
    struct array *newstates = NULL;
1798
0
    int r;
1799
0
    ind_t ind, nlenses;
1800
1801
0
    if (is_deterministic(jmt))
1802
0
        return;
1803
1804
0
    r = ALLOC(newstates);
1805
0
    ERR_NOMEM(r < 0, jmt);
1806
0
    array_init(newstates, sizeof(struct nfa_state *));
1807
1808
0
    nlenses = jmt->lenses.used;
1809
1810
    /* The initial state consists of just the start state */
1811
0
    ini = make_nfa_state(jmt);
1812
0
    ERR_BAIL(jmt);
1813
0
    nfa_state_add(jmt, ini, jmt->start);
1814
0
    ERR_BAIL(jmt);
1815
1816
    /* Make a new initial state */
1817
0
    ini->state = make_state(jmt);
1818
0
    ini->state->num = jmt->start->num;  /* Makes lokking at pictures easier */
1819
0
    ERR_BAIL(jmt);
1820
0
    jmt->start->next = ini->state->next;
1821
0
    ini->state->next = jmt->start;
1822
0
    jmt->start = ini->state;
1823
1824
0
    r = array_add(newstates, &ind);
1825
0
    ERR_NOMEM(r < 0, jmt);
1826
1827
0
    *array_elem(*newstates, ind, struct nfa_state *) = ini;
1828
0
    ini = NULL;
1829
1830
0
    for (ind_t i = 0; i < newstates->used; i++) {
1831
0
        struct nfa_state *nfas = *array_elem(*newstates, i, struct nfa_state *);
1832
1833
0
        for (int j = 0; j < nfas->set.used; j++) {
1834
0
            struct state *s = *array_elem(nfas->set, j, struct state *);
1835
0
            state_merge_returns(jmt, nfas->state, s);
1836
0
        }
1837
1838
0
        for (ind_t l = 0; l < nlenses; l++) {
1839
0
            det_target(jmt, newstates, nfas, l);
1840
0
            ERR_BAIL(jmt);
1841
0
        }
1842
0
        det_target(jmt, newstates, nfas, CALL);
1843
0
        ERR_BAIL(jmt);
1844
0
    }
1845
1846
0
    collect(jmt);
1847
1848
0
 done:
1849
0
    if (newstates) {
1850
0
        array_each_elem(s, *newstates, struct nfa_state *) {
1851
0
            free_nfa_state(*s);
1852
0
        }
1853
0
        array_release(newstates);
1854
0
        FREE(newstates);
1855
0
    }
1856
0
    free_nfa_state(ini);
1857
0
    return;
1858
0
 error:
1859
0
    goto done;
1860
0
}
1861
1862
0
struct jmt *jmt_build(struct lens *lens) {
1863
0
    struct jmt *jmt = NULL;
1864
0
    int r;
1865
1866
0
    r = ALLOC(jmt);
1867
0
    ERR_NOMEM(r < 0, lens->info);
1868
1869
0
    jmt->error = lens->info->error;
1870
0
    array_init(&jmt->lenses, sizeof(struct jmt_lens));
1871
1872
0
    index_lenses(jmt, lens);
1873
1874
0
    if (debugging("cf.jmt"))
1875
0
        print_grammar_top(jmt, lens);
1876
1877
0
    for (ind_t i=0; i < jmt->lenses.used; i++) {
1878
0
        conv_rhs(jmt, i);
1879
0
        ERR_BAIL(jmt);
1880
0
    }
1881
1882
0
    unepsilon(jmt);
1883
0
    ERR_BAIL(jmt);
1884
1885
0
    determinize(jmt);
1886
0
    ERR_BAIL(jmt);
1887
1888
0
    if (debugging("cf.jmt.build"))
1889
0
        jmt_dot(jmt, "jmt_30_dfa.dot");
1890
1891
0
    return jmt;
1892
0
 error:
1893
0
    jmt_free(jmt);
1894
0
    return NULL;
1895
0
}
1896
1897
0
void jmt_free(struct jmt *jmt) {
1898
0
    if (jmt == NULL)
1899
0
        return;
1900
0
    array_release(&jmt->lenses);
1901
0
    struct state *s = jmt->start;
1902
0
    while (s != NULL) {
1903
0
        struct state *del = s;
1904
0
        s = del->next;
1905
0
        free_state(del);
1906
0
    }
1907
0
    free(jmt);
1908
0
}
1909
1910
0
void jmt_dot(struct jmt *jmt, const char *fname) {
1911
0
    FILE *fp = debug_fopen("%s", fname);
1912
0
    if (fp == NULL)
1913
0
        return;
1914
1915
0
    fprintf(fp, "digraph \"jmt\" {\n");
1916
0
    fprintf(fp, "  rankdir = LR;\n");
1917
0
    list_for_each(s, jmt->start) {
1918
0
        if (is_return(s)) {
1919
0
            fprintf(fp, "  %u [ shape = doublecircle, label = \"%u (",
1920
0
                    s->num, s->num);
1921
0
            flens(fp, s->ret[0]);
1922
0
            for (ind_t i = 1; i < s->nret; i++) {
1923
0
                fprintf(fp, ", ");
1924
0
                flens(fp, s->ret[i]);
1925
0
            }
1926
0
            fprintf(fp, ")\" ];\n");
1927
0
        }
1928
0
        for_each_trans(t, s) {
1929
0
            fprintf(fp, "  %u -> %u ", s->num, t->to->num);
1930
0
            if (t->lens == EPS)
1931
0
                fprintf(fp, ";\n");
1932
0
            else if (t->lens == CALL)
1933
0
                fprintf(fp, "[ label = \"call\" ];\n");
1934
0
            else if (lens_state(jmt, t->lens) == NULL) {
1935
0
                struct lens *lens = lens_of_jmt(jmt, t->lens);
1936
0
                fprintf(fp, "[ label = \"");
1937
0
                print_regexp(fp, lens->ctype);
1938
0
                fprintf(fp, "\" ];\n");
1939
0
            } else {
1940
0
                fprintf(fp, "[ label = \"");
1941
0
                flens(fp, t->lens);
1942
0
                fprintf(fp, "\" ];\n");
1943
0
            }
1944
0
        }
1945
0
    }
1946
0
    fprintf(fp, "}\n");
1947
0
    fclose(fp);
1948
0
}
1949
1950
/*
1951
 * Local variables:
1952
 *  indent-tabs-mode: nil
1953
 *  c-indent-level: 4
1954
 *  c-basic-offset: 4
1955
 *  tab-width: 4
1956
 * End:
1957
 */