Coverage Report

Created: 2023-09-20 06:34

/src/augeas/src/lens.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * lens.c:
3
 *
4
 * Copyright (C) 2007-2016 David Lutterkort
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
19
 *
20
 * Author: David Lutterkort <dlutter@redhat.com>
21
 */
22
23
#include <config.h>
24
#include <stddef.h>
25
26
#include "lens.h"
27
#include "memory.h"
28
#include "errcode.h"
29
#include "internal.h"
30
31
/* This enum must be kept in sync with type_offs and ntypes */
32
enum lens_type {
33
    CTYPE, ATYPE, KTYPE, VTYPE
34
};
35
36
static const int type_offs[] = {
37
    offsetof(struct lens, ctype),
38
    offsetof(struct lens, atype),
39
    offsetof(struct lens, ktype),
40
    offsetof(struct lens, vtype)
41
};
42
static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
43
44
static const char *lens_type_names[] =
45
    { "ctype", "atype", "ktype", "vtype" };
46
47
0
#define ltype(lns, t) *((struct regexp **) ((char *) lns + type_offs[t]))
48
49
static struct value * typecheck_union(struct info *,
50
                                      struct lens *l1, struct lens *l2);
51
static struct value *typecheck_concat(struct info *,
52
                                      struct lens *l1, struct lens *l2);
53
static struct value *typecheck_square(struct info *,
54
                                      struct lens *l1, struct lens *l2);
55
static struct value *typecheck_iter(struct info *info, struct lens *l);
56
static struct value *typecheck_maybe(struct info *info, struct lens *l);
57
58
/* Lens names for pretty printing */
59
/* keep order in sync with enum type */
60
static const char *const tags[] = {
61
    "del", "store", "value", "key", "label", "seq", "counter",
62
    "concat", "union",
63
    "subtree", "star", "maybe", "rec", "square"
64
};
65
66
0
#define ltag(lens) (tags[lens->tag - L_DEL])
67
68
static const struct string digits_string = {
69
    .ref = REF_MAX, .str = (char *) "[0123456789]+"
70
};
71
static const struct string *const digits_pat = &digits_string;
72
73
0
char *format_lens(struct lens *l) {
74
0
    if (l == NULL) {
75
0
        return strdup("(no lens)");
76
0
    }
77
78
0
    char *inf = format_info(l->info);
79
0
    char *result;
80
81
0
    xasprintf(&result, "%s[%s]%s", tags[l->tag - L_DEL], inf,
82
0
              l->recursive ? "R" : "r");
83
0
    free(inf);
84
0
    return result;
85
0
}
86
87
0
#define BUG_LENS_TAG(lns)  bug_lens_tag(lns, __FILE__, __LINE__)
88
89
0
static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
90
0
    if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
91
0
        char *s = format_lens(lens);
92
0
        bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
93
0
        free(s);
94
0
    } else {
95
        /* We are really screwed */
96
0
        assert(0);
97
0
    }
98
0
    return;
99
0
}
100
101
/* Construct a finite automaton from REGEXP and return it in *FA.
102
 *
103
 * Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
104
 * return an exception.
105
 */
106
static struct value *str_to_fa(struct info *info, const char *pattern,
107
0
                               struct fa **fa, int nocase) {
108
0
    int error;
109
0
    struct value *exn = NULL;
110
0
    size_t re_err_len;
111
0
    char *re_str = NULL, *re_err = NULL;
112
113
0
    *fa = NULL;
114
0
    error = fa_compile(pattern, strlen(pattern), fa);
115
0
    if (error == REG_NOERROR) {
116
0
        if (nocase) {
117
0
            error = fa_nocase(*fa);
118
0
            ERR_NOMEM(error < 0, info);
119
0
        }
120
0
        return NULL;
121
0
    }
122
123
0
    re_str = escape(pattern, -1, RX_ESCAPES);
124
0
    ERR_NOMEM(re_str == NULL, info);
125
126
0
    exn = make_exn_value(info, "Invalid regular expression /%s/", re_str);
127
128
0
    re_err_len = regerror(error, NULL, NULL, 0);
129
0
    error = ALLOC_N(re_err, re_err_len);
130
0
    ERR_NOMEM(error < 0, info);
131
132
0
    regerror(error, NULL, re_err, re_err_len);
133
0
    exn_printf_line(exn, "%s", re_err);
134
135
0
 done:
136
0
    free(re_str);
137
0
    free(re_err);
138
0
    return exn;
139
0
 error:
140
0
    fa_free(*fa);
141
0
    *fa = NULL;
142
0
    exn = info->error->exn;
143
0
    goto done;
144
0
}
145
146
0
static struct value *regexp_to_fa(struct regexp *regexp, struct fa **fa) {
147
0
    return str_to_fa(regexp->info, regexp->pattern->str, fa, regexp->nocase);
148
0
}
149
150
0
static struct lens *make_lens(enum lens_tag tag, struct info *info) {
151
0
    struct lens *lens;
152
0
    make_ref(lens);
153
0
    lens->tag = tag;
154
0
    lens->info = info;
155
156
0
    return lens;
157
0
}
158
159
static struct lens *make_lens_unop(enum lens_tag tag, struct info *info,
160
0
                                  struct lens *child) {
161
0
    struct lens *lens = make_lens(tag, info);
162
0
    lens->child = child;
163
0
    lens->value = child->value;
164
0
    lens->key = child->key;
165
0
    return lens;
166
0
}
167
168
typedef struct regexp *regexp_combinator(struct info *, int, struct regexp **);
169
170
static struct lens *make_lens_binop(enum lens_tag tag, struct info *info,
171
                                    struct lens *l1, struct lens *l2,
172
0
                                    regexp_combinator *combinator) {
173
0
    struct lens *lens = make_lens(tag, info);
174
0
    int n1 = (l1->tag == tag) ? l1->nchildren : 1;
175
0
    struct regexp **types = NULL;
176
177
0
    if (lens == NULL)
178
0
        goto error;
179
180
0
    lens->nchildren = n1;
181
0
    lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
182
183
0
    lens->recursive = l1->recursive || l2->recursive;
184
0
    lens->rec_internal = l1->rec_internal || l2->rec_internal;
185
186
0
    if (ALLOC_N(lens->children, lens->nchildren) < 0) {
187
0
        lens->nchildren = 0;
188
0
        goto error;
189
0
    }
190
191
0
    if (l1->tag == tag) {
192
0
        for (int i=0; i < l1->nchildren; i++)
193
0
            lens->children[i] = ref(l1->children[i]);
194
0
        unref(l1, lens);
195
0
    } else {
196
0
        lens->children[0] = l1;
197
0
    }
198
199
0
    if (l2->tag == tag) {
200
0
        for (int i=0; i < l2->nchildren; i++)
201
0
            lens->children[n1 + i] = ref(l2->children[i]);
202
0
        unref(l2, lens);
203
0
    } else {
204
0
        lens->children[n1] = l2;
205
0
    }
206
207
0
    for (int i=0; i < lens->nchildren; i++) {
208
0
        lens->value = lens->value || lens->children[i]->value;
209
0
        lens->key = lens->key || lens->children[i]->key;
210
0
    }
211
212
0
    if (ALLOC_N(types, lens->nchildren) < 0)
213
0
        goto error;
214
215
0
    if (! lens->rec_internal) {
216
        /* Inside a recursive lens, we assign types with lns_check_rec
217
         * once we know the entire lens */
218
0
        for (int t=0; t < ntypes; t++) {
219
0
            if (lens->recursive && t == CTYPE)
220
0
                continue;
221
0
            for (int i=0; i < lens->nchildren; i++)
222
0
                types[i] = ltype(lens->children[i], t);
223
0
            ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
224
0
        }
225
0
    }
226
0
    FREE(types);
227
228
0
    for (int i=0; i < lens->nchildren; i++)
229
0
        ensure(tag != lens->children[i]->tag, lens->info);
230
231
0
    return lens;
232
0
 error:
233
0
    unref(lens, lens);
234
0
    FREE(types);
235
0
    return NULL;
236
0
}
237
238
0
static struct value *make_lens_value(struct lens *lens) {
239
0
    struct value *v;
240
0
    v = make_value(V_LENS, ref(lens->info));
241
0
    v->lens = lens;
242
0
    return v;
243
0
}
244
245
struct value *lns_make_union(struct info *info,
246
0
                             struct lens *l1, struct lens *l2, int check) {
247
0
    struct lens *lens = NULL;
248
0
    int consumes_value = l1->consumes_value && l2->consumes_value;
249
0
    int recursive = l1->recursive || l2->recursive;
250
0
    int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
251
252
0
    if (check) {
253
0
        struct value *exn = typecheck_union(info, l1, l2);
254
0
        if (exn != NULL)
255
0
            return exn;
256
0
    }
257
258
0
    lens = make_lens_binop(L_UNION, info, l1, l2, regexp_union_n);
259
0
    lens->consumes_value = consumes_value;
260
0
    if (! recursive)
261
0
        lens->ctype_nullable = ctype_nullable;
262
0
    return make_lens_value(lens);
263
0
}
264
265
struct value *lns_make_concat(struct info *info,
266
0
                              struct lens *l1, struct lens *l2, int check) {
267
0
    struct lens *lens = NULL;
268
0
    int consumes_value = l1->consumes_value || l2->consumes_value;
269
0
    int recursive = l1->recursive || l2->recursive;
270
0
    int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
271
272
0
    if (check) {
273
0
        struct value *exn = typecheck_concat(info, l1, l2);
274
0
        if (exn != NULL)
275
0
            return exn;
276
0
    }
277
0
    if (l1->value && l2->value) {
278
0
        return make_exn_value(info, "Multiple stores in concat");
279
0
    }
280
0
    if (l1->key && l2->key) {
281
0
        return make_exn_value(info, "Multiple keys/labels in concat");
282
0
    }
283
284
0
    lens = make_lens_binop(L_CONCAT, info, l1, l2, regexp_concat_n);
285
0
    lens->consumes_value = consumes_value;
286
0
    if (! recursive)
287
0
        lens->ctype_nullable = ctype_nullable;
288
0
    return make_lens_value(lens);
289
0
}
290
291
static struct regexp *subtree_atype(struct info *info,
292
                                    struct regexp *ktype,
293
0
                                    struct regexp *vtype) {
294
0
    const char *kpat = (ktype == NULL) ? ENC_NULL : ktype->pattern->str;
295
0
    const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
296
0
    char *pat;
297
0
    struct regexp *result = NULL;
298
0
    char *ks = NULL, *vs = NULL;
299
0
    int nocase;
300
301
0
    if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
302
0
        ks = regexp_expand_nocase(ktype);
303
0
        vs = regexp_expand_nocase(vtype);
304
0
        ERR_NOMEM(ks == NULL || vs == NULL, info);
305
0
        if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
306
0
            ERR_NOMEM(true, info);
307
0
        nocase = 0;
308
0
    } else {
309
0
        if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
310
0
            ERR_NOMEM(pat == NULL, info);
311
312
0
        nocase = 0;
313
0
        if (ktype != NULL)
314
0
            nocase = ktype->nocase;
315
0
        else if (vtype != NULL)
316
0
            nocase = vtype->nocase;
317
0
    }
318
0
    result = make_regexp(info, pat, nocase);
319
0
 error:
320
0
    free(ks);
321
0
    free(vs);
322
0
    return result;
323
0
}
324
325
/*
326
 * A subtree lens l1 = [ l ]
327
 *
328
 * Types are assigned as follows:
329
 *
330
 * l1->ctype = l->ctype
331
 * l1->atype = encode(l->ktype, l->vtype)
332
 * l1->ktype = NULL
333
 * l1->vtype = NULL
334
 */
335
0
struct value *lns_make_subtree(struct info *info, struct lens *l) {
336
0
    struct lens *lens;
337
338
0
    lens = make_lens_unop(L_SUBTREE, info, l);
339
0
    lens->ctype = ref(l->ctype);
340
0
    if (! l->recursive)
341
0
        lens->atype = subtree_atype(info, l->ktype, l->vtype);
342
0
    lens->value = lens->key = 0;
343
0
    lens->recursive = l->recursive;
344
0
    lens->rec_internal = l->rec_internal;
345
0
    if (! l->recursive)
346
0
        lens->ctype_nullable = l->ctype_nullable;
347
0
    return make_lens_value(lens);
348
0
}
349
350
0
struct value *lns_make_star(struct info *info, struct lens *l, int check) {
351
0
    struct lens *lens;
352
353
0
    if (check) {
354
0
        struct value *exn = typecheck_iter(info, l);
355
0
        if (exn != NULL)
356
0
            return exn;
357
0
    }
358
0
    if (l->value) {
359
0
        return make_exn_value(info, "Multiple stores in iteration");
360
0
    }
361
0
    if (l->key) {
362
0
        return make_exn_value(info, "Multiple keys/labels in iteration");
363
0
    }
364
365
0
    lens = make_lens_unop(L_STAR, info, l);
366
0
    for (int t = 0; t < ntypes; t++) {
367
0
        ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
368
0
    }
369
0
    lens->recursive = l->recursive;
370
0
    lens->rec_internal = l->rec_internal;
371
0
    lens->ctype_nullable = 1;
372
0
    return make_lens_value(lens);
373
0
}
374
375
0
struct value *lns_make_plus(struct info *info, struct lens *l, int check) {
376
0
    struct value *star, *conc;
377
378
0
    star = lns_make_star(info, l, check);
379
0
    if (EXN(star))
380
0
        return star;
381
382
0
    conc = lns_make_concat(ref(info), ref(l), ref(star->lens), check);
383
0
    unref(star, value);
384
0
    return conc;
385
0
}
386
387
0
struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
388
0
    struct lens *lens;
389
390
0
    if (check) {
391
0
        struct value *exn = typecheck_maybe(info, l);
392
0
        if (exn != NULL)
393
0
            return exn;
394
0
    }
395
0
    lens = make_lens_unop(L_MAYBE, info, l);
396
0
    for (int t=0; t < ntypes; t++)
397
0
        ltype(lens, t) = regexp_maybe(info, ltype(l, t));
398
0
    lens->value = l->value;
399
0
    lens->key = l->key;
400
0
    lens->recursive = l->recursive;
401
0
    lens->rec_internal = l->rec_internal;
402
0
    lens->ctype_nullable = 1;
403
0
    return make_lens_value(lens);
404
0
}
405
406
/* The ctype of SQR is a regular approximation of the true ctype of SQR
407
 * at this point. In some situations, for example in processing quoted
408
 * strings this leads to false typecheck errors; to lower the chances
409
 * of these, we try to construct the precise ctype of SQR if the
410
 * language of L1 is finite (and has a small number of words)
411
 */
412
static void square_precise_type(struct info *info,
413
                                struct regexp **sqr,
414
                                struct regexp *left,
415
0
                                struct regexp *body) {
416
417
0
    char **words = NULL;
418
0
    int nwords = 0, r;
419
0
    struct fa *fa = NULL;
420
0
    struct value *exn = NULL;
421
0
    struct regexp **u = NULL, *c[3], *w = NULL;
422
423
0
    exn = str_to_fa(info, left->pattern->str, &fa, left->nocase);
424
0
    if (exn != NULL)
425
0
        goto error;
426
427
0
    nwords = fa_enumerate(fa, 10, &words); /* The limit of 10 is arbitrary */
428
0
    if (nwords < 0)
429
0
        goto error;
430
431
0
    r = ALLOC_N(u, nwords);
432
0
    ERR_NOMEM(r < 0, info);
433
434
0
    c[1] = body;
435
0
    for (int i=0; i < nwords; i++) {
436
0
        w = make_regexp_literal(left->info, words[i]);
437
0
        ERR_NOMEM(w == NULL, info);
438
0
        w->nocase = left->nocase;
439
440
0
        c[0] = c[2] = w;
441
0
        u[i] = regexp_concat_n(info, 3, c);
442
443
0
        unref(w, regexp);
444
0
        ERR_NOMEM(u[i] == NULL, info);
445
0
    }
446
0
    w = regexp_union_n(info, nwords, u);
447
0
    if (w != NULL) {
448
0
        unref(*sqr, regexp);
449
0
        *sqr = w;
450
0
        w = NULL;
451
0
    }
452
453
0
 error:
454
0
    unref(w, regexp);
455
0
    for (int i=0; i < nwords; i++) {
456
0
        free(words[i]);
457
0
        if (u != NULL)
458
0
            unref(u[i], regexp);
459
0
    }
460
0
    free(words);
461
0
    free(u);
462
0
    fa_free(fa);
463
0
    unref(exn, value);
464
0
}
465
466
/* Build a square lens as
467
 *    left . body . right
468
 * where left and right accepts the same language and
469
 * captured strings must match. The inability to express this with other
470
 * lenses makes the square primitive necessary.
471
 */
472
struct value * lns_make_square(struct info *info, struct lens *l1,
473
0
                               struct lens *l2, struct lens *l3, int check) {
474
0
    struct value *cnt1 = NULL, *cnt2 = NULL, *res = NULL;
475
0
    struct lens *sqr = NULL;
476
477
    /* supported types: L_KEY . body . L_DEL or L_DEL . body . L_DEL */
478
0
    if (l3->tag != L_DEL || (l1->tag != L_DEL && l1->tag != L_KEY))
479
0
        return make_exn_value(info, "Supported types: (key lns del) or (del lns del)");
480
481
0
    res = typecheck_square(info, l1, l3);
482
0
    if (res != NULL)
483
0
        goto error;
484
485
0
    res = lns_make_concat(ref(info), ref(l1), ref(l2), check);
486
0
    if (EXN(res))
487
0
        goto error;
488
0
    cnt1 = res;
489
0
    res = lns_make_concat(ref(info), ref(cnt1->lens), ref(l3), check);
490
0
    if (EXN(res))
491
0
        goto error;
492
0
    cnt2 = res;
493
494
0
    sqr = make_lens_unop(L_SQUARE, ref(info), ref(cnt2->lens));
495
0
    ERR_NOMEM(sqr == NULL, info);
496
497
0
    for (int t=0; t < ntypes; t++)
498
0
        ltype(sqr, t) = ref(ltype(cnt2->lens, t));
499
500
0
    square_precise_type(info, &(sqr->ctype), l1->ctype, l2->ctype);
501
502
0
    sqr->recursive = cnt2->lens->recursive;
503
0
    sqr->rec_internal = cnt2->lens->rec_internal;
504
0
    sqr->consumes_value = cnt2->lens->consumes_value;
505
506
0
    res = make_lens_value(sqr);
507
0
    ERR_NOMEM(res == NULL, info);
508
0
    sqr = NULL;
509
510
0
 error:
511
0
    unref(info, info);
512
0
    unref(l1, lens);
513
0
    unref(l2, lens);
514
0
    unref(l3, lens);
515
0
    unref(cnt1, value);
516
0
    unref(cnt2, value);
517
0
    unref(sqr, lens);
518
0
    return res;
519
0
}
520
521
/*
522
 * Lens primitives
523
 */
524
525
static struct regexp *make_regexp_from_string(struct info *info,
526
0
                                              struct string *string) {
527
0
    struct regexp *r;
528
0
    make_ref(r);
529
0
    if (r != NULL) {
530
0
        r->info = ref(info);
531
0
        r->pattern = ref(string);
532
0
        r->nocase = 0;
533
0
    }
534
0
    return r;
535
0
}
536
537
0
static struct regexp *restrict_regexp(struct regexp *r) {
538
0
    char *nre = NULL;
539
0
    struct regexp *result = NULL;
540
0
    size_t nre_len;
541
0
    int ret;
542
543
0
    ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
544
0
                               &nre, &nre_len,
545
0
                               RESERVED_FROM_CH, RESERVED_TO_CH);
546
0
    ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
547
0
    BUG_ON(ret != 0, r->info, NULL);
548
0
    ensure(nre_len == strlen(nre), r->info);
549
550
0
    ret = regexp_c_locale(&nre, &nre_len);
551
0
    ERR_NOMEM(ret < 0, r->info);
552
553
0
    result = make_regexp(r->info, nre, r->nocase);
554
0
    nre = NULL;
555
0
    BUG_ON(regexp_compile(result) != 0, r->info,
556
0
           "Could not compile restricted regexp");
557
0
 done:
558
0
    free(nre);
559
0
    return result;
560
0
 error:
561
0
    unref(result, regexp);
562
0
    goto done;
563
0
}
564
565
static struct value *
566
typecheck_prim(enum lens_tag tag, struct info *info,
567
0
               struct regexp *regexp, struct string *string) {
568
0
    struct fa *fa_slash = NULL;
569
0
    struct fa *fa_key = NULL;
570
0
    struct fa *fa_isect = NULL;
571
0
    struct value *exn = NULL;
572
573
    /* Typecheck */
574
0
    if (tag == L_DEL && string != NULL) {
575
0
        int cnt;
576
0
        const char *dflt = string->str;
577
0
        cnt = regexp_match(regexp, dflt, strlen(dflt), 0, NULL);
578
0
        if (cnt != strlen(dflt)) {
579
0
            char *s = escape(dflt, -1, RX_ESCAPES);
580
0
            char *r = regexp_escape(regexp);
581
0
            exn = make_exn_value(info,
582
0
                  "del: the default value '%s' does not match /%s/", s, r);
583
0
            FREE(s);
584
0
            FREE(r);
585
0
            goto error;
586
0
        }
587
0
    }
588
589
0
 error:
590
0
    fa_free(fa_isect);
591
0
    fa_free(fa_key);
592
0
    fa_free(fa_slash);
593
0
    return exn;
594
0
}
595
596
struct value *lns_make_prim(enum lens_tag tag, struct info *info,
597
0
                            struct regexp *regexp, struct string *string) {
598
0
    struct lens *lens = NULL;
599
0
    struct value *exn = NULL;
600
601
0
    if (typecheck_p(info)) {
602
0
        exn = typecheck_prim(tag, info, regexp, string);
603
0
        if (exn != NULL)
604
0
            goto error;
605
0
    }
606
607
    /* Build the actual lens */
608
0
    lens = make_lens(tag, info);
609
0
    lens->regexp = regexp;
610
0
    lens->string = string;
611
0
    lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
612
0
    lens->value = (tag == L_STORE || tag == L_VALUE);
613
0
    lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
614
0
    lens->atype = regexp_make_empty(info);
615
    /* Set the ctype */
616
0
    if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
617
0
        lens->ctype = ref(regexp);
618
0
        lens->ctype_nullable = regexp_matches_empty(lens->ctype);
619
0
    } else if (tag == L_LABEL || tag == L_VALUE
620
0
               || tag == L_SEQ || tag == L_COUNTER) {
621
0
        lens->ctype = regexp_make_empty(info);
622
0
        lens->ctype_nullable = 1;
623
0
    } else {
624
0
        BUG_LENS_TAG(lens);
625
0
        goto error;
626
0
    }
627
628
629
    /* Set the ktype */
630
0
    if (tag == L_SEQ) {
631
0
        lens->ktype =
632
0
            make_regexp_from_string(info, (struct string *) digits_pat);
633
0
        if (lens->ktype == NULL)
634
0
            goto error;
635
0
    } else if (tag == L_KEY) {
636
0
        lens->ktype = restrict_regexp(lens->regexp);
637
0
    } else if (tag == L_LABEL) {
638
0
        lens->ktype = make_regexp_literal(info, lens->string->str);
639
0
        if (lens->ktype == NULL)
640
0
            goto error;
641
0
    }
642
643
    /* Set the vtype */
644
0
    if (tag == L_STORE) {
645
0
        lens->vtype = restrict_regexp(lens->regexp);
646
0
    } else if (tag == L_VALUE) {
647
0
        lens->vtype = make_regexp_literal(info, lens->string->str);
648
0
        if (lens->vtype == NULL)
649
0
            goto error;
650
0
    }
651
652
0
    return make_lens_value(lens);
653
0
 error:
654
0
    return exn;
655
0
}
656
657
/*
658
 * Typechecking of lenses
659
 */
660
static struct value *disjoint_check(struct info *info, bool is_get,
661
0
                                    struct regexp *r1, struct regexp *r2) {
662
0
    struct fa *fa1 = NULL;
663
0
    struct fa *fa2 = NULL;
664
0
    struct fa *fa = NULL;
665
0
    struct value *exn = NULL;
666
0
    const char *const msg = is_get ? "union.get" : "tree union.put";
667
668
0
    if (r1 == NULL || r2 == NULL)
669
0
        return NULL;
670
671
0
    exn = regexp_to_fa(r1, &fa1);
672
0
    if (exn != NULL)
673
0
        goto done;
674
675
0
    exn = regexp_to_fa(r2, &fa2);
676
0
    if (exn != NULL)
677
0
        goto done;
678
679
0
    fa = fa_intersect(fa1, fa2);
680
0
    if (! fa_is_basic(fa, FA_EMPTY)) {
681
0
        size_t xmpl_len;
682
0
        char *xmpl;
683
0
        fa_example(fa, &xmpl, &xmpl_len);
684
0
        if (! is_get) {
685
0
            char *fmt = enc_format(xmpl, xmpl_len);
686
0
            if (fmt != NULL) {
687
0
                FREE(xmpl);
688
0
                xmpl = fmt;
689
0
            }
690
0
        }
691
0
        exn = make_exn_value(ref(info),
692
0
                             "overlapping lenses in %s", msg);
693
694
0
        if (is_get)
695
0
            exn_printf_line(exn, "Example matched by both: '%s'", xmpl);
696
0
        else
697
0
            exn_printf_line(exn, "Example matched by both: %s", xmpl);
698
0
        free(xmpl);
699
0
    }
700
701
0
 done:
702
0
    fa_free(fa);
703
0
    fa_free(fa1);
704
0
    fa_free(fa2);
705
706
0
    return exn;
707
0
}
708
709
static struct value *typecheck_union(struct info *info,
710
0
                                     struct lens *l1, struct lens *l2) {
711
0
    struct value *exn = NULL;
712
713
0
    exn = disjoint_check(info, true, l1->ctype, l2->ctype);
714
0
    if (exn == NULL) {
715
0
        exn = disjoint_check(info, false, l1->atype, l2->atype);
716
0
    }
717
0
    if (exn != NULL) {
718
0
        char *fi = format_info(l1->info);
719
0
        exn_printf_line(exn, "First lens: %s", fi);
720
0
        free(fi);
721
722
0
        fi = format_info(l2->info);
723
0
        exn_printf_line(exn, "Second lens: %s", fi);
724
0
        free(fi);
725
0
    }
726
0
    return exn;
727
0
}
728
729
static struct value *
730
ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
731
            enum lens_type typ,  struct lens *l1, struct lens *l2,
732
0
            const char *msg, bool iterated) {
733
0
    char *upv, *pv, *v;
734
0
    size_t upv_len;
735
0
    struct value *exn = NULL;
736
0
    int r;
737
738
0
    r = fa_ambig_example(fa1, fa2, &upv, &upv_len, &pv, &v);
739
0
    if (r < 0) {
740
0
        exn = make_exn_value(ref(info), "not enough memory");
741
0
        if (exn != NULL) {
742
0
            return exn;
743
0
        } else {
744
0
            ERR_REPORT(info, AUG_ENOMEM, NULL);
745
0
            return info->error->exn;
746
0
        }
747
0
    }
748
749
0
    if (upv != NULL) {
750
0
        char *e_u, *e_up, *e_upv, *e_pv, *e_v;
751
0
        char *s1, *s2;
752
753
0
        if (typ == ATYPE) {
754
0
            e_u = enc_format(upv, pv - upv);
755
0
            e_up = enc_format(upv, v - upv);
756
0
            e_upv = enc_format(upv, upv_len);
757
0
            e_pv = enc_format(pv, strlen(pv));
758
0
            e_v = enc_format(v, strlen(v));
759
0
            lns_format_atype(l1, &s1);
760
0
            lns_format_atype(l2, &s2);
761
0
        } else {
762
0
            e_u = escape(upv, pv - upv, RX_ESCAPES);
763
0
            e_up = escape(upv, v - upv, RX_ESCAPES);
764
0
            e_upv = escape(upv, -1, RX_ESCAPES);
765
0
            e_pv = escape(pv, -1, RX_ESCAPES);
766
0
            e_v = escape(v, -1, RX_ESCAPES);
767
0
            s1 = regexp_escape(ltype(l1, typ));
768
0
            s2 = regexp_escape(ltype(l2, typ));
769
0
        }
770
0
        exn = make_exn_value(ref(info), "%s", msg);
771
0
        if (iterated) {
772
0
            exn_printf_line(exn, "  Iterated regexp: /%s/", s1);
773
0
        } else {
774
0
            exn_printf_line(exn, "  First regexp: /%s/", s1);
775
0
            exn_printf_line(exn, "  Second regexp: /%s/", s2);
776
0
        }
777
0
        exn_printf_line(exn, "  '%s' can be split into", e_upv);
778
0
        exn_printf_line(exn, "  '%s|=|%s'\n", e_u, e_pv);
779
0
        exn_printf_line(exn, " and");
780
0
        exn_printf_line(exn, "  '%s|=|%s'\n", e_up, e_v);
781
0
        free(e_u);
782
0
        free(e_up);
783
0
        free(e_upv);
784
0
        free(e_pv);
785
0
        free(e_v);
786
0
        free(s1);
787
0
        free(s2);
788
0
    }
789
0
    free(upv);
790
0
    return exn;
791
0
}
792
793
static struct value *
794
ambig_concat_check(struct info *info, const char *msg,
795
0
                   enum lens_type typ, struct lens *l1, struct lens *l2) {
796
0
    struct fa *fa1 = NULL;
797
0
    struct fa *fa2 = NULL;
798
0
    struct value *result = NULL;
799
0
    struct regexp *r1 = ltype(l1, typ);
800
0
    struct regexp *r2 = ltype(l2, typ);
801
802
0
    if (r1 == NULL || r2 == NULL)
803
0
        return NULL;
804
805
0
    result = regexp_to_fa(r1, &fa1);
806
0
    if (result != NULL)
807
0
        goto done;
808
809
0
    result = regexp_to_fa(r2, &fa2);
810
0
    if (result != NULL)
811
0
        goto done;
812
813
0
    result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
814
0
 done:
815
0
    fa_free(fa1);
816
0
    fa_free(fa2);
817
0
    return result;
818
0
}
819
820
static struct value *typecheck_concat(struct info *info,
821
0
                                      struct lens *l1, struct lens *l2) {
822
0
    struct value *result = NULL;
823
824
0
    result = ambig_concat_check(info, "ambiguous concatenation",
825
0
                                CTYPE, l1, l2);
826
0
    if (result == NULL) {
827
0
        result = ambig_concat_check(info, "ambiguous tree concatenation",
828
0
                                    ATYPE, l1, l2);
829
0
    }
830
0
    if (result != NULL) {
831
0
        char *fi = format_info(l1->info);
832
0
        exn_printf_line(result, "First lens: %s", fi);
833
0
        free(fi);
834
0
        fi = format_info(l2->info);
835
0
        exn_printf_line(result, "Second lens: %s", fi);
836
0
        free(fi);
837
0
    }
838
0
    return result;
839
0
}
840
841
static struct value *make_exn_square(struct info *info, struct lens *l1,
842
0
                                     struct lens *l2, const char *msg) {
843
844
0
    char *fi;
845
0
    struct value *exn = make_exn_value(ref(info), "%s",
846
0
            "Inconsistency in lens square");
847
0
    exn_printf_line(exn, "%s", msg);
848
0
    fi = format_info(l1->info);
849
0
    exn_printf_line(exn, "Left lens: %s", fi);
850
0
    free(fi);
851
0
    fi = format_info(l2->info);
852
0
    exn_printf_line(exn, "Right lens: %s", fi);
853
0
    free(fi);
854
0
    return exn;
855
0
}
856
857
static struct value *typecheck_square(struct info *info, struct lens *l1,
858
0
                                      struct lens *l2) {
859
0
    int r;
860
0
    struct value *exn = NULL;
861
0
    struct fa *fa1 = NULL, *fa2 = NULL;
862
0
    struct regexp *r1 = ltype(l1, CTYPE);
863
0
    struct regexp *r2 = ltype(l2, CTYPE);
864
865
0
    if (r1 == NULL || r2 == NULL)
866
0
        return NULL;
867
868
0
    exn = regexp_to_fa(r1, &fa1);
869
0
    if (exn != NULL)
870
0
        goto done;
871
872
0
    exn = regexp_to_fa(r2, &fa2);
873
0
    if (exn != NULL)
874
0
        goto done;
875
876
0
    r = fa_equals(fa1, fa2);
877
878
0
    if (r < 0) {
879
0
        exn = make_exn_value(ref(info), "not enough memory");
880
0
        if (exn != NULL) {
881
0
            return exn;
882
0
        } else {
883
0
            ERR_REPORT(info, AUG_ENOMEM, NULL);
884
0
            return info->error->exn;;
885
0
        }
886
0
    }
887
888
0
    if (r == 0) {
889
0
        exn = make_exn_square(info, l1, l2,
890
0
                "Left and right lenses must accept the same language");
891
0
        goto done;
892
0
    }
893
894
    /* check del create consistency */
895
0
    if (l1->tag == L_DEL && l2->tag == L_DEL) {
896
0
        if (!STREQ(l1->string->str, l2->string->str)) {
897
0
            exn = make_exn_square(info, l1, l2,
898
0
                    "Left and right lenses must have the same default value");
899
0
            goto done;
900
0
        }
901
0
    }
902
903
0
 done:
904
0
    fa_free(fa1);
905
0
    fa_free(fa2);
906
0
    return exn;
907
0
}
908
909
static struct value *
910
ambig_iter_check(struct info *info, const char *msg,
911
0
                 enum lens_type typ, struct lens *l) {
912
0
    struct fa *fas = NULL, *fa = NULL;
913
0
    struct value *result = NULL;
914
0
    struct regexp *r = ltype(l, typ);
915
916
0
    if (r == NULL)
917
0
        return NULL;
918
919
0
    result = regexp_to_fa(r, &fa);
920
0
    if (result != NULL)
921
0
        goto done;
922
923
0
    fas = fa_iter(fa, 0, -1);
924
925
0
    result = ambig_check(info, fa, fas, typ, l, l, msg, true);
926
927
0
 done:
928
0
    fa_free(fa);
929
0
    fa_free(fas);
930
0
    return result;
931
0
}
932
933
0
static struct value *typecheck_iter(struct info *info, struct lens *l) {
934
0
    struct value *result = NULL;
935
936
0
    result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
937
0
    if (result == NULL) {
938
0
        result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
939
0
    }
940
0
    if (result != NULL) {
941
0
        char *fi = format_info(l->info);
942
0
        exn_printf_line(result, "Iterated lens: %s", fi);
943
0
        free(fi);
944
0
    }
945
0
    return result;
946
0
}
947
948
0
static struct value *typecheck_maybe(struct info *info, struct lens *l) {
949
    /* Check (r)? as (<e>|r) where <e> is the empty language */
950
0
    struct value *exn = NULL;
951
952
0
    if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
953
0
        exn = make_exn_value(ref(info),
954
0
                "illegal optional expression: /%s/ matches the empty word",
955
0
                l->ctype->pattern->str);
956
0
    }
957
958
    /* Typecheck the put direction; the check passes if
959
       (1) the atype does not match the empty string, because we can tell
960
           from looking at tree nodes whether L should be applied or not
961
       (2) L handles a value; with that, we know whether to apply L or not
962
           depending on whether the current node has a non NULL value or not
963
    */
964
0
    if (exn == NULL && ! l->consumes_value) {
965
0
        if (l->atype != NULL && regexp_matches_empty(l->atype)) {
966
0
            exn = make_exn_value(ref(info),
967
0
               "optional expression matches the empty tree but does not consume a value");
968
0
        }
969
0
    }
970
0
    return exn;
971
0
}
972
973
0
void free_lens(struct lens *lens) {
974
0
    if (lens == NULL)
975
0
        return;
976
0
    ensure(lens->ref == 0, lens->info);
977
978
0
    if (debugging("lenses"))
979
0
        dump_lens_tree(lens);
980
0
    switch (lens->tag) {
981
0
    case L_DEL:
982
0
        unref(lens->regexp, regexp);
983
0
        unref(lens->string, string);
984
0
        break;
985
0
    case L_STORE:
986
0
    case L_KEY:
987
0
        unref(lens->regexp, regexp);
988
0
        break;
989
0
    case L_LABEL:
990
0
    case L_SEQ:
991
0
    case L_COUNTER:
992
0
    case L_VALUE:
993
0
        unref(lens->string, string);
994
0
        break;
995
0
    case L_SUBTREE:
996
0
    case L_STAR:
997
0
    case L_MAYBE:
998
0
    case L_SQUARE:
999
0
        unref(lens->child, lens);
1000
0
        break;
1001
0
    case L_CONCAT:
1002
0
    case L_UNION:
1003
0
        for (int i=0; i < lens->nchildren; i++)
1004
0
            unref(lens->children[i], lens);
1005
0
        free(lens->children);
1006
0
        break;
1007
0
    case L_REC:
1008
0
        if (!lens->rec_internal) {
1009
0
            unref(lens->body, lens);
1010
0
        }
1011
0
        break;
1012
0
    default:
1013
0
        BUG_LENS_TAG(lens);
1014
0
        break;
1015
0
    }
1016
1017
0
    for (int t=0; t < ntypes; t++)
1018
0
        unref(ltype(lens, t), regexp);
1019
1020
0
    unref(lens->info, info);
1021
0
    jmt_free(lens->jmt);
1022
0
    free(lens);
1023
0
 error:
1024
0
    return;
1025
0
}
1026
1027
0
void lens_release(struct lens *lens) {
1028
0
    if (lens == NULL)
1029
0
        return;
1030
1031
0
    for (int t=0; t < ntypes; t++)
1032
0
        regexp_release(ltype(lens, t));
1033
1034
0
    if (lens->tag == L_KEY || lens->tag == L_STORE)
1035
0
        regexp_release(lens->regexp);
1036
1037
0
    if (lens->tag == L_SUBTREE || lens->tag == L_STAR
1038
0
        || lens->tag == L_MAYBE || lens->tag == L_SQUARE) {
1039
0
        lens_release(lens->child);
1040
0
    }
1041
1042
0
    if (lens->tag == L_UNION || lens->tag == L_CONCAT) {
1043
0
        for (int i=0; i < lens->nchildren; i++) {
1044
0
            lens_release(lens->children[i]);
1045
0
        }
1046
0
    }
1047
1048
0
    if (lens->tag == L_REC && !lens->rec_internal) {
1049
0
        lens_release(lens->body);
1050
0
    }
1051
1052
0
    jmt_free(lens->jmt);
1053
0
    lens->jmt = NULL;
1054
0
}
1055
1056
/*
1057
 * Encoding of tree levels
1058
 */
1059
0
char *enc_format(const char *e, size_t len) {
1060
0
    return enc_format_indent(e, len, 0);
1061
0
}
1062
1063
0
char *enc_format_indent(const char *e, size_t len, int indent) {
1064
0
    size_t size = 0;
1065
0
    char *result = NULL, *r;
1066
0
    const char *k = e;
1067
1068
0
    while (*k && k - e < len) {
1069
0
        char *eq,  *slash, *v;
1070
0
        eq = strchr(k, ENC_EQ_CH);
1071
0
        assert(eq != NULL);
1072
0
        slash = strchr(eq, ENC_SLASH_CH);
1073
0
        assert(slash != NULL);
1074
0
        v = eq + 1;
1075
1076
0
        if (indent > 0)
1077
0
            size += indent + 1;
1078
0
        size += 6;     /* Surrounding braces */
1079
0
        if (k != eq)
1080
0
            size += 1 + (eq - k) + 1;
1081
0
        if (v != slash)
1082
0
            size += 4 + (slash - v) + 1;
1083
0
        k = slash + 1;
1084
0
    }
1085
0
    if (ALLOC_N(result, size + 1) < 0)
1086
0
        return NULL;
1087
1088
0
    k = e;
1089
0
    r = result;
1090
0
    while (*k && k - e < len) {
1091
0
        char *eq,  *slash, *v;
1092
0
        eq = strchr(k, ENC_EQ_CH);
1093
0
        slash = strchr(eq, ENC_SLASH_CH);
1094
0
        assert(eq != NULL && slash != NULL);
1095
0
        v = eq + 1;
1096
1097
0
        for (int i=0; i < indent; i++)
1098
0
            *r++ = ' ';
1099
0
        r = stpcpy(r, " { ");
1100
0
        if (k != eq) {
1101
0
            r = stpcpy(r, "\"");
1102
0
            r = stpncpy(r, k, eq - k);
1103
0
            r = stpcpy(r, "\"");
1104
0
        }
1105
0
        if (v != slash) {
1106
0
            r = stpcpy (r, " = \"");
1107
0
            r = stpncpy(r, v, slash - v);
1108
0
            r = stpcpy(r, "\"");
1109
0
        }
1110
0
        r = stpcpy(r, " }");
1111
0
        if (indent > 0)
1112
0
            *r++ = '\n';
1113
0
        k = slash + 1;
1114
0
    }
1115
0
    return result;
1116
0
}
1117
1118
static int format_atype(struct lens *l, char **buf, uint indent);
1119
1120
0
static int format_indent(char **buf, uint indent) {
1121
0
    if (ALLOC_N(*buf, indent+1) < 0)
1122
0
        return -1;
1123
0
    memset(*buf, ' ', indent);
1124
0
    return 0;
1125
0
}
1126
1127
0
static int format_subtree_atype(struct lens *l, char **buf, uint indent) {
1128
0
    char *k = NULL, *v = NULL;
1129
0
    const struct regexp *ktype = l->child->ktype;
1130
0
    const struct regexp *vtype = l->child->vtype;
1131
0
    int r, result = -1;
1132
0
    char *si = NULL;
1133
1134
0
    if (format_indent(&si, indent) < 0)
1135
0
        goto done;
1136
1137
0
    if (ktype != NULL) {
1138
0
        k = regexp_escape(ktype);
1139
0
        if (k == NULL)
1140
0
            goto done;
1141
0
    }
1142
0
    if (vtype != NULL) {
1143
0
        v = regexp_escape(vtype);
1144
0
        if (v == NULL)
1145
0
            goto done;
1146
0
        if (k == NULL)
1147
0
            r = xasprintf(buf, "%s{ = /%s/ }", si, k, v);
1148
0
        else
1149
0
            r = xasprintf(buf, "%s{ /%s/ = /%s/ }", si, k, v);
1150
0
    } else {
1151
0
        if (k == NULL)
1152
0
            r = xasprintf(buf, "%s{ }", si, k);
1153
0
        else
1154
0
            r = xasprintf(buf, "%s{ /%s/ }", si, k);
1155
0
    }
1156
0
    if (r < 0)
1157
0
        goto done;
1158
1159
0
    result = 0;
1160
0
 done:
1161
0
    FREE(si);
1162
0
    FREE(v);
1163
0
    FREE(k);
1164
0
    return result;
1165
0
}
1166
1167
static int format_rep_atype(struct lens *l, char **buf,
1168
0
                            uint indent, char quant) {
1169
0
    char *a = NULL;
1170
0
    int r, result = -1;
1171
1172
0
    r = format_atype(l->child, &a, indent);
1173
0
    if (r < 0)
1174
0
        goto done;
1175
0
    if (strlen(a) == 0) {
1176
0
        *buf = a;
1177
0
        a = NULL;
1178
0
        result = 0;
1179
0
        goto done;
1180
0
    }
1181
1182
0
    if (l->child->tag == L_CONCAT || l->child->tag == L_UNION)
1183
0
        r = xasprintf(buf, "(%s)%c", a, quant);
1184
0
    else
1185
0
        r = xasprintf(buf, "%s%c", a, quant);
1186
1187
0
    if (r < 0)
1188
0
        goto done;
1189
1190
0
    result = 0;
1191
0
 done:
1192
0
    FREE(a);
1193
0
    return result;
1194
0
}
1195
1196
0
static int format_concat_atype(struct lens *l, char **buf, uint indent) {
1197
0
    char **c = NULL, *s = NULL, *p;
1198
0
    int r, result = -1;
1199
0
    size_t len = 0, nconc = 0;
1200
1201
0
    if (ALLOC_N(c, l->nchildren) < 0)
1202
0
        goto done;
1203
1204
0
    for (int i=0; i < l->nchildren; i++) {
1205
0
        r = format_atype(l->children[i], c+i, indent);
1206
0
        if (r < 0)
1207
0
            goto done;
1208
0
        len += strlen(c[i]) + 3;
1209
0
        if (strlen(c[i]) > 0)
1210
0
            nconc += 1;
1211
0
        if (l->children[i]->tag == L_UNION)
1212
0
            len += 2;
1213
0
    }
1214
1215
0
    if (ALLOC_N(s, len+1) < 0)
1216
0
        goto done;
1217
0
    p = s;
1218
0
    for (int i=0; i < l->nchildren; i++) {
1219
0
        bool needs_parens = nconc > 1 && l->children[i]->tag == L_UNION;
1220
0
        if (strlen(c[i]) == 0)
1221
0
            continue;
1222
0
        if (i > 0)
1223
0
            *p++ = '\n';
1224
0
        char *t = c[i];
1225
0
        if (needs_parens) {
1226
0
            for (int j=0; j < indent; j++)
1227
0
                *p++ = *t++;
1228
0
            *p++ = '(';
1229
0
        }
1230
0
        p = stpcpy(p, t);
1231
0
        if (needs_parens)
1232
0
            *p++ = ')';
1233
0
    }
1234
1235
0
    *buf = s;
1236
0
    s = NULL;
1237
0
    result = 0;
1238
0
 done:
1239
0
    if (c != NULL)
1240
0
        for (int i=0; i < l->nchildren; i++)
1241
0
            FREE(c[i]);
1242
0
    FREE(c);
1243
0
    FREE(s);
1244
0
    return result;
1245
0
}
1246
1247
0
static int format_union_atype(struct lens *l, char **buf, uint indent) {
1248
0
    char **c = NULL, *s = NULL, *p;
1249
0
    int r, result = -1;
1250
0
    size_t len = 0;
1251
1252
0
    if (ALLOC_N(c, l->nchildren) < 0)
1253
0
        goto done;
1254
1255
    /* Estimate the length of the string we will build. The calculation
1256
       overestimates that length so that the logic is a little simpler than
1257
       in the loop where we actually build the string */
1258
0
    for (int i=0; i < l->nchildren; i++) {
1259
0
        r = format_atype(l->children[i], c+i, indent + 2);
1260
0
        if (r < 0)
1261
0
            goto done;
1262
        /* We will add c[i] and some fixed characters */
1263
0
        len += strlen(c[i]) + strlen("\n| ()");
1264
0
        if (strlen(c[i]) < indent+2) {
1265
            /* We will add indent+2 whitespace */
1266
0
            len += indent+2;
1267
0
        }
1268
0
    }
1269
1270
0
    if (ALLOC_N(s, len+1) < 0)
1271
0
        goto done;
1272
1273
0
    p = s;
1274
0
    for (int i=0; i < l->nchildren; i++) {
1275
0
        char *t = c[i];
1276
0
        if (i > 0) {
1277
0
            *p++ = '\n';
1278
0
            if (strlen(t) >= indent+2) {
1279
                /* c[i] is not just whitespace */
1280
0
                p = stpncpy(p, t, indent+2);
1281
0
                t += indent+2;
1282
0
            } else {
1283
                /* c[i] is just whitespace, make sure we indent the
1284
                   '|' appropriately */
1285
0
                memset(p, ' ', indent+2);
1286
0
                p += indent+2;
1287
0
            }
1288
0
            p = stpcpy(p, "| ");
1289
0
        } else {
1290
            /* Skip additional indent */
1291
0
            t += 2;
1292
0
        }
1293
0
        if (strlen(t) == 0)
1294
0
            p = stpcpy(p, "()");
1295
0
        else
1296
0
            p = stpcpy(p, t);
1297
0
    }
1298
0
    *buf = s;
1299
0
    s = NULL;
1300
0
    result = 0;
1301
0
 done:
1302
0
    if (c != NULL)
1303
0
        for (int i=0; i < l->nchildren; i++)
1304
0
            FREE(c[i]);
1305
0
    FREE(c);
1306
0
    FREE(s);
1307
0
    return result;
1308
0
}
1309
1310
0
static int format_rec_atype(struct lens *l, char **buf, uint indent) {
1311
0
    int r;
1312
1313
0
    if (l->rec_internal) {
1314
0
        *buf = strdup("<<rec>>");
1315
0
        return (*buf == NULL) ? -1 : 0;
1316
0
    }
1317
1318
0
    char *c = NULL;
1319
0
    r = format_atype(l->body, &c, indent);
1320
0
    if (r < 0)
1321
0
        return -1;
1322
0
    r = xasprintf(buf, "<<rec:%s>>", c);
1323
0
    free(c);
1324
0
    return (r < 0) ? -1 : 0;
1325
0
}
1326
1327
0
static int format_atype(struct lens *l, char **buf, uint indent) {
1328
0
    *buf = NULL;
1329
1330
0
    switch(l->tag) {
1331
0
    case L_DEL:
1332
0
    case L_STORE:
1333
0
    case L_KEY:
1334
0
    case L_LABEL:
1335
0
    case L_VALUE:
1336
0
    case L_SEQ:
1337
0
    case L_COUNTER:
1338
0
        *buf = strdup("");
1339
0
        return (*buf == NULL) ? -1 : 0;
1340
0
        break;
1341
0
    case L_SUBTREE:
1342
0
        return format_subtree_atype(l, buf, indent);
1343
0
        break;
1344
0
    case L_STAR:
1345
0
        return format_rep_atype(l, buf, indent, '*');
1346
0
        break;
1347
0
    case L_MAYBE:
1348
0
        return format_rep_atype(l, buf, indent, '?');
1349
0
        break;
1350
0
    case L_CONCAT:
1351
0
        return format_concat_atype(l, buf, indent);
1352
0
        break;
1353
0
    case L_UNION:
1354
0
        return format_union_atype(l, buf, indent);
1355
0
        break;
1356
0
    case L_REC:
1357
0
        return format_rec_atype(l, buf, indent);
1358
0
        break;
1359
0
    case L_SQUARE:
1360
0
        return format_concat_atype(l->child, buf, indent);
1361
0
        break;
1362
0
    default:
1363
0
        BUG_LENS_TAG(l);
1364
0
        break;
1365
0
    };
1366
0
    return -1;
1367
0
}
1368
1369
0
int lns_format_atype(struct lens *l, char **buf) {
1370
0
    int r = 0;
1371
0
    r = format_atype(l, buf, 4);
1372
0
    return r;
1373
0
}
1374
1375
/*
1376
 * Recursive lenses
1377
 */
1378
0
struct value *lns_make_rec(struct info *info) {
1379
0
    struct lens *l = make_lens(L_REC, info);
1380
0
    l->recursive = 1;
1381
0
    l->rec_internal = 1;
1382
1383
0
    return make_lens_value(l);
1384
0
}
1385
1386
/* Transform a recursive lens into a recursive transition network
1387
 *
1388
 * First, we transform the lens into context free grammar, considering any
1389
 * nonrecursive lens as a terminal
1390
 *
1391
 * cfg: lens -> nonterminal -> production list
1392
 *
1393
 * cfg(primitive, N) -> N := regexp(primitive)
1394
 * cfg(l1 . l2, N)   -> N := N1 . N2 + cfg(l1, N1) + cfg(l2, N2)
1395
 * cfg(l1 | l2, N)   -> N := N1 | N2 + cfg(l1, N1) + cfg(l2, N2)
1396
 * cfg(l*, N)        -> N := N . N' | eps + cfg(l, N')
1397
 * cfg([ l ], N)     -> N := N' + cfg(l, N')
1398
 *
1399
 * We use the lenses as nonterminals themselves; this also means that our
1400
 * productions are normalized such that the RHS is either a terminal
1401
 * (regexp) or entirely consists of nonterminals
1402
 *
1403
 * In a few places, we need to know that a nonterminal corresponds to a
1404
 * subtree combinator ([ l ]); this is the main reason that the rule (cfg[
1405
 * l ], N) introduces a useless production N := N'.
1406
 *
1407
 * Computing the types for a recursive lens r is (fairly) straightforward,
1408
 * given the above grammar, which we convert to an automaton following
1409
 * http://arxiv.org/abs/cs/9910022; the only complication arises from the
1410
 * subtree combinator, since it can be used in recursive lenses to
1411
 * construct trees of arbitrary depth, but we need to approximate the types
1412
 * of r in a way that fits with our top-down tree automaton in put.c.
1413
 *
1414
 * To handle subtree combinators, remember that the type rules for a lens
1415
 * m = [ l ] are:
1416
 *
1417
 *   m.ktype = NULL
1418
 *   m.vtype = NULL
1419
 *   m.ctype = l.ctype
1420
 *   m.atype = enc(l.ktype, l.vtype)
1421
 *     ( enc is a function regexp -> regexp -> regexp)
1422
 *
1423
 * We compute types for r by modifying its automaton according to
1424
 * Nederhof's paper and reducing it to a regular expression of lenses. This
1425
 * has to happen in the following steps:
1426
 *   r.ktype : approximate by using [ .. ].ktype = NULL
1427
 *   r.vtype : same as r.ktype
1428
 *   r.ctype : approximate by treating [ l ] as l
1429
 *   r.atype : approximate by using r.ktype and r.vtype from above
1430
 *             in lens expressions [ f(r) ]
1431
 */
1432
1433
/* Transitions go to a state and are labeled with a lens. For epsilon
1434
 * transitions, lens may be NULL. When lens is a simple (nonrecursive
1435
 * lens), PROD will be NULL. When we modify the automaton to splice
1436
 * nonterminals in, we remember the production for the nonterminal in PROD.
1437
 */
1438
struct trans {
1439
    struct state  *to;
1440
    struct lens   *lens;
1441
    struct regexp *re;
1442
};
1443
1444
struct state {
1445
    struct state  *next;   /* Linked list for memory management */
1446
    size_t         ntrans;
1447
    struct trans  *trans;
1448
};
1449
1450
/* Productions for lens LENS. Start state START and end state END. If we
1451
   start with START, END is the only accepting state. */
1452
struct prod {
1453
    struct lens  *lens;
1454
    struct state *start;
1455
    struct state *end;
1456
};
1457
1458
/* A recursive transition network used to compute regular approximations
1459
 * to the types */
1460
struct rtn {
1461
    struct info *info;
1462
    size_t        nprod;
1463
    struct prod **prod;
1464
    struct state *states;  /* Linked list through next of all states in all
1465
                              prods; the states for each production are on
1466
                              the part of the list from prod->start to
1467
                              prod->end */
1468
    struct value *exn;
1469
    enum lens_type lens_type;
1470
    unsigned int check : 1;
1471
};
1472
1473
0
#define RTN_BAIL(rtn) if ((rtn)->exn != NULL ||                     \
1474
0
                          (rtn)->info->error->code != AUG_NOERROR)  \
1475
0
                         goto error;
1476
1477
0
static void free_prod(struct prod *prod) {
1478
0
    if (prod == NULL)
1479
0
        return;
1480
0
    unref(prod->lens, lens);
1481
0
    free(prod);
1482
0
}
1483
1484
0
static void free_rtn(struct rtn *rtn) {
1485
0
    if (rtn == NULL)
1486
0
        return;
1487
0
    for (int i=0; i < rtn->nprod; i++)
1488
0
        free_prod(rtn->prod[i]);
1489
0
    free(rtn->prod);
1490
0
    list_for_each(s, rtn->states) {
1491
0
        for (int i=0; i < s->ntrans; i++) {
1492
0
            unref(s->trans[i].lens, lens);
1493
0
            unref(s->trans[i].re, regexp);
1494
0
        }
1495
0
        free(s->trans);
1496
0
    }
1497
0
    list_free(rtn->states);
1498
0
    unref(rtn->info, info);
1499
0
    unref(rtn->exn, value);
1500
0
    free(rtn);
1501
0
}
1502
1503
0
static struct state *add_state(struct prod *prod) {
1504
0
    struct state *result = NULL;
1505
0
    int r;
1506
1507
0
    r = ALLOC(result);
1508
0
    ERR_NOMEM(r < 0, prod->lens->info);
1509
1510
0
    list_cons(prod->start->next, result);
1511
0
 error:
1512
0
    return result;
1513
0
}
1514
1515
static struct trans *add_trans(struct rtn *rtn, struct state *state,
1516
0
                               struct state *to, struct lens *l) {
1517
0
    int r;
1518
0
    struct trans *result = NULL;
1519
1520
0
    for (int i=0; i < state->ntrans; i++)
1521
0
        if (state->trans[i].to == to && state->trans[i].lens == l)
1522
0
            return state->trans + i;
1523
1524
0
    r = REALLOC_N(state->trans, state->ntrans+1);
1525
0
    ERR_NOMEM(r < 0, rtn->info);
1526
1527
0
    result = state->trans + state->ntrans;
1528
0
    state->ntrans += 1;
1529
1530
0
    MEMZERO(result, 1);
1531
0
    result->to = to;
1532
0
    if (l != NULL) {
1533
0
        result->lens = ref(l);
1534
0
        result->re = ref(ltype(l, rtn->lens_type));
1535
0
    }
1536
0
 error:
1537
0
    return result;
1538
0
}
1539
1540
0
static struct prod *make_prod(struct rtn *rtn, struct lens *l) {
1541
0
    struct prod *result = NULL;
1542
0
    int r;
1543
1544
0
    r = ALLOC(result);
1545
0
    ERR_NOMEM(r < 0, l->info);
1546
1547
0
    result->lens = ref(l);
1548
0
    r = ALLOC(result->start);
1549
0
    ERR_NOMEM(r < 0, l->info);
1550
1551
0
    result->end = add_state(result);
1552
0
    ERR_BAIL(l->info);
1553
1554
0
    result->end->next = rtn->states;
1555
0
    rtn->states = result->start;
1556
1557
0
    return result;
1558
0
 error:
1559
0
    free_prod(result);
1560
0
    return NULL;
1561
0
}
1562
1563
0
static struct prod *prod_for_lens(struct rtn *rtn, struct lens *l) {
1564
0
    if (l == NULL)
1565
0
        return NULL;
1566
0
    for (int i=0; i < rtn->nprod; i++) {
1567
0
        if (rtn->prod[i]->lens == l)
1568
0
            return rtn->prod[i];
1569
0
    }
1570
0
    return NULL;
1571
0
}
1572
1573
0
static void rtn_dot(struct rtn *rtn, const char *stage) {
1574
0
    FILE *fp;
1575
0
    int r = 0;
1576
1577
0
    fp = debug_fopen("rtn_%s_%s.dot", stage, lens_type_names[rtn->lens_type]);
1578
0
    if (fp == NULL)
1579
0
        return;
1580
1581
0
    fprintf(fp, "digraph \"l1\" {\n  rankdir=LR;\n");
1582
0
    list_for_each(s, rtn->states) {
1583
0
        char *label = NULL;
1584
0
        for (int p=0; p < rtn->nprod; p++) {
1585
0
            if (s == rtn->prod[p]->start) {
1586
0
                r = xasprintf(&label, "s%d", p);
1587
0
            } else if (s == rtn->prod[p]->end) {
1588
0
                r = xasprintf(&label, "e%d", p);
1589
0
            }
1590
0
            ERR_NOMEM(r < 0, rtn->info);
1591
0
        }
1592
0
        if (label == NULL) {
1593
0
            r = xasprintf(&label, "%p", s);
1594
0
            ERR_NOMEM(r < 0, rtn->info);
1595
0
        }
1596
0
        fprintf(fp, "  n%p [label = \"%s\"];\n", s, label == NULL ? "" : label);
1597
0
        FREE(label);
1598
0
        for (int i=0; i < s->ntrans; i++) {
1599
0
            fprintf(fp, "  n%p -> n%p", s, s->trans[i].to);
1600
0
            if (s->trans[i].re != NULL) {
1601
0
                label = regexp_escape(s->trans[i].re);
1602
0
                for (char *t = label; *t; t++)
1603
0
                    if (*t == '\\')
1604
0
                        *t = '~';
1605
0
                fprintf(fp, " [ label = \"%s\" ]", label);
1606
0
                FREE(label);
1607
0
            }
1608
0
            fprintf(fp, ";\n");
1609
0
        }
1610
0
    }
1611
0
 error:
1612
0
    fprintf(fp, "}\n");
1613
0
    fclose(fp);
1614
0
}
1615
1616
/* Add transitions to RTN corresponding to cfg(l, N) */
1617
0
static void rtn_rules(struct rtn *rtn, struct lens *l) {
1618
0
    if (! l->recursive)
1619
0
        return;
1620
1621
0
    struct prod *prod = prod_for_lens(rtn, l);
1622
0
    if (prod != NULL)
1623
0
        return;
1624
1625
0
    int r = REALLOC_N(rtn->prod, rtn->nprod+1);
1626
0
    ERR_NOMEM(r < 0, l->info);
1627
1628
0
    prod =  make_prod(rtn, l);
1629
0
    rtn->prod[rtn->nprod] = prod;
1630
0
    RTN_BAIL(rtn);
1631
0
    rtn->nprod += 1;
1632
1633
0
    struct state *start = prod->start;
1634
1635
0
    switch (l->tag) {
1636
0
    case L_UNION:
1637
        /* cfg(l1|..|ln, N) -> N := N1 | N2 | ... | Nn */
1638
0
        for (int i=0; i < l->nchildren; i++) {
1639
0
            add_trans(rtn, start, prod->end, l->children[i]);
1640
0
            RTN_BAIL(rtn);
1641
0
            rtn_rules(rtn, l->children[i]);
1642
0
            RTN_BAIL(rtn);
1643
0
        }
1644
0
        break;
1645
0
    case L_CONCAT:
1646
        /* cfg(l1 . l2 ... ln, N) -> N := N1 . N2 ... Nn */
1647
0
        for (int i=0; i < l->nchildren-1; i++) {
1648
0
            struct state *s = add_state(prod);
1649
0
            RTN_BAIL(rtn);
1650
0
            add_trans(rtn, start, s, l->children[i]);
1651
0
            RTN_BAIL(rtn);
1652
0
            start = s;
1653
0
            rtn_rules(rtn, l->children[i]);
1654
0
            RTN_BAIL(rtn);
1655
0
        }
1656
0
        {
1657
0
            struct lens *c = l->children[l->nchildren - 1];
1658
0
            add_trans(rtn, start, prod->end, c);
1659
0
            RTN_BAIL(rtn);
1660
0
            rtn_rules(rtn, c);
1661
0
            RTN_BAIL(rtn);
1662
0
        }
1663
0
        break;
1664
0
    case L_STAR: {
1665
        /* cfg(l*, N) -> N := N . N' | eps */
1666
0
        struct state *s = add_state(prod);
1667
0
        RTN_BAIL(rtn);
1668
0
        add_trans(rtn, start, s, l);
1669
0
        RTN_BAIL(rtn);
1670
0
        add_trans(rtn, s, prod->end, l->child);
1671
0
        RTN_BAIL(rtn);
1672
0
        add_trans(rtn, start, prod->end, NULL);
1673
0
        RTN_BAIL(rtn);
1674
0
        rtn_rules(rtn, l->child);
1675
0
        RTN_BAIL(rtn);
1676
0
        break;
1677
0
    }
1678
0
    case L_SUBTREE:
1679
0
        switch (rtn->lens_type) {
1680
0
        case KTYPE:
1681
0
        case VTYPE:
1682
            /* cfg([ l ], N) -> N := eps */
1683
0
            add_trans(rtn, start, prod->end, NULL);
1684
0
            break;
1685
0
        case CTYPE:
1686
            /* cfg([ l ], N) -> N := N' plus cfg(l, N') */
1687
0
            add_trans(rtn, start, prod->end, l->child);
1688
0
            RTN_BAIL(rtn);
1689
0
            rtn_rules(rtn, l->child);
1690
0
            RTN_BAIL(rtn);
1691
0
            break;
1692
0
        case ATYPE: {
1693
            /* At this point, we have propagated ktype and vtype */
1694
            /* cfg([ l ], N) -> N := enc(l->ktype, l->vtype) */
1695
0
            struct trans *t = add_trans(rtn, start, prod->end, NULL);
1696
0
            RTN_BAIL(rtn);
1697
0
            t->re = subtree_atype(l->info, l->child->ktype, l->child->vtype);
1698
0
            break;
1699
0
        }
1700
0
        default:
1701
0
            BUG_ON(true, rtn->info, "Unexpected lens type %d", rtn->lens_type);
1702
0
            break;
1703
0
        }
1704
0
        break;
1705
0
    case L_MAYBE:
1706
        /* cfg(l?, N) -> N := N' | eps plus cfg(l, N') */
1707
0
        add_trans(rtn, start, prod->end, l->child);
1708
0
        RTN_BAIL(rtn);
1709
0
        add_trans(rtn, start, prod->end, NULL);
1710
0
        RTN_BAIL(rtn);
1711
0
        rtn_rules(rtn, l->child);
1712
0
        RTN_BAIL(rtn);
1713
0
        break;
1714
0
    case L_REC:
1715
        /* cfg(l, N) -> N := N' plus cfg(l->body, N') */
1716
0
        add_trans(rtn, start, prod->end, l->body);
1717
0
        RTN_BAIL(rtn);
1718
0
        rtn_rules(rtn, l->body);
1719
0
        RTN_BAIL(rtn);
1720
0
        break;
1721
0
    case L_SQUARE:
1722
0
        add_trans(rtn, start, prod->end, l->child);
1723
0
        RTN_BAIL(rtn);
1724
0
        break;
1725
0
    default:
1726
0
        BUG_LENS_TAG(l);
1727
0
        break;
1728
0
    }
1729
0
 error:
1730
0
    return;
1731
0
}
1732
1733
/* Replace transition t with two epsilon transitions s => p->start and
1734
 * p->end => s->trans[i].to where s is the start of t. Instead of adding
1735
 * epsilon transitions, we expand the epsilon transitions.
1736
 */
1737
static void prod_splice(struct rtn *rtn,
1738
0
                        struct prod *from, struct prod *to, struct trans *t) {
1739
1740
0
    add_trans(rtn, to->end, t->to, NULL);
1741
0
    ERR_BAIL(from->lens->info);
1742
0
    t->to = to->start;
1743
0
    unref(t->re, regexp);
1744
1745
0
 error:
1746
0
    return;
1747
0
}
1748
1749
0
static void rtn_splice(struct rtn *rtn, struct prod *prod) {
1750
0
    for (struct state *s = prod->start; s != prod->end; s = s->next) {
1751
0
        for (int i=0; i < s->ntrans; i++) {
1752
0
            struct prod *p = prod_for_lens(rtn, s->trans[i].lens);
1753
0
            if (p != NULL) {
1754
0
                prod_splice(rtn, prod, p, s->trans+i);
1755
0
                RTN_BAIL(rtn);
1756
0
            }
1757
0
        }
1758
0
    }
1759
0
 error:
1760
0
    return;
1761
0
}
1762
1763
0
static struct rtn *rtn_build(struct lens *rec, enum lens_type lt) {
1764
0
    int r;
1765
0
    struct rtn *rtn;
1766
1767
0
    r = ALLOC(rtn);
1768
0
    ERR_NOMEM(r < 0, rec->info);
1769
1770
0
    rtn->info = ref(rec->info);
1771
0
    rtn->lens_type = lt;
1772
1773
0
    rtn_rules(rtn, rec);
1774
0
    RTN_BAIL(rtn);
1775
0
    if (debugging("cf.approx"))
1776
0
        rtn_dot(rtn, "10-rules");
1777
1778
0
    for (int i=0; i < rtn->nprod; i++) {
1779
0
        rtn_splice(rtn, rtn->prod[i]);
1780
0
        RTN_BAIL(rtn);
1781
0
    }
1782
0
    if (debugging("cf.approx"))
1783
0
        rtn_dot(rtn, "11-splice");
1784
1785
0
 error:
1786
0
    return rtn;
1787
0
}
1788
1789
/* Compare transitions lexicographically by (to, lens) */
1790
0
static int trans_to_cmp(const void *v1, const void *v2) {
1791
0
    const struct trans *t1 = v1;
1792
0
    const struct trans *t2 = v2;
1793
1794
0
    if (t1->to != t2->to)
1795
0
        return (t1->to < t2->to) ? -1 : 1;
1796
1797
0
    if (t1->lens == t2->lens)
1798
0
        return 0;
1799
0
    return (t1->lens < t2->lens) ? -1 : 1;
1800
0
}
1801
1802
/* Collapse a transition S1 -> S -> S2 by adding a transition S1 -> S2 with
1803
 * lens R1 . (LOOP)* . R2 | R3 where R3 is the regexp on the possibly
1804
 * existing transition S1 -> S2. If LOOP is NULL or R3 does not exist,
1805
 * label the transition with a simplified regexp by treating NULL as
1806
 * epsilon */
1807
static void collapse_trans(struct rtn *rtn,
1808
                           struct state *s1, struct state *s2,
1809
                           struct regexp *r1, struct regexp *loop,
1810
0
                           struct regexp *r2) {
1811
1812
0
    struct trans *t = NULL;
1813
0
    struct regexp *r = NULL;
1814
1815
0
    for (int i=0; i < s1->ntrans; i++) {
1816
0
        if (s1->trans[i].to == s2) {
1817
0
            t = s1->trans + i;
1818
0
            break;
1819
0
        }
1820
0
    }
1821
1822
    /* Set R = R1 . (LOOP)* . R2, treating NULL's as epsilon */
1823
0
    if (loop == NULL) {
1824
0
        if (r1 == NULL)
1825
0
            r = ref(r2);
1826
0
        else if (r2 == NULL)
1827
0
            r = ref(r1);
1828
0
        else
1829
0
            r = regexp_concat(rtn->info, r1, r2);
1830
0
    } else {
1831
0
        struct regexp *s = regexp_iter(rtn->info, loop, 0, -1);
1832
0
        ERR_NOMEM(s == NULL, rtn->info);
1833
0
        struct regexp *c = NULL;
1834
0
        if (r1 == NULL) {
1835
0
            c = s;
1836
0
            s = NULL;
1837
0
        } else {
1838
0
            c = regexp_concat(rtn->info, r1, s);
1839
0
            unref(s, regexp);
1840
0
            ERR_NOMEM(c == NULL, rtn->info);
1841
0
        }
1842
0
        if (r2 == NULL) {
1843
0
            r = c;
1844
0
            c = NULL;
1845
0
        } else {
1846
0
            r = regexp_concat(rtn->info, c, r2);
1847
0
            unref(c, regexp);
1848
0
            ERR_NOMEM(r == NULL, rtn->info);
1849
0
        }
1850
0
    }
1851
1852
0
    if (t == NULL) {
1853
0
        t = add_trans(rtn, s1, s2, NULL);
1854
0
        ERR_NOMEM(t == NULL, rtn->info);
1855
0
        t->re = r;
1856
0
    } else if (t->re == NULL) {
1857
0
        if (r == NULL || regexp_matches_empty(r))
1858
0
            t->re = r;
1859
0
        else {
1860
0
            t->re = regexp_maybe(rtn->info, r);
1861
0
            unref(r, regexp);
1862
0
            ERR_NOMEM(t->re == NULL, rtn->info);
1863
0
        }
1864
0
    } else if (r == NULL) {
1865
0
        if (!regexp_matches_empty(t->re)) {
1866
0
            r = regexp_maybe(rtn->info, t->re);
1867
0
            unref(t->re, regexp);
1868
0
            t->re = r;
1869
0
            ERR_NOMEM(r == NULL, rtn->info);
1870
0
        }
1871
0
    } else {
1872
0
        struct regexp *u = regexp_union(rtn->info, r, t->re);
1873
0
        unref(r, regexp);
1874
0
        unref(t->re, regexp);
1875
0
        t->re = u;
1876
0
        ERR_NOMEM(u == NULL, rtn->info);
1877
0
    }
1878
1879
0
    return;
1880
0
 error:
1881
0
    rtn->exn = rtn->info->error->exn;
1882
0
    return;
1883
0
}
1884
1885
/* Reduce the automaton with start state rprod->start and only accepting
1886
 * state rprod->end so that we have a single transition rprod->start =>
1887
 * rprod->end labelled with the overall approximating regexp for the
1888
 * automaton.
1889
 *
1890
 * This is the same algorithm as fa_as_regexp in fa.c
1891
 */
1892
0
static struct regexp *rtn_reduce(struct rtn *rtn, struct lens *rec) {
1893
0
    struct prod *prod = prod_for_lens(rtn, rec);
1894
0
    int r;
1895
1896
0
    ERR_THROW(prod == NULL, rtn->info, AUG_EINTERNAL,
1897
0
              "No production for recursive lens");
1898
1899
    /* Eliminate epsilon transitions and turn transitions between the same
1900
     * two states into a regexp union */
1901
0
    list_for_each(s, rtn->states) {
1902
0
        qsort(s->trans, s->ntrans, sizeof(*s->trans), trans_to_cmp);
1903
0
        for (int i=0; i < s->ntrans; i++) {
1904
0
            int j = i+1;
1905
0
            for (;j < s->ntrans && s->trans[i].to == s->trans[j].to;
1906
0
                 j++);
1907
0
            if (j > i+1) {
1908
0
                struct regexp *u, **v;
1909
0
                r = ALLOC_N(v, j - i);
1910
0
                ERR_NOMEM(r < 0, rtn->info);
1911
0
                for (int k=i; k < j; k++)
1912
0
                    v[k-i] = s->trans[k].re;
1913
0
                u = regexp_union_n(rtn->info, j - i, v);
1914
0
                if (u == NULL) {
1915
                    // FIXME: The calling convention for regexp_union_n
1916
                    // is bad, since we can't distinguish between alloc
1917
                    // failure and unioning all NULL's
1918
0
                    for (int k=0; k < j-i; k++)
1919
0
                        if (v[k] != NULL) {
1920
0
                            FREE(v);
1921
0
                            ERR_NOMEM(true, rtn->info);
1922
0
                        }
1923
0
                }
1924
0
                FREE(v);
1925
0
                for (int k=i; k < j; k++) {
1926
0
                    unref(s->trans[k].lens, lens);
1927
0
                    unref(s->trans[k].re, regexp);
1928
0
                }
1929
0
                s->trans[i].re = u;
1930
0
                MEMMOVE(s->trans + (i+1),
1931
0
                        s->trans + j,
1932
0
                        s->ntrans - j);
1933
0
                s->ntrans -= j - (i + 1);
1934
0
            }
1935
0
        }
1936
0
    }
1937
1938
    /* Introduce new start and end states with epsilon transitions to/from
1939
     * the old start and end states */
1940
0
    struct state *end = NULL;
1941
0
    struct state *start = NULL;
1942
0
    if (ALLOC(start) < 0 || ALLOC(end) < 0) {
1943
0
        FREE(start);
1944
0
        FREE(end);
1945
0
        ERR_NOMEM(true, rtn->info);
1946
0
    }
1947
0
    list_insert_before(start, prod->start, rtn->states);
1948
0
    end->next = prod->end->next;
1949
0
    prod->end->next = end;
1950
1951
0
    add_trans(rtn, start, prod->start, NULL);
1952
0
    RTN_BAIL(rtn);
1953
0
    add_trans(rtn, prod->end, end, NULL);
1954
0
    RTN_BAIL(rtn);
1955
1956
0
    prod->start = start;
1957
0
    prod->end = end;
1958
1959
    /* Eliminate states S (except for INI and FIN) one by one:
1960
     *     Let LOOP the regexp for the transition S -> S if it exists, epsilon
1961
     *     otherwise.
1962
     *     For all S1, S2 different from S with S1 -> S -> S2
1963
     *       Let R1 the regexp of S1 -> S
1964
     *           R2 the regexp of S -> S2
1965
     *           R3 the regexp of S1 -> S2 (or the regexp matching nothing
1966
     *                                      if no such transition)
1967
     *        set the regexp on the transition S1 -> S2 to
1968
     *          R1 . (LOOP)* . R2 | R3 */
1969
    // FIXME: This does not go over all states
1970
0
    list_for_each(s, rtn->states) {
1971
0
        if (s == prod->end || s == prod->start)
1972
0
            continue;
1973
0
        struct regexp *loop = NULL;
1974
0
        for (int i=0; i < s->ntrans; i++) {
1975
0
            if (s == s->trans[i].to) {
1976
0
                ensure(loop == NULL, rtn->info);
1977
0
                loop = s->trans[i].re;
1978
0
            }
1979
0
        }
1980
0
        list_for_each(s1, rtn->states) {
1981
0
            if (s == s1)
1982
0
                continue;
1983
0
            for (int t1=0; t1 < s1->ntrans; t1++) {
1984
0
                if (s == s1->trans[t1].to) {
1985
0
                    for (int t2=0; t2 < s->ntrans; t2++) {
1986
0
                        struct state *s2 = s->trans[t2].to;
1987
0
                        if (s2 == s)
1988
0
                            continue;
1989
0
                        collapse_trans(rtn, s1, s2,
1990
0
                                       s1->trans[t1].re, loop,
1991
0
                                       s->trans[t2].re);
1992
0
                        RTN_BAIL(rtn);
1993
0
                    }
1994
0
                }
1995
0
            }
1996
0
        }
1997
0
    }
1998
1999
    /* Find the overall regexp */
2000
0
    struct regexp *result = NULL;
2001
0
    for (int i=0; i < prod->start->ntrans; i++) {
2002
0
        if (prod->start->trans[i].to == prod->end) {
2003
0
            ensure(result == NULL, rtn->info);
2004
0
            result = ref(prod->start->trans[i].re);
2005
0
        }
2006
0
    }
2007
0
    return result;
2008
0
 error:
2009
0
    return NULL;
2010
0
}
2011
2012
0
static void propagate_type(struct lens *l, enum lens_type lt) {
2013
0
    struct regexp **types = NULL;
2014
0
    int r;
2015
2016
0
    if (! l->recursive || ltype(l, lt) != NULL)
2017
0
        return;
2018
2019
0
    switch(l->tag) {
2020
0
    case L_CONCAT:
2021
0
        r = ALLOC_N(types, l->nchildren);
2022
0
        ERR_NOMEM(r < 0, l->info);
2023
0
        for (int i=0; i < l->nchildren; i++) {
2024
0
            propagate_type(l->children[i], lt);
2025
0
            types[i] = ltype(l->children[i], lt);
2026
0
        }
2027
0
        ltype(l, lt) = regexp_concat_n(l->info, l->nchildren, types);
2028
0
        FREE(types);
2029
0
        break;
2030
0
    case L_UNION:
2031
0
        r = ALLOC_N(types, l->nchildren);
2032
0
        ERR_NOMEM(r < 0, l->info);
2033
0
        for (int i=0; i < l->nchildren; i++) {
2034
0
            propagate_type(l->children[i], lt);
2035
0
            types[i] = ltype(l->children[i], lt);
2036
0
        }
2037
0
        ltype(l, lt) = regexp_union_n(l->info, l->nchildren, types);
2038
0
        FREE(types);
2039
0
        break;
2040
0
    case L_SUBTREE:
2041
0
        propagate_type(l->child, lt);
2042
0
        if (lt == ATYPE)
2043
0
            l->atype = subtree_atype(l->info, l->child->ktype, l->child->vtype);
2044
0
        if (lt == CTYPE)
2045
0
            l->ctype = ref(l->child->ctype);
2046
0
        break;
2047
0
    case L_STAR:
2048
0
        propagate_type(l->child, lt);
2049
0
        ltype(l, lt) = regexp_iter(l->info, ltype(l->child, lt), 0, -1);
2050
0
        break;
2051
0
    case L_MAYBE:
2052
0
        propagate_type(l->child, lt);
2053
0
        ltype(l, lt) = regexp_maybe(l->info, ltype(l->child, lt));
2054
0
        break;
2055
0
    case L_REC:
2056
        /* Nothing to do */
2057
0
        break;
2058
0
    case L_SQUARE:
2059
0
        propagate_type(l->child, lt);
2060
0
        ltype(l, lt) = ref(ltype(l->child, lt));
2061
0
        break;
2062
0
    default:
2063
0
        BUG_LENS_TAG(l);
2064
0
        break;
2065
0
    }
2066
2067
0
 error:
2068
0
    FREE(types);
2069
0
}
2070
2071
static struct value *typecheck(struct lens *l, int check);
2072
2073
typedef struct value *typecheck_n_make(struct info *,
2074
                                       struct lens *, struct lens *, int);
2075
2076
0
static struct info *merge_info(struct info *i1, struct info *i2) {
2077
0
    struct info *info;
2078
0
    make_ref(info);
2079
0
    ERR_NOMEM(info == NULL, i1);
2080
2081
0
    info->filename = ref(i1->filename);
2082
0
    info->first_line = i1->first_line;
2083
0
    info->first_column = i1->first_column;
2084
0
    info->last_line    = i2->last_line;
2085
0
    info->last_column  = i2->last_column;
2086
0
    info->error        = i1->error;
2087
0
    return info;
2088
2089
0
 error:
2090
0
    unref(info, info);
2091
0
    return NULL;
2092
0
}
2093
2094
static struct value *typecheck_n(struct lens *l,
2095
0
                                 typecheck_n_make *make, int check) {
2096
0
    struct value *exn = NULL;
2097
0
    struct lens *acc = NULL;
2098
2099
0
    ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
2100
0
    for (int i=0; i < l->nchildren; i++) {
2101
0
        exn = typecheck(l->children[i], check);
2102
0
        if (exn != NULL)
2103
0
            goto error;
2104
0
    }
2105
0
    acc = ref(l->children[0]);
2106
0
    for (int i=1; i < l->nchildren; i++) {
2107
0
        struct info *info = merge_info(acc->info, l->children[i]->info);
2108
0
        ERR_NOMEM(info == NULL, acc->info);
2109
0
        exn = (*make)(info, acc, ref(l->children[i]), check);
2110
0
        if (EXN(exn))
2111
0
            goto error;
2112
0
        ensure(exn->tag == V_LENS, l->info);
2113
0
        acc = ref(exn->lens);
2114
0
        unref(exn, value);
2115
0
    }
2116
0
    l->value = acc->value;
2117
0
    l->key = acc->key;
2118
0
 error:
2119
0
    unref(acc, lens);
2120
0
    return exn;
2121
0
}
2122
2123
0
static struct value *typecheck(struct lens *l, int check) {
2124
0
    struct value *exn = NULL;
2125
2126
    /* Nonrecursive lenses are typechecked at build time */
2127
0
    if (! l->recursive)
2128
0
        return NULL;
2129
2130
0
    switch(l->tag) {
2131
0
    case L_CONCAT:
2132
0
        exn = typecheck_n(l, lns_make_concat, check);
2133
0
        break;
2134
0
    case L_UNION:
2135
0
        exn = typecheck_n(l, lns_make_union, check);
2136
0
        break;
2137
0
    case L_SUBTREE:
2138
0
    case L_SQUARE:
2139
0
        exn = typecheck(l->child, check);
2140
0
        break;
2141
0
    case L_STAR:
2142
0
        if (check)
2143
0
            exn = typecheck_iter(l->info, l->child);
2144
0
        if (exn == NULL && l->value)
2145
0
            exn = make_exn_value(l->info, "Multiple stores in iteration");
2146
0
        if (exn == NULL && l->key)
2147
0
            exn = make_exn_value(l->info, "Multiple keys/labels in iteration");
2148
0
        break;
2149
0
    case L_MAYBE:
2150
0
        if (check)
2151
0
            exn = typecheck_maybe(l->info, l->child);
2152
0
        l->key = l->child->key;
2153
0
        l->value = l->child->value;
2154
0
        break;
2155
0
    case L_REC:
2156
        /* Nothing to do */
2157
0
        break;
2158
0
    default:
2159
0
        BUG_LENS_TAG(l);
2160
0
        break;
2161
0
    }
2162
2163
0
    return exn;
2164
0
}
2165
2166
0
static struct value *rtn_approx(struct lens *rec, enum lens_type lt) {
2167
0
    struct rtn *rtn = NULL;
2168
0
    struct value *result = NULL;
2169
2170
0
    rtn = rtn_build(rec, lt);
2171
0
    RTN_BAIL(rtn);
2172
0
    ltype(rec, lt) = rtn_reduce(rtn, rec);
2173
0
    RTN_BAIL(rtn);
2174
0
    if (debugging("cf.approx"))
2175
0
        rtn_dot(rtn, "50-reduce");
2176
2177
0
    propagate_type(rec->body, lt);
2178
0
    ERR_BAIL(rec->info);
2179
2180
0
 done:
2181
0
    free_rtn(rtn);
2182
2183
0
    if (debugging("cf.approx")) {
2184
0
        printf("approx %s  => ", lens_type_names[lt]);
2185
0
        print_regexp(stdout, ltype(rec, lt));
2186
0
        printf("\n");
2187
0
    }
2188
2189
0
    return result;
2190
0
 error:
2191
0
    if (rtn->exn == NULL)
2192
0
        result = rec->info->error->exn;
2193
0
    else
2194
0
        result = ref(rtn->exn);
2195
0
    goto done;
2196
0
}
2197
2198
static struct value *
2199
exn_multiple_epsilons(struct lens *lens,
2200
0
                      struct lens *l1, struct lens *l2) {
2201
0
    char *fi = NULL;
2202
0
    struct value *exn = NULL;
2203
2204
0
    exn = make_exn_value(ref(lens->info),
2205
0
                         "more than one nullable branch in a union");
2206
0
    fi = format_info(l1->info);
2207
0
    exn_printf_line(exn, "First nullable lens: %s", fi);
2208
0
    FREE(fi);
2209
2210
0
    fi = format_info(l2->info);
2211
0
    exn_printf_line(exn, "Second nullable lens: %s", fi);
2212
0
    FREE(fi);
2213
2214
0
    return exn;
2215
0
}
2216
2217
/* Update lens->ctype_nullable and return 1 if there was a change,
2218
 * 0 if there was none */
2219
0
static int ctype_nullable(struct lens *lens, struct value **exn) {
2220
0
    int nullable = 0;
2221
0
    int ret = 0;
2222
0
    struct lens *null_lens = NULL;
2223
2224
0
    if (! lens->recursive)
2225
0
        return 0;
2226
2227
0
    switch(lens->tag) {
2228
0
    case L_CONCAT:
2229
0
        nullable = 1;
2230
0
        for (int i=0; i < lens->nchildren; i++) {
2231
0
            if (ctype_nullable(lens->children[i], exn))
2232
0
                ret = 1;
2233
0
            if (! lens->children[i]->ctype_nullable)
2234
0
                nullable = 0;
2235
0
        }
2236
0
        break;
2237
0
    case L_UNION:
2238
0
        for (int i=0; i < lens->nchildren; i++) {
2239
0
            if (ctype_nullable(lens->children[i], exn))
2240
0
                ret = 1;
2241
0
            if (lens->children[i]->ctype_nullable) {
2242
0
                if (nullable) {
2243
0
                    *exn = exn_multiple_epsilons(lens, null_lens,
2244
0
                                                 lens->children[i]);
2245
0
                    return 0;
2246
0
                }
2247
0
                nullable = 1;
2248
0
                null_lens = lens->children[i];
2249
0
            }
2250
0
        }
2251
0
        break;
2252
0
    case L_SUBTREE:
2253
0
    case L_SQUARE:
2254
0
        ret = ctype_nullable(lens->child, exn);
2255
0
        nullable = lens->child->ctype_nullable;
2256
0
        break;
2257
0
    case L_STAR:
2258
0
    case L_MAYBE:
2259
0
        nullable = 1;
2260
0
        break;
2261
0
    case L_REC:
2262
0
        nullable = lens->body->ctype_nullable;
2263
0
        break;
2264
0
    default:
2265
0
        BUG_LENS_TAG(lens);
2266
0
        break;
2267
0
    }
2268
0
    if (*exn != NULL)
2269
0
        return 0;
2270
0
    if (nullable != lens->ctype_nullable) {
2271
0
        ret = 1;
2272
0
        lens->ctype_nullable = nullable;
2273
0
    }
2274
0
    return ret;
2275
0
}
2276
2277
struct value *lns_check_rec(struct info *info,
2278
                            struct lens *body, struct lens *rec,
2279
0
                            int check) {
2280
    /* The types in the order of approximation */
2281
0
    static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
2282
0
    struct value *result = NULL;
2283
2284
0
    ensure(rec->tag == L_REC, info);
2285
0
    ensure(rec->rec_internal, info);
2286
2287
    /* The user might have written down a regular lens with 'let rec' */
2288
0
    if (! body->recursive) {
2289
0
        result = make_lens_value(ref(body));
2290
0
        ERR_NOMEM(result == NULL, info);
2291
0
        return result;
2292
0
    }
2293
2294
    /* To help memory management, we avoid the cycle inherent ina recursive
2295
     * lens by using two instances of an L_REC lens. One is marked with
2296
     * rec_internal, and used inside the body of the lens. The other is the
2297
     * "toplevel" which receives external references.
2298
     *
2299
     * The internal instance of the recursive lens is REC, the external one
2300
     * is TOP, constructed below
2301
     */
2302
0
    rec->body = body;                          /* REC does not own BODY */
2303
2304
0
    for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
2305
0
        result = rtn_approx(rec, types[i]);
2306
0
        ERR_BAIL(info);
2307
0
    }
2308
2309
0
    if (rec->atype == NULL) {
2310
0
        result = make_exn_value(ref(rec->info),
2311
0
        "recursive lens generates the empty language for its %s",
2312
0
         rec->ctype == NULL ? "ctype" : "atype");
2313
0
        goto error;
2314
0
    }
2315
2316
0
    rec->key = rec->body->key;
2317
0
    rec->value = rec->body->value;
2318
0
    rec->consumes_value = rec->body->consumes_value;
2319
2320
0
    while(ctype_nullable(rec->body, &result));
2321
0
    if (result != NULL)
2322
0
        goto error;
2323
0
    rec->ctype_nullable = rec->body->ctype_nullable;
2324
2325
0
    result = typecheck(rec->body, check);
2326
0
    if (result != NULL)
2327
0
        goto error;
2328
2329
0
    result = lns_make_rec(ref(rec->info));
2330
0
    struct lens *top = result->lens;
2331
0
    for (int t=0; t < ntypes; t++)
2332
0
        ltype(top, t) = ref(ltype(rec, t));
2333
0
    top->value = rec->value;
2334
0
    top->key = rec->key;
2335
0
    top->consumes_value = rec->consumes_value;
2336
0
    top->ctype_nullable = rec->ctype_nullable;
2337
0
    top->body = ref(body);
2338
0
    top->alias = rec;
2339
0
    top->rec_internal = 0;
2340
0
    rec->alias = top;
2341
2342
0
    top->jmt = jmt_build(top);
2343
0
    ERR_BAIL(info);
2344
2345
0
    return result;
2346
0
 error:
2347
0
    if (result != NULL && result->tag != V_EXN)
2348
0
        unref(result, value);
2349
0
    if (result == NULL)
2350
0
        result = info->error->exn;
2351
0
    return result;
2352
0
}
2353
2354
#if ENABLE_DEBUG
2355
0
void dump_lens_tree(struct lens *lens){
2356
0
    static int count = 0;
2357
0
    FILE *fp;
2358
2359
0
    fp = debug_fopen("lens_%02d_%s.dot", count++, ltag(lens));
2360
0
    if (fp == NULL)
2361
0
        return;
2362
2363
0
    fprintf(fp, "digraph \"%s\" {\n", "lens");
2364
0
    dump_lens(fp, lens);
2365
0
    fprintf(fp, "}\n");
2366
2367
0
    fclose(fp);
2368
0
}
2369
2370
0
void dump_lens(FILE *out, struct lens *lens){
2371
0
    int i = 0;
2372
0
    struct regexp *re;
2373
2374
0
    fprintf(out, "\"%p\" [ shape = box, label = \"%s\\n", lens, ltag(lens));
2375
2376
0
    for (int t=0; t < ntypes; t++) {
2377
0
        re = ltype(lens, t);
2378
0
        if (re == NULL)
2379
0
            continue;
2380
0
        fprintf(out, "%s=",lens_type_names[t]);
2381
0
        print_regexp(out, re);
2382
0
        fprintf(out, "\\n");
2383
0
    }
2384
2385
0
    fprintf(out, "recursive=%x\\n", lens->recursive);
2386
0
    fprintf(out, "rec_internal=%x\\n", lens->rec_internal);
2387
0
    fprintf(out, "consumes_value=%x\\n", lens->consumes_value);
2388
0
    fprintf(out, "ctype_nullable=%x\\n", lens->ctype_nullable);
2389
0
    fprintf(out, "\"];\n");
2390
0
    switch(lens->tag){
2391
0
    case L_DEL:
2392
0
        break;
2393
0
    case L_STORE:
2394
0
        break;
2395
0
    case L_VALUE:
2396
0
        break;
2397
0
    case L_KEY:
2398
0
        break;
2399
0
    case L_LABEL:
2400
0
        break;
2401
0
    case L_SEQ:
2402
0
        break;
2403
0
    case L_COUNTER:
2404
0
        break;
2405
0
    case L_CONCAT:
2406
0
        for(i = 0; i<lens->nchildren;i++){
2407
0
            fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2408
0
            dump_lens(out, lens->children[i]);
2409
0
        }
2410
0
        break;
2411
0
    case L_UNION:
2412
0
        for(i = 0; i<lens->nchildren;i++){
2413
0
            fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->children[i]);
2414
0
            dump_lens(out, lens->children[i]);
2415
0
        }
2416
0
        break;
2417
0
    case L_SUBTREE:
2418
0
        fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2419
0
        dump_lens(out, lens->child);
2420
0
        break;
2421
0
    case L_STAR:
2422
0
        fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2423
0
        dump_lens(out, lens->child);
2424
2425
0
        break;
2426
0
    case L_MAYBE:
2427
0
        fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2428
0
        dump_lens(out, lens->child);
2429
2430
0
        break;
2431
0
    case L_REC:
2432
0
        if (lens->rec_internal == 0){
2433
0
            fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2434
0
            dump_lens(out, lens->body);
2435
0
        }
2436
0
        break;
2437
0
    case L_SQUARE:
2438
0
        fprintf(out, "\"%p\" -> \"%p\"\n", lens, lens->child);
2439
0
        dump_lens(out, lens->child);
2440
0
        break;
2441
0
    default:
2442
0
        fprintf(out, "ERROR\n");
2443
0
        break;
2444
0
    }
2445
0
}
2446
#endif
2447
2448
/*
2449
 * Local variables:
2450
 *  indent-tabs-mode: nil
2451
 *  c-indent-level: 4
2452
 *  c-basic-offset: 4
2453
 *  tab-width: 4
2454
 * End:
2455
 */