Coverage Report

Created: 2023-09-23 08:17

/src/augeas/src/pathx.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * pathx.c: handling path expressions
3
 *
4
 * Copyright (C) 2007-2016 David Lutterkort
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
19
 *
20
 * Author: David Lutterkort <dlutter@redhat.com>
21
 */
22
23
#include <config.h>
24
#include <internal.h>
25
#include <stdint.h>
26
#include <stdbool.h>
27
#include <memory.h>
28
#include <ctype.h>
29
30
#include "ref.h"
31
#include "regexp.h"
32
#include "errcode.h"
33
34
static const char *const errcodes[] = {
35
    "no error",
36
    "empty name",
37
    "illegal string literal",
38
    "illegal number",                             /* PATHX_ENUMBER */
39
    "string missing ending ' or \"",
40
    "expected '='",
41
    "allocation failed",
42
    "unmatched '['",
43
    "unmatched '('",
44
    "expected a '/'",
45
    "internal error",                             /* PATHX_EINTERNAL */
46
    "type error",                                 /* PATHX_ETYPE */
47
    "undefined variable",                         /* PATHX_ENOVAR */
48
    "garbage at end of path expression",          /* PATHX_EEND */
49
    "no match for path expression",               /* PATHX_ENOMATCH */
50
    "wrong number of arguments in function call", /* PATHX_EARITY */
51
    "invalid regular expression",                 /* PATHX_EREGEXP */
52
    "too many matches",                           /* PATHX_EMMATCH */
53
    "wrong flag for regexp"                       /* PATHX_EREGEXPFLAG */
54
};
55
56
/*
57
 * Path expressions are strings that use a notation modelled on XPath.
58
 */
59
60
enum type {
61
    T_NONE = 0,     /* Not a type */
62
    T_NODESET,
63
    T_BOOLEAN,
64
    T_NUMBER,
65
    T_STRING,
66
    T_REGEXP
67
};
68
69
enum expr_tag {
70
    E_FILTER,
71
    E_BINARY,
72
    E_VALUE,
73
    E_VAR,
74
    E_APP
75
};
76
77
enum binary_op {
78
    OP_EQ,         /* '='  */
79
    OP_NEQ,        /* '!=' */
80
    OP_LT,         /* '<'  */
81
    OP_LE,         /* '<=' */
82
    OP_GT,         /* '>'  */
83
    OP_GE,         /* '>=' */
84
    OP_PLUS,       /* '+'  */
85
    OP_MINUS,      /* '-'  */
86
    OP_STAR,       /* '*'  */
87
    OP_AND,        /* 'and' */
88
    OP_OR,         /* 'or' */
89
    OP_ELSE,       /* 'else' */
90
    OP_RE_MATCH,   /* '=~' */
91
    OP_RE_NOMATCH, /* '!~' */
92
    OP_UNION       /* '|' */
93
};
94
95
struct pred {
96
    int               nexpr;
97
    struct expr     **exprs;
98
};
99
100
enum axis {
101
    SELF,
102
    CHILD,
103
    SEQ,
104
    DESCENDANT,
105
    DESCENDANT_OR_SELF,
106
    PARENT,
107
    ANCESTOR,
108
    ROOT,
109
    PRECEDING_SIBLING,
110
    FOLLOWING_SIBLING
111
};
112
113
/* This array is indexed by enum axis */
114
static const char *const axis_names[] = {
115
    "self",
116
    "child",
117
    "seq",        /* Like child, but only selects node-names which are integers */
118
    "descendant",
119
    "descendant-or-self",
120
    "parent",
121
    "ancestor",
122
    "root",
123
    "preceding-sibling",
124
    "following-sibling"
125
};
126
127
/* The characters that can follow a name in a location expression (aka path)
128
 * The parser will assume that name (path component) is finished when it
129
 * encounters any of these characters, unless they are escaped by preceding
130
 * them with a '\\'.
131
 *
132
 * See parse_name for the gory details
133
 */
134
static const char name_follow[] = "][|/=()!,";
135
136
/* Doubly linked list of location steps. Besides the information from the
137
 * path expression, also contains information to iterate over a node set,
138
 * in particular, the context node CTX for the step, and the current node
139
 * CUR within that context.
140
 */
141
struct step {
142
    struct step *next;
143
    enum axis    axis;
144
    char        *name;              /* NULL to match any name */
145
    struct pred *predicates;
146
};
147
148
/* Initialise the root nodeset with the first step */
149
static struct tree *step_root(struct step *step, struct tree *ctx,
150
                              struct tree *root_ctx);
151
/* Iteration over the nodes on a step, ignoring the predicates */
152
static struct tree *step_first(struct step *step, struct tree *ctx);
153
static struct tree *step_next(struct step *step, struct tree *ctx,
154
                              struct tree *node);
155
156
struct pathx_symtab {
157
    struct pathx_symtab *next;
158
    char                *name;
159
    struct value        *value;
160
};
161
162
struct pathx {
163
    struct state   *state;
164
    struct nodeset *nodeset;
165
    int             node;
166
    struct tree    *origin;
167
};
168
169
18.5M
#define L_BRACK '['
170
6.03M
#define R_BRACK ']'
171
172
struct locpath {
173
    struct step *steps;
174
};
175
176
struct nodeset {
177
    struct tree **nodes;
178
    size_t        used;
179
    size_t        size;
180
};
181
182
typedef uint32_t value_ind_t;
183
184
struct value {
185
    enum type tag;
186
    union {
187
        struct nodeset  *nodeset;     /* T_NODESET */
188
        int64_t          number;      /* T_NUMBER  */
189
        char            *string;      /* T_STRING  */
190
        bool             boolval;     /* T_BOOLEAN */
191
        struct regexp   *regexp;      /* T_REGEXP  */
192
    };
193
};
194
195
struct expr {
196
    enum expr_tag tag;
197
    enum type     type;
198
    union {
199
        struct {                       /* E_FILTER */
200
            struct expr     *primary;
201
            struct pred     *predicates;
202
            struct locpath  *locpath;
203
        };
204
        struct {                       /* E_BINARY */
205
            enum binary_op op;
206
            struct expr *left;
207
            struct expr *right;
208
            bool   left_matched;
209
        };
210
        value_ind_t      value_ind;    /* E_VALUE */
211
        char            *ident;        /* E_VAR */
212
        struct {                       /* E_APP */
213
            const struct func *func;
214
            struct expr       **args;
215
            /* If fold is true, replace this function invocation
216
             * with its value after the first time we evaluate this
217
             * expression */
218
            bool              fold;
219
        };
220
    };
221
};
222
223
struct locpath_trace {
224
    unsigned int      maxns;
225
    struct nodeset  **ns;
226
    struct locpath   *lp;
227
};
228
229
/* Internal state of the evaluator/parser */
230
struct state {
231
    pathx_errcode_t errcode;
232
    const char     *file;
233
    int             line;
234
    char           *errmsg;
235
236
    const char     *txt;  /* Entire expression */
237
    const char     *pos;  /* Current position within TXT during parsing */
238
239
    struct tree *ctx; /* The current node */
240
    uint            ctx_pos;
241
    uint            ctx_len;
242
243
    struct tree *root_ctx; /* Root context for relative paths */
244
245
    /* A table of all values. The table is dynamically reallocated, i.e.
246
     * pointers to struct value should not be used across calls that
247
     * might allocate new values
248
     *
249
     * value_pool[0] is always the boolean false, and value_pool[1]
250
     * always the boolean true
251
     */
252
    struct value  *value_pool;
253
    value_ind_t    value_pool_used;
254
    value_ind_t    value_pool_size;
255
    /* Stack of values (as indices into value_pool), with bottom of
256
       stack in values[0] */
257
    value_ind_t   *values;
258
    size_t         values_used;
259
    size_t         values_size;
260
    /* Stack of expressions, with bottom of stack in exprs[0] */
261
    struct expr  **exprs;
262
    size_t         exprs_used;
263
    size_t         exprs_size;
264
    /* Trace of a locpath evaluation, needed by pathx_expand_tree.
265
       Generally NULL, unless a trace is needed.
266
     */
267
    struct locpath_trace *locpath_trace;
268
    /* Symbol table for variable lookups */
269
    struct pathx_symtab *symtab;
270
    /* Error structure, used to communicate errors to struct augeas;
271
     * we never own this structure, and therefore never free it */
272
    struct error        *error;
273
    /* If a filter-expression contains the 'else' operator, we need
274
     * we need to evaluate the filter twice. The has_else flag
275
     * means we don't do this unless we really need to */
276
    bool                 has_else;
277
};
278
279
/* We consider NULL and the empty string to be equal */
280
ATTRIBUTE_PURE
281
1.20M
static inline int streqx(const char *s1, const char *s2) {
282
1.20M
    if (s1 == NULL)
283
3
        return (s2 == NULL || strlen(s2) == 0);
284
1.20M
    if (s2 == NULL)
285
77
        return strlen(s1) == 0;
286
1.20M
    return STREQ(s1, s2);
287
1.20M
}
288
289
/* Functions */
290
291
typedef void (*func_impl_t)(struct state *state, int nargs);
292
293
struct func {
294
    const char      *name;
295
    unsigned int     arity;
296
    enum type        type;
297
    bool             pure;      /* Result only depends on args */
298
    const enum type *arg_types;
299
    func_impl_t      impl;
300
};
301
302
static void func_last(struct state *state, int nargs);
303
static void func_position(struct state *state, int nargs);
304
static void func_count(struct state *state, int nargs);
305
static void func_label(struct state *state, int nargs);
306
static void func_regexp(struct state *state, int nargs);
307
static void func_regexp_flag(struct state *state, int nargs);
308
static void func_glob(struct state *state, int nargs);
309
static void func_int(struct state *state, int nargs);
310
static void func_not(struct state *state, int nargs);
311
static void func_modified(struct state *state, int nargs);
312
313
static const enum type arg_types_nodeset[] = { T_NODESET };
314
static const enum type arg_types_string[] = { T_STRING };
315
static const enum type arg_types_bool[] = { T_BOOLEAN };
316
static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
317
static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
318
319
static const struct func builtin_funcs[] = {
320
    { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
321
      .impl = func_last, .pure = false },
322
    { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
323
      .impl = func_position, .pure = false },
324
    { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
325
      .impl = func_label, .pure = false },
326
    { .name = "count", .arity = 1, .type = T_NUMBER,
327
      .arg_types = arg_types_nodeset,
328
      .impl = func_count, .pure = false },
329
    { .name = "regexp", .arity = 1, .type = T_REGEXP,
330
      .arg_types = arg_types_string,
331
      .impl = func_regexp, .pure = true },
332
    { .name = "regexp", .arity = 1, .type = T_REGEXP,
333
      .arg_types = arg_types_nodeset,
334
      .impl = func_regexp, .pure = true },
335
    { .name = "regexp", .arity = 2, .type = T_REGEXP,
336
      .arg_types = arg_types_string_string,
337
      .impl = func_regexp_flag, .pure = true },
338
    { .name = "regexp", .arity = 2, .type = T_REGEXP,
339
      .arg_types = arg_types_nodeset_string,
340
      .impl = func_regexp_flag, .pure = true },
341
    { .name = "glob", .arity = 1, .type = T_REGEXP,
342
      .arg_types = arg_types_string,
343
      .impl = func_glob, .pure = true },
344
    { .name = "glob", .arity = 1, .type = T_REGEXP,
345
      .arg_types = arg_types_nodeset,
346
      .impl = func_glob, .pure = true },
347
    { .name = "int", .arity = 1, .type = T_NUMBER,
348
      .arg_types = arg_types_string, .impl = func_int, .pure = false },
349
    { .name = "int", .arity = 1, .type = T_NUMBER,
350
      .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
351
    { .name = "int", .arity = 1, .type = T_NUMBER,
352
      .arg_types = arg_types_bool, .impl = func_int, .pure = false },
353
    { .name = "modified", .arity = 0, .type = T_BOOLEAN,
354
      .arg_types = NULL, .impl = func_modified, .pure = false },
355
    { .name = "not", .arity = 1, .type = T_BOOLEAN,
356
      .arg_types = arg_types_bool, .impl = func_not, .pure = true }
357
};
358
359
#define RET_ON_ERROR                                                    \
360
113M
    if (state->errcode != PATHX_NOERROR) return
361
362
#define RET0_ON_ERROR                                                   \
363
8.15M
    if (state->errcode != PATHX_NOERROR) return 0
364
365
#define STATE_ERROR(state, err)                                         \
366
1.58k
    do {                                                                \
367
1.58k
        state->errcode = err;                                           \
368
1.58k
        state->file = __FILE__;                                         \
369
1.58k
        state->line = __LINE__;                                         \
370
1.58k
    } while (0)
371
372
40.1M
#define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
373
374
0
#define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
375
376
/*
377
 * Free the various data structures
378
 */
379
380
static void free_expr(struct expr *expr);
381
382
8.55M
static void free_pred(struct pred *pred) {
383
8.55M
    if (pred == NULL)
384
8.54M
        return;
385
386
4.09M
    for (int i=0; i < pred->nexpr; i++) {
387
4.09M
        free_expr(pred->exprs[i]);
388
4.09M
    }
389
8.36k
    free(pred->exprs);
390
8.36k
    free(pred);
391
8.36k
}
392
393
322k
static void free_step(struct step *step) {
394
483k
    while (step != NULL) {
395
161k
        struct step *del = step;
396
161k
        step = del->next;
397
161k
        free(del->name);
398
161k
        free_pred(del->predicates);
399
161k
        free(del);
400
161k
    }
401
322k
}
402
403
4.26M
static void free_locpath(struct locpath *locpath) {
404
4.26M
    if (locpath == NULL)
405
322k
        return;
406
8.23M
    while (locpath->steps != NULL) {
407
4.29M
        struct step *step = locpath->steps;
408
4.29M
        locpath->steps = step->next;
409
4.29M
        free(step->name);
410
4.29M
        free_pred(step->predicates);
411
4.29M
        free(step);
412
4.29M
    }
413
3.94M
    free(locpath);
414
3.94M
}
415
416
20.6M
static void free_expr(struct expr *expr) {
417
20.6M
    if (expr == NULL)
418
4.10M
        return;
419
16.5M
    switch (expr->tag) {
420
4.10M
    case E_FILTER:
421
4.10M
        free_expr(expr->primary);
422
4.10M
        free_pred(expr->predicates);
423
4.10M
        free_locpath(expr->locpath);
424
4.10M
        break;
425
4.22M
    case E_BINARY:
426
4.22M
        free_expr(expr->left);
427
4.22M
        free_expr(expr->right);
428
4.22M
        break;
429
8.21M
    case E_VALUE:
430
8.21M
        break;
431
808
    case E_VAR:
432
808
        free(expr->ident);
433
808
        break;
434
777
    case E_APP:
435
1.84k
        for (int i=0; i < expr->func->arity; i++)
436
1.06k
            free_expr(expr->args[i]);
437
777
        free(expr->args);
438
777
        break;
439
0
    default:
440
0
        assert(0);
441
16.5M
    }
442
16.5M
    free(expr);
443
16.5M
}
444
445
6.62M
static void free_nodeset(struct nodeset *ns) {
446
6.62M
    if (ns != NULL) {
447
6.62M
        free(ns->nodes);
448
6.62M
        free(ns);
449
6.62M
    }
450
6.62M
}
451
452
/* Free all objects used by VALUE, but not VALUE itself */
453
11.0M
static void release_value(struct value *v) {
454
11.0M
    if (v == NULL)
455
4
        return;
456
457
11.0M
    switch (v->tag) {
458
2.53M
    case T_NODESET:
459
2.53M
        free_nodeset(v->nodeset);
460
2.53M
        break;
461
3.32k
    case T_STRING:
462
3.32k
        free(v->string);
463
3.32k
        break;
464
206k
    case T_BOOLEAN:
465
8.45M
    case T_NUMBER:
466
8.45M
        break;
467
12.7k
    case T_REGEXP:
468
12.7k
        unref(v->regexp, regexp);
469
12.7k
        break;
470
12.7k
    default:
471
0
        assert(0);
472
11.0M
    }
473
11.0M
}
474
475
103k
static void free_state(struct state *state) {
476
103k
    if (state == NULL)
477
0
        return;
478
479
4.09M
    for(int i=0; i < state->exprs_used; i++)
480
3.99M
        free_expr(state->exprs[i]);
481
103k
    free(state->exprs);
482
483
11.1M
    for(int i=0; i < state->value_pool_used; i++)
484
11.0M
        release_value(state->value_pool + i);
485
103k
    free(state->value_pool);
486
103k
    free(state->values);
487
103k
    free(state);
488
103k
}
489
490
103k
void free_pathx(struct pathx *pathx) {
491
103k
    if (pathx == NULL)
492
307
        return;
493
103k
    free_state(pathx->state);
494
103k
    free(pathx);
495
103k
}
496
497
/*
498
 * Nodeset helpers
499
 */
500
6.43M
static struct nodeset *make_nodeset(struct state *state) {
501
6.43M
    struct nodeset *result;
502
6.43M
    if (ALLOC(result) < 0)
503
6.43M
        STATE_ENOMEM;
504
6.43M
    return result;
505
6.43M
}
506
507
/* Add NODE to NS if it is not in NS yet. This relies on the flag
508
 * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
509
 * as soon as we are done adding nodes to it.
510
 */
511
static void ns_add(struct nodeset *ns, struct tree *node,
512
7.15M
                   struct state *state) {
513
7.15M
    if (node->added)
514
4.23k
        return;
515
7.15M
    if (ns->used >= ns->size) {
516
4.91M
        size_t size = 2 * ns->size;
517
4.91M
        if (size < 10) size = 10;
518
4.91M
        if (REALLOC_N(ns->nodes, size) < 0)
519
4.91M
            STATE_ENOMEM;
520
4.91M
        ns->size = size;
521
4.91M
    }
522
7.15M
    ns->nodes[ns->used] = node;
523
7.15M
    node->added = 1;
524
7.15M
    ns->used += 1;
525
7.15M
}
526
527
6.62M
static void ns_clear_added(struct nodeset *ns) {
528
18.5M
    for (int i=0; i < ns->used; i++)
529
11.9M
        ns->nodes[i]->added = 0;
530
6.62M
}
531
532
static struct nodeset *
533
clone_nodeset(struct nodeset *ns, struct state *state)
534
193k
{
535
193k
    struct nodeset *clone;
536
193k
    if (ALLOC(clone) < 0) {
537
0
        STATE_ENOMEM;
538
0
        return NULL;
539
0
    }
540
193k
    if (ALLOC_N(clone->nodes, ns->used) < 0) {
541
0
        free(clone);
542
0
        STATE_ENOMEM;
543
0
        return NULL;
544
0
    }
545
193k
    clone->used = ns->used;
546
193k
    clone->size = ns->used;
547
4.94M
    for (int i=0; i < ns->used; i++)
548
4.75M
        clone->nodes[i] = ns->nodes[i];
549
193k
    return clone;
550
193k
}
551
552
/*
553
 * Handling values
554
 */
555
10.7M
static value_ind_t make_value(enum type tag, struct state *state) {
556
10.7M
    assert(tag != T_BOOLEAN);
557
558
10.7M
    if (state->value_pool_used >= state->value_pool_size) {
559
5.91k
        value_ind_t new_size = 2*state->value_pool_size;
560
5.91k
        if (new_size <= state->value_pool_size) {
561
0
            STATE_ENOMEM;
562
0
            return 0;
563
0
        }
564
5.91k
        if (REALLOC_N(state->value_pool, new_size) < 0) {
565
0
            STATE_ENOMEM;
566
0
            return 0;
567
0
        }
568
5.91k
        state->value_pool_size = new_size;
569
5.91k
    }
570
10.7M
    state->value_pool[state->value_pool_used].tag = tag;
571
10.7M
    state->value_pool[state->value_pool_used].nodeset = NULL;
572
10.7M
    return state->value_pool_used++;
573
10.7M
}
574
575
0
static value_ind_t clone_value(struct value *v, struct state *state) {
576
0
    value_ind_t vind = make_value(v->tag, state);
577
0
    RET0_ON_ERROR;
578
0
    struct value *clone = state->value_pool + vind;
579
580
0
    switch (v->tag) {
581
0
    case T_NODESET:
582
0
        clone->nodeset = clone_nodeset(v->nodeset, state);
583
0
        break;
584
0
    case T_NUMBER:
585
0
        clone->number = v->number;
586
0
        break;
587
0
    case T_STRING:
588
0
        clone->string = strdup(v->string);
589
0
        if (clone->string == NULL) {
590
0
            STATE_ENOMEM;
591
0
        }
592
0
        break;
593
0
    case T_BOOLEAN:
594
0
        clone->boolval = v->boolval;
595
0
        break;
596
0
    case T_REGEXP:
597
0
        clone->regexp = ref(v->regexp);
598
0
        break;
599
0
    default:
600
0
        assert(0);
601
0
    }
602
0
    return vind;
603
0
}
604
605
6.46M
static value_ind_t pop_value_ind(struct state *state) {
606
6.46M
    if (state->values_used > 0) {
607
6.46M
        state->values_used -= 1;
608
6.46M
        return state->values[state->values_used];
609
6.46M
    } else {
610
0
        STATE_ERROR(state, PATHX_EINTERNAL);
611
0
        assert(0);
612
0
        return 0;
613
0
    }
614
6.46M
}
615
616
6.46M
static struct value *pop_value(struct state *state) {
617
6.46M
    value_ind_t vind = pop_value_ind(state);
618
6.46M
    if (HAS_ERROR(state))
619
0
        return NULL;
620
6.46M
    return state->value_pool + vind;
621
6.46M
}
622
623
6.46M
static void push_value(value_ind_t vind, struct state *state) {
624
6.46M
    if (state->values_used >= state->values_size) {
625
101k
        size_t new_size = 2*state->values_size;
626
101k
        if (new_size == 0) new_size = 8;
627
101k
        if (REALLOC_N(state->values, new_size) < 0) {
628
0
            STATE_ENOMEM;
629
0
            return;
630
0
        }
631
101k
        state->values_size = new_size;
632
101k
    }
633
6.46M
    state->values[state->values_used++] = vind;
634
6.46M
}
635
636
1.98M
static void push_boolean_value(int b, struct state *state) {
637
1.98M
    push_value(b != 0, state);
638
1.98M
}
639
640
ATTRIBUTE_PURE
641
9.32M
static struct value *expr_value(struct expr *expr, struct state *state) {
642
9.32M
    return state->value_pool + expr->value_ind;
643
9.32M
}
644
645
/*************************************************************************
646
 * Evaluation
647
 ************************************************************************/
648
static void eval_expr(struct expr *expr, struct state *state);
649
650
651
#define ensure_arity(min, max)                                          \
652
52.4k
    if (nargs < min || nargs > max) {                                   \
653
0
        STATE_ERROR(state, PATHX_EINTERNAL);                            \
654
0
        return;                                                         \
655
0
    }
656
657
0
static void func_last(struct state *state, int nargs) {
658
0
    ensure_arity(0, 0);
659
0
    value_ind_t vind = make_value(T_NUMBER, state);
660
0
    RET_ON_ERROR;
661
662
0
    state->value_pool[vind].number = state->ctx_len;
663
0
    push_value(vind, state);
664
0
}
665
666
0
static void func_position(struct state *state, int nargs) {
667
0
    ensure_arity(0, 0);
668
0
    value_ind_t vind = make_value(T_NUMBER, state);
669
0
    RET_ON_ERROR;
670
671
0
    state->value_pool[vind].number = state->ctx_pos;
672
0
    push_value(vind, state);
673
0
}
674
675
0
static void func_count(struct state *state, int nargs) {
676
0
    ensure_arity(1, 1);
677
0
    value_ind_t vind = make_value(T_NUMBER, state);
678
0
    RET_ON_ERROR;
679
680
0
    struct value *ns = pop_value(state);
681
0
    state->value_pool[vind].number = ns->nodeset->used;
682
0
    push_value(vind, state);
683
0
}
684
685
0
static void func_label(struct state *state, int nargs) {
686
0
    ensure_arity(0, 0);
687
0
    value_ind_t vind = make_value(T_STRING, state);
688
0
    char *s;
689
690
0
    RET_ON_ERROR;
691
692
0
    if (state->ctx->label)
693
0
        s = strdup(state->ctx->label);
694
0
    else
695
0
        s = strdup("");
696
0
    if (s == NULL) {
697
0
        STATE_ENOMEM;
698
0
        return;
699
0
    }
700
0
    state->value_pool[vind].string = s;
701
0
    push_value(vind, state);
702
0
}
703
704
39.7k
static void func_int(struct state *state, int nargs) {
705
39.7k
    ensure_arity(1, 1);
706
39.7k
    value_ind_t vind = make_value(T_NUMBER, state);
707
39.7k
    int64_t i = -1;
708
39.7k
    RET_ON_ERROR;
709
710
39.7k
    struct value *v = pop_value(state);
711
39.7k
    if (v->tag == T_BOOLEAN) {
712
39.7k
        i = v->boolval;
713
39.7k
    } else {
714
0
        const char *s = NULL;
715
0
        if (v->tag == T_STRING) {
716
0
            s = v->string;
717
0
        } else {
718
            /* T_NODESET */
719
0
            if (v->nodeset->used != 1) {
720
0
                STATE_ERROR(state, PATHX_EMMATCH);
721
0
                return;
722
0
            }
723
0
            s = v->nodeset->nodes[0]->value;
724
0
        }
725
0
        if (s != NULL) {
726
0
            int r;
727
0
            r = xstrtoint64(s, 10, &i);
728
0
            if (r < 0) {
729
0
                STATE_ERROR(state, PATHX_ENUMBER);
730
0
                return;
731
0
            }
732
0
        }
733
0
    }
734
39.7k
    state->value_pool[vind].number = i;
735
39.7k
    push_value(vind, state);
736
39.7k
}
737
738
0
static void func_modified(struct state *state, int nargs) {
739
0
    ensure_arity(0, 0);
740
741
0
    push_boolean_value(state->ctx->dirty , state);
742
0
}
743
744
0
static void func_not(struct state *state, int nargs) {
745
0
    ensure_arity(1, 1);
746
0
    RET_ON_ERROR;
747
748
0
    struct value *v = pop_value(state);
749
0
    if (v->tag == T_BOOLEAN) {
750
0
        push_boolean_value(! v->boolval, state);
751
0
    }
752
0
}
753
754
static struct regexp *
755
12.5k
nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
756
12.5k
    struct regexp *result = NULL;
757
12.5k
    struct regexp **rx = NULL;
758
12.5k
    int used = 0;
759
760
18.5k
    for (int i = 0; i < ns->used; i++) {
761
6.04k
        if (ns->nodes[i]->value != NULL)
762
2.22k
            used += 1;
763
6.04k
    }
764
765
12.5k
    if (used == 0) {
766
        /* If the nodeset is empty, make sure we produce a regexp
767
         * that never matches anything */
768
12.2k
        result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
769
12.2k
    } else {
770
325
        if (ALLOC_N(rx, ns->used) < 0)
771
0
            goto error;
772
4.92k
        for (int i=0; i < ns->used; i++) {
773
4.60k
            if (ns->nodes[i]->value == NULL)
774
2.37k
                continue;
775
776
2.22k
            if (glob)
777
0
                rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
778
2.22k
            else
779
2.22k
                rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
780
2.22k
            if (rx[i] == NULL)
781
0
                goto error;
782
2.22k
        }
783
325
        result = regexp_union_n(info, ns->used, rx);
784
325
    }
785
786
12.5k
 error:
787
12.5k
    if (rx != NULL) {
788
4.92k
        for (int i=0; i < ns->used; i++)
789
4.60k
            unref(rx[i], regexp);
790
325
        free(rx);
791
325
    }
792
12.5k
    return result;
793
12.5k
}
794
795
12.7k
static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
796
12.7k
    value_ind_t vind = make_value(T_REGEXP, state);
797
12.7k
    int r;
798
799
12.7k
    RET_ON_ERROR;
800
801
12.7k
    struct value *v = pop_value(state);
802
12.7k
    struct regexp *rx = NULL;
803
804
12.7k
    if (v->tag == T_STRING) {
805
198
        if (glob)
806
0
            rx = make_regexp_from_glob(state->error->info, v->string);
807
198
        else
808
198
            rx = make_regexp_unescape(state->error->info, v->string, nocase);
809
12.5k
    } else if (v->tag == T_NODESET) {
810
12.5k
        rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
811
12.5k
    } else {
812
0
        assert(0);
813
0
    }
814
815
12.7k
    if (rx == NULL) {
816
0
        STATE_ENOMEM;
817
0
        return;
818
0
    }
819
820
12.7k
    state->value_pool[vind].regexp = rx;
821
12.7k
    r = regexp_compile(rx);
822
12.7k
    if (r < 0) {
823
27
        const char *msg;
824
27
        regexp_check(rx, &msg);
825
27
        state->errmsg = strdup(msg);
826
27
        STATE_ERROR(state, PATHX_EREGEXP);
827
27
        return;
828
27
    }
829
12.7k
    push_value(vind, state);
830
12.7k
}
831
832
3.10k
static void func_regexp(struct state *state, int nargs) {
833
3.10k
    ensure_arity(1, 1);
834
3.10k
    func_regexp_or_glob(state, 0, 0);
835
3.10k
}
836
837
9.62k
static void func_regexp_flag(struct state *state, int nargs) {
838
9.62k
    ensure_arity(2, 2);
839
9.62k
    int nocase = 0;
840
9.62k
    struct value *f = pop_value(state);
841
842
9.62k
    if (STREQ("i", f->string))
843
9.62k
        nocase = 1;
844
0
    else
845
0
        STATE_ERROR(state, PATHX_EREGEXPFLAG);
846
847
9.62k
    func_regexp_or_glob(state, 0, nocase);
848
9.62k
}
849
850
0
static void func_glob(struct state *state, int nargs) {
851
0
    ensure_arity(1, 1);
852
0
    func_regexp_or_glob(state, 1, 0);
853
0
}
854
855
78.9k
static bool coerce_to_bool(struct value *v) {
856
78.9k
    switch (v->tag) {
857
39.4k
    case T_NODESET:
858
39.4k
        return v->nodeset->used > 0;
859
0
        break;
860
39.4k
    case T_BOOLEAN:
861
39.4k
        return v->boolval;
862
0
        break;
863
0
    case T_NUMBER:
864
0
        return v->number > 0;
865
0
        break;
866
0
    case T_STRING:
867
0
        return strlen(v->string) > 0;
868
0
        break;
869
0
    case T_REGEXP:
870
0
        return true;
871
0
    default:
872
0
        assert(0);
873
0
        return false;
874
78.9k
    }
875
78.9k
}
876
877
static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
878
39.7k
                                   int neq) {
879
39.8k
    for (int i1=0; i1 < ns1->used; i1++) {
880
85
        struct tree *t1 = ns1->nodes[i1];
881
86
        for (int i2=0; i2 < ns2->used; i2++) {
882
20
            struct tree *t2 = ns2->nodes[i2];
883
20
            if (neq) {
884
20
                if (!streqx(t1->value, t2->value))
885
19
                    return 1;
886
20
            } else {
887
0
                if (streqx(t1->value, t2->value))
888
0
                    return 1;
889
0
            }
890
20
        }
891
85
    }
892
39.7k
    return 0;
893
39.7k
}
894
895
static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
896
0
                                  int neq) {
897
0
    for (int i=0; i < ns->used; i++) {
898
0
        struct tree *t = ns->nodes[i];
899
0
        if (neq) {
900
0
            if (!streqx(t->value, s))
901
0
                return 1;
902
0
        } else {
903
0
            if (streqx(t->value, s))
904
0
                return 1;
905
0
        }
906
0
    }
907
0
    return 0;
908
0
}
909
910
39.7k
static void eval_eq(struct state *state, int neq) {
911
39.7k
    struct value *r = pop_value(state);
912
39.7k
    struct value *l = pop_value(state);
913
39.7k
    int res;
914
915
39.7k
    if (l->tag == T_NODESET && r->tag == T_NODESET) {
916
39.7k
        res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
917
39.7k
    } else if (l->tag == T_NODESET) {
918
0
        res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
919
0
    } else if (r->tag == T_NODESET) {
920
0
        res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
921
0
    } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
922
0
        if (neq)
923
0
            res = (l->number != r->number);
924
0
        else
925
0
            res = (l->number == r->number);
926
0
    } else {
927
0
        assert(l->tag == T_STRING);
928
0
        assert(r->tag == T_STRING);
929
0
        res = streqx(l->string, r->string);
930
0
        if (neq)
931
0
            res = !res;
932
0
    }
933
39.7k
    RET_ON_ERROR;
934
935
39.7k
    push_boolean_value(res, state);
936
39.7k
}
937
938
500
static void eval_arith(struct state *state, enum binary_op op) {
939
500
    value_ind_t vind = make_value(T_NUMBER, state);
940
500
    struct value *r = pop_value(state);
941
500
    struct value *l = pop_value(state);
942
500
    int res;
943
944
500
    assert(l->tag == T_NUMBER);
945
500
    assert(r->tag == T_NUMBER);
946
947
500
    RET_ON_ERROR;
948
949
500
    if (op == OP_PLUS)
950
0
        res = l->number + r->number;
951
500
    else if (op == OP_MINUS)
952
0
        res = l->number - r->number;
953
500
    else if (op == OP_STAR)
954
500
        res = l->number * r->number;
955
0
    else
956
0
        assert(0);
957
958
500
    state->value_pool[vind].number = res;
959
500
    push_value(vind, state);
960
500
}
961
962
0
static void eval_rel(struct state *state, bool greater, bool equal) {
963
0
    struct value *r, *l;
964
0
    int res;
965
966
    /* We always check l < r or l <= r */
967
0
    if (greater) {
968
0
        l = pop_value(state);
969
0
        r = pop_value(state);
970
0
    } else {
971
0
        r = pop_value(state);
972
0
        l = pop_value(state);
973
0
    }
974
0
    if (l->tag == T_NUMBER) {
975
0
        if (equal)
976
0
            res = (l->number <= r->number);
977
0
        else
978
0
            res = (l->number < r->number);
979
0
    } else if (l->tag == T_STRING) {
980
0
        int cmp = strcmp(l->string, r->string);
981
0
        if (equal)
982
0
            res = cmp <= 0;
983
0
        else
984
0
            res = cmp < 0;
985
0
    } else {
986
0
        assert(0);
987
0
    }
988
989
0
    push_boolean_value(res, state);
990
0
}
991
992
20
static void eval_and_or(struct state *state, enum binary_op op) {
993
20
    struct value *r = pop_value(state);
994
20
    struct value *l = pop_value(state);
995
20
    bool left = coerce_to_bool(l);
996
20
    bool right = coerce_to_bool(r);
997
998
999
20
    if (op == OP_AND)
1000
20
        push_boolean_value(left && right, state);
1001
0
    else
1002
0
        push_boolean_value(left || right, state);
1003
20
}
1004
1005
39.5k
static void eval_else(struct state *state, struct expr *expr, struct locpath_trace *lpt_right) {
1006
39.5k
    struct value *r = pop_value(state);
1007
39.5k
    struct value *l = pop_value(state);
1008
1009
39.5k
    if ( l->tag == T_NODESET && r->tag == T_NODESET ) {
1010
73
        int discard_maxns=0;
1011
73
        struct nodeset **discard_ns=NULL;
1012
73
        struct locpath_trace *lpt = state->locpath_trace;
1013
73
        value_ind_t vind = make_value(T_NODESET, state);
1014
73
        if (l->nodeset->used >0 || expr->left_matched) {
1015
40
            expr->left_matched = 1;
1016
40
            state->value_pool[vind].nodeset = clone_nodeset(l->nodeset, state);
1017
40
            if( lpt_right != NULL ) {
1018
40
                discard_maxns = lpt_right->maxns;
1019
40
                discard_ns    = lpt_right->ns;
1020
40
            }
1021
40
        } else {
1022
33
            state->value_pool[vind].nodeset = clone_nodeset(r->nodeset, state);
1023
33
            if( lpt != NULL && lpt_right != NULL ) {
1024
0
                discard_maxns = lpt->maxns;
1025
0
                discard_ns    = lpt->ns;
1026
0
                lpt->maxns = lpt_right->maxns;
1027
0
                lpt->ns    = lpt_right->ns;
1028
0
                lpt->lp    = lpt_right->lp;
1029
0
            }
1030
33
        }
1031
73
        push_value(vind, state);
1032
73
        if ( lpt != NULL && lpt_right != NULL ) {
1033
0
            for (int i=0; i < discard_maxns; i++)
1034
0
                free_nodeset(discard_ns[i]);
1035
0
            FREE(discard_ns);
1036
0
        }
1037
39.4k
    } else {
1038
39.4k
        bool left = coerce_to_bool(l);
1039
39.4k
        bool right = coerce_to_bool(r);
1040
1041
39.4k
        expr->left_matched = expr->left_matched || left;
1042
39.4k
        if (expr->left_matched) {
1043
            /* One or more LHS have matched, so we're not interested in the right expr */
1044
0
            push_boolean_value(left, state);
1045
39.4k
        } else {
1046
            /* no LHS has matched (yet), so keep the right expr */
1047
            /* If this is the 2nd pass, and expr->left_matched is true, no RHS nodes will be included */
1048
39.4k
            push_boolean_value(right, state);
1049
39.4k
        }
1050
39.4k
    }
1051
39.5k
}
1052
1053
static bool eval_re_match_str(struct state *state, struct regexp *rx,
1054
1.97M
                              const char *str) {
1055
1.97M
    int r;
1056
1057
1.97M
    if (str == NULL)
1058
1.55M
        str = "";
1059
1060
1.97M
    r = regexp_match(rx, str, strlen(str), 0, NULL);
1061
1.97M
    if (r == -2) {
1062
0
        STATE_ERROR(state, PATHX_EINTERNAL);
1063
1.97M
    } else if (r == -3) {
1064
        /* We should never get this far; func_regexp should catch
1065
         * invalid regexps */
1066
0
        assert(false);
1067
0
    }
1068
1.97M
    return r == strlen(str);
1069
1.97M
}
1070
1071
193k
static void eval_union(struct state *state, struct locpath_trace *lpt_right) {
1072
193k
    value_ind_t vind = make_value(T_NODESET, state);
1073
193k
    struct value *r = pop_value(state);
1074
193k
    struct value *l = pop_value(state);
1075
193k
    struct nodeset *res = NULL;
1076
193k
    struct locpath_trace *lpt = state->locpath_trace;
1077
1078
193k
    assert(l->tag == T_NODESET);
1079
193k
    assert(r->tag == T_NODESET);
1080
1081
193k
    RET_ON_ERROR;
1082
1083
193k
    res = clone_nodeset(l->nodeset, state);
1084
193k
    RET_ON_ERROR;
1085
336k
    for (int i=0; i < r->nodeset->used; i++) {
1086
142k
        ns_add(res, r->nodeset->nodes[i], state);
1087
142k
        if (HAS_ERROR(state))
1088
0
            goto error;
1089
142k
    }
1090
193k
    state->value_pool[vind].nodeset = res;
1091
193k
    push_value(vind, state);
1092
1093
193k
    if( lpt != NULL && lpt_right != NULL ) {
1094
0
        STATE_ERROR(state, PATHX_EMMATCH);
1095
0
        for (int i=0; i < lpt_right->maxns; i++)
1096
0
            free_nodeset(lpt_right->ns[i]);
1097
0
        FREE(lpt_right->ns);
1098
0
    }
1099
193k
 error:
1100
193k
    ns_clear_added(res);
1101
193k
}
1102
1103
0
static void eval_concat_string(struct state *state) {
1104
0
    value_ind_t vind = make_value(T_STRING, state);
1105
0
    struct value *r = pop_value(state);
1106
0
    struct value *l = pop_value(state);
1107
0
    char *res = NULL;
1108
1109
0
    RET_ON_ERROR;
1110
1111
0
    if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1112
0
        STATE_ENOMEM;
1113
0
        return;
1114
0
    }
1115
0
    strcpy(res, l->string);
1116
0
    strcat(res, r->string);
1117
0
    state->value_pool[vind].string = res;
1118
0
    push_value(vind, state);
1119
0
}
1120
1121
0
static void eval_concat_regexp(struct state *state) {
1122
0
    value_ind_t vind = make_value(T_REGEXP, state);
1123
0
    struct value *r = pop_value(state);
1124
0
    struct value *l = pop_value(state);
1125
0
    struct regexp *rx = NULL;
1126
1127
0
    RET_ON_ERROR;
1128
1129
0
    rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1130
0
    if (rx == NULL) {
1131
0
        STATE_ENOMEM;
1132
0
        return;
1133
0
    }
1134
1135
0
    state->value_pool[vind].regexp = rx;
1136
0
    push_value(vind, state);
1137
0
}
1138
1139
1.90M
static void eval_re_match(struct state *state, enum binary_op op) {
1140
1.90M
    struct value *rx  = pop_value(state);
1141
1.90M
    struct value *v = pop_value(state);
1142
1143
1.90M
    bool result = false;
1144
1145
1.90M
    if (v->tag == T_STRING) {
1146
32
        result = eval_re_match_str(state, rx->regexp, v->string);
1147
32
        RET_ON_ERROR;
1148
1.90M
    } else if (v->tag == T_NODESET) {
1149
3.87M
        for (int i=0; i < v->nodeset->used && result == false; i++) {
1150
1.97M
            struct tree *t = v->nodeset->nodes[i];
1151
1.97M
            result = eval_re_match_str(state, rx->regexp, t->value);
1152
1.97M
            RET_ON_ERROR;
1153
1.97M
        }
1154
1.90M
    }
1155
1.90M
    if (op == OP_RE_NOMATCH)
1156
1.90M
        result = !result;
1157
1.90M
    push_boolean_value(result, state);
1158
1.90M
}
1159
1160
2.17M
static void eval_binary(struct expr *expr, struct state *state) {
1161
2.17M
    struct locpath_trace *lpt = state->locpath_trace;
1162
2.17M
    struct locpath_trace lpt_right;
1163
1164
2.17M
    eval_expr(expr->left, state);
1165
2.17M
    if ( lpt != NULL && expr->type == T_NODESET ) {
1166
0
       MEMZERO(&lpt_right, 1);
1167
0
       state->locpath_trace = &lpt_right;
1168
0
    }
1169
2.17M
    eval_expr(expr->right, state);
1170
2.17M
    state->locpath_trace = lpt;
1171
2.17M
    RET_ON_ERROR;
1172
1173
2.17M
    switch (expr->op) {
1174
39.7k
    case OP_EQ:
1175
39.7k
        eval_eq(state, 0);
1176
39.7k
        break;
1177
37
    case OP_NEQ:
1178
37
        eval_eq(state, 1);
1179
37
        break;
1180
0
    case OP_LT:
1181
0
        eval_rel(state, false, false);
1182
0
        break;
1183
0
    case OP_LE:
1184
0
        eval_rel(state, false, true);
1185
0
        break;
1186
0
    case OP_GT:
1187
0
        eval_rel(state, true, false);
1188
0
        break;
1189
0
    case OP_GE:
1190
0
        eval_rel(state, true, true);
1191
0
        break;
1192
0
    case OP_PLUS:
1193
0
        if (expr->type == T_NUMBER)
1194
0
            eval_arith(state, expr->op);
1195
0
        else if (expr->type == T_STRING)
1196
0
            eval_concat_string(state);
1197
0
        else if (expr->type == T_REGEXP)
1198
0
            eval_concat_regexp(state);
1199
0
        break;
1200
0
    case OP_MINUS:
1201
500
    case OP_STAR:
1202
500
        eval_arith(state, expr->op);
1203
500
        break;
1204
20
    case OP_AND:
1205
20
    case OP_OR:
1206
20
        eval_and_or(state, expr->op);
1207
20
        break;
1208
39.5k
    case OP_ELSE:
1209
39.5k
        eval_else(state, expr, &lpt_right);
1210
39.5k
        break;
1211
193k
    case OP_UNION:
1212
193k
        eval_union(state, &lpt_right);
1213
193k
        break;
1214
2
    case OP_RE_MATCH:
1215
1.90M
    case OP_RE_NOMATCH:
1216
1.90M
        eval_re_match(state, expr->op);
1217
1.90M
        break;
1218
0
    default:
1219
0
        assert(0);
1220
2.17M
    }
1221
2.17M
}
1222
1223
52.4k
static void eval_app(struct expr *expr, struct state *state) {
1224
52.4k
    assert(expr->tag == E_APP);
1225
1226
114k
    for (int i=0; i < expr->func->arity; i++) {
1227
62.0k
        eval_expr(expr->args[i], state);
1228
62.0k
        RET_ON_ERROR;
1229
62.0k
    }
1230
52.4k
    expr->func->impl(state, expr->func->arity);
1231
52.4k
}
1232
1233
1.95M
static bool eval_pred(struct expr *expr, struct state *state) {
1234
1.95M
    eval_expr(expr, state);
1235
1.95M
    RET0_ON_ERROR;
1236
1237
1.95M
    struct value *v = pop_value(state);
1238
1.95M
    switch(v->tag) {
1239
1.90M
    case T_BOOLEAN:
1240
1.90M
        return v->boolval;
1241
40.2k
    case T_NUMBER:
1242
40.2k
        return (state->ctx_pos == v->number);
1243
10.3k
    case T_NODESET:
1244
10.3k
        return v->nodeset->used > 0;
1245
0
    case T_STRING:
1246
0
        return streqv(state->ctx->value, v->string);
1247
0
    default:
1248
0
        assert(0);
1249
0
        return false;
1250
1.95M
    }
1251
1.95M
}
1252
1253
/* Remove COUNT successive entries from NS. The first entry to remove is at
1254
   IND */
1255
47.6k
static void ns_remove(struct nodeset *ns, int ind, int count) {
1256
47.6k
    if (count < 1)
1257
0
        return;
1258
47.6k
    memmove(ns->nodes + ind, ns->nodes + ind+count,
1259
47.6k
            sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1260
47.6k
    ns->used -= count;
1261
47.6k
}
1262
1263
/*
1264
 * Remove all nodes from NS for which one of PRED is false
1265
 */
1266
static void ns_filter(struct nodeset *ns, struct pred *predicates,
1267
4.08M
                      struct state *state) {
1268
4.08M
    if (predicates == NULL)
1269
4.03M
        return;
1270
1271
51.0k
    struct tree *old_ctx = state->ctx;
1272
51.0k
    uint old_ctx_len = state->ctx_len;
1273
51.0k
    uint old_ctx_pos = state->ctx_pos;
1274
1275
102k
    for (int p=0; p < predicates->nexpr; p++) {
1276
51.0k
        if ( state->has_else) {
1277
1.01M
            for (int i=0; i < ns->used; i++) {
1278
                /* 1st pass, check if any else statements have match on the left */
1279
                /* Don't delete any nodes (yet) */
1280
970k
                state->ctx = ns->nodes[i];
1281
970k
                eval_pred(predicates->exprs[p], state);
1282
970k
            }
1283
49.5k
        }
1284
51.0k
        int first_bad = -1;  /* The index of the first non-matching node */
1285
51.0k
        state->ctx_len = ns->used;
1286
51.0k
        state->ctx_pos = 1;
1287
1.03M
        for (int i=0; i < ns->used; state->ctx_pos++) {
1288
981k
            state->ctx = ns->nodes[i];
1289
981k
            bool match = eval_pred(predicates->exprs[p], state);
1290
981k
            RET_ON_ERROR;
1291
            /* We remove non-matching nodes from NS in batches; this logic
1292
             * makes sure that we only call ns_remove at the end of a run
1293
             * of non-matching nodes
1294
             */
1295
981k
            if (match) {
1296
48.3k
                if (first_bad >= 0) {
1297
18.0k
                    ns_remove(ns, first_bad, i - first_bad);
1298
18.0k
                    i = first_bad + 1;
1299
30.2k
                } else {
1300
30.2k
                    i += 1;
1301
30.2k
                }
1302
48.3k
                first_bad = -1;
1303
933k
            } else {
1304
933k
                if (first_bad == -1)
1305
47.6k
                    first_bad = i;
1306
933k
                i += 1;
1307
933k
            }
1308
981k
        }
1309
50.9k
        if (first_bad >= 0) {
1310
29.5k
            ns_remove(ns, first_bad, ns->used - first_bad);
1311
29.5k
        }
1312
50.9k
    }
1313
1314
50.9k
    state->ctx = old_ctx;
1315
50.9k
    state->ctx_pos = old_ctx_pos;
1316
50.9k
    state->ctx_len = old_ctx_len;
1317
50.9k
}
1318
1319
/* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1320
4.09M
static bool position_pred(struct pred *pred) {
1321
4.09M
    return pred != NULL &&
1322
4.09M
        pred->nexpr == 1 &&
1323
4.09M
        pred->exprs[0]->tag == E_VALUE &&
1324
4.09M
        pred->exprs[0]->type == T_NUMBER;
1325
4.09M
}
1326
1327
/* Return the tree node at the position implied by STEP->PREDICATES. It is
1328
   assumed and required that STEP->PREDICATES is actually a
1329
   POSITION_PRED.
1330
1331
   This method hand-optimizes the important case of a path expression like
1332
   'service[42]'
1333
*/
1334
static struct tree *position_filter(struct nodeset *ns,
1335
                                    struct step *step,
1336
6.72k
                                    struct state *state) {
1337
6.72k
    int value_ind = step->predicates->exprs[0]->value_ind;
1338
6.72k
    int number = state->value_pool[value_ind].number;
1339
1340
6.72k
    int pos = 1;
1341
11.7k
    for (int i=0; i < ns->used; i++) {
1342
5.04k
        for (struct tree *node = step_first(step, ns->nodes[i]);
1343
15.1k
             node != NULL;
1344
10.0k
             node = step_next(step, ns->nodes[i], node), pos++) {
1345
10.0k
            if (pos == number)
1346
0
                return node;
1347
10.0k
        }
1348
5.04k
    }
1349
1350
6.72k
    return NULL;
1351
6.72k
}
1352
1353
/* Return an array of nodesets, one for each step in the locpath.
1354
 *
1355
 * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1356
 * contain the nodes that matched the entire locpath
1357
 */
1358
static void ns_from_locpath(struct locpath *lp, uint *maxns,
1359
                            struct nodeset ***ns,
1360
                            const struct nodeset *root,
1361
2.33M
                            struct state *state) {
1362
2.33M
    struct tree *old_ctx = state->ctx;
1363
2.33M
    *maxns = 0;
1364
1365
2.33M
    ensure(lp != NULL, state);
1366
1367
2.33M
    *ns = NULL;
1368
2.33M
    list_for_each(step, lp->steps)
1369
4.09M
        *maxns += 1;
1370
2.33M
    if (ALLOC_N(*ns, *maxns+1) < 0) {
1371
0
        STATE_ERROR(state, PATHX_ENOMEM);
1372
0
        goto error;
1373
0
    }
1374
8.77M
    for (int i=0; i <= *maxns; i++) {
1375
6.43M
        (*ns)[i] = make_nodeset(state);
1376
6.43M
        if (HAS_ERROR(state))
1377
0
            goto error;
1378
6.43M
    }
1379
1380
2.33M
    if (root == NULL) {
1381
2.33M
        struct step *first_step = NULL;
1382
2.33M
        first_step = lp->steps;
1383
1384
2.33M
        struct tree *root_tree;
1385
2.33M
        root_tree = step_root(first_step, state->ctx, state->root_ctx);
1386
2.33M
        ns_add((*ns)[0], root_tree, state);
1387
2.33M
        ns_clear_added((*ns)[0]);
1388
2.33M
    } else {
1389
226
        for (int i=0; i < root->used; i++)
1390
224
            ns_add((*ns)[0], root->nodes[i], state);
1391
2
        ns_clear_added((*ns)[0]);
1392
2
    }
1393
1394
2.33M
    if (HAS_ERROR(state))
1395
0
        goto error;
1396
1397
2.33M
    uint cur_ns = 0;
1398
4.09M
    list_for_each(step, lp->steps) {
1399
4.09M
        struct nodeset *work = (*ns)[cur_ns];
1400
4.09M
        struct nodeset *next = (*ns)[cur_ns + 1];
1401
4.09M
        if (position_pred(step->predicates)) {
1402
6.72k
            struct tree *node = position_filter(work, step, state);
1403
6.72k
            if (node) {
1404
0
                ns_add(next, node, state);
1405
0
                ns_clear_added(next);
1406
0
            }
1407
4.08M
        } else {
1408
8.03M
            for (int i=0; i < work->used; i++) {
1409
3.95M
                for (struct tree *node = step_first(step, work->nodes[i]);
1410
8.62M
                     node != NULL;
1411
4.67M
                     node = step_next(step, work->nodes[i], node)) {
1412
4.67M
                    ns_add(next, node, state);
1413
4.67M
                }
1414
3.95M
            }
1415
4.08M
            ns_clear_added(next);
1416
4.08M
            ns_filter(next, step->predicates, state);
1417
4.08M
            if (HAS_ERROR(state))
1418
25
                goto error;
1419
4.08M
        }
1420
4.09M
        cur_ns += 1;
1421
4.09M
    }
1422
1423
2.33M
    state->ctx = old_ctx;
1424
2.33M
    return;
1425
25
 error:
1426
25
    if (*ns != NULL) {
1427
100
        for (int i=0; i <= *maxns; i++)
1428
75
            free_nodeset((*ns)[i]);
1429
25
        FREE(*ns);
1430
25
    }
1431
25
    state->ctx = old_ctx;
1432
25
    return;
1433
2.33M
}
1434
1435
2.33M
static void eval_filter(struct expr *expr, struct state *state) {
1436
2.33M
    struct locpath *lp = expr->locpath;
1437
2.33M
    struct nodeset **ns = NULL;
1438
2.33M
    struct locpath_trace *lpt = state->locpath_trace;
1439
2.33M
    uint maxns;
1440
1441
2.33M
    state->locpath_trace = NULL;
1442
2.33M
    if (expr->primary == NULL) {
1443
2.33M
        ns_from_locpath(lp, &maxns, &ns, NULL, state);
1444
2.33M
    } else {
1445
2
        eval_expr(expr->primary, state);
1446
2
        RET_ON_ERROR;
1447
2
        value_ind_t primary_ind = pop_value_ind(state);
1448
2
        struct value *primary = state->value_pool + primary_ind;
1449
2
        assert(primary->tag == T_NODESET);
1450
2
        ns_filter(primary->nodeset, expr->predicates, state);
1451
        /* Evaluating predicates might have reallocated the value_pool */
1452
2
        primary = state->value_pool + primary_ind;
1453
2
        ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1454
2
    }
1455
2.33M
    RET_ON_ERROR;
1456
1457
2.33M
    value_ind_t vind = make_value(T_NODESET, state);
1458
2.33M
    RET_ON_ERROR;
1459
2.33M
    state->value_pool[vind].nodeset = ns[maxns];
1460
2.33M
    push_value(vind, state);
1461
1462
2.33M
    if (lpt != NULL) {
1463
33.6k
        assert(lpt->ns == NULL);
1464
33.6k
        assert(lpt->lp == NULL);
1465
33.6k
        lpt->maxns = maxns;
1466
33.6k
        lpt->ns = ns;
1467
33.6k
        lpt->lp = lp;
1468
33.6k
        state->locpath_trace = lpt;
1469
2.30M
    } else {
1470
6.25M
        for (int i=0; i < maxns; i++)
1471
3.94M
            free_nodeset(ns[i]);
1472
2.30M
        FREE(ns);
1473
2.30M
    }
1474
2.33M
}
1475
1476
static struct value *lookup_var(const char *ident,
1477
56
                                const struct pathx_symtab *symtab) {
1478
56
    list_for_each(tab, symtab) {
1479
0
        if (STREQ(ident, tab->name))
1480
0
            return tab->value;
1481
0
    }
1482
56
    return NULL;
1483
56
}
1484
1485
0
static void eval_var(struct expr *expr, struct state *state) {
1486
0
    struct value *v = lookup_var(expr->ident, state->symtab);
1487
0
    value_ind_t vind = clone_value(v, state);
1488
0
    RET_ON_ERROR;
1489
0
    push_value(vind, state);
1490
0
}
1491
1492
6.46M
static void eval_expr(struct expr *expr, struct state *state) {
1493
6.46M
    RET_ON_ERROR;
1494
6.46M
    switch (expr->tag) {
1495
2.33M
    case E_FILTER:
1496
2.33M
        eval_filter(expr, state);
1497
2.33M
        break;
1498
2.17M
    case E_BINARY:
1499
2.17M
        eval_binary(expr, state);
1500
2.17M
        break;
1501
1.89M
    case E_VALUE:
1502
1.89M
        push_value(expr->value_ind, state);
1503
1.89M
        break;
1504
0
    case E_VAR:
1505
0
        eval_var(expr, state);
1506
0
        break;
1507
52.4k
    case E_APP:
1508
52.4k
        eval_app(expr, state);
1509
52.4k
        if (expr->fold) {
1510
            /* Do constant folding: replace the function application with
1511
             * a reference to the value that resulted from evaluating it */
1512
396
            for (int i=0; i < expr->func->arity; i++)
1513
198
                free_expr(expr->args[i]);
1514
198
            free(expr->args);
1515
198
            value_ind_t vind = state->values_used - 1;
1516
198
            expr->tag = E_VALUE;
1517
198
            expr->value_ind = state->values[vind];
1518
198
        }
1519
52.4k
        break;
1520
0
    default:
1521
0
        assert(0);
1522
6.46M
    }
1523
6.46M
}
1524
1525
/*************************************************************************
1526
 * Typechecker
1527
 *************************************************************************/
1528
1529
static void check_expr(struct expr *expr, struct state *state);
1530
1531
/* Typecheck a list of predicates. A predicate is a function of
1532
 * one of the following types:
1533
 *
1534
 * T_NODESET -> T_BOOLEAN
1535
 * T_NUMBER  -> T_BOOLEAN  (position test)
1536
 * T_BOOLEAN -> T_BOOLEAN
1537
 */
1538
446k
static void check_preds(struct pred *pred, struct state *state) {
1539
446k
    if (pred == NULL)
1540
438k
        return;
1541
16.4k
    for (int i=0; i < pred->nexpr; i++) {
1542
8.20k
        struct expr *e = pred->exprs[i];
1543
8.20k
        check_expr(e, state);
1544
8.20k
        RET_ON_ERROR;
1545
8.20k
        if (e->type != T_NODESET && e->type != T_NUMBER &&
1546
8.20k
            e->type != T_BOOLEAN && e->type != T_STRING) {
1547
0
            STATE_ERROR(state, PATHX_ETYPE);
1548
0
            return;
1549
0
        }
1550
8.20k
    }
1551
8.20k
}
1552
1553
128k
static void check_filter(struct expr *expr, struct state *state) {
1554
128k
    assert(expr->tag == E_FILTER);
1555
128k
    struct locpath *locpath = expr->locpath;
1556
1557
128k
    if (expr->primary != NULL) {
1558
4
        check_expr(expr->primary, state);
1559
4
        if (expr->primary->type != T_NODESET) {
1560
0
            STATE_ERROR(state, PATHX_ETYPE);
1561
0
            return;
1562
0
        }
1563
4
        check_preds(expr->predicates, state);
1564
4
        RET_ON_ERROR;
1565
4
    }
1566
446k
    list_for_each(s, locpath->steps) {
1567
446k
        check_preds(s->predicates, state);
1568
446k
        RET_ON_ERROR;
1569
446k
    }
1570
128k
    expr->type = T_NODESET;
1571
128k
}
1572
1573
943
static void check_app(struct expr *expr, struct state *state) {
1574
943
    assert(expr->tag == E_APP);
1575
1576
2.17k
    for (int i=0; i < expr->func->arity; i++) {
1577
1.23k
        check_expr(expr->args[i], state);
1578
1.23k
        RET_ON_ERROR;
1579
1.23k
    }
1580
1581
943
    int f;
1582
7.43k
    for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1583
7.43k
        const struct func *fn = builtin_funcs + f;
1584
7.43k
        if (STRNEQ(expr->func->name, fn->name))
1585
5.00k
            continue;
1586
2.43k
        if (expr->func->arity != fn->arity)
1587
576
            continue;
1588
1589
1.85k
        int match = 1;
1590
3.08k
        for (int i=0; i < expr->func->arity; i++) {
1591
2.14k
            if (expr->args[i]->type != fn->arg_types[i]) {
1592
914
                match = 0;
1593
914
                break;
1594
914
            }
1595
2.14k
        }
1596
1.85k
        if (match)
1597
943
            break;
1598
1.85k
    }
1599
1600
943
    if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1601
943
        expr->func = builtin_funcs + f;
1602
943
        expr->type = expr->func->type;
1603
943
        expr->fold = expr->func->pure;
1604
943
        if (expr->fold) {
1605
            /* We only do constant folding for invocations of pure functions
1606
             * whose arguments are literal values. That misses opportunities
1607
             * for constant folding, e.g., "regexp('foo' + 'bar')" but is
1608
             * a bit simpler than doing full tracking of constants
1609
             */
1610
1.76k
            for (int i=0; i < expr->func->arity; i++) {
1611
1.02k
                if (expr->args[i]->tag != E_VALUE)
1612
504
                    expr->fold = false;
1613
1.02k
            }
1614
738
        }
1615
943
    } else {
1616
0
        STATE_ERROR(state, PATHX_ETYPE);
1617
0
    }
1618
943
}
1619
1620
/* Check the binary operators. Type rules:
1621
 *
1622
 * '=', '!='  : T_NODESET -> T_NODESET -> T_BOOLEAN
1623
 *              T_STRING  -> T_NODESET -> T_BOOLEAN
1624
 *              T_NODESET -> T_STRING  -> T_BOOLEAN
1625
 *              T_NUMBER  -> T_NUMBER  -> T_BOOLEAN
1626
 *
1627
 * '>', '>=',
1628
 * '<', '<='  : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1629
 *              T_STRING -> T_STRING -> T_BOOLEAN
1630
 * '+'        : T_NUMBER -> T_NUMBER -> T_NUMBER
1631
 *              T_STRING -> T_STRING -> T_STRING
1632
 *              T_REGEXP -> T_REGEXP -> T_REGEXP
1633
 * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1634
 *
1635
 * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1636
 * '=~', '!~' : T_STRING  -> T_REGEXP  -> T_BOOLEAN
1637
 *              T_NODESET -> T_REGEXP  -> T_BOOLEAN
1638
 *
1639
 * '|'        : T_NODESET -> T_NODESET -> T_NODESET
1640
 *
1641
 * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1642
 */
1643
1.95M
static void check_binary(struct expr *expr, struct state *state) {
1644
1.95M
    check_expr(expr->left, state);
1645
1.95M
    check_expr(expr->right, state);
1646
1.95M
    RET_ON_ERROR;
1647
1648
1.13M
    enum type l = expr->left->type;
1649
1.13M
    enum type r = expr->right->type;
1650
1.13M
    int  ok = 1;
1651
1.13M
    enum type res;
1652
1653
1.13M
    switch(expr->op) {
1654
209
    case OP_EQ:
1655
236
    case OP_NEQ:
1656
236
        ok = ((l == T_NODESET || l == T_STRING)
1657
236
              && (r == T_NODESET || r == T_STRING))
1658
236
            || (l == T_NUMBER && r == T_NUMBER);;
1659
236
        res = T_BOOLEAN;
1660
236
        break;
1661
418
    case OP_LT:
1662
418
    case OP_LE:
1663
1.30k
    case OP_GT:
1664
1.30k
    case OP_GE:
1665
1.30k
        ok = (l == T_NUMBER && r == T_NUMBER)
1666
1.30k
            || (l == T_STRING && r == T_STRING);
1667
1.30k
        res = T_BOOLEAN;
1668
1.30k
        break;
1669
1.64k
    case OP_PLUS:
1670
1.64k
        ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1671
1.64k
        res = l;
1672
1.64k
        break;
1673
51.0k
    case OP_MINUS:
1674
1.10M
    case OP_STAR:
1675
1.10M
        ok =  (l == T_NUMBER && r == T_NUMBER);
1676
1.10M
        res = T_NUMBER;
1677
1.10M
        break;
1678
19.0k
    case OP_UNION:
1679
19.0k
        ok = (l == T_NODESET && r == T_NODESET);
1680
19.0k
        res = T_NODESET;
1681
19.0k
        break;
1682
409
    case OP_AND:
1683
4.89k
    case OP_OR:
1684
4.89k
        ok = 1;
1685
4.89k
        res = T_BOOLEAN;
1686
4.89k
        break;
1687
1.91k
    case OP_ELSE:
1688
1.91k
        if (l == T_NODESET && r == T_NODESET) {
1689
463
            res = T_NODESET;
1690
1.45k
        } else {
1691
1.45k
            res = T_BOOLEAN;
1692
1.45k
        }
1693
1.91k
        ok = 1;
1694
1.91k
        break;
1695
7
    case OP_RE_MATCH:
1696
738
    case OP_RE_NOMATCH:
1697
738
        ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1698
738
        res = T_BOOLEAN;
1699
738
        break;
1700
0
    default:
1701
0
        assert(0);
1702
1.13M
    }
1703
1.13M
    if (! ok) {
1704
269
        STATE_ERROR(state, PATHX_ETYPE);
1705
1.13M
    } else {
1706
1.13M
        expr->type = res;
1707
1.13M
    }
1708
1.13M
}
1709
1710
56
static void check_var(struct expr *expr, struct state *state) {
1711
56
    struct value *v = lookup_var(expr->ident, state->symtab);
1712
56
    if (v == NULL) {
1713
56
        STATE_ERROR(state, PATHX_ENOVAR);
1714
56
        return;
1715
56
    }
1716
0
    expr->type = v->tag;
1717
0
}
1718
1719
/* Typecheck an expression */
1720
4.01M
static void check_expr(struct expr *expr, struct state *state) {
1721
4.01M
    RET_ON_ERROR;
1722
3.19M
    switch(expr->tag) {
1723
128k
    case E_FILTER:
1724
128k
        check_filter(expr, state);
1725
128k
        break;
1726
1.95M
    case E_BINARY:
1727
1.95M
        check_binary(expr, state);
1728
1.95M
        break;
1729
1.11M
    case E_VALUE:
1730
1.11M
        expr->type = expr_value(expr, state)->tag;
1731
1.11M
        break;
1732
56
    case E_VAR:
1733
56
        check_var(expr, state);
1734
56
        break;
1735
943
    case E_APP:
1736
943
        check_app(expr, state);
1737
943
        break;
1738
0
    default:
1739
0
        assert(0);
1740
3.19M
    }
1741
3.19M
}
1742
1743
/*
1744
 * Utility functions for the parser
1745
 */
1746
1747
85.0M
static void skipws(struct state *state) {
1748
85.0M
    while (isspace(*state->pos)) state->pos += 1;
1749
85.0M
}
1750
1751
70.9M
static int match(struct state *state, char m) {
1752
70.9M
    skipws(state);
1753
1754
70.9M
    if (*state->pos == '\0')
1755
408k
        return 0;
1756
70.5M
    if (*state->pos == m) {
1757
19.2M
        state->pos += 1;
1758
19.2M
        return 1;
1759
19.2M
    }
1760
51.2M
    return 0;
1761
70.5M
}
1762
1763
28.7M
static int peek(struct state *state, const char *chars) {
1764
28.7M
    return strchr(chars, *state->pos) != NULL;
1765
28.7M
}
1766
1767
/* Return 1 if STATE->POS starts with TOKEN, followed by optional
1768
 * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1769
 * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1770
 * unchanged.
1771
 */
1772
static int looking_at(struct state *state, const char *token,
1773
42.9M
                      const char *follow) {
1774
42.9M
    if (STREQLEN(state->pos, token, strlen(token))) {
1775
16.1k
        const char *p = state->pos + strlen(token);
1776
16.1k
        while (isspace(*p)) p++;
1777
16.1k
        if (STREQLEN(p, follow, strlen(follow))) {
1778
12.8k
            state->pos = p + strlen(follow);
1779
12.8k
            return 1;
1780
12.8k
        }
1781
16.1k
    }
1782
42.9M
    return 0;
1783
42.9M
}
1784
1785
/*************************************************************************
1786
 * The parser
1787
 *************************************************************************/
1788
1789
static void parse_expr(struct state *state);
1790
1791
12.5M
static struct expr* pop_expr(struct state *state) {
1792
12.5M
    if (state->exprs_used > 0) {
1793
12.5M
        state->exprs_used -= 1;
1794
12.5M
        return state->exprs[state->exprs_used];
1795
12.5M
    } else {
1796
0
        STATE_ERROR(state, PATHX_EINTERNAL);
1797
0
        assert(0);
1798
0
        return NULL;
1799
0
    }
1800
12.5M
}
1801
1802
16.5M
static void push_expr(struct expr *expr, struct state *state) {
1803
16.5M
    if (state->exprs_used >= state->exprs_size) {
1804
105k
        size_t new_size = 2*state->exprs_size;
1805
105k
        if (new_size == 0) new_size = 8;
1806
105k
        if (REALLOC_N(state->exprs, new_size) < 0) {
1807
0
            STATE_ENOMEM;
1808
0
            return;
1809
0
        }
1810
105k
        state->exprs_size = new_size;
1811
105k
    }
1812
16.5M
    state->exprs[state->exprs_used++] = expr;
1813
16.5M
}
1814
1815
4.22M
static void push_new_binary_op(enum binary_op op, struct state *state) {
1816
4.22M
    struct expr *expr = NULL;
1817
4.22M
    if (ALLOC(expr) < 0) {
1818
0
        STATE_ENOMEM;
1819
0
        return;
1820
0
    }
1821
1822
4.22M
    expr->tag = E_BINARY;
1823
4.22M
    expr->op  = op;
1824
4.22M
    expr->right = pop_expr(state);
1825
4.22M
    expr->left = pop_expr(state);
1826
4.22M
    expr->left_matched = false;  /* for 'else' operator only, true if any matches on LHS */
1827
4.22M
    push_expr(expr, state);
1828
4.22M
}
1829
1830
36.5k
int pathx_escape_name(const char *in, char **out) {
1831
36.5k
    const char *p;
1832
36.5k
    int num_to_escape = 0;
1833
36.5k
    char *s;
1834
1835
36.5k
    *out = NULL;
1836
1837
104M
    for (p = in; *p; p++) {
1838
104M
        if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1839
81.9M
            num_to_escape += 1;
1840
104M
    }
1841
1842
36.5k
    if (num_to_escape == 0)
1843
36.0k
        return 0;
1844
1845
482
    if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1846
0
        return -1;
1847
1848
95.0M
    for (p = in, s = *out; *p; p++) {
1849
95.0M
        if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1850
81.9M
            *s++ = '\\';
1851
95.0M
        *s++ = *p;
1852
95.0M
    }
1853
482
    *s = '\0';
1854
482
    return 0;
1855
482
}
1856
1857
/* Return true if POS is preceded by an odd number of backslashes, i.e., if
1858
 * POS is escaped. Stop the search when we get to START */
1859
3.40M
static bool backslash_escaped(const char *pos, const char *start) {
1860
3.40M
    bool result=false;
1861
3.40M
    while (pos-- > start && *pos == '\\') {
1862
275
        result = !result;
1863
275
    }
1864
3.40M
    return result;
1865
3.40M
}
1866
1867
/*
1868
 * NameNoWS ::= [^][|/\= \t\n] | \\.
1869
 * NameWS   ::= [^][|/\=] | \\.
1870
 * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1871
 */
1872
3.33M
static char *parse_name(struct state *state) {
1873
3.33M
    const char *s = state->pos;
1874
3.33M
    char *result;
1875
1876
    /* Advance state->pos until it points to the first character that is
1877
     * not part of a name. */
1878
197M
    while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1879
        /* Since we allow spaces in names, we need to avoid gobbling up
1880
         * stuff that is in follow(Name), e.g. 'or' so that things like
1881
         * [name1 or name2] still work. In other words, we'll parse 'x frob
1882
         * y' as one name, but for 'x or y', we consider 'x' a name in its
1883
         * own right. */
1884
194M
        if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1885
194M
            STREQLEN(state->pos, " and ", strlen(" and ")) ||
1886
194M
            STREQLEN(state->pos, " else ", strlen(" else ")))
1887
894
            break;
1888
1889
194M
        if (*state->pos == '\\') {
1890
4.70M
            state->pos += 1;
1891
4.70M
            if (*state->pos == '\0') {
1892
0
                STATE_ERROR(state, PATHX_ENAME);
1893
0
                return NULL;
1894
0
            }
1895
4.70M
        }
1896
194M
        state->pos += 1;
1897
194M
    }
1898
1899
    /* Strip trailing white space. Make sure we respect escaped whitespace
1900
     * and don't strip it as in "x\\ " */
1901
3.33M
    if (state->pos > s) {
1902
3.33M
        state->pos -= 1;
1903
6.73M
        while (isspace(*state->pos) && state->pos > s
1904
6.73M
               && !backslash_escaped(state->pos, s))
1905
3.40M
            state->pos -= 1;
1906
3.33M
        state->pos += 1;
1907
3.33M
    }
1908
1909
3.33M
    if (state->pos == s) {
1910
79
        STATE_ERROR(state, PATHX_ENAME);
1911
79
        return NULL;
1912
79
    }
1913
1914
3.33M
    result = strndup(s, state->pos - s);
1915
3.33M
    if (result == NULL) {
1916
0
        STATE_ENOMEM;
1917
0
        return NULL;
1918
0
    }
1919
1920
3.33M
    char *p = result;
1921
194M
    for (char *t = result; *t != '\0'; t++, p++) {
1922
190M
        if (*t == '\\')
1923
4.70M
            t += 1;
1924
190M
        *p = *t;
1925
190M
    }
1926
3.33M
    *p = '\0';
1927
1928
3.33M
    return result;
1929
3.33M
}
1930
1931
/*
1932
 * Predicate    ::= "[" Expr "]" *
1933
 */
1934
12.4M
static struct pred *parse_predicates(struct state *state) {
1935
12.4M
    struct pred *pred = NULL;
1936
12.4M
    int nexpr = 0;
1937
1938
18.5M
    while (match(state, L_BRACK)) {
1939
6.20M
        parse_expr(state);
1940
6.20M
        nexpr += 1;
1941
6.20M
        RET0_ON_ERROR;
1942
1943
6.03M
        if (! match(state, R_BRACK)) {
1944
117
            STATE_ERROR(state, PATHX_EPRED);
1945
117
            return NULL;
1946
117
        }
1947
6.03M
        skipws(state);
1948
6.03M
    }
1949
1950
12.3M
    if (nexpr == 0)
1951
12.2M
        return NULL;
1952
1953
8.36k
    if (ALLOC(pred) < 0) {
1954
0
        STATE_ENOMEM;
1955
0
        return NULL;
1956
0
    }
1957
8.36k
    pred->nexpr = nexpr;
1958
1959
8.36k
    if (ALLOC_N(pred->exprs, nexpr) < 0) {
1960
0
        free_pred(pred);
1961
0
        STATE_ENOMEM;
1962
0
        return NULL;
1963
0
    }
1964
1965
4.09M
    for (int i = nexpr - 1; i >= 0; i--)
1966
4.09M
        pred->exprs[i] = pop_expr(state);
1967
1968
8.36k
    return pred;
1969
8.36k
}
1970
1971
/*
1972
 * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1973
 * AxisSpecifier ::= AxisName '::' | <epsilon>
1974
 * AxisName ::= 'ancestor'
1975
 *            | 'ancestor-or-self'
1976
 *            | 'child'
1977
 *            | 'descendant'
1978
 *            | 'descendant-or-self'
1979
 *            | 'parent'
1980
 *            | 'self'
1981
 *            | 'root'
1982
 */
1983
4.29M
static struct step *parse_step(struct state *state) {
1984
4.29M
    struct step *step;
1985
4.29M
    int explicit_axis = 0, allow_predicates = 1;
1986
1987
4.29M
    if (ALLOC(step) < 0) {
1988
0
        STATE_ENOMEM;
1989
0
        return NULL;
1990
0
    }
1991
1992
4.29M
    step->axis = CHILD;
1993
47.1M
    for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1994
42.9M
        if (looking_at(state, axis_names[i], "::")) {
1995
625
            step->axis = i;
1996
625
            explicit_axis = 1;
1997
625
            break;
1998
625
        }
1999
42.9M
    }
2000
2001
4.29M
    if (! match(state, '*')) {
2002
3.33M
        step->name = parse_name(state);
2003
3.33M
        if (HAS_ERROR(state))
2004
79
            goto error;
2005
3.33M
        if (! explicit_axis) {
2006
3.33M
            if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
2007
35.7k
                step->axis = STREQ(step->name, ".") ? SELF : PARENT;
2008
35.7k
                FREE(step->name);
2009
35.7k
                allow_predicates = 0;
2010
35.7k
            }
2011
3.33M
        }
2012
3.33M
    }
2013
2014
4.29M
    if (allow_predicates) {
2015
4.25M
        step->predicates = parse_predicates(state);
2016
4.25M
        if (HAS_ERROR(state))
2017
161k
            goto error;
2018
4.25M
    }
2019
2020
4.12M
    return step;
2021
2022
161k
 error:
2023
161k
    free_step(step);
2024
161k
    return NULL;
2025
4.29M
}
2026
2027
163k
static struct step *make_step(enum axis axis, struct state *state) {
2028
163k
    struct step *result = NULL;
2029
2030
163k
    if (ALLOC(result) < 0) {
2031
0
        STATE_ENOMEM;
2032
0
        return NULL;
2033
0
    }
2034
163k
    result->axis = axis;
2035
163k
    return result;
2036
163k
}
2037
2038
/*
2039
 * RelativeLocationPath ::= Step
2040
 *                        | RelativeLocationPath '/' Step
2041
 *                        | AbbreviatedRelativeLocationPath
2042
 * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
2043
 *
2044
 * The above is the same as
2045
 * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
2046
 */
2047
static struct locpath *
2048
4.10M
parse_relative_location_path(struct state *state) {
2049
4.10M
    struct step *step = NULL;
2050
4.10M
    struct locpath *locpath = NULL;
2051
2052
4.10M
    step = parse_step(state);
2053
4.10M
    if (HAS_ERROR(state))
2054
161k
        goto error;
2055
2056
3.94M
    if (ALLOC(locpath) < 0) {
2057
0
        STATE_ENOMEM;
2058
0
        goto error;
2059
0
    }
2060
3.94M
    list_append(locpath->steps, step);
2061
0
    step = NULL;
2062
2063
4.12M
    while (match(state, '/')) {
2064
187k
        if (*state->pos == '/') {
2065
36.3k
            state->pos += 1;
2066
36.3k
            step = make_step(DESCENDANT_OR_SELF, state);
2067
36.3k
            if (step == NULL) {
2068
0
                STATE_ENOMEM;
2069
0
                goto error;
2070
0
            }
2071
36.3k
            list_append(locpath->steps, step);
2072
36.3k
        }
2073
187k
        step = parse_step(state);
2074
187k
        if (HAS_ERROR(state))
2075
147
            goto error;
2076
187k
        list_append(locpath->steps, step);
2077
0
        step = NULL;
2078
187k
    }
2079
3.94M
    return locpath;
2080
2081
161k
 error:
2082
161k
    free_step(step);
2083
161k
    free_locpath(locpath);
2084
161k
    return NULL;
2085
3.94M
}
2086
2087
/*
2088
 * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
2089
 * AbsoluteLocationPath ::= '/' RelativeLocationPath?
2090
 *                        | AbbreviatedAbsoluteLocationPath
2091
 * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
2092
 *
2093
 */
2094
4.10M
static void parse_location_path(struct state *state) {
2095
4.10M
    struct expr *expr = NULL;
2096
4.10M
    struct locpath *locpath = NULL;
2097
2098
4.10M
    if (match(state, '/')) {
2099
127k
        if (*state->pos == '/') {
2100
21.5k
            state->pos += 1;
2101
21.5k
            locpath = parse_relative_location_path(state);
2102
21.5k
            if (HAS_ERROR(state))
2103
76
                goto error;
2104
21.4k
            struct step *step = make_step(DESCENDANT_OR_SELF, state);
2105
21.4k
            if (HAS_ERROR(state))
2106
0
                goto error;
2107
21.4k
            list_cons(locpath->steps, step);
2108
105k
        } else {
2109
105k
            if (*state->pos != '\0') {
2110
105k
                locpath = parse_relative_location_path(state);
2111
105k
            } else {
2112
0
                if (ALLOC(locpath) < 0)
2113
0
                    goto err_nomem;
2114
0
            }
2115
105k
            struct step *step = make_step(ROOT, state);
2116
105k
            if (HAS_ERROR(state)) {
2117
3
                free_step(step);
2118
3
                goto error;
2119
3
            }
2120
105k
            list_cons(locpath->steps, step);
2121
105k
        }
2122
3.97M
    } else {
2123
3.97M
        locpath = parse_relative_location_path(state);
2124
3.97M
    }
2125
2126
4.10M
    if (ALLOC(expr) < 0)
2127
0
        goto err_nomem;
2128
4.10M
    expr->tag = E_FILTER;
2129
4.10M
    expr->locpath = locpath;
2130
4.10M
    push_expr(expr, state);
2131
4.10M
    return;
2132
2133
0
 err_nomem:
2134
0
    STATE_ENOMEM;
2135
79
 error:
2136
79
    free_expr(expr);
2137
79
    free_locpath(locpath);
2138
79
    return;
2139
0
}
2140
2141
/*
2142
 * Number       ::= /[0-9]+/
2143
 */
2144
8.20M
static void parse_number(struct state *state) {
2145
8.20M
    struct expr *expr = NULL;
2146
8.20M
    unsigned long val;
2147
8.20M
    char *end;
2148
2149
8.20M
    errno = 0;
2150
8.20M
    val = strtoul(state->pos, &end, 10);
2151
8.20M
    if (errno || end == state->pos || (int) val != val) {
2152
68
        STATE_ERROR(state, PATHX_ENUMBER);
2153
68
        return;
2154
68
    }
2155
2156
8.20M
    state->pos = end;
2157
2158
8.20M
    if (ALLOC(expr) < 0)
2159
0
        goto err_nomem;
2160
8.20M
    expr->tag = E_VALUE;
2161
8.20M
    expr->value_ind = make_value(T_NUMBER, state);
2162
8.20M
    if (HAS_ERROR(state))
2163
0
        goto error;
2164
8.20M
    expr_value(expr, state)->number = val;
2165
2166
8.20M
    push_expr(expr, state);
2167
8.20M
    return;
2168
2169
0
 err_nomem:
2170
0
    STATE_ENOMEM;
2171
0
 error:
2172
0
    free_expr(expr);
2173
0
    return;
2174
0
}
2175
2176
/*
2177
 * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2178
 */
2179
3.56k
static void parse_literal(struct state *state) {
2180
3.56k
    char delim;
2181
3.56k
    const char *s;
2182
3.56k
    struct expr *expr = NULL;
2183
2184
3.56k
    if (*state->pos == '"')
2185
2.11k
        delim = '"';
2186
1.45k
    else if (*state->pos == '\'')
2187
1.29k
        delim = '\'';
2188
165
    else {
2189
165
        STATE_ERROR(state, PATHX_ESTRING);
2190
165
        return;
2191
165
    }
2192
3.40k
    state->pos += 1;
2193
2194
3.40k
    s = state->pos;
2195
9.25M
    while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2196
2197
3.40k
    if (*state->pos != delim) {
2198
83
        STATE_ERROR(state, PATHX_EDELIM);
2199
83
        return;
2200
83
    }
2201
3.32k
    state->pos += 1;
2202
2203
3.32k
    if (ALLOC(expr) < 0)
2204
0
        goto err_nomem;
2205
3.32k
    expr->tag = E_VALUE;
2206
3.32k
    expr->value_ind = make_value(T_STRING, state);
2207
3.32k
    if (HAS_ERROR(state))
2208
0
        goto error;
2209
3.32k
    expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2210
3.32k
    if (expr_value(expr, state)->string == NULL)
2211
0
        goto err_nomem;
2212
2213
3.32k
    push_expr(expr, state);
2214
3.32k
    return;
2215
2216
0
 err_nomem:
2217
0
    STATE_ENOMEM;
2218
0
 error:
2219
0
    free_expr(expr);
2220
0
    return;
2221
0
}
2222
2223
/*
2224
 * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2225
 */
2226
12.1k
static void parse_function_call(struct state *state) {
2227
12.1k
    const struct func *func = NULL;
2228
12.1k
    struct expr *expr = NULL;
2229
12.1k
    int nargs = 0, find = 0;
2230
2231
62.3k
    for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2232
62.3k
        if (looking_at(state, builtin_funcs[find].name, "(")) {
2233
12.1k
            func = builtin_funcs + find;
2234
12.1k
            break;
2235
12.1k
        }
2236
62.3k
    }
2237
12.1k
    if (func == NULL) {
2238
10
        STATE_ERROR(state, PATHX_ENAME);
2239
10
        return;
2240
10
    }
2241
2242
12.1k
    if (! match(state, ')')) {
2243
1.52M
        do {
2244
1.52M
            nargs += 1;
2245
1.52M
            parse_expr(state);
2246
1.52M
            RET_ON_ERROR;
2247
1.52M
        } while (match(state, ','));
2248
2249
1.01k
        if (! match(state, ')')) {
2250
25
            STATE_ERROR(state, PATHX_EPAREN);
2251
25
            return;
2252
25
        }
2253
1.01k
    }
2254
2255
993
    int found = 0; /* Whether there is a builtin matching in name and arity */
2256
1.59k
    for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2257
1.59k
        if (STRNEQ(func->name, builtin_funcs[i].name))
2258
18
            break;
2259
1.57k
        if (builtin_funcs[i].arity == nargs) {
2260
975
            func = builtin_funcs + i;
2261
975
            found = 1;
2262
975
            break;
2263
975
        }
2264
1.57k
    }
2265
2266
993
    if (! found) {
2267
18
        STATE_ERROR(state, PATHX_EARITY);
2268
18
        return;
2269
18
    }
2270
2271
975
    if (ALLOC(expr) < 0) {
2272
0
        STATE_ENOMEM;
2273
0
        return;
2274
0
    }
2275
975
    expr->tag = E_APP;
2276
975
    if (ALLOC_N(expr->args, nargs) < 0) {
2277
0
        free_expr(expr);
2278
0
        STATE_ENOMEM;
2279
0
        return;
2280
0
    }
2281
975
    expr->func = func;
2282
2.23k
    for (int i = nargs - 1; i >= 0; i--)
2283
1.26k
        expr->args[i] = pop_expr(state);
2284
2285
975
    push_expr(expr, state);
2286
975
}
2287
2288
/*
2289
 * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2290
 *
2291
 * The '$' is consumed by parse_primary_expr
2292
 */
2293
843
static void parse_var(struct state *state) {
2294
843
    const char *id = state->pos;
2295
843
    struct expr *expr = NULL;
2296
2297
843
    if (!isalpha(*id) && *id != '_') {
2298
35
        STATE_ERROR(state, PATHX_ENAME);
2299
35
        return;
2300
35
    }
2301
808
    id++;
2302
1.38k
    while (isalpha(*id) || isdigit(*id) || *id == '_')
2303
574
        id += 1;
2304
2305
808
    if (ALLOC(expr) < 0)
2306
0
        goto err_nomem;
2307
808
    expr->tag = E_VAR;
2308
808
    expr->ident = strndup(state->pos, id - state->pos);
2309
808
    if (expr->ident == NULL)
2310
0
        goto err_nomem;
2311
2312
808
    push_expr(expr, state);
2313
808
    state->pos = id;
2314
808
    return;
2315
0
 err_nomem:
2316
0
    STATE_ENOMEM;
2317
0
    free_expr(expr);
2318
0
    return;
2319
808
}
2320
2321
/*
2322
 * PrimaryExpr ::= Literal
2323
 *               | Number
2324
 *               | FunctionCall
2325
 *               | VariableReference
2326
 *               | '(' Expr ')'
2327
 *
2328
 */
2329
8.22M
static void parse_primary_expr(struct state *state) {
2330
8.22M
    if (peek(state, "'\"")) {
2331
3.56k
        parse_literal(state);
2332
8.22M
    } else if (peek(state, "0123456789")) {
2333
8.20M
        parse_number(state);
2334
8.20M
    } else if (match(state, '(')) {
2335
55
        parse_expr(state);
2336
55
        RET_ON_ERROR;
2337
29
        if (! match(state, ')')) {
2338
9
            STATE_ERROR(state, PATHX_EPAREN);
2339
9
            return;
2340
9
        }
2341
13.0k
    } else if (match(state, '$')) {
2342
843
        parse_var(state);
2343
12.1k
    } else {
2344
12.1k
        parse_function_call(state);
2345
12.1k
    }
2346
8.22M
}
2347
2348
12.3M
static int looking_at_primary_expr(struct state *state) {
2349
12.3M
    const char *s = state->pos;
2350
    /* Is it a Number, Literal or VariableReference ? */
2351
12.3M
    if (peek(state, "$'\"0123456789"))
2352
8.21M
        return 1;
2353
2354
    /* Or maybe a function call, i.e. a word followed by a '(' ?
2355
     * Note that our function names are only [a-zA-Z]+
2356
     */
2357
7.23M
    while (*s != '\0' && isalpha(*s)) s++;
2358
13.2M
    while (*s != '\0' && isspace(*s)) s++;
2359
4.11M
    return *s == '(';
2360
12.3M
}
2361
2362
/*
2363
 * PathExpr ::= LocationPath
2364
 *            | FilterExpr
2365
 *            | FilterExpr '/' RelativeLocationPath
2366
 *            | FilterExpr '//' RelativeLocationPath
2367
 *
2368
 * FilterExpr ::= PrimaryExpr Predicate
2369
 *
2370
 * The grammar is ambiguous here: the expression '42' can either be the
2371
 * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2372
 * reason for this ambiguity is that we allow node names like '42' in the
2373
 * tree; rather than forbid them, we resolve the ambiguity by always
2374
 * parsing '42' as a number, and requiring that the user write the
2375
 * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2376
 */
2377
12.3M
static void parse_path_expr(struct state *state) {
2378
12.3M
    struct expr *expr = NULL;
2379
12.3M
    struct pred *predicates = NULL;
2380
12.3M
    struct locpath *locpath = NULL;
2381
2382
12.3M
    if (looking_at_primary_expr(state)) {
2383
8.22M
        parse_primary_expr(state);
2384
8.22M
        RET_ON_ERROR;
2385
8.21M
        predicates = parse_predicates(state);
2386
8.21M
        RET_ON_ERROR;
2387
8.20M
        if (match(state, '/')) {
2388
99
            if (match(state, '/')) {
2389
36
                locpath = parse_relative_location_path(state);
2390
36
                if (HAS_ERROR(state))
2391
24
                    goto error;
2392
2393
12
                struct step *step = make_step(DESCENDANT_OR_SELF, state);
2394
12
                if (HAS_ERROR(state))
2395
0
                    return;
2396
12
                list_cons(locpath->steps, step);
2397
63
            } else {
2398
63
                if (*state->pos == '\0') {
2399
0
                    STATE_ERROR(state, PATHX_EEND);
2400
0
                    goto error;
2401
0
                }
2402
63
                locpath = parse_relative_location_path(state);
2403
63
            }
2404
99
        }
2405
        /* A PathExpr without predicates and locpath is
2406
         * just a PrimaryExpr
2407
         */
2408
8.20M
        if (predicates == NULL && locpath == NULL)
2409
8.20M
            return;
2410
        /* To make evaluation easier, we parse something like
2411
         *   $var[pred] as $var[pred]/.
2412
         */
2413
105
        if (locpath == NULL) {
2414
30
            if (ALLOC(locpath) < 0)
2415
0
                goto error;
2416
30
            if (ALLOC(locpath->steps) < 0)
2417
0
                goto error;
2418
30
            locpath->steps->axis = SELF;
2419
30
        }
2420
105
        if (ALLOC(expr) < 0)
2421
0
            goto error;
2422
105
        expr->tag = E_FILTER;
2423
105
        expr->predicates = predicates;
2424
105
        expr->primary    = pop_expr(state);
2425
105
        expr->locpath    = locpath;
2426
105
        push_expr(expr, state);
2427
4.10M
    } else {
2428
4.10M
        parse_location_path(state);
2429
4.10M
    }
2430
4.10M
    return;
2431
4.10M
 error:
2432
24
    free_expr(expr);
2433
24
    free_pred(predicates);
2434
24
    free_locpath(locpath);
2435
24
    return;
2436
12.3M
}
2437
2438
/*
2439
 * UnionExpr ::= PathExpr ('|' PathExpr)*
2440
 */
2441
12.1M
static void parse_union_expr(struct state *state) {
2442
12.1M
    parse_path_expr(state);
2443
12.1M
    RET_ON_ERROR;
2444
12.1M
    while (match(state, '|')) {
2445
180k
        parse_path_expr(state);
2446
180k
        RET_ON_ERROR;
2447
109k
        push_new_binary_op(OP_UNION, state);
2448
109k
    }
2449
12.0M
}
2450
2451
/*
2452
 * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2453
 */
2454
8.10M
static void parse_multiplicative_expr(struct state *state) {
2455
8.10M
    parse_union_expr(state);
2456
8.10M
    RET_ON_ERROR;
2457
11.9M
    while (match(state, '*')) {
2458
4.04M
        parse_union_expr(state);
2459
4.04M
        RET_ON_ERROR;
2460
4.04M
        push_new_binary_op(OP_STAR, state);
2461
4.04M
    }
2462
7.92M
}
2463
2464
/*
2465
 * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2466
 * AdditiveOp   ::= '+' | '-'
2467
 */
2468
7.98M
static void parse_additive_expr(struct state *state) {
2469
7.98M
    parse_multiplicative_expr(state);
2470
7.98M
    RET_ON_ERROR;
2471
7.92M
    while (*state->pos == '+' || *state->pos == '-') {
2472
116k
        enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2473
116k
        state->pos += 1;
2474
116k
        skipws(state);
2475
116k
        parse_multiplicative_expr(state);
2476
116k
        RET_ON_ERROR;
2477
58.2k
        push_new_binary_op(op, state);
2478
58.2k
    }
2479
7.86M
}
2480
2481
/*
2482
 * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2483
 * RelationalOp ::= ">" | "<" | ">=" | "<="
2484
 */
2485
7.91M
static void parse_relational_expr(struct state *state) {
2486
7.91M
    parse_additive_expr(state);
2487
7.91M
    RET_ON_ERROR;
2488
7.80M
    if (*state->pos == '<' || *state->pos == '>') {
2489
69.8k
        enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2490
69.8k
        state->pos += 1;
2491
69.8k
        if (*state->pos == '=') {
2492
0
            op = (op == OP_LT) ? OP_LE : OP_GE;
2493
0
            state->pos += 1;
2494
0
        }
2495
69.8k
        skipws(state);
2496
69.8k
        parse_additive_expr(state);
2497
69.8k
        RET_ON_ERROR;
2498
2.76k
        push_new_binary_op(op, state);
2499
2.76k
    }
2500
7.80M
}
2501
2502
/*
2503
 * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2504
 * EqualityOp ::= "=" | "!="
2505
 * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2506
 * MatchOp ::= "=~" | "!~"
2507
 */
2508
7.84M
static void parse_equality_expr(struct state *state) {
2509
7.84M
    parse_relational_expr(state);
2510
7.84M
    RET_ON_ERROR;
2511
7.73M
    if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2512
67.7k
        enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2513
67.7k
        state->pos += 2;
2514
67.7k
        skipws(state);
2515
67.7k
        parse_relational_expr(state);
2516
67.7k
        RET_ON_ERROR;
2517
778
        push_new_binary_op(op, state);
2518
7.66M
    } else if (*state->pos == '=' ||
2519
7.66M
        (*state->pos == '!' && state->pos[1] == '=')) {
2520
5.88k
        enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2521
5.88k
        state->pos += (op == OP_EQ) ? 1 : 2;
2522
5.88k
        skipws(state);
2523
5.88k
        parse_relational_expr(state);
2524
5.88k
        RET_ON_ERROR;
2525
306
        push_new_binary_op(op, state);
2526
306
    }
2527
7.73M
}
2528
2529
/*
2530
 * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2531
 */
2532
7.84M
static void parse_and_expr(struct state *state) {
2533
7.84M
    parse_equality_expr(state);
2534
7.84M
    RET_ON_ERROR;
2535
7.66M
    while (*state->pos == 'a' && state->pos[1] == 'n'
2536
7.66M
        && state->pos[2] == 'd') {
2537
1.42k
        state->pos += 3;
2538
1.42k
        skipws(state);
2539
1.42k
        parse_equality_expr(state);
2540
1.42k
        RET_ON_ERROR;
2541
856
        push_new_binary_op(OP_AND, state);
2542
856
    }
2543
7.66M
}
2544
2545
/*
2546
 * OrExpr ::= AndExpr ('or' AndExpr)*
2547
 */
2548
7.83M
static void parse_or_expr(struct state *state) {
2549
7.83M
    parse_and_expr(state);
2550
7.83M
    RET_ON_ERROR;
2551
7.66M
    while (*state->pos == 'o' && state->pos[1] == 'r') {
2552
7.58k
        state->pos += 2;
2553
7.58k
        skipws(state);
2554
7.58k
        parse_and_expr(state);
2555
7.58k
        RET_ON_ERROR;
2556
7.51k
        push_new_binary_op(OP_OR, state);
2557
7.51k
    }
2558
7.65M
}
2559
2560
/*
2561
 * ElseExpr ::= OrExpr ('else' OrExpr)*
2562
 */
2563
7.83M
static void parse_else_expr(struct state *state) {
2564
7.83M
    parse_or_expr(state);
2565
7.83M
    RET_ON_ERROR;
2566
7.65M
    while (*state->pos == 'e' && state->pos[1] == 'l'
2567
7.65M
        && state->pos[2] == 's' && state->pos[3] == 'e' ) {
2568
3.58k
        state->pos += 4;
2569
3.58k
        skipws(state);
2570
3.58k
        parse_or_expr(state);
2571
3.58k
        RET_ON_ERROR;
2572
3.56k
        push_new_binary_op(OP_ELSE, state);
2573
3.56k
        state->has_else = 1;
2574
3.56k
    }
2575
7.65M
}
2576
2577
/*
2578
 * Expr ::= ElseExpr
2579
 */
2580
7.83M
static void parse_expr(struct state *state) {
2581
7.83M
    skipws(state);
2582
7.83M
    parse_else_expr(state);
2583
7.83M
}
2584
2585
103k
static void store_error(struct pathx *pathx) {
2586
103k
    const char *pathx_msg = NULL;
2587
103k
    const char *path = pathx->state->txt;
2588
103k
    const pathx_errcode_t errcode = pathx->state->errcode;
2589
103k
    struct error *err = pathx->state->error;
2590
2591
103k
    char *pos_str = pathx->state->errmsg;
2592
103k
    pathx->state->errmsg = NULL;
2593
2594
103k
    if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2595
101k
        return;
2596
2597
1.58k
    switch (errcode) {
2598
0
    case PATHX_ENOMEM:
2599
0
        err->code = AUG_ENOMEM;
2600
0
        break;
2601
0
    case PATHX_EMMATCH:
2602
0
        err->code = AUG_EMMATCH;
2603
0
        break;
2604
0
    case PATHX_ENOMATCH:
2605
0
        err->code = AUG_ENOMATCH;
2606
0
        break;
2607
1.58k
    default:
2608
1.58k
        err->code = AUG_EPATHX;
2609
1.58k
        break;
2610
1.58k
    }
2611
2612
    /* We only need details for pathx syntax errors */
2613
1.58k
    if (err->code != AUG_EPATHX)
2614
0
        return;
2615
2616
1.58k
    int pos;
2617
1.58k
    pathx_msg = pathx_error(pathx, NULL, &pos);
2618
2619
1.58k
    bool has_msg = pos_str != NULL;
2620
1.58k
    int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2621
1.58k
    if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2622
1.58k
        if (has_msg) {
2623
27
            strcat(pos_str, " in ");
2624
27
            strncat(pos_str, path, pos);
2625
1.56k
        } else {
2626
            /* initialize pos_str explicitly, path might be "" */
2627
1.56k
            pos_str[0] = '\0';
2628
1.56k
            strncat(pos_str, path, pos);
2629
1.56k
        }
2630
1.58k
        strcat(pos_str, "|=|");
2631
1.58k
        strcat(pos_str, path + pos);
2632
1.58k
    }
2633
2634
1.58k
    err->minor = errcode;
2635
1.58k
    err->details = pos_str;
2636
1.58k
    pos_str = NULL;
2637
1.58k
    err->minor_details = pathx_msg;
2638
1.58k
}
2639
2640
int pathx_parse(const struct tree *tree,
2641
                struct error *err,
2642
                const char *txt,
2643
                bool need_nodeset,
2644
                struct pathx_symtab *symtab,
2645
                struct tree *root_ctx,
2646
103k
                struct pathx **pathx) {
2647
103k
    struct state *state = NULL;
2648
2649
103k
    *pathx = NULL;
2650
2651
103k
    if (ALLOC(*pathx) < 0)
2652
0
        goto oom;
2653
2654
103k
    (*pathx)->origin = (struct tree *) tree;
2655
2656
    /* Set up state */
2657
103k
    if (ALLOC((*pathx)->state) < 0)
2658
0
        goto oom;
2659
103k
    state = (*pathx)->state;
2660
2661
103k
    state->errcode = PATHX_NOERROR;
2662
103k
    state->errmsg = NULL;
2663
103k
    state->txt = txt;
2664
103k
    state->pos = txt;
2665
103k
    state->symtab = symtab;
2666
103k
    state->root_ctx = root_ctx;
2667
103k
    state->error = err;
2668
2669
103k
    if (ALLOC_N(state->value_pool, 8) < 0) {
2670
0
        STATE_ENOMEM;
2671
0
        goto done;
2672
0
    }
2673
103k
    state->value_pool_size = 8;
2674
103k
    state->value_pool[0].tag = T_BOOLEAN;
2675
103k
    state->value_pool[0].boolval = 0;
2676
103k
    state->value_pool[1].tag = T_BOOLEAN;
2677
103k
    state->value_pool[1].boolval = 1;
2678
103k
    state->value_pool_used = 2;
2679
2680
    /* Parse */
2681
103k
    parse_expr(state);
2682
103k
    if (HAS_ERROR(state))
2683
609
        goto done;
2684
102k
    if (state->pos != state->txt + strlen(state->txt)) {
2685
357
        STATE_ERROR(state, PATHX_EEND);
2686
357
        goto done;
2687
357
    }
2688
2689
102k
    if (state->exprs_used != 1) {
2690
0
        STATE_ERROR(state, PATHX_EINTERNAL);
2691
0
        goto done;
2692
0
    }
2693
2694
    /* Typecheck */
2695
102k
    check_expr(state->exprs[0], state);
2696
102k
    if (HAS_ERROR(state))
2697
325
        goto done;
2698
2699
101k
    if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2700
271
        STATE_ERROR(state, PATHX_ETYPE);
2701
271
        goto done;
2702
271
    }
2703
2704
103k
 done:
2705
103k
    store_error(*pathx);
2706
103k
    return state->errcode;
2707
0
 oom:
2708
0
    free_pathx(*pathx);
2709
0
    *pathx = NULL;
2710
0
    if (err != NULL)
2711
0
        err->code = AUG_ENOMEM;
2712
0
    return PATHX_ENOMEM;
2713
101k
}
2714
2715
/*************************************************************************
2716
 * Searching in the tree
2717
 *************************************************************************/
2718
2719
5.69M
static bool step_matches(struct step *step, struct tree *tree) {
2720
5.69M
    if ( step->axis == SEQ && step->name == NULL ) {
2721
682
        if ( tree->label == NULL )
2722
0
            return false;
2723
        /* label matches if it consists of numeric digits only */
2724
734
        for( char *s = tree->label; *s ; s++) {
2725
682
            if ( ! isdigit(*s) )
2726
630
                return false;
2727
682
        }
2728
52
        return true;
2729
5.69M
    } else if (step->name == NULL) {
2730
4.49M
        return step->axis == ROOT || tree->label != NULL;
2731
4.49M
    } else {
2732
1.20M
        return streqx(step->name, tree->label);
2733
1.20M
    }
2734
5.69M
}
2735
2736
30
static struct tree *tree_prev(struct tree *pos) {
2737
30
    struct tree *node = NULL;
2738
30
    if (pos != pos->parent->children) {
2739
15
        for (node = pos->parent->children;
2740
15
             node->next != pos;
2741
15
             node = node->next);
2742
15
    }
2743
30
    return node;
2744
30
}
2745
2746
/* When the first step doesn't begin with ROOT then use relative root context
2747
 * instead. */
2748
static struct tree *step_root(struct step *step, struct tree *ctx,
2749
2.33M
                              struct tree *root_ctx) {
2750
2.33M
    struct tree *node = NULL;
2751
2.33M
    switch (step->axis) {
2752
1.89M
    case SELF:
2753
2.08M
    case CHILD:
2754
2.08M
    case SEQ:
2755
2.08M
    case DESCENDANT:
2756
2.08M
    case PARENT:
2757
2.08M
    case ANCESTOR:
2758
2.08M
    case PRECEDING_SIBLING:
2759
2.08M
    case FOLLOWING_SIBLING:
2760
        /* only use root_ctx when ctx is the absolute tree root */
2761
2.08M
        if (ctx == ctx->parent && root_ctx != NULL)
2762
2.35k
            node = root_ctx;
2763
2.08M
        else
2764
2.08M
            node = ctx;
2765
2.08M
        break;
2766
160k
    case ROOT:
2767
254k
    case DESCENDANT_OR_SELF:
2768
254k
        node = ctx;
2769
254k
        break;
2770
0
    default:
2771
0
        assert(0);
2772
2.33M
    }
2773
2.33M
    if (node == NULL)
2774
0
        return NULL;
2775
2.33M
    return node;
2776
2.33M
}
2777
2778
3.95M
static struct tree *step_first(struct step *step, struct tree *ctx) {
2779
3.95M
    struct tree *node = NULL;
2780
3.95M
    switch (step->axis) {
2781
2.03M
    case SELF:
2782
2.12M
    case DESCENDANT_OR_SELF:
2783
2.12M
        node = ctx;
2784
2.12M
        break;
2785
1.66M
    case CHILD:
2786
1.66M
    case SEQ:
2787
1.66M
    case DESCENDANT:
2788
1.66M
        node = ctx->children;
2789
1.66M
        break;
2790
6
    case PARENT:
2791
12
    case ANCESTOR:
2792
12
        node = ctx->parent;
2793
12
        break;
2794
160k
    case ROOT:
2795
343k
        while (ctx->parent != ctx)
2796
182k
            ctx = ctx->parent;
2797
160k
        node = ctx;
2798
160k
        break;
2799
15
    case PRECEDING_SIBLING:
2800
15
        node = tree_prev(ctx);
2801
15
        break;
2802
6
    case FOLLOWING_SIBLING:
2803
6
        node = ctx->next;
2804
6
        break;
2805
0
    default:
2806
0
        assert(0);
2807
3.95M
    }
2808
3.95M
    if (node == NULL)
2809
638k
        return NULL;
2810
3.31M
    if (step_matches(step, node))
2811
3.01M
        return node;
2812
299k
    return step_next(step, ctx, node);
2813
3.31M
}
2814
2815
static struct tree *step_next(struct step *step, struct tree *ctx,
2816
4.98M
                              struct tree *node) {
2817
9.01M
    while (node != NULL) {
2818
5.69M
        switch (step->axis) {
2819
2.03M
        case SELF:
2820
2.03M
            node = NULL;
2821
2.03M
            break;
2822
850
        case SEQ:
2823
2.20M
        case CHILD:
2824
2.20M
            node = node->next;
2825
2.20M
            break;
2826
0
        case DESCENDANT:
2827
1.30M
        case DESCENDANT_OR_SELF:
2828
1.30M
            if (node->children != NULL) {
2829
708k
                node = node->children;
2830
708k
            } else {
2831
1.30M
                while (node->next == NULL && node != ctx)
2832
708k
                    node = node->parent;
2833
593k
                if (node == ctx)
2834
97.3k
                    node = NULL;
2835
496k
                else
2836
496k
                    node = node->next;
2837
593k
            }
2838
1.30M
            break;
2839
6
        case PARENT:
2840
160k
        case ROOT:
2841
160k
            node = NULL;
2842
160k
            break;
2843
6
        case ANCESTOR:
2844
6
            if (node->parent == node)
2845
6
                node = NULL;
2846
0
            else
2847
0
                node = node->parent;
2848
6
            break;
2849
15
        case PRECEDING_SIBLING:
2850
15
            node = tree_prev(node);
2851
15
            break;
2852
0
        case FOLLOWING_SIBLING:
2853
0
            node = node->next;
2854
0
            break;
2855
0
        default:
2856
0
            assert(0);
2857
5.69M
        }
2858
5.69M
        if (node != NULL && step_matches(step, node))
2859
1.66M
            break;
2860
5.69M
    }
2861
4.98M
    return node;
2862
4.98M
}
2863
2864
103k
static struct value *pathx_eval(struct pathx *pathx) {
2865
103k
    struct state *state = pathx->state;
2866
103k
    state->ctx = pathx->origin;
2867
103k
    state->ctx_pos = 1;
2868
103k
    state->ctx_len = 1;
2869
103k
    eval_expr(state->exprs[0], state);
2870
103k
    if (HAS_ERROR(state))
2871
27
        return NULL;
2872
2873
103k
    if (state->values_used != 1) {
2874
0
        STATE_ERROR(state, PATHX_EINTERNAL);
2875
0
        return NULL;
2876
0
    }
2877
103k
    return pop_value(state);
2878
103k
}
2879
2880
12.5k
struct tree *pathx_next(struct pathx *pathx) {
2881
12.5k
    if (pathx->node + 1 < pathx->nodeset->used)
2882
12.3k
        return pathx->nodeset->nodes[++pathx->node];
2883
154
    return NULL;
2884
12.5k
}
2885
2886
/* Find the first node in TREE matching PATH. */
2887
100k
struct tree *pathx_first(struct pathx *pathx) {
2888
100k
    if (pathx->nodeset == NULL) {
2889
69.3k
        struct value *v = pathx_eval(pathx);
2890
2891
69.3k
        if (HAS_ERROR(pathx->state))
2892
25
            goto error;
2893
69.3k
        assert(v->tag == T_NODESET);
2894
69.3k
        pathx->nodeset = v->nodeset;
2895
69.3k
    }
2896
100k
    pathx->node = 0;
2897
100k
    if (pathx->nodeset->used == 0)
2898
3.83k
        return NULL;
2899
96.4k
    else
2900
96.4k
        return pathx->nodeset->nodes[0];
2901
25
 error:
2902
25
    store_error(pathx);
2903
25
    return NULL;
2904
100k
}
2905
2906
/* Find a node in the tree that matches the longest prefix of PATH.
2907
 *
2908
 * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2909
 * prefix matches, and -1 if more than one node in the tree match.
2910
 *
2911
 * TMATCH is set to the tree node that matches, and SMATCH to the next step
2912
 * after the one where TMATCH matched. If no node matches or multiple nodes
2913
 * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2914
 * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2915
 */
2916
static int locpath_search(struct locpath_trace *lpt,
2917
33.6k
                          struct tree **tmatch, struct step **smatch) {
2918
33.6k
    int last;
2919
33.6k
    int result = -1;
2920
2921
72.3k
    for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2922
33.6k
    if (last < 0) {
2923
0
        *smatch = lpt->lp->steps;
2924
0
        result = 1;
2925
0
        goto done;
2926
0
    }
2927
33.6k
    if (lpt->ns[last]->used > 1) {
2928
0
        result = -1;
2929
0
        goto done;
2930
0
    }
2931
33.6k
    result = 0;
2932
33.6k
    *tmatch = lpt->ns[last]->nodes[0];
2933
33.6k
    *smatch = lpt->lp->steps;
2934
144k
    for (int i=0; i < last; i++)
2935
111k
        *smatch = (*smatch)->next;
2936
33.6k
 done:
2937
183k
    for (int i=0; i < lpt->maxns; i++)
2938
149k
        free_nodeset(lpt->ns[i]);
2939
33.6k
    FREE(lpt->ns);
2940
33.6k
    return result;
2941
33.6k
}
2942
2943
static char *step_seq_choose_name(struct pathx *path, struct tree *tree);
2944
2945
/* Expand the tree ROOT so that it contains all components of PATH. PATH
2946
 * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2947
 *
2948
 * Return the first segment that was created by this operation, or NULL on
2949
 * error.
2950
 */
2951
33.6k
int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2952
33.6k
    int r;
2953
33.6k
    struct step *step = NULL;
2954
33.6k
    struct locpath_trace lpt;
2955
33.6k
    struct tree *first_child = NULL;
2956
33.6k
    struct value *v = NULL;
2957
2958
33.6k
    MEMZERO(&lpt, 1);
2959
33.6k
    path->state->locpath_trace = &lpt;
2960
33.6k
    v = pathx_eval(path);
2961
33.6k
    path->state->locpath_trace = NULL;
2962
33.6k
    if (HAS_ERROR(path->state))
2963
0
        goto error;
2964
2965
33.6k
    if (lpt.maxns == 0) {
2966
0
        if (v->tag != T_NODESET || v->nodeset->used == 0) {
2967
0
            STATE_ERROR(path->state, PATHX_ENOMATCH);
2968
0
            goto error;
2969
0
        }
2970
0
        if (v->nodeset->used > 1)
2971
0
            goto error;
2972
0
        *tree = v->nodeset->nodes[0];
2973
0
        return 0;
2974
0
    }
2975
2976
33.6k
    *tree = path->origin;
2977
33.6k
    r = locpath_search(&lpt, tree, &step);
2978
33.6k
    if (r == -1) {
2979
0
        STATE_ERROR(path->state, PATHX_EMMATCH);
2980
0
        goto error;
2981
0
    }
2982
2983
33.6k
    if (step == NULL)
2984
1.68k
        return 0;
2985
2986
31.9k
    struct tree *parent = *tree;
2987
31.9k
    if (parent == NULL)
2988
0
        parent = path->origin;
2989
2990
38.6k
    list_for_each(s, step) {
2991
38.6k
        if (s->axis != CHILD && s->axis != SEQ)
2992
0
            goto error;
2993
38.6k
        if (s->axis==SEQ && s->name == NULL) {
2994
0
            s->name = step_seq_choose_name(path, parent);
2995
0
        }
2996
38.6k
        if (s->name == NULL )
2997
0
            goto error;
2998
38.6k
        struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2999
38.6k
        if (first_child == NULL)
3000
31.9k
            first_child = t;
3001
38.6k
        if (t == NULL || t->label == NULL)
3002
0
            goto error;
3003
38.6k
        list_append(parent->children, t);
3004
0
        parent = t;
3005
38.6k
    }
3006
3007
38.6k
    while (first_child->children != NULL)
3008
6.72k
        first_child = first_child->children;
3009
3010
31.9k
    *tree = first_child;
3011
31.9k
    return 1;
3012
3013
0
 error:
3014
0
    if (first_child != NULL) {
3015
0
        list_remove(first_child, first_child->parent->children);
3016
0
        free_tree(first_child);
3017
0
    }
3018
0
    *tree = NULL;
3019
0
    store_error(path);
3020
0
    return -1;
3021
31.9k
}
3022
3023
/* Generate a numeric string to use for step->name
3024
 * Scan tree->children for the highest numbered label, and add 1 to that
3025
 * numeric labels may be interspersed with #comment or other labels
3026
 */
3027
0
static char *step_seq_choose_name(struct pathx *path, struct tree *tree) {
3028
0
    unsigned long int max_node_n=0;
3029
0
    unsigned long int node_n;
3030
0
    char *step_name;
3031
0
    char *label_end;
3032
0
    for(tree=tree->children; tree!=NULL; tree=tree->next) {
3033
0
        if ( tree->label == NULL)
3034
0
          continue;
3035
0
        node_n=strtoul(tree->label, &label_end, 10);
3036
0
        if ( label_end == tree->label || *label_end != '\0' )
3037
          /* label is not a number - ignore it */
3038
0
          continue;
3039
0
        if ( node_n >= ULONG_MAX ) {
3040
0
          STATE_ERROR(path->state, PATHX_ENUMBER);
3041
0
          return NULL;
3042
0
        }
3043
0
        if( node_n > max_node_n )
3044
0
            max_node_n = node_n;
3045
0
    }
3046
0
    if (asprintf(&step_name,"%lu",max_node_n+1) >= 0)
3047
0
        return step_name;
3048
0
    else
3049
0
        return NULL;
3050
0
}
3051
3052
67.0k
int pathx_find_one(struct pathx *path, struct tree **tree) {
3053
67.0k
    *tree = pathx_first(path);
3054
67.0k
    if (HAS_ERROR(path->state))
3055
19
        return -1;
3056
67.0k
    return path->nodeset->used;
3057
67.0k
}
3058
3059
0
struct error *err_of_pathx(struct pathx *px) {
3060
0
    return px->state->error;
3061
0
}
3062
3063
1.58k
const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
3064
1.58k
    int errcode = PATHX_ENOMEM;
3065
3066
1.58k
    if (path != NULL) {
3067
1.58k
        if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
3068
1.58k
            errcode = path->state->errcode;
3069
0
        else
3070
0
            errcode = PATHX_EINTERNAL;
3071
3072
1.58k
        if (txt)
3073
0
            *txt = path->state->txt;
3074
3075
1.58k
        if (pos)
3076
1.58k
            *pos = path->state->pos - path->state->txt;
3077
1.58k
    }
3078
1.58k
    return errcodes[errcode];
3079
1.58k
}
3080
3081
/*
3082
 * Symbol tables
3083
 */
3084
static struct pathx_symtab
3085
*make_symtab(struct pathx_symtab *symtab, const char *name,
3086
             struct value *value)
3087
92
{
3088
92
    struct pathx_symtab *new;
3089
92
    char *n = NULL;
3090
3091
92
    n = strdup(name);
3092
92
    if (n == NULL)
3093
0
        return NULL;
3094
3095
92
    if (ALLOC(new) < 0) {
3096
0
        free(n);
3097
0
        return NULL;
3098
0
    }
3099
92
    new->name = n;
3100
92
    new->value = value;
3101
92
    if (symtab == NULL) {
3102
92
        return new;
3103
92
    } else {
3104
0
        new->next = symtab->next;
3105
0
        symtab->next = new;
3106
0
    }
3107
0
    return symtab;
3108
92
}
3109
3110
1.68k
void free_symtab(struct pathx_symtab *symtab) {
3111
3112
1.77k
    while (symtab != NULL) {
3113
92
        struct pathx_symtab *del = symtab;
3114
92
        symtab = del->next;
3115
92
        free(del->name);
3116
92
        release_value(del->value);
3117
92
        free(del->value);
3118
92
        free(del);
3119
92
    }
3120
1.68k
}
3121
3122
0
struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
3123
0
    return pathx->state->symtab;
3124
0
}
3125
3126
static int pathx_symtab_set(struct pathx_symtab **symtab,
3127
92
                            const char *name, struct value *v) {
3128
92
    int found = 0;
3129
3130
92
    list_for_each(tab, *symtab) {
3131
0
        if (STREQ(tab->name, name)) {
3132
0
            release_value(tab->value);
3133
0
            free(tab->value);
3134
0
            tab->value = v;
3135
0
            found = 1;
3136
0
            break;
3137
0
        }
3138
0
    }
3139
3140
92
    if (!found) {
3141
92
        struct pathx_symtab *new = NULL;
3142
3143
92
        new = make_symtab(*symtab, name, v);
3144
92
        if (new == NULL)
3145
0
            goto error;
3146
92
        *symtab = new;
3147
92
    }
3148
92
    return 0;
3149
0
 error:
3150
0
    return -1;
3151
92
}
3152
3153
int pathx_symtab_define(struct pathx_symtab **symtab,
3154
94
                        const char *name, struct pathx *px) {
3155
94
    int r;
3156
94
    struct value *value = NULL, *v = NULL;
3157
94
    struct state *state = px->state;
3158
3159
94
    value = pathx_eval(px);
3160
94
    if (HAS_ERROR(px->state))
3161
2
        goto error;
3162
3163
92
    if (ALLOC(v) < 0) {
3164
0
        STATE_ENOMEM;
3165
0
        goto error;
3166
0
    }
3167
3168
92
    *v = *value;
3169
92
    value->tag = T_BOOLEAN;
3170
3171
92
    r = pathx_symtab_set(symtab, name, v);
3172
92
    if (r < 0) {
3173
0
        STATE_ENOMEM;
3174
0
        goto error;
3175
0
    }
3176
3177
92
    if (v->tag == T_NODESET)
3178
60
        return v->nodeset->used;
3179
32
    else
3180
32
        return 0;
3181
2
 error:
3182
2
    release_value(value);
3183
2
    free(value);
3184
2
    release_value(v);
3185
2
    free(v);
3186
2
    store_error(px);
3187
2
    return -1;
3188
92
}
3189
3190
0
int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
3191
0
    struct pathx_symtab *del = NULL;
3192
3193
0
    for(del = *symtab;
3194
0
        del != NULL && !STREQ(del->name, name);
3195
0
        del = del->next);
3196
0
    if (del == NULL)
3197
0
        return 0;
3198
0
    list_remove(del, *symtab);
3199
0
    free_symtab(del);
3200
0
    return 0;
3201
0
}
3202
3203
int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
3204
0
                             const char *name, struct tree *tree) {
3205
0
    struct value *v = NULL;
3206
0
    int r;
3207
3208
0
    if (ALLOC(v) < 0)
3209
0
        goto error;
3210
3211
0
    v->tag = T_NODESET;
3212
0
    if (ALLOC(v->nodeset) < 0)
3213
0
        goto error;
3214
0
    if (ALLOC_N(v->nodeset->nodes, 1) < 0)
3215
0
        goto error;
3216
0
    v->nodeset->used = 1;
3217
0
    v->nodeset->size = 1;
3218
0
    v->nodeset->nodes[0] = tree;
3219
3220
0
    r = pathx_symtab_set(symtab, name, v);
3221
0
    if (r < 0)
3222
0
        goto error;
3223
0
    return 1;
3224
0
 error:
3225
0
    release_value(v);
3226
0
    free(v);
3227
0
    return -1;
3228
0
}
3229
3230
int
3231
0
pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
3232
0
    struct value *v = lookup_var(name, symtab);
3233
3234
0
    if (v == NULL || v->tag != T_NODESET)
3235
0
        return -1;
3236
3237
0
    return v->nodeset->used;
3238
0
}
3239
3240
struct tree *
3241
pathx_symtab_get_tree(struct pathx_symtab *symtab,
3242
0
                      const char *name, int i) {
3243
0
    struct value *v = lookup_var(name, symtab);
3244
0
    if (v == NULL)
3245
0
        return NULL;
3246
0
    if (v->tag != T_NODESET)
3247
0
        return NULL;
3248
0
    if (i >= v->nodeset->used)
3249
0
        return NULL;
3250
0
    return v->nodeset->nodes[i];
3251
0
}
3252
3253
void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3254
0
                                     const struct tree *tree) {
3255
0
    list_for_each(tab, symtab) {
3256
0
        if (tab->value->tag != T_NODESET)
3257
0
            continue;
3258
0
        struct nodeset *ns = tab->value->nodeset;
3259
0
        for (int i=0; i < ns->used;) {
3260
0
            struct tree *t = ns->nodes[i];
3261
0
            while (t != t->parent && t != tree)
3262
0
                t = t->parent;
3263
0
            if (t == tree)
3264
0
                ns_remove(ns, i, 1);
3265
0
            else
3266
0
                i += 1;
3267
0
        }
3268
0
    }
3269
0
}
3270
3271
/*
3272
 * Local variables:
3273
 *  indent-tabs-mode: nil
3274
 *  c-indent-level: 4
3275
 *  c-basic-offset: 4
3276
 *  tab-width: 4
3277
 * End:
3278
 */