Coverage Report

Created: 2026-03-19 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/augeas/src/pathx.c
Line
Count
Source
1
/*
2
 * pathx.c: handling path expressions
3
 *
4
 * Copyright (C) 2007-2016 David Lutterkort
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
19
 *
20
 * Author: David Lutterkort <dlutter@redhat.com>
21
 */
22
23
#include <config.h>
24
#include <internal.h>
25
#include <stdint.h>
26
#include <stdbool.h>
27
#include <memory.h>
28
#include <ctype.h>
29
30
#include "ref.h"
31
#include "regexp.h"
32
#include "errcode.h"
33
34
static const char *const errcodes[] = {
35
    "no error",
36
    "empty name",
37
    "illegal string literal",
38
    "illegal number",                             /* PATHX_ENUMBER */
39
    "string missing ending ' or \"",
40
    "expected '='",
41
    "allocation failed",
42
    "unmatched '['",
43
    "unmatched '('",
44
    "expected a '/'",
45
    "internal error",                             /* PATHX_EINTERNAL */
46
    "type error",                                 /* PATHX_ETYPE */
47
    "undefined variable",                         /* PATHX_ENOVAR */
48
    "garbage at end of path expression",          /* PATHX_EEND */
49
    "no match for path expression",               /* PATHX_ENOMATCH */
50
    "wrong number of arguments in function call", /* PATHX_EARITY */
51
    "invalid regular expression",                 /* PATHX_EREGEXP */
52
    "too many matches",                           /* PATHX_EMMATCH */
53
    "wrong flag for regexp"                       /* PATHX_EREGEXPFLAG */
54
};
55
56
/*
57
 * Path expressions are strings that use a notation modelled on XPath.
58
 */
59
60
enum type {
61
    T_NONE = 0,     /* Not a type */
62
    T_NODESET,
63
    T_BOOLEAN,
64
    T_NUMBER,
65
    T_STRING,
66
    T_REGEXP
67
};
68
69
enum expr_tag {
70
    E_FILTER,
71
    E_BINARY,
72
    E_VALUE,
73
    E_VAR,
74
    E_APP
75
};
76
77
enum binary_op {
78
    OP_EQ,         /* '='  */
79
    OP_NEQ,        /* '!=' */
80
    OP_LT,         /* '<'  */
81
    OP_LE,         /* '<=' */
82
    OP_GT,         /* '>'  */
83
    OP_GE,         /* '>=' */
84
    OP_PLUS,       /* '+'  */
85
    OP_MINUS,      /* '-'  */
86
    OP_STAR,       /* '*'  */
87
    OP_AND,        /* 'and' */
88
    OP_OR,         /* 'or' */
89
    OP_ELSE,       /* 'else' */
90
    OP_RE_MATCH,   /* '=~' */
91
    OP_RE_NOMATCH, /* '!~' */
92
    OP_UNION       /* '|' */
93
};
94
95
struct pred {
96
    int               nexpr;
97
    struct expr     **exprs;
98
};
99
100
enum axis {
101
    SELF,
102
    CHILD,
103
    SEQ,
104
    DESCENDANT,
105
    DESCENDANT_OR_SELF,
106
    PARENT,
107
    ANCESTOR,
108
    ROOT,
109
    PRECEDING_SIBLING,
110
    FOLLOWING_SIBLING
111
};
112
113
/* This array is indexed by enum axis */
114
static const char *const axis_names[] = {
115
    "self",
116
    "child",
117
    "seq",        /* Like child, but only selects node-names which are integers */
118
    "descendant",
119
    "descendant-or-self",
120
    "parent",
121
    "ancestor",
122
    "root",
123
    "preceding-sibling",
124
    "following-sibling"
125
};
126
127
/* The characters that can follow a name in a location expression (aka path)
128
 * The parser will assume that name (path component) is finished when it
129
 * encounters any of these characters, unless they are escaped by preceding
130
 * them with a '\\'.
131
 *
132
 * See parse_name for the gory details
133
 */
134
static const char name_follow[] = "][|/=()!,";
135
136
/* Doubly linked list of location steps. Besides the information from the
137
 * path expression, also contains information to iterate over a node set,
138
 * in particular, the context node CTX for the step, and the current node
139
 * CUR within that context.
140
 */
141
struct step {
142
    struct step *next;
143
    enum axis    axis;
144
    char        *name;              /* NULL to match any name */
145
    struct pred *predicates;
146
};
147
148
/* Initialise the root nodeset with the first step */
149
static struct tree *step_root(struct step *step, struct tree *ctx,
150
                              struct tree *root_ctx);
151
/* Iteration over the nodes on a step, ignoring the predicates */
152
static struct tree *step_first(struct step *step, struct tree *ctx);
153
static struct tree *step_next(struct step *step, struct tree *ctx,
154
                              struct tree *node);
155
156
struct pathx_symtab {
157
    struct pathx_symtab *next;
158
    char                *name;
159
    struct value        *value;
160
};
161
162
struct pathx {
163
    struct state   *state;
164
    struct nodeset *nodeset;
165
    int             node;
166
    struct tree    *origin;
167
};
168
169
3.81M
#define L_BRACK '['
170
619k
#define R_BRACK ']'
171
172
struct locpath {
173
    struct step *steps;
174
};
175
176
struct nodeset {
177
    struct tree **nodes;
178
    size_t        used;
179
    size_t        size;
180
};
181
182
typedef uint32_t value_ind_t;
183
184
struct value {
185
    enum type tag;
186
    union {
187
        struct nodeset  *nodeset;     /* T_NODESET */
188
        int64_t          number;      /* T_NUMBER  */
189
        char            *string;      /* T_STRING  */
190
        bool             boolval;     /* T_BOOLEAN */
191
        struct regexp   *regexp;      /* T_REGEXP  */
192
    };
193
};
194
195
struct expr {
196
    enum expr_tag tag;
197
    enum type     type;
198
    union {
199
        struct {                       /* E_FILTER */
200
            struct expr     *primary;
201
            struct pred     *predicates;
202
            struct locpath  *locpath;
203
        };
204
        struct {                       /* E_BINARY */
205
            enum binary_op op;
206
            struct expr *left;
207
            struct expr *right;
208
            bool   left_matched;
209
        };
210
        value_ind_t      value_ind;    /* E_VALUE */
211
        char            *ident;        /* E_VAR */
212
        struct {                       /* E_APP */
213
            const struct func *func;
214
            struct expr       **args;
215
            /* If fold is true, replace this function invocation
216
             * with its value after the first time we evaluate this
217
             * expression */
218
            bool              fold;
219
        };
220
    };
221
};
222
223
struct locpath_trace {
224
    unsigned int      maxns;
225
    struct nodeset  **ns;
226
    struct locpath   *lp;
227
};
228
229
/* Internal state of the evaluator/parser */
230
struct state {
231
    pathx_errcode_t errcode;
232
    const char     *file;
233
    int             line;
234
    char           *errmsg;
235
236
    const char     *txt;  /* Entire expression */
237
    const char     *pos;  /* Current position within TXT during parsing */
238
239
    struct tree *ctx; /* The current node */
240
    uint            ctx_pos;
241
    uint            ctx_len;
242
243
    struct tree *root_ctx; /* Root context for relative paths */
244
245
    /* A table of all values. The table is dynamically reallocated, i.e.
246
     * pointers to struct value should not be used across calls that
247
     * might allocate new values
248
     *
249
     * value_pool[0] is always the boolean false, and value_pool[1]
250
     * always the boolean true
251
     */
252
    struct value  *value_pool;
253
    value_ind_t    value_pool_used;
254
    value_ind_t    value_pool_size;
255
    /* Stack of values (as indices into value_pool), with bottom of
256
       stack in values[0] */
257
    value_ind_t   *values;
258
    size_t         values_used;
259
    size_t         values_size;
260
    /* Stack of expressions, with bottom of stack in exprs[0] */
261
    struct expr  **exprs;
262
    size_t         exprs_used;
263
    size_t         exprs_size;
264
    /* Trace of a locpath evaluation, needed by pathx_expand_tree.
265
       Generally NULL, unless a trace is needed.
266
     */
267
    struct locpath_trace *locpath_trace;
268
    /* Symbol table for variable lookups */
269
    struct pathx_symtab *symtab;
270
    /* Error structure, used to communicate errors to struct augeas;
271
     * we never own this structure, and therefore never free it */
272
    struct error        *error;
273
    /* If a filter-expression contains the 'else' operator, we need
274
     * we need to evaluate the filter twice. The has_else flag
275
     * means we don't do this unless we really need to */
276
    bool                 has_else;
277
};
278
279
/* We consider NULL and the empty string to be equal */
280
ATTRIBUTE_PURE
281
780k
static inline int streqx(const char *s1, const char *s2) {
282
780k
    if (s1 == NULL)
283
0
        return (s2 == NULL || strlen(s2) == 0);
284
780k
    if (s2 == NULL)
285
0
        return strlen(s1) == 0;
286
780k
    return STREQ(s1, s2);
287
780k
}
288
289
/* Functions */
290
291
typedef void (*func_impl_t)(struct state *state, int nargs);
292
293
struct func {
294
    const char      *name;
295
    unsigned int     arity;
296
    enum type        type;
297
    bool             pure;      /* Result only depends on args */
298
    const enum type *arg_types;
299
    func_impl_t      impl;
300
};
301
302
static void func_last(struct state *state, int nargs);
303
static void func_position(struct state *state, int nargs);
304
static void func_count(struct state *state, int nargs);
305
static void func_label(struct state *state, int nargs);
306
static void func_regexp(struct state *state, int nargs);
307
static void func_regexp_flag(struct state *state, int nargs);
308
static void func_glob(struct state *state, int nargs);
309
static void func_int(struct state *state, int nargs);
310
static void func_not(struct state *state, int nargs);
311
static void func_modified(struct state *state, int nargs);
312
313
static const enum type arg_types_nodeset[] = { T_NODESET };
314
static const enum type arg_types_string[] = { T_STRING };
315
static const enum type arg_types_bool[] = { T_BOOLEAN };
316
static const enum type arg_types_string_string[] = { T_STRING, T_STRING };
317
static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING };
318
319
static const struct func builtin_funcs[] = {
320
    { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
321
      .impl = func_last, .pure = false },
322
    { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL,
323
      .impl = func_position, .pure = false },
324
    { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL,
325
      .impl = func_label, .pure = false },
326
    { .name = "count", .arity = 1, .type = T_NUMBER,
327
      .arg_types = arg_types_nodeset,
328
      .impl = func_count, .pure = false },
329
    { .name = "regexp", .arity = 1, .type = T_REGEXP,
330
      .arg_types = arg_types_string,
331
      .impl = func_regexp, .pure = true },
332
    { .name = "regexp", .arity = 1, .type = T_REGEXP,
333
      .arg_types = arg_types_nodeset,
334
      .impl = func_regexp, .pure = true },
335
    { .name = "regexp", .arity = 2, .type = T_REGEXP,
336
      .arg_types = arg_types_string_string,
337
      .impl = func_regexp_flag, .pure = true },
338
    { .name = "regexp", .arity = 2, .type = T_REGEXP,
339
      .arg_types = arg_types_nodeset_string,
340
      .impl = func_regexp_flag, .pure = true },
341
    { .name = "glob", .arity = 1, .type = T_REGEXP,
342
      .arg_types = arg_types_string,
343
      .impl = func_glob, .pure = true },
344
    { .name = "glob", .arity = 1, .type = T_REGEXP,
345
      .arg_types = arg_types_nodeset,
346
      .impl = func_glob, .pure = true },
347
    { .name = "int", .arity = 1, .type = T_NUMBER,
348
      .arg_types = arg_types_string, .impl = func_int, .pure = false },
349
    { .name = "int", .arity = 1, .type = T_NUMBER,
350
      .arg_types = arg_types_nodeset, .impl = func_int, .pure = false },
351
    { .name = "int", .arity = 1, .type = T_NUMBER,
352
      .arg_types = arg_types_bool, .impl = func_int, .pure = false },
353
    { .name = "modified", .arity = 0, .type = T_BOOLEAN,
354
      .arg_types = NULL, .impl = func_modified, .pure = false },
355
    { .name = "not", .arity = 1, .type = T_BOOLEAN,
356
      .arg_types = arg_types_bool, .impl = func_not, .pure = true }
357
};
358
359
#define RET_ON_ERROR                                                    \
360
34.0M
    if (state->errcode != PATHX_NOERROR) return
361
362
#define RET0_ON_ERROR                                                   \
363
1.87M
    if (state->errcode != PATHX_NOERROR) return 0
364
365
#define STATE_ERROR(state, err)                                         \
366
405
    do {                                                                \
367
405
        state->errcode = err;                                           \
368
405
        state->file = __FILE__;                                         \
369
405
        state->line = __LINE__;                                         \
370
405
    } while (0)
371
372
19.9M
#define HAS_ERROR(state) (state->errcode != PATHX_NOERROR)
373
374
0
#define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM)
375
376
/*
377
 * Free the various data structures
378
 */
379
380
static void free_expr(struct expr *expr);
381
382
4.51M
static void free_pred(struct pred *pred) {
383
4.51M
    if (pred == NULL)
384
4.38M
        return;
385
386
744k
    for (int i=0; i < pred->nexpr; i++) {
387
617k
        free_expr(pred->exprs[i]);
388
617k
    }
389
127k
    free(pred->exprs);
390
127k
    free(pred);
391
127k
}
392
393
230k
static void free_step(struct step *step) {
394
346k
    while (step != NULL) {
395
115k
        struct step *del = step;
396
115k
        step = del->next;
397
115k
        free(del->name);
398
115k
        free_pred(del->predicates);
399
115k
        free(del);
400
115k
    }
401
230k
}
402
403
2.26M
static void free_locpath(struct locpath *locpath) {
404
2.26M
    if (locpath == NULL)
405
230k
        return;
406
4.28M
    while (locpath->steps != NULL) {
407
2.24M
        struct step *step = locpath->steps;
408
2.24M
        locpath->steps = step->next;
409
2.24M
        free(step->name);
410
2.24M
        free_pred(step->predicates);
411
2.24M
        free(step);
412
2.24M
    }
413
2.03M
    free(locpath);
414
2.03M
}
415
416
6.10M
static void free_expr(struct expr *expr) {
417
6.10M
    if (expr == NULL)
418
2.11M
        return;
419
3.99M
    switch (expr->tag) {
420
2.15M
    case E_FILTER:
421
2.15M
        free_expr(expr->primary);
422
2.15M
        free_pred(expr->predicates);
423
2.15M
        free_locpath(expr->locpath);
424
2.15M
        break;
425
899k
    case E_BINARY:
426
899k
        free_expr(expr->left);
427
899k
        free_expr(expr->right);
428
899k
        break;
429
901k
    case E_VALUE:
430
901k
        break;
431
0
    case E_VAR:
432
0
        free(expr->ident);
433
0
        break;
434
42.4k
    case E_APP:
435
85.4k
        for (int i=0; i < expr->func->arity; i++)
436
43.0k
            free_expr(expr->args[i]);
437
42.4k
        free(expr->args);
438
42.4k
        break;
439
0
    default:
440
0
        assert(0);
441
3.99M
    }
442
3.99M
    free(expr);
443
3.99M
}
444
445
4.52M
static void free_nodeset(struct nodeset *ns) {
446
4.52M
    if (ns != NULL) {
447
4.52M
        free(ns->nodes);
448
4.52M
        free(ns);
449
4.52M
    }
450
4.52M
}
451
452
/* Free all objects used by VALUE, but not VALUE itself */
453
2.46M
static void release_value(struct value *v) {
454
2.46M
    if (v == NULL)
455
6
        return;
456
457
2.46M
    switch (v->tag) {
458
1.48M
    case T_NODESET:
459
1.48M
        free_nodeset(v->nodeset);
460
1.48M
        break;
461
87.8k
    case T_STRING:
462
87.8k
        free(v->string);
463
87.8k
        break;
464
15.7k
    case T_BOOLEAN:
465
873k
    case T_NUMBER:
466
873k
        break;
467
14.5k
    case T_REGEXP:
468
14.5k
        unref(v->regexp, regexp);
469
14.5k
        break;
470
14.5k
    default:
471
0
        assert(0);
472
2.46M
    }
473
2.46M
}
474
475
7.82k
static void free_state(struct state *state) {
476
7.82k
    if (state == NULL)
477
0
        return;
478
479
1.50M
    for(int i=0; i < state->exprs_used; i++)
480
1.49M
        free_expr(state->exprs[i]);
481
7.82k
    free(state->exprs);
482
483
2.47M
    for(int i=0; i < state->value_pool_used; i++)
484
2.46M
        release_value(state->value_pool + i);
485
7.82k
    free(state->value_pool);
486
7.82k
    free(state->values);
487
7.82k
    free(state);
488
7.82k
}
489
490
8.02k
void free_pathx(struct pathx *pathx) {
491
8.02k
    if (pathx == NULL)
492
197
        return;
493
7.82k
    free_state(pathx->state);
494
7.82k
    free(pathx);
495
7.82k
}
496
497
/*
498
 * Nodeset helpers
499
 */
500
4.39M
static struct nodeset *make_nodeset(struct state *state) {
501
4.39M
    struct nodeset *result;
502
4.39M
    if (ALLOC(result) < 0)
503
4.39M
        STATE_ENOMEM;
504
4.39M
    return result;
505
4.39M
}
506
507
/* Add NODE to NS if it is not in NS yet. This relies on the flag
508
 * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS
509
 * as soon as we are done adding nodes to it.
510
 */
511
static void ns_add(struct nodeset *ns, struct tree *node,
512
3.91M
                   struct state *state) {
513
3.91M
    if (node->added)
514
2.92k
        return;
515
3.90M
    if (ns->used >= ns->size) {
516
2.69M
        size_t size = 2 * ns->size;
517
2.69M
        if (size < 10) size = 10;
518
2.69M
        if (REALLOC_N(ns->nodes, size) < 0)
519
2.69M
            STATE_ENOMEM;
520
2.69M
        ns->size = size;
521
2.69M
    }
522
3.90M
    ns->nodes[ns->used] = node;
523
3.90M
    node->added = 1;
524
3.90M
    ns->used += 1;
525
3.90M
}
526
527
4.46M
static void ns_clear_added(struct nodeset *ns) {
528
10.3M
    for (int i=0; i < ns->used; i++)
529
5.86M
        ns->nodes[i]->added = 0;
530
4.46M
}
531
532
static struct nodeset *
533
clone_nodeset(struct nodeset *ns, struct state *state)
534
132k
{
535
132k
    struct nodeset *clone;
536
132k
    if (ALLOC(clone) < 0) {
537
0
        STATE_ENOMEM;
538
0
        return NULL;
539
0
    }
540
132k
    if (ALLOC_N(clone->nodes, ns->used) < 0) {
541
0
        free(clone);
542
0
        STATE_ENOMEM;
543
0
        return NULL;
544
0
    }
545
132k
    clone->used = ns->used;
546
132k
    clone->size = ns->used;
547
2.08M
    for (int i=0; i < ns->used; i++)
548
1.95M
        clone->nodes[i] = ns->nodes[i];
549
132k
    return clone;
550
132k
}
551
552
/*
553
 * Handling values
554
 */
555
2.44M
static value_ind_t make_value(enum type tag, struct state *state) {
556
2.44M
    assert(tag != T_BOOLEAN);
557
558
2.44M
    if (state->value_pool_used >= state->value_pool_size) {
559
2.67k
        value_ind_t new_size = 2*state->value_pool_size;
560
2.67k
        if (new_size <= state->value_pool_size) {
561
0
            STATE_ENOMEM;
562
0
            return 0;
563
0
        }
564
2.67k
        if (REALLOC_N(state->value_pool, new_size) < 0) {
565
0
            STATE_ENOMEM;
566
0
            return 0;
567
0
        }
568
2.67k
        state->value_pool_size = new_size;
569
2.67k
    }
570
2.44M
    state->value_pool[state->value_pool_used].tag = tag;
571
2.44M
    state->value_pool[state->value_pool_used].nodeset = NULL;
572
2.44M
    return state->value_pool_used++;
573
2.44M
}
574
575
0
static value_ind_t clone_value(struct value *v, struct state *state) {
576
0
    value_ind_t vind = make_value(v->tag, state);
577
0
    RET0_ON_ERROR;
578
0
    struct value *clone = state->value_pool + vind;
579
580
0
    switch (v->tag) {
581
0
    case T_NODESET:
582
0
        clone->nodeset = clone_nodeset(v->nodeset, state);
583
0
        break;
584
0
    case T_NUMBER:
585
0
        clone->number = v->number;
586
0
        break;
587
0
    case T_STRING:
588
0
        clone->string = strdup(v->string);
589
0
        if (clone->string == NULL) {
590
0
            STATE_ENOMEM;
591
0
        }
592
0
        break;
593
0
    case T_BOOLEAN:
594
0
        clone->boolval = v->boolval;
595
0
        break;
596
0
    case T_REGEXP:
597
0
        clone->regexp = ref(v->regexp);
598
0
        break;
599
0
    default:
600
0
        assert(0);
601
0
    }
602
0
    return vind;
603
0
}
604
605
3.71M
static value_ind_t pop_value_ind(struct state *state) {
606
3.71M
    if (state->values_used > 0) {
607
3.71M
        state->values_used -= 1;
608
3.71M
        return state->values[state->values_used];
609
3.71M
    } else {
610
0
        STATE_ERROR(state, PATHX_EINTERNAL);
611
0
        assert(0);
612
0
        return 0;
613
0
    }
614
3.71M
}
615
616
3.71M
static struct value *pop_value(struct state *state) {
617
3.71M
    value_ind_t vind = pop_value_ind(state);
618
3.71M
    if (HAS_ERROR(state))
619
0
        return NULL;
620
3.71M
    return state->value_pool + vind;
621
3.71M
}
622
623
3.72M
static void push_value(value_ind_t vind, struct state *state) {
624
3.72M
    if (state->values_used >= state->values_size) {
625
7.50k
        size_t new_size = 2*state->values_size;
626
7.50k
        if (new_size == 0) new_size = 8;
627
7.50k
        if (REALLOC_N(state->values, new_size) < 0) {
628
0
            STATE_ENOMEM;
629
0
            return;
630
0
        }
631
7.50k
        state->values_size = new_size;
632
7.50k
    }
633
3.72M
    state->values[state->values_used++] = vind;
634
3.72M
}
635
636
1.13M
static void push_boolean_value(int b, struct state *state) {
637
1.13M
    push_value(b != 0, state);
638
1.13M
}
639
640
ATTRIBUTE_PURE
641
1.07M
static struct value *expr_value(struct expr *expr, struct state *state) {
642
1.07M
    return state->value_pool + expr->value_ind;
643
1.07M
}
644
645
/*************************************************************************
646
 * Evaluation
647
 ************************************************************************/
648
static void eval_expr(struct expr *expr, struct state *state);
649
650
651
#define ensure_arity(min, max)                                          \
652
59.4k
    if (nargs < min || nargs > max) {                                   \
653
0
        STATE_ERROR(state, PATHX_EINTERNAL);                            \
654
0
        return;                                                         \
655
0
    }
656
657
0
static void func_last(struct state *state, int nargs) {
658
0
    ensure_arity(0, 0);
659
0
    value_ind_t vind = make_value(T_NUMBER, state);
660
0
    RET_ON_ERROR;
661
662
0
    state->value_pool[vind].number = state->ctx_len;
663
0
    push_value(vind, state);
664
0
}
665
666
0
static void func_position(struct state *state, int nargs) {
667
0
    ensure_arity(0, 0);
668
0
    value_ind_t vind = make_value(T_NUMBER, state);
669
0
    RET_ON_ERROR;
670
671
0
    state->value_pool[vind].number = state->ctx_pos;
672
0
    push_value(vind, state);
673
0
}
674
675
0
static void func_count(struct state *state, int nargs) {
676
0
    ensure_arity(1, 1);
677
0
    value_ind_t vind = make_value(T_NUMBER, state);
678
0
    RET_ON_ERROR;
679
680
0
    struct value *ns = pop_value(state);
681
0
    state->value_pool[vind].number = ns->nodeset->used;
682
0
    push_value(vind, state);
683
0
}
684
685
0
static void func_label(struct state *state, int nargs) {
686
0
    ensure_arity(0, 0);
687
0
    value_ind_t vind = make_value(T_STRING, state);
688
0
    char *s;
689
690
0
    RET_ON_ERROR;
691
692
0
    if (state->ctx->label)
693
0
        s = strdup(state->ctx->label);
694
0
    else
695
0
        s = strdup("");
696
0
    if (s == NULL) {
697
0
        STATE_ENOMEM;
698
0
        return;
699
0
    }
700
0
    state->value_pool[vind].string = s;
701
0
    push_value(vind, state);
702
0
}
703
704
44.9k
static void func_int(struct state *state, int nargs) {
705
44.9k
    ensure_arity(1, 1);
706
44.9k
    value_ind_t vind = make_value(T_NUMBER, state);
707
44.9k
    int64_t i = -1;
708
44.9k
    RET_ON_ERROR;
709
710
44.9k
    struct value *v = pop_value(state);
711
44.9k
    if (v->tag == T_BOOLEAN) {
712
44.9k
        i = v->boolval;
713
44.9k
    } else {
714
0
        const char *s = NULL;
715
0
        if (v->tag == T_STRING) {
716
0
            s = v->string;
717
0
        } else {
718
            /* T_NODESET */
719
0
            if (v->nodeset->used != 1) {
720
0
                STATE_ERROR(state, PATHX_EMMATCH);
721
0
                return;
722
0
            }
723
0
            s = v->nodeset->nodes[0]->value;
724
0
        }
725
0
        if (s != NULL) {
726
0
            int r;
727
0
            r = xstrtoint64(s, 10, &i);
728
0
            if (r < 0) {
729
0
                STATE_ERROR(state, PATHX_ENUMBER);
730
0
                return;
731
0
            }
732
0
        }
733
0
    }
734
44.9k
    state->value_pool[vind].number = i;
735
44.9k
    push_value(vind, state);
736
44.9k
}
737
738
0
static void func_modified(struct state *state, int nargs) {
739
0
    ensure_arity(0, 0);
740
741
0
    push_boolean_value(state->ctx->dirty , state);
742
0
}
743
744
0
static void func_not(struct state *state, int nargs) {
745
0
    ensure_arity(1, 1);
746
0
    RET_ON_ERROR;
747
748
0
    struct value *v = pop_value(state);
749
0
    if (v->tag == T_BOOLEAN) {
750
0
        push_boolean_value(! v->boolval, state);
751
0
    }
752
0
}
753
754
static struct regexp *
755
14.1k
nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) {
756
14.1k
    struct regexp *result = NULL;
757
14.1k
    struct regexp **rx = NULL;
758
14.1k
    int used = 0;
759
760
15.0k
    for (int i = 0; i < ns->used; i++) {
761
897
        if (ns->nodes[i]->value != NULL)
762
359
            used += 1;
763
897
    }
764
765
14.1k
    if (used == 0) {
766
        /* If the nodeset is empty, make sure we produce a regexp
767
         * that never matches anything */
768
14.0k
        result = make_regexp_unescape(info, "[^\001-\7ff]", nocase);
769
14.0k
    } else {
770
53
        if (ALLOC_N(rx, ns->used) < 0)
771
0
            goto error;
772
806
        for (int i=0; i < ns->used; i++) {
773
753
            if (ns->nodes[i]->value == NULL)
774
394
                continue;
775
776
359
            if (glob)
777
30
                rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value);
778
329
            else
779
329
                rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0);
780
359
            if (rx[i] == NULL)
781
0
                goto error;
782
359
        }
783
53
        result = regexp_union_n(info, ns->used, rx);
784
53
    }
785
786
14.1k
 error:
787
14.1k
    if (rx != NULL) {
788
806
        for (int i=0; i < ns->used; i++)
789
753
            unref(rx[i], regexp);
790
53
        free(rx);
791
53
    }
792
14.1k
    return result;
793
14.1k
}
794
795
14.5k
static void func_regexp_or_glob(struct state *state, int glob, int nocase) {
796
14.5k
    value_ind_t vind = make_value(T_REGEXP, state);
797
14.5k
    int r;
798
799
14.5k
    RET_ON_ERROR;
800
801
14.5k
    struct value *v = pop_value(state);
802
14.5k
    struct regexp *rx = NULL;
803
804
14.5k
    if (v->tag == T_STRING) {
805
411
        if (glob)
806
0
            rx = make_regexp_from_glob(state->error->info, v->string);
807
411
        else
808
411
            rx = make_regexp_unescape(state->error->info, v->string, nocase);
809
14.1k
    } else if (v->tag == T_NODESET) {
810
14.1k
        rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase);
811
14.1k
    } else {
812
0
        assert(0);
813
0
    }
814
815
14.5k
    if (rx == NULL) {
816
0
        STATE_ENOMEM;
817
0
        return;
818
0
    }
819
820
14.5k
    state->value_pool[vind].regexp = rx;
821
14.5k
    r = regexp_compile(rx);
822
14.5k
    if (r < 0) {
823
89
        const char *msg;
824
89
        regexp_check(rx, &msg);
825
89
        state->errmsg = strdup(msg);
826
89
        STATE_ERROR(state, PATHX_EREGEXP);
827
89
        return;
828
89
    }
829
14.4k
    push_value(vind, state);
830
14.4k
}
831
832
718
static void func_regexp(struct state *state, int nargs) {
833
718
    ensure_arity(1, 1);
834
718
    func_regexp_or_glob(state, 0, 0);
835
718
}
836
837
13.8k
static void func_regexp_flag(struct state *state, int nargs) {
838
13.8k
    ensure_arity(2, 2);
839
13.8k
    int nocase = 0;
840
13.8k
    struct value *f = pop_value(state);
841
842
13.8k
    if (STREQ("i", f->string))
843
13.8k
        nocase = 1;
844
0
    else
845
0
        STATE_ERROR(state, PATHX_EREGEXPFLAG);
846
847
13.8k
    func_regexp_or_glob(state, 0, nocase);
848
13.8k
}
849
850
6
static void func_glob(struct state *state, int nargs) {
851
6
    ensure_arity(1, 1);
852
6
    func_regexp_or_glob(state, 1, 0);
853
6
}
854
855
91.7k
static bool coerce_to_bool(struct value *v) {
856
91.7k
    switch (v->tag) {
857
45.8k
    case T_NODESET:
858
45.8k
        return v->nodeset->used > 0;
859
0
        break;
860
45.6k
    case T_BOOLEAN:
861
45.6k
        return v->boolval;
862
0
        break;
863
0
    case T_NUMBER:
864
0
        return v->number > 0;
865
0
        break;
866
212
    case T_STRING:
867
212
        return strlen(v->string) > 0;
868
0
        break;
869
0
    case T_REGEXP:
870
0
        return true;
871
0
    default:
872
0
        assert(0);
873
0
        return false;
874
91.7k
    }
875
91.7k
}
876
877
static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2,
878
45.2k
                                   int neq) {
879
45.2k
    for (int i1=0; i1 < ns1->used; i1++) {
880
0
        struct tree *t1 = ns1->nodes[i1];
881
0
        for (int i2=0; i2 < ns2->used; i2++) {
882
0
            struct tree *t2 = ns2->nodes[i2];
883
0
            if (neq) {
884
0
                if (!streqx(t1->value, t2->value))
885
0
                    return 1;
886
0
            } else {
887
0
                if (streqx(t1->value, t2->value))
888
0
                    return 1;
889
0
            }
890
0
        }
891
0
    }
892
45.2k
    return 0;
893
45.2k
}
894
895
static int calc_eq_nodeset_string(struct nodeset *ns, const char *s,
896
0
                                  int neq) {
897
0
    for (int i=0; i < ns->used; i++) {
898
0
        struct tree *t = ns->nodes[i];
899
0
        if (neq) {
900
0
            if (!streqx(t->value, s))
901
0
                return 1;
902
0
        } else {
903
0
            if (streqx(t->value, s))
904
0
                return 1;
905
0
        }
906
0
    }
907
0
    return 0;
908
0
}
909
910
45.2k
static void eval_eq(struct state *state, int neq) {
911
45.2k
    struct value *r = pop_value(state);
912
45.2k
    struct value *l = pop_value(state);
913
45.2k
    int res;
914
915
45.2k
    if (l->tag == T_NODESET && r->tag == T_NODESET) {
916
45.2k
        res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq);
917
45.2k
    } else if (l->tag == T_NODESET) {
918
0
        res = calc_eq_nodeset_string(l->nodeset, r->string, neq);
919
0
    } else if (r->tag == T_NODESET) {
920
0
        res = calc_eq_nodeset_string(r->nodeset, l->string, neq);
921
0
    } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) {
922
0
        if (neq)
923
0
            res = (l->number != r->number);
924
0
        else
925
0
            res = (l->number == r->number);
926
0
    } else {
927
0
        assert(l->tag == T_STRING);
928
0
        assert(r->tag == T_STRING);
929
0
        res = streqx(l->string, r->string);
930
0
        if (neq)
931
0
            res = !res;
932
0
    }
933
45.2k
    RET_ON_ERROR;
934
935
45.2k
    push_boolean_value(res, state);
936
45.2k
}
937
938
0
static void eval_arith(struct state *state, enum binary_op op) {
939
0
    value_ind_t vind = make_value(T_NUMBER, state);
940
0
    struct value *r = pop_value(state);
941
0
    struct value *l = pop_value(state);
942
0
    int res;
943
944
0
    assert(l->tag == T_NUMBER);
945
0
    assert(r->tag == T_NUMBER);
946
947
0
    RET_ON_ERROR;
948
949
0
    if (op == OP_PLUS)
950
0
        res = l->number + r->number;
951
0
    else if (op == OP_MINUS)
952
0
        res = l->number - r->number;
953
0
    else if (op == OP_STAR)
954
0
        res = l->number * r->number;
955
0
    else
956
0
        assert(0);
957
958
0
    state->value_pool[vind].number = res;
959
0
    push_value(vind, state);
960
0
}
961
962
0
static void eval_rel(struct state *state, bool greater, bool equal) {
963
0
    struct value *r, *l;
964
0
    int res;
965
966
    /* We always check l < r or l <= r */
967
0
    if (greater) {
968
0
        l = pop_value(state);
969
0
        r = pop_value(state);
970
0
    } else {
971
0
        r = pop_value(state);
972
0
        l = pop_value(state);
973
0
    }
974
0
    if (l->tag == T_NUMBER) {
975
0
        if (equal)
976
0
            res = (l->number <= r->number);
977
0
        else
978
0
            res = (l->number < r->number);
979
0
    } else if (l->tag == T_STRING) {
980
0
        int cmp = strcmp(l->string, r->string);
981
0
        if (equal)
982
0
            res = cmp <= 0;
983
0
        else
984
0
            res = cmp < 0;
985
0
    } else {
986
0
        assert(0);
987
0
    }
988
989
0
    push_boolean_value(res, state);
990
0
}
991
992
0
static void eval_and_or(struct state *state, enum binary_op op) {
993
0
    struct value *r = pop_value(state);
994
0
    struct value *l = pop_value(state);
995
0
    bool left = coerce_to_bool(l);
996
0
    bool right = coerce_to_bool(r);
997
998
999
0
    if (op == OP_AND)
1000
0
        push_boolean_value(left && right, state);
1001
0
    else
1002
0
        push_boolean_value(left || right, state);
1003
0
}
1004
1005
45.8k
static void eval_else(struct state *state, struct expr *expr, struct locpath_trace *lpt_right) {
1006
45.8k
    struct value *r = pop_value(state);
1007
45.8k
    struct value *l = pop_value(state);
1008
1009
45.8k
    if ( l->tag == T_NODESET && r->tag == T_NODESET ) {
1010
3
        int discard_maxns=0;
1011
3
        struct nodeset **discard_ns=NULL;
1012
3
        struct locpath_trace *lpt = state->locpath_trace;
1013
3
        value_ind_t vind = make_value(T_NODESET, state);
1014
3
        if (l->nodeset->used >0 || expr->left_matched) {
1015
0
            expr->left_matched = 1;
1016
0
            state->value_pool[vind].nodeset = clone_nodeset(l->nodeset, state);
1017
0
            if( lpt_right != NULL ) {
1018
0
                discard_maxns = lpt_right->maxns;
1019
0
                discard_ns    = lpt_right->ns;
1020
0
            }
1021
3
        } else {
1022
3
            state->value_pool[vind].nodeset = clone_nodeset(r->nodeset, state);
1023
3
            if( lpt != NULL && lpt_right != NULL ) {
1024
0
                discard_maxns = lpt->maxns;
1025
0
                discard_ns    = lpt->ns;
1026
0
                lpt->maxns = lpt_right->maxns;
1027
0
                lpt->ns    = lpt_right->ns;
1028
0
                lpt->lp    = lpt_right->lp;
1029
0
            }
1030
3
        }
1031
3
        push_value(vind, state);
1032
3
        if ( lpt != NULL && lpt_right != NULL ) {
1033
0
            for (int i=0; i < discard_maxns; i++)
1034
0
                free_nodeset(discard_ns[i]);
1035
0
            FREE(discard_ns);
1036
0
        }
1037
45.8k
    } else {
1038
45.8k
        bool left = coerce_to_bool(l);
1039
45.8k
        bool right = coerce_to_bool(r);
1040
1041
45.8k
        expr->left_matched = expr->left_matched || left;
1042
45.8k
        if (expr->left_matched) {
1043
            /* One or more LHS have matched, so we're not interested in the right expr */
1044
678
            push_boolean_value(left, state);
1045
45.1k
        } else {
1046
            /* no LHS has matched (yet), so keep the right expr */
1047
            /* If this is the 2nd pass, and expr->left_matched is true, no RHS nodes will be included */
1048
45.1k
            push_boolean_value(right, state);
1049
45.1k
        }
1050
45.8k
    }
1051
45.8k
}
1052
1053
static bool eval_re_match_str(struct state *state, struct regexp *rx,
1054
1.04M
                              const char *str) {
1055
1.04M
    int r;
1056
1057
1.04M
    if (str == NULL)
1058
592k
        str = "";
1059
1060
1.04M
    r = regexp_match(rx, str, strlen(str), 0, NULL);
1061
1.04M
    if (r == -2) {
1062
0
        STATE_ERROR(state, PATHX_EINTERNAL);
1063
1.04M
    } else if (r == -3) {
1064
        /* We should never get this far; func_regexp should catch
1065
         * invalid regexps */
1066
0
        assert(false);
1067
0
    }
1068
1.04M
    return r == strlen(str);
1069
1.04M
}
1070
1071
132k
static void eval_union(struct state *state, struct locpath_trace *lpt_right) {
1072
132k
    value_ind_t vind = make_value(T_NODESET, state);
1073
132k
    struct value *r = pop_value(state);
1074
132k
    struct value *l = pop_value(state);
1075
132k
    struct nodeset *res = NULL;
1076
132k
    struct locpath_trace *lpt = state->locpath_trace;
1077
1078
132k
    assert(l->tag == T_NODESET);
1079
132k
    assert(r->tag == T_NODESET);
1080
1081
132k
    RET_ON_ERROR;
1082
1083
132k
    res = clone_nodeset(l->nodeset, state);
1084
132k
    RET_ON_ERROR;
1085
161k
    for (int i=0; i < r->nodeset->used; i++) {
1086
29.3k
        ns_add(res, r->nodeset->nodes[i], state);
1087
29.3k
        if (HAS_ERROR(state))
1088
0
            goto error;
1089
29.3k
    }
1090
132k
    state->value_pool[vind].nodeset = res;
1091
132k
    push_value(vind, state);
1092
1093
132k
    if( lpt != NULL && lpt_right != NULL ) {
1094
0
        STATE_ERROR(state, PATHX_EMMATCH);
1095
0
        for (int i=0; i < lpt_right->maxns; i++)
1096
0
            free_nodeset(lpt_right->ns[i]);
1097
0
        FREE(lpt_right->ns);
1098
0
    }
1099
132k
 error:
1100
132k
    ns_clear_added(res);
1101
132k
}
1102
1103
0
static void eval_concat_string(struct state *state) {
1104
0
    value_ind_t vind = make_value(T_STRING, state);
1105
0
    struct value *r = pop_value(state);
1106
0
    struct value *l = pop_value(state);
1107
0
    char *res = NULL;
1108
1109
0
    RET_ON_ERROR;
1110
1111
0
    if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) {
1112
0
        STATE_ENOMEM;
1113
0
        return;
1114
0
    }
1115
0
    strcpy(res, l->string);
1116
0
    strcat(res, r->string);
1117
0
    state->value_pool[vind].string = res;
1118
0
    push_value(vind, state);
1119
0
}
1120
1121
0
static void eval_concat_regexp(struct state *state) {
1122
0
    value_ind_t vind = make_value(T_REGEXP, state);
1123
0
    struct value *r = pop_value(state);
1124
0
    struct value *l = pop_value(state);
1125
0
    struct regexp *rx = NULL;
1126
1127
0
    RET_ON_ERROR;
1128
1129
0
    rx = regexp_concat(state->error->info, l->regexp, r->regexp);
1130
0
    if (rx == NULL) {
1131
0
        STATE_ENOMEM;
1132
0
        return;
1133
0
    }
1134
1135
0
    state->value_pool[vind].regexp = rx;
1136
0
    push_value(vind, state);
1137
0
}
1138
1139
1.04M
static void eval_re_match(struct state *state, enum binary_op op) {
1140
1.04M
    struct value *rx  = pop_value(state);
1141
1.04M
    struct value *v = pop_value(state);
1142
1143
1.04M
    bool result = false;
1144
1145
1.04M
    if (v->tag == T_STRING) {
1146
11
        result = eval_re_match_str(state, rx->regexp, v->string);
1147
11
        RET_ON_ERROR;
1148
1.04M
    } else if (v->tag == T_NODESET) {
1149
2.08M
        for (int i=0; i < v->nodeset->used && result == false; i++) {
1150
1.04M
            struct tree *t = v->nodeset->nodes[i];
1151
1.04M
            result = eval_re_match_str(state, rx->regexp, t->value);
1152
1.04M
            RET_ON_ERROR;
1153
1.04M
        }
1154
1.04M
    }
1155
1.04M
    if (op == OP_RE_NOMATCH)
1156
1.04M
        result = !result;
1157
1.04M
    push_boolean_value(result, state);
1158
1.04M
}
1159
1160
1.27M
static void eval_binary(struct expr *expr, struct state *state) {
1161
1.27M
    struct locpath_trace *lpt = state->locpath_trace;
1162
1.27M
    struct locpath_trace lpt_right;
1163
1164
1.27M
    eval_expr(expr->left, state);
1165
1.27M
    if ( lpt != NULL && expr->type == T_NODESET ) {
1166
0
       MEMZERO(&lpt_right, 1);
1167
0
       state->locpath_trace = &lpt_right;
1168
0
    }
1169
1.27M
    eval_expr(expr->right, state);
1170
1.27M
    state->locpath_trace = lpt;
1171
1.27M
    RET_ON_ERROR;
1172
1173
1.26M
    switch (expr->op) {
1174
45.2k
    case OP_EQ:
1175
45.2k
        eval_eq(state, 0);
1176
45.2k
        break;
1177
0
    case OP_NEQ:
1178
0
        eval_eq(state, 1);
1179
0
        break;
1180
0
    case OP_LT:
1181
0
        eval_rel(state, false, false);
1182
0
        break;
1183
0
    case OP_LE:
1184
0
        eval_rel(state, false, true);
1185
0
        break;
1186
0
    case OP_GT:
1187
0
        eval_rel(state, true, false);
1188
0
        break;
1189
0
    case OP_GE:
1190
0
        eval_rel(state, true, true);
1191
0
        break;
1192
0
    case OP_PLUS:
1193
0
        if (expr->type == T_NUMBER)
1194
0
            eval_arith(state, expr->op);
1195
0
        else if (expr->type == T_STRING)
1196
0
            eval_concat_string(state);
1197
0
        else if (expr->type == T_REGEXP)
1198
0
            eval_concat_regexp(state);
1199
0
        break;
1200
0
    case OP_MINUS:
1201
0
    case OP_STAR:
1202
0
        eval_arith(state, expr->op);
1203
0
        break;
1204
0
    case OP_AND:
1205
0
    case OP_OR:
1206
0
        eval_and_or(state, expr->op);
1207
0
        break;
1208
45.8k
    case OP_ELSE:
1209
45.8k
        eval_else(state, expr, &lpt_right);
1210
45.8k
        break;
1211
132k
    case OP_UNION:
1212
132k
        eval_union(state, &lpt_right);
1213
132k
        break;
1214
0
    case OP_RE_MATCH:
1215
1.04M
    case OP_RE_NOMATCH:
1216
1.04M
        eval_re_match(state, expr->op);
1217
1.04M
        break;
1218
0
    default:
1219
0
        assert(0);
1220
1.26M
    }
1221
1.26M
}
1222
1223
62.2k
static void eval_app(struct expr *expr, struct state *state) {
1224
62.2k
    assert(expr->tag == E_APP);
1225
1226
135k
    for (int i=0; i < expr->func->arity; i++) {
1227
76.1k
        eval_expr(expr->args[i], state);
1228
76.1k
        RET_ON_ERROR;
1229
76.1k
    }
1230
59.4k
    expr->func->impl(state, expr->func->arity);
1231
59.4k
}
1232
1233
1.10M
static bool eval_pred(struct expr *expr, struct state *state) {
1234
1.10M
    eval_expr(expr, state);
1235
1.10M
    RET0_ON_ERROR;
1236
1237
1.09M
    struct value *v = pop_value(state);
1238
1.09M
    switch(v->tag) {
1239
1.04M
    case T_BOOLEAN:
1240
1.04M
        return v->boolval;
1241
44.9k
    case T_NUMBER:
1242
44.9k
        return (state->ctx_pos == v->number);
1243
11.2k
    case T_NODESET:
1244
11.2k
        return v->nodeset->used > 0;
1245
0
    case T_STRING:
1246
0
        return streqv(state->ctx->value, v->string);
1247
0
    default:
1248
0
        assert(0);
1249
0
        return false;
1250
1.09M
    }
1251
1.09M
}
1252
1253
/* Remove COUNT successive entries from NS. The first entry to remove is at
1254
   IND */
1255
53.8k
static void ns_remove(struct nodeset *ns, int ind, int count) {
1256
53.8k
    if (count < 1)
1257
0
        return;
1258
53.8k
    memmove(ns->nodes + ind, ns->nodes + ind+count,
1259
53.8k
            sizeof(ns->nodes[0]) * (ns->used - (ind+count)));
1260
53.8k
    ns->used -= count;
1261
53.8k
}
1262
1263
/*
1264
 * Remove all nodes from NS for which one of PRED is false
1265
 */
1266
static void ns_filter(struct nodeset *ns, struct pred *predicates,
1267
2.97M
                      struct state *state) {
1268
2.97M
    if (predicates == NULL)
1269
2.86M
        return;
1270
1271
114k
    struct tree *old_ctx = state->ctx;
1272
114k
    uint old_ctx_len = state->ctx_len;
1273
114k
    uint old_ctx_pos = state->ctx_pos;
1274
1275
308k
    for (int p=0; p < predicates->nexpr; p++) {
1276
196k
        if ( state->has_else) {
1277
658k
            for (int i=0; i < ns->used; i++) {
1278
                /* 1st pass, check if any else statements have match on the left */
1279
                /* Don't delete any nodes (yet) */
1280
549k
                state->ctx = ns->nodes[i];
1281
549k
                eval_pred(predicates->exprs[p], state);
1282
549k
            }
1283
108k
        }
1284
196k
        int first_bad = -1;  /* The index of the first non-matching node */
1285
196k
        state->ctx_len = ns->used;
1286
196k
        state->ctx_pos = 1;
1287
751k
        for (int i=0; i < ns->used; state->ctx_pos++) {
1288
558k
            state->ctx = ns->nodes[i];
1289
558k
            bool match = eval_pred(predicates->exprs[p], state);
1290
558k
            RET_ON_ERROR;
1291
            /* We remove non-matching nodes from NS in batches; this logic
1292
             * makes sure that we only call ns_remove at the end of a run
1293
             * of non-matching nodes
1294
             */
1295
555k
            if (match) {
1296
56.1k
                if (first_bad >= 0) {
1297
19.8k
                    ns_remove(ns, first_bad, i - first_bad);
1298
19.8k
                    i = first_bad + 1;
1299
36.3k
                } else {
1300
36.3k
                    i += 1;
1301
36.3k
                }
1302
56.1k
                first_bad = -1;
1303
499k
            } else {
1304
499k
                if (first_bad == -1)
1305
53.8k
                    first_bad = i;
1306
499k
                i += 1;
1307
499k
            }
1308
555k
        }
1309
193k
        if (first_bad >= 0) {
1310
33.9k
            ns_remove(ns, first_bad, ns->used - first_bad);
1311
33.9k
        }
1312
193k
    }
1313
1314
111k
    state->ctx = old_ctx;
1315
111k
    state->ctx_pos = old_ctx_pos;
1316
111k
    state->ctx_len = old_ctx_len;
1317
111k
}
1318
1319
/* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */
1320
2.97M
static bool position_pred(struct pred *pred) {
1321
2.97M
    return pred != NULL &&
1322
114k
        pred->nexpr == 1 &&
1323
114k
        pred->exprs[0]->tag == E_VALUE &&
1324
424
        pred->exprs[0]->type == T_NUMBER;
1325
2.97M
}
1326
1327
/* Return the tree node at the position implied by STEP->PREDICATES. It is
1328
   assumed and required that STEP->PREDICATES is actually a
1329
   POSITION_PRED.
1330
1331
   This method hand-optimizes the important case of a path expression like
1332
   'service[42]'
1333
*/
1334
static struct tree *position_filter(struct nodeset *ns,
1335
                                    struct step *step,
1336
424
                                    struct state *state) {
1337
424
    int value_ind = step->predicates->exprs[0]->value_ind;
1338
424
    int number = state->value_pool[value_ind].number;
1339
1340
424
    int pos = 1;
1341
742
    for (int i=0; i < ns->used; i++) {
1342
318
        for (struct tree *node = step_first(step, ns->nodes[i]);
1343
954
             node != NULL;
1344
636
             node = step_next(step, ns->nodes[i], node), pos++) {
1345
636
            if (pos == number)
1346
0
                return node;
1347
636
        }
1348
318
    }
1349
1350
424
    return NULL;
1351
424
}
1352
1353
/* Return an array of nodesets, one for each step in the locpath.
1354
 *
1355
 * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will
1356
 * contain the nodes that matched the entire locpath
1357
 */
1358
static void ns_from_locpath(struct locpath *lp, uint *maxns,
1359
                            struct nodeset ***ns,
1360
                            const struct nodeset *root,
1361
1.35M
                            struct state *state) {
1362
1.35M
    struct tree *old_ctx = state->ctx;
1363
1.35M
    *maxns = 0;
1364
1365
1.35M
    ensure(lp != NULL, state);
1366
1367
1.35M
    *ns = NULL;
1368
1.35M
    list_for_each(step, lp->steps)
1369
3.03M
        *maxns += 1;
1370
1.35M
    if (ALLOC_N(*ns, *maxns+1) < 0) {
1371
0
        STATE_ERROR(state, PATHX_ENOMEM);
1372
0
        goto error;
1373
0
    }
1374
5.74M
    for (int i=0; i <= *maxns; i++) {
1375
4.39M
        (*ns)[i] = make_nodeset(state);
1376
4.39M
        if (HAS_ERROR(state))
1377
0
            goto error;
1378
4.39M
    }
1379
1380
1.35M
    if (root == NULL) {
1381
1.35M
        struct step *first_step = NULL;
1382
1.35M
        first_step = lp->steps;
1383
1384
1.35M
        struct tree *root_tree;
1385
1.35M
        root_tree = step_root(first_step, state->ctx, state->root_ctx);
1386
1.35M
        ns_add((*ns)[0], root_tree, state);
1387
1.35M
        ns_clear_added((*ns)[0]);
1388
1.35M
    } else {
1389
8
        for (int i=0; i < root->used; i++)
1390
0
            ns_add((*ns)[0], root->nodes[i], state);
1391
8
        ns_clear_added((*ns)[0]);
1392
8
    }
1393
1394
1.35M
    if (HAS_ERROR(state))
1395
0
        goto error;
1396
1397
1.35M
    uint cur_ns = 0;
1398
2.97M
    list_for_each(step, lp->steps) {
1399
2.97M
        struct nodeset *work = (*ns)[cur_ns];
1400
2.97M
        struct nodeset *next = (*ns)[cur_ns + 1];
1401
2.97M
        if (position_pred(step->predicates)) {
1402
424
            struct tree *node = position_filter(work, step, state);
1403
424
            if (node) {
1404
0
                ns_add(next, node, state);
1405
0
                ns_clear_added(next);
1406
0
            }
1407
2.97M
        } else {
1408
5.22M
            for (int i=0; i < work->used; i++) {
1409
2.25M
                for (struct tree *node = step_first(step, work->nodes[i]);
1410
4.77M
                     node != NULL;
1411
2.52M
                     node = step_next(step, work->nodes[i], node)) {
1412
2.52M
                    ns_add(next, node, state);
1413
2.52M
                }
1414
2.25M
            }
1415
2.97M
            ns_clear_added(next);
1416
2.97M
            ns_filter(next, step->predicates, state);
1417
2.97M
            if (HAS_ERROR(state))
1418
2.89k
                goto error;
1419
2.97M
        }
1420
2.97M
        cur_ns += 1;
1421
2.97M
    }
1422
1423
1.35M
    state->ctx = old_ctx;
1424
1.35M
    return;
1425
2.89k
 error:
1426
2.89k
    if (*ns != NULL) {
1427
68.9k
        for (int i=0; i <= *maxns; i++)
1428
66.0k
            free_nodeset((*ns)[i]);
1429
2.89k
        FREE(*ns);
1430
2.89k
    }
1431
2.89k
    state->ctx = old_ctx;
1432
2.89k
    return;
1433
1.35M
}
1434
1435
1.36M
static void eval_filter(struct expr *expr, struct state *state) {
1436
1.36M
    struct locpath *lp = expr->locpath;
1437
1.36M
    struct nodeset **ns = NULL;
1438
1.36M
    struct locpath_trace *lpt = state->locpath_trace;
1439
1.36M
    uint maxns;
1440
1441
1.36M
    state->locpath_trace = NULL;
1442
1.36M
    if (expr->primary == NULL) {
1443
1.35M
        ns_from_locpath(lp, &maxns, &ns, NULL, state);
1444
1.35M
    } else {
1445
5.54k
        eval_expr(expr->primary, state);
1446
5.54k
        RET_ON_ERROR;
1447
8
        value_ind_t primary_ind = pop_value_ind(state);
1448
8
        struct value *primary = state->value_pool + primary_ind;
1449
8
        assert(primary->tag == T_NODESET);
1450
8
        ns_filter(primary->nodeset, expr->predicates, state);
1451
        /* Evaluating predicates might have reallocated the value_pool */
1452
8
        primary = state->value_pool + primary_ind;
1453
8
        ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state);
1454
8
    }
1455
1.35M
    RET_ON_ERROR;
1456
1457
1.35M
    value_ind_t vind = make_value(T_NODESET, state);
1458
1.35M
    RET_ON_ERROR;
1459
1.35M
    state->value_pool[vind].nodeset = ns[maxns];
1460
1.35M
    push_value(vind, state);
1461
1462
1.35M
    if (lpt != NULL) {
1463
2.12k
        assert(lpt->ns == NULL);
1464
2.12k
        assert(lpt->lp == NULL);
1465
2.12k
        lpt->maxns = maxns;
1466
2.12k
        lpt->ns = ns;
1467
2.12k
        lpt->lp = lp;
1468
2.12k
        state->locpath_trace = lpt;
1469
1.35M
    } else {
1470
4.31M
        for (int i=0; i < maxns; i++)
1471
2.96M
            free_nodeset(ns[i]);
1472
1.35M
        FREE(ns);
1473
1.35M
    }
1474
1.35M
}
1475
1476
static struct value *lookup_var(const char *ident,
1477
0
                                const struct pathx_symtab *symtab) {
1478
0
    list_for_each(tab, symtab) {
1479
0
        if (STREQ(ident, tab->name))
1480
0
            return tab->value;
1481
0
    }
1482
0
    return NULL;
1483
0
}
1484
1485
0
static void eval_var(struct expr *expr, struct state *state) {
1486
0
    struct value *v = lookup_var(expr->ident, state->symtab);
1487
0
    value_ind_t vind = clone_value(v, state);
1488
0
    RET_ON_ERROR;
1489
0
    push_value(vind, state);
1490
0
}
1491
1492
3.75M
static void eval_expr(struct expr *expr, struct state *state) {
1493
3.75M
    RET_ON_ERROR;
1494
3.74M
    switch (expr->tag) {
1495
1.36M
    case E_FILTER:
1496
1.36M
        eval_filter(expr, state);
1497
1.36M
        break;
1498
1.27M
    case E_BINARY:
1499
1.27M
        eval_binary(expr, state);
1500
1.27M
        break;
1501
1.04M
    case E_VALUE:
1502
1.04M
        push_value(expr->value_ind, state);
1503
1.04M
        break;
1504
0
    case E_VAR:
1505
0
        eval_var(expr, state);
1506
0
        break;
1507
62.2k
    case E_APP:
1508
62.2k
        eval_app(expr, state);
1509
62.2k
        if (expr->fold) {
1510
            /* Do constant folding: replace the function application with
1511
             * a reference to the value that resulted from evaluating it */
1512
822
            for (int i=0; i < expr->func->arity; i++)
1513
411
                free_expr(expr->args[i]);
1514
411
            free(expr->args);
1515
411
            value_ind_t vind = state->values_used - 1;
1516
411
            expr->tag = E_VALUE;
1517
411
            expr->value_ind = state->values[vind];
1518
411
        }
1519
62.2k
        break;
1520
0
    default:
1521
0
        assert(0);
1522
3.74M
    }
1523
3.74M
}
1524
1525
/*************************************************************************
1526
 * Typechecker
1527
 *************************************************************************/
1528
1529
static void check_expr(struct expr *expr, struct state *state);
1530
1531
/* Typecheck a list of predicates. A predicate is a function of
1532
 * one of the following types:
1533
 *
1534
 * T_NODESET -> T_BOOLEAN
1535
 * T_NUMBER  -> T_BOOLEAN  (position test)
1536
 * T_BOOLEAN -> T_BOOLEAN
1537
 */
1538
425k
static void check_preds(struct pred *pred, struct state *state) {
1539
425k
    if (pred == NULL)
1540
333k
        return;
1541
266k
    for (int i=0; i < pred->nexpr; i++) {
1542
173k
        struct expr *e = pred->exprs[i];
1543
173k
        check_expr(e, state);
1544
173k
        RET_ON_ERROR;
1545
173k
        if (e->type != T_NODESET && e->type != T_NUMBER &&
1546
91.1k
            e->type != T_BOOLEAN && e->type != T_STRING) {
1547
0
            STATE_ERROR(state, PATHX_ETYPE);
1548
0
            return;
1549
0
        }
1550
173k
    }
1551
92.1k
}
1552
1553
222k
static void check_filter(struct expr *expr, struct state *state) {
1554
222k
    assert(expr->tag == E_FILTER);
1555
222k
    struct locpath *locpath = expr->locpath;
1556
1557
222k
    if (expr->primary != NULL) {
1558
6.45k
        check_expr(expr->primary, state);
1559
6.45k
        if (expr->primary->type != T_NODESET) {
1560
0
            STATE_ERROR(state, PATHX_ETYPE);
1561
0
            return;
1562
0
        }
1563
6.45k
        check_preds(expr->predicates, state);
1564
6.45k
        RET_ON_ERROR;
1565
6.45k
    }
1566
419k
    list_for_each(s, locpath->steps) {
1567
419k
        check_preds(s->predicates, state);
1568
419k
        RET_ON_ERROR;
1569
419k
    }
1570
222k
    expr->type = T_NODESET;
1571
222k
}
1572
1573
42.8k
static void check_app(struct expr *expr, struct state *state) {
1574
42.8k
    assert(expr->tag == E_APP);
1575
1576
86.1k
    for (int i=0; i < expr->func->arity; i++) {
1577
43.3k
        check_expr(expr->args[i], state);
1578
43.3k
        RET_ON_ERROR;
1579
43.3k
    }
1580
1581
42.8k
    int f;
1582
234k
    for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) {
1583
234k
        const struct func *fn = builtin_funcs + f;
1584
234k
        if (STRNEQ(expr->func->name, fn->name))
1585
186k
            continue;
1586
48.4k
        if (expr->func->arity != fn->arity)
1587
1.17k
            continue;
1588
1589
47.2k
        int match = 1;
1590
90.6k
        for (int i=0; i < expr->func->arity; i++) {
1591
47.8k
            if (expr->args[i]->type != fn->arg_types[i]) {
1592
4.47k
                match = 0;
1593
4.47k
                break;
1594
4.47k
            }
1595
47.8k
        }
1596
47.2k
        if (match)
1597
42.8k
            break;
1598
47.2k
    }
1599
1600
42.8k
    if (f < ARRAY_CARDINALITY(builtin_funcs)) {
1601
42.8k
        expr->func = builtin_funcs + f;
1602
42.8k
        expr->type = expr->func->type;
1603
42.8k
        expr->fold = expr->func->pure;
1604
42.8k
        if (expr->fold) {
1605
            /* We only do constant folding for invocations of pure functions
1606
             * whose arguments are literal values. That misses opportunities
1607
             * for constant folding, e.g., "regexp('foo' + 'bar')" but is
1608
             * a bit simpler than doing full tracking of constants
1609
             */
1610
85.6k
            for (int i=0; i < expr->func->arity; i++) {
1611
43.1k
                if (expr->args[i]->tag != E_VALUE)
1612
3.89k
                    expr->fold = false;
1613
43.1k
            }
1614
42.5k
        }
1615
42.8k
    } else {
1616
0
        STATE_ERROR(state, PATHX_ETYPE);
1617
0
    }
1618
42.8k
}
1619
1620
/* Check the binary operators. Type rules:
1621
 *
1622
 * '=', '!='  : T_NODESET -> T_NODESET -> T_BOOLEAN
1623
 *              T_STRING  -> T_NODESET -> T_BOOLEAN
1624
 *              T_NODESET -> T_STRING  -> T_BOOLEAN
1625
 *              T_NUMBER  -> T_NUMBER  -> T_BOOLEAN
1626
 *
1627
 * '>', '>=',
1628
 * '<', '<='  : T_NUMBER -> T_NUMBER -> T_BOOLEAN
1629
 *              T_STRING -> T_STRING -> T_BOOLEAN
1630
 * '+'        : T_NUMBER -> T_NUMBER -> T_NUMBER
1631
 *              T_STRING -> T_STRING -> T_STRING
1632
 *              T_REGEXP -> T_REGEXP -> T_REGEXP
1633
 * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER
1634
 *
1635
 * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN
1636
 * '=~', '!~' : T_STRING  -> T_REGEXP  -> T_BOOLEAN
1637
 *              T_NODESET -> T_REGEXP  -> T_BOOLEAN
1638
 *
1639
 * '|'        : T_NODESET -> T_NODESET -> T_NODESET
1640
 *
1641
 * Any type can be coerced to T_BOOLEAN (see coerce_to_bool)
1642
 */
1643
360k
static void check_binary(struct expr *expr, struct state *state) {
1644
360k
    check_expr(expr->left, state);
1645
360k
    check_expr(expr->right, state);
1646
360k
    RET_ON_ERROR;
1647
1648
122k
    enum type l = expr->left->type;
1649
122k
    enum type r = expr->right->type;
1650
122k
    int  ok = 1;
1651
122k
    enum type res;
1652
1653
122k
    switch(expr->op) {
1654
309
    case OP_EQ:
1655
309
    case OP_NEQ:
1656
309
        ok = ((l == T_NODESET || l == T_STRING)
1657
309
              && (r == T_NODESET || r == T_STRING))
1658
0
            || (l == T_NUMBER && r == T_NUMBER);;
1659
309
        res = T_BOOLEAN;
1660
309
        break;
1661
0
    case OP_LT:
1662
0
    case OP_LE:
1663
0
    case OP_GT:
1664
0
    case OP_GE:
1665
0
        ok = (l == T_NUMBER && r == T_NUMBER)
1666
0
            || (l == T_STRING && r == T_STRING);
1667
0
        res = T_BOOLEAN;
1668
0
        break;
1669
48
    case OP_PLUS:
1670
48
        ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP));
1671
48
        res = l;
1672
48
        break;
1673
0
    case OP_MINUS:
1674
6
    case OP_STAR:
1675
6
        ok =  (l == T_NUMBER && r == T_NUMBER);
1676
6
        res = T_NUMBER;
1677
6
        break;
1678
23.9k
    case OP_UNION:
1679
23.9k
        ok = (l == T_NODESET && r == T_NODESET);
1680
23.9k
        res = T_NODESET;
1681
23.9k
        break;
1682
2
    case OP_AND:
1683
2
    case OP_OR:
1684
2
        ok = 1;
1685
2
        res = T_BOOLEAN;
1686
2
        break;
1687
55.3k
    case OP_ELSE:
1688
55.3k
        if (l == T_NODESET && r == T_NODESET) {
1689
6.45k
            res = T_NODESET;
1690
48.8k
        } else {
1691
48.8k
            res = T_BOOLEAN;
1692
48.8k
        }
1693
55.3k
        ok = 1;
1694
55.3k
        break;
1695
0
    case OP_RE_MATCH:
1696
42.5k
    case OP_RE_NOMATCH:
1697
42.5k
        ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP);
1698
42.5k
        res = T_BOOLEAN;
1699
42.5k
        break;
1700
0
    default:
1701
0
        assert(0);
1702
122k
    }
1703
122k
    if (! ok) {
1704
7
        STATE_ERROR(state, PATHX_ETYPE);
1705
122k
    } else {
1706
122k
        expr->type = res;
1707
122k
    }
1708
122k
}
1709
1710
0
static void check_var(struct expr *expr, struct state *state) {
1711
0
    struct value *v = lookup_var(expr->ident, state->symtab);
1712
0
    if (v == NULL) {
1713
0
        STATE_ERROR(state, PATHX_ENOVAR);
1714
0
        return;
1715
0
    }
1716
0
    expr->type = v->tag;
1717
0
}
1718
1719
/* Typecheck an expression */
1720
951k
static void check_expr(struct expr *expr, struct state *state) {
1721
951k
    RET_ON_ERROR;
1722
713k
    switch(expr->tag) {
1723
222k
    case E_FILTER:
1724
222k
        check_filter(expr, state);
1725
222k
        break;
1726
360k
    case E_BINARY:
1727
360k
        check_binary(expr, state);
1728
360k
        break;
1729
88.3k
    case E_VALUE:
1730
88.3k
        expr->type = expr_value(expr, state)->tag;
1731
88.3k
        break;
1732
0
    case E_VAR:
1733
0
        check_var(expr, state);
1734
0
        break;
1735
42.8k
    case E_APP:
1736
42.8k
        check_app(expr, state);
1737
42.8k
        break;
1738
0
    default:
1739
0
        assert(0);
1740
713k
    }
1741
713k
}
1742
1743
/*
1744
 * Utility functions for the parser
1745
 */
1746
1747
22.1M
static void skipws(struct state *state) {
1748
22.1M
    while (isspace(*state->pos)) state->pos += 1;
1749
22.1M
}
1750
1751
19.0M
static int match(struct state *state, char m) {
1752
19.0M
    skipws(state);
1753
1754
19.0M
    if (*state->pos == '\0')
1755
30.2k
        return 0;
1756
19.0M
    if (*state->pos == m) {
1757
3.63M
        state->pos += 1;
1758
3.63M
        return 1;
1759
3.63M
    }
1760
15.3M
    return 0;
1761
19.0M
}
1762
1763
4.90M
static int peek(struct state *state, const char *chars) {
1764
4.90M
    return strchr(chars, *state->pos) != NULL;
1765
4.90M
}
1766
1767
/* Return 1 if STATE->POS starts with TOKEN, followed by optional
1768
 * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the
1769
 * first character after FOLLOW. Return 0 otherwise and leave STATE->POS
1770
 * unchanged.
1771
 */
1772
static int looking_at(struct state *state, const char *token,
1773
22.8M
                      const char *follow) {
1774
22.8M
    if (STREQLEN(state->pos, token, strlen(token))) {
1775
54.5k
        const char *p = state->pos + strlen(token);
1776
58.2k
        while (isspace(*p)) p++;
1777
54.5k
        if (STREQLEN(p, follow, strlen(follow))) {
1778
54.0k
            state->pos = p + strlen(follow);
1779
54.0k
            return 1;
1780
54.0k
        }
1781
54.5k
    }
1782
22.7M
    return 0;
1783
22.8M
}
1784
1785
/*************************************************************************
1786
 * The parser
1787
 *************************************************************************/
1788
1789
static void parse_expr(struct state *state);
1790
1791
2.50M
static struct expr* pop_expr(struct state *state) {
1792
2.50M
    if (state->exprs_used > 0) {
1793
2.50M
        state->exprs_used -= 1;
1794
2.50M
        return state->exprs[state->exprs_used];
1795
2.50M
    } else {
1796
0
        STATE_ERROR(state, PATHX_EINTERNAL);
1797
0
        assert(0);
1798
0
        return NULL;
1799
0
    }
1800
2.50M
}
1801
1802
3.99M
static void push_expr(struct expr *expr, struct state *state) {
1803
3.99M
    if (state->exprs_used >= state->exprs_size) {
1804
8.78k
        size_t new_size = 2*state->exprs_size;
1805
8.78k
        if (new_size == 0) new_size = 8;
1806
8.78k
        if (REALLOC_N(state->exprs, new_size) < 0) {
1807
0
            STATE_ENOMEM;
1808
0
            return;
1809
0
        }
1810
8.78k
        state->exprs_size = new_size;
1811
8.78k
    }
1812
3.99M
    state->exprs[state->exprs_used++] = expr;
1813
3.99M
}
1814
1815
899k
static void push_new_binary_op(enum binary_op op, struct state *state) {
1816
899k
    struct expr *expr = NULL;
1817
899k
    if (ALLOC(expr) < 0) {
1818
0
        STATE_ENOMEM;
1819
0
        return;
1820
0
    }
1821
1822
899k
    expr->tag = E_BINARY;
1823
899k
    expr->op  = op;
1824
899k
    expr->right = pop_expr(state);
1825
899k
    expr->left = pop_expr(state);
1826
899k
    expr->left_matched = false;  /* for 'else' operator only, true if any matches on LHS */
1827
899k
    push_expr(expr, state);
1828
899k
}
1829
1830
32.5k
int pathx_escape_name(const char *in, char **out) {
1831
32.5k
    const char *p;
1832
32.5k
    int num_to_escape = 0;
1833
32.5k
    char *s;
1834
1835
32.5k
    *out = NULL;
1836
1837
41.9M
    for (p = in; *p; p++) {
1838
41.9M
        if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1839
40.3M
            num_to_escape += 1;
1840
41.9M
    }
1841
1842
32.5k
    if (num_to_escape == 0)
1843
31.7k
        return 0;
1844
1845
767
    if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0)
1846
0
        return -1;
1847
1848
41.7M
    for (p = in, s = *out; *p; p++) {
1849
41.7M
        if (strchr(name_follow, *p) || isspace(*p) || *p == '\\')
1850
40.3M
            *s++ = '\\';
1851
41.7M
        *s++ = *p;
1852
41.7M
    }
1853
767
    *s = '\0';
1854
767
    return 0;
1855
767
}
1856
1857
/* Return true if POS is preceded by an odd number of backslashes, i.e., if
1858
 * POS is escaped. Stop the search when we get to START */
1859
1.11M
static bool backslash_escaped(const char *pos, const char *start) {
1860
1.11M
    bool result=false;
1861
1.11M
    while (pos-- > start && *pos == '\\') {
1862
0
        result = !result;
1863
0
    }
1864
1.11M
    return result;
1865
1.11M
}
1866
1867
/*
1868
 * NameNoWS ::= [^][|/\= \t\n] | \\.
1869
 * NameWS   ::= [^][|/\=] | \\.
1870
 * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS
1871
 */
1872
2.00M
static char *parse_name(struct state *state) {
1873
2.00M
    const char *s = state->pos;
1874
2.00M
    char *result;
1875
1876
    /* Advance state->pos until it points to the first character that is
1877
     * not part of a name. */
1878
66.2M
    while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
1879
        /* Since we allow spaces in names, we need to avoid gobbling up
1880
         * stuff that is in follow(Name), e.g. 'or' so that things like
1881
         * [name1 or name2] still work. In other words, we'll parse 'x frob
1882
         * y' as one name, but for 'x or y', we consider 'x' a name in its
1883
         * own right. */
1884
64.2M
        if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
1885
64.2M
            STREQLEN(state->pos, " and ", strlen(" and ")) ||
1886
64.2M
            STREQLEN(state->pos, " else ", strlen(" else ")))
1887
7.45k
            break;
1888
1889
64.2M
        if (*state->pos == '\\') {
1890
68.8k
            state->pos += 1;
1891
68.8k
            if (*state->pos == '\0') {
1892
0
                STATE_ERROR(state, PATHX_ENAME);
1893
0
                return NULL;
1894
0
            }
1895
68.8k
        }
1896
64.2M
        state->pos += 1;
1897
64.2M
    }
1898
1899
    /* Strip trailing white space. Make sure we respect escaped whitespace
1900
     * and don't strip it as in "x\\ " */
1901
2.00M
    if (state->pos > s) {
1902
2.00M
        state->pos -= 1;
1903
3.11M
        while (isspace(*state->pos) && state->pos > s
1904
1.11M
               && !backslash_escaped(state->pos, s))
1905
1.11M
            state->pos -= 1;
1906
2.00M
        state->pos += 1;
1907
2.00M
    }
1908
1909
2.00M
    if (state->pos == s) {
1910
123
        STATE_ERROR(state, PATHX_ENAME);
1911
123
        return NULL;
1912
123
    }
1913
1914
2.00M
    result = strndup(s, state->pos - s);
1915
2.00M
    if (result == NULL) {
1916
0
        STATE_ENOMEM;
1917
0
        return NULL;
1918
0
    }
1919
1920
2.00M
    char *p = result;
1921
65.1M
    for (char *t = result; *t != '\0'; t++, p++) {
1922
63.1M
        if (*t == '\\')
1923
68.8k
            t += 1;
1924
63.1M
        *p = *t;
1925
63.1M
    }
1926
2.00M
    *p = '\0';
1927
1928
2.00M
    return result;
1929
2.00M
}
1930
1931
/*
1932
 * Predicate    ::= "[" Expr "]" *
1933
 */
1934
3.20M
static struct pred *parse_predicates(struct state *state) {
1935
3.20M
    struct pred *pred = NULL;
1936
3.20M
    int nexpr = 0;
1937
1938
3.81M
    while (match(state, L_BRACK)) {
1939
763k
        parse_expr(state);
1940
763k
        nexpr += 1;
1941
763k
        RET0_ON_ERROR;
1942
1943
619k
        if (! match(state, R_BRACK)) {
1944
35
            STATE_ERROR(state, PATHX_EPRED);
1945
35
            return NULL;
1946
35
        }
1947
619k
        skipws(state);
1948
619k
    }
1949
1950
3.05M
    if (nexpr == 0)
1951
2.92M
        return NULL;
1952
1953
127k
    if (ALLOC(pred) < 0) {
1954
0
        STATE_ENOMEM;
1955
0
        return NULL;
1956
0
    }
1957
127k
    pred->nexpr = nexpr;
1958
1959
127k
    if (ALLOC_N(pred->exprs, nexpr) < 0) {
1960
0
        free_pred(pred);
1961
0
        STATE_ENOMEM;
1962
0
        return NULL;
1963
0
    }
1964
1965
744k
    for (int i = nexpr - 1; i >= 0; i--)
1966
617k
        pred->exprs[i] = pop_expr(state);
1967
1968
127k
    return pred;
1969
127k
}
1970
1971
/*
1972
 * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..'
1973
 * AxisSpecifier ::= AxisName '::' | <epsilon>
1974
 * AxisName ::= 'ancestor'
1975
 *            | 'ancestor-or-self'
1976
 *            | 'child'
1977
 *            | 'descendant'
1978
 *            | 'descendant-or-self'
1979
 *            | 'parent'
1980
 *            | 'self'
1981
 *            | 'root'
1982
 */
1983
2.25M
static struct step *parse_step(struct state *state) {
1984
2.25M
    struct step *step;
1985
2.25M
    int explicit_axis = 0, allow_predicates = 1;
1986
1987
2.25M
    if (ALLOC(step) < 0) {
1988
0
        STATE_ENOMEM;
1989
0
        return NULL;
1990
0
    }
1991
1992
2.25M
    step->axis = CHILD;
1993
24.8M
    for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) {
1994
22.5M
        if (looking_at(state, axis_names[i], "::")) {
1995
4
            step->axis = i;
1996
4
            explicit_axis = 1;
1997
4
            break;
1998
4
        }
1999
22.5M
    }
2000
2001
2.25M
    if (! match(state, '*')) {
2002
2.00M
        step->name = parse_name(state);
2003
2.00M
        if (HAS_ERROR(state))
2004
123
            goto error;
2005
2.00M
        if (! explicit_axis) {
2006
2.00M
            if (STREQ(step->name, ".") || STREQ(step->name, "..")) {
2007
4.87k
                step->axis = STREQ(step->name, ".") ? SELF : PARENT;
2008
4.87k
                FREE(step->name);
2009
4.87k
                allow_predicates = 0;
2010
4.87k
            }
2011
2.00M
        }
2012
2.00M
    }
2013
2014
2.25M
    if (allow_predicates) {
2015
2.24M
        step->predicates = parse_predicates(state);
2016
2.24M
        if (HAS_ERROR(state))
2017
115k
            goto error;
2018
2.24M
    }
2019
2020
2.13M
    return step;
2021
2022
115k
 error:
2023
115k
    free_step(step);
2024
115k
    return NULL;
2025
2.25M
}
2026
2027
75.2k
static struct step *make_step(enum axis axis, struct state *state) {
2028
75.2k
    struct step *result = NULL;
2029
2030
75.2k
    if (ALLOC(result) < 0) {
2031
0
        STATE_ENOMEM;
2032
0
        return NULL;
2033
0
    }
2034
75.2k
    result->axis = axis;
2035
75.2k
    return result;
2036
75.2k
}
2037
2038
/*
2039
 * RelativeLocationPath ::= Step
2040
 *                        | RelativeLocationPath '/' Step
2041
 *                        | AbbreviatedRelativeLocationPath
2042
 * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
2043
 *
2044
 * The above is the same as
2045
 * RelativeLocationPath ::= Step ('/' Step | '//' Step)*
2046
 */
2047
static struct locpath *
2048
2.11M
parse_relative_location_path(struct state *state) {
2049
2.11M
    struct step *step = NULL;
2050
2.11M
    struct locpath *locpath = NULL;
2051
2052
2.11M
    step = parse_step(state);
2053
2.11M
    if (HAS_ERROR(state))
2054
115k
        goto error;
2055
2056
2.00M
    if (ALLOC(locpath) < 0) {
2057
0
        STATE_ENOMEM;
2058
0
        goto error;
2059
0
    }
2060
2.00M
    list_append(locpath->steps, step);
2061
2.00M
    step = NULL;
2062
2063
2.13M
    while (match(state, '/')) {
2064
137k
        if (*state->pos == '/') {
2065
40.9k
            state->pos += 1;
2066
40.9k
            step = make_step(DESCENDANT_OR_SELF, state);
2067
40.9k
            if (step == NULL) {
2068
0
                STATE_ENOMEM;
2069
0
                goto error;
2070
0
            }
2071
40.9k
            list_append(locpath->steps, step);
2072
40.9k
        }
2073
137k
        step = parse_step(state);
2074
137k
        if (HAS_ERROR(state))
2075
45
            goto error;
2076
137k
        list_append(locpath->steps, step);
2077
137k
        step = NULL;
2078
137k
    }
2079
2.00M
    return locpath;
2080
2081
115k
 error:
2082
115k
    free_step(step);
2083
115k
    free_locpath(locpath);
2084
115k
    return NULL;
2085
2.00M
}
2086
2087
/*
2088
 * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
2089
 * AbsoluteLocationPath ::= '/' RelativeLocationPath?
2090
 *                        | AbbreviatedAbsoluteLocationPath
2091
 * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
2092
 *
2093
 */
2094
2.11M
static void parse_location_path(struct state *state) {
2095
2.11M
    struct expr *expr = NULL;
2096
2.11M
    struct locpath *locpath = NULL;
2097
2098
2.11M
    if (match(state, '/')) {
2099
34.3k
        if (*state->pos == '/') {
2100
20.2k
            state->pos += 1;
2101
20.2k
            locpath = parse_relative_location_path(state);
2102
20.2k
            if (HAS_ERROR(state))
2103
26
                goto error;
2104
20.2k
            struct step *step = make_step(DESCENDANT_OR_SELF, state);
2105
20.2k
            if (HAS_ERROR(state))
2106
0
                goto error;
2107
20.2k
            list_cons(locpath->steps, step);
2108
20.2k
        } else {
2109
14.0k
            if (*state->pos != '\0') {
2110
14.0k
                locpath = parse_relative_location_path(state);
2111
14.0k
            } else {
2112
0
                if (ALLOC(locpath) < 0)
2113
0
                    goto err_nomem;
2114
0
            }
2115
14.0k
            struct step *step = make_step(ROOT, state);
2116
14.0k
            if (HAS_ERROR(state)) {
2117
4
                free_step(step);
2118
4
                goto error;
2119
4
            }
2120
14.0k
            list_cons(locpath->steps, step);
2121
14.0k
        }
2122
2.07M
    } else {
2123
2.07M
        locpath = parse_relative_location_path(state);
2124
2.07M
    }
2125
2126
2.11M
    if (ALLOC(expr) < 0)
2127
0
        goto err_nomem;
2128
2.11M
    expr->tag = E_FILTER;
2129
2.11M
    expr->locpath = locpath;
2130
2.11M
    push_expr(expr, state);
2131
2.11M
    return;
2132
2133
0
 err_nomem:
2134
0
    STATE_ENOMEM;
2135
30
 error:
2136
30
    free_expr(expr);
2137
30
    free_locpath(locpath);
2138
30
    return;
2139
0
}
2140
2141
/*
2142
 * Number       ::= /[0-9]+/
2143
 */
2144
813k
static void parse_number(struct state *state) {
2145
813k
    struct expr *expr = NULL;
2146
813k
    unsigned long val;
2147
813k
    char *end;
2148
2149
813k
    errno = 0;
2150
813k
    val = strtoul(state->pos, &end, 10);
2151
813k
    if (errno || end == state->pos || (int) val != val) {
2152
0
        STATE_ERROR(state, PATHX_ENUMBER);
2153
0
        return;
2154
0
    }
2155
2156
813k
    state->pos = end;
2157
2158
813k
    if (ALLOC(expr) < 0)
2159
0
        goto err_nomem;
2160
813k
    expr->tag = E_VALUE;
2161
813k
    expr->value_ind = make_value(T_NUMBER, state);
2162
813k
    if (HAS_ERROR(state))
2163
0
        goto error;
2164
813k
    expr_value(expr, state)->number = val;
2165
2166
813k
    push_expr(expr, state);
2167
813k
    return;
2168
2169
0
 err_nomem:
2170
0
    STATE_ENOMEM;
2171
0
 error:
2172
0
    free_expr(expr);
2173
0
    return;
2174
0
}
2175
2176
/*
2177
 * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'"
2178
 */
2179
87.8k
static void parse_literal(struct state *state) {
2180
87.8k
    char delim;
2181
87.8k
    const char *s;
2182
87.8k
    struct expr *expr = NULL;
2183
2184
87.8k
    if (*state->pos == '"')
2185
632
        delim = '"';
2186
87.2k
    else if (*state->pos == '\'')
2187
87.2k
        delim = '\'';
2188
0
    else {
2189
0
        STATE_ERROR(state, PATHX_ESTRING);
2190
0
        return;
2191
0
    }
2192
87.8k
    state->pos += 1;
2193
2194
87.8k
    s = state->pos;
2195
25.8M
    while (*state->pos != '\0' && *state->pos != delim) state->pos += 1;
2196
2197
87.8k
    if (*state->pos != delim) {
2198
1
        STATE_ERROR(state, PATHX_EDELIM);
2199
1
        return;
2200
1
    }
2201
87.8k
    state->pos += 1;
2202
2203
87.8k
    if (ALLOC(expr) < 0)
2204
0
        goto err_nomem;
2205
87.8k
    expr->tag = E_VALUE;
2206
87.8k
    expr->value_ind = make_value(T_STRING, state);
2207
87.8k
    if (HAS_ERROR(state))
2208
0
        goto error;
2209
87.8k
    expr_value(expr, state)->string = strndup(s, state->pos - s - 1);
2210
87.8k
    if (expr_value(expr, state)->string == NULL)
2211
0
        goto err_nomem;
2212
2213
87.8k
    push_expr(expr, state);
2214
87.8k
    return;
2215
2216
0
 err_nomem:
2217
0
    STATE_ENOMEM;
2218
0
 error:
2219
0
    free_expr(expr);
2220
0
    return;
2221
0
}
2222
2223
/*
2224
 * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')'
2225
 */
2226
54.0k
static void parse_function_call(struct state *state) {
2227
54.0k
    const struct func *func = NULL;
2228
54.0k
    struct expr *expr = NULL;
2229
54.0k
    int nargs = 0, find = 0;
2230
2231
284k
    for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) {
2232
284k
        if (looking_at(state, builtin_funcs[find].name, "(")) {
2233
54.0k
            func = builtin_funcs + find;
2234
54.0k
            break;
2235
54.0k
        }
2236
284k
    }
2237
54.0k
    if (func == NULL) {
2238
2
        STATE_ERROR(state, PATHX_ENAME);
2239
2
        return;
2240
2
    }
2241
2242
54.0k
    if (! match(state, ')')) {
2243
1.26M
        do {
2244
1.26M
            nargs += 1;
2245
1.26M
            parse_expr(state);
2246
1.26M
            RET_ON_ERROR;
2247
1.26M
        } while (match(state, ','));
2248
2249
42.8k
        if (! match(state, ')')) {
2250
3
            STATE_ERROR(state, PATHX_EPAREN);
2251
3
            return;
2252
3
        }
2253
42.8k
    }
2254
2255
42.8k
    int found = 0; /* Whether there is a builtin matching in name and arity */
2256
44.0k
    for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) {
2257
44.0k
        if (STRNEQ(func->name, builtin_funcs[i].name))
2258
3
            break;
2259
44.0k
        if (builtin_funcs[i].arity == nargs) {
2260
42.8k
            func = builtin_funcs + i;
2261
42.8k
            found = 1;
2262
42.8k
            break;
2263
42.8k
        }
2264
44.0k
    }
2265
2266
42.8k
    if (! found) {
2267
3
        STATE_ERROR(state, PATHX_EARITY);
2268
3
        return;
2269
3
    }
2270
2271
42.8k
    if (ALLOC(expr) < 0) {
2272
0
        STATE_ENOMEM;
2273
0
        return;
2274
0
    }
2275
42.8k
    expr->tag = E_APP;
2276
42.8k
    if (ALLOC_N(expr->args, nargs) < 0) {
2277
0
        free_expr(expr);
2278
0
        STATE_ENOMEM;
2279
0
        return;
2280
0
    }
2281
42.8k
    expr->func = func;
2282
86.2k
    for (int i = nargs - 1; i >= 0; i--)
2283
43.4k
        expr->args[i] = pop_expr(state);
2284
2285
42.8k
    push_expr(expr, state);
2286
42.8k
}
2287
2288
/*
2289
 * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* /
2290
 *
2291
 * The '$' is consumed by parse_primary_expr
2292
 */
2293
2
static void parse_var(struct state *state) {
2294
2
    const char *id = state->pos;
2295
2
    struct expr *expr = NULL;
2296
2297
2
    if (!isalpha(*id) && *id != '_') {
2298
2
        STATE_ERROR(state, PATHX_ENAME);
2299
2
        return;
2300
2
    }
2301
0
    id++;
2302
0
    while (isalpha(*id) || isdigit(*id) || *id == '_')
2303
0
        id += 1;
2304
2305
0
    if (ALLOC(expr) < 0)
2306
0
        goto err_nomem;
2307
0
    expr->tag = E_VAR;
2308
0
    expr->ident = strndup(state->pos, id - state->pos);
2309
0
    if (expr->ident == NULL)
2310
0
        goto err_nomem;
2311
2312
0
    push_expr(expr, state);
2313
0
    state->pos = id;
2314
0
    return;
2315
0
 err_nomem:
2316
0
    STATE_ENOMEM;
2317
0
    free_expr(expr);
2318
0
    return;
2319
0
}
2320
2321
/*
2322
 * PrimaryExpr ::= Literal
2323
 *               | Number
2324
 *               | FunctionCall
2325
 *               | VariableReference
2326
 *               | '(' Expr ')'
2327
 *
2328
 */
2329
961k
static void parse_primary_expr(struct state *state) {
2330
961k
    if (peek(state, "'\"")) {
2331
87.8k
        parse_literal(state);
2332
873k
    } else if (peek(state, "0123456789")) {
2333
813k
        parse_number(state);
2334
813k
    } else if (match(state, '(')) {
2335
6.53k
        parse_expr(state);
2336
6.53k
        RET_ON_ERROR;
2337
6.50k
        if (! match(state, ')')) {
2338
25
            STATE_ERROR(state, PATHX_EPAREN);
2339
25
            return;
2340
25
        }
2341
54.0k
    } else if (match(state, '$')) {
2342
2
        parse_var(state);
2343
54.0k
    } else {
2344
54.0k
        parse_function_call(state);
2345
54.0k
    }
2346
961k
}
2347
2348
3.07M
static int looking_at_primary_expr(struct state *state) {
2349
3.07M
    const char *s = state->pos;
2350
    /* Is it a Number, Literal or VariableReference ? */
2351
3.07M
    if (peek(state, "$'\"0123456789"))
2352
901k
        return 1;
2353
2354
    /* Or maybe a function call, i.e. a word followed by a '(' ?
2355
     * Note that our function names are only [a-zA-Z]+
2356
     */
2357
2.57M
    while (*s != '\0' && isalpha(*s)) s++;
2358
2.18M
    while (*s != '\0' && isspace(*s)) s++;
2359
2.17M
    return *s == '(';
2360
3.07M
}
2361
2362
/*
2363
 * PathExpr ::= LocationPath
2364
 *            | FilterExpr
2365
 *            | FilterExpr '/' RelativeLocationPath
2366
 *            | FilterExpr '//' RelativeLocationPath
2367
 *
2368
 * FilterExpr ::= PrimaryExpr Predicate
2369
 *
2370
 * The grammar is ambiguous here: the expression '42' can either be the
2371
 * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The
2372
 * reason for this ambiguity is that we allow node names like '42' in the
2373
 * tree; rather than forbid them, we resolve the ambiguity by always
2374
 * parsing '42' as a number, and requiring that the user write the
2375
 * RelativeLocationPath in a different form, e.g. 'child::42' or './42'.
2376
 */
2377
3.07M
static void parse_path_expr(struct state *state) {
2378
3.07M
    struct expr *expr = NULL;
2379
3.07M
    struct pred *predicates = NULL;
2380
3.07M
    struct locpath *locpath = NULL;
2381
2382
3.07M
    if (looking_at_primary_expr(state)) {
2383
961k
        parse_primary_expr(state);
2384
961k
        RET_ON_ERROR;
2385
950k
        predicates = parse_predicates(state);
2386
950k
        RET_ON_ERROR;
2387
921k
        if (match(state, '/')) {
2388
6.50k
            if (match(state, '/')) {
2389
6
                locpath = parse_relative_location_path(state);
2390
6
                if (HAS_ERROR(state))
2391
0
                    goto error;
2392
2393
6
                struct step *step = make_step(DESCENDANT_OR_SELF, state);
2394
6
                if (HAS_ERROR(state))
2395
0
                    return;
2396
6
                list_cons(locpath->steps, step);
2397
6.49k
            } else {
2398
6.49k
                if (*state->pos == '\0') {
2399
0
                    STATE_ERROR(state, PATHX_EEND);
2400
0
                    goto error;
2401
0
                }
2402
6.49k
                locpath = parse_relative_location_path(state);
2403
6.49k
            }
2404
6.50k
        }
2405
        /* A PathExpr without predicates and locpath is
2406
         * just a PrimaryExpr
2407
         */
2408
921k
        if (predicates == NULL && locpath == NULL)
2409
881k
            return;
2410
        /* To make evaluation easier, we parse something like
2411
         *   $var[pred] as $var[pred]/.
2412
         */
2413
39.8k
        if (locpath == NULL) {
2414
33.3k
            if (ALLOC(locpath) < 0)
2415
0
                goto error;
2416
33.3k
            if (ALLOC(locpath->steps) < 0)
2417
0
                goto error;
2418
33.3k
            locpath->steps->axis = SELF;
2419
33.3k
        }
2420
39.8k
        if (ALLOC(expr) < 0)
2421
0
            goto error;
2422
39.8k
        expr->tag = E_FILTER;
2423
39.8k
        expr->predicates = predicates;
2424
39.8k
        expr->primary    = pop_expr(state);
2425
39.8k
        expr->locpath    = locpath;
2426
39.8k
        push_expr(expr, state);
2427
2.11M
    } else {
2428
2.11M
        parse_location_path(state);
2429
2.11M
    }
2430
2.15M
    return;
2431
2.15M
 error:
2432
0
    free_expr(expr);
2433
0
    free_pred(predicates);
2434
0
    free_locpath(locpath);
2435
0
    return;
2436
3.07M
}
2437
2438
/*
2439
 * UnionExpr ::= PathExpr ('|' PathExpr)*
2440
 */
2441
2.95M
static void parse_union_expr(struct state *state) {
2442
2.95M
    parse_path_expr(state);
2443
2.95M
    RET_ON_ERROR;
2444
2.91M
    while (match(state, '|')) {
2445
121k
        parse_path_expr(state);
2446
121k
        RET_ON_ERROR;
2447
87.2k
        push_new_binary_op(OP_UNION, state);
2448
87.2k
    }
2449
2.83M
}
2450
2451
/*
2452
 * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)*
2453
 */
2454
2.51M
static void parse_multiplicative_expr(struct state *state) {
2455
2.51M
    parse_union_expr(state);
2456
2.51M
    RET_ON_ERROR;
2457
2.79M
    while (match(state, '*')) {
2458
438k
        parse_union_expr(state);
2459
438k
        RET_ON_ERROR;
2460
438k
        push_new_binary_op(OP_STAR, state);
2461
438k
    }
2462
2.35M
}
2463
2464
/*
2465
 * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)*
2466
 * AdditiveOp   ::= '+' | '-'
2467
 */
2468
2.18M
static void parse_additive_expr(struct state *state) {
2469
2.18M
    parse_multiplicative_expr(state);
2470
2.18M
    RET_ON_ERROR;
2471
2.35M
    while (*state->pos == '+' || *state->pos == '-') {
2472
322k
        enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS;
2473
322k
        state->pos += 1;
2474
322k
        skipws(state);
2475
322k
        parse_multiplicative_expr(state);
2476
322k
        RET_ON_ERROR;
2477
275k
        push_new_binary_op(op, state);
2478
275k
    }
2479
2.08M
}
2480
2481
/*
2482
 * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)?
2483
 * RelationalOp ::= ">" | "<" | ">=" | "<="
2484
 */
2485
2.16M
static void parse_relational_expr(struct state *state) {
2486
2.16M
    parse_additive_expr(state);
2487
2.16M
    RET_ON_ERROR;
2488
2.03M
    if (*state->pos == '<' || *state->pos == '>') {
2489
26.0k
        enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT;
2490
26.0k
        state->pos += 1;
2491
26.0k
        if (*state->pos == '=') {
2492
0
            op = (op == OP_LT) ? OP_LE : OP_GE;
2493
0
            state->pos += 1;
2494
0
        }
2495
26.0k
        skipws(state);
2496
26.0k
        parse_additive_expr(state);
2497
26.0k
        RET_ON_ERROR;
2498
32
        push_new_binary_op(op, state);
2499
32
    }
2500
2.03M
}
2501
2502
/*
2503
 * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr
2504
 * EqualityOp ::= "=" | "!="
2505
 * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr
2506
 * MatchOp ::= "=~" | "!~"
2507
 */
2508
2.09M
static void parse_equality_expr(struct state *state) {
2509
2.09M
    parse_relational_expr(state);
2510
2.09M
    RET_ON_ERROR;
2511
1.96M
    if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') {
2512
68.4k
        enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH;
2513
68.4k
        state->pos += 2;
2514
68.4k
        skipws(state);
2515
68.4k
        parse_relational_expr(state);
2516
68.4k
        RET_ON_ERROR;
2517
42.5k
        push_new_binary_op(op, state);
2518
1.89M
    } else if (*state->pos == '=' ||
2519
1.89M
        (*state->pos == '!' && state->pos[1] == '=')) {
2520
360
        enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ;
2521
360
        state->pos += (op == OP_EQ) ? 1 : 2;
2522
360
        skipws(state);
2523
360
        parse_relational_expr(state);
2524
360
        RET_ON_ERROR;
2525
346
        push_new_binary_op(op, state);
2526
346
    }
2527
1.96M
}
2528
2529
/*
2530
 * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
2531
 */
2532
2.09M
static void parse_and_expr(struct state *state) {
2533
2.09M
    parse_equality_expr(state);
2534
2.09M
    RET_ON_ERROR;
2535
1.93M
    while (*state->pos == 'a' && state->pos[1] == 'n'
2536
591
        && state->pos[2] == 'd') {
2537
591
        state->pos += 3;
2538
591
        skipws(state);
2539
591
        parse_equality_expr(state);
2540
591
        RET_ON_ERROR;
2541
38
        push_new_binary_op(OP_AND, state);
2542
38
    }
2543
1.93M
}
2544
2545
/*
2546
 * OrExpr ::= AndExpr ('or' AndExpr)*
2547
 */
2548
2.09M
static void parse_or_expr(struct state *state) {
2549
2.09M
    parse_and_expr(state);
2550
2.09M
    RET_ON_ERROR;
2551
1.93M
    while (*state->pos == 'o' && state->pos[1] == 'r') {
2552
97
        state->pos += 2;
2553
97
        skipws(state);
2554
97
        parse_and_expr(state);
2555
97
        RET_ON_ERROR;
2556
90
        push_new_binary_op(OP_OR, state);
2557
90
    }
2558
1.93M
}
2559
2560
/*
2561
 * ElseExpr ::= OrExpr ('else' OrExpr)*
2562
 */
2563
2.03M
static void parse_else_expr(struct state *state) {
2564
2.03M
    parse_or_expr(state);
2565
2.03M
    RET_ON_ERROR;
2566
1.93M
    while (*state->pos == 'e' && state->pos[1] == 'l'
2567
55.3k
        && state->pos[2] == 's' && state->pos[3] == 'e' ) {
2568
55.3k
        state->pos += 4;
2569
55.3k
        skipws(state);
2570
55.3k
        parse_or_expr(state);
2571
55.3k
        RET_ON_ERROR;
2572
55.3k
        push_new_binary_op(OP_ELSE, state);
2573
55.3k
        state->has_else = 1;
2574
55.3k
    }
2575
1.88M
}
2576
2577
/*
2578
 * Expr ::= ElseExpr
2579
 */
2580
2.03M
static void parse_expr(struct state *state) {
2581
2.03M
    skipws(state);
2582
2.03M
    parse_else_expr(state);
2583
2.03M
}
2584
2585
7.91k
static void store_error(struct pathx *pathx) {
2586
7.91k
    const char *pathx_msg = NULL;
2587
7.91k
    const char *path = pathx->state->txt;
2588
7.91k
    const pathx_errcode_t errcode = pathx->state->errcode;
2589
7.91k
    struct error *err = pathx->state->error;
2590
2591
7.91k
    char *pos_str = pathx->state->errmsg;
2592
7.91k
    pathx->state->errmsg = NULL;
2593
2594
7.91k
    if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR)
2595
7.51k
        return;
2596
2597
405
    switch (errcode) {
2598
0
    case PATHX_ENOMEM:
2599
0
        err->code = AUG_ENOMEM;
2600
0
        break;
2601
0
    case PATHX_EMMATCH:
2602
0
        err->code = AUG_EMMATCH;
2603
0
        break;
2604
0
    case PATHX_ENOMATCH:
2605
0
        err->code = AUG_ENOMATCH;
2606
0
        break;
2607
405
    default:
2608
405
        err->code = AUG_EPATHX;
2609
405
        break;
2610
405
    }
2611
2612
    /* We only need details for pathx syntax errors */
2613
405
    if (err->code != AUG_EPATHX)
2614
0
        return;
2615
2616
405
    int pos;
2617
405
    pathx_msg = pathx_error(pathx, NULL, &pos);
2618
2619
405
    bool has_msg = pos_str != NULL;
2620
405
    int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str);
2621
405
    if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) {
2622
405
        if (has_msg) {
2623
89
            strcat(pos_str, " in ");
2624
89
            strncat(pos_str, path, pos);
2625
316
        } else {
2626
            /* initialize pos_str explicitly, path might be "" */
2627
316
            pos_str[0] = '\0';
2628
316
            strncat(pos_str, path, pos);
2629
316
        }
2630
405
        strcat(pos_str, "|=|");
2631
405
        strcat(pos_str, path + pos);
2632
405
    }
2633
2634
405
    err->minor = errcode;
2635
405
    err->details = pos_str;
2636
405
    pos_str = NULL;
2637
405
    err->minor_details = pathx_msg;
2638
405
}
2639
2640
int pathx_parse(const struct tree *tree,
2641
                struct error *err,
2642
                const char *txt,
2643
                bool need_nodeset,
2644
                struct pathx_symtab *symtab,
2645
                struct tree *root_ctx,
2646
7.82k
                struct pathx **pathx) {
2647
7.82k
    struct state *state = NULL;
2648
2649
7.82k
    *pathx = NULL;
2650
2651
7.82k
    if (ALLOC(*pathx) < 0)
2652
0
        goto oom;
2653
2654
7.82k
    (*pathx)->origin = (struct tree *) tree;
2655
2656
    /* Set up state */
2657
7.82k
    if (ALLOC((*pathx)->state) < 0)
2658
0
        goto oom;
2659
7.82k
    state = (*pathx)->state;
2660
2661
7.82k
    state->errcode = PATHX_NOERROR;
2662
7.82k
    state->errmsg = NULL;
2663
7.82k
    state->txt = txt;
2664
7.82k
    state->pos = txt;
2665
7.82k
    state->symtab = symtab;
2666
7.82k
    state->root_ctx = root_ctx;
2667
7.82k
    state->error = err;
2668
2669
7.82k
    if (ALLOC_N(state->value_pool, 8) < 0) {
2670
0
        STATE_ENOMEM;
2671
0
        goto done;
2672
0
    }
2673
7.82k
    state->value_pool_size = 8;
2674
7.82k
    state->value_pool[0].tag = T_BOOLEAN;
2675
7.82k
    state->value_pool[0].boolval = 0;
2676
7.82k
    state->value_pool[1].tag = T_BOOLEAN;
2677
7.82k
    state->value_pool[1].boolval = 1;
2678
7.82k
    state->value_pool_used = 2;
2679
2680
    /* Parse */
2681
7.82k
    parse_expr(state);
2682
7.82k
    if (HAS_ERROR(state))
2683
194
        goto done;
2684
7.63k
    if (state->pos != state->txt + strlen(state->txt)) {
2685
110
        STATE_ERROR(state, PATHX_EEND);
2686
110
        goto done;
2687
110
    }
2688
2689
7.52k
    if (state->exprs_used != 1) {
2690
0
        STATE_ERROR(state, PATHX_EINTERNAL);
2691
0
        goto done;
2692
0
    }
2693
2694
    /* Typecheck */
2695
7.52k
    check_expr(state->exprs[0], state);
2696
7.52k
    if (HAS_ERROR(state))
2697
7
        goto done;
2698
2699
7.51k
    if (need_nodeset && state->exprs[0]->type != T_NODESET) {
2700
5
        STATE_ERROR(state, PATHX_ETYPE);
2701
5
        goto done;
2702
5
    }
2703
2704
7.82k
 done:
2705
7.82k
    store_error(*pathx);
2706
7.82k
    return state->errcode;
2707
0
 oom:
2708
0
    free_pathx(*pathx);
2709
0
    *pathx = NULL;
2710
0
    if (err != NULL)
2711
0
        err->code = AUG_ENOMEM;
2712
0
    return PATHX_ENOMEM;
2713
7.51k
}
2714
2715
/*************************************************************************
2716
 * Searching in the tree
2717
 *************************************************************************/
2718
2719
3.29M
static bool step_matches(struct step *step, struct tree *tree) {
2720
3.29M
    if ( step->axis == SEQ && step->name == NULL ) {
2721
0
        if ( tree->label == NULL )
2722
0
            return false;
2723
        /* label matches if it consists of numeric digits only */
2724
0
        for( char *s = tree->label; *s ; s++) {
2725
0
            if ( ! isdigit(*s) )
2726
0
                return false;
2727
0
        }
2728
0
        return true;
2729
3.29M
    } else if (step->name == NULL) {
2730
2.51M
        return step->axis == ROOT || tree->label != NULL;
2731
2.51M
    } else {
2732
780k
        return streqx(step->name, tree->label);
2733
780k
    }
2734
3.29M
}
2735
2736
0
static struct tree *tree_prev(struct tree *pos) {
2737
0
    struct tree *node = NULL;
2738
0
    if (pos != pos->parent->children) {
2739
0
        for (node = pos->parent->children;
2740
0
             node->next != pos;
2741
0
             node = node->next);
2742
0
    }
2743
0
    return node;
2744
0
}
2745
2746
/* When the first step doesn't begin with ROOT then use relative root context
2747
 * instead. */
2748
static struct tree *step_root(struct step *step, struct tree *ctx,
2749
1.35M
                              struct tree *root_ctx) {
2750
1.35M
    struct tree *node = NULL;
2751
1.35M
    switch (step->axis) {
2752
1.04M
    case SELF:
2753
1.23M
    case CHILD:
2754
1.23M
    case SEQ:
2755
1.23M
    case DESCENDANT:
2756
1.23M
    case PARENT:
2757
1.23M
    case ANCESTOR:
2758
1.23M
    case PRECEDING_SIBLING:
2759
1.23M
    case FOLLOWING_SIBLING:
2760
        /* only use root_ctx when ctx is the absolute tree root */
2761
1.23M
        if (ctx == ctx->parent && root_ctx != NULL)
2762
1.20k
            node = root_ctx;
2763
1.23M
        else
2764
1.23M
            node = ctx;
2765
1.23M
        break;
2766
71.5k
    case ROOT:
2767
124k
    case DESCENDANT_OR_SELF:
2768
124k
        node = ctx;
2769
124k
        break;
2770
0
    default:
2771
0
        assert(0);
2772
1.35M
    }
2773
1.35M
    if (node == NULL)
2774
0
        return NULL;
2775
1.35M
    return node;
2776
1.35M
}
2777
2778
2.25M
static struct tree *step_first(struct step *step, struct tree *ctx) {
2779
2.25M
    struct tree *node = NULL;
2780
2.25M
    switch (step->axis) {
2781
1.06M
    case SELF:
2782
1.12M
    case DESCENDANT_OR_SELF:
2783
1.12M
        node = ctx;
2784
1.12M
        break;
2785
1.05M
    case CHILD:
2786
1.05M
    case SEQ:
2787
1.05M
    case DESCENDANT:
2788
1.05M
        node = ctx->children;
2789
1.05M
        break;
2790
0
    case PARENT:
2791
0
    case ANCESTOR:
2792
0
        node = ctx->parent;
2793
0
        break;
2794
71.5k
    case ROOT:
2795
182k
        while (ctx->parent != ctx)
2796
110k
            ctx = ctx->parent;
2797
71.5k
        node = ctx;
2798
71.5k
        break;
2799
0
    case PRECEDING_SIBLING:
2800
0
        node = tree_prev(ctx);
2801
0
        break;
2802
0
    case FOLLOWING_SIBLING:
2803
0
        node = ctx->next;
2804
0
        break;
2805
0
    default:
2806
0
        assert(0);
2807
2.25M
    }
2808
2.25M
    if (node == NULL)
2809
677k
        return NULL;
2810
1.57M
    if (step_matches(step, node))
2811
1.37M
        return node;
2812
195k
    return step_next(step, ctx, node);
2813
1.57M
}
2814
2815
static struct tree *step_next(struct step *step, struct tree *ctx,
2816
2.71M
                              struct tree *node) {
2817
4.87M
    while (node != NULL) {
2818
3.29M
        switch (step->axis) {
2819
1.06M
        case SELF:
2820
1.06M
            node = NULL;
2821
1.06M
            break;
2822
0
        case SEQ:
2823
1.39M
        case CHILD:
2824
1.39M
            node = node->next;
2825
1.39M
            break;
2826
0
        case DESCENDANT:
2827
763k
        case DESCENDANT_OR_SELF:
2828
763k
            if (node->children != NULL) {
2829
215k
                node = node->children;
2830
547k
            } else {
2831
763k
                while (node->next == NULL && node != ctx)
2832
215k
                    node = node->parent;
2833
547k
                if (node == ctx)
2834
55.5k
                    node = NULL;
2835
492k
                else
2836
492k
                    node = node->next;
2837
547k
            }
2838
763k
            break;
2839
0
        case PARENT:
2840
71.5k
        case ROOT:
2841
71.5k
            node = NULL;
2842
71.5k
            break;
2843
0
        case ANCESTOR:
2844
0
            if (node->parent == node)
2845
0
                node = NULL;
2846
0
            else
2847
0
                node = node->parent;
2848
0
            break;
2849
0
        case PRECEDING_SIBLING:
2850
0
            node = tree_prev(node);
2851
0
            break;
2852
0
        case FOLLOWING_SIBLING:
2853
0
            node = node->next;
2854
0
            break;
2855
0
        default:
2856
0
            assert(0);
2857
3.29M
        }
2858
3.29M
        if (node != NULL && step_matches(step, node))
2859
1.14M
            break;
2860
3.29M
    }
2861
2.71M
    return node;
2862
2.71M
}
2863
2864
7.56k
static struct value *pathx_eval(struct pathx *pathx) {
2865
7.56k
    struct state *state = pathx->state;
2866
7.56k
    state->ctx = pathx->origin;
2867
7.56k
    state->ctx_pos = 1;
2868
7.56k
    state->ctx_len = 1;
2869
7.56k
    eval_expr(state->exprs[0], state);
2870
7.56k
    if (HAS_ERROR(state))
2871
89
        return NULL;
2872
2873
7.47k
    if (state->values_used != 1) {
2874
0
        STATE_ERROR(state, PATHX_EINTERNAL);
2875
0
        return NULL;
2876
0
    }
2877
7.47k
    return pop_value(state);
2878
7.47k
}
2879
2880
11.3k
struct tree *pathx_next(struct pathx *pathx) {
2881
11.3k
    if (pathx->node + 1 < pathx->nodeset->used)
2882
11.3k
        return pathx->nodeset->nodes[++pathx->node];
2883
62
    return NULL;
2884
11.3k
}
2885
2886
/* Find the first node in TREE matching PATH. */
2887
7.77k
struct tree *pathx_first(struct pathx *pathx) {
2888
7.77k
    if (pathx->nodeset == NULL) {
2889
5.38k
        struct value *v = pathx_eval(pathx);
2890
2891
5.38k
        if (HAS_ERROR(pathx->state))
2892
86
            goto error;
2893
5.38k
        assert(v->tag == T_NODESET);
2894
5.29k
        pathx->nodeset = v->nodeset;
2895
5.29k
    }
2896
7.69k
    pathx->node = 0;
2897
7.69k
    if (pathx->nodeset->used == 0)
2898
244
        return NULL;
2899
7.44k
    else
2900
7.44k
        return pathx->nodeset->nodes[0];
2901
86
 error:
2902
86
    store_error(pathx);
2903
86
    return NULL;
2904
7.69k
}
2905
2906
/* Find a node in the tree that matches the longest prefix of PATH.
2907
 *
2908
 * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete
2909
 * prefix matches, and -1 if more than one node in the tree match.
2910
 *
2911
 * TMATCH is set to the tree node that matches, and SMATCH to the next step
2912
 * after the one where TMATCH matched. If no node matches or multiple nodes
2913
 * at the same depth match, TMATCH and SMATCH will be NULL. When exactly
2914
 * one node matches, TMATCH will be that node, and SMATCH will be NULL.
2915
 */
2916
static int locpath_search(struct locpath_trace *lpt,
2917
2.12k
                          struct tree **tmatch, struct step **smatch) {
2918
2.12k
    int last;
2919
2.12k
    int result = -1;
2920
2921
4.55k
    for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--);
2922
2.12k
    if (last < 0) {
2923
0
        *smatch = lpt->lp->steps;
2924
0
        result = 1;
2925
0
        goto done;
2926
0
    }
2927
2.12k
    if (lpt->ns[last]->used > 1) {
2928
0
        result = -1;
2929
0
        goto done;
2930
0
    }
2931
2.12k
    result = 0;
2932
2.12k
    *tmatch = lpt->ns[last]->nodes[0];
2933
2.12k
    *smatch = lpt->lp->steps;
2934
9.11k
    for (int i=0; i < last; i++)
2935
6.99k
        *smatch = (*smatch)->next;
2936
2.12k
 done:
2937
11.5k
    for (int i=0; i < lpt->maxns; i++)
2938
9.43k
        free_nodeset(lpt->ns[i]);
2939
2.12k
    FREE(lpt->ns);
2940
2.12k
    return result;
2941
2.12k
}
2942
2943
static char *step_seq_choose_name(struct pathx *path, struct tree *tree);
2944
2945
/* Expand the tree ROOT so that it contains all components of PATH. PATH
2946
 * must have been initialized against ROOT by a call to PATH_FIND_ONE.
2947
 *
2948
 * Return the first segment that was created by this operation, or NULL on
2949
 * error.
2950
 */
2951
2.12k
int pathx_expand_tree(struct pathx *path, struct tree **tree) {
2952
2.12k
    int r;
2953
2.12k
    struct step *step = NULL;
2954
2.12k
    struct locpath_trace lpt;
2955
2.12k
    struct tree *first_child = NULL;
2956
2.12k
    struct value *v = NULL;
2957
2958
2.12k
    MEMZERO(&lpt, 1);
2959
2.12k
    path->state->locpath_trace = &lpt;
2960
2.12k
    v = pathx_eval(path);
2961
2.12k
    path->state->locpath_trace = NULL;
2962
2.12k
    if (HAS_ERROR(path->state))
2963
0
        goto error;
2964
2965
2.12k
    if (lpt.maxns == 0) {
2966
0
        if (v->tag != T_NODESET || v->nodeset->used == 0) {
2967
0
            STATE_ERROR(path->state, PATHX_ENOMATCH);
2968
0
            goto error;
2969
0
        }
2970
0
        if (v->nodeset->used > 1)
2971
0
            goto error;
2972
0
        *tree = v->nodeset->nodes[0];
2973
0
        return 0;
2974
0
    }
2975
2976
2.12k
    *tree = path->origin;
2977
2.12k
    r = locpath_search(&lpt, tree, &step);
2978
2.12k
    if (r == -1) {
2979
0
        STATE_ERROR(path->state, PATHX_EMMATCH);
2980
0
        goto error;
2981
0
    }
2982
2983
2.12k
    if (step == NULL)
2984
106
        return 0;
2985
2986
2.01k
    struct tree *parent = *tree;
2987
2.01k
    if (parent == NULL)
2988
0
        parent = path->origin;
2989
2990
2.43k
    list_for_each(s, step) {
2991
2.43k
        if (s->axis != CHILD && s->axis != SEQ)
2992
0
            goto error;
2993
2.43k
        if (s->axis==SEQ && s->name == NULL) {
2994
0
            s->name = step_seq_choose_name(path, parent);
2995
0
        }
2996
2.43k
        if (s->name == NULL )
2997
0
            goto error;
2998
2.43k
        struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL);
2999
2.43k
        if (first_child == NULL)
3000
2.01k
            first_child = t;
3001
2.43k
        if (t == NULL || t->label == NULL)
3002
0
            goto error;
3003
2.43k
        list_append(parent->children, t);
3004
2.43k
        parent = t;
3005
2.43k
    }
3006
3007
2.43k
    while (first_child->children != NULL)
3008
424
        first_child = first_child->children;
3009
3010
2.01k
    *tree = first_child;
3011
2.01k
    return 1;
3012
3013
0
 error:
3014
0
    if (first_child != NULL) {
3015
0
        list_remove(first_child, first_child->parent->children);
3016
0
        free_tree(first_child);
3017
0
    }
3018
0
    *tree = NULL;
3019
0
    store_error(path);
3020
0
    return -1;
3021
2.01k
}
3022
3023
/* Generate a numeric string to use for step->name
3024
 * Scan tree->children for the highest numbered label, and add 1 to that
3025
 * numeric labels may be interspersed with #comment or other labels
3026
 */
3027
0
static char *step_seq_choose_name(struct pathx *path, struct tree *tree) {
3028
0
    unsigned long int max_node_n=0;
3029
0
    unsigned long int node_n;
3030
0
    char *step_name;
3031
0
    char *label_end;
3032
0
    for(tree=tree->children; tree!=NULL; tree=tree->next) {
3033
0
        if ( tree->label == NULL)
3034
0
          continue;
3035
0
        node_n=strtoul(tree->label, &label_end, 10);
3036
0
        if ( label_end == tree->label || *label_end != '\0' )
3037
          /* label is not a number - ignore it */
3038
0
          continue;
3039
0
        if ( node_n >= ULONG_MAX ) {
3040
0
          STATE_ERROR(path->state, PATHX_ENUMBER);
3041
0
          return NULL;
3042
0
        }
3043
0
        if( node_n > max_node_n )
3044
0
            max_node_n = node_n;
3045
0
    }
3046
0
    if (asprintf(&step_name,"%lu",max_node_n+1) >= 0)
3047
0
        return step_name;
3048
0
    else
3049
0
        return NULL;
3050
0
}
3051
3052
5.16k
int pathx_find_one(struct pathx *path, struct tree **tree) {
3053
5.16k
    *tree = pathx_first(path);
3054
5.16k
    if (HAS_ERROR(path->state))
3055
52
        return -1;
3056
5.11k
    return path->nodeset->used;
3057
5.16k
}
3058
3059
0
struct error *err_of_pathx(struct pathx *px) {
3060
0
    return px->state->error;
3061
0
}
3062
3063
405
const char *pathx_error(struct pathx *path, const char **txt, int *pos) {
3064
405
    int errcode = PATHX_ENOMEM;
3065
3066
405
    if (path != NULL) {
3067
405
        if (path->state->errcode < ARRAY_CARDINALITY(errcodes))
3068
405
            errcode = path->state->errcode;
3069
0
        else
3070
0
            errcode = PATHX_EINTERNAL;
3071
3072
405
        if (txt)
3073
0
            *txt = path->state->txt;
3074
3075
405
        if (pos)
3076
405
            *pos = path->state->pos - path->state->txt;
3077
405
    }
3078
405
    return errcodes[errcode];
3079
405
}
3080
3081
/*
3082
 * Symbol tables
3083
 */
3084
static struct pathx_symtab
3085
*make_symtab(struct pathx_symtab *symtab, const char *name,
3086
             struct value *value)
3087
63
{
3088
63
    struct pathx_symtab *new;
3089
63
    char *n = NULL;
3090
3091
63
    n = strdup(name);
3092
63
    if (n == NULL)
3093
0
        return NULL;
3094
3095
63
    if (ALLOC(new) < 0) {
3096
0
        free(n);
3097
0
        return NULL;
3098
0
    }
3099
63
    new->name = n;
3100
63
    new->value = value;
3101
63
    if (symtab == NULL) {
3102
63
        return new;
3103
63
    } else {
3104
0
        new->next = symtab->next;
3105
0
        symtab->next = new;
3106
0
    }
3107
0
    return symtab;
3108
63
}
3109
3110
106
void free_symtab(struct pathx_symtab *symtab) {
3111
3112
169
    while (symtab != NULL) {
3113
63
        struct pathx_symtab *del = symtab;
3114
63
        symtab = del->next;
3115
63
        free(del->name);
3116
63
        release_value(del->value);
3117
63
        free(del->value);
3118
63
        free(del);
3119
63
    }
3120
106
}
3121
3122
0
struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) {
3123
0
    return pathx->state->symtab;
3124
0
}
3125
3126
static int pathx_symtab_set(struct pathx_symtab **symtab,
3127
63
                            const char *name, struct value *v) {
3128
63
    int found = 0;
3129
3130
63
    list_for_each(tab, *symtab) {
3131
0
        if (STREQ(tab->name, name)) {
3132
0
            release_value(tab->value);
3133
0
            free(tab->value);
3134
0
            tab->value = v;
3135
0
            found = 1;
3136
0
            break;
3137
0
        }
3138
0
    }
3139
3140
63
    if (!found) {
3141
63
        struct pathx_symtab *new = NULL;
3142
3143
63
        new = make_symtab(*symtab, name, v);
3144
63
        if (new == NULL)
3145
0
            goto error;
3146
63
        *symtab = new;
3147
63
    }
3148
63
    return 0;
3149
0
 error:
3150
0
    return -1;
3151
63
}
3152
3153
int pathx_symtab_define(struct pathx_symtab **symtab,
3154
66
                        const char *name, struct pathx *px) {
3155
66
    int r;
3156
66
    struct value *value = NULL, *v = NULL;
3157
66
    struct state *state = px->state;
3158
3159
66
    value = pathx_eval(px);
3160
66
    if (HAS_ERROR(px->state))
3161
3
        goto error;
3162
3163
63
    if (ALLOC(v) < 0) {
3164
0
        STATE_ENOMEM;
3165
0
        goto error;
3166
0
    }
3167
3168
63
    *v = *value;
3169
63
    value->tag = T_BOOLEAN;
3170
3171
63
    r = pathx_symtab_set(symtab, name, v);
3172
63
    if (r < 0) {
3173
0
        STATE_ENOMEM;
3174
0
        goto error;
3175
0
    }
3176
3177
63
    if (v->tag == T_NODESET)
3178
55
        return v->nodeset->used;
3179
8
    else
3180
8
        return 0;
3181
3
 error:
3182
3
    release_value(value);
3183
3
    free(value);
3184
3
    release_value(v);
3185
3
    free(v);
3186
3
    store_error(px);
3187
3
    return -1;
3188
63
}
3189
3190
0
int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) {
3191
0
    struct pathx_symtab *del = NULL;
3192
3193
0
    for(del = *symtab;
3194
0
        del != NULL && !STREQ(del->name, name);
3195
0
        del = del->next);
3196
0
    if (del == NULL)
3197
0
        return 0;
3198
0
    list_remove(del, *symtab);
3199
0
    free_symtab(del);
3200
0
    return 0;
3201
0
}
3202
3203
int pathx_symtab_assign_tree(struct pathx_symtab **symtab,
3204
0
                             const char *name, struct tree *tree) {
3205
0
    struct value *v = NULL;
3206
0
    int r;
3207
3208
0
    if (ALLOC(v) < 0)
3209
0
        goto error;
3210
3211
0
    v->tag = T_NODESET;
3212
0
    if (ALLOC(v->nodeset) < 0)
3213
0
        goto error;
3214
0
    if (ALLOC_N(v->nodeset->nodes, 1) < 0)
3215
0
        goto error;
3216
0
    v->nodeset->used = 1;
3217
0
    v->nodeset->size = 1;
3218
0
    v->nodeset->nodes[0] = tree;
3219
3220
0
    r = pathx_symtab_set(symtab, name, v);
3221
0
    if (r < 0)
3222
0
        goto error;
3223
0
    return 1;
3224
0
 error:
3225
0
    release_value(v);
3226
0
    free(v);
3227
0
    return -1;
3228
0
}
3229
3230
int
3231
0
pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) {
3232
0
    struct value *v = lookup_var(name, symtab);
3233
3234
0
    if (v == NULL || v->tag != T_NODESET)
3235
0
        return -1;
3236
3237
0
    return v->nodeset->used;
3238
0
}
3239
3240
struct tree *
3241
pathx_symtab_get_tree(struct pathx_symtab *symtab,
3242
0
                      const char *name, int i) {
3243
0
    struct value *v = lookup_var(name, symtab);
3244
0
    if (v == NULL)
3245
0
        return NULL;
3246
0
    if (v->tag != T_NODESET)
3247
0
        return NULL;
3248
0
    if (i >= v->nodeset->used)
3249
0
        return NULL;
3250
0
    return v->nodeset->nodes[i];
3251
0
}
3252
3253
void pathx_symtab_remove_descendants(struct pathx_symtab *symtab,
3254
0
                                     const struct tree *tree) {
3255
0
    list_for_each(tab, symtab) {
3256
0
        if (tab->value->tag != T_NODESET)
3257
0
            continue;
3258
0
        struct nodeset *ns = tab->value->nodeset;
3259
0
        for (int i=0; i < ns->used;) {
3260
0
            struct tree *t = ns->nodes[i];
3261
0
            while (t != t->parent && t != tree)
3262
0
                t = t->parent;
3263
0
            if (t == tree)
3264
0
                ns_remove(ns, i, 1);
3265
0
            else
3266
0
                i += 1;
3267
0
        }
3268
0
    }
3269
0
}
3270
3271
/*
3272
 * Local variables:
3273
 *  indent-tabs-mode: nil
3274
 *  c-indent-level: 4
3275
 *  c-basic-offset: 4
3276
 *  tab-width: 4
3277
 * End:
3278
 */