Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * pathx.c: handling path expressions |
3 | | * |
4 | | * Copyright (C) 2007-2016 David Lutterkort |
5 | | * |
6 | | * This library is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * This library is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this library; if not, write to the Free Software |
18 | | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | | * |
20 | | * Author: David Lutterkort <dlutter@redhat.com> |
21 | | */ |
22 | | |
23 | | #include <config.h> |
24 | | #include <internal.h> |
25 | | #include <stdint.h> |
26 | | #include <stdbool.h> |
27 | | #include <memory.h> |
28 | | #include <ctype.h> |
29 | | |
30 | | #include "ref.h" |
31 | | #include "regexp.h" |
32 | | #include "errcode.h" |
33 | | |
34 | | static const char *const errcodes[] = { |
35 | | "no error", |
36 | | "empty name", |
37 | | "illegal string literal", |
38 | | "illegal number", /* PATHX_ENUMBER */ |
39 | | "string missing ending ' or \"", |
40 | | "expected '='", |
41 | | "allocation failed", |
42 | | "unmatched '['", |
43 | | "unmatched '('", |
44 | | "expected a '/'", |
45 | | "internal error", /* PATHX_EINTERNAL */ |
46 | | "type error", /* PATHX_ETYPE */ |
47 | | "undefined variable", /* PATHX_ENOVAR */ |
48 | | "garbage at end of path expression", /* PATHX_EEND */ |
49 | | "no match for path expression", /* PATHX_ENOMATCH */ |
50 | | "wrong number of arguments in function call", /* PATHX_EARITY */ |
51 | | "invalid regular expression", /* PATHX_EREGEXP */ |
52 | | "too many matches", /* PATHX_EMMATCH */ |
53 | | "wrong flag for regexp" /* PATHX_EREGEXPFLAG */ |
54 | | }; |
55 | | |
56 | | /* |
57 | | * Path expressions are strings that use a notation modelled on XPath. |
58 | | */ |
59 | | |
60 | | enum type { |
61 | | T_NONE = 0, /* Not a type */ |
62 | | T_NODESET, |
63 | | T_BOOLEAN, |
64 | | T_NUMBER, |
65 | | T_STRING, |
66 | | T_REGEXP |
67 | | }; |
68 | | |
69 | | enum expr_tag { |
70 | | E_FILTER, |
71 | | E_BINARY, |
72 | | E_VALUE, |
73 | | E_VAR, |
74 | | E_APP |
75 | | }; |
76 | | |
77 | | enum binary_op { |
78 | | OP_EQ, /* '=' */ |
79 | | OP_NEQ, /* '!=' */ |
80 | | OP_LT, /* '<' */ |
81 | | OP_LE, /* '<=' */ |
82 | | OP_GT, /* '>' */ |
83 | | OP_GE, /* '>=' */ |
84 | | OP_PLUS, /* '+' */ |
85 | | OP_MINUS, /* '-' */ |
86 | | OP_STAR, /* '*' */ |
87 | | OP_AND, /* 'and' */ |
88 | | OP_OR, /* 'or' */ |
89 | | OP_ELSE, /* 'else' */ |
90 | | OP_RE_MATCH, /* '=~' */ |
91 | | OP_RE_NOMATCH, /* '!~' */ |
92 | | OP_UNION /* '|' */ |
93 | | }; |
94 | | |
95 | | struct pred { |
96 | | int nexpr; |
97 | | struct expr **exprs; |
98 | | }; |
99 | | |
100 | | enum axis { |
101 | | SELF, |
102 | | CHILD, |
103 | | SEQ, |
104 | | DESCENDANT, |
105 | | DESCENDANT_OR_SELF, |
106 | | PARENT, |
107 | | ANCESTOR, |
108 | | ROOT, |
109 | | PRECEDING_SIBLING, |
110 | | FOLLOWING_SIBLING |
111 | | }; |
112 | | |
113 | | /* This array is indexed by enum axis */ |
114 | | static const char *const axis_names[] = { |
115 | | "self", |
116 | | "child", |
117 | | "seq", /* Like child, but only selects node-names which are integers */ |
118 | | "descendant", |
119 | | "descendant-or-self", |
120 | | "parent", |
121 | | "ancestor", |
122 | | "root", |
123 | | "preceding-sibling", |
124 | | "following-sibling" |
125 | | }; |
126 | | |
127 | | /* The characters that can follow a name in a location expression (aka path) |
128 | | * The parser will assume that name (path component) is finished when it |
129 | | * encounters any of these characters, unless they are escaped by preceding |
130 | | * them with a '\\'. |
131 | | * |
132 | | * See parse_name for the gory details |
133 | | */ |
134 | | static const char name_follow[] = "][|/=()!,"; |
135 | | |
136 | | /* Doubly linked list of location steps. Besides the information from the |
137 | | * path expression, also contains information to iterate over a node set, |
138 | | * in particular, the context node CTX for the step, and the current node |
139 | | * CUR within that context. |
140 | | */ |
141 | | struct step { |
142 | | struct step *next; |
143 | | enum axis axis; |
144 | | char *name; /* NULL to match any name */ |
145 | | struct pred *predicates; |
146 | | }; |
147 | | |
148 | | /* Initialise the root nodeset with the first step */ |
149 | | static struct tree *step_root(struct step *step, struct tree *ctx, |
150 | | struct tree *root_ctx); |
151 | | /* Iteration over the nodes on a step, ignoring the predicates */ |
152 | | static struct tree *step_first(struct step *step, struct tree *ctx); |
153 | | static struct tree *step_next(struct step *step, struct tree *ctx, |
154 | | struct tree *node); |
155 | | |
156 | | struct pathx_symtab { |
157 | | struct pathx_symtab *next; |
158 | | char *name; |
159 | | struct value *value; |
160 | | }; |
161 | | |
162 | | struct pathx { |
163 | | struct state *state; |
164 | | struct nodeset *nodeset; |
165 | | int node; |
166 | | struct tree *origin; |
167 | | }; |
168 | | |
169 | 18.5M | #define L_BRACK '[' |
170 | 6.03M | #define R_BRACK ']' |
171 | | |
172 | | struct locpath { |
173 | | struct step *steps; |
174 | | }; |
175 | | |
176 | | struct nodeset { |
177 | | struct tree **nodes; |
178 | | size_t used; |
179 | | size_t size; |
180 | | }; |
181 | | |
182 | | typedef uint32_t value_ind_t; |
183 | | |
184 | | struct value { |
185 | | enum type tag; |
186 | | union { |
187 | | struct nodeset *nodeset; /* T_NODESET */ |
188 | | int64_t number; /* T_NUMBER */ |
189 | | char *string; /* T_STRING */ |
190 | | bool boolval; /* T_BOOLEAN */ |
191 | | struct regexp *regexp; /* T_REGEXP */ |
192 | | }; |
193 | | }; |
194 | | |
195 | | struct expr { |
196 | | enum expr_tag tag; |
197 | | enum type type; |
198 | | union { |
199 | | struct { /* E_FILTER */ |
200 | | struct expr *primary; |
201 | | struct pred *predicates; |
202 | | struct locpath *locpath; |
203 | | }; |
204 | | struct { /* E_BINARY */ |
205 | | enum binary_op op; |
206 | | struct expr *left; |
207 | | struct expr *right; |
208 | | bool left_matched; |
209 | | }; |
210 | | value_ind_t value_ind; /* E_VALUE */ |
211 | | char *ident; /* E_VAR */ |
212 | | struct { /* E_APP */ |
213 | | const struct func *func; |
214 | | struct expr **args; |
215 | | /* If fold is true, replace this function invocation |
216 | | * with its value after the first time we evaluate this |
217 | | * expression */ |
218 | | bool fold; |
219 | | }; |
220 | | }; |
221 | | }; |
222 | | |
223 | | struct locpath_trace { |
224 | | unsigned int maxns; |
225 | | struct nodeset **ns; |
226 | | struct locpath *lp; |
227 | | }; |
228 | | |
229 | | /* Internal state of the evaluator/parser */ |
230 | | struct state { |
231 | | pathx_errcode_t errcode; |
232 | | const char *file; |
233 | | int line; |
234 | | char *errmsg; |
235 | | |
236 | | const char *txt; /* Entire expression */ |
237 | | const char *pos; /* Current position within TXT during parsing */ |
238 | | |
239 | | struct tree *ctx; /* The current node */ |
240 | | uint ctx_pos; |
241 | | uint ctx_len; |
242 | | |
243 | | struct tree *root_ctx; /* Root context for relative paths */ |
244 | | |
245 | | /* A table of all values. The table is dynamically reallocated, i.e. |
246 | | * pointers to struct value should not be used across calls that |
247 | | * might allocate new values |
248 | | * |
249 | | * value_pool[0] is always the boolean false, and value_pool[1] |
250 | | * always the boolean true |
251 | | */ |
252 | | struct value *value_pool; |
253 | | value_ind_t value_pool_used; |
254 | | value_ind_t value_pool_size; |
255 | | /* Stack of values (as indices into value_pool), with bottom of |
256 | | stack in values[0] */ |
257 | | value_ind_t *values; |
258 | | size_t values_used; |
259 | | size_t values_size; |
260 | | /* Stack of expressions, with bottom of stack in exprs[0] */ |
261 | | struct expr **exprs; |
262 | | size_t exprs_used; |
263 | | size_t exprs_size; |
264 | | /* Trace of a locpath evaluation, needed by pathx_expand_tree. |
265 | | Generally NULL, unless a trace is needed. |
266 | | */ |
267 | | struct locpath_trace *locpath_trace; |
268 | | /* Symbol table for variable lookups */ |
269 | | struct pathx_symtab *symtab; |
270 | | /* Error structure, used to communicate errors to struct augeas; |
271 | | * we never own this structure, and therefore never free it */ |
272 | | struct error *error; |
273 | | /* If a filter-expression contains the 'else' operator, we need |
274 | | * we need to evaluate the filter twice. The has_else flag |
275 | | * means we don't do this unless we really need to */ |
276 | | bool has_else; |
277 | | }; |
278 | | |
279 | | /* We consider NULL and the empty string to be equal */ |
280 | | ATTRIBUTE_PURE |
281 | 1.20M | static inline int streqx(const char *s1, const char *s2) { |
282 | 1.20M | if (s1 == NULL) |
283 | 3 | return (s2 == NULL || strlen(s2) == 0); |
284 | 1.20M | if (s2 == NULL) |
285 | 77 | return strlen(s1) == 0; |
286 | 1.20M | return STREQ(s1, s2); |
287 | 1.20M | } |
288 | | |
289 | | /* Functions */ |
290 | | |
291 | | typedef void (*func_impl_t)(struct state *state, int nargs); |
292 | | |
293 | | struct func { |
294 | | const char *name; |
295 | | unsigned int arity; |
296 | | enum type type; |
297 | | bool pure; /* Result only depends on args */ |
298 | | const enum type *arg_types; |
299 | | func_impl_t impl; |
300 | | }; |
301 | | |
302 | | static void func_last(struct state *state, int nargs); |
303 | | static void func_position(struct state *state, int nargs); |
304 | | static void func_count(struct state *state, int nargs); |
305 | | static void func_label(struct state *state, int nargs); |
306 | | static void func_regexp(struct state *state, int nargs); |
307 | | static void func_regexp_flag(struct state *state, int nargs); |
308 | | static void func_glob(struct state *state, int nargs); |
309 | | static void func_int(struct state *state, int nargs); |
310 | | static void func_not(struct state *state, int nargs); |
311 | | static void func_modified(struct state *state, int nargs); |
312 | | |
313 | | static const enum type arg_types_nodeset[] = { T_NODESET }; |
314 | | static const enum type arg_types_string[] = { T_STRING }; |
315 | | static const enum type arg_types_bool[] = { T_BOOLEAN }; |
316 | | static const enum type arg_types_string_string[] = { T_STRING, T_STRING }; |
317 | | static const enum type arg_types_nodeset_string[] = { T_NODESET, T_STRING }; |
318 | | |
319 | | static const struct func builtin_funcs[] = { |
320 | | { .name = "last", .arity = 0, .type = T_NUMBER, .arg_types = NULL, |
321 | | .impl = func_last, .pure = false }, |
322 | | { .name = "position", .arity = 0, .type = T_NUMBER, .arg_types = NULL, |
323 | | .impl = func_position, .pure = false }, |
324 | | { .name = "label", .arity = 0, .type = T_STRING, .arg_types = NULL, |
325 | | .impl = func_label, .pure = false }, |
326 | | { .name = "count", .arity = 1, .type = T_NUMBER, |
327 | | .arg_types = arg_types_nodeset, |
328 | | .impl = func_count, .pure = false }, |
329 | | { .name = "regexp", .arity = 1, .type = T_REGEXP, |
330 | | .arg_types = arg_types_string, |
331 | | .impl = func_regexp, .pure = true }, |
332 | | { .name = "regexp", .arity = 1, .type = T_REGEXP, |
333 | | .arg_types = arg_types_nodeset, |
334 | | .impl = func_regexp, .pure = true }, |
335 | | { .name = "regexp", .arity = 2, .type = T_REGEXP, |
336 | | .arg_types = arg_types_string_string, |
337 | | .impl = func_regexp_flag, .pure = true }, |
338 | | { .name = "regexp", .arity = 2, .type = T_REGEXP, |
339 | | .arg_types = arg_types_nodeset_string, |
340 | | .impl = func_regexp_flag, .pure = true }, |
341 | | { .name = "glob", .arity = 1, .type = T_REGEXP, |
342 | | .arg_types = arg_types_string, |
343 | | .impl = func_glob, .pure = true }, |
344 | | { .name = "glob", .arity = 1, .type = T_REGEXP, |
345 | | .arg_types = arg_types_nodeset, |
346 | | .impl = func_glob, .pure = true }, |
347 | | { .name = "int", .arity = 1, .type = T_NUMBER, |
348 | | .arg_types = arg_types_string, .impl = func_int, .pure = false }, |
349 | | { .name = "int", .arity = 1, .type = T_NUMBER, |
350 | | .arg_types = arg_types_nodeset, .impl = func_int, .pure = false }, |
351 | | { .name = "int", .arity = 1, .type = T_NUMBER, |
352 | | .arg_types = arg_types_bool, .impl = func_int, .pure = false }, |
353 | | { .name = "modified", .arity = 0, .type = T_BOOLEAN, |
354 | | .arg_types = NULL, .impl = func_modified, .pure = false }, |
355 | | { .name = "not", .arity = 1, .type = T_BOOLEAN, |
356 | | .arg_types = arg_types_bool, .impl = func_not, .pure = true } |
357 | | }; |
358 | | |
359 | | #define RET_ON_ERROR \ |
360 | 113M | if (state->errcode != PATHX_NOERROR) return |
361 | | |
362 | | #define RET0_ON_ERROR \ |
363 | 8.15M | if (state->errcode != PATHX_NOERROR) return 0 |
364 | | |
365 | | #define STATE_ERROR(state, err) \ |
366 | 1.58k | do { \ |
367 | 1.58k | state->errcode = err; \ |
368 | 1.58k | state->file = __FILE__; \ |
369 | 1.58k | state->line = __LINE__; \ |
370 | 1.58k | } while (0) |
371 | | |
372 | 40.1M | #define HAS_ERROR(state) (state->errcode != PATHX_NOERROR) |
373 | | |
374 | 0 | #define STATE_ENOMEM STATE_ERROR(state, PATHX_ENOMEM) |
375 | | |
376 | | /* |
377 | | * Free the various data structures |
378 | | */ |
379 | | |
380 | | static void free_expr(struct expr *expr); |
381 | | |
382 | 8.55M | static void free_pred(struct pred *pred) { |
383 | 8.55M | if (pred == NULL) |
384 | 8.54M | return; |
385 | | |
386 | 4.09M | for (int i=0; i < pred->nexpr; i++) { |
387 | 4.09M | free_expr(pred->exprs[i]); |
388 | 4.09M | } |
389 | 8.36k | free(pred->exprs); |
390 | 8.36k | free(pred); |
391 | 8.36k | } |
392 | | |
393 | 322k | static void free_step(struct step *step) { |
394 | 483k | while (step != NULL) { |
395 | 161k | struct step *del = step; |
396 | 161k | step = del->next; |
397 | 161k | free(del->name); |
398 | 161k | free_pred(del->predicates); |
399 | 161k | free(del); |
400 | 161k | } |
401 | 322k | } |
402 | | |
403 | 4.26M | static void free_locpath(struct locpath *locpath) { |
404 | 4.26M | if (locpath == NULL) |
405 | 322k | return; |
406 | 8.23M | while (locpath->steps != NULL) { |
407 | 4.29M | struct step *step = locpath->steps; |
408 | 4.29M | locpath->steps = step->next; |
409 | 4.29M | free(step->name); |
410 | 4.29M | free_pred(step->predicates); |
411 | 4.29M | free(step); |
412 | 4.29M | } |
413 | 3.94M | free(locpath); |
414 | 3.94M | } |
415 | | |
416 | 20.6M | static void free_expr(struct expr *expr) { |
417 | 20.6M | if (expr == NULL) |
418 | 4.10M | return; |
419 | 16.5M | switch (expr->tag) { |
420 | 4.10M | case E_FILTER: |
421 | 4.10M | free_expr(expr->primary); |
422 | 4.10M | free_pred(expr->predicates); |
423 | 4.10M | free_locpath(expr->locpath); |
424 | 4.10M | break; |
425 | 4.22M | case E_BINARY: |
426 | 4.22M | free_expr(expr->left); |
427 | 4.22M | free_expr(expr->right); |
428 | 4.22M | break; |
429 | 8.21M | case E_VALUE: |
430 | 8.21M | break; |
431 | 808 | case E_VAR: |
432 | 808 | free(expr->ident); |
433 | 808 | break; |
434 | 777 | case E_APP: |
435 | 1.84k | for (int i=0; i < expr->func->arity; i++) |
436 | 1.06k | free_expr(expr->args[i]); |
437 | 777 | free(expr->args); |
438 | 777 | break; |
439 | 0 | default: |
440 | 0 | assert(0); |
441 | 16.5M | } |
442 | 16.5M | free(expr); |
443 | 16.5M | } |
444 | | |
445 | 6.62M | static void free_nodeset(struct nodeset *ns) { |
446 | 6.62M | if (ns != NULL) { |
447 | 6.62M | free(ns->nodes); |
448 | 6.62M | free(ns); |
449 | 6.62M | } |
450 | 6.62M | } |
451 | | |
452 | | /* Free all objects used by VALUE, but not VALUE itself */ |
453 | 11.0M | static void release_value(struct value *v) { |
454 | 11.0M | if (v == NULL) |
455 | 4 | return; |
456 | | |
457 | 11.0M | switch (v->tag) { |
458 | 2.53M | case T_NODESET: |
459 | 2.53M | free_nodeset(v->nodeset); |
460 | 2.53M | break; |
461 | 3.32k | case T_STRING: |
462 | 3.32k | free(v->string); |
463 | 3.32k | break; |
464 | 206k | case T_BOOLEAN: |
465 | 8.45M | case T_NUMBER: |
466 | 8.45M | break; |
467 | 12.7k | case T_REGEXP: |
468 | 12.7k | unref(v->regexp, regexp); |
469 | 12.7k | break; |
470 | 12.7k | default: |
471 | 0 | assert(0); |
472 | 11.0M | } |
473 | 11.0M | } |
474 | | |
475 | 103k | static void free_state(struct state *state) { |
476 | 103k | if (state == NULL) |
477 | 0 | return; |
478 | | |
479 | 4.09M | for(int i=0; i < state->exprs_used; i++) |
480 | 3.99M | free_expr(state->exprs[i]); |
481 | 103k | free(state->exprs); |
482 | | |
483 | 11.1M | for(int i=0; i < state->value_pool_used; i++) |
484 | 11.0M | release_value(state->value_pool + i); |
485 | 103k | free(state->value_pool); |
486 | 103k | free(state->values); |
487 | 103k | free(state); |
488 | 103k | } |
489 | | |
490 | 103k | void free_pathx(struct pathx *pathx) { |
491 | 103k | if (pathx == NULL) |
492 | 307 | return; |
493 | 103k | free_state(pathx->state); |
494 | 103k | free(pathx); |
495 | 103k | } |
496 | | |
497 | | /* |
498 | | * Nodeset helpers |
499 | | */ |
500 | 6.43M | static struct nodeset *make_nodeset(struct state *state) { |
501 | 6.43M | struct nodeset *result; |
502 | 6.43M | if (ALLOC(result) < 0) |
503 | 6.43M | STATE_ENOMEM; |
504 | 6.43M | return result; |
505 | 6.43M | } |
506 | | |
507 | | /* Add NODE to NS if it is not in NS yet. This relies on the flag |
508 | | * NODE->ADDED and care must be taken that NS_CLEAR_ADDED is called on NS |
509 | | * as soon as we are done adding nodes to it. |
510 | | */ |
511 | | static void ns_add(struct nodeset *ns, struct tree *node, |
512 | 7.15M | struct state *state) { |
513 | 7.15M | if (node->added) |
514 | 4.23k | return; |
515 | 7.15M | if (ns->used >= ns->size) { |
516 | 4.91M | size_t size = 2 * ns->size; |
517 | 4.91M | if (size < 10) size = 10; |
518 | 4.91M | if (REALLOC_N(ns->nodes, size) < 0) |
519 | 4.91M | STATE_ENOMEM; |
520 | 4.91M | ns->size = size; |
521 | 4.91M | } |
522 | 7.15M | ns->nodes[ns->used] = node; |
523 | 7.15M | node->added = 1; |
524 | 7.15M | ns->used += 1; |
525 | 7.15M | } |
526 | | |
527 | 6.62M | static void ns_clear_added(struct nodeset *ns) { |
528 | 18.5M | for (int i=0; i < ns->used; i++) |
529 | 11.9M | ns->nodes[i]->added = 0; |
530 | 6.62M | } |
531 | | |
532 | | static struct nodeset * |
533 | | clone_nodeset(struct nodeset *ns, struct state *state) |
534 | 193k | { |
535 | 193k | struct nodeset *clone; |
536 | 193k | if (ALLOC(clone) < 0) { |
537 | 0 | STATE_ENOMEM; |
538 | 0 | return NULL; |
539 | 0 | } |
540 | 193k | if (ALLOC_N(clone->nodes, ns->used) < 0) { |
541 | 0 | free(clone); |
542 | 0 | STATE_ENOMEM; |
543 | 0 | return NULL; |
544 | 0 | } |
545 | 193k | clone->used = ns->used; |
546 | 193k | clone->size = ns->used; |
547 | 4.94M | for (int i=0; i < ns->used; i++) |
548 | 4.75M | clone->nodes[i] = ns->nodes[i]; |
549 | 193k | return clone; |
550 | 193k | } |
551 | | |
552 | | /* |
553 | | * Handling values |
554 | | */ |
555 | 10.7M | static value_ind_t make_value(enum type tag, struct state *state) { |
556 | 10.7M | assert(tag != T_BOOLEAN); |
557 | | |
558 | 10.7M | if (state->value_pool_used >= state->value_pool_size) { |
559 | 5.91k | value_ind_t new_size = 2*state->value_pool_size; |
560 | 5.91k | if (new_size <= state->value_pool_size) { |
561 | 0 | STATE_ENOMEM; |
562 | 0 | return 0; |
563 | 0 | } |
564 | 5.91k | if (REALLOC_N(state->value_pool, new_size) < 0) { |
565 | 0 | STATE_ENOMEM; |
566 | 0 | return 0; |
567 | 0 | } |
568 | 5.91k | state->value_pool_size = new_size; |
569 | 5.91k | } |
570 | 10.7M | state->value_pool[state->value_pool_used].tag = tag; |
571 | 10.7M | state->value_pool[state->value_pool_used].nodeset = NULL; |
572 | 10.7M | return state->value_pool_used++; |
573 | 10.7M | } |
574 | | |
575 | 0 | static value_ind_t clone_value(struct value *v, struct state *state) { |
576 | 0 | value_ind_t vind = make_value(v->tag, state); |
577 | 0 | RET0_ON_ERROR; |
578 | 0 | struct value *clone = state->value_pool + vind; |
579 | |
|
580 | 0 | switch (v->tag) { |
581 | 0 | case T_NODESET: |
582 | 0 | clone->nodeset = clone_nodeset(v->nodeset, state); |
583 | 0 | break; |
584 | 0 | case T_NUMBER: |
585 | 0 | clone->number = v->number; |
586 | 0 | break; |
587 | 0 | case T_STRING: |
588 | 0 | clone->string = strdup(v->string); |
589 | 0 | if (clone->string == NULL) { |
590 | 0 | STATE_ENOMEM; |
591 | 0 | } |
592 | 0 | break; |
593 | 0 | case T_BOOLEAN: |
594 | 0 | clone->boolval = v->boolval; |
595 | 0 | break; |
596 | 0 | case T_REGEXP: |
597 | 0 | clone->regexp = ref(v->regexp); |
598 | 0 | break; |
599 | 0 | default: |
600 | 0 | assert(0); |
601 | 0 | } |
602 | 0 | return vind; |
603 | 0 | } |
604 | | |
605 | 6.46M | static value_ind_t pop_value_ind(struct state *state) { |
606 | 6.46M | if (state->values_used > 0) { |
607 | 6.46M | state->values_used -= 1; |
608 | 6.46M | return state->values[state->values_used]; |
609 | 6.46M | } else { |
610 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); |
611 | 0 | assert(0); |
612 | 0 | return 0; |
613 | 0 | } |
614 | 6.46M | } |
615 | | |
616 | 6.46M | static struct value *pop_value(struct state *state) { |
617 | 6.46M | value_ind_t vind = pop_value_ind(state); |
618 | 6.46M | if (HAS_ERROR(state)) |
619 | 0 | return NULL; |
620 | 6.46M | return state->value_pool + vind; |
621 | 6.46M | } |
622 | | |
623 | 6.46M | static void push_value(value_ind_t vind, struct state *state) { |
624 | 6.46M | if (state->values_used >= state->values_size) { |
625 | 101k | size_t new_size = 2*state->values_size; |
626 | 101k | if (new_size == 0) new_size = 8; |
627 | 101k | if (REALLOC_N(state->values, new_size) < 0) { |
628 | 0 | STATE_ENOMEM; |
629 | 0 | return; |
630 | 0 | } |
631 | 101k | state->values_size = new_size; |
632 | 101k | } |
633 | 6.46M | state->values[state->values_used++] = vind; |
634 | 6.46M | } |
635 | | |
636 | 1.98M | static void push_boolean_value(int b, struct state *state) { |
637 | 1.98M | push_value(b != 0, state); |
638 | 1.98M | } |
639 | | |
640 | | ATTRIBUTE_PURE |
641 | 9.32M | static struct value *expr_value(struct expr *expr, struct state *state) { |
642 | 9.32M | return state->value_pool + expr->value_ind; |
643 | 9.32M | } |
644 | | |
645 | | /************************************************************************* |
646 | | * Evaluation |
647 | | ************************************************************************/ |
648 | | static void eval_expr(struct expr *expr, struct state *state); |
649 | | |
650 | | |
651 | | #define ensure_arity(min, max) \ |
652 | 52.4k | if (nargs < min || nargs > max) { \ |
653 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); \ |
654 | 0 | return; \ |
655 | 0 | } |
656 | | |
657 | 0 | static void func_last(struct state *state, int nargs) { |
658 | 0 | ensure_arity(0, 0); |
659 | 0 | value_ind_t vind = make_value(T_NUMBER, state); |
660 | 0 | RET_ON_ERROR; |
661 | | |
662 | 0 | state->value_pool[vind].number = state->ctx_len; |
663 | 0 | push_value(vind, state); |
664 | 0 | } |
665 | | |
666 | 0 | static void func_position(struct state *state, int nargs) { |
667 | 0 | ensure_arity(0, 0); |
668 | 0 | value_ind_t vind = make_value(T_NUMBER, state); |
669 | 0 | RET_ON_ERROR; |
670 | | |
671 | 0 | state->value_pool[vind].number = state->ctx_pos; |
672 | 0 | push_value(vind, state); |
673 | 0 | } |
674 | | |
675 | 0 | static void func_count(struct state *state, int nargs) { |
676 | 0 | ensure_arity(1, 1); |
677 | 0 | value_ind_t vind = make_value(T_NUMBER, state); |
678 | 0 | RET_ON_ERROR; |
679 | | |
680 | 0 | struct value *ns = pop_value(state); |
681 | 0 | state->value_pool[vind].number = ns->nodeset->used; |
682 | 0 | push_value(vind, state); |
683 | 0 | } |
684 | | |
685 | 0 | static void func_label(struct state *state, int nargs) { |
686 | 0 | ensure_arity(0, 0); |
687 | 0 | value_ind_t vind = make_value(T_STRING, state); |
688 | 0 | char *s; |
689 | |
|
690 | 0 | RET_ON_ERROR; |
691 | | |
692 | 0 | if (state->ctx->label) |
693 | 0 | s = strdup(state->ctx->label); |
694 | 0 | else |
695 | 0 | s = strdup(""); |
696 | 0 | if (s == NULL) { |
697 | 0 | STATE_ENOMEM; |
698 | 0 | return; |
699 | 0 | } |
700 | 0 | state->value_pool[vind].string = s; |
701 | 0 | push_value(vind, state); |
702 | 0 | } |
703 | | |
704 | 39.7k | static void func_int(struct state *state, int nargs) { |
705 | 39.7k | ensure_arity(1, 1); |
706 | 39.7k | value_ind_t vind = make_value(T_NUMBER, state); |
707 | 39.7k | int64_t i = -1; |
708 | 39.7k | RET_ON_ERROR; |
709 | | |
710 | 39.7k | struct value *v = pop_value(state); |
711 | 39.7k | if (v->tag == T_BOOLEAN) { |
712 | 39.7k | i = v->boolval; |
713 | 39.7k | } else { |
714 | 0 | const char *s = NULL; |
715 | 0 | if (v->tag == T_STRING) { |
716 | 0 | s = v->string; |
717 | 0 | } else { |
718 | | /* T_NODESET */ |
719 | 0 | if (v->nodeset->used != 1) { |
720 | 0 | STATE_ERROR(state, PATHX_EMMATCH); |
721 | 0 | return; |
722 | 0 | } |
723 | 0 | s = v->nodeset->nodes[0]->value; |
724 | 0 | } |
725 | 0 | if (s != NULL) { |
726 | 0 | int r; |
727 | 0 | r = xstrtoint64(s, 10, &i); |
728 | 0 | if (r < 0) { |
729 | 0 | STATE_ERROR(state, PATHX_ENUMBER); |
730 | 0 | return; |
731 | 0 | } |
732 | 0 | } |
733 | 0 | } |
734 | 39.7k | state->value_pool[vind].number = i; |
735 | 39.7k | push_value(vind, state); |
736 | 39.7k | } |
737 | | |
738 | 0 | static void func_modified(struct state *state, int nargs) { |
739 | 0 | ensure_arity(0, 0); |
740 | |
|
741 | 0 | push_boolean_value(state->ctx->dirty , state); |
742 | 0 | } |
743 | | |
744 | 0 | static void func_not(struct state *state, int nargs) { |
745 | 0 | ensure_arity(1, 1); |
746 | 0 | RET_ON_ERROR; |
747 | | |
748 | 0 | struct value *v = pop_value(state); |
749 | 0 | if (v->tag == T_BOOLEAN) { |
750 | 0 | push_boolean_value(! v->boolval, state); |
751 | 0 | } |
752 | 0 | } |
753 | | |
754 | | static struct regexp * |
755 | 12.5k | nodeset_as_regexp(struct info *info, struct nodeset *ns, int glob, int nocase) { |
756 | 12.5k | struct regexp *result = NULL; |
757 | 12.5k | struct regexp **rx = NULL; |
758 | 12.5k | int used = 0; |
759 | | |
760 | 18.5k | for (int i = 0; i < ns->used; i++) { |
761 | 6.04k | if (ns->nodes[i]->value != NULL) |
762 | 2.22k | used += 1; |
763 | 6.04k | } |
764 | | |
765 | 12.5k | if (used == 0) { |
766 | | /* If the nodeset is empty, make sure we produce a regexp |
767 | | * that never matches anything */ |
768 | 12.2k | result = make_regexp_unescape(info, "[^\001-\7ff]", nocase); |
769 | 12.2k | } else { |
770 | 325 | if (ALLOC_N(rx, ns->used) < 0) |
771 | 0 | goto error; |
772 | 4.92k | for (int i=0; i < ns->used; i++) { |
773 | 4.60k | if (ns->nodes[i]->value == NULL) |
774 | 2.37k | continue; |
775 | | |
776 | 2.22k | if (glob) |
777 | 0 | rx[i] = make_regexp_from_glob(info, ns->nodes[i]->value); |
778 | 2.22k | else |
779 | 2.22k | rx[i] = make_regexp_unescape(info, ns->nodes[i]->value, 0); |
780 | 2.22k | if (rx[i] == NULL) |
781 | 0 | goto error; |
782 | 2.22k | } |
783 | 325 | result = regexp_union_n(info, ns->used, rx); |
784 | 325 | } |
785 | | |
786 | 12.5k | error: |
787 | 12.5k | if (rx != NULL) { |
788 | 4.92k | for (int i=0; i < ns->used; i++) |
789 | 4.60k | unref(rx[i], regexp); |
790 | 325 | free(rx); |
791 | 325 | } |
792 | 12.5k | return result; |
793 | 12.5k | } |
794 | | |
795 | 12.7k | static void func_regexp_or_glob(struct state *state, int glob, int nocase) { |
796 | 12.7k | value_ind_t vind = make_value(T_REGEXP, state); |
797 | 12.7k | int r; |
798 | | |
799 | 12.7k | RET_ON_ERROR; |
800 | | |
801 | 12.7k | struct value *v = pop_value(state); |
802 | 12.7k | struct regexp *rx = NULL; |
803 | | |
804 | 12.7k | if (v->tag == T_STRING) { |
805 | 198 | if (glob) |
806 | 0 | rx = make_regexp_from_glob(state->error->info, v->string); |
807 | 198 | else |
808 | 198 | rx = make_regexp_unescape(state->error->info, v->string, nocase); |
809 | 12.5k | } else if (v->tag == T_NODESET) { |
810 | 12.5k | rx = nodeset_as_regexp(state->error->info, v->nodeset, glob, nocase); |
811 | 12.5k | } else { |
812 | 0 | assert(0); |
813 | 0 | } |
814 | | |
815 | 12.7k | if (rx == NULL) { |
816 | 0 | STATE_ENOMEM; |
817 | 0 | return; |
818 | 0 | } |
819 | | |
820 | 12.7k | state->value_pool[vind].regexp = rx; |
821 | 12.7k | r = regexp_compile(rx); |
822 | 12.7k | if (r < 0) { |
823 | 27 | const char *msg; |
824 | 27 | regexp_check(rx, &msg); |
825 | 27 | state->errmsg = strdup(msg); |
826 | 27 | STATE_ERROR(state, PATHX_EREGEXP); |
827 | 27 | return; |
828 | 27 | } |
829 | 12.7k | push_value(vind, state); |
830 | 12.7k | } |
831 | | |
832 | 3.10k | static void func_regexp(struct state *state, int nargs) { |
833 | 3.10k | ensure_arity(1, 1); |
834 | 3.10k | func_regexp_or_glob(state, 0, 0); |
835 | 3.10k | } |
836 | | |
837 | 9.62k | static void func_regexp_flag(struct state *state, int nargs) { |
838 | 9.62k | ensure_arity(2, 2); |
839 | 9.62k | int nocase = 0; |
840 | 9.62k | struct value *f = pop_value(state); |
841 | | |
842 | 9.62k | if (STREQ("i", f->string)) |
843 | 9.62k | nocase = 1; |
844 | 0 | else |
845 | 0 | STATE_ERROR(state, PATHX_EREGEXPFLAG); |
846 | | |
847 | 9.62k | func_regexp_or_glob(state, 0, nocase); |
848 | 9.62k | } |
849 | | |
850 | 0 | static void func_glob(struct state *state, int nargs) { |
851 | 0 | ensure_arity(1, 1); |
852 | 0 | func_regexp_or_glob(state, 1, 0); |
853 | 0 | } |
854 | | |
855 | 78.9k | static bool coerce_to_bool(struct value *v) { |
856 | 78.9k | switch (v->tag) { |
857 | 39.4k | case T_NODESET: |
858 | 39.4k | return v->nodeset->used > 0; |
859 | 0 | break; |
860 | 39.4k | case T_BOOLEAN: |
861 | 39.4k | return v->boolval; |
862 | 0 | break; |
863 | 0 | case T_NUMBER: |
864 | 0 | return v->number > 0; |
865 | 0 | break; |
866 | 0 | case T_STRING: |
867 | 0 | return strlen(v->string) > 0; |
868 | 0 | break; |
869 | 0 | case T_REGEXP: |
870 | 0 | return true; |
871 | 0 | default: |
872 | 0 | assert(0); |
873 | 0 | return false; |
874 | 78.9k | } |
875 | 78.9k | } |
876 | | |
877 | | static int calc_eq_nodeset_nodeset(struct nodeset *ns1, struct nodeset *ns2, |
878 | 39.7k | int neq) { |
879 | 39.8k | for (int i1=0; i1 < ns1->used; i1++) { |
880 | 85 | struct tree *t1 = ns1->nodes[i1]; |
881 | 86 | for (int i2=0; i2 < ns2->used; i2++) { |
882 | 20 | struct tree *t2 = ns2->nodes[i2]; |
883 | 20 | if (neq) { |
884 | 20 | if (!streqx(t1->value, t2->value)) |
885 | 19 | return 1; |
886 | 20 | } else { |
887 | 0 | if (streqx(t1->value, t2->value)) |
888 | 0 | return 1; |
889 | 0 | } |
890 | 20 | } |
891 | 85 | } |
892 | 39.7k | return 0; |
893 | 39.7k | } |
894 | | |
895 | | static int calc_eq_nodeset_string(struct nodeset *ns, const char *s, |
896 | 0 | int neq) { |
897 | 0 | for (int i=0; i < ns->used; i++) { |
898 | 0 | struct tree *t = ns->nodes[i]; |
899 | 0 | if (neq) { |
900 | 0 | if (!streqx(t->value, s)) |
901 | 0 | return 1; |
902 | 0 | } else { |
903 | 0 | if (streqx(t->value, s)) |
904 | 0 | return 1; |
905 | 0 | } |
906 | 0 | } |
907 | 0 | return 0; |
908 | 0 | } |
909 | | |
910 | 39.7k | static void eval_eq(struct state *state, int neq) { |
911 | 39.7k | struct value *r = pop_value(state); |
912 | 39.7k | struct value *l = pop_value(state); |
913 | 39.7k | int res; |
914 | | |
915 | 39.7k | if (l->tag == T_NODESET && r->tag == T_NODESET) { |
916 | 39.7k | res = calc_eq_nodeset_nodeset(l->nodeset, r->nodeset, neq); |
917 | 39.7k | } else if (l->tag == T_NODESET) { |
918 | 0 | res = calc_eq_nodeset_string(l->nodeset, r->string, neq); |
919 | 0 | } else if (r->tag == T_NODESET) { |
920 | 0 | res = calc_eq_nodeset_string(r->nodeset, l->string, neq); |
921 | 0 | } else if (l->tag == T_NUMBER && r->tag == T_NUMBER) { |
922 | 0 | if (neq) |
923 | 0 | res = (l->number != r->number); |
924 | 0 | else |
925 | 0 | res = (l->number == r->number); |
926 | 0 | } else { |
927 | 0 | assert(l->tag == T_STRING); |
928 | 0 | assert(r->tag == T_STRING); |
929 | 0 | res = streqx(l->string, r->string); |
930 | 0 | if (neq) |
931 | 0 | res = !res; |
932 | 0 | } |
933 | 39.7k | RET_ON_ERROR; |
934 | | |
935 | 39.7k | push_boolean_value(res, state); |
936 | 39.7k | } |
937 | | |
938 | 500 | static void eval_arith(struct state *state, enum binary_op op) { |
939 | 500 | value_ind_t vind = make_value(T_NUMBER, state); |
940 | 500 | struct value *r = pop_value(state); |
941 | 500 | struct value *l = pop_value(state); |
942 | 500 | int res; |
943 | | |
944 | 500 | assert(l->tag == T_NUMBER); |
945 | 500 | assert(r->tag == T_NUMBER); |
946 | | |
947 | 500 | RET_ON_ERROR; |
948 | | |
949 | 500 | if (op == OP_PLUS) |
950 | 0 | res = l->number + r->number; |
951 | 500 | else if (op == OP_MINUS) |
952 | 0 | res = l->number - r->number; |
953 | 500 | else if (op == OP_STAR) |
954 | 500 | res = l->number * r->number; |
955 | 0 | else |
956 | 0 | assert(0); |
957 | | |
958 | 500 | state->value_pool[vind].number = res; |
959 | 500 | push_value(vind, state); |
960 | 500 | } |
961 | | |
962 | 0 | static void eval_rel(struct state *state, bool greater, bool equal) { |
963 | 0 | struct value *r, *l; |
964 | 0 | int res; |
965 | | |
966 | | /* We always check l < r or l <= r */ |
967 | 0 | if (greater) { |
968 | 0 | l = pop_value(state); |
969 | 0 | r = pop_value(state); |
970 | 0 | } else { |
971 | 0 | r = pop_value(state); |
972 | 0 | l = pop_value(state); |
973 | 0 | } |
974 | 0 | if (l->tag == T_NUMBER) { |
975 | 0 | if (equal) |
976 | 0 | res = (l->number <= r->number); |
977 | 0 | else |
978 | 0 | res = (l->number < r->number); |
979 | 0 | } else if (l->tag == T_STRING) { |
980 | 0 | int cmp = strcmp(l->string, r->string); |
981 | 0 | if (equal) |
982 | 0 | res = cmp <= 0; |
983 | 0 | else |
984 | 0 | res = cmp < 0; |
985 | 0 | } else { |
986 | 0 | assert(0); |
987 | 0 | } |
988 | | |
989 | 0 | push_boolean_value(res, state); |
990 | 0 | } |
991 | | |
992 | 20 | static void eval_and_or(struct state *state, enum binary_op op) { |
993 | 20 | struct value *r = pop_value(state); |
994 | 20 | struct value *l = pop_value(state); |
995 | 20 | bool left = coerce_to_bool(l); |
996 | 20 | bool right = coerce_to_bool(r); |
997 | | |
998 | | |
999 | 20 | if (op == OP_AND) |
1000 | 20 | push_boolean_value(left && right, state); |
1001 | 0 | else |
1002 | 0 | push_boolean_value(left || right, state); |
1003 | 20 | } |
1004 | | |
1005 | 39.5k | static void eval_else(struct state *state, struct expr *expr, struct locpath_trace *lpt_right) { |
1006 | 39.5k | struct value *r = pop_value(state); |
1007 | 39.5k | struct value *l = pop_value(state); |
1008 | | |
1009 | 39.5k | if ( l->tag == T_NODESET && r->tag == T_NODESET ) { |
1010 | 73 | int discard_maxns=0; |
1011 | 73 | struct nodeset **discard_ns=NULL; |
1012 | 73 | struct locpath_trace *lpt = state->locpath_trace; |
1013 | 73 | value_ind_t vind = make_value(T_NODESET, state); |
1014 | 73 | if (l->nodeset->used >0 || expr->left_matched) { |
1015 | 40 | expr->left_matched = 1; |
1016 | 40 | state->value_pool[vind].nodeset = clone_nodeset(l->nodeset, state); |
1017 | 40 | if( lpt_right != NULL ) { |
1018 | 40 | discard_maxns = lpt_right->maxns; |
1019 | 40 | discard_ns = lpt_right->ns; |
1020 | 40 | } |
1021 | 40 | } else { |
1022 | 33 | state->value_pool[vind].nodeset = clone_nodeset(r->nodeset, state); |
1023 | 33 | if( lpt != NULL && lpt_right != NULL ) { |
1024 | 0 | discard_maxns = lpt->maxns; |
1025 | 0 | discard_ns = lpt->ns; |
1026 | 0 | lpt->maxns = lpt_right->maxns; |
1027 | 0 | lpt->ns = lpt_right->ns; |
1028 | 0 | lpt->lp = lpt_right->lp; |
1029 | 0 | } |
1030 | 33 | } |
1031 | 73 | push_value(vind, state); |
1032 | 73 | if ( lpt != NULL && lpt_right != NULL ) { |
1033 | 0 | for (int i=0; i < discard_maxns; i++) |
1034 | 0 | free_nodeset(discard_ns[i]); |
1035 | 0 | FREE(discard_ns); |
1036 | 0 | } |
1037 | 39.4k | } else { |
1038 | 39.4k | bool left = coerce_to_bool(l); |
1039 | 39.4k | bool right = coerce_to_bool(r); |
1040 | | |
1041 | 39.4k | expr->left_matched = expr->left_matched || left; |
1042 | 39.4k | if (expr->left_matched) { |
1043 | | /* One or more LHS have matched, so we're not interested in the right expr */ |
1044 | 0 | push_boolean_value(left, state); |
1045 | 39.4k | } else { |
1046 | | /* no LHS has matched (yet), so keep the right expr */ |
1047 | | /* If this is the 2nd pass, and expr->left_matched is true, no RHS nodes will be included */ |
1048 | 39.4k | push_boolean_value(right, state); |
1049 | 39.4k | } |
1050 | 39.4k | } |
1051 | 39.5k | } |
1052 | | |
1053 | | static bool eval_re_match_str(struct state *state, struct regexp *rx, |
1054 | 1.97M | const char *str) { |
1055 | 1.97M | int r; |
1056 | | |
1057 | 1.97M | if (str == NULL) |
1058 | 1.55M | str = ""; |
1059 | | |
1060 | 1.97M | r = regexp_match(rx, str, strlen(str), 0, NULL); |
1061 | 1.97M | if (r == -2) { |
1062 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); |
1063 | 1.97M | } else if (r == -3) { |
1064 | | /* We should never get this far; func_regexp should catch |
1065 | | * invalid regexps */ |
1066 | 0 | assert(false); |
1067 | 0 | } |
1068 | 1.97M | return r == strlen(str); |
1069 | 1.97M | } |
1070 | | |
1071 | 193k | static void eval_union(struct state *state, struct locpath_trace *lpt_right) { |
1072 | 193k | value_ind_t vind = make_value(T_NODESET, state); |
1073 | 193k | struct value *r = pop_value(state); |
1074 | 193k | struct value *l = pop_value(state); |
1075 | 193k | struct nodeset *res = NULL; |
1076 | 193k | struct locpath_trace *lpt = state->locpath_trace; |
1077 | | |
1078 | 193k | assert(l->tag == T_NODESET); |
1079 | 193k | assert(r->tag == T_NODESET); |
1080 | | |
1081 | 193k | RET_ON_ERROR; |
1082 | | |
1083 | 193k | res = clone_nodeset(l->nodeset, state); |
1084 | 193k | RET_ON_ERROR; |
1085 | 336k | for (int i=0; i < r->nodeset->used; i++) { |
1086 | 142k | ns_add(res, r->nodeset->nodes[i], state); |
1087 | 142k | if (HAS_ERROR(state)) |
1088 | 0 | goto error; |
1089 | 142k | } |
1090 | 193k | state->value_pool[vind].nodeset = res; |
1091 | 193k | push_value(vind, state); |
1092 | | |
1093 | 193k | if( lpt != NULL && lpt_right != NULL ) { |
1094 | 0 | STATE_ERROR(state, PATHX_EMMATCH); |
1095 | 0 | for (int i=0; i < lpt_right->maxns; i++) |
1096 | 0 | free_nodeset(lpt_right->ns[i]); |
1097 | 0 | FREE(lpt_right->ns); |
1098 | 0 | } |
1099 | 193k | error: |
1100 | 193k | ns_clear_added(res); |
1101 | 193k | } |
1102 | | |
1103 | 0 | static void eval_concat_string(struct state *state) { |
1104 | 0 | value_ind_t vind = make_value(T_STRING, state); |
1105 | 0 | struct value *r = pop_value(state); |
1106 | 0 | struct value *l = pop_value(state); |
1107 | 0 | char *res = NULL; |
1108 | |
|
1109 | 0 | RET_ON_ERROR; |
1110 | | |
1111 | 0 | if (ALLOC_N(res, strlen(l->string) + strlen(r->string) + 1) < 0) { |
1112 | 0 | STATE_ENOMEM; |
1113 | 0 | return; |
1114 | 0 | } |
1115 | 0 | strcpy(res, l->string); |
1116 | 0 | strcat(res, r->string); |
1117 | 0 | state->value_pool[vind].string = res; |
1118 | 0 | push_value(vind, state); |
1119 | 0 | } |
1120 | | |
1121 | 0 | static void eval_concat_regexp(struct state *state) { |
1122 | 0 | value_ind_t vind = make_value(T_REGEXP, state); |
1123 | 0 | struct value *r = pop_value(state); |
1124 | 0 | struct value *l = pop_value(state); |
1125 | 0 | struct regexp *rx = NULL; |
1126 | |
|
1127 | 0 | RET_ON_ERROR; |
1128 | | |
1129 | 0 | rx = regexp_concat(state->error->info, l->regexp, r->regexp); |
1130 | 0 | if (rx == NULL) { |
1131 | 0 | STATE_ENOMEM; |
1132 | 0 | return; |
1133 | 0 | } |
1134 | | |
1135 | 0 | state->value_pool[vind].regexp = rx; |
1136 | 0 | push_value(vind, state); |
1137 | 0 | } |
1138 | | |
1139 | 1.90M | static void eval_re_match(struct state *state, enum binary_op op) { |
1140 | 1.90M | struct value *rx = pop_value(state); |
1141 | 1.90M | struct value *v = pop_value(state); |
1142 | | |
1143 | 1.90M | bool result = false; |
1144 | | |
1145 | 1.90M | if (v->tag == T_STRING) { |
1146 | 32 | result = eval_re_match_str(state, rx->regexp, v->string); |
1147 | 32 | RET_ON_ERROR; |
1148 | 1.90M | } else if (v->tag == T_NODESET) { |
1149 | 3.87M | for (int i=0; i < v->nodeset->used && result == false; i++) { |
1150 | 1.97M | struct tree *t = v->nodeset->nodes[i]; |
1151 | 1.97M | result = eval_re_match_str(state, rx->regexp, t->value); |
1152 | 1.97M | RET_ON_ERROR; |
1153 | 1.97M | } |
1154 | 1.90M | } |
1155 | 1.90M | if (op == OP_RE_NOMATCH) |
1156 | 1.90M | result = !result; |
1157 | 1.90M | push_boolean_value(result, state); |
1158 | 1.90M | } |
1159 | | |
1160 | 2.17M | static void eval_binary(struct expr *expr, struct state *state) { |
1161 | 2.17M | struct locpath_trace *lpt = state->locpath_trace; |
1162 | 2.17M | struct locpath_trace lpt_right; |
1163 | | |
1164 | 2.17M | eval_expr(expr->left, state); |
1165 | 2.17M | if ( lpt != NULL && expr->type == T_NODESET ) { |
1166 | 0 | MEMZERO(&lpt_right, 1); |
1167 | 0 | state->locpath_trace = &lpt_right; |
1168 | 0 | } |
1169 | 2.17M | eval_expr(expr->right, state); |
1170 | 2.17M | state->locpath_trace = lpt; |
1171 | 2.17M | RET_ON_ERROR; |
1172 | | |
1173 | 2.17M | switch (expr->op) { |
1174 | 39.7k | case OP_EQ: |
1175 | 39.7k | eval_eq(state, 0); |
1176 | 39.7k | break; |
1177 | 37 | case OP_NEQ: |
1178 | 37 | eval_eq(state, 1); |
1179 | 37 | break; |
1180 | 0 | case OP_LT: |
1181 | 0 | eval_rel(state, false, false); |
1182 | 0 | break; |
1183 | 0 | case OP_LE: |
1184 | 0 | eval_rel(state, false, true); |
1185 | 0 | break; |
1186 | 0 | case OP_GT: |
1187 | 0 | eval_rel(state, true, false); |
1188 | 0 | break; |
1189 | 0 | case OP_GE: |
1190 | 0 | eval_rel(state, true, true); |
1191 | 0 | break; |
1192 | 0 | case OP_PLUS: |
1193 | 0 | if (expr->type == T_NUMBER) |
1194 | 0 | eval_arith(state, expr->op); |
1195 | 0 | else if (expr->type == T_STRING) |
1196 | 0 | eval_concat_string(state); |
1197 | 0 | else if (expr->type == T_REGEXP) |
1198 | 0 | eval_concat_regexp(state); |
1199 | 0 | break; |
1200 | 0 | case OP_MINUS: |
1201 | 500 | case OP_STAR: |
1202 | 500 | eval_arith(state, expr->op); |
1203 | 500 | break; |
1204 | 20 | case OP_AND: |
1205 | 20 | case OP_OR: |
1206 | 20 | eval_and_or(state, expr->op); |
1207 | 20 | break; |
1208 | 39.5k | case OP_ELSE: |
1209 | 39.5k | eval_else(state, expr, &lpt_right); |
1210 | 39.5k | break; |
1211 | 193k | case OP_UNION: |
1212 | 193k | eval_union(state, &lpt_right); |
1213 | 193k | break; |
1214 | 2 | case OP_RE_MATCH: |
1215 | 1.90M | case OP_RE_NOMATCH: |
1216 | 1.90M | eval_re_match(state, expr->op); |
1217 | 1.90M | break; |
1218 | 0 | default: |
1219 | 0 | assert(0); |
1220 | 2.17M | } |
1221 | 2.17M | } |
1222 | | |
1223 | 52.4k | static void eval_app(struct expr *expr, struct state *state) { |
1224 | 52.4k | assert(expr->tag == E_APP); |
1225 | | |
1226 | 114k | for (int i=0; i < expr->func->arity; i++) { |
1227 | 62.0k | eval_expr(expr->args[i], state); |
1228 | 62.0k | RET_ON_ERROR; |
1229 | 62.0k | } |
1230 | 52.4k | expr->func->impl(state, expr->func->arity); |
1231 | 52.4k | } |
1232 | | |
1233 | 1.95M | static bool eval_pred(struct expr *expr, struct state *state) { |
1234 | 1.95M | eval_expr(expr, state); |
1235 | 1.95M | RET0_ON_ERROR; |
1236 | | |
1237 | 1.95M | struct value *v = pop_value(state); |
1238 | 1.95M | switch(v->tag) { |
1239 | 1.90M | case T_BOOLEAN: |
1240 | 1.90M | return v->boolval; |
1241 | 40.2k | case T_NUMBER: |
1242 | 40.2k | return (state->ctx_pos == v->number); |
1243 | 10.3k | case T_NODESET: |
1244 | 10.3k | return v->nodeset->used > 0; |
1245 | 0 | case T_STRING: |
1246 | 0 | return streqv(state->ctx->value, v->string); |
1247 | 0 | default: |
1248 | 0 | assert(0); |
1249 | 0 | return false; |
1250 | 1.95M | } |
1251 | 1.95M | } |
1252 | | |
1253 | | /* Remove COUNT successive entries from NS. The first entry to remove is at |
1254 | | IND */ |
1255 | 47.6k | static void ns_remove(struct nodeset *ns, int ind, int count) { |
1256 | 47.6k | if (count < 1) |
1257 | 0 | return; |
1258 | 47.6k | memmove(ns->nodes + ind, ns->nodes + ind+count, |
1259 | 47.6k | sizeof(ns->nodes[0]) * (ns->used - (ind+count))); |
1260 | 47.6k | ns->used -= count; |
1261 | 47.6k | } |
1262 | | |
1263 | | /* |
1264 | | * Remove all nodes from NS for which one of PRED is false |
1265 | | */ |
1266 | | static void ns_filter(struct nodeset *ns, struct pred *predicates, |
1267 | 4.08M | struct state *state) { |
1268 | 4.08M | if (predicates == NULL) |
1269 | 4.03M | return; |
1270 | | |
1271 | 51.0k | struct tree *old_ctx = state->ctx; |
1272 | 51.0k | uint old_ctx_len = state->ctx_len; |
1273 | 51.0k | uint old_ctx_pos = state->ctx_pos; |
1274 | | |
1275 | 102k | for (int p=0; p < predicates->nexpr; p++) { |
1276 | 51.0k | if ( state->has_else) { |
1277 | 1.01M | for (int i=0; i < ns->used; i++) { |
1278 | | /* 1st pass, check if any else statements have match on the left */ |
1279 | | /* Don't delete any nodes (yet) */ |
1280 | 970k | state->ctx = ns->nodes[i]; |
1281 | 970k | eval_pred(predicates->exprs[p], state); |
1282 | 970k | } |
1283 | 49.5k | } |
1284 | 51.0k | int first_bad = -1; /* The index of the first non-matching node */ |
1285 | 51.0k | state->ctx_len = ns->used; |
1286 | 51.0k | state->ctx_pos = 1; |
1287 | 1.03M | for (int i=0; i < ns->used; state->ctx_pos++) { |
1288 | 981k | state->ctx = ns->nodes[i]; |
1289 | 981k | bool match = eval_pred(predicates->exprs[p], state); |
1290 | 981k | RET_ON_ERROR; |
1291 | | /* We remove non-matching nodes from NS in batches; this logic |
1292 | | * makes sure that we only call ns_remove at the end of a run |
1293 | | * of non-matching nodes |
1294 | | */ |
1295 | 981k | if (match) { |
1296 | 48.3k | if (first_bad >= 0) { |
1297 | 18.0k | ns_remove(ns, first_bad, i - first_bad); |
1298 | 18.0k | i = first_bad + 1; |
1299 | 30.2k | } else { |
1300 | 30.2k | i += 1; |
1301 | 30.2k | } |
1302 | 48.3k | first_bad = -1; |
1303 | 933k | } else { |
1304 | 933k | if (first_bad == -1) |
1305 | 47.6k | first_bad = i; |
1306 | 933k | i += 1; |
1307 | 933k | } |
1308 | 981k | } |
1309 | 50.9k | if (first_bad >= 0) { |
1310 | 29.5k | ns_remove(ns, first_bad, ns->used - first_bad); |
1311 | 29.5k | } |
1312 | 50.9k | } |
1313 | | |
1314 | 50.9k | state->ctx = old_ctx; |
1315 | 50.9k | state->ctx_pos = old_ctx_pos; |
1316 | 50.9k | state->ctx_len = old_ctx_len; |
1317 | 50.9k | } |
1318 | | |
1319 | | /* Return true if PRED is solely the predicate '[n]' as in 'foo[17]' */ |
1320 | 4.09M | static bool position_pred(struct pred *pred) { |
1321 | 4.09M | return pred != NULL && |
1322 | 4.09M | pred->nexpr == 1 && |
1323 | 4.09M | pred->exprs[0]->tag == E_VALUE && |
1324 | 4.09M | pred->exprs[0]->type == T_NUMBER; |
1325 | 4.09M | } |
1326 | | |
1327 | | /* Return the tree node at the position implied by STEP->PREDICATES. It is |
1328 | | assumed and required that STEP->PREDICATES is actually a |
1329 | | POSITION_PRED. |
1330 | | |
1331 | | This method hand-optimizes the important case of a path expression like |
1332 | | 'service[42]' |
1333 | | */ |
1334 | | static struct tree *position_filter(struct nodeset *ns, |
1335 | | struct step *step, |
1336 | 6.72k | struct state *state) { |
1337 | 6.72k | int value_ind = step->predicates->exprs[0]->value_ind; |
1338 | 6.72k | int number = state->value_pool[value_ind].number; |
1339 | | |
1340 | 6.72k | int pos = 1; |
1341 | 11.7k | for (int i=0; i < ns->used; i++) { |
1342 | 5.04k | for (struct tree *node = step_first(step, ns->nodes[i]); |
1343 | 15.1k | node != NULL; |
1344 | 10.0k | node = step_next(step, ns->nodes[i], node), pos++) { |
1345 | 10.0k | if (pos == number) |
1346 | 0 | return node; |
1347 | 10.0k | } |
1348 | 5.04k | } |
1349 | | |
1350 | 6.72k | return NULL; |
1351 | 6.72k | } |
1352 | | |
1353 | | /* Return an array of nodesets, one for each step in the locpath. |
1354 | | * |
1355 | | * On return, (*NS)[0] will contain state->ctx, and (*NS)[*MAXNS] will |
1356 | | * contain the nodes that matched the entire locpath |
1357 | | */ |
1358 | | static void ns_from_locpath(struct locpath *lp, uint *maxns, |
1359 | | struct nodeset ***ns, |
1360 | | const struct nodeset *root, |
1361 | 2.33M | struct state *state) { |
1362 | 2.33M | struct tree *old_ctx = state->ctx; |
1363 | 2.33M | *maxns = 0; |
1364 | | |
1365 | 2.33M | ensure(lp != NULL, state); |
1366 | | |
1367 | 2.33M | *ns = NULL; |
1368 | 2.33M | list_for_each(step, lp->steps) |
1369 | 4.09M | *maxns += 1; |
1370 | 2.33M | if (ALLOC_N(*ns, *maxns+1) < 0) { |
1371 | 0 | STATE_ERROR(state, PATHX_ENOMEM); |
1372 | 0 | goto error; |
1373 | 0 | } |
1374 | 8.77M | for (int i=0; i <= *maxns; i++) { |
1375 | 6.43M | (*ns)[i] = make_nodeset(state); |
1376 | 6.43M | if (HAS_ERROR(state)) |
1377 | 0 | goto error; |
1378 | 6.43M | } |
1379 | | |
1380 | 2.33M | if (root == NULL) { |
1381 | 2.33M | struct step *first_step = NULL; |
1382 | 2.33M | first_step = lp->steps; |
1383 | | |
1384 | 2.33M | struct tree *root_tree; |
1385 | 2.33M | root_tree = step_root(first_step, state->ctx, state->root_ctx); |
1386 | 2.33M | ns_add((*ns)[0], root_tree, state); |
1387 | 2.33M | ns_clear_added((*ns)[0]); |
1388 | 2.33M | } else { |
1389 | 226 | for (int i=0; i < root->used; i++) |
1390 | 224 | ns_add((*ns)[0], root->nodes[i], state); |
1391 | 2 | ns_clear_added((*ns)[0]); |
1392 | 2 | } |
1393 | | |
1394 | 2.33M | if (HAS_ERROR(state)) |
1395 | 0 | goto error; |
1396 | | |
1397 | 2.33M | uint cur_ns = 0; |
1398 | 4.09M | list_for_each(step, lp->steps) { |
1399 | 4.09M | struct nodeset *work = (*ns)[cur_ns]; |
1400 | 4.09M | struct nodeset *next = (*ns)[cur_ns + 1]; |
1401 | 4.09M | if (position_pred(step->predicates)) { |
1402 | 6.72k | struct tree *node = position_filter(work, step, state); |
1403 | 6.72k | if (node) { |
1404 | 0 | ns_add(next, node, state); |
1405 | 0 | ns_clear_added(next); |
1406 | 0 | } |
1407 | 4.08M | } else { |
1408 | 8.03M | for (int i=0; i < work->used; i++) { |
1409 | 3.95M | for (struct tree *node = step_first(step, work->nodes[i]); |
1410 | 8.62M | node != NULL; |
1411 | 4.67M | node = step_next(step, work->nodes[i], node)) { |
1412 | 4.67M | ns_add(next, node, state); |
1413 | 4.67M | } |
1414 | 3.95M | } |
1415 | 4.08M | ns_clear_added(next); |
1416 | 4.08M | ns_filter(next, step->predicates, state); |
1417 | 4.08M | if (HAS_ERROR(state)) |
1418 | 25 | goto error; |
1419 | 4.08M | } |
1420 | 4.09M | cur_ns += 1; |
1421 | 4.09M | } |
1422 | | |
1423 | 2.33M | state->ctx = old_ctx; |
1424 | 2.33M | return; |
1425 | 25 | error: |
1426 | 25 | if (*ns != NULL) { |
1427 | 100 | for (int i=0; i <= *maxns; i++) |
1428 | 75 | free_nodeset((*ns)[i]); |
1429 | 25 | FREE(*ns); |
1430 | 25 | } |
1431 | 25 | state->ctx = old_ctx; |
1432 | 25 | return; |
1433 | 2.33M | } |
1434 | | |
1435 | 2.33M | static void eval_filter(struct expr *expr, struct state *state) { |
1436 | 2.33M | struct locpath *lp = expr->locpath; |
1437 | 2.33M | struct nodeset **ns = NULL; |
1438 | 2.33M | struct locpath_trace *lpt = state->locpath_trace; |
1439 | 2.33M | uint maxns; |
1440 | | |
1441 | 2.33M | state->locpath_trace = NULL; |
1442 | 2.33M | if (expr->primary == NULL) { |
1443 | 2.33M | ns_from_locpath(lp, &maxns, &ns, NULL, state); |
1444 | 2.33M | } else { |
1445 | 2 | eval_expr(expr->primary, state); |
1446 | 2 | RET_ON_ERROR; |
1447 | 2 | value_ind_t primary_ind = pop_value_ind(state); |
1448 | 2 | struct value *primary = state->value_pool + primary_ind; |
1449 | 2 | assert(primary->tag == T_NODESET); |
1450 | 2 | ns_filter(primary->nodeset, expr->predicates, state); |
1451 | | /* Evaluating predicates might have reallocated the value_pool */ |
1452 | 2 | primary = state->value_pool + primary_ind; |
1453 | 2 | ns_from_locpath(lp, &maxns, &ns, primary->nodeset, state); |
1454 | 2 | } |
1455 | 2.33M | RET_ON_ERROR; |
1456 | | |
1457 | 2.33M | value_ind_t vind = make_value(T_NODESET, state); |
1458 | 2.33M | RET_ON_ERROR; |
1459 | 2.33M | state->value_pool[vind].nodeset = ns[maxns]; |
1460 | 2.33M | push_value(vind, state); |
1461 | | |
1462 | 2.33M | if (lpt != NULL) { |
1463 | 33.6k | assert(lpt->ns == NULL); |
1464 | 33.6k | assert(lpt->lp == NULL); |
1465 | 33.6k | lpt->maxns = maxns; |
1466 | 33.6k | lpt->ns = ns; |
1467 | 33.6k | lpt->lp = lp; |
1468 | 33.6k | state->locpath_trace = lpt; |
1469 | 2.30M | } else { |
1470 | 6.25M | for (int i=0; i < maxns; i++) |
1471 | 3.94M | free_nodeset(ns[i]); |
1472 | 2.30M | FREE(ns); |
1473 | 2.30M | } |
1474 | 2.33M | } |
1475 | | |
1476 | | static struct value *lookup_var(const char *ident, |
1477 | 56 | const struct pathx_symtab *symtab) { |
1478 | 56 | list_for_each(tab, symtab) { |
1479 | 0 | if (STREQ(ident, tab->name)) |
1480 | 0 | return tab->value; |
1481 | 0 | } |
1482 | 56 | return NULL; |
1483 | 56 | } |
1484 | | |
1485 | 0 | static void eval_var(struct expr *expr, struct state *state) { |
1486 | 0 | struct value *v = lookup_var(expr->ident, state->symtab); |
1487 | 0 | value_ind_t vind = clone_value(v, state); |
1488 | 0 | RET_ON_ERROR; |
1489 | 0 | push_value(vind, state); |
1490 | 0 | } |
1491 | | |
1492 | 6.46M | static void eval_expr(struct expr *expr, struct state *state) { |
1493 | 6.46M | RET_ON_ERROR; |
1494 | 6.46M | switch (expr->tag) { |
1495 | 2.33M | case E_FILTER: |
1496 | 2.33M | eval_filter(expr, state); |
1497 | 2.33M | break; |
1498 | 2.17M | case E_BINARY: |
1499 | 2.17M | eval_binary(expr, state); |
1500 | 2.17M | break; |
1501 | 1.89M | case E_VALUE: |
1502 | 1.89M | push_value(expr->value_ind, state); |
1503 | 1.89M | break; |
1504 | 0 | case E_VAR: |
1505 | 0 | eval_var(expr, state); |
1506 | 0 | break; |
1507 | 52.4k | case E_APP: |
1508 | 52.4k | eval_app(expr, state); |
1509 | 52.4k | if (expr->fold) { |
1510 | | /* Do constant folding: replace the function application with |
1511 | | * a reference to the value that resulted from evaluating it */ |
1512 | 396 | for (int i=0; i < expr->func->arity; i++) |
1513 | 198 | free_expr(expr->args[i]); |
1514 | 198 | free(expr->args); |
1515 | 198 | value_ind_t vind = state->values_used - 1; |
1516 | 198 | expr->tag = E_VALUE; |
1517 | 198 | expr->value_ind = state->values[vind]; |
1518 | 198 | } |
1519 | 52.4k | break; |
1520 | 0 | default: |
1521 | 0 | assert(0); |
1522 | 6.46M | } |
1523 | 6.46M | } |
1524 | | |
1525 | | /************************************************************************* |
1526 | | * Typechecker |
1527 | | *************************************************************************/ |
1528 | | |
1529 | | static void check_expr(struct expr *expr, struct state *state); |
1530 | | |
1531 | | /* Typecheck a list of predicates. A predicate is a function of |
1532 | | * one of the following types: |
1533 | | * |
1534 | | * T_NODESET -> T_BOOLEAN |
1535 | | * T_NUMBER -> T_BOOLEAN (position test) |
1536 | | * T_BOOLEAN -> T_BOOLEAN |
1537 | | */ |
1538 | 446k | static void check_preds(struct pred *pred, struct state *state) { |
1539 | 446k | if (pred == NULL) |
1540 | 438k | return; |
1541 | 16.4k | for (int i=0; i < pred->nexpr; i++) { |
1542 | 8.20k | struct expr *e = pred->exprs[i]; |
1543 | 8.20k | check_expr(e, state); |
1544 | 8.20k | RET_ON_ERROR; |
1545 | 8.20k | if (e->type != T_NODESET && e->type != T_NUMBER && |
1546 | 8.20k | e->type != T_BOOLEAN && e->type != T_STRING) { |
1547 | 0 | STATE_ERROR(state, PATHX_ETYPE); |
1548 | 0 | return; |
1549 | 0 | } |
1550 | 8.20k | } |
1551 | 8.20k | } |
1552 | | |
1553 | 128k | static void check_filter(struct expr *expr, struct state *state) { |
1554 | 128k | assert(expr->tag == E_FILTER); |
1555 | 128k | struct locpath *locpath = expr->locpath; |
1556 | | |
1557 | 128k | if (expr->primary != NULL) { |
1558 | 4 | check_expr(expr->primary, state); |
1559 | 4 | if (expr->primary->type != T_NODESET) { |
1560 | 0 | STATE_ERROR(state, PATHX_ETYPE); |
1561 | 0 | return; |
1562 | 0 | } |
1563 | 4 | check_preds(expr->predicates, state); |
1564 | 4 | RET_ON_ERROR; |
1565 | 4 | } |
1566 | 446k | list_for_each(s, locpath->steps) { |
1567 | 446k | check_preds(s->predicates, state); |
1568 | 446k | RET_ON_ERROR; |
1569 | 446k | } |
1570 | 128k | expr->type = T_NODESET; |
1571 | 128k | } |
1572 | | |
1573 | 943 | static void check_app(struct expr *expr, struct state *state) { |
1574 | 943 | assert(expr->tag == E_APP); |
1575 | | |
1576 | 2.17k | for (int i=0; i < expr->func->arity; i++) { |
1577 | 1.23k | check_expr(expr->args[i], state); |
1578 | 1.23k | RET_ON_ERROR; |
1579 | 1.23k | } |
1580 | | |
1581 | 943 | int f; |
1582 | 7.43k | for (f=0; f < ARRAY_CARDINALITY(builtin_funcs); f++) { |
1583 | 7.43k | const struct func *fn = builtin_funcs + f; |
1584 | 7.43k | if (STRNEQ(expr->func->name, fn->name)) |
1585 | 5.00k | continue; |
1586 | 2.43k | if (expr->func->arity != fn->arity) |
1587 | 576 | continue; |
1588 | | |
1589 | 1.85k | int match = 1; |
1590 | 3.08k | for (int i=0; i < expr->func->arity; i++) { |
1591 | 2.14k | if (expr->args[i]->type != fn->arg_types[i]) { |
1592 | 914 | match = 0; |
1593 | 914 | break; |
1594 | 914 | } |
1595 | 2.14k | } |
1596 | 1.85k | if (match) |
1597 | 943 | break; |
1598 | 1.85k | } |
1599 | | |
1600 | 943 | if (f < ARRAY_CARDINALITY(builtin_funcs)) { |
1601 | 943 | expr->func = builtin_funcs + f; |
1602 | 943 | expr->type = expr->func->type; |
1603 | 943 | expr->fold = expr->func->pure; |
1604 | 943 | if (expr->fold) { |
1605 | | /* We only do constant folding for invocations of pure functions |
1606 | | * whose arguments are literal values. That misses opportunities |
1607 | | * for constant folding, e.g., "regexp('foo' + 'bar')" but is |
1608 | | * a bit simpler than doing full tracking of constants |
1609 | | */ |
1610 | 1.76k | for (int i=0; i < expr->func->arity; i++) { |
1611 | 1.02k | if (expr->args[i]->tag != E_VALUE) |
1612 | 504 | expr->fold = false; |
1613 | 1.02k | } |
1614 | 738 | } |
1615 | 943 | } else { |
1616 | 0 | STATE_ERROR(state, PATHX_ETYPE); |
1617 | 0 | } |
1618 | 943 | } |
1619 | | |
1620 | | /* Check the binary operators. Type rules: |
1621 | | * |
1622 | | * '=', '!=' : T_NODESET -> T_NODESET -> T_BOOLEAN |
1623 | | * T_STRING -> T_NODESET -> T_BOOLEAN |
1624 | | * T_NODESET -> T_STRING -> T_BOOLEAN |
1625 | | * T_NUMBER -> T_NUMBER -> T_BOOLEAN |
1626 | | * |
1627 | | * '>', '>=', |
1628 | | * '<', '<=' : T_NUMBER -> T_NUMBER -> T_BOOLEAN |
1629 | | * T_STRING -> T_STRING -> T_BOOLEAN |
1630 | | * '+' : T_NUMBER -> T_NUMBER -> T_NUMBER |
1631 | | * T_STRING -> T_STRING -> T_STRING |
1632 | | * T_REGEXP -> T_REGEXP -> T_REGEXP |
1633 | | * '+', '-', '*': T_NUMBER -> T_NUMBER -> T_NUMBER |
1634 | | * |
1635 | | * 'and', 'or': T_BOOLEAN -> T_BOOLEAN -> T_BOOLEAN |
1636 | | * '=~', '!~' : T_STRING -> T_REGEXP -> T_BOOLEAN |
1637 | | * T_NODESET -> T_REGEXP -> T_BOOLEAN |
1638 | | * |
1639 | | * '|' : T_NODESET -> T_NODESET -> T_NODESET |
1640 | | * |
1641 | | * Any type can be coerced to T_BOOLEAN (see coerce_to_bool) |
1642 | | */ |
1643 | 1.95M | static void check_binary(struct expr *expr, struct state *state) { |
1644 | 1.95M | check_expr(expr->left, state); |
1645 | 1.95M | check_expr(expr->right, state); |
1646 | 1.95M | RET_ON_ERROR; |
1647 | | |
1648 | 1.13M | enum type l = expr->left->type; |
1649 | 1.13M | enum type r = expr->right->type; |
1650 | 1.13M | int ok = 1; |
1651 | 1.13M | enum type res; |
1652 | | |
1653 | 1.13M | switch(expr->op) { |
1654 | 209 | case OP_EQ: |
1655 | 236 | case OP_NEQ: |
1656 | 236 | ok = ((l == T_NODESET || l == T_STRING) |
1657 | 236 | && (r == T_NODESET || r == T_STRING)) |
1658 | 236 | || (l == T_NUMBER && r == T_NUMBER);; |
1659 | 236 | res = T_BOOLEAN; |
1660 | 236 | break; |
1661 | 418 | case OP_LT: |
1662 | 418 | case OP_LE: |
1663 | 1.30k | case OP_GT: |
1664 | 1.30k | case OP_GE: |
1665 | 1.30k | ok = (l == T_NUMBER && r == T_NUMBER) |
1666 | 1.30k | || (l == T_STRING && r == T_STRING); |
1667 | 1.30k | res = T_BOOLEAN; |
1668 | 1.30k | break; |
1669 | 1.64k | case OP_PLUS: |
1670 | 1.64k | ok = (l == r && (l == T_NUMBER || l == T_STRING || l == T_REGEXP)); |
1671 | 1.64k | res = l; |
1672 | 1.64k | break; |
1673 | 51.0k | case OP_MINUS: |
1674 | 1.10M | case OP_STAR: |
1675 | 1.10M | ok = (l == T_NUMBER && r == T_NUMBER); |
1676 | 1.10M | res = T_NUMBER; |
1677 | 1.10M | break; |
1678 | 19.0k | case OP_UNION: |
1679 | 19.0k | ok = (l == T_NODESET && r == T_NODESET); |
1680 | 19.0k | res = T_NODESET; |
1681 | 19.0k | break; |
1682 | 409 | case OP_AND: |
1683 | 4.89k | case OP_OR: |
1684 | 4.89k | ok = 1; |
1685 | 4.89k | res = T_BOOLEAN; |
1686 | 4.89k | break; |
1687 | 1.91k | case OP_ELSE: |
1688 | 1.91k | if (l == T_NODESET && r == T_NODESET) { |
1689 | 463 | res = T_NODESET; |
1690 | 1.45k | } else { |
1691 | 1.45k | res = T_BOOLEAN; |
1692 | 1.45k | } |
1693 | 1.91k | ok = 1; |
1694 | 1.91k | break; |
1695 | 7 | case OP_RE_MATCH: |
1696 | 738 | case OP_RE_NOMATCH: |
1697 | 738 | ok = ((l == T_STRING || l == T_NODESET) && r == T_REGEXP); |
1698 | 738 | res = T_BOOLEAN; |
1699 | 738 | break; |
1700 | 0 | default: |
1701 | 0 | assert(0); |
1702 | 1.13M | } |
1703 | 1.13M | if (! ok) { |
1704 | 269 | STATE_ERROR(state, PATHX_ETYPE); |
1705 | 1.13M | } else { |
1706 | 1.13M | expr->type = res; |
1707 | 1.13M | } |
1708 | 1.13M | } |
1709 | | |
1710 | 56 | static void check_var(struct expr *expr, struct state *state) { |
1711 | 56 | struct value *v = lookup_var(expr->ident, state->symtab); |
1712 | 56 | if (v == NULL) { |
1713 | 56 | STATE_ERROR(state, PATHX_ENOVAR); |
1714 | 56 | return; |
1715 | 56 | } |
1716 | 0 | expr->type = v->tag; |
1717 | 0 | } |
1718 | | |
1719 | | /* Typecheck an expression */ |
1720 | 4.01M | static void check_expr(struct expr *expr, struct state *state) { |
1721 | 4.01M | RET_ON_ERROR; |
1722 | 3.19M | switch(expr->tag) { |
1723 | 128k | case E_FILTER: |
1724 | 128k | check_filter(expr, state); |
1725 | 128k | break; |
1726 | 1.95M | case E_BINARY: |
1727 | 1.95M | check_binary(expr, state); |
1728 | 1.95M | break; |
1729 | 1.11M | case E_VALUE: |
1730 | 1.11M | expr->type = expr_value(expr, state)->tag; |
1731 | 1.11M | break; |
1732 | 56 | case E_VAR: |
1733 | 56 | check_var(expr, state); |
1734 | 56 | break; |
1735 | 943 | case E_APP: |
1736 | 943 | check_app(expr, state); |
1737 | 943 | break; |
1738 | 0 | default: |
1739 | 0 | assert(0); |
1740 | 3.19M | } |
1741 | 3.19M | } |
1742 | | |
1743 | | /* |
1744 | | * Utility functions for the parser |
1745 | | */ |
1746 | | |
1747 | 85.0M | static void skipws(struct state *state) { |
1748 | 85.0M | while (isspace(*state->pos)) state->pos += 1; |
1749 | 85.0M | } |
1750 | | |
1751 | 70.9M | static int match(struct state *state, char m) { |
1752 | 70.9M | skipws(state); |
1753 | | |
1754 | 70.9M | if (*state->pos == '\0') |
1755 | 408k | return 0; |
1756 | 70.5M | if (*state->pos == m) { |
1757 | 19.2M | state->pos += 1; |
1758 | 19.2M | return 1; |
1759 | 19.2M | } |
1760 | 51.2M | return 0; |
1761 | 70.5M | } |
1762 | | |
1763 | 28.7M | static int peek(struct state *state, const char *chars) { |
1764 | 28.7M | return strchr(chars, *state->pos) != NULL; |
1765 | 28.7M | } |
1766 | | |
1767 | | /* Return 1 if STATE->POS starts with TOKEN, followed by optional |
1768 | | * whitespace, followed by FOLLOW. In that case, STATE->POS is set to the |
1769 | | * first character after FOLLOW. Return 0 otherwise and leave STATE->POS |
1770 | | * unchanged. |
1771 | | */ |
1772 | | static int looking_at(struct state *state, const char *token, |
1773 | 42.9M | const char *follow) { |
1774 | 42.9M | if (STREQLEN(state->pos, token, strlen(token))) { |
1775 | 16.1k | const char *p = state->pos + strlen(token); |
1776 | 16.1k | while (isspace(*p)) p++; |
1777 | 16.1k | if (STREQLEN(p, follow, strlen(follow))) { |
1778 | 12.8k | state->pos = p + strlen(follow); |
1779 | 12.8k | return 1; |
1780 | 12.8k | } |
1781 | 16.1k | } |
1782 | 42.9M | return 0; |
1783 | 42.9M | } |
1784 | | |
1785 | | /************************************************************************* |
1786 | | * The parser |
1787 | | *************************************************************************/ |
1788 | | |
1789 | | static void parse_expr(struct state *state); |
1790 | | |
1791 | 12.5M | static struct expr* pop_expr(struct state *state) { |
1792 | 12.5M | if (state->exprs_used > 0) { |
1793 | 12.5M | state->exprs_used -= 1; |
1794 | 12.5M | return state->exprs[state->exprs_used]; |
1795 | 12.5M | } else { |
1796 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); |
1797 | 0 | assert(0); |
1798 | 0 | return NULL; |
1799 | 0 | } |
1800 | 12.5M | } |
1801 | | |
1802 | 16.5M | static void push_expr(struct expr *expr, struct state *state) { |
1803 | 16.5M | if (state->exprs_used >= state->exprs_size) { |
1804 | 105k | size_t new_size = 2*state->exprs_size; |
1805 | 105k | if (new_size == 0) new_size = 8; |
1806 | 105k | if (REALLOC_N(state->exprs, new_size) < 0) { |
1807 | 0 | STATE_ENOMEM; |
1808 | 0 | return; |
1809 | 0 | } |
1810 | 105k | state->exprs_size = new_size; |
1811 | 105k | } |
1812 | 16.5M | state->exprs[state->exprs_used++] = expr; |
1813 | 16.5M | } |
1814 | | |
1815 | 4.22M | static void push_new_binary_op(enum binary_op op, struct state *state) { |
1816 | 4.22M | struct expr *expr = NULL; |
1817 | 4.22M | if (ALLOC(expr) < 0) { |
1818 | 0 | STATE_ENOMEM; |
1819 | 0 | return; |
1820 | 0 | } |
1821 | | |
1822 | 4.22M | expr->tag = E_BINARY; |
1823 | 4.22M | expr->op = op; |
1824 | 4.22M | expr->right = pop_expr(state); |
1825 | 4.22M | expr->left = pop_expr(state); |
1826 | 4.22M | expr->left_matched = false; /* for 'else' operator only, true if any matches on LHS */ |
1827 | 4.22M | push_expr(expr, state); |
1828 | 4.22M | } |
1829 | | |
1830 | 36.5k | int pathx_escape_name(const char *in, char **out) { |
1831 | 36.5k | const char *p; |
1832 | 36.5k | int num_to_escape = 0; |
1833 | 36.5k | char *s; |
1834 | | |
1835 | 36.5k | *out = NULL; |
1836 | | |
1837 | 104M | for (p = in; *p; p++) { |
1838 | 104M | if (strchr(name_follow, *p) || isspace(*p) || *p == '\\') |
1839 | 81.9M | num_to_escape += 1; |
1840 | 104M | } |
1841 | | |
1842 | 36.5k | if (num_to_escape == 0) |
1843 | 36.0k | return 0; |
1844 | | |
1845 | 482 | if (ALLOC_N(*out, strlen(in) + num_to_escape + 1) < 0) |
1846 | 0 | return -1; |
1847 | | |
1848 | 95.0M | for (p = in, s = *out; *p; p++) { |
1849 | 95.0M | if (strchr(name_follow, *p) || isspace(*p) || *p == '\\') |
1850 | 81.9M | *s++ = '\\'; |
1851 | 95.0M | *s++ = *p; |
1852 | 95.0M | } |
1853 | 482 | *s = '\0'; |
1854 | 482 | return 0; |
1855 | 482 | } |
1856 | | |
1857 | | /* Return true if POS is preceded by an odd number of backslashes, i.e., if |
1858 | | * POS is escaped. Stop the search when we get to START */ |
1859 | 3.40M | static bool backslash_escaped(const char *pos, const char *start) { |
1860 | 3.40M | bool result=false; |
1861 | 3.40M | while (pos-- > start && *pos == '\\') { |
1862 | 275 | result = !result; |
1863 | 275 | } |
1864 | 3.40M | return result; |
1865 | 3.40M | } |
1866 | | |
1867 | | /* |
1868 | | * NameNoWS ::= [^][|/\= \t\n] | \\. |
1869 | | * NameWS ::= [^][|/\=] | \\. |
1870 | | * Name ::= NameNoWS NameWS* NameNoWS | NameNoWS |
1871 | | */ |
1872 | 3.33M | static char *parse_name(struct state *state) { |
1873 | 3.33M | const char *s = state->pos; |
1874 | 3.33M | char *result; |
1875 | | |
1876 | | /* Advance state->pos until it points to the first character that is |
1877 | | * not part of a name. */ |
1878 | 197M | while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) { |
1879 | | /* Since we allow spaces in names, we need to avoid gobbling up |
1880 | | * stuff that is in follow(Name), e.g. 'or' so that things like |
1881 | | * [name1 or name2] still work. In other words, we'll parse 'x frob |
1882 | | * y' as one name, but for 'x or y', we consider 'x' a name in its |
1883 | | * own right. */ |
1884 | 194M | if (STREQLEN(state->pos, " or ", strlen(" or ")) || |
1885 | 194M | STREQLEN(state->pos, " and ", strlen(" and ")) || |
1886 | 194M | STREQLEN(state->pos, " else ", strlen(" else "))) |
1887 | 894 | break; |
1888 | | |
1889 | 194M | if (*state->pos == '\\') { |
1890 | 4.70M | state->pos += 1; |
1891 | 4.70M | if (*state->pos == '\0') { |
1892 | 0 | STATE_ERROR(state, PATHX_ENAME); |
1893 | 0 | return NULL; |
1894 | 0 | } |
1895 | 4.70M | } |
1896 | 194M | state->pos += 1; |
1897 | 194M | } |
1898 | | |
1899 | | /* Strip trailing white space. Make sure we respect escaped whitespace |
1900 | | * and don't strip it as in "x\\ " */ |
1901 | 3.33M | if (state->pos > s) { |
1902 | 3.33M | state->pos -= 1; |
1903 | 6.73M | while (isspace(*state->pos) && state->pos > s |
1904 | 6.73M | && !backslash_escaped(state->pos, s)) |
1905 | 3.40M | state->pos -= 1; |
1906 | 3.33M | state->pos += 1; |
1907 | 3.33M | } |
1908 | | |
1909 | 3.33M | if (state->pos == s) { |
1910 | 79 | STATE_ERROR(state, PATHX_ENAME); |
1911 | 79 | return NULL; |
1912 | 79 | } |
1913 | | |
1914 | 3.33M | result = strndup(s, state->pos - s); |
1915 | 3.33M | if (result == NULL) { |
1916 | 0 | STATE_ENOMEM; |
1917 | 0 | return NULL; |
1918 | 0 | } |
1919 | | |
1920 | 3.33M | char *p = result; |
1921 | 194M | for (char *t = result; *t != '\0'; t++, p++) { |
1922 | 190M | if (*t == '\\') |
1923 | 4.70M | t += 1; |
1924 | 190M | *p = *t; |
1925 | 190M | } |
1926 | 3.33M | *p = '\0'; |
1927 | | |
1928 | 3.33M | return result; |
1929 | 3.33M | } |
1930 | | |
1931 | | /* |
1932 | | * Predicate ::= "[" Expr "]" * |
1933 | | */ |
1934 | 12.4M | static struct pred *parse_predicates(struct state *state) { |
1935 | 12.4M | struct pred *pred = NULL; |
1936 | 12.4M | int nexpr = 0; |
1937 | | |
1938 | 18.5M | while (match(state, L_BRACK)) { |
1939 | 6.20M | parse_expr(state); |
1940 | 6.20M | nexpr += 1; |
1941 | 6.20M | RET0_ON_ERROR; |
1942 | | |
1943 | 6.03M | if (! match(state, R_BRACK)) { |
1944 | 117 | STATE_ERROR(state, PATHX_EPRED); |
1945 | 117 | return NULL; |
1946 | 117 | } |
1947 | 6.03M | skipws(state); |
1948 | 6.03M | } |
1949 | | |
1950 | 12.3M | if (nexpr == 0) |
1951 | 12.2M | return NULL; |
1952 | | |
1953 | 8.36k | if (ALLOC(pred) < 0) { |
1954 | 0 | STATE_ENOMEM; |
1955 | 0 | return NULL; |
1956 | 0 | } |
1957 | 8.36k | pred->nexpr = nexpr; |
1958 | | |
1959 | 8.36k | if (ALLOC_N(pred->exprs, nexpr) < 0) { |
1960 | 0 | free_pred(pred); |
1961 | 0 | STATE_ENOMEM; |
1962 | 0 | return NULL; |
1963 | 0 | } |
1964 | | |
1965 | 4.09M | for (int i = nexpr - 1; i >= 0; i--) |
1966 | 4.09M | pred->exprs[i] = pop_expr(state); |
1967 | | |
1968 | 8.36k | return pred; |
1969 | 8.36k | } |
1970 | | |
1971 | | /* |
1972 | | * Step ::= AxisSpecifier NameTest Predicate* | '.' | '..' |
1973 | | * AxisSpecifier ::= AxisName '::' | <epsilon> |
1974 | | * AxisName ::= 'ancestor' |
1975 | | * | 'ancestor-or-self' |
1976 | | * | 'child' |
1977 | | * | 'descendant' |
1978 | | * | 'descendant-or-self' |
1979 | | * | 'parent' |
1980 | | * | 'self' |
1981 | | * | 'root' |
1982 | | */ |
1983 | 4.29M | static struct step *parse_step(struct state *state) { |
1984 | 4.29M | struct step *step; |
1985 | 4.29M | int explicit_axis = 0, allow_predicates = 1; |
1986 | | |
1987 | 4.29M | if (ALLOC(step) < 0) { |
1988 | 0 | STATE_ENOMEM; |
1989 | 0 | return NULL; |
1990 | 0 | } |
1991 | | |
1992 | 4.29M | step->axis = CHILD; |
1993 | 47.1M | for (int i = 0; i < ARRAY_CARDINALITY(axis_names); i++) { |
1994 | 42.9M | if (looking_at(state, axis_names[i], "::")) { |
1995 | 625 | step->axis = i; |
1996 | 625 | explicit_axis = 1; |
1997 | 625 | break; |
1998 | 625 | } |
1999 | 42.9M | } |
2000 | | |
2001 | 4.29M | if (! match(state, '*')) { |
2002 | 3.33M | step->name = parse_name(state); |
2003 | 3.33M | if (HAS_ERROR(state)) |
2004 | 79 | goto error; |
2005 | 3.33M | if (! explicit_axis) { |
2006 | 3.33M | if (STREQ(step->name, ".") || STREQ(step->name, "..")) { |
2007 | 35.7k | step->axis = STREQ(step->name, ".") ? SELF : PARENT; |
2008 | 35.7k | FREE(step->name); |
2009 | 35.7k | allow_predicates = 0; |
2010 | 35.7k | } |
2011 | 3.33M | } |
2012 | 3.33M | } |
2013 | | |
2014 | 4.29M | if (allow_predicates) { |
2015 | 4.25M | step->predicates = parse_predicates(state); |
2016 | 4.25M | if (HAS_ERROR(state)) |
2017 | 161k | goto error; |
2018 | 4.25M | } |
2019 | | |
2020 | 4.12M | return step; |
2021 | | |
2022 | 161k | error: |
2023 | 161k | free_step(step); |
2024 | 161k | return NULL; |
2025 | 4.29M | } |
2026 | | |
2027 | 163k | static struct step *make_step(enum axis axis, struct state *state) { |
2028 | 163k | struct step *result = NULL; |
2029 | | |
2030 | 163k | if (ALLOC(result) < 0) { |
2031 | 0 | STATE_ENOMEM; |
2032 | 0 | return NULL; |
2033 | 0 | } |
2034 | 163k | result->axis = axis; |
2035 | 163k | return result; |
2036 | 163k | } |
2037 | | |
2038 | | /* |
2039 | | * RelativeLocationPath ::= Step |
2040 | | * | RelativeLocationPath '/' Step |
2041 | | * | AbbreviatedRelativeLocationPath |
2042 | | * AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step |
2043 | | * |
2044 | | * The above is the same as |
2045 | | * RelativeLocationPath ::= Step ('/' Step | '//' Step)* |
2046 | | */ |
2047 | | static struct locpath * |
2048 | 4.10M | parse_relative_location_path(struct state *state) { |
2049 | 4.10M | struct step *step = NULL; |
2050 | 4.10M | struct locpath *locpath = NULL; |
2051 | | |
2052 | 4.10M | step = parse_step(state); |
2053 | 4.10M | if (HAS_ERROR(state)) |
2054 | 161k | goto error; |
2055 | | |
2056 | 3.94M | if (ALLOC(locpath) < 0) { |
2057 | 0 | STATE_ENOMEM; |
2058 | 0 | goto error; |
2059 | 0 | } |
2060 | 3.94M | list_append(locpath->steps, step); |
2061 | 0 | step = NULL; |
2062 | | |
2063 | 4.12M | while (match(state, '/')) { |
2064 | 187k | if (*state->pos == '/') { |
2065 | 36.3k | state->pos += 1; |
2066 | 36.3k | step = make_step(DESCENDANT_OR_SELF, state); |
2067 | 36.3k | if (step == NULL) { |
2068 | 0 | STATE_ENOMEM; |
2069 | 0 | goto error; |
2070 | 0 | } |
2071 | 36.3k | list_append(locpath->steps, step); |
2072 | 36.3k | } |
2073 | 187k | step = parse_step(state); |
2074 | 187k | if (HAS_ERROR(state)) |
2075 | 147 | goto error; |
2076 | 187k | list_append(locpath->steps, step); |
2077 | 0 | step = NULL; |
2078 | 187k | } |
2079 | 3.94M | return locpath; |
2080 | | |
2081 | 161k | error: |
2082 | 161k | free_step(step); |
2083 | 161k | free_locpath(locpath); |
2084 | 161k | return NULL; |
2085 | 3.94M | } |
2086 | | |
2087 | | /* |
2088 | | * LocationPath ::= RelativeLocationPath | AbsoluteLocationPath |
2089 | | * AbsoluteLocationPath ::= '/' RelativeLocationPath? |
2090 | | * | AbbreviatedAbsoluteLocationPath |
2091 | | * AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath |
2092 | | * |
2093 | | */ |
2094 | 4.10M | static void parse_location_path(struct state *state) { |
2095 | 4.10M | struct expr *expr = NULL; |
2096 | 4.10M | struct locpath *locpath = NULL; |
2097 | | |
2098 | 4.10M | if (match(state, '/')) { |
2099 | 127k | if (*state->pos == '/') { |
2100 | 21.5k | state->pos += 1; |
2101 | 21.5k | locpath = parse_relative_location_path(state); |
2102 | 21.5k | if (HAS_ERROR(state)) |
2103 | 76 | goto error; |
2104 | 21.4k | struct step *step = make_step(DESCENDANT_OR_SELF, state); |
2105 | 21.4k | if (HAS_ERROR(state)) |
2106 | 0 | goto error; |
2107 | 21.4k | list_cons(locpath->steps, step); |
2108 | 105k | } else { |
2109 | 105k | if (*state->pos != '\0') { |
2110 | 105k | locpath = parse_relative_location_path(state); |
2111 | 105k | } else { |
2112 | 0 | if (ALLOC(locpath) < 0) |
2113 | 0 | goto err_nomem; |
2114 | 0 | } |
2115 | 105k | struct step *step = make_step(ROOT, state); |
2116 | 105k | if (HAS_ERROR(state)) { |
2117 | 3 | free_step(step); |
2118 | 3 | goto error; |
2119 | 3 | } |
2120 | 105k | list_cons(locpath->steps, step); |
2121 | 105k | } |
2122 | 3.97M | } else { |
2123 | 3.97M | locpath = parse_relative_location_path(state); |
2124 | 3.97M | } |
2125 | | |
2126 | 4.10M | if (ALLOC(expr) < 0) |
2127 | 0 | goto err_nomem; |
2128 | 4.10M | expr->tag = E_FILTER; |
2129 | 4.10M | expr->locpath = locpath; |
2130 | 4.10M | push_expr(expr, state); |
2131 | 4.10M | return; |
2132 | | |
2133 | 0 | err_nomem: |
2134 | 0 | STATE_ENOMEM; |
2135 | 79 | error: |
2136 | 79 | free_expr(expr); |
2137 | 79 | free_locpath(locpath); |
2138 | 79 | return; |
2139 | 0 | } |
2140 | | |
2141 | | /* |
2142 | | * Number ::= /[0-9]+/ |
2143 | | */ |
2144 | 8.20M | static void parse_number(struct state *state) { |
2145 | 8.20M | struct expr *expr = NULL; |
2146 | 8.20M | unsigned long val; |
2147 | 8.20M | char *end; |
2148 | | |
2149 | 8.20M | errno = 0; |
2150 | 8.20M | val = strtoul(state->pos, &end, 10); |
2151 | 8.20M | if (errno || end == state->pos || (int) val != val) { |
2152 | 68 | STATE_ERROR(state, PATHX_ENUMBER); |
2153 | 68 | return; |
2154 | 68 | } |
2155 | | |
2156 | 8.20M | state->pos = end; |
2157 | | |
2158 | 8.20M | if (ALLOC(expr) < 0) |
2159 | 0 | goto err_nomem; |
2160 | 8.20M | expr->tag = E_VALUE; |
2161 | 8.20M | expr->value_ind = make_value(T_NUMBER, state); |
2162 | 8.20M | if (HAS_ERROR(state)) |
2163 | 0 | goto error; |
2164 | 8.20M | expr_value(expr, state)->number = val; |
2165 | | |
2166 | 8.20M | push_expr(expr, state); |
2167 | 8.20M | return; |
2168 | | |
2169 | 0 | err_nomem: |
2170 | 0 | STATE_ENOMEM; |
2171 | 0 | error: |
2172 | 0 | free_expr(expr); |
2173 | 0 | return; |
2174 | 0 | } |
2175 | | |
2176 | | /* |
2177 | | * Literal ::= '"' /[^"]* / '"' | "'" /[^']* / "'" |
2178 | | */ |
2179 | 3.56k | static void parse_literal(struct state *state) { |
2180 | 3.56k | char delim; |
2181 | 3.56k | const char *s; |
2182 | 3.56k | struct expr *expr = NULL; |
2183 | | |
2184 | 3.56k | if (*state->pos == '"') |
2185 | 2.11k | delim = '"'; |
2186 | 1.45k | else if (*state->pos == '\'') |
2187 | 1.29k | delim = '\''; |
2188 | 165 | else { |
2189 | 165 | STATE_ERROR(state, PATHX_ESTRING); |
2190 | 165 | return; |
2191 | 165 | } |
2192 | 3.40k | state->pos += 1; |
2193 | | |
2194 | 3.40k | s = state->pos; |
2195 | 9.25M | while (*state->pos != '\0' && *state->pos != delim) state->pos += 1; |
2196 | | |
2197 | 3.40k | if (*state->pos != delim) { |
2198 | 83 | STATE_ERROR(state, PATHX_EDELIM); |
2199 | 83 | return; |
2200 | 83 | } |
2201 | 3.32k | state->pos += 1; |
2202 | | |
2203 | 3.32k | if (ALLOC(expr) < 0) |
2204 | 0 | goto err_nomem; |
2205 | 3.32k | expr->tag = E_VALUE; |
2206 | 3.32k | expr->value_ind = make_value(T_STRING, state); |
2207 | 3.32k | if (HAS_ERROR(state)) |
2208 | 0 | goto error; |
2209 | 3.32k | expr_value(expr, state)->string = strndup(s, state->pos - s - 1); |
2210 | 3.32k | if (expr_value(expr, state)->string == NULL) |
2211 | 0 | goto err_nomem; |
2212 | | |
2213 | 3.32k | push_expr(expr, state); |
2214 | 3.32k | return; |
2215 | | |
2216 | 0 | err_nomem: |
2217 | 0 | STATE_ENOMEM; |
2218 | 0 | error: |
2219 | 0 | free_expr(expr); |
2220 | 0 | return; |
2221 | 0 | } |
2222 | | |
2223 | | /* |
2224 | | * FunctionCall ::= Name '(' ( Expr ( ',' Expr )* )? ')' |
2225 | | */ |
2226 | 12.1k | static void parse_function_call(struct state *state) { |
2227 | 12.1k | const struct func *func = NULL; |
2228 | 12.1k | struct expr *expr = NULL; |
2229 | 12.1k | int nargs = 0, find = 0; |
2230 | | |
2231 | 62.3k | for (; find < ARRAY_CARDINALITY(builtin_funcs); find++) { |
2232 | 62.3k | if (looking_at(state, builtin_funcs[find].name, "(")) { |
2233 | 12.1k | func = builtin_funcs + find; |
2234 | 12.1k | break; |
2235 | 12.1k | } |
2236 | 62.3k | } |
2237 | 12.1k | if (func == NULL) { |
2238 | 10 | STATE_ERROR(state, PATHX_ENAME); |
2239 | 10 | return; |
2240 | 10 | } |
2241 | | |
2242 | 12.1k | if (! match(state, ')')) { |
2243 | 1.52M | do { |
2244 | 1.52M | nargs += 1; |
2245 | 1.52M | parse_expr(state); |
2246 | 1.52M | RET_ON_ERROR; |
2247 | 1.52M | } while (match(state, ',')); |
2248 | | |
2249 | 1.01k | if (! match(state, ')')) { |
2250 | 25 | STATE_ERROR(state, PATHX_EPAREN); |
2251 | 25 | return; |
2252 | 25 | } |
2253 | 1.01k | } |
2254 | | |
2255 | 993 | int found = 0; /* Whether there is a builtin matching in name and arity */ |
2256 | 1.59k | for (int i=find; i < ARRAY_CARDINALITY(builtin_funcs); i++) { |
2257 | 1.59k | if (STRNEQ(func->name, builtin_funcs[i].name)) |
2258 | 18 | break; |
2259 | 1.57k | if (builtin_funcs[i].arity == nargs) { |
2260 | 975 | func = builtin_funcs + i; |
2261 | 975 | found = 1; |
2262 | 975 | break; |
2263 | 975 | } |
2264 | 1.57k | } |
2265 | | |
2266 | 993 | if (! found) { |
2267 | 18 | STATE_ERROR(state, PATHX_EARITY); |
2268 | 18 | return; |
2269 | 18 | } |
2270 | | |
2271 | 975 | if (ALLOC(expr) < 0) { |
2272 | 0 | STATE_ENOMEM; |
2273 | 0 | return; |
2274 | 0 | } |
2275 | 975 | expr->tag = E_APP; |
2276 | 975 | if (ALLOC_N(expr->args, nargs) < 0) { |
2277 | 0 | free_expr(expr); |
2278 | 0 | STATE_ENOMEM; |
2279 | 0 | return; |
2280 | 0 | } |
2281 | 975 | expr->func = func; |
2282 | 2.23k | for (int i = nargs - 1; i >= 0; i--) |
2283 | 1.26k | expr->args[i] = pop_expr(state); |
2284 | | |
2285 | 975 | push_expr(expr, state); |
2286 | 975 | } |
2287 | | |
2288 | | /* |
2289 | | * VariableReference ::= '$' /[a-zA-Z_][a-zA-Z0-9_]* / |
2290 | | * |
2291 | | * The '$' is consumed by parse_primary_expr |
2292 | | */ |
2293 | 843 | static void parse_var(struct state *state) { |
2294 | 843 | const char *id = state->pos; |
2295 | 843 | struct expr *expr = NULL; |
2296 | | |
2297 | 843 | if (!isalpha(*id) && *id != '_') { |
2298 | 35 | STATE_ERROR(state, PATHX_ENAME); |
2299 | 35 | return; |
2300 | 35 | } |
2301 | 808 | id++; |
2302 | 1.38k | while (isalpha(*id) || isdigit(*id) || *id == '_') |
2303 | 574 | id += 1; |
2304 | | |
2305 | 808 | if (ALLOC(expr) < 0) |
2306 | 0 | goto err_nomem; |
2307 | 808 | expr->tag = E_VAR; |
2308 | 808 | expr->ident = strndup(state->pos, id - state->pos); |
2309 | 808 | if (expr->ident == NULL) |
2310 | 0 | goto err_nomem; |
2311 | | |
2312 | 808 | push_expr(expr, state); |
2313 | 808 | state->pos = id; |
2314 | 808 | return; |
2315 | 0 | err_nomem: |
2316 | 0 | STATE_ENOMEM; |
2317 | 0 | free_expr(expr); |
2318 | 0 | return; |
2319 | 808 | } |
2320 | | |
2321 | | /* |
2322 | | * PrimaryExpr ::= Literal |
2323 | | * | Number |
2324 | | * | FunctionCall |
2325 | | * | VariableReference |
2326 | | * | '(' Expr ')' |
2327 | | * |
2328 | | */ |
2329 | 8.22M | static void parse_primary_expr(struct state *state) { |
2330 | 8.22M | if (peek(state, "'\"")) { |
2331 | 3.56k | parse_literal(state); |
2332 | 8.22M | } else if (peek(state, "0123456789")) { |
2333 | 8.20M | parse_number(state); |
2334 | 8.20M | } else if (match(state, '(')) { |
2335 | 55 | parse_expr(state); |
2336 | 55 | RET_ON_ERROR; |
2337 | 29 | if (! match(state, ')')) { |
2338 | 9 | STATE_ERROR(state, PATHX_EPAREN); |
2339 | 9 | return; |
2340 | 9 | } |
2341 | 13.0k | } else if (match(state, '$')) { |
2342 | 843 | parse_var(state); |
2343 | 12.1k | } else { |
2344 | 12.1k | parse_function_call(state); |
2345 | 12.1k | } |
2346 | 8.22M | } |
2347 | | |
2348 | 12.3M | static int looking_at_primary_expr(struct state *state) { |
2349 | 12.3M | const char *s = state->pos; |
2350 | | /* Is it a Number, Literal or VariableReference ? */ |
2351 | 12.3M | if (peek(state, "$'\"0123456789")) |
2352 | 8.21M | return 1; |
2353 | | |
2354 | | /* Or maybe a function call, i.e. a word followed by a '(' ? |
2355 | | * Note that our function names are only [a-zA-Z]+ |
2356 | | */ |
2357 | 7.23M | while (*s != '\0' && isalpha(*s)) s++; |
2358 | 13.2M | while (*s != '\0' && isspace(*s)) s++; |
2359 | 4.11M | return *s == '('; |
2360 | 12.3M | } |
2361 | | |
2362 | | /* |
2363 | | * PathExpr ::= LocationPath |
2364 | | * | FilterExpr |
2365 | | * | FilterExpr '/' RelativeLocationPath |
2366 | | * | FilterExpr '//' RelativeLocationPath |
2367 | | * |
2368 | | * FilterExpr ::= PrimaryExpr Predicate |
2369 | | * |
2370 | | * The grammar is ambiguous here: the expression '42' can either be the |
2371 | | * number 42 (a PrimaryExpr) or the RelativeLocationPath 'child::42'. The |
2372 | | * reason for this ambiguity is that we allow node names like '42' in the |
2373 | | * tree; rather than forbid them, we resolve the ambiguity by always |
2374 | | * parsing '42' as a number, and requiring that the user write the |
2375 | | * RelativeLocationPath in a different form, e.g. 'child::42' or './42'. |
2376 | | */ |
2377 | 12.3M | static void parse_path_expr(struct state *state) { |
2378 | 12.3M | struct expr *expr = NULL; |
2379 | 12.3M | struct pred *predicates = NULL; |
2380 | 12.3M | struct locpath *locpath = NULL; |
2381 | | |
2382 | 12.3M | if (looking_at_primary_expr(state)) { |
2383 | 8.22M | parse_primary_expr(state); |
2384 | 8.22M | RET_ON_ERROR; |
2385 | 8.21M | predicates = parse_predicates(state); |
2386 | 8.21M | RET_ON_ERROR; |
2387 | 8.20M | if (match(state, '/')) { |
2388 | 99 | if (match(state, '/')) { |
2389 | 36 | locpath = parse_relative_location_path(state); |
2390 | 36 | if (HAS_ERROR(state)) |
2391 | 24 | goto error; |
2392 | | |
2393 | 12 | struct step *step = make_step(DESCENDANT_OR_SELF, state); |
2394 | 12 | if (HAS_ERROR(state)) |
2395 | 0 | return; |
2396 | 12 | list_cons(locpath->steps, step); |
2397 | 63 | } else { |
2398 | 63 | if (*state->pos == '\0') { |
2399 | 0 | STATE_ERROR(state, PATHX_EEND); |
2400 | 0 | goto error; |
2401 | 0 | } |
2402 | 63 | locpath = parse_relative_location_path(state); |
2403 | 63 | } |
2404 | 99 | } |
2405 | | /* A PathExpr without predicates and locpath is |
2406 | | * just a PrimaryExpr |
2407 | | */ |
2408 | 8.20M | if (predicates == NULL && locpath == NULL) |
2409 | 8.20M | return; |
2410 | | /* To make evaluation easier, we parse something like |
2411 | | * $var[pred] as $var[pred]/. |
2412 | | */ |
2413 | 105 | if (locpath == NULL) { |
2414 | 30 | if (ALLOC(locpath) < 0) |
2415 | 0 | goto error; |
2416 | 30 | if (ALLOC(locpath->steps) < 0) |
2417 | 0 | goto error; |
2418 | 30 | locpath->steps->axis = SELF; |
2419 | 30 | } |
2420 | 105 | if (ALLOC(expr) < 0) |
2421 | 0 | goto error; |
2422 | 105 | expr->tag = E_FILTER; |
2423 | 105 | expr->predicates = predicates; |
2424 | 105 | expr->primary = pop_expr(state); |
2425 | 105 | expr->locpath = locpath; |
2426 | 105 | push_expr(expr, state); |
2427 | 4.10M | } else { |
2428 | 4.10M | parse_location_path(state); |
2429 | 4.10M | } |
2430 | 4.10M | return; |
2431 | 4.10M | error: |
2432 | 24 | free_expr(expr); |
2433 | 24 | free_pred(predicates); |
2434 | 24 | free_locpath(locpath); |
2435 | 24 | return; |
2436 | 12.3M | } |
2437 | | |
2438 | | /* |
2439 | | * UnionExpr ::= PathExpr ('|' PathExpr)* |
2440 | | */ |
2441 | 12.1M | static void parse_union_expr(struct state *state) { |
2442 | 12.1M | parse_path_expr(state); |
2443 | 12.1M | RET_ON_ERROR; |
2444 | 12.1M | while (match(state, '|')) { |
2445 | 180k | parse_path_expr(state); |
2446 | 180k | RET_ON_ERROR; |
2447 | 109k | push_new_binary_op(OP_UNION, state); |
2448 | 109k | } |
2449 | 12.0M | } |
2450 | | |
2451 | | /* |
2452 | | * MultiplicativeExpr ::= UnionExpr ('*' UnionExpr)* |
2453 | | */ |
2454 | 8.10M | static void parse_multiplicative_expr(struct state *state) { |
2455 | 8.10M | parse_union_expr(state); |
2456 | 8.10M | RET_ON_ERROR; |
2457 | 11.9M | while (match(state, '*')) { |
2458 | 4.04M | parse_union_expr(state); |
2459 | 4.04M | RET_ON_ERROR; |
2460 | 4.04M | push_new_binary_op(OP_STAR, state); |
2461 | 4.04M | } |
2462 | 7.92M | } |
2463 | | |
2464 | | /* |
2465 | | * AdditiveExpr ::= MultiplicativeExpr (AdditiveOp MultiplicativeExpr)* |
2466 | | * AdditiveOp ::= '+' | '-' |
2467 | | */ |
2468 | 7.98M | static void parse_additive_expr(struct state *state) { |
2469 | 7.98M | parse_multiplicative_expr(state); |
2470 | 7.98M | RET_ON_ERROR; |
2471 | 7.92M | while (*state->pos == '+' || *state->pos == '-') { |
2472 | 116k | enum binary_op op = (*state->pos == '+') ? OP_PLUS : OP_MINUS; |
2473 | 116k | state->pos += 1; |
2474 | 116k | skipws(state); |
2475 | 116k | parse_multiplicative_expr(state); |
2476 | 116k | RET_ON_ERROR; |
2477 | 58.2k | push_new_binary_op(op, state); |
2478 | 58.2k | } |
2479 | 7.86M | } |
2480 | | |
2481 | | /* |
2482 | | * RelationalExpr ::= AdditiveExpr (RelationalOp AdditiveExpr)? |
2483 | | * RelationalOp ::= ">" | "<" | ">=" | "<=" |
2484 | | */ |
2485 | 7.91M | static void parse_relational_expr(struct state *state) { |
2486 | 7.91M | parse_additive_expr(state); |
2487 | 7.91M | RET_ON_ERROR; |
2488 | 7.80M | if (*state->pos == '<' || *state->pos == '>') { |
2489 | 69.8k | enum binary_op op = (*state->pos == '<') ? OP_LT : OP_GT; |
2490 | 69.8k | state->pos += 1; |
2491 | 69.8k | if (*state->pos == '=') { |
2492 | 0 | op = (op == OP_LT) ? OP_LE : OP_GE; |
2493 | 0 | state->pos += 1; |
2494 | 0 | } |
2495 | 69.8k | skipws(state); |
2496 | 69.8k | parse_additive_expr(state); |
2497 | 69.8k | RET_ON_ERROR; |
2498 | 2.76k | push_new_binary_op(op, state); |
2499 | 2.76k | } |
2500 | 7.80M | } |
2501 | | |
2502 | | /* |
2503 | | * EqualityExpr ::= RelationalExpr (EqualityOp RelationalExpr)? | ReMatchExpr |
2504 | | * EqualityOp ::= "=" | "!=" |
2505 | | * ReMatchExpr ::= RelationalExpr MatchOp RelationalExpr |
2506 | | * MatchOp ::= "=~" | "!~" |
2507 | | */ |
2508 | 7.84M | static void parse_equality_expr(struct state *state) { |
2509 | 7.84M | parse_relational_expr(state); |
2510 | 7.84M | RET_ON_ERROR; |
2511 | 7.73M | if ((*state->pos == '=' || *state->pos == '!') && state->pos[1] == '~') { |
2512 | 67.7k | enum binary_op op = (*state->pos == '=') ? OP_RE_MATCH : OP_RE_NOMATCH; |
2513 | 67.7k | state->pos += 2; |
2514 | 67.7k | skipws(state); |
2515 | 67.7k | parse_relational_expr(state); |
2516 | 67.7k | RET_ON_ERROR; |
2517 | 778 | push_new_binary_op(op, state); |
2518 | 7.66M | } else if (*state->pos == '=' || |
2519 | 7.66M | (*state->pos == '!' && state->pos[1] == '=')) { |
2520 | 5.88k | enum binary_op op = (*state->pos == '=') ? OP_EQ : OP_NEQ; |
2521 | 5.88k | state->pos += (op == OP_EQ) ? 1 : 2; |
2522 | 5.88k | skipws(state); |
2523 | 5.88k | parse_relational_expr(state); |
2524 | 5.88k | RET_ON_ERROR; |
2525 | 306 | push_new_binary_op(op, state); |
2526 | 306 | } |
2527 | 7.73M | } |
2528 | | |
2529 | | /* |
2530 | | * AndExpr ::= EqualityExpr ('and' EqualityExpr)* |
2531 | | */ |
2532 | 7.84M | static void parse_and_expr(struct state *state) { |
2533 | 7.84M | parse_equality_expr(state); |
2534 | 7.84M | RET_ON_ERROR; |
2535 | 7.66M | while (*state->pos == 'a' && state->pos[1] == 'n' |
2536 | 7.66M | && state->pos[2] == 'd') { |
2537 | 1.42k | state->pos += 3; |
2538 | 1.42k | skipws(state); |
2539 | 1.42k | parse_equality_expr(state); |
2540 | 1.42k | RET_ON_ERROR; |
2541 | 856 | push_new_binary_op(OP_AND, state); |
2542 | 856 | } |
2543 | 7.66M | } |
2544 | | |
2545 | | /* |
2546 | | * OrExpr ::= AndExpr ('or' AndExpr)* |
2547 | | */ |
2548 | 7.83M | static void parse_or_expr(struct state *state) { |
2549 | 7.83M | parse_and_expr(state); |
2550 | 7.83M | RET_ON_ERROR; |
2551 | 7.66M | while (*state->pos == 'o' && state->pos[1] == 'r') { |
2552 | 7.58k | state->pos += 2; |
2553 | 7.58k | skipws(state); |
2554 | 7.58k | parse_and_expr(state); |
2555 | 7.58k | RET_ON_ERROR; |
2556 | 7.51k | push_new_binary_op(OP_OR, state); |
2557 | 7.51k | } |
2558 | 7.65M | } |
2559 | | |
2560 | | /* |
2561 | | * ElseExpr ::= OrExpr ('else' OrExpr)* |
2562 | | */ |
2563 | 7.83M | static void parse_else_expr(struct state *state) { |
2564 | 7.83M | parse_or_expr(state); |
2565 | 7.83M | RET_ON_ERROR; |
2566 | 7.65M | while (*state->pos == 'e' && state->pos[1] == 'l' |
2567 | 7.65M | && state->pos[2] == 's' && state->pos[3] == 'e' ) { |
2568 | 3.58k | state->pos += 4; |
2569 | 3.58k | skipws(state); |
2570 | 3.58k | parse_or_expr(state); |
2571 | 3.58k | RET_ON_ERROR; |
2572 | 3.56k | push_new_binary_op(OP_ELSE, state); |
2573 | 3.56k | state->has_else = 1; |
2574 | 3.56k | } |
2575 | 7.65M | } |
2576 | | |
2577 | | /* |
2578 | | * Expr ::= ElseExpr |
2579 | | */ |
2580 | 7.83M | static void parse_expr(struct state *state) { |
2581 | 7.83M | skipws(state); |
2582 | 7.83M | parse_else_expr(state); |
2583 | 7.83M | } |
2584 | | |
2585 | 103k | static void store_error(struct pathx *pathx) { |
2586 | 103k | const char *pathx_msg = NULL; |
2587 | 103k | const char *path = pathx->state->txt; |
2588 | 103k | const pathx_errcode_t errcode = pathx->state->errcode; |
2589 | 103k | struct error *err = pathx->state->error; |
2590 | | |
2591 | 103k | char *pos_str = pathx->state->errmsg; |
2592 | 103k | pathx->state->errmsg = NULL; |
2593 | | |
2594 | 103k | if (err == NULL || errcode == PATHX_NOERROR || err->code != AUG_NOERROR) |
2595 | 101k | return; |
2596 | | |
2597 | 1.58k | switch (errcode) { |
2598 | 0 | case PATHX_ENOMEM: |
2599 | 0 | err->code = AUG_ENOMEM; |
2600 | 0 | break; |
2601 | 0 | case PATHX_EMMATCH: |
2602 | 0 | err->code = AUG_EMMATCH; |
2603 | 0 | break; |
2604 | 0 | case PATHX_ENOMATCH: |
2605 | 0 | err->code = AUG_ENOMATCH; |
2606 | 0 | break; |
2607 | 1.58k | default: |
2608 | 1.58k | err->code = AUG_EPATHX; |
2609 | 1.58k | break; |
2610 | 1.58k | } |
2611 | | |
2612 | | /* We only need details for pathx syntax errors */ |
2613 | 1.58k | if (err->code != AUG_EPATHX) |
2614 | 0 | return; |
2615 | | |
2616 | 1.58k | int pos; |
2617 | 1.58k | pathx_msg = pathx_error(pathx, NULL, &pos); |
2618 | | |
2619 | 1.58k | bool has_msg = pos_str != NULL; |
2620 | 1.58k | int pos_str_len = pos_str == NULL ? 0 : strlen(pos_str); |
2621 | 1.58k | if (REALLOC_N(pos_str, pos_str_len + strlen(path) + 8) >= 0) { |
2622 | 1.58k | if (has_msg) { |
2623 | 27 | strcat(pos_str, " in "); |
2624 | 27 | strncat(pos_str, path, pos); |
2625 | 1.56k | } else { |
2626 | | /* initialize pos_str explicitly, path might be "" */ |
2627 | 1.56k | pos_str[0] = '\0'; |
2628 | 1.56k | strncat(pos_str, path, pos); |
2629 | 1.56k | } |
2630 | 1.58k | strcat(pos_str, "|=|"); |
2631 | 1.58k | strcat(pos_str, path + pos); |
2632 | 1.58k | } |
2633 | | |
2634 | 1.58k | err->minor = errcode; |
2635 | 1.58k | err->details = pos_str; |
2636 | 1.58k | pos_str = NULL; |
2637 | 1.58k | err->minor_details = pathx_msg; |
2638 | 1.58k | } |
2639 | | |
2640 | | int pathx_parse(const struct tree *tree, |
2641 | | struct error *err, |
2642 | | const char *txt, |
2643 | | bool need_nodeset, |
2644 | | struct pathx_symtab *symtab, |
2645 | | struct tree *root_ctx, |
2646 | 103k | struct pathx **pathx) { |
2647 | 103k | struct state *state = NULL; |
2648 | | |
2649 | 103k | *pathx = NULL; |
2650 | | |
2651 | 103k | if (ALLOC(*pathx) < 0) |
2652 | 0 | goto oom; |
2653 | | |
2654 | 103k | (*pathx)->origin = (struct tree *) tree; |
2655 | | |
2656 | | /* Set up state */ |
2657 | 103k | if (ALLOC((*pathx)->state) < 0) |
2658 | 0 | goto oom; |
2659 | 103k | state = (*pathx)->state; |
2660 | | |
2661 | 103k | state->errcode = PATHX_NOERROR; |
2662 | 103k | state->errmsg = NULL; |
2663 | 103k | state->txt = txt; |
2664 | 103k | state->pos = txt; |
2665 | 103k | state->symtab = symtab; |
2666 | 103k | state->root_ctx = root_ctx; |
2667 | 103k | state->error = err; |
2668 | | |
2669 | 103k | if (ALLOC_N(state->value_pool, 8) < 0) { |
2670 | 0 | STATE_ENOMEM; |
2671 | 0 | goto done; |
2672 | 0 | } |
2673 | 103k | state->value_pool_size = 8; |
2674 | 103k | state->value_pool[0].tag = T_BOOLEAN; |
2675 | 103k | state->value_pool[0].boolval = 0; |
2676 | 103k | state->value_pool[1].tag = T_BOOLEAN; |
2677 | 103k | state->value_pool[1].boolval = 1; |
2678 | 103k | state->value_pool_used = 2; |
2679 | | |
2680 | | /* Parse */ |
2681 | 103k | parse_expr(state); |
2682 | 103k | if (HAS_ERROR(state)) |
2683 | 609 | goto done; |
2684 | 102k | if (state->pos != state->txt + strlen(state->txt)) { |
2685 | 357 | STATE_ERROR(state, PATHX_EEND); |
2686 | 357 | goto done; |
2687 | 357 | } |
2688 | | |
2689 | 102k | if (state->exprs_used != 1) { |
2690 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); |
2691 | 0 | goto done; |
2692 | 0 | } |
2693 | | |
2694 | | /* Typecheck */ |
2695 | 102k | check_expr(state->exprs[0], state); |
2696 | 102k | if (HAS_ERROR(state)) |
2697 | 325 | goto done; |
2698 | | |
2699 | 101k | if (need_nodeset && state->exprs[0]->type != T_NODESET) { |
2700 | 271 | STATE_ERROR(state, PATHX_ETYPE); |
2701 | 271 | goto done; |
2702 | 271 | } |
2703 | | |
2704 | 103k | done: |
2705 | 103k | store_error(*pathx); |
2706 | 103k | return state->errcode; |
2707 | 0 | oom: |
2708 | 0 | free_pathx(*pathx); |
2709 | 0 | *pathx = NULL; |
2710 | 0 | if (err != NULL) |
2711 | 0 | err->code = AUG_ENOMEM; |
2712 | 0 | return PATHX_ENOMEM; |
2713 | 101k | } |
2714 | | |
2715 | | /************************************************************************* |
2716 | | * Searching in the tree |
2717 | | *************************************************************************/ |
2718 | | |
2719 | 5.69M | static bool step_matches(struct step *step, struct tree *tree) { |
2720 | 5.69M | if ( step->axis == SEQ && step->name == NULL ) { |
2721 | 682 | if ( tree->label == NULL ) |
2722 | 0 | return false; |
2723 | | /* label matches if it consists of numeric digits only */ |
2724 | 734 | for( char *s = tree->label; *s ; s++) { |
2725 | 682 | if ( ! isdigit(*s) ) |
2726 | 630 | return false; |
2727 | 682 | } |
2728 | 52 | return true; |
2729 | 5.69M | } else if (step->name == NULL) { |
2730 | 4.49M | return step->axis == ROOT || tree->label != NULL; |
2731 | 4.49M | } else { |
2732 | 1.20M | return streqx(step->name, tree->label); |
2733 | 1.20M | } |
2734 | 5.69M | } |
2735 | | |
2736 | 30 | static struct tree *tree_prev(struct tree *pos) { |
2737 | 30 | struct tree *node = NULL; |
2738 | 30 | if (pos != pos->parent->children) { |
2739 | 15 | for (node = pos->parent->children; |
2740 | 15 | node->next != pos; |
2741 | 15 | node = node->next); |
2742 | 15 | } |
2743 | 30 | return node; |
2744 | 30 | } |
2745 | | |
2746 | | /* When the first step doesn't begin with ROOT then use relative root context |
2747 | | * instead. */ |
2748 | | static struct tree *step_root(struct step *step, struct tree *ctx, |
2749 | 2.33M | struct tree *root_ctx) { |
2750 | 2.33M | struct tree *node = NULL; |
2751 | 2.33M | switch (step->axis) { |
2752 | 1.89M | case SELF: |
2753 | 2.08M | case CHILD: |
2754 | 2.08M | case SEQ: |
2755 | 2.08M | case DESCENDANT: |
2756 | 2.08M | case PARENT: |
2757 | 2.08M | case ANCESTOR: |
2758 | 2.08M | case PRECEDING_SIBLING: |
2759 | 2.08M | case FOLLOWING_SIBLING: |
2760 | | /* only use root_ctx when ctx is the absolute tree root */ |
2761 | 2.08M | if (ctx == ctx->parent && root_ctx != NULL) |
2762 | 2.35k | node = root_ctx; |
2763 | 2.08M | else |
2764 | 2.08M | node = ctx; |
2765 | 2.08M | break; |
2766 | 160k | case ROOT: |
2767 | 254k | case DESCENDANT_OR_SELF: |
2768 | 254k | node = ctx; |
2769 | 254k | break; |
2770 | 0 | default: |
2771 | 0 | assert(0); |
2772 | 2.33M | } |
2773 | 2.33M | if (node == NULL) |
2774 | 0 | return NULL; |
2775 | 2.33M | return node; |
2776 | 2.33M | } |
2777 | | |
2778 | 3.95M | static struct tree *step_first(struct step *step, struct tree *ctx) { |
2779 | 3.95M | struct tree *node = NULL; |
2780 | 3.95M | switch (step->axis) { |
2781 | 2.03M | case SELF: |
2782 | 2.12M | case DESCENDANT_OR_SELF: |
2783 | 2.12M | node = ctx; |
2784 | 2.12M | break; |
2785 | 1.66M | case CHILD: |
2786 | 1.66M | case SEQ: |
2787 | 1.66M | case DESCENDANT: |
2788 | 1.66M | node = ctx->children; |
2789 | 1.66M | break; |
2790 | 6 | case PARENT: |
2791 | 12 | case ANCESTOR: |
2792 | 12 | node = ctx->parent; |
2793 | 12 | break; |
2794 | 160k | case ROOT: |
2795 | 343k | while (ctx->parent != ctx) |
2796 | 182k | ctx = ctx->parent; |
2797 | 160k | node = ctx; |
2798 | 160k | break; |
2799 | 15 | case PRECEDING_SIBLING: |
2800 | 15 | node = tree_prev(ctx); |
2801 | 15 | break; |
2802 | 6 | case FOLLOWING_SIBLING: |
2803 | 6 | node = ctx->next; |
2804 | 6 | break; |
2805 | 0 | default: |
2806 | 0 | assert(0); |
2807 | 3.95M | } |
2808 | 3.95M | if (node == NULL) |
2809 | 638k | return NULL; |
2810 | 3.31M | if (step_matches(step, node)) |
2811 | 3.01M | return node; |
2812 | 299k | return step_next(step, ctx, node); |
2813 | 3.31M | } |
2814 | | |
2815 | | static struct tree *step_next(struct step *step, struct tree *ctx, |
2816 | 4.98M | struct tree *node) { |
2817 | 9.01M | while (node != NULL) { |
2818 | 5.69M | switch (step->axis) { |
2819 | 2.03M | case SELF: |
2820 | 2.03M | node = NULL; |
2821 | 2.03M | break; |
2822 | 850 | case SEQ: |
2823 | 2.20M | case CHILD: |
2824 | 2.20M | node = node->next; |
2825 | 2.20M | break; |
2826 | 0 | case DESCENDANT: |
2827 | 1.30M | case DESCENDANT_OR_SELF: |
2828 | 1.30M | if (node->children != NULL) { |
2829 | 708k | node = node->children; |
2830 | 708k | } else { |
2831 | 1.30M | while (node->next == NULL && node != ctx) |
2832 | 708k | node = node->parent; |
2833 | 593k | if (node == ctx) |
2834 | 97.3k | node = NULL; |
2835 | 496k | else |
2836 | 496k | node = node->next; |
2837 | 593k | } |
2838 | 1.30M | break; |
2839 | 6 | case PARENT: |
2840 | 160k | case ROOT: |
2841 | 160k | node = NULL; |
2842 | 160k | break; |
2843 | 6 | case ANCESTOR: |
2844 | 6 | if (node->parent == node) |
2845 | 6 | node = NULL; |
2846 | 0 | else |
2847 | 0 | node = node->parent; |
2848 | 6 | break; |
2849 | 15 | case PRECEDING_SIBLING: |
2850 | 15 | node = tree_prev(node); |
2851 | 15 | break; |
2852 | 0 | case FOLLOWING_SIBLING: |
2853 | 0 | node = node->next; |
2854 | 0 | break; |
2855 | 0 | default: |
2856 | 0 | assert(0); |
2857 | 5.69M | } |
2858 | 5.69M | if (node != NULL && step_matches(step, node)) |
2859 | 1.66M | break; |
2860 | 5.69M | } |
2861 | 4.98M | return node; |
2862 | 4.98M | } |
2863 | | |
2864 | 103k | static struct value *pathx_eval(struct pathx *pathx) { |
2865 | 103k | struct state *state = pathx->state; |
2866 | 103k | state->ctx = pathx->origin; |
2867 | 103k | state->ctx_pos = 1; |
2868 | 103k | state->ctx_len = 1; |
2869 | 103k | eval_expr(state->exprs[0], state); |
2870 | 103k | if (HAS_ERROR(state)) |
2871 | 27 | return NULL; |
2872 | | |
2873 | 103k | if (state->values_used != 1) { |
2874 | 0 | STATE_ERROR(state, PATHX_EINTERNAL); |
2875 | 0 | return NULL; |
2876 | 0 | } |
2877 | 103k | return pop_value(state); |
2878 | 103k | } |
2879 | | |
2880 | 12.5k | struct tree *pathx_next(struct pathx *pathx) { |
2881 | 12.5k | if (pathx->node + 1 < pathx->nodeset->used) |
2882 | 12.3k | return pathx->nodeset->nodes[++pathx->node]; |
2883 | 154 | return NULL; |
2884 | 12.5k | } |
2885 | | |
2886 | | /* Find the first node in TREE matching PATH. */ |
2887 | 100k | struct tree *pathx_first(struct pathx *pathx) { |
2888 | 100k | if (pathx->nodeset == NULL) { |
2889 | 69.3k | struct value *v = pathx_eval(pathx); |
2890 | | |
2891 | 69.3k | if (HAS_ERROR(pathx->state)) |
2892 | 25 | goto error; |
2893 | 69.3k | assert(v->tag == T_NODESET); |
2894 | 69.3k | pathx->nodeset = v->nodeset; |
2895 | 69.3k | } |
2896 | 100k | pathx->node = 0; |
2897 | 100k | if (pathx->nodeset->used == 0) |
2898 | 3.83k | return NULL; |
2899 | 96.4k | else |
2900 | 96.4k | return pathx->nodeset->nodes[0]; |
2901 | 25 | error: |
2902 | 25 | store_error(pathx); |
2903 | 25 | return NULL; |
2904 | 100k | } |
2905 | | |
2906 | | /* Find a node in the tree that matches the longest prefix of PATH. |
2907 | | * |
2908 | | * Return 1 if a node was found that exactly matches PATH, 0 if an incomplete |
2909 | | * prefix matches, and -1 if more than one node in the tree match. |
2910 | | * |
2911 | | * TMATCH is set to the tree node that matches, and SMATCH to the next step |
2912 | | * after the one where TMATCH matched. If no node matches or multiple nodes |
2913 | | * at the same depth match, TMATCH and SMATCH will be NULL. When exactly |
2914 | | * one node matches, TMATCH will be that node, and SMATCH will be NULL. |
2915 | | */ |
2916 | | static int locpath_search(struct locpath_trace *lpt, |
2917 | 33.6k | struct tree **tmatch, struct step **smatch) { |
2918 | 33.6k | int last; |
2919 | 33.6k | int result = -1; |
2920 | | |
2921 | 72.3k | for (last=lpt->maxns; last >= 0 && lpt->ns[last]->used == 0; last--); |
2922 | 33.6k | if (last < 0) { |
2923 | 0 | *smatch = lpt->lp->steps; |
2924 | 0 | result = 1; |
2925 | 0 | goto done; |
2926 | 0 | } |
2927 | 33.6k | if (lpt->ns[last]->used > 1) { |
2928 | 0 | result = -1; |
2929 | 0 | goto done; |
2930 | 0 | } |
2931 | 33.6k | result = 0; |
2932 | 33.6k | *tmatch = lpt->ns[last]->nodes[0]; |
2933 | 33.6k | *smatch = lpt->lp->steps; |
2934 | 144k | for (int i=0; i < last; i++) |
2935 | 111k | *smatch = (*smatch)->next; |
2936 | 33.6k | done: |
2937 | 183k | for (int i=0; i < lpt->maxns; i++) |
2938 | 149k | free_nodeset(lpt->ns[i]); |
2939 | 33.6k | FREE(lpt->ns); |
2940 | 33.6k | return result; |
2941 | 33.6k | } |
2942 | | |
2943 | | static char *step_seq_choose_name(struct pathx *path, struct tree *tree); |
2944 | | |
2945 | | /* Expand the tree ROOT so that it contains all components of PATH. PATH |
2946 | | * must have been initialized against ROOT by a call to PATH_FIND_ONE. |
2947 | | * |
2948 | | * Return the first segment that was created by this operation, or NULL on |
2949 | | * error. |
2950 | | */ |
2951 | 33.6k | int pathx_expand_tree(struct pathx *path, struct tree **tree) { |
2952 | 33.6k | int r; |
2953 | 33.6k | struct step *step = NULL; |
2954 | 33.6k | struct locpath_trace lpt; |
2955 | 33.6k | struct tree *first_child = NULL; |
2956 | 33.6k | struct value *v = NULL; |
2957 | | |
2958 | 33.6k | MEMZERO(&lpt, 1); |
2959 | 33.6k | path->state->locpath_trace = &lpt; |
2960 | 33.6k | v = pathx_eval(path); |
2961 | 33.6k | path->state->locpath_trace = NULL; |
2962 | 33.6k | if (HAS_ERROR(path->state)) |
2963 | 0 | goto error; |
2964 | | |
2965 | 33.6k | if (lpt.maxns == 0) { |
2966 | 0 | if (v->tag != T_NODESET || v->nodeset->used == 0) { |
2967 | 0 | STATE_ERROR(path->state, PATHX_ENOMATCH); |
2968 | 0 | goto error; |
2969 | 0 | } |
2970 | 0 | if (v->nodeset->used > 1) |
2971 | 0 | goto error; |
2972 | 0 | *tree = v->nodeset->nodes[0]; |
2973 | 0 | return 0; |
2974 | 0 | } |
2975 | | |
2976 | 33.6k | *tree = path->origin; |
2977 | 33.6k | r = locpath_search(&lpt, tree, &step); |
2978 | 33.6k | if (r == -1) { |
2979 | 0 | STATE_ERROR(path->state, PATHX_EMMATCH); |
2980 | 0 | goto error; |
2981 | 0 | } |
2982 | | |
2983 | 33.6k | if (step == NULL) |
2984 | 1.68k | return 0; |
2985 | | |
2986 | 31.9k | struct tree *parent = *tree; |
2987 | 31.9k | if (parent == NULL) |
2988 | 0 | parent = path->origin; |
2989 | | |
2990 | 38.6k | list_for_each(s, step) { |
2991 | 38.6k | if (s->axis != CHILD && s->axis != SEQ) |
2992 | 0 | goto error; |
2993 | 38.6k | if (s->axis==SEQ && s->name == NULL) { |
2994 | 0 | s->name = step_seq_choose_name(path, parent); |
2995 | 0 | } |
2996 | 38.6k | if (s->name == NULL ) |
2997 | 0 | goto error; |
2998 | 38.6k | struct tree *t = make_tree(strdup(s->name), NULL, parent, NULL); |
2999 | 38.6k | if (first_child == NULL) |
3000 | 31.9k | first_child = t; |
3001 | 38.6k | if (t == NULL || t->label == NULL) |
3002 | 0 | goto error; |
3003 | 38.6k | list_append(parent->children, t); |
3004 | 0 | parent = t; |
3005 | 38.6k | } |
3006 | | |
3007 | 38.6k | while (first_child->children != NULL) |
3008 | 6.72k | first_child = first_child->children; |
3009 | | |
3010 | 31.9k | *tree = first_child; |
3011 | 31.9k | return 1; |
3012 | | |
3013 | 0 | error: |
3014 | 0 | if (first_child != NULL) { |
3015 | 0 | list_remove(first_child, first_child->parent->children); |
3016 | 0 | free_tree(first_child); |
3017 | 0 | } |
3018 | 0 | *tree = NULL; |
3019 | 0 | store_error(path); |
3020 | 0 | return -1; |
3021 | 31.9k | } |
3022 | | |
3023 | | /* Generate a numeric string to use for step->name |
3024 | | * Scan tree->children for the highest numbered label, and add 1 to that |
3025 | | * numeric labels may be interspersed with #comment or other labels |
3026 | | */ |
3027 | 0 | static char *step_seq_choose_name(struct pathx *path, struct tree *tree) { |
3028 | 0 | unsigned long int max_node_n=0; |
3029 | 0 | unsigned long int node_n; |
3030 | 0 | char *step_name; |
3031 | 0 | char *label_end; |
3032 | 0 | for(tree=tree->children; tree!=NULL; tree=tree->next) { |
3033 | 0 | if ( tree->label == NULL) |
3034 | 0 | continue; |
3035 | 0 | node_n=strtoul(tree->label, &label_end, 10); |
3036 | 0 | if ( label_end == tree->label || *label_end != '\0' ) |
3037 | | /* label is not a number - ignore it */ |
3038 | 0 | continue; |
3039 | 0 | if ( node_n >= ULONG_MAX ) { |
3040 | 0 | STATE_ERROR(path->state, PATHX_ENUMBER); |
3041 | 0 | return NULL; |
3042 | 0 | } |
3043 | 0 | if( node_n > max_node_n ) |
3044 | 0 | max_node_n = node_n; |
3045 | 0 | } |
3046 | 0 | if (asprintf(&step_name,"%lu",max_node_n+1) >= 0) |
3047 | 0 | return step_name; |
3048 | 0 | else |
3049 | 0 | return NULL; |
3050 | 0 | } |
3051 | | |
3052 | 67.0k | int pathx_find_one(struct pathx *path, struct tree **tree) { |
3053 | 67.0k | *tree = pathx_first(path); |
3054 | 67.0k | if (HAS_ERROR(path->state)) |
3055 | 19 | return -1; |
3056 | 67.0k | return path->nodeset->used; |
3057 | 67.0k | } |
3058 | | |
3059 | 0 | struct error *err_of_pathx(struct pathx *px) { |
3060 | 0 | return px->state->error; |
3061 | 0 | } |
3062 | | |
3063 | 1.58k | const char *pathx_error(struct pathx *path, const char **txt, int *pos) { |
3064 | 1.58k | int errcode = PATHX_ENOMEM; |
3065 | | |
3066 | 1.58k | if (path != NULL) { |
3067 | 1.58k | if (path->state->errcode < ARRAY_CARDINALITY(errcodes)) |
3068 | 1.58k | errcode = path->state->errcode; |
3069 | 0 | else |
3070 | 0 | errcode = PATHX_EINTERNAL; |
3071 | | |
3072 | 1.58k | if (txt) |
3073 | 0 | *txt = path->state->txt; |
3074 | | |
3075 | 1.58k | if (pos) |
3076 | 1.58k | *pos = path->state->pos - path->state->txt; |
3077 | 1.58k | } |
3078 | 1.58k | return errcodes[errcode]; |
3079 | 1.58k | } |
3080 | | |
3081 | | /* |
3082 | | * Symbol tables |
3083 | | */ |
3084 | | static struct pathx_symtab |
3085 | | *make_symtab(struct pathx_symtab *symtab, const char *name, |
3086 | | struct value *value) |
3087 | 92 | { |
3088 | 92 | struct pathx_symtab *new; |
3089 | 92 | char *n = NULL; |
3090 | | |
3091 | 92 | n = strdup(name); |
3092 | 92 | if (n == NULL) |
3093 | 0 | return NULL; |
3094 | | |
3095 | 92 | if (ALLOC(new) < 0) { |
3096 | 0 | free(n); |
3097 | 0 | return NULL; |
3098 | 0 | } |
3099 | 92 | new->name = n; |
3100 | 92 | new->value = value; |
3101 | 92 | if (symtab == NULL) { |
3102 | 92 | return new; |
3103 | 92 | } else { |
3104 | 0 | new->next = symtab->next; |
3105 | 0 | symtab->next = new; |
3106 | 0 | } |
3107 | 0 | return symtab; |
3108 | 92 | } |
3109 | | |
3110 | 1.68k | void free_symtab(struct pathx_symtab *symtab) { |
3111 | | |
3112 | 1.77k | while (symtab != NULL) { |
3113 | 92 | struct pathx_symtab *del = symtab; |
3114 | 92 | symtab = del->next; |
3115 | 92 | free(del->name); |
3116 | 92 | release_value(del->value); |
3117 | 92 | free(del->value); |
3118 | 92 | free(del); |
3119 | 92 | } |
3120 | 1.68k | } |
3121 | | |
3122 | 0 | struct pathx_symtab *pathx_get_symtab(struct pathx *pathx) { |
3123 | 0 | return pathx->state->symtab; |
3124 | 0 | } |
3125 | | |
3126 | | static int pathx_symtab_set(struct pathx_symtab **symtab, |
3127 | 92 | const char *name, struct value *v) { |
3128 | 92 | int found = 0; |
3129 | | |
3130 | 92 | list_for_each(tab, *symtab) { |
3131 | 0 | if (STREQ(tab->name, name)) { |
3132 | 0 | release_value(tab->value); |
3133 | 0 | free(tab->value); |
3134 | 0 | tab->value = v; |
3135 | 0 | found = 1; |
3136 | 0 | break; |
3137 | 0 | } |
3138 | 0 | } |
3139 | | |
3140 | 92 | if (!found) { |
3141 | 92 | struct pathx_symtab *new = NULL; |
3142 | | |
3143 | 92 | new = make_symtab(*symtab, name, v); |
3144 | 92 | if (new == NULL) |
3145 | 0 | goto error; |
3146 | 92 | *symtab = new; |
3147 | 92 | } |
3148 | 92 | return 0; |
3149 | 0 | error: |
3150 | 0 | return -1; |
3151 | 92 | } |
3152 | | |
3153 | | int pathx_symtab_define(struct pathx_symtab **symtab, |
3154 | 94 | const char *name, struct pathx *px) { |
3155 | 94 | int r; |
3156 | 94 | struct value *value = NULL, *v = NULL; |
3157 | 94 | struct state *state = px->state; |
3158 | | |
3159 | 94 | value = pathx_eval(px); |
3160 | 94 | if (HAS_ERROR(px->state)) |
3161 | 2 | goto error; |
3162 | | |
3163 | 92 | if (ALLOC(v) < 0) { |
3164 | 0 | STATE_ENOMEM; |
3165 | 0 | goto error; |
3166 | 0 | } |
3167 | | |
3168 | 92 | *v = *value; |
3169 | 92 | value->tag = T_BOOLEAN; |
3170 | | |
3171 | 92 | r = pathx_symtab_set(symtab, name, v); |
3172 | 92 | if (r < 0) { |
3173 | 0 | STATE_ENOMEM; |
3174 | 0 | goto error; |
3175 | 0 | } |
3176 | | |
3177 | 92 | if (v->tag == T_NODESET) |
3178 | 60 | return v->nodeset->used; |
3179 | 32 | else |
3180 | 32 | return 0; |
3181 | 2 | error: |
3182 | 2 | release_value(value); |
3183 | 2 | free(value); |
3184 | 2 | release_value(v); |
3185 | 2 | free(v); |
3186 | 2 | store_error(px); |
3187 | 2 | return -1; |
3188 | 92 | } |
3189 | | |
3190 | 0 | int pathx_symtab_undefine(struct pathx_symtab **symtab, const char *name) { |
3191 | 0 | struct pathx_symtab *del = NULL; |
3192 | |
|
3193 | 0 | for(del = *symtab; |
3194 | 0 | del != NULL && !STREQ(del->name, name); |
3195 | 0 | del = del->next); |
3196 | 0 | if (del == NULL) |
3197 | 0 | return 0; |
3198 | 0 | list_remove(del, *symtab); |
3199 | 0 | free_symtab(del); |
3200 | 0 | return 0; |
3201 | 0 | } |
3202 | | |
3203 | | int pathx_symtab_assign_tree(struct pathx_symtab **symtab, |
3204 | 0 | const char *name, struct tree *tree) { |
3205 | 0 | struct value *v = NULL; |
3206 | 0 | int r; |
3207 | |
|
3208 | 0 | if (ALLOC(v) < 0) |
3209 | 0 | goto error; |
3210 | | |
3211 | 0 | v->tag = T_NODESET; |
3212 | 0 | if (ALLOC(v->nodeset) < 0) |
3213 | 0 | goto error; |
3214 | 0 | if (ALLOC_N(v->nodeset->nodes, 1) < 0) |
3215 | 0 | goto error; |
3216 | 0 | v->nodeset->used = 1; |
3217 | 0 | v->nodeset->size = 1; |
3218 | 0 | v->nodeset->nodes[0] = tree; |
3219 | |
|
3220 | 0 | r = pathx_symtab_set(symtab, name, v); |
3221 | 0 | if (r < 0) |
3222 | 0 | goto error; |
3223 | 0 | return 1; |
3224 | 0 | error: |
3225 | 0 | release_value(v); |
3226 | 0 | free(v); |
3227 | 0 | return -1; |
3228 | 0 | } |
3229 | | |
3230 | | int |
3231 | 0 | pathx_symtab_count(const struct pathx_symtab *symtab, const char *name) { |
3232 | 0 | struct value *v = lookup_var(name, symtab); |
3233 | |
|
3234 | 0 | if (v == NULL || v->tag != T_NODESET) |
3235 | 0 | return -1; |
3236 | | |
3237 | 0 | return v->nodeset->used; |
3238 | 0 | } |
3239 | | |
3240 | | struct tree * |
3241 | | pathx_symtab_get_tree(struct pathx_symtab *symtab, |
3242 | 0 | const char *name, int i) { |
3243 | 0 | struct value *v = lookup_var(name, symtab); |
3244 | 0 | if (v == NULL) |
3245 | 0 | return NULL; |
3246 | 0 | if (v->tag != T_NODESET) |
3247 | 0 | return NULL; |
3248 | 0 | if (i >= v->nodeset->used) |
3249 | 0 | return NULL; |
3250 | 0 | return v->nodeset->nodes[i]; |
3251 | 0 | } |
3252 | | |
3253 | | void pathx_symtab_remove_descendants(struct pathx_symtab *symtab, |
3254 | 0 | const struct tree *tree) { |
3255 | 0 | list_for_each(tab, symtab) { |
3256 | 0 | if (tab->value->tag != T_NODESET) |
3257 | 0 | continue; |
3258 | 0 | struct nodeset *ns = tab->value->nodeset; |
3259 | 0 | for (int i=0; i < ns->used;) { |
3260 | 0 | struct tree *t = ns->nodes[i]; |
3261 | 0 | while (t != t->parent && t != tree) |
3262 | 0 | t = t->parent; |
3263 | 0 | if (t == tree) |
3264 | 0 | ns_remove(ns, i, 1); |
3265 | 0 | else |
3266 | 0 | i += 1; |
3267 | 0 | } |
3268 | 0 | } |
3269 | 0 | } |
3270 | | |
3271 | | /* |
3272 | | * Local variables: |
3273 | | * indent-tabs-mode: nil |
3274 | | * c-indent-level: 4 |
3275 | | * c-basic-offset: 4 |
3276 | | * tab-width: 4 |
3277 | | * End: |
3278 | | */ |