Coverage Report

Created: 2026-06-10 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hts_expr.c
Line
Count
Source
1
/*  hts_expr.c -- filter expression parsing and processing.
2
3
    Copyright (C) 2020-2022, 2024 Genome Research Ltd.
4
5
    Author: James Bonfield <jkb@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notices and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
// TODO:
26
// - ?: operator for conditionals?
27
28
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
29
#include <config.h>
30
31
#include <stdio.h>
32
#include <string.h>
33
#include <ctype.h>
34
#include <stdlib.h>
35
#include <stdint.h>
36
#include <float.h>
37
#include <regex.h>
38
#include <math.h>
39
40
#include "htslib/hts_expr.h"
41
#include "htslib/hts_alloc.h"
42
#include "htslib/hts_log.h"
43
#include "textutils_internal.h"
44
45
// Could also cache hts_expr_val_t stack here for kstring reuse?
46
0
#define MAX_REGEX 10
47
struct hts_filter_t {
48
    char *str;
49
    int parsed;
50
    int curr_regex, max_regex;
51
    regex_t preg[MAX_REGEX];
52
};
53
54
/*
55
 * This is designed to be mostly C like with mostly same the precedence rules,
56
 * with the exception of bit operators (widely considered as a mistake in C).
57
 * It's not full C (eg no bit-shifting), but good enough for our purposes.
58
 *
59
 * Supported syntax, in order of precedence:
60
 *
61
 * Grouping:      (, ),   eg "(1+2)*3"
62
 * Values:        integers, floats, strings or variables
63
 * Unary ops:     +, -, !, ~  eg -10 +10, !10 (0), ~5 (bitwise not)
64
 * Math ops:      *, /, %  [TODO: add // for floor division?]
65
 * Math ops:      +, -
66
 * Bit-wise:      &, ^, |  [NB as 3 precedence levels, in that order]
67
 * Conditionals:  >, >=, <, <=,
68
 * Equality:      ==, !=, =~, !~
69
 * Boolean:       &&, ||
70
 */
71
72
// Skip to start of term
73
0
static char *ws(char *str) {
74
0
    while (*str && (*str == ' ' || *str == '\t'))
75
0
        str++;
76
0
    return str;
77
0
}
78
79
static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
80
                      char *str, char **end, hts_expr_val_t *res);
81
82
/*
83
 * Simple functions operating on strings only.
84
 * length, min, max, avg.
85
 *
86
 * All return 0 on success,
87
 *           -1 on failure
88
 */
89
0
static int expr_func_length(hts_expr_val_t *res) {
90
0
    if (!res->is_str)
91
0
        return -1;
92
93
0
    res->is_str = 0;
94
0
    res->d = res->s.l;
95
0
    return 0;
96
0
}
97
98
0
static int expr_func_min(hts_expr_val_t *res) {
99
0
    if (!res->is_str)
100
0
        return -1;
101
102
0
    size_t l = res->s.l;
103
0
    int v = INT_MAX;
104
0
    const uint8_t *x = (uint8_t *)res->s.s;
105
0
    for (l = 0; l < res->s.l; l++)
106
0
        if (v > x[l])
107
0
            v = x[l];
108
109
0
    res->is_str = 0;
110
0
    res->d = v == INT_MAX ? NAN : v;
111
112
0
    return 0;
113
0
}
114
115
0
static int expr_func_max(hts_expr_val_t *res) {
116
0
    if (!res->is_str)
117
0
        return -1;
118
119
0
    size_t l = res->s.l;
120
0
    int v = INT_MIN;
121
0
    const uint8_t *x = (uint8_t *)res->s.s;
122
0
    for (l = 0; l < res->s.l; l++)
123
0
        if (v < x[l])
124
0
            v = x[l];
125
126
0
    res->is_str = 0;
127
0
    res->d = v == INT_MIN ? NAN : v;
128
129
0
    return 0;
130
0
}
131
132
0
static int expr_func_avg(hts_expr_val_t *res) {
133
0
    if (!res->is_str)
134
0
        return -1;
135
136
0
    size_t l = res->s.l;
137
0
    double v = 0;
138
0
    const uint8_t *x = (uint8_t *)res->s.s;
139
0
    for (l = 0; l < res->s.l; l++)
140
0
        v += x[l];
141
0
    if (l)
142
0
        v /= l;
143
144
0
    res->is_str = 0;
145
0
    res->d = v;
146
147
0
    return 0;
148
0
}
149
150
/*
151
 * functions:  FUNC(expr).
152
 * Note for simplicity of parsing, the "(" must immediately follow FUNC,
153
 * so "FUNC (x)" is invalid.
154
 */
155
static int func_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
156
0
                     char *str, char **end, hts_expr_val_t *res) {
157
0
    int func_ok = -1;
158
0
    switch (*str) {
159
0
    case 'a':
160
0
        if (strncmp(str, "avg(", 4) == 0) {
161
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
162
0
            func_ok = expr_func_avg(res);
163
0
        }
164
0
        break;
165
166
0
    case 'd':
167
0
        if (strncmp(str, "default(", 8) == 0) {
168
0
            if (expression(filt, data, fn, str+8, end, res)) return -1;
169
0
            if (**end != ',')
170
0
                return -1;
171
0
            (*end)++;
172
0
            hts_expr_val_t val = HTS_EXPR_VAL_INIT;
173
0
            if (expression(filt, data, fn, ws(*end), end, &val)) return -1;
174
0
            func_ok = 1;
175
0
            if (!hts_expr_val_existsT(res)) {
176
0
                kstring_t swap = res->s;
177
0
                *res = val;
178
0
                val.s = swap;
179
0
                hts_expr_val_free(&val);
180
0
            }
181
0
        }
182
0
        break;
183
184
0
    case 'e':
185
0
        if (strncmp(str, "exists(", 7) == 0) {
186
0
            if (expression(filt, data, fn, str+7, end, res)) return -1;
187
0
            func_ok = 1;
188
0
            res->is_true = res->d = hts_expr_val_existsT(res);
189
0
            res->is_str = 0;
190
0
        } else if (strncmp(str, "exp(", 4) == 0) {
191
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
192
0
            func_ok = 1;
193
0
            res->d = exp(res->d);
194
0
            res->is_str = 0;
195
0
            if (isnan(res->d))
196
0
                hts_expr_val_undef(res);
197
0
        }
198
199
0
        break;
200
201
0
    case 'l':
202
0
        if (strncmp(str, "length(", 7) == 0) {
203
0
            if (expression(filt, data, fn, str+7, end, res)) return -1;
204
0
            func_ok = expr_func_length(res);
205
0
        } else if (strncmp(str, "log(", 4) == 0) {
206
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
207
0
            func_ok = 1;
208
0
            res->d = log(res->d);
209
0
            res->is_str = 0;
210
0
            if (isnan(res->d))
211
0
                hts_expr_val_undef(res);
212
0
        }
213
0
        break;
214
215
0
    case 'm':
216
0
        if (strncmp(str, "min(", 4) == 0) {
217
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
218
0
            func_ok = expr_func_min(res);
219
0
        } else if (strncmp(str, "max(", 4) == 0) {
220
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
221
0
            func_ok = expr_func_max(res);
222
0
        }
223
0
        break;
224
225
0
    case 'p':
226
0
        if (strncmp(str, "pow(", 4) == 0) {
227
0
            if (expression(filt, data, fn, str+4, end, res)) return -1;
228
0
            func_ok = 1;
229
230
0
            if (**end != ',')
231
0
                return -1;
232
0
            (*end)++;
233
0
            hts_expr_val_t val = HTS_EXPR_VAL_INIT;
234
0
            if (expression(filt, data, fn, ws(*end), end, &val)) return -1;
235
0
            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
236
0
                hts_expr_val_undef(res);
237
0
            } else if (res->is_str || val.is_str) {
238
0
                hts_expr_val_free(&val); // arith on strings
239
0
                return -1;
240
0
            } else {
241
0
                func_ok = 1;
242
0
                res->d = pow(res->d, val.d);
243
0
                hts_expr_val_free(&val);
244
0
                res->is_str = 0;
245
0
            }
246
247
0
            if (isnan(res->d))
248
0
                hts_expr_val_undef(res);
249
0
        }
250
0
        break;
251
252
0
    case 's':
253
0
        if (strncmp(str, "sqrt(", 5) == 0) {
254
0
            if (expression(filt, data, fn, str+5, end, res)) return -1;
255
0
            func_ok = 1;
256
0
            res->d = sqrt(res->d);
257
0
            res->is_str = 0;
258
0
            if (isnan(res->d))
259
0
                hts_expr_val_undef(res);
260
0
        }
261
0
        break;
262
0
    }
263
264
0
    if (func_ok < 0)
265
0
        return -1;
266
267
0
    str = ws(*end);
268
0
    if (*str != ')') {
269
0
        fprintf(stderr, "Missing ')'\n");
270
0
        return -1;
271
0
    }
272
0
    *end = str+1;
273
274
0
    return 0;
275
0
}
276
277
/*
278
 * simple_expr
279
 *     : identifier
280
 *     | constant
281
 *     | string
282
 *     | func_expr
283
 *     | '(' expression ')'
284
*/
285
static int simple_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
286
0
                       char *str, char **end, hts_expr_val_t *res) {
287
    // Main recursion step
288
0
    str = ws(str);
289
0
    if (*str == '(') {
290
0
        if (expression(filt, data, fn, str+1, end, res)) return -1;
291
0
        str = ws(*end);
292
0
        if (*str != ')') {
293
0
            fprintf(stderr, "Missing ')'\n");
294
0
            return -1;
295
0
        }
296
0
        *end = str+1;
297
298
0
        return 0;
299
0
    }
300
301
    // Otherwise a basic element.
302
0
    int fail = 0;
303
0
    double d = hts_str2dbl(str, end, &fail);
304
0
    if (str != *end) {
305
0
        res->is_str = 0;
306
0
        res->d = d;
307
0
    } else {
308
        // Not valid floating point syntax.
309
        // TODO: add function call names in here; len(), sqrt(), pow(), etc
310
0
        if (*str == '"') {
311
0
            res->is_str = 1;
312
0
            char *e = str+1;
313
0
            int backslash = 0;
314
0
            while (*e && *e != '"') {
315
0
                if (*e == '\\')
316
0
                    backslash=1, e+=1+(e[1]!='\0');
317
0
                else
318
0
                    e++;
319
0
            }
320
321
0
            kputsn(str+1, e-(str+1), ks_clear(&res->s));
322
0
            if (backslash) {
323
0
                size_t i, j;
324
0
                for (i = j = 0; i < res->s.l; i++) {
325
0
                    res->s.s[j++] = res->s.s[i];
326
0
                    if (res->s.s[i] == '\\') {
327
0
                        switch (res->s.s[++i]) {
328
0
                        case '"': res->s.s[j-1] = '"'; break;
329
0
                        case '\\':res->s.s[j-1] = '\\'; break;
330
0
                        case 't': res->s.s[j-1] = '\t'; break;
331
0
                        case 'n': res->s.s[j-1] = '\n'; break;
332
0
                        case 'r': res->s.s[j-1] = '\r'; break;
333
0
                        default:  res->s.s[j++] = res->s.s[i];
334
0
                        }
335
0
                    }
336
0
                }
337
0
                res->s.s[j] = 0;
338
0
                res->s.l = j;
339
0
            }
340
0
            if (*e != '"')
341
0
                return -1;
342
0
            *end = e+1;
343
0
        } else if (fn) {
344
            // Try lookup as variable, if not as function
345
0
            if (fn(data, str, end, res) == 0)
346
0
                return 0;
347
0
            else
348
0
                return func_expr(filt, data, fn, str, end, res);
349
0
        } else {
350
0
            return -1;
351
0
        }
352
0
    }
353
354
0
    return 0;
355
0
}
356
357
/*
358
 * unary_expr
359
 *     : simple_expr
360
 *     | '+' simple_expr
361
 *     | '-' simple_expr
362
 *     | '!' unary_expr // higher precedence
363
 *     | '~' unary_expr // higher precedence
364
 */
365
static int unary_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
366
0
                      char *str, char **end, hts_expr_val_t *res) {
367
0
    int err;
368
0
    str = ws(str);
369
0
    if (*str == '+' || *str == '-') {
370
0
        err = simple_expr(filt, data, fn, str+1, end, res);
371
0
        if (!hts_expr_val_exists(res)) {
372
0
            hts_expr_val_undef(res);
373
0
        } else {
374
0
            err |= res->is_str;
375
0
            if (*str == '-')
376
0
                res->d = -res->d;
377
0
            res->is_true = res->d != 0;
378
0
        }
379
0
    } else if (*str == '!') {
380
0
        err = unary_expr(filt, data, fn, str+1, end, res);
381
0
        if (res->is_true) {
382
            // Any explicitly true value becomes false
383
0
            res->d = res->is_true = 0;
384
0
        } else if (!hts_expr_val_exists(res)) {
385
            // We can also still negate undef values by toggling the
386
            // is_true override value.
387
0
            res->d = res->is_true = !res->is_true;
388
0
        } else if (res->is_str) {
389
            // !null = true, !"foo" = false, NOTE: !"" = false also
390
0
            res->d = res->is_true = (res->s.s == NULL);
391
0
        } else {
392
0
            res->d = !(int64_t)res->d;
393
0
            res->is_true = res->d != 0;
394
0
        }
395
0
        res->is_str = 0;
396
0
    } else if (*str == '~') {
397
0
        err = unary_expr(filt, data, fn, str+1, end, res);
398
0
        if (!hts_expr_val_exists(res)) {
399
0
            hts_expr_val_undef(res);
400
0
        } else {
401
0
            err |= res->is_str;
402
0
            if (!hts_expr_val_exists(res)) {
403
0
                hts_expr_val_undef(res);
404
0
            } else {
405
0
                res->d = ~(int64_t)res->d;
406
0
                res->is_true = res->d != 0;
407
0
            }
408
0
        }
409
0
    } else {
410
0
        err = simple_expr(filt, data, fn, str, end, res);
411
0
    }
412
0
    return err ? -1 : 0;
413
0
}
414
415
416
/*
417
 * mul_expr
418
 *     : unary_expr (
419
 *           '*' unary_expr
420
 *         | '/' unary_expr
421
 *         | '%' unary_expr
422
 *       )*
423
 */
424
static int mul_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
425
0
                    char *str, char **end, hts_expr_val_t *res) {
426
0
    if (unary_expr(filt, data, fn, str, end, res))
427
0
        return -1;
428
429
0
    str = *end;
430
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
431
0
    while (*str) {
432
0
        str = ws(str);
433
0
        if (*str == '*' || *str == '/' || *str == '%') {
434
0
            if (unary_expr(filt, data, fn, str+1, end, &val)) return -1;
435
0
            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
436
0
                hts_expr_val_undef(res);
437
0
            } else if (val.is_str || res->is_str) {
438
0
                hts_expr_val_free(&val);
439
0
                return -1; // arith on strings
440
0
            }
441
0
        }
442
443
0
        if (*str == '*')
444
0
            res->d *= val.d;
445
0
        else if (*str == '/')
446
0
            res->d /= val.d;
447
0
        else if (*str == '%') {
448
0
            if (val.d)
449
0
                res->d = (int64_t)res->d % (int64_t)val.d;
450
0
            else
451
0
                hts_expr_val_undef(res);
452
0
        } else
453
0
            break;
454
455
0
        res->is_true = hts_expr_val_exists(res) && (res->d != 0);
456
0
        str = *end;
457
0
    }
458
459
0
    hts_expr_val_free(&val);
460
461
0
    return 0;
462
0
}
463
464
/*
465
 * add_expr
466
 *     : mul_expr (
467
 *           '+' mul_expr
468
 *         | '-' mul_expr
469
 *       )*
470
 */
471
static int add_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
472
0
                    char *str, char **end, hts_expr_val_t *res) {
473
0
    if (mul_expr(filt, data, fn, str, end, res))
474
0
        return -1;
475
476
0
    str = *end;
477
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
478
0
    while (*str) {
479
0
        str = ws(str);
480
0
        int undef = 0;
481
0
        if (*str == '+' || *str == '-') {
482
0
            if (mul_expr(filt, data, fn, str+1, end, &val)) return -1;
483
0
            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
484
0
                undef = 1;
485
0
            } else if (val.is_str || res->is_str) {
486
0
                hts_expr_val_free(&val);
487
0
                return -1; // arith on strings
488
0
            }
489
0
        }
490
491
0
        if (*str == '+')
492
0
            res->d += val.d;
493
0
        else if (*str == '-')
494
0
            res->d -= val.d;
495
0
        else
496
0
            break;
497
498
0
        if (undef)
499
0
            hts_expr_val_undef(res);
500
0
        else
501
0
            res->is_true = res->d != 0;
502
503
0
        str = *end;
504
0
    }
505
506
0
    hts_expr_val_free(&val);
507
508
0
    return 0;
509
0
}
510
511
/*
512
 * bitand_expr
513
 *     : add_expr
514
 *     | bitand_expr '&' add_expr
515
 */
516
static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
517
0
                       char *str, char **end, hts_expr_val_t *res) {
518
0
    if (add_expr(filt, data, fn, str, end, res)) return -1;
519
520
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
521
0
    int undef = 0;
522
0
    for (;;) {
523
0
        str = ws(*end);
524
0
        if (*str == '&' && str[1] != '&') {
525
0
            if (add_expr(filt, data, fn, str+1, end, &val)) return -1;
526
0
            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
527
0
                undef = 1;
528
0
            } else if (res->is_str || val.is_str) {
529
0
                hts_expr_val_free(&val);
530
0
                return -1;
531
0
            } else {
532
0
                res->is_true =
533
0
                    (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0;
534
0
            }
535
0
        } else {
536
0
            break;
537
0
        }
538
0
    }
539
0
    hts_expr_val_free(&val);
540
0
    if (undef)
541
0
        hts_expr_val_undef(res);
542
543
0
    return 0;
544
0
}
545
546
/*
547
 * bitxor_expr
548
 *     : bitand_expr
549
 *     | bitxor_expr '^' bitand_expr
550
 */
551
static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
552
0
                       char *str, char **end, hts_expr_val_t *res) {
553
0
    if (bitand_expr(filt, data, fn, str, end, res)) return -1;
554
555
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
556
0
    int undef = 0;
557
0
    for (;;) {
558
0
        str = ws(*end);
559
0
        if (*str == '^') {
560
0
            if (bitand_expr(filt, data, fn, str+1, end, &val)) return -1;
561
0
            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
562
0
                undef = 1;
563
0
            } else if (res->is_str || val.is_str) {
564
0
                hts_expr_val_free(&val);
565
0
                return -1;
566
0
            } else {
567
0
                res->is_true =
568
0
                    (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0;
569
0
            }
570
0
        } else {
571
0
            break;
572
0
        }
573
0
    }
574
0
    hts_expr_val_free(&val);
575
0
    if (undef)
576
0
        hts_expr_val_undef(res);
577
578
0
    return 0;
579
0
}
580
581
/*
582
 * bitor_expr
583
 *     : bitxor_expr
584
 *     | bitor_expr '|' bitxor_expr
585
 */
586
static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
587
0
                      char *str, char **end, hts_expr_val_t *res) {
588
0
    if (bitxor_expr(filt, data, fn, str, end, res)) return -1;
589
590
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
591
0
    int undef = 0;
592
0
    for (;;) {
593
0
        str = ws(*end);
594
0
        if (*str == '|' && str[1] != '|') {
595
0
            if (bitxor_expr(filt, data, fn, str+1, end, &val)) return -1;
596
0
            if (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)) {
597
0
                undef = 1;
598
0
            } else if (res->is_str || val.is_str) {
599
0
                hts_expr_val_free(&val);
600
0
                return -1;
601
0
            } else {
602
0
                res->is_true =
603
0
                    (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0;
604
0
            }
605
0
        } else {
606
0
            break;
607
0
        }
608
0
    }
609
0
    hts_expr_val_free(&val);
610
0
    if (undef)
611
0
        hts_expr_val_undef(res);
612
613
0
    return 0;
614
0
}
615
616
/*
617
 * cmp_expr
618
 *     : bitor_expr
619
 *     | cmp_expr '<=' bitor_expr
620
 *     | cmp_expr '<'  bitor_expr
621
 *     | cmp_expr '>=' bitor_expr
622
 *     | cmp_expr '>'  bitor_expr
623
 */
624
static int cmp_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
625
0
                    char *str, char **end, hts_expr_val_t *res) {
626
0
    if (bitor_expr(filt, data, fn, str, end, res)) return -1;
627
628
0
    str = ws(*end);
629
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
630
0
    int err = 0, cmp_done = 0;
631
632
0
    if (*str == '>' && str[1] == '=') {
633
0
        cmp_done = 1;
634
0
        err = cmp_expr(filt, data, fn, str+2, end, &val);
635
0
        if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
636
0
            hts_expr_val_undef(res);
637
0
        } else {
638
0
            res->is_true=res->d
639
0
                = res->is_str && res->s.s && val.is_str && val.s.s
640
0
                ? strcmp(res->s.s, val.s.s) >= 0
641
0
                : !res->is_str && !val.is_str && res->d >= val.d;
642
0
            res->is_str = 0;
643
0
        }
644
0
    } else if (*str == '>') {
645
0
        cmp_done = 1;
646
0
        err = cmp_expr(filt, data, fn, str+1, end, &val);
647
0
        if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
648
0
            hts_expr_val_undef(res);
649
0
        } else {
650
0
            res->is_true=res->d
651
0
                = res->is_str && res->s.s && val.is_str && val.s.s
652
0
                ? strcmp(res->s.s, val.s.s) > 0
653
0
                : !res->is_str && !val.is_str && res->d > val.d;
654
0
            res->is_str = 0;
655
0
        }
656
0
    } else if (*str == '<' && str[1] == '=') {
657
0
        cmp_done = 1;
658
0
        err = cmp_expr(filt, data, fn, str+2, end, &val);
659
0
        if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
660
0
            hts_expr_val_undef(res);
661
0
        } else {
662
0
            res->is_true=res->d
663
0
                = res->is_str && res->s.s && val.is_str && val.s.s
664
0
                ? strcmp(res->s.s, val.s.s) <= 0
665
0
                : !res->is_str && !val.is_str && res->d <= val.d;
666
0
            res->is_str = 0;
667
0
        }
668
0
    } else if (*str == '<') {
669
0
        cmp_done = 1;
670
0
        err = cmp_expr(filt, data, fn, str+1, end, &val);
671
0
        if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
672
0
            hts_expr_val_undef(res);
673
0
        } else {
674
0
            res->is_true=res->d
675
0
                = res->is_str && res->s.s && val.is_str && val.s.s
676
0
                ? strcmp(res->s.s, val.s.s) < 0
677
0
                : !res->is_str && !val.is_str && res->d < val.d;
678
0
            res->is_str = 0;
679
0
        }
680
0
    }
681
682
0
    if (cmp_done && (!hts_expr_val_exists(&val) || !hts_expr_val_exists(res)))
683
0
        hts_expr_val_undef(res);
684
0
    hts_expr_val_free(&val);
685
686
0
    return err ? -1 : 0;
687
0
}
688
689
/*
690
 * eq_expr
691
 *     : cmp_expr
692
 *     | eq_expr '==' cmp_expr
693
 *     | eq_expr '!=' cmp_expr
694
 *     | eq_expr '=~' cmp_expr
695
 *     | eq_expr '!~' cmp_expr
696
 */
697
static int eq_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
698
0
                   char *str, char **end, hts_expr_val_t *res) {
699
0
    if (cmp_expr(filt, data, fn, str, end, res)) return -1;
700
701
0
    str = ws(*end);
702
703
0
    int err = 0, eq_done = 0;
704
0
    hts_expr_val_t val = HTS_EXPR_VAL_INIT;
705
706
    // numeric vs numeric comparison is as expected
707
    // string vs string comparison is as expected
708
    // numeric vs string is false
709
0
    if (str[0] == '=' && str[1] == '=') {
710
0
        eq_done = 1;
711
0
        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
712
0
            res->is_true = res->d = 0;
713
0
        } else {
714
0
            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
715
0
                hts_expr_val_undef(res);
716
0
            } else {
717
0
                res->is_true = res->d = res->is_str
718
0
                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s)==0 :0)
719
0
                    : !res->is_str && !val.is_str && res->d == val.d;
720
0
            }
721
0
        }
722
0
        res->is_str = 0;
723
724
0
    } else if (str[0] == '!' && str[1] == '=') {
725
0
        eq_done = 1;
726
0
        if ((err = eq_expr(filt, data, fn, str+2, end, &val))) {
727
0
            res->is_true = res->d = 0;
728
0
        } else {
729
0
            if (!hts_expr_val_exists(res) || !hts_expr_val_exists(&val)) {
730
0
                hts_expr_val_undef(res);
731
0
            } else {
732
0
                res->is_true = res->d = res->is_str
733
0
                    ? (res->s.s && val.s.s ?strcmp(res->s.s, val.s.s) != 0 :1)
734
0
                    : res->is_str != val.is_str || res->d != val.d;
735
0
            }
736
0
        }
737
0
        res->is_str = 0;
738
739
0
    } else if ((str[0] == '=' && str[1] == '~') ||
740
0
               (str[0] == '!' && str[1] == '~')) {
741
0
        eq_done = 1;
742
0
        err = eq_expr(filt, data, fn, str+2, end, &val);
743
0
        if (!val.is_str || !res->is_str) {
744
0
            hts_expr_val_free(&val);
745
0
            return -1;
746
0
        }
747
0
        if (val.s.s && res->s.s && val.is_true >= 0 && res->is_true >= 0) {
748
0
            regex_t preg_, *preg;
749
0
            if (filt->curr_regex >= filt->max_regex) {
750
                // Compile regex if not seen before
751
0
                if (filt->curr_regex >= MAX_REGEX) {
752
0
                    preg = &preg_;
753
0
                } else {
754
0
                    preg = &filt->preg[filt->curr_regex];
755
0
                    filt->max_regex++;
756
0
                }
757
758
0
                int ec = regcomp(preg, val.s.s, REG_EXTENDED | REG_NOSUB);
759
0
                if (ec != 0) {
760
0
                    char errbuf[1024];
761
0
                    regerror(ec, preg, errbuf, 1024);
762
0
                    fprintf(stderr, "Failed regex: %.1024s\n", errbuf);
763
0
                    hts_expr_val_free(&val);
764
0
                    return -1;
765
0
                }
766
0
            } else {
767
0
                preg = &filt->preg[filt->curr_regex];
768
0
            }
769
0
            res->is_true = res->d = regexec(preg, res->s.s, 0, NULL, 0) == 0
770
0
                ? *str == '='  // matcn
771
0
                : *str == '!'; // no-match
772
0
            if (preg == &preg_)
773
0
                regfree(preg);
774
775
0
            filt->curr_regex++;
776
0
        } else {
777
            // nul regexp or input is considered false
778
0
            res->is_true = 0;
779
0
        }
780
0
        res->is_str = 0;
781
0
    }
782
783
0
    if (eq_done && ((!hts_expr_val_exists(&val)) || !hts_expr_val_exists(res)))
784
0
        hts_expr_val_undef(res);
785
0
    hts_expr_val_free(&val);
786
787
0
    return err ? -1 : 0;
788
0
}
789
790
/*
791
 * and_expr
792
 *     : eq_expr
793
 *     | and_expr 'and' eq_expr
794
 *     | and_expr 'or'  eq_expr
795
 */
796
static int and_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
797
0
                    char *str, char **end, hts_expr_val_t *res) {
798
0
    if (eq_expr(filt, data, fn, str, end, res)) return -1;
799
800
0
    for (;;) {
801
0
        hts_expr_val_t val = HTS_EXPR_VAL_INIT;
802
0
        str = ws(*end);
803
0
        if (str[0] == '&' && str[1] == '&') {
804
0
            if (eq_expr(filt, data, fn, str+2, end, &val)) return -1;
805
0
            if (!hts_expr_val_existsT(res) || !hts_expr_val_existsT(&val)) {
806
0
                hts_expr_val_undef(res);
807
0
                res->d = 0;
808
0
            } else {
809
0
                res->is_true = res->d =
810
0
                    (res->is_true || (res->is_str && res->s.s) || res->d) &&
811
0
                    (val.is_true  || (val.is_str && val.s.s) || val.d);
812
0
                res->is_str = 0;
813
0
            }
814
0
        } else if (str[0] == '|' && str[1] == '|') {
815
0
            if (eq_expr(filt, data, fn, str+2, end, &val)) return -1;
816
0
            if (!hts_expr_val_existsT(res) && !hts_expr_val_existsT(&val)) {
817
                // neither defined
818
0
                hts_expr_val_undef(res);
819
0
                res->d = 0;
820
0
            } else if (!hts_expr_val_existsT(res) &&
821
0
                       !(val.is_true  || (val.is_str  && val.s.s ) || val.d)) {
822
                // LHS undef and RHS false
823
0
                hts_expr_val_undef(res);
824
0
                res->d = 0;
825
0
            } else if (!hts_expr_val_existsT(&val) &&
826
0
                       !(res->is_true || (res->is_str && res->s.s) || res->d)){
827
                // RHS undef and LHS false
828
0
                hts_expr_val_undef(res);
829
0
                res->d = 0;
830
0
            } else {
831
0
                res->is_true = res->d =
832
0
                    res->is_true || (res->is_str && res->s.s) || res->d ||
833
0
                    val.is_true  || (val.is_str  && val.s.s ) || val.d;
834
0
                res->is_str = 0;
835
0
            }
836
0
        } else {
837
0
            break;
838
0
        }
839
0
        hts_expr_val_free(&val);
840
0
    }
841
842
0
    return 0;
843
0
}
844
845
static int expression(hts_filter_t *filt, void *data, hts_expr_sym_func *fn,
846
0
                      char *str, char **end, hts_expr_val_t *res) {
847
0
    return and_expr(filt, data, fn, str, end, res);
848
0
}
849
850
0
hts_filter_t *hts_filter_init(const char *str) {
851
0
    hts_filter_t *f = calloc(1, sizeof(*f));
852
0
    if (!f) return NULL;
853
854
    // Oversize to permit faster comparisons with memcmp over strcmp
855
0
    size_t len = strlen(str);
856
0
    if (!(f->str = hts_malloc_ps(sizeof(*f->str), len, 100))) {
857
0
        free(f);
858
0
        return NULL;
859
0
    }
860
0
    memcpy(f->str, str, len + 1);
861
0
    return f;
862
0
}
863
864
77.6k
void hts_filter_free(hts_filter_t *filt) {
865
77.6k
    if (!filt)
866
77.6k
        return;
867
868
0
    int i;
869
0
    for (i = 0; i < filt->max_regex; i++)
870
0
        regfree(&filt->preg[i]);
871
872
0
    free(filt->str);
873
0
    free(filt);
874
0
}
875
876
static int hts_filter_eval_(hts_filter_t *filt,
877
                            void *data, hts_expr_sym_func *fn,
878
0
                            hts_expr_val_t *res) {
879
0
    char *end = NULL;
880
881
0
    filt->curr_regex = 0;
882
0
    if (expression(filt, data, fn, filt->str, &end, res))
883
0
        return -1;
884
885
0
    if (end && *ws(end)) {
886
0
        fprintf(stderr, "Unable to parse expression at %s\n", filt->str);
887
0
        return -1;
888
0
    }
889
890
    // Strings evaluate to true.  An empty string is also true, but an
891
    // absent (null) string is false, unless overriden by is_true.  An
892
    // empty string has kstring length of zero, but a pointer as it's
893
    // nul-terminated.
894
0
    if (res->is_str) {
895
0
        res->is_true |= res->s.s != NULL;
896
0
        res->d = res->is_true;
897
0
    } else if (hts_expr_val_exists(res)) {
898
0
        res->is_true |= res->d != 0;
899
0
    }
900
901
0
    return 0;
902
0
}
903
904
int hts_filter_eval(hts_filter_t *filt,
905
                    void *data, hts_expr_sym_func *fn,
906
0
                    hts_expr_val_t *res) {
907
0
    if (res->s.l != 0 || res->s.m != 0 || res->s.s != NULL) {
908
        // As *res is cleared below, it's not safe to call this function
909
        // with res->s.s set, as memory would be leaked.  It's also not
910
        // possible to know is res was initialised correctly, so in
911
        // either case we fail.
912
0
        hts_log_error("Results structure must be cleared before calling this function");
913
0
        return -1;
914
0
    }
915
916
0
    memset(res, 0, sizeof(*res));
917
918
0
    return hts_filter_eval_(filt, data, fn, res);
919
0
}
920
921
int hts_filter_eval2(hts_filter_t *filt,
922
                     void *data, hts_expr_sym_func *fn,
923
0
                     hts_expr_val_t *res) {
924
0
    ks_free(&res->s);
925
0
    memset(res, 0, sizeof(*res));
926
927
0
    return hts_filter_eval_(filt, data, fn, res);
928
0
}