Coverage Report

Created: 2025-12-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/parser.c
Line
Count
Source
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
#include "yara/compiler.h"
47
#include "yara/types.h"
48
49
// Converts an uppercase hexadecimal digit character ('0'-'9' or 'A'-'F') to
// its numeric value. Any character outside 'A'-'F' is treated as a decimal
// digit, so lowercase hex digits are not handled. The argument is evaluated
// more than once and therefore must not have side effects.
//
// Both the argument and the whole expansion are parenthesized so the macro
// can be used safely inside larger expressions.
#define todigit(x)                                               \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10))     \
                              : ((uint8_t) ((x) - '0')))
52
53
int yr_parser_emit(
54
    yyscan_t yyscanner,
55
    uint8_t instruction,
56
    YR_ARENA_REF* instruction_ref)
57
96.6k
{
58
96.6k
  return yr_arena_write_data(
59
96.6k
      yyget_extra(yyscanner)->arena,
60
96.6k
      YR_CODE_SECTION,
61
96.6k
      &instruction,
62
96.6k
      sizeof(uint8_t),
63
96.6k
      instruction_ref);
64
96.6k
}
65
66
int yr_parser_emit_with_arg_double(
67
    yyscan_t yyscanner,
68
    uint8_t instruction,
69
    double argument,
70
    YR_ARENA_REF* instruction_ref,
71
    YR_ARENA_REF* argument_ref)
72
2.38k
{
73
2.38k
  int result = yr_arena_write_data(
74
2.38k
      yyget_extra(yyscanner)->arena,
75
2.38k
      YR_CODE_SECTION,
76
2.38k
      &instruction,
77
2.38k
      sizeof(uint8_t),
78
2.38k
      instruction_ref);
79
80
2.38k
  if (result == ERROR_SUCCESS)
81
2.38k
    result = yr_arena_write_data(
82
2.38k
        yyget_extra(yyscanner)->arena,
83
2.38k
        YR_CODE_SECTION,
84
2.38k
        &argument,
85
2.38k
        sizeof(double),
86
2.38k
        argument_ref);
87
88
2.38k
  return result;
89
2.38k
}
90
91
int yr_parser_emit_with_arg_int32(
92
    yyscan_t yyscanner,
93
    uint8_t instruction,
94
    int32_t argument,
95
    YR_ARENA_REF* instruction_ref,
96
    YR_ARENA_REF* argument_ref)
97
19.9k
{
98
19.9k
  int result = yr_arena_write_data(
99
19.9k
      yyget_extra(yyscanner)->arena,
100
19.9k
      YR_CODE_SECTION,
101
19.9k
      &instruction,
102
19.9k
      sizeof(uint8_t),
103
19.9k
      instruction_ref);
104
105
19.9k
  if (result == ERROR_SUCCESS)
106
19.9k
    result = yr_arena_write_data(
107
19.9k
        yyget_extra(yyscanner)->arena,
108
19.9k
        YR_CODE_SECTION,
109
19.9k
        &argument,
110
19.9k
        sizeof(int32_t),
111
19.9k
        argument_ref);
112
113
19.9k
  return result;
114
19.9k
}
115
116
int yr_parser_emit_with_arg(
117
    yyscan_t yyscanner,
118
    uint8_t instruction,
119
    int64_t argument,
120
    YR_ARENA_REF* instruction_ref,
121
    YR_ARENA_REF* argument_ref)
122
38.0k
{
123
38.0k
  int result = yr_arena_write_data(
124
38.0k
      yyget_extra(yyscanner)->arena,
125
38.0k
      YR_CODE_SECTION,
126
38.0k
      &instruction,
127
38.0k
      sizeof(uint8_t),
128
38.0k
      instruction_ref);
129
130
38.0k
  if (result == ERROR_SUCCESS)
131
38.0k
    result = yr_arena_write_data(
132
38.0k
        yyget_extra(yyscanner)->arena,
133
38.0k
        YR_CODE_SECTION,
134
38.0k
        &argument,
135
38.0k
        sizeof(int64_t),
136
38.0k
        argument_ref);
137
138
38.0k
  return result;
139
38.0k
}
140
141
int yr_parser_emit_with_arg_reloc(
142
    yyscan_t yyscanner,
143
    uint8_t instruction,
144
    void* argument,
145
    YR_ARENA_REF* instruction_ref,
146
    YR_ARENA_REF* argument_ref)
147
879k
{
148
879k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
149
150
879k
  DECLARE_REFERENCE(void*, ptr) arg;
151
152
879k
  memset(&arg, 0, sizeof(arg));
153
879k
  arg.ptr = argument;
154
155
879k
  int result = yr_arena_write_data(
156
879k
      yyget_extra(yyscanner)->arena,
157
879k
      YR_CODE_SECTION,
158
879k
      &instruction,
159
879k
      sizeof(uint8_t),
160
879k
      instruction_ref);
161
162
879k
  if (result == ERROR_SUCCESS)
163
879k
    result = yr_arena_write_data(
164
879k
        yyget_extra(yyscanner)->arena,
165
879k
        YR_CODE_SECTION,
166
879k
        &arg,
167
879k
        sizeof(arg),
168
879k
        &ref);
169
170
879k
  if (result == ERROR_SUCCESS)
171
879k
    result = yr_arena_make_ptr_relocatable(
172
879k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
173
174
879k
  if (argument_ref != NULL)
175
0
    *argument_ref = ref;
176
177
879k
  return result;
178
879k
}
179
180
int yr_parser_emit_pushes_for_strings(
181
    yyscan_t yyscanner,
182
    const char* identifier,
183
    YR_STRING_SET* strings)
184
8.13k
{
185
8.13k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
186
187
8.13k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
188
8.13k
      compiler, compiler->current_rule_idx);
189
190
8.13k
  YR_STRING* string;
191
192
8.13k
  const char* string_identifier;
193
8.13k
  const char* target_identifier;
194
195
8.13k
  strings->count = 0;
196
8.13k
  strings->head = NULL;
197
8.13k
  YR_STRING_SET_ELEMENT** tail_ptr = &strings->head;
198
199
8.13k
  yr_rule_strings_foreach(current_rule, string)
200
879k
  {
201
    // Don't generate pushes for strings chained to another one, we are
202
    // only interested in non-chained strings or the head of the chain.
203
204
879k
    if (string->chained_to == NULL)
205
875k
    {
206
875k
      string_identifier = string->identifier;
207
875k
      target_identifier = identifier;
208
209
1.75M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
210
875k
             *target_identifier == *string_identifier)
211
875k
      {
212
875k
        target_identifier++;
213
875k
        string_identifier++;
214
875k
      }
215
216
875k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
217
7.98k
          *target_identifier == '*')
218
869k
      {
219
869k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
220
221
869k
        string->flags |= STRING_FLAGS_REFERENCED;
222
869k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
223
869k
        strings->count++;
224
225
869k
        *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT));
226
869k
        yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element));
227
869k
        (*tail_ptr)->next = NULL;
228
869k
        tail_ptr = &(*tail_ptr)->next;
229
869k
      }
230
875k
    }
231
879k
  }
232
233
8.13k
  if (strings->count == 0)
234
48
  {
235
48
    yr_compiler_set_error_extra_info(
236
48
        compiler, identifier) return ERROR_UNDEFINED_STRING;
237
48
  }
238
239
8.08k
  return ERROR_SUCCESS;
240
8.13k
}
241
242
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
243
// prefix.
244
int yr_parser_emit_pushes_for_rules(
245
    yyscan_t yyscanner,
246
    const char* prefix,
247
    int* count)
248
736
{
249
736
  YR_COMPILER* compiler = yyget_extra(yyscanner);
250
251
  // Make sure the compiler is parsing a rule
252
736
  assert(compiler->current_rule_idx != UINT32_MAX);
253
254
736
  YR_RULE* rule;
255
736
  int matching = 0;
256
257
736
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
258
736
      compiler->arena,
259
736
      YR_NAMESPACES_TABLE,
260
736
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
261
262
  // Can't use yr_rules_foreach here as that requires the rules to have been
263
  // finalized (inserting a NULL rule at the end). This is done when
264
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
265
  // into the current position in the code arena. Obviously we aren't done
266
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
267
  // this I'm manually walking all the currently compiled rules (up to the
268
  // current rule index) and comparing identifiers to see if it is one we should
269
  // use.
270
  //
271
  // Further, we have to get compiler->current_rule_idx before we start because
272
  // if we emit an OP_PUSH_RULE
273
736
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
274
275
3.76k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
276
3.02k
  {
277
    // Is rule->identifier prefixed by prefix?
278
3.02k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
279
705
    {
280
705
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
281
705
          compiler->rules_table, rule->identifier, ns->name);
282
283
705
      if (rule_idx != UINT32_MAX)
284
705
      {
285
705
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
286
705
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
287
705
        matching++;
288
705
      }
289
705
    }
290
291
3.02k
    rule++;
292
3.02k
  }
293
294
736
  if (count != NULL)
295
736
  {
296
736
    *count = matching;
297
736
  }
298
299
736
  if (matching == 0)
300
76
  {
301
76
    yr_compiler_set_error_extra_info(compiler, prefix);
302
76
    return ERROR_UNDEFINED_IDENTIFIER;
303
76
  }
304
305
660
  return ERROR_SUCCESS;
306
736
}
307
308
// Emits the shortest push instruction able to hold the given constant:
// OP_PUSH_U (no operand) for YR_UNDEFINED, OP_PUSH_8, OP_PUSH_16 or
// OP_PUSH_32 for values that fit in 8, 16 or 32 bits, and OP_PUSH with a
// full 64-bit operand otherwise. Operands are copied in the byte order of
// the host. Returns the result of yr_arena_write_data.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte followed by up to eight operand bytes.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    uint16_t operand = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else if (argument <= 0xffffffff)
  {
    uint32_t operand = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  return yr_arena_write_data(
      compiler->arena, YR_CODE_SECTION, encoded, encoded_len, NULL);
}
347
348
int yr_parser_check_types(
349
    YR_COMPILER* compiler,
350
    YR_OBJECT_FUNCTION* function,
351
    const char* actual_args_fmt)
352
380
{
353
380
  int i;
354
355
525
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
356
525
  {
357
525
    if (function->prototypes[i].arguments_fmt == NULL)
358
12
      break;
359
360
513
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
361
368
      return ERROR_SUCCESS;
362
513
  }
363
364
12
  yr_compiler_set_error_extra_info(compiler, function->identifier)
365
366
12
      return ERROR_WRONG_ARGUMENTS;
367
380
}
368
369
int yr_parser_lookup_string(
370
    yyscan_t yyscanner,
371
    const char* identifier,
372
    YR_STRING** string)
373
2.74k
{
374
2.74k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
375
376
2.74k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
377
2.74k
      compiler, compiler->current_rule_idx);
378
379
2.74k
  yr_rule_strings_foreach(current_rule, *string)
380
3.08k
  {
381
    // If some string $a gets fragmented into multiple chained
382
    // strings, all those fragments have the same $a identifier
383
    // but we are interested in the heading fragment, which is
384
    // that with chained_to == NULL
385
386
3.08k
    if ((*string)->chained_to == NULL &&
387
2.92k
        strcmp((*string)->identifier, identifier) == 0)
388
2.71k
    {
389
2.71k
      return ERROR_SUCCESS;
390
2.71k
    }
391
3.08k
  }
392
393
30
  yr_compiler_set_error_extra_info(compiler, identifier)
394
395
30
      * string = NULL;
396
397
30
  return ERROR_UNDEFINED_STRING;
398
2.74k
}
399
400
////////////////////////////////////////////////////////////////////////////////
401
// Searches for a variable with the given identifier in the scope of the current
402
// "for" loop. In case of nested "for" loops the identifier is searched starting
403
// at the top-level loop and going down thorough the nested loops until the
404
// current one. This is ok because inner loops can not re-define an identifier
405
// already defined by an outer loop.
406
//
407
// If the variable is found, the return value is the position that the variable
408
// occupies among all the currently defined variables. If the variable doesn't
409
// exist the return value is -1.
410
//
411
// The function can receive a pointer to a YR_EXPRESSION that will populated
412
// with information about the variable if found. This pointer can be NULL if
413
// the caller is not interested in getting that information.
414
//
415
int yr_parser_lookup_loop_variable(
416
    yyscan_t yyscanner,
417
    const char* identifier,
418
    YR_EXPRESSION* expr)
419
16.7k
{
420
16.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
421
16.7k
  int i, j;
422
16.7k
  int var_offset = 0;
423
424
20.5k
  for (i = 0; i <= compiler->loop_index; i++)
425
13.5k
  {
426
13.5k
    var_offset += compiler->loop[i].vars_internal_count;
427
428
22.2k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
429
18.5k
    {
430
18.5k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
431
18.1k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
432
9.74k
      {
433
9.74k
        if (expr != NULL)
434
9.71k
          *expr = compiler->loop[i].vars[j];
435
436
9.74k
        return var_offset + j;
437
9.74k
      }
438
18.5k
    }
439
440
3.75k
    var_offset += compiler->loop[i].vars_count;
441
3.75k
  }
442
443
7.01k
  return -1;
444
16.7k
}
445
446
static int _yr_parser_write_string(
447
    const char* identifier,
448
    YR_MODIFIER modifier,
449
    YR_COMPILER* compiler,
450
    SIZED_STRING* str,
451
    RE_AST* re_ast,
452
    YR_ARENA_REF* string_ref,
453
    int* min_atom_quality,
454
    int* num_atom)
455
24.5k
{
456
24.5k
  SIZED_STRING* literal_string;
457
24.5k
  YR_ATOM_LIST_ITEM* atom;
458
24.5k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
459
460
24.5k
  int c, result;
461
24.5k
  int max_string_len;
462
24.5k
  bool free_literal = false;
463
464
24.5k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
465
24.5k
      compiler->arena,
466
24.5k
      YR_STRINGS_TABLE,
467
24.5k
      sizeof(YR_STRING),
468
24.5k
      string_ref,
469
24.5k
      offsetof(YR_STRING, identifier),
470
24.5k
      offsetof(YR_STRING, string),
471
24.5k
      offsetof(YR_STRING, chained_to),
472
24.5k
      EOL));
473
474
24.5k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
475
24.5k
      compiler->arena, string_ref);
476
477
24.5k
  YR_ARENA_REF ref;
478
479
24.5k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
480
481
24.5k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
482
24.5k
  string->rule_idx = compiler->current_rule_idx;
483
24.5k
  string->idx = compiler->current_string_idx;
484
24.5k
  string->fixed_offset = YR_UNDEFINED;
485
486
24.5k
  compiler->current_string_idx++;
487
488
24.5k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
489
23.1k
      modifier.flags & STRING_FLAGS_REGEXP ||
490
3.35k
      modifier.flags & STRING_FLAGS_BASE64 ||
491
2.42k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
492
22.6k
  {
493
22.6k
    literal_string = yr_re_ast_extract_literal(re_ast);
494
495
22.6k
    if (literal_string != NULL)
496
16.5k
      free_literal = true;
497
22.6k
  }
498
1.86k
  else
499
1.86k
  {
500
1.86k
    literal_string = str;
501
1.86k
  }
502
503
24.5k
  if (literal_string != NULL)
504
18.4k
  {
505
18.4k
    modifier.flags |= STRING_FLAGS_LITERAL;
506
507
18.4k
    result = _yr_compiler_store_data(
508
18.4k
        compiler,
509
18.4k
        literal_string->c_string,
510
18.4k
        literal_string->length + 1,  // +1 to include terminating NULL
511
18.4k
        &ref);
512
513
18.4k
    if (result != ERROR_SUCCESS)
514
0
      goto cleanup;
515
516
18.4k
    string->length = (uint32_t) literal_string->length;
517
18.4k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
518
519
18.4k
    if (modifier.flags & STRING_FLAGS_WIDE)
520
1.61k
      max_string_len = string->length * 2;
521
16.8k
    else
522
16.8k
      max_string_len = string->length;
523
524
18.4k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
525
14.1k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
526
527
18.4k
    result = yr_atoms_extract_from_string(
528
18.4k
        &compiler->atoms_config,
529
18.4k
        (uint8_t*) literal_string->c_string,
530
18.4k
        (int32_t) literal_string->length,
531
18.4k
        modifier,
532
18.4k
        &atom_list,
533
18.4k
        min_atom_quality);
534
535
18.4k
    if (result != ERROR_SUCCESS)
536
0
      goto cleanup;
537
18.4k
  }
538
6.13k
  else
539
6.13k
  {
540
    // Non-literal strings can't be marked as fixed offset because once we
541
    // find a string atom in the scanned data we don't know the offset where
542
    // the string should start, as the non-literal strings can contain
543
    // variable-length portions.
544
6.13k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
545
546
    // Save the position where the RE forward code starts for later reference.
547
6.13k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
548
6.13k
        compiler->arena, YR_RE_CODE_SECTION);
549
550
    // Emit forwards code
551
6.13k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
552
553
6.13k
    if (result != ERROR_SUCCESS)
554
130
      goto cleanup;
555
556
    // Emit backwards code
557
6.00k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
558
559
6.00k
    if (result != ERROR_SUCCESS)
560
9
      goto cleanup;
561
562
    // Extract atoms from the regular expression.
563
5.99k
    result = yr_atoms_extract_from_re(
564
5.99k
        &compiler->atoms_config,
565
5.99k
        re_ast,
566
5.99k
        modifier,
567
5.99k
        &atom_list,
568
5.99k
        min_atom_quality);
569
570
5.99k
    if (result != ERROR_SUCCESS)
571
0
      goto cleanup;
572
573
    // If no atom was extracted let's add a zero-length atom.
574
5.99k
    if (atom_list == NULL)
575
1.52k
    {
576
1.52k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
577
578
1.52k
      if (atom_list == NULL)
579
0
      {
580
0
        result = ERROR_INSUFFICIENT_MEMORY;
581
0
        goto cleanup;
582
0
      }
583
584
1.52k
      atom_list->atom.length = 0;
585
1.52k
      atom_list->backtrack = 0;
586
1.52k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
587
1.52k
      atom_list->next = NULL;
588
589
1.52k
      yr_arena_ptr_to_ref(
590
1.52k
          compiler->arena,
591
1.52k
          yr_arena_get_ptr(
592
1.52k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
593
1.52k
          &(atom_list->forward_code_ref));
594
1.52k
    }
595
5.99k
  }
596
597
24.4k
  string->flags = modifier.flags;
598
599
  // Add the string to Aho-Corasick automaton.
600
24.4k
  result = yr_ac_add_string(
601
24.4k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
602
603
24.4k
  if (result != ERROR_SUCCESS)
604
0
    goto cleanup;
605
606
24.4k
  atom = atom_list;
607
24.4k
  c = 0;
608
609
1.79M
  while (atom != NULL)
610
1.77M
  {
611
1.77M
    atom = atom->next;
612
1.77M
    c++;
613
1.77M
  }
614
615
24.4k
  (*num_atom) += c;
616
617
24.5k
cleanup:
618
24.5k
  if (free_literal)
619
16.5k
    yr_free(literal_string);
620
621
24.5k
  if (atom_list != NULL)
622
24.4k
    yr_atoms_list_destroy(atom_list);
623
624
24.5k
  return result;
625
24.4k
}
626
627
static int _yr_parser_check_string_modifiers(
628
    yyscan_t yyscanner,
629
    YR_MODIFIER modifier)
630
20.9k
{
631
20.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
632
633
  // xor and nocase together is not implemented.
634
20.9k
  if (modifier.flags & STRING_FLAGS_XOR &&
635
692
      modifier.flags & STRING_FLAGS_NO_CASE)
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler, "invalid modifier combination: xor nocase");
639
0
    return ERROR_INVALID_MODIFIER;
640
0
  }
641
642
  // base64 and nocase together is not implemented.
643
20.9k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
644
3.60k
      (modifier.flags & STRING_FLAGS_BASE64 ||
645
3.59k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
646
3
  {
647
3
    yr_compiler_set_error_extra_info(
648
3
        compiler,
649
3
        modifier.flags & STRING_FLAGS_BASE64
650
3
            ? "invalid modifier combination: base64 nocase"
651
3
            : "invalid modifier combination: base64wide nocase");
652
3
    return ERROR_INVALID_MODIFIER;
653
3
  }
654
655
  // base64 and fullword together is not implemented.
656
20.9k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
657
105
      (modifier.flags & STRING_FLAGS_BASE64 ||
658
105
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
659
5
  {
660
5
    yr_compiler_set_error_extra_info(
661
5
        compiler,
662
5
        modifier.flags & STRING_FLAGS_BASE64
663
5
            ? "invalid modifier combination: base64 fullword"
664
5
            : "invalid modifier combination: base64wide fullword");
665
5
    return ERROR_INVALID_MODIFIER;
666
5
  }
667
668
  // base64 and xor together is not implemented.
669
20.9k
  if (modifier.flags & STRING_FLAGS_XOR &&
670
692
      (modifier.flags & STRING_FLAGS_BASE64 ||
671
692
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
672
4
  {
673
4
    yr_compiler_set_error_extra_info(
674
4
        compiler,
675
4
        modifier.flags & STRING_FLAGS_BASE64
676
4
            ? "invalid modifier combination: base64 xor"
677
4
            : "invalid modifier combination: base64wide xor");
678
4
    return ERROR_INVALID_MODIFIER;
679
4
  }
680
681
20.9k
  return ERROR_SUCCESS;
682
20.9k
}
683
684
int yr_parser_reduce_string_declaration(
685
    yyscan_t yyscanner,
686
    YR_MODIFIER modifier,
687
    const char* identifier,
688
    SIZED_STRING* str,
689
    YR_ARENA_REF* string_ref)
690
20.9k
{
691
20.9k
  int result = ERROR_SUCCESS;
692
20.9k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
693
20.9k
  int atom_quality;
694
695
20.9k
  char message[512];
696
697
20.9k
  int32_t min_gap = 0;
698
20.9k
  int32_t max_gap = 0;
699
700
20.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
701
702
20.9k
  RE_AST* re_ast = NULL;
703
20.9k
  RE_AST* remainder_re_ast = NULL;
704
20.9k
  RE_ERROR re_error;
705
706
20.9k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
707
20.9k
      compiler, compiler->current_rule_idx);
708
709
  // Determine if a string with the same identifier was already defined
710
  // by searching for the identifier in strings_table.
711
20.9k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
712
20.9k
      compiler->strings_table, identifier, NULL);
713
714
  // The string was already defined, return an error.
715
20.9k
  if (string_idx != UINT32_MAX)
716
15
  {
717
15
    yr_compiler_set_error_extra_info(compiler, identifier);
718
15
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
719
15
  }
720
721
  // Empty strings are not allowed.
722
20.9k
  if (str->length == 0)
723
5
  {
724
5
    yr_compiler_set_error_extra_info(compiler, identifier);
725
5
    return ERROR_EMPTY_STRING;
726
5
  }
727
728
20.9k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
729
3.32k
    modifier.flags |= STRING_FLAGS_NO_CASE;
730
731
20.9k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
732
310
    modifier.flags |= STRING_FLAGS_DOT_ALL;
733
734
  // Hex strings are always handled as DOT_ALL regexps.
735
20.9k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
736
1.08k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
737
738
20.9k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
739
19.6k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
740
18.7k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
741
18.4k
  {
742
18.4k
    modifier.flags |= STRING_FLAGS_ASCII;
743
18.4k
  }
744
745
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
746
  // a single match for the string is enough. This is true in
747
  // most cases, except when the string count (#) and string offset (@)
748
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
749
  // initially, and unmarked later if required.
750
20.9k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
751
752
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
753
  // need to be searched all over the file because the user is using the
754
  // "at" operator. The string must be searched at a fixed offset in the
755
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
756
  // and unmarked later if required.
757
20.9k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
758
759
  // If string identifier is $ this is an anonymous string, if not add the
760
  // identifier to strings_table.
761
20.9k
  if (strcmp(identifier, "$") == 0)
762
19.8k
  {
763
19.8k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
764
19.8k
  }
765
1.08k
  else
766
1.08k
  {
767
1.08k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
768
1.08k
        compiler->strings_table,
769
1.08k
        identifier,
770
1.08k
        NULL,
771
1.08k
        compiler->current_string_idx));
772
1.08k
  }
773
774
  // Make sure that the the string does not have an invalid combination of
775
  // modifiers.
776
20.9k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
777
778
20.9k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
779
19.8k
      modifier.flags & STRING_FLAGS_REGEXP ||
780
3.35k
      modifier.flags & STRING_FLAGS_BASE64 ||
781
2.42k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
782
19.0k
  {
783
19.0k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
784
1.08k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
785
17.9k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
786
16.4k
    {
787
16.4k
      int flags = RE_PARSER_FLAG_NONE;
788
16.4k
      if (compiler->strict_escape)
789
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
790
16.4k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
791
16.4k
    }
792
1.49k
    else
793
1.49k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
794
795
19.0k
    if (result != ERROR_SUCCESS)
796
782
    {
797
782
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
798
0
      {
799
0
        yywarning(yyscanner, "unknown escape sequence");
800
0
      }
801
782
      else
802
782
      {
803
782
        snprintf(
804
782
            message,
805
782
            sizeof(message),
806
782
            "invalid %s \"%s\": %s",
807
782
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
808
782
                                                        : "regular expression",
809
782
            identifier,
810
782
            re_error.message);
811
812
782
        yr_compiler_set_error_extra_info(compiler, message);
813
782
        goto _exit;
814
782
      }
815
782
    }
816
817
18.2k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
818
736
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
819
820
18.2k
    if (re_ast->flags & RE_FLAGS_GREEDY)
821
1.05k
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
822
823
    // Regular expressions in the strings section can't mix greedy and
824
    // ungreedy quantifiers like .* and .*?. That's because these regular
825
    // expressions can be matched forwards and/or backwards depending on the
826
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
827
    // be able to properly calculate the length of the match.
828
829
18.2k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
830
1.05k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
831
7
    {
832
7
      result = ERROR_INVALID_REGULAR_EXPRESSION;
833
834
7
      yr_compiler_set_error_extra_info(
835
7
          compiler,
836
7
          "greedy and ungreedy quantifiers can't be mixed in a regular "
837
7
          "expression");
838
839
7
      goto _exit;
840
7
    }
841
842
18.2k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
843
1.82k
    {
844
1.82k
      yywarning(
845
1.82k
          yyscanner,
846
1.82k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
847
1.82k
          "with a reasonable value for N",
848
1.82k
          identifier);
849
1.82k
    }
850
851
18.2k
    if (compiler->re_ast_callback != NULL)
852
0
    {
853
0
      compiler->re_ast_callback(
854
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
855
0
    }
856
857
18.2k
    *string_ref = YR_ARENA_NULL_REF;
858
859
40.8k
    while (re_ast != NULL)
860
22.6k
    {
861
22.6k
      YR_ARENA_REF ref;
862
863
22.6k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
864
865
22.6k
      int32_t prev_min_gap = min_gap;
866
22.6k
      int32_t prev_max_gap = max_gap;
867
868
22.6k
      result = yr_re_ast_split_at_chaining_point(
869
22.6k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
870
871
22.6k
      if (result != ERROR_SUCCESS)
872
0
        goto _exit;
873
874
22.6k
      result = _yr_parser_write_string(
875
22.6k
          identifier,
876
22.6k
          modifier,
877
22.6k
          compiler,
878
22.6k
          NULL,
879
22.6k
          re_ast,
880
22.6k
          &ref,
881
22.6k
          &atom_quality,
882
22.6k
          &current_rule->num_atoms);
883
884
22.6k
      if (result != ERROR_SUCCESS)
885
139
        goto _exit;
886
887
22.5k
      if (atom_quality < min_atom_quality)
888
17.7k
        min_atom_quality = atom_quality;
889
890
22.5k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
891
18.1k
      {
892
        // This is the first string in the chain, the string reference
893
        // returned by this function must point to this string.
894
18.1k
        *string_ref = ref;
895
18.1k
      }
896
4.42k
      else
897
4.42k
      {
898
        // This is not the first string in the chain, set the appropriate
899
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
900
        // fields.
901
4.42k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
902
4.42k
            compiler->arena,
903
4.42k
            YR_STRINGS_TABLE,
904
4.42k
            prev_string_idx * sizeof(YR_STRING));
905
906
4.42k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
907
4.42k
            compiler->arena, &ref);
908
909
4.42k
        new_string->chained_to = prev_string;
910
4.42k
        new_string->chain_gap_min = prev_min_gap;
911
4.42k
        new_string->chain_gap_max = prev_max_gap;
912
913
        // A string chained to another one can't have a fixed offset, only the
914
        // head of the string chain can have a fixed offset.
915
4.42k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
916
917
        // There is a previous string, but that string wasn't marked as part
918
        // of a chain because we can't do that until knowing there will be
919
        // another string, let's flag it now the we know.
920
4.42k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
921
922
        // There is a previous string, so this string is part of a chain, but
923
        // there will be no more strings because there are no more AST to
924
        // split, which means that this is the chain's tail.
925
4.42k
        if (remainder_re_ast == NULL)
926
1.33k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
927
1.33k
                               STRING_FLAGS_CHAIN_TAIL;
928
4.42k
      }
929
930
22.5k
      yr_re_ast_destroy(re_ast);
931
22.5k
      re_ast = remainder_re_ast;
932
22.5k
    }
933
18.2k
  }
934
1.86k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
935
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
936
1.86k
  {
937
1.86k
    result = _yr_parser_write_string(
938
1.86k
        identifier,
939
1.86k
        modifier,
940
1.86k
        compiler,
941
1.86k
        str,
942
1.86k
        NULL,
943
1.86k
        string_ref,
944
1.86k
        &min_atom_quality,
945
1.86k
        &current_rule->num_atoms);
946
947
1.86k
    if (result != ERROR_SUCCESS)
948
0
      goto _exit;
949
1.86k
  }
950
951
20.0k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
952
6.36k
  {
953
6.36k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
954
6.36k
  }
955
956
20.9k
_exit:
957
958
20.9k
  if (re_ast != NULL)
959
461
    yr_re_ast_destroy(re_ast);
960
961
20.9k
  if (remainder_re_ast != NULL)
962
1
    yr_re_ast_destroy(remainder_re_ast);
963
964
20.9k
  return result;
965
20.0k
}
966
967
static int wildcard_iterator(
968
    void* prefix,
969
    size_t prefix_len,
970
    void* _value,
971
    void* data)
972
2.74k
{
973
2.74k
  const char* identifier = (const char*) data;
974
975
  // If the identifier is prefixed by prefix, then it matches the wildcard.
976
2.74k
  if (!strncmp(prefix, identifier, prefix_len))
977
172
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
978
979
2.57k
  return ERROR_SUCCESS;
980
2.74k
}
981
982
int yr_parser_reduce_rule_declaration_phase_1(
983
    yyscan_t yyscanner,
984
    int32_t flags,
985
    const char* identifier,
986
    YR_ARENA_REF* rule_ref)
987
24.6k
{
988
24.6k
  int result;
989
24.6k
  YR_FIXUP* fixup;
990
24.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
991
992
24.6k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
993
24.6k
      compiler->arena,
994
24.6k
      YR_NAMESPACES_TABLE,
995
24.6k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
996
997
24.6k
  if (yr_hash_table_lookup_uint32(
998
24.6k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
999
9.51k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
1000
15.1k
  {
1001
    // A rule or variable with the same identifier already exists, return the
1002
    // appropriate error.
1003
1004
15.1k
    yr_compiler_set_error_extra_info(compiler, identifier);
1005
15.1k
    return ERROR_DUPLICATED_IDENTIFIER;
1006
15.1k
  }
1007
1008
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1009
  // any of them are a prefix of the identifier being declared. If so, return
1010
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1011
9.51k
  result = yr_hash_table_iterate(
1012
9.51k
      compiler->wildcard_identifiers_table,
1013
9.51k
      ns->name,
1014
9.51k
      wildcard_iterator,
1015
9.51k
      (void*) identifier);
1016
1017
9.51k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1018
172
  {
1019
    // This rule matches an existing wildcard rule set.
1020
172
    yr_compiler_set_error_extra_info(compiler, identifier);
1021
172
  }
1022
1023
9.51k
  FAIL_ON_ERROR(result);
1024
1025
9.34k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1026
9.34k
      compiler->arena,
1027
9.34k
      YR_RULES_TABLE,
1028
9.34k
      sizeof(YR_RULE),
1029
9.34k
      rule_ref,
1030
9.34k
      offsetof(YR_RULE, identifier),
1031
9.34k
      offsetof(YR_RULE, tags),
1032
9.34k
      offsetof(YR_RULE, strings),
1033
9.34k
      offsetof(YR_RULE, metas),
1034
9.34k
      offsetof(YR_RULE, ns),
1035
9.34k
      EOL));
1036
1037
9.34k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1038
1039
9.34k
  YR_ARENA_REF ref;
1040
1041
9.34k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1042
1043
9.34k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1044
9.34k
  rule->flags = flags;
1045
9.34k
  rule->ns = ns;
1046
9.34k
  rule->num_atoms = 0;
1047
1048
9.34k
  YR_ARENA_REF jmp_offset_ref;
1049
1050
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1051
9.34k
  compiler->current_rule_idx = compiler->next_rule_idx;
1052
9.34k
  compiler->next_rule_idx++;
1053
1054
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1055
  // disabled it skips over the rule's code and go straight to the next rule's
1056
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1057
  // is set to 0 as we don't know the jump target yet. When we finish
1058
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1059
  // the jump offset is set to its final value.
1060
1061
9.34k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1062
9.34k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1063
1064
9.34k
  FAIL_ON_ERROR(yr_arena_write_data(
1065
9.34k
      compiler->arena,
1066
9.34k
      YR_CODE_SECTION,
1067
9.34k
      &compiler->current_rule_idx,
1068
9.34k
      sizeof(compiler->current_rule_idx),
1069
9.34k
      NULL));
1070
1071
  // Create a fixup entry for the jump and push it in the stack
1072
9.34k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1073
1074
9.34k
  if (fixup == NULL)
1075
0
    return ERROR_INSUFFICIENT_MEMORY;
1076
1077
9.34k
  fixup->ref = jmp_offset_ref;
1078
9.34k
  fixup->next = compiler->fixup_stack_head;
1079
9.34k
  compiler->fixup_stack_head = fixup;
1080
1081
  // Clean strings_table as we are starting to parse a new rule.
1082
9.34k
  yr_hash_table_clean(compiler->strings_table, NULL);
1083
1084
9.34k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1085
9.34k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1086
1087
9.34k
  return ERROR_SUCCESS;
1088
9.34k
}
1089
1090
int yr_parser_reduce_rule_declaration_phase_2(
1091
    yyscan_t yyscanner,
1092
    YR_ARENA_REF* rule_ref)
1093
417
{
1094
417
  uint32_t max_strings_per_rule;
1095
417
  uint32_t strings_in_rule = 0;
1096
1097
417
  YR_FIXUP* fixup;
1098
417
  YR_STRING* string;
1099
417
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1100
1101
417
  yr_get_configuration_uint32(
1102
417
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1103
1104
417
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1105
1106
  // Show warning if the rule is generating too many atoms. The warning is
1107
  // shown if the number of atoms is greater than 20 times the maximum number
1108
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1109
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1110
1111
417
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1112
19
  {
1113
19
    yywarning(yyscanner, "rule is slowing down scanning");
1114
19
  }
1115
1116
417
  yr_rule_strings_foreach(rule, string)
1117
2.11k
  {
1118
    // Only the heading fragment in a chain of strings (the one with
1119
    // chained_to == NULL) must be referenced. All other fragments
1120
    // are never marked as referenced.
1121
    //
1122
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1123
    // strings must always be referenced.
1124
1125
2.11k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1126
156
        (STRING_IS_ANONYMOUS(string) ||
1127
149
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1128
19
    {
1129
19
      yr_compiler_set_error_extra_info(
1130
19
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1131
19
    }
1132
1133
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1134
    // that it will match anywhere.
1135
2.09k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1136
137
        STRING_IS_FIXED_OFFSET(string))
1137
94
    {
1138
94
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1139
94
    }
1140
1141
2.09k
    strings_in_rule++;
1142
1143
2.09k
    if (strings_in_rule > max_strings_per_rule)
1144
0
    {
1145
0
      yr_compiler_set_error_extra_info(
1146
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1147
0
    }
1148
2.09k
  }
1149
1150
398
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1151
398
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1152
1153
398
  fixup = compiler->fixup_stack_head;
1154
1155
398
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1156
398
      compiler->arena, &fixup->ref);
1157
1158
398
  int32_t jmp_offset = yr_arena_get_current_offset(
1159
398
                           compiler->arena, YR_CODE_SECTION) -
1160
398
                       fixup->ref.offset + 1;
1161
1162
398
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1163
1164
  // Remove fixup from the stack.
1165
398
  compiler->fixup_stack_head = fixup->next;
1166
398
  yr_free(fixup);
1167
1168
  // We have finished parsing the current rule set current_rule_idx to
1169
  // UINT32_MAX indicating that we are not currently parsing a rule.
1170
398
  compiler->current_rule_idx = UINT32_MAX;
1171
1172
398
  return ERROR_SUCCESS;
1173
398
}
1174
1175
int yr_parser_reduce_string_identifier(
1176
    yyscan_t yyscanner,
1177
    const char* identifier,
1178
    uint8_t instruction,
1179
    uint64_t at_offset)
1180
14.8k
{
1181
14.8k
  YR_STRING* string;
1182
14.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1183
1184
14.8k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1185
12.0k
  {
1186
12.0k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1187
11.9k
    {
1188
11.9k
      yr_parser_emit_with_arg(
1189
11.9k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1190
1191
11.9k
      yr_parser_emit(yyscanner, instruction, NULL);
1192
1193
11.9k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1194
11.9k
          compiler, compiler->current_rule_idx);
1195
1196
11.9k
      yr_rule_strings_foreach(current_rule, string)
1197
881k
      {
1198
881k
        if (instruction != OP_FOUND)
1199
880k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1200
1201
881k
        if (instruction == OP_FOUND_AT)
1202
759
        {
1203
          // Avoid overwriting any previous fixed offset
1204
759
          if (string->fixed_offset == YR_UNDEFINED)
1205
255
            string->fixed_offset = at_offset;
1206
1207
          // If a previous fixed offset was different, disable
1208
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1209
          // have room to store a single fixed offset value
1210
759
          if (string->fixed_offset != at_offset)
1211
434
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1212
759
        }
1213
880k
        else
1214
880k
        {
1215
880k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1216
880k
        }
1217
881k
      }
1218
11.9k
    }
1219
82
    else
1220
82
    {
1221
      // Anonymous strings not allowed outside of a loop
1222
82
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1223
82
    }
1224
12.0k
  }
1225
2.74k
  else
1226
2.74k
  {
1227
2.74k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1228
1229
2.71k
    FAIL_ON_ERROR(
1230
2.71k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1231
1232
2.71k
    if (instruction != OP_FOUND)
1233
2.42k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1234
1235
2.71k
    if (instruction == OP_FOUND_AT)
1236
598
    {
1237
      // Avoid overwriting any previous fixed offset
1238
1239
598
      if (string->fixed_offset == YR_UNDEFINED)
1240
237
        string->fixed_offset = at_offset;
1241
1242
      // If a previous fixed offset was different, disable
1243
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1244
      // have room to store a single fixed offset value
1245
1246
598
      if (string->fixed_offset == YR_UNDEFINED ||
1247
370
          string->fixed_offset != at_offset)
1248
393
      {
1249
393
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1250
393
      }
1251
598
    }
1252
2.11k
    else
1253
2.11k
    {
1254
2.11k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1255
2.11k
    }
1256
1257
2.71k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1258
1259
2.71k
    string->flags |= STRING_FLAGS_REFERENCED;
1260
2.71k
  }
1261
1262
14.6k
  return ERROR_SUCCESS;
1263
14.8k
}
1264
1265
int yr_parser_reduce_meta_declaration(
1266
    yyscan_t yyscanner,
1267
    int32_t type,
1268
    const char* identifier,
1269
    const char* string,
1270
    int64_t integer,
1271
    YR_ARENA_REF* meta_ref)
1272
875
{
1273
875
  YR_ARENA_REF ref;
1274
875
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1275
1276
875
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1277
875
      compiler->arena,
1278
875
      YR_METAS_TABLE,
1279
875
      sizeof(YR_META),
1280
875
      meta_ref,
1281
875
      offsetof(YR_META, identifier),
1282
875
      offsetof(YR_META, string),
1283
875
      EOL));
1284
1285
875
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1286
1287
875
  meta->type = type;
1288
875
  meta->integer = integer;
1289
1290
875
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1291
1292
875
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1293
1294
875
  if (string != NULL)
1295
263
  {
1296
263
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1297
1298
263
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1299
263
  }
1300
612
  else
1301
612
  {
1302
612
    meta->string = NULL;
1303
612
  }
1304
1305
875
  compiler->current_meta_idx++;
1306
1307
875
  return ERROR_SUCCESS;
1308
875
}
1309
1310
// Returns true if `module_name` is acceptable as a module name: it must be
// non-empty and its recorded length must equal its C-string length (i.e. it
// contains no embedded NUL bytes). Returns false otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  // strlen() is evaluated only for non-empty names.
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1320
1321
// Handles an `import "<module_name>"` statement.
//
// If the module is not already present in objects_table for the current
// namespace, a structure object is created for it, registered in
// objects_table, populated by yr_modules_do_declarations and an OP_IMPORT
// instruction referencing the stored module name is emitted. Importing a
// module that is already registered is a no-op.
//
// Returns ERROR_SUCCESS, ERROR_INVALID_MODULE_NAME, the error returned by
// yr_modules_do_declarations (ERROR_UNKNOWN_MODULE also sets the error extra
// info), or the error reported by the object / hash-table / arena helpers.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into objects_table, so nothing else holds a
    // pointer to it; release it here to avoid leaking it.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the object stays registered in objects_table, also when the
  // declarations below fail.
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1378
1379
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1380
18.8k
{
1381
18.8k
  int opcode = 0;
1382
1383
18.8k
  switch (expression_type)
1384
18.8k
  {
1385
12.2k
  case EXPRESSION_TYPE_INTEGER:
1386
12.2k
    opcode = OP_INT_BEGIN;
1387
12.2k
    break;
1388
3.36k
  case EXPRESSION_TYPE_FLOAT:
1389
3.36k
    opcode = OP_DBL_BEGIN;
1390
3.36k
    break;
1391
3.26k
  case EXPRESSION_TYPE_STRING:
1392
3.26k
    opcode = OP_STR_BEGIN;
1393
3.26k
    break;
1394
0
  default:
1395
0
    assert(false);
1396
18.8k
  }
1397
1398
18.8k
  if (op[0] == '<')
1399
1.90k
  {
1400
1.90k
    if (op[1] == '=')
1401
410
      opcode += _OP_LE;
1402
1.49k
    else
1403
1.49k
      opcode += _OP_LT;
1404
1.90k
  }
1405
16.9k
  else if (op[0] == '>')
1406
1.70k
  {
1407
1.70k
    if (op[1] == '=')
1408
510
      opcode += _OP_GE;
1409
1.19k
    else
1410
1.19k
      opcode += _OP_GT;
1411
1.70k
  }
1412
15.2k
  else if (op[1] == '=')
1413
1.59k
  {
1414
1.59k
    if (op[0] == '=')
1415
827
      opcode += _OP_EQ;
1416
765
    else
1417
765
      opcode += _OP_NEQ;
1418
1.59k
  }
1419
13.6k
  else if (op[0] == '+')
1420
3.60k
  {
1421
3.60k
    opcode += _OP_ADD;
1422
3.60k
  }
1423
10.0k
  else if (op[0] == '-')
1424
6.79k
  {
1425
6.79k
    opcode += _OP_SUB;
1426
6.79k
  }
1427
3.28k
  else if (op[0] == '*')
1428
1.83k
  {
1429
1.83k
    opcode += _OP_MUL;
1430
1.83k
  }
1431
1.44k
  else if (op[0] == '\\')
1432
1.44k
  {
1433
1.44k
    opcode += _OP_DIV;
1434
1.44k
  }
1435
1436
18.8k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1437
18.8k
  {
1438
18.8k
    return opcode;
1439
18.8k
  }
1440
1441
8
  return OP_ERROR;
1442
18.8k
}
1443
1444
int yr_parser_reduce_operation(
1445
    yyscan_t yyscanner,
1446
    const char* op,
1447
    YR_EXPRESSION left_operand,
1448
    YR_EXPRESSION right_operand)
1449
19.0k
{
1450
19.0k
  int expression_type;
1451
1452
19.0k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1453
1454
19.0k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1455
5.22k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1456
15.6k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1457
2.42k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1458
15.6k
  {
1459
15.6k
    if (left_operand.type != right_operand.type)
1460
2.57k
    {
1461
      // One operand is double and the other is integer,
1462
      // cast the integer to double
1463
1464
2.57k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1465
2.57k
          yyscanner,
1466
2.57k
          OP_INT_TO_DBL,
1467
2.57k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1468
2.57k
          NULL,
1469
2.57k
          NULL));
1470
2.57k
    }
1471
1472
15.6k
    expression_type = EXPRESSION_TYPE_FLOAT;
1473
1474
15.6k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1475
13.8k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1476
12.2k
    {
1477
12.2k
      expression_type = EXPRESSION_TYPE_INTEGER;
1478
12.2k
    }
1479
1480
15.6k
    FAIL_ON_ERROR(yr_parser_emit(
1481
15.6k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1482
15.6k
  }
1483
3.48k
  else if (
1484
3.48k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1485
3.32k
      right_operand.type == EXPRESSION_TYPE_STRING)
1486
3.26k
  {
1487
3.26k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1488
1489
3.26k
    if (opcode != OP_ERROR)
1490
3.25k
    {
1491
3.25k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1492
3.25k
    }
1493
8
    else
1494
8
    {
1495
8
      yr_compiler_set_error_extra_info_fmt(
1496
8
          compiler, "strings don't support \"%s\" operation", op);
1497
1498
8
      return ERROR_WRONG_TYPE;
1499
8
    }
1500
3.26k
  }
1501
218
  else
1502
218
  {
1503
218
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1504
1505
218
    return ERROR_WRONG_TYPE;
1506
218
  }
1507
1508
18.8k
  return ERROR_SUCCESS;
1509
19.0k
}
1510
1511
int yr_parser_mark_nonfast(
1512
   yyscan_t yyscanner,
1513
   YR_STRING_SET string_set
1514
5.89k
) {
1515
5.89k
 YR_COMPILER* compiler = yyget_extra(yyscanner);
1516
1517
5.89k
 YR_STRING_SET_ELEMENT* head = string_set.head;
1518
863k
  while (head != NULL) {
1519
857k
    YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element);
1520
857k
    string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1521
857k
    head = head->next;
1522
857k
  }
1523
5.89k
  return ERROR_SUCCESS;
1524
5.89k
}