Coverage Report

Created: 2026-01-17 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/parser.c
Line
Count
Source
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
#include "yara/compiler.h"
47
#include "yara/types.h"
48
49
// Converts a hexadecimal digit character to its numeric value. Characters in
// the range 'A'-'F' map to 10-15; any other character is treated as a decimal
// digit and mapped to (x - '0'). Lowercase 'a'-'f' are NOT handled.
//
// The whole expansion and every use of the parameter are parenthesized so the
// macro can be embedded in larger expressions (e.g. todigit(c) + 1). The
// argument is evaluated more than once, so it must not have side effects.
#define todigit(x)                                            \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
52
53
int yr_parser_emit(
54
    yyscan_t yyscanner,
55
    uint8_t instruction,
56
    YR_ARENA_REF* instruction_ref)
57
103k
{
58
103k
  return yr_arena_write_data(
59
103k
      yyget_extra(yyscanner)->arena,
60
103k
      YR_CODE_SECTION,
61
103k
      &instruction,
62
103k
      sizeof(uint8_t),
63
103k
      instruction_ref);
64
103k
}
65
66
int yr_parser_emit_with_arg_double(
67
    yyscan_t yyscanner,
68
    uint8_t instruction,
69
    double argument,
70
    YR_ARENA_REF* instruction_ref,
71
    YR_ARENA_REF* argument_ref)
72
3.90k
{
73
3.90k
  int result = yr_arena_write_data(
74
3.90k
      yyget_extra(yyscanner)->arena,
75
3.90k
      YR_CODE_SECTION,
76
3.90k
      &instruction,
77
3.90k
      sizeof(uint8_t),
78
3.90k
      instruction_ref);
79
80
3.90k
  if (result == ERROR_SUCCESS)
81
3.90k
    result = yr_arena_write_data(
82
3.90k
        yyget_extra(yyscanner)->arena,
83
3.90k
        YR_CODE_SECTION,
84
3.90k
        &argument,
85
3.90k
        sizeof(double),
86
3.90k
        argument_ref);
87
88
3.90k
  return result;
89
3.90k
}
90
91
int yr_parser_emit_with_arg_int32(
92
    yyscan_t yyscanner,
93
    uint8_t instruction,
94
    int32_t argument,
95
    YR_ARENA_REF* instruction_ref,
96
    YR_ARENA_REF* argument_ref)
97
21.5k
{
98
21.5k
  int result = yr_arena_write_data(
99
21.5k
      yyget_extra(yyscanner)->arena,
100
21.5k
      YR_CODE_SECTION,
101
21.5k
      &instruction,
102
21.5k
      sizeof(uint8_t),
103
21.5k
      instruction_ref);
104
105
21.5k
  if (result == ERROR_SUCCESS)
106
21.5k
    result = yr_arena_write_data(
107
21.5k
        yyget_extra(yyscanner)->arena,
108
21.5k
        YR_CODE_SECTION,
109
21.5k
        &argument,
110
21.5k
        sizeof(int32_t),
111
21.5k
        argument_ref);
112
113
21.5k
  return result;
114
21.5k
}
115
116
int yr_parser_emit_with_arg(
117
    yyscan_t yyscanner,
118
    uint8_t instruction,
119
    int64_t argument,
120
    YR_ARENA_REF* instruction_ref,
121
    YR_ARENA_REF* argument_ref)
122
42.6k
{
123
42.6k
  int result = yr_arena_write_data(
124
42.6k
      yyget_extra(yyscanner)->arena,
125
42.6k
      YR_CODE_SECTION,
126
42.6k
      &instruction,
127
42.6k
      sizeof(uint8_t),
128
42.6k
      instruction_ref);
129
130
42.6k
  if (result == ERROR_SUCCESS)
131
42.6k
    result = yr_arena_write_data(
132
42.6k
        yyget_extra(yyscanner)->arena,
133
42.6k
        YR_CODE_SECTION,
134
42.6k
        &argument,
135
42.6k
        sizeof(int64_t),
136
42.6k
        argument_ref);
137
138
42.6k
  return result;
139
42.6k
}
140
141
int yr_parser_emit_with_arg_reloc(
142
    yyscan_t yyscanner,
143
    uint8_t instruction,
144
    void* argument,
145
    YR_ARENA_REF* instruction_ref,
146
    YR_ARENA_REF* argument_ref)
147
844k
{
148
844k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
149
150
844k
  DECLARE_REFERENCE(void*, ptr) arg;
151
152
844k
  memset(&arg, 0, sizeof(arg));
153
844k
  arg.ptr = argument;
154
155
844k
  int result = yr_arena_write_data(
156
844k
      yyget_extra(yyscanner)->arena,
157
844k
      YR_CODE_SECTION,
158
844k
      &instruction,
159
844k
      sizeof(uint8_t),
160
844k
      instruction_ref);
161
162
844k
  if (result == ERROR_SUCCESS)
163
844k
    result = yr_arena_write_data(
164
844k
        yyget_extra(yyscanner)->arena,
165
844k
        YR_CODE_SECTION,
166
844k
        &arg,
167
844k
        sizeof(arg),
168
844k
        &ref);
169
170
844k
  if (result == ERROR_SUCCESS)
171
844k
    result = yr_arena_make_ptr_relocatable(
172
844k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
173
174
844k
  if (argument_ref != NULL)
175
0
    *argument_ref = ref;
176
177
844k
  return result;
178
844k
}
179
180
int yr_parser_emit_pushes_for_strings(
181
    yyscan_t yyscanner,
182
    const char* identifier,
183
    YR_STRING_SET* strings)
184
7.75k
{
185
7.75k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
186
187
7.75k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
188
7.75k
      compiler, compiler->current_rule_idx);
189
190
7.75k
  YR_STRING* string;
191
192
7.75k
  const char* string_identifier;
193
7.75k
  const char* target_identifier;
194
195
7.75k
  strings->count = 0;
196
7.75k
  strings->head = NULL;
197
7.75k
  YR_STRING_SET_ELEMENT** tail_ptr = &strings->head;
198
199
7.75k
  yr_rule_strings_foreach(current_rule, string)
200
840k
  {
201
    // Don't generate pushes for strings chained to another one, we are
202
    // only interested in non-chained strings or the head of the chain.
203
204
840k
    if (string->chained_to == NULL)
205
837k
    {
206
837k
      string_identifier = string->identifier;
207
837k
      target_identifier = identifier;
208
209
1.67M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
210
837k
             *target_identifier == *string_identifier)
211
837k
      {
212
837k
        target_identifier++;
213
837k
        string_identifier++;
214
837k
      }
215
216
837k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
217
8.16k
          *target_identifier == '*')
218
831k
      {
219
831k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
220
221
831k
        string->flags |= STRING_FLAGS_REFERENCED;
222
831k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
223
831k
        strings->count++;
224
225
831k
        *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT));
226
831k
        yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element));
227
831k
        (*tail_ptr)->next = NULL;
228
831k
        tail_ptr = &(*tail_ptr)->next;
229
831k
      }
230
837k
    }
231
840k
  }
232
233
7.75k
  if (strings->count == 0)
234
45
  {
235
45
    yr_compiler_set_error_extra_info(
236
45
        compiler, identifier) return ERROR_UNDEFINED_STRING;
237
45
  }
238
239
7.71k
  return ERROR_SUCCESS;
240
7.75k
}
241
242
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
243
// prefix.
244
int yr_parser_emit_pushes_for_rules(
245
    yyscan_t yyscanner,
246
    const char* prefix,
247
    int* count)
248
934
{
249
934
  YR_COMPILER* compiler = yyget_extra(yyscanner);
250
251
  // Make sure the compiler is parsing a rule
252
934
  assert(compiler->current_rule_idx != UINT32_MAX);
253
254
934
  YR_RULE* rule;
255
934
  int matching = 0;
256
257
934
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
258
934
      compiler->arena,
259
934
      YR_NAMESPACES_TABLE,
260
934
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
261
262
  // Can't use yr_rules_foreach here as that requires the rules to have been
263
  // finalized (inserting a NULL rule at the end). This is done when
264
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
265
  // into the current position in the code arena. Obviously we aren't done
266
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
267
  // this I'm manually walking all the currently compiled rules (up to the
268
  // current rule index) and comparing identifiers to see if it is one we should
269
  // use.
270
  //
271
  // Further, we have to get compiler->current_rule_idx before we start because
272
  // if we emit an OP_PUSH_RULE
273
934
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
274
275
4.05k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
276
3.12k
  {
277
    // Is rule->identifier prefixed by prefix?
278
3.12k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
279
937
    {
280
937
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
281
937
          compiler->rules_table, rule->identifier, ns->name);
282
283
937
      if (rule_idx != UINT32_MAX)
284
937
      {
285
937
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
286
937
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
287
937
        matching++;
288
937
      }
289
937
    }
290
291
3.12k
    rule++;
292
3.12k
  }
293
294
934
  if (count != NULL)
295
934
  {
296
934
    *count = matching;
297
934
  }
298
299
934
  if (matching == 0)
300
30
  {
301
30
    yr_compiler_set_error_extra_info(compiler, prefix);
302
30
    return ERROR_UNDEFINED_IDENTIFIER;
303
30
  }
304
305
904
  return ERROR_SUCCESS;
306
934
}
307
308
// Emits the shortest push instruction able to represent `argument`:
//
//   YR_UNDEFINED        -> OP_PUSH_U  (no operand)
//   0 .. 0xff           -> OP_PUSH_8  (1-byte operand)
//   0x100 .. 0xffff     -> OP_PUSH_16 (2-byte operand)
//   .. 0xffffffff       -> OP_PUSH_32 (4-byte operand)
//   anything larger     -> OP_PUSH    (8-byte operand)
//
// The YR_UNDEFINED test comes first, so that value is never encoded as a
// plain integer. Operands are copied in the host's native byte order.
// Returns the result of yr_arena_write_data.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus room for the widest (64-bit) operand.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    const uint8_t operand = (uint8_t) argument;

    encoded[0] = OP_PUSH_8;
    encoded[1] = operand;
    encoded_len += sizeof(operand);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t operand = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t operand = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  return yr_arena_write_data(
      compiler->arena, YR_CODE_SECTION, encoded, encoded_len, NULL);
}
347
348
int yr_parser_check_types(
349
    YR_COMPILER* compiler,
350
    YR_OBJECT_FUNCTION* function,
351
    const char* actual_args_fmt)
352
392
{
353
392
  int i;
354
355
504
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
356
504
  {
357
504
    if (function->prototypes[i].arguments_fmt == NULL)
358
8
      break;
359
360
496
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
361
384
      return ERROR_SUCCESS;
362
496
  }
363
364
8
  yr_compiler_set_error_extra_info(compiler, function->identifier)
365
366
8
      return ERROR_WRONG_ARGUMENTS;
367
392
}
368
369
int yr_parser_lookup_string(
370
    yyscan_t yyscanner,
371
    const char* identifier,
372
    YR_STRING** string)
373
2.68k
{
374
2.68k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
375
376
2.68k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
377
2.68k
      compiler, compiler->current_rule_idx);
378
379
2.68k
  yr_rule_strings_foreach(current_rule, *string)
380
3.06k
  {
381
    // If some string $a gets fragmented into multiple chained
382
    // strings, all those fragments have the same $a identifier
383
    // but we are interested in the heading fragment, which is
384
    // that with chained_to == NULL
385
386
3.06k
    if ((*string)->chained_to == NULL &&
387
2.90k
        strcmp((*string)->identifier, identifier) == 0)
388
2.62k
    {
389
2.62k
      return ERROR_SUCCESS;
390
2.62k
    }
391
3.06k
  }
392
393
57
  yr_compiler_set_error_extra_info(compiler, identifier)
394
395
57
      * string = NULL;
396
397
57
  return ERROR_UNDEFINED_STRING;
398
2.68k
}
399
400
////////////////////////////////////////////////////////////////////////////////
401
// Searches for a variable with the given identifier in the scope of the current
402
// "for" loop. In case of nested "for" loops the identifier is searched starting
403
// at the top-level loop and going down thorough the nested loops until the
404
// current one. This is ok because inner loops can not re-define an identifier
405
// already defined by an outer loop.
406
//
407
// If the variable is found, the return value is the position that the variable
408
// occupies among all the currently defined variables. If the variable doesn't
409
// exist the return value is -1.
410
//
411
// The function can receive a pointer to a YR_EXPRESSION that will populated
412
// with information about the variable if found. This pointer can be NULL if
413
// the caller is not interested in getting that information.
414
//
415
int yr_parser_lookup_loop_variable(
416
    yyscan_t yyscanner,
417
    const char* identifier,
418
    YR_EXPRESSION* expr)
419
17.7k
{
420
17.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
421
17.7k
  int i, j;
422
17.7k
  int var_offset = 0;
423
424
23.0k
  for (i = 0; i <= compiler->loop_index; i++)
425
14.1k
  {
426
14.1k
    var_offset += compiler->loop[i].vars_internal_count;
427
428
23.7k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
429
18.4k
    {
430
18.4k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
431
17.6k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
432
8.84k
      {
433
8.84k
        if (expr != NULL)
434
8.77k
          *expr = compiler->loop[i].vars[j];
435
436
8.84k
        return var_offset + j;
437
8.84k
      }
438
18.4k
    }
439
440
5.33k
    var_offset += compiler->loop[i].vars_count;
441
5.33k
  }
442
443
8.88k
  return -1;
444
17.7k
}
445
446
static int _yr_parser_write_string(
447
    const char* identifier,
448
    YR_MODIFIER modifier,
449
    YR_COMPILER* compiler,
450
    SIZED_STRING* str,
451
    RE_AST* re_ast,
452
    YR_ARENA_REF* string_ref,
453
    int* min_atom_quality,
454
    int* num_atom)
455
27.6k
{
456
27.6k
  SIZED_STRING* literal_string;
457
27.6k
  YR_ATOM_LIST_ITEM* atom;
458
27.6k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
459
460
27.6k
  int c, result;
461
27.6k
  int max_string_len;
462
27.6k
  bool free_literal = false;
463
464
27.6k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
465
27.6k
      compiler->arena,
466
27.6k
      YR_STRINGS_TABLE,
467
27.6k
      sizeof(YR_STRING),
468
27.6k
      string_ref,
469
27.6k
      offsetof(YR_STRING, identifier),
470
27.6k
      offsetof(YR_STRING, string),
471
27.6k
      offsetof(YR_STRING, chained_to),
472
27.6k
      EOL));
473
474
27.6k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
475
27.6k
      compiler->arena, string_ref);
476
477
27.6k
  YR_ARENA_REF ref;
478
479
27.6k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
480
481
27.6k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
482
27.6k
  string->rule_idx = compiler->current_rule_idx;
483
27.6k
  string->idx = compiler->current_string_idx;
484
27.6k
  string->fixed_offset = YR_UNDEFINED;
485
486
27.6k
  compiler->current_string_idx++;
487
488
27.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
489
25.9k
      modifier.flags & STRING_FLAGS_REGEXP ||
490
3.82k
      modifier.flags & STRING_FLAGS_BASE64 ||
491
2.44k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
492
25.7k
  {
493
25.7k
    literal_string = yr_re_ast_extract_literal(re_ast);
494
495
25.7k
    if (literal_string != NULL)
496
18.4k
      free_literal = true;
497
25.7k
  }
498
1.94k
  else
499
1.94k
  {
500
1.94k
    literal_string = str;
501
1.94k
  }
502
503
27.6k
  if (literal_string != NULL)
504
20.3k
  {
505
20.3k
    modifier.flags |= STRING_FLAGS_LITERAL;
506
507
20.3k
    result = _yr_compiler_store_data(
508
20.3k
        compiler,
509
20.3k
        literal_string->c_string,
510
20.3k
        literal_string->length + 1,  // +1 to include terminating NULL
511
20.3k
        &ref);
512
513
20.3k
    if (result != ERROR_SUCCESS)
514
0
      goto cleanup;
515
516
20.3k
    string->length = (uint32_t) literal_string->length;
517
20.3k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
518
519
20.3k
    if (modifier.flags & STRING_FLAGS_WIDE)
520
1.84k
      max_string_len = string->length * 2;
521
18.5k
    else
522
18.5k
      max_string_len = string->length;
523
524
20.3k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
525
15.9k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
526
527
20.3k
    result = yr_atoms_extract_from_string(
528
20.3k
        &compiler->atoms_config,
529
20.3k
        (uint8_t*) literal_string->c_string,
530
20.3k
        (int32_t) literal_string->length,
531
20.3k
        modifier,
532
20.3k
        &atom_list,
533
20.3k
        min_atom_quality);
534
535
20.3k
    if (result != ERROR_SUCCESS)
536
0
      goto cleanup;
537
20.3k
  }
538
7.27k
  else
539
7.27k
  {
540
    // Non-literal strings can't be marked as fixed offset because once we
541
    // find a string atom in the scanned data we don't know the offset where
542
    // the string should start, as the non-literal strings can contain
543
    // variable-length portions.
544
7.27k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
545
546
    // Save the position where the RE forward code starts for later reference.
547
7.27k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
548
7.27k
        compiler->arena, YR_RE_CODE_SECTION);
549
550
    // Emit forwards code
551
7.27k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
552
553
7.27k
    if (result != ERROR_SUCCESS)
554
126
      goto cleanup;
555
556
    // Emit backwards code
557
7.14k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
558
559
7.14k
    if (result != ERROR_SUCCESS)
560
9
      goto cleanup;
561
562
    // Extract atoms from the regular expression.
563
7.14k
    result = yr_atoms_extract_from_re(
564
7.14k
        &compiler->atoms_config,
565
7.14k
        re_ast,
566
7.14k
        modifier,
567
7.14k
        &atom_list,
568
7.14k
        min_atom_quality);
569
570
7.14k
    if (result != ERROR_SUCCESS)
571
0
      goto cleanup;
572
573
    // If no atom was extracted let's add a zero-length atom.
574
7.14k
    if (atom_list == NULL)
575
1.72k
    {
576
1.72k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
577
578
1.72k
      if (atom_list == NULL)
579
0
      {
580
0
        result = ERROR_INSUFFICIENT_MEMORY;
581
0
        goto cleanup;
582
0
      }
583
584
1.72k
      atom_list->atom.length = 0;
585
1.72k
      atom_list->backtrack = 0;
586
1.72k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
587
1.72k
      atom_list->next = NULL;
588
589
1.72k
      yr_arena_ptr_to_ref(
590
1.72k
          compiler->arena,
591
1.72k
          yr_arena_get_ptr(
592
1.72k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
593
1.72k
          &(atom_list->forward_code_ref));
594
1.72k
    }
595
7.14k
  }
596
597
27.5k
  string->flags = modifier.flags;
598
599
  // Add the string to Aho-Corasick automaton.
600
27.5k
  result = yr_ac_add_string(
601
27.5k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
602
603
27.5k
  if (result != ERROR_SUCCESS)
604
0
    goto cleanup;
605
606
27.5k
  atom = atom_list;
607
27.5k
  c = 0;
608
609
1.77M
  while (atom != NULL)
610
1.74M
  {
611
1.74M
    atom = atom->next;
612
1.74M
    c++;
613
1.74M
  }
614
615
27.5k
  (*num_atom) += c;
616
617
27.6k
cleanup:
618
27.6k
  if (free_literal)
619
18.4k
    yr_free(literal_string);
620
621
27.6k
  if (atom_list != NULL)
622
27.5k
    yr_atoms_list_destroy(atom_list);
623
624
27.6k
  return result;
625
27.5k
}
626
627
static int _yr_parser_check_string_modifiers(
628
    yyscan_t yyscanner,
629
    YR_MODIFIER modifier)
630
23.8k
{
631
23.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
632
633
  // xor and nocase together is not implemented.
634
23.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
635
757
      modifier.flags & STRING_FLAGS_NO_CASE)
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler, "invalid modifier combination: xor nocase");
639
0
    return ERROR_INVALID_MODIFIER;
640
0
  }
641
642
  // base64 and nocase together is not implemented.
643
23.8k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
644
3.98k
      (modifier.flags & STRING_FLAGS_BASE64 ||
645
3.98k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
646
7
  {
647
7
    yr_compiler_set_error_extra_info(
648
7
        compiler,
649
7
        modifier.flags & STRING_FLAGS_BASE64
650
7
            ? "invalid modifier combination: base64 nocase"
651
7
            : "invalid modifier combination: base64wide nocase");
652
7
    return ERROR_INVALID_MODIFIER;
653
7
  }
654
655
  // base64 and fullword together is not implemented.
656
23.8k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
657
125
      (modifier.flags & STRING_FLAGS_BASE64 ||
658
125
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
659
4
  {
660
4
    yr_compiler_set_error_extra_info(
661
4
        compiler,
662
4
        modifier.flags & STRING_FLAGS_BASE64
663
4
            ? "invalid modifier combination: base64 fullword"
664
4
            : "invalid modifier combination: base64wide fullword");
665
4
    return ERROR_INVALID_MODIFIER;
666
4
  }
667
668
  // base64 and xor together is not implemented.
669
23.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
670
757
      (modifier.flags & STRING_FLAGS_BASE64 ||
671
757
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
672
4
  {
673
4
    yr_compiler_set_error_extra_info(
674
4
        compiler,
675
4
        modifier.flags & STRING_FLAGS_BASE64
676
4
            ? "invalid modifier combination: base64 xor"
677
4
            : "invalid modifier combination: base64wide xor");
678
4
    return ERROR_INVALID_MODIFIER;
679
4
  }
680
681
23.8k
  return ERROR_SUCCESS;
682
23.8k
}
683
684
int yr_parser_reduce_string_declaration(
685
    yyscan_t yyscanner,
686
    YR_MODIFIER modifier,
687
    const char* identifier,
688
    SIZED_STRING* str,
689
    YR_ARENA_REF* string_ref)
690
23.8k
{
691
23.8k
  int result = ERROR_SUCCESS;
692
23.8k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
693
23.8k
  int atom_quality;
694
695
23.8k
  char message[512];
696
697
23.8k
  int32_t min_gap = 0;
698
23.8k
  int32_t max_gap = 0;
699
700
23.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
701
702
23.8k
  RE_AST* re_ast = NULL;
703
23.8k
  RE_AST* remainder_re_ast = NULL;
704
23.8k
  RE_ERROR re_error;
705
706
23.8k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
707
23.8k
      compiler, compiler->current_rule_idx);
708
709
  // Determine if a string with the same identifier was already defined
710
  // by searching for the identifier in strings_table.
711
23.8k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
712
23.8k
      compiler->strings_table, identifier, NULL);
713
714
  // The string was already defined, return an error.
715
23.8k
  if (string_idx != UINT32_MAX)
716
22
  {
717
22
    yr_compiler_set_error_extra_info(compiler, identifier);
718
22
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
719
22
  }
720
721
  // Empty strings are not allowed.
722
23.8k
  if (str->length == 0)
723
4
  {
724
4
    yr_compiler_set_error_extra_info(compiler, identifier);
725
4
    return ERROR_EMPTY_STRING;
726
4
  }
727
728
23.8k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
729
3.67k
    modifier.flags |= STRING_FLAGS_NO_CASE;
730
731
23.8k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
732
290
    modifier.flags |= STRING_FLAGS_DOT_ALL;
733
734
  // Hex strings are always handled as DOT_ALL regexps.
735
23.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
736
1.15k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
737
738
23.8k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
739
22.2k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
740
20.9k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
741
20.8k
  {
742
20.8k
    modifier.flags |= STRING_FLAGS_ASCII;
743
20.8k
  }
744
745
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
746
  // a single match for the string is enough. This is true in
747
  // most cases, except when the string count (#) and string offset (@)
748
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
749
  // initially, and unmarked later if required.
750
23.8k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
751
752
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
753
  // need to be searched all over the file because the user is using the
754
  // "at" operator. The string must be searched at a fixed offset in the
755
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
756
  // and unmarked later if required.
757
23.8k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
758
759
  // If string identifier is $ this is an anonymous string, if not add the
760
  // identifier to strings_table.
761
23.8k
  if (strcmp(identifier, "$") == 0)
762
22.5k
  {
763
22.5k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
764
22.5k
  }
765
1.27k
  else
766
1.27k
  {
767
1.27k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
768
1.27k
        compiler->strings_table,
769
1.27k
        identifier,
770
1.27k
        NULL,
771
1.27k
        compiler->current_string_idx));
772
1.27k
  }
773
774
  // Make sure that the the string does not have an invalid combination of
775
  // modifiers.
776
23.8k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
777
778
23.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
779
22.6k
      modifier.flags & STRING_FLAGS_REGEXP ||
780
3.82k
      modifier.flags & STRING_FLAGS_BASE64 ||
781
2.44k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
782
21.8k
  {
783
21.8k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
784
1.15k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
785
20.7k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
786
18.8k
    {
787
18.8k
      int flags = RE_PARSER_FLAG_NONE;
788
18.8k
      if (compiler->strict_escape)
789
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
790
18.8k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
791
18.8k
    }
792
1.87k
    else
793
1.87k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
794
795
21.8k
    if (result != ERROR_SUCCESS)
796
981
    {
797
981
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
798
0
      {
799
0
        yywarning(yyscanner, "unknown escape sequence");
800
0
      }
801
981
      else
802
981
      {
803
981
        snprintf(
804
981
            message,
805
981
            sizeof(message),
806
981
            "invalid %s \"%s\": %s",
807
981
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
808
981
                                                        : "regular expression",
809
981
            identifier,
810
981
            re_error.message);
811
812
981
        yr_compiler_set_error_extra_info(compiler, message);
813
981
        goto _exit;
814
981
      }
815
981
    }
816
817
20.9k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
818
700
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
819
820
20.9k
    if (re_ast->flags & RE_FLAGS_GREEDY)
821
1.04k
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
822
823
    // Regular expressions in the strings section can't mix greedy and
824
    // ungreedy quantifiers like .* and .*?. That's because these regular
825
    // expressions can be matched forwards and/or backwards depending on the
826
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
827
    // be able to properly calculate the length of the match.
828
829
20.9k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
830
1.04k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
831
6
    {
832
6
      result = ERROR_INVALID_REGULAR_EXPRESSION;
833
834
6
      yr_compiler_set_error_extra_info(
835
6
          compiler,
836
6
          "greedy and ungreedy quantifiers can't be mixed in a regular "
837
6
          "expression");
838
839
6
      goto _exit;
840
6
    }
841
842
20.8k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
843
1.73k
    {
844
1.73k
      yywarning(
845
1.73k
          yyscanner,
846
1.73k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
847
1.73k
          "with a reasonable value for N",
848
1.73k
          identifier);
849
1.73k
    }
850
851
20.8k
    if (compiler->re_ast_callback != NULL)
852
0
    {
853
0
      compiler->re_ast_callback(
854
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
855
0
    }
856
857
20.8k
    *string_ref = YR_ARENA_NULL_REF;
858
859
46.4k
    while (re_ast != NULL)
860
25.7k
    {
861
25.7k
      YR_ARENA_REF ref;
862
863
25.7k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
864
865
25.7k
      int32_t prev_min_gap = min_gap;
866
25.7k
      int32_t prev_max_gap = max_gap;
867
868
25.7k
      result = yr_re_ast_split_at_chaining_point(
869
25.7k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
870
871
25.7k
      if (result != ERROR_SUCCESS)
872
0
        goto _exit;
873
874
25.7k
      result = _yr_parser_write_string(
875
25.7k
          identifier,
876
25.7k
          modifier,
877
25.7k
          compiler,
878
25.7k
          NULL,
879
25.7k
          re_ast,
880
25.7k
          &ref,
881
25.7k
          &atom_quality,
882
25.7k
          &current_rule->num_atoms);
883
884
25.7k
      if (result != ERROR_SUCCESS)
885
135
        goto _exit;
886
887
25.5k
      if (atom_quality < min_atom_quality)
888
20.0k
        min_atom_quality = atom_quality;
889
890
25.5k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
891
20.7k
      {
892
        // This is the first string in the chain, the string reference
893
        // returned by this function must point to this string.
894
20.7k
        *string_ref = ref;
895
20.7k
      }
896
4.82k
      else
897
4.82k
      {
898
        // This is not the first string in the chain, set the appropriate
899
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
900
        // fields.
901
4.82k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
902
4.82k
            compiler->arena,
903
4.82k
            YR_STRINGS_TABLE,
904
4.82k
            prev_string_idx * sizeof(YR_STRING));
905
906
4.82k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
907
4.82k
            compiler->arena, &ref);
908
909
4.82k
        new_string->chained_to = prev_string;
910
4.82k
        new_string->chain_gap_min = prev_min_gap;
911
4.82k
        new_string->chain_gap_max = prev_max_gap;
912
913
        // A string chained to another one can't have a fixed offset, only the
914
        // head of the string chain can have a fixed offset.
915
4.82k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
916
917
        // There is a previous string, but that string wasn't marked as part
918
        // of a chain because we can't do that until knowing there will be
919
        // another string, let's flag it now the we know.
920
4.82k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
921
922
        // There is a previous string, so this string is part of a chain, but
923
        // there will be no more strings because there are no more AST to
924
        // split, which means that this is the chain's tail.
925
4.82k
        if (remainder_re_ast == NULL)
926
1.26k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
927
1.26k
                               STRING_FLAGS_CHAIN_TAIL;
928
4.82k
      }
929
930
25.5k
      yr_re_ast_destroy(re_ast);
931
25.5k
      re_ast = remainder_re_ast;
932
25.5k
    }
933
20.8k
  }
934
1.94k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
935
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
936
1.94k
  {
937
1.94k
    result = _yr_parser_write_string(
938
1.94k
        identifier,
939
1.94k
        modifier,
940
1.94k
        compiler,
941
1.94k
        str,
942
1.94k
        NULL,
943
1.94k
        string_ref,
944
1.94k
        &min_atom_quality,
945
1.94k
        &current_rule->num_atoms);
946
947
1.94k
    if (result != ERROR_SUCCESS)
948
0
      goto _exit;
949
1.94k
  }
950
951
22.7k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
952
6.99k
  {
953
6.99k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
954
6.99k
  }
955
956
23.8k
_exit:
957
958
23.8k
  if (re_ast != NULL)
959
565
    yr_re_ast_destroy(re_ast);
960
961
23.8k
  if (remainder_re_ast != NULL)
962
1
    yr_re_ast_destroy(remainder_re_ast);
963
964
23.8k
  return result;
965
22.7k
}
966
967
static int wildcard_iterator(
968
    void* prefix,
969
    size_t prefix_len,
970
    void* _value,
971
    void* data)
972
2.22k
{
973
2.22k
  const char* identifier = (const char*) data;
974
975
  // If the identifier is prefixed by prefix, then it matches the wildcard.
976
2.22k
  if (!strncmp(prefix, identifier, prefix_len))
977
164
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
978
979
2.06k
  return ERROR_SUCCESS;
980
2.22k
}
981
982
int yr_parser_reduce_rule_declaration_phase_1(
983
    yyscan_t yyscanner,
984
    int32_t flags,
985
    const char* identifier,
986
    YR_ARENA_REF* rule_ref)
987
28.7k
{
988
28.7k
  int result;
989
28.7k
  YR_FIXUP* fixup;
990
28.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
991
992
28.7k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
993
28.7k
      compiler->arena,
994
28.7k
      YR_NAMESPACES_TABLE,
995
28.7k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
996
997
28.7k
  if (yr_hash_table_lookup_uint32(
998
28.7k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
999
11.4k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
1000
17.2k
  {
1001
    // A rule or variable with the same identifier already exists, return the
1002
    // appropriate error.
1003
1004
17.2k
    yr_compiler_set_error_extra_info(compiler, identifier);
1005
17.2k
    return ERROR_DUPLICATED_IDENTIFIER;
1006
17.2k
  }
1007
1008
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1009
  // any of them are a prefix of the identifier being declared. If so, return
1010
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1011
11.4k
  result = yr_hash_table_iterate(
1012
11.4k
      compiler->wildcard_identifiers_table,
1013
11.4k
      ns->name,
1014
11.4k
      wildcard_iterator,
1015
11.4k
      (void*) identifier);
1016
1017
11.4k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1018
164
  {
1019
    // This rule matches an existing wildcard rule set.
1020
164
    yr_compiler_set_error_extra_info(compiler, identifier);
1021
164
  }
1022
1023
11.4k
  FAIL_ON_ERROR(result);
1024
1025
11.3k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1026
11.3k
      compiler->arena,
1027
11.3k
      YR_RULES_TABLE,
1028
11.3k
      sizeof(YR_RULE),
1029
11.3k
      rule_ref,
1030
11.3k
      offsetof(YR_RULE, identifier),
1031
11.3k
      offsetof(YR_RULE, tags),
1032
11.3k
      offsetof(YR_RULE, strings),
1033
11.3k
      offsetof(YR_RULE, metas),
1034
11.3k
      offsetof(YR_RULE, ns),
1035
11.3k
      EOL));
1036
1037
11.3k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1038
1039
11.3k
  YR_ARENA_REF ref;
1040
1041
11.3k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1042
1043
11.3k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1044
11.3k
  rule->flags = flags;
1045
11.3k
  rule->ns = ns;
1046
11.3k
  rule->num_atoms = 0;
1047
1048
11.3k
  YR_ARENA_REF jmp_offset_ref;
1049
1050
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1051
11.3k
  compiler->current_rule_idx = compiler->next_rule_idx;
1052
11.3k
  compiler->next_rule_idx++;
1053
1054
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1055
  // disabled it skips over the rule's code and go straight to the next rule's
1056
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1057
  // is set to 0 as we don't know the jump target yet. When we finish
1058
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1059
  // the jump offset is set to its final value.
1060
1061
11.3k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1062
11.3k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1063
1064
11.3k
  FAIL_ON_ERROR(yr_arena_write_data(
1065
11.3k
      compiler->arena,
1066
11.3k
      YR_CODE_SECTION,
1067
11.3k
      &compiler->current_rule_idx,
1068
11.3k
      sizeof(compiler->current_rule_idx),
1069
11.3k
      NULL));
1070
1071
  // Create a fixup entry for the jump and push it in the stack
1072
11.3k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1073
1074
11.3k
  if (fixup == NULL)
1075
0
    return ERROR_INSUFFICIENT_MEMORY;
1076
1077
11.3k
  fixup->ref = jmp_offset_ref;
1078
11.3k
  fixup->next = compiler->fixup_stack_head;
1079
11.3k
  compiler->fixup_stack_head = fixup;
1080
1081
  // Clean strings_table as we are starting to parse a new rule.
1082
11.3k
  yr_hash_table_clean(compiler->strings_table, NULL);
1083
1084
11.3k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1085
11.3k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1086
1087
11.3k
  return ERROR_SUCCESS;
1088
11.3k
}
1089
1090
int yr_parser_reduce_rule_declaration_phase_2(
1091
    yyscan_t yyscanner,
1092
    YR_ARENA_REF* rule_ref)
1093
419
{
1094
419
  uint32_t max_strings_per_rule;
1095
419
  uint32_t strings_in_rule = 0;
1096
1097
419
  YR_FIXUP* fixup;
1098
419
  YR_STRING* string;
1099
419
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1100
1101
419
  yr_get_configuration_uint32(
1102
419
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1103
1104
419
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1105
1106
  // Show warning if the rule is generating too many atoms. The warning is
1107
  // shown if the number of atoms is greater than 20 times the maximum number
1108
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1109
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1110
1111
419
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1112
20
  {
1113
20
    yywarning(yyscanner, "rule is slowing down scanning");
1114
20
  }
1115
1116
419
  yr_rule_strings_foreach(rule, string)
1117
3.17k
  {
1118
    // Only the heading fragment in a chain of strings (the one with
1119
    // chained_to == NULL) must be referenced. All other fragments
1120
    // are never marked as referenced.
1121
    //
1122
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1123
    // strings must always be referenced.
1124
1125
3.17k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1126
203
        (STRING_IS_ANONYMOUS(string) ||
1127
195
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1128
21
    {
1129
21
      yr_compiler_set_error_extra_info(
1130
21
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1131
21
    }
1132
1133
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1134
    // that it will match anywhere.
1135
3.15k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1136
182
        STRING_IS_FIXED_OFFSET(string))
1137
119
    {
1138
119
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1139
119
    }
1140
1141
3.15k
    strings_in_rule++;
1142
1143
3.15k
    if (strings_in_rule > max_strings_per_rule)
1144
0
    {
1145
0
      yr_compiler_set_error_extra_info(
1146
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1147
0
    }
1148
3.15k
  }
1149
1150
398
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1151
398
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1152
1153
398
  fixup = compiler->fixup_stack_head;
1154
1155
398
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1156
398
      compiler->arena, &fixup->ref);
1157
1158
398
  int32_t jmp_offset = yr_arena_get_current_offset(
1159
398
                           compiler->arena, YR_CODE_SECTION) -
1160
398
                       fixup->ref.offset + 1;
1161
1162
398
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1163
1164
  // Remove fixup from the stack.
1165
398
  compiler->fixup_stack_head = fixup->next;
1166
398
  yr_free(fixup);
1167
1168
  // We have finished parsing the current rule set current_rule_idx to
1169
  // UINT32_MAX indicating that we are not currently parsing a rule.
1170
398
  compiler->current_rule_idx = UINT32_MAX;
1171
1172
398
  return ERROR_SUCCESS;
1173
398
}
1174
1175
int yr_parser_reduce_string_identifier(
1176
    yyscan_t yyscanner,
1177
    const char* identifier,
1178
    uint8_t instruction,
1179
    uint64_t at_offset)
1180
16.0k
{
1181
16.0k
  YR_STRING* string;
1182
16.0k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1183
1184
16.0k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1185
13.3k
  {
1186
13.3k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1187
13.2k
    {
1188
13.2k
      yr_parser_emit_with_arg(
1189
13.2k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1190
1191
13.2k
      yr_parser_emit(yyscanner, instruction, NULL);
1192
1193
13.2k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1194
13.2k
          compiler, compiler->current_rule_idx);
1195
1196
13.2k
      yr_rule_strings_foreach(current_rule, string)
1197
847k
      {
1198
847k
        if (instruction != OP_FOUND)
1199
847k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1200
1201
847k
        if (instruction == OP_FOUND_AT)
1202
756
        {
1203
          // Avoid overwriting any previous fixed offset
1204
756
          if (string->fixed_offset == YR_UNDEFINED)
1205
262
            string->fixed_offset = at_offset;
1206
1207
          // If a previous fixed offset was different, disable
1208
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1209
          // have room to store a single fixed offset value
1210
756
          if (string->fixed_offset != at_offset)
1211
326
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1212
756
        }
1213
846k
        else
1214
846k
        {
1215
846k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1216
846k
        }
1217
847k
      }
1218
13.2k
    }
1219
73
    else
1220
73
    {
1221
      // Anonymous strings not allowed outside of a loop
1222
73
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1223
73
    }
1224
13.3k
  }
1225
2.68k
  else
1226
2.68k
  {
1227
2.68k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1228
1229
2.62k
    FAIL_ON_ERROR(
1230
2.62k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1231
1232
2.62k
    if (instruction != OP_FOUND)
1233
2.21k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1234
1235
2.62k
    if (instruction == OP_FOUND_AT)
1236
768
    {
1237
      // Avoid overwriting any previous fixed offset
1238
1239
768
      if (string->fixed_offset == YR_UNDEFINED)
1240
172
        string->fixed_offset = at_offset;
1241
1242
      // If a previous fixed offset was different, disable
1243
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1244
      // have room to store a single fixed offset value
1245
1246
768
      if (string->fixed_offset == YR_UNDEFINED ||
1247
607
          string->fixed_offset != at_offset)
1248
366
      {
1249
366
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1250
366
      }
1251
768
    }
1252
1.85k
    else
1253
1.85k
    {
1254
1.85k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1255
1.85k
    }
1256
1257
2.62k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1258
1259
2.62k
    string->flags |= STRING_FLAGS_REFERENCED;
1260
2.62k
  }
1261
1262
15.9k
  return ERROR_SUCCESS;
1263
16.0k
}
1264
1265
int yr_parser_reduce_meta_declaration(
1266
    yyscan_t yyscanner,
1267
    int32_t type,
1268
    const char* identifier,
1269
    const char* string,
1270
    int64_t integer,
1271
    YR_ARENA_REF* meta_ref)
1272
2.52k
{
1273
2.52k
  YR_ARENA_REF ref;
1274
2.52k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1275
1276
2.52k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1277
2.52k
      compiler->arena,
1278
2.52k
      YR_METAS_TABLE,
1279
2.52k
      sizeof(YR_META),
1280
2.52k
      meta_ref,
1281
2.52k
      offsetof(YR_META, identifier),
1282
2.52k
      offsetof(YR_META, string),
1283
2.52k
      EOL));
1284
1285
2.52k
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1286
1287
2.52k
  meta->type = type;
1288
2.52k
  meta->integer = integer;
1289
1290
2.52k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1291
1292
2.52k
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1293
1294
2.52k
  if (string != NULL)
1295
264
  {
1296
264
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1297
1298
264
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1299
264
  }
1300
2.26k
  else
1301
2.26k
  {
1302
2.26k
    meta->string = NULL;
1303
2.26k
  }
1304
1305
2.52k
  compiler->current_meta_idx++;
1306
1307
2.52k
  return ERROR_SUCCESS;
1308
2.52k
}
1309
1310
// Returns non-zero when `module_name` is acceptable as a module name: it must
// not be empty and strlen() of its C string must equal its declared length,
// which rules out names containing a null byte before the end.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1320
1321
// Handles an import statement for the module named `module_name`.
//
// If objects_table already has an entry for the module in the current
// namespace nothing is done. Otherwise a structure object is created for the
// module, added to objects_table, populated by yr_modules_do_declarations,
// and an OP_IMPORT instruction referencing the stored module name is emitted.
//
// Returns ERROR_SUCCESS, ERROR_INVALID_MODULE_NAME when the name is empty or
// contains a null byte, or the error reported by any of the functions called
// here (ERROR_UNKNOWN_MODULE also records the module name as extra error
// information).
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into objects_table, so nothing else holds a
    // pointer to it; destroy it here instead of leaking it.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the object is reachable through objects_table and is left
  // there even if a later step fails (presumably it is released together
  // with the table -- confirm against the compiler's cleanup code).
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1378
1379
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1380
22.5k
{
1381
22.5k
  int opcode = 0;
1382
1383
22.5k
  switch (expression_type)
1384
22.5k
  {
1385
14.5k
  case EXPRESSION_TYPE_INTEGER:
1386
14.5k
    opcode = OP_INT_BEGIN;
1387
14.5k
    break;
1388
4.79k
  case EXPRESSION_TYPE_FLOAT:
1389
4.79k
    opcode = OP_DBL_BEGIN;
1390
4.79k
    break;
1391
3.19k
  case EXPRESSION_TYPE_STRING:
1392
3.19k
    opcode = OP_STR_BEGIN;
1393
3.19k
    break;
1394
0
  default:
1395
0
    assert(false);
1396
22.5k
  }
1397
1398
22.5k
  if (op[0] == '<')
1399
2.08k
  {
1400
2.08k
    if (op[1] == '=')
1401
648
      opcode += _OP_LE;
1402
1.44k
    else
1403
1.44k
      opcode += _OP_LT;
1404
2.08k
  }
1405
20.4k
  else if (op[0] == '>')
1406
1.76k
  {
1407
1.76k
    if (op[1] == '=')
1408
563
      opcode += _OP_GE;
1409
1.20k
    else
1410
1.20k
      opcode += _OP_GT;
1411
1.76k
  }
1412
18.6k
  else if (op[1] == '=')
1413
1.60k
  {
1414
1.60k
    if (op[0] == '=')
1415
883
      opcode += _OP_EQ;
1416
720
    else
1417
720
      opcode += _OP_NEQ;
1418
1.60k
  }
1419
17.0k
  else if (op[0] == '+')
1420
4.24k
  {
1421
4.24k
    opcode += _OP_ADD;
1422
4.24k
  }
1423
12.8k
  else if (op[0] == '-')
1424
8.69k
  {
1425
8.69k
    opcode += _OP_SUB;
1426
8.69k
  }
1427
4.15k
  else if (op[0] == '*')
1428
2.49k
  {
1429
2.49k
    opcode += _OP_MUL;
1430
2.49k
  }
1431
1.66k
  else if (op[0] == '\\')
1432
1.66k
  {
1433
1.66k
    opcode += _OP_DIV;
1434
1.66k
  }
1435
1436
22.5k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1437
22.5k
  {
1438
22.5k
    return opcode;
1439
22.5k
  }
1440
1441
17
  return OP_ERROR;
1442
22.5k
}
1443
1444
int yr_parser_reduce_operation(
1445
    yyscan_t yyscanner,
1446
    const char* op,
1447
    YR_EXPRESSION left_operand,
1448
    YR_EXPRESSION right_operand)
1449
22.7k
{
1450
22.7k
  int expression_type;
1451
1452
22.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1453
1454
22.7k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1455
5.21k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1456
19.3k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1457
4.02k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1458
19.3k
  {
1459
19.3k
    if (left_operand.type != right_operand.type)
1460
3.80k
    {
1461
      // One operand is double and the other is integer,
1462
      // cast the integer to double
1463
1464
3.80k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1465
3.80k
          yyscanner,
1466
3.80k
          OP_INT_TO_DBL,
1467
3.80k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1468
3.80k
          NULL,
1469
3.80k
          NULL));
1470
3.80k
    }
1471
1472
19.3k
    expression_type = EXPRESSION_TYPE_FLOAT;
1473
1474
19.3k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1475
17.5k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1476
14.5k
    {
1477
14.5k
      expression_type = EXPRESSION_TYPE_INTEGER;
1478
14.5k
    }
1479
1480
19.3k
    FAIL_ON_ERROR(yr_parser_emit(
1481
19.3k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1482
19.3k
  }
1483
3.42k
  else if (
1484
3.42k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1485
3.26k
      right_operand.type == EXPRESSION_TYPE_STRING)
1486
3.19k
  {
1487
3.19k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1488
1489
3.19k
    if (opcode != OP_ERROR)
1490
3.17k
    {
1491
3.17k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1492
3.17k
    }
1493
17
    else
1494
17
    {
1495
17
      yr_compiler_set_error_extra_info_fmt(
1496
17
          compiler, "strings don't support \"%s\" operation", op);
1497
1498
17
      return ERROR_WRONG_TYPE;
1499
17
    }
1500
3.19k
  }
1501
225
  else
1502
225
  {
1503
225
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1504
1505
225
    return ERROR_WRONG_TYPE;
1506
225
  }
1507
1508
22.5k
  return ERROR_SUCCESS;
1509
22.7k
}
1510
1511
int yr_parser_mark_nonfast(
1512
   yyscan_t yyscanner,
1513
   YR_STRING_SET string_set
1514
4.95k
) {
1515
4.95k
 YR_COMPILER* compiler = yyget_extra(yyscanner);
1516
1517
4.95k
 YR_STRING_SET_ELEMENT* head = string_set.head;
1518
822k
  while (head != NULL) {
1519
817k
    YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element);
1520
817k
    string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1521
817k
    head = head->next;
1522
817k
  }
1523
4.95k
  return ERROR_SUCCESS;
1524
4.95k
}