Coverage Report

Created: 2025-07-11 06:08

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts a hexadecimal digit character to its numeric value. Characters in
// the range 'A'-'F' map to 10-15; every other character is treated as a
// decimal digit and mapped to (x - '0'). Lower-case 'a'-'f' are NOT handled
// specially.
//
// The whole expansion and every use of the argument are parenthesized so the
// macro can be used safely inside larger expressions (e.g. 16 * todigit(c)).
// Note that the argument is still evaluated more than once, so it must not
// have side effects.
#define todigit(x)                                           \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
115k
{
56
115k
  return yr_arena_write_data(
57
115k
      yyget_extra(yyscanner)->arena,
58
115k
      YR_CODE_SECTION,
59
115k
      &instruction,
60
115k
      sizeof(uint8_t),
61
115k
      instruction_ref);
62
115k
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
4.40k
{
71
4.40k
  int result = yr_arena_write_data(
72
4.40k
      yyget_extra(yyscanner)->arena,
73
4.40k
      YR_CODE_SECTION,
74
4.40k
      &instruction,
75
4.40k
      sizeof(uint8_t),
76
4.40k
      instruction_ref);
77
78
4.40k
  if (result == ERROR_SUCCESS)
79
4.40k
    result = yr_arena_write_data(
80
4.40k
        yyget_extra(yyscanner)->arena,
81
4.40k
        YR_CODE_SECTION,
82
4.40k
        &argument,
83
4.40k
        sizeof(double),
84
4.40k
        argument_ref);
85
86
4.40k
  return result;
87
4.40k
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
20.6k
{
96
20.6k
  int result = yr_arena_write_data(
97
20.6k
      yyget_extra(yyscanner)->arena,
98
20.6k
      YR_CODE_SECTION,
99
20.6k
      &instruction,
100
20.6k
      sizeof(uint8_t),
101
20.6k
      instruction_ref);
102
103
20.6k
  if (result == ERROR_SUCCESS)
104
20.6k
    result = yr_arena_write_data(
105
20.6k
        yyget_extra(yyscanner)->arena,
106
20.6k
        YR_CODE_SECTION,
107
20.6k
        &argument,
108
20.6k
        sizeof(int32_t),
109
20.6k
        argument_ref);
110
111
20.6k
  return result;
112
20.6k
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
36.1k
{
121
36.1k
  int result = yr_arena_write_data(
122
36.1k
      yyget_extra(yyscanner)->arena,
123
36.1k
      YR_CODE_SECTION,
124
36.1k
      &instruction,
125
36.1k
      sizeof(uint8_t),
126
36.1k
      instruction_ref);
127
128
36.1k
  if (result == ERROR_SUCCESS)
129
36.1k
    result = yr_arena_write_data(
130
36.1k
        yyget_extra(yyscanner)->arena,
131
36.1k
        YR_CODE_SECTION,
132
36.1k
        &argument,
133
36.1k
        sizeof(int64_t),
134
36.1k
        argument_ref);
135
136
36.1k
  return result;
137
36.1k
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
647k
{
146
647k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
647k
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
647k
  memset(&arg, 0, sizeof(arg));
151
647k
  arg.ptr = argument;
152
153
647k
  int result = yr_arena_write_data(
154
647k
      yyget_extra(yyscanner)->arena,
155
647k
      YR_CODE_SECTION,
156
647k
      &instruction,
157
647k
      sizeof(uint8_t),
158
647k
      instruction_ref);
159
160
647k
  if (result == ERROR_SUCCESS)
161
647k
    result = yr_arena_write_data(
162
647k
        yyget_extra(yyscanner)->arena,
163
647k
        YR_CODE_SECTION,
164
647k
        &arg,
165
647k
        sizeof(arg),
166
647k
        &ref);
167
168
647k
  if (result == ERROR_SUCCESS)
169
647k
    result = yr_arena_make_ptr_relocatable(
170
647k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
647k
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
647k
  return result;
176
647k
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
7.97k
{
183
7.97k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
7.97k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
7.97k
      compiler, compiler->current_rule_idx);
187
188
7.97k
  YR_STRING* string;
189
190
7.97k
  const char* string_identifier;
191
7.97k
  const char* target_identifier;
192
193
7.97k
  int matching = 0;
194
195
7.97k
  yr_rule_strings_foreach(current_rule, string)
196
643k
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
643k
    if (string->chained_to == NULL)
201
640k
    {
202
640k
      string_identifier = string->identifier;
203
640k
      target_identifier = identifier;
204
205
1.28M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
1.28M
             *target_identifier == *string_identifier)
207
640k
      {
208
640k
        target_identifier++;
209
640k
        string_identifier++;
210
640k
      }
211
212
640k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
640k
          *target_identifier == '*')
214
636k
      {
215
636k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
636k
        string->flags |= STRING_FLAGS_REFERENCED;
218
636k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
636k
        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
220
636k
        matching++;
221
636k
      }
222
640k
    }
223
643k
  }
224
225
7.97k
  if (count != NULL)
226
7.97k
  {
227
7.97k
    *count = matching;
228
7.97k
  }
229
230
7.97k
  if (matching == 0)
231
52
  {
232
52
    yr_compiler_set_error_extra_info(
233
52
        compiler, identifier) return ERROR_UNDEFINED_STRING;
234
52
  }
235
236
7.92k
  return ERROR_SUCCESS;
237
7.97k
}
238
239
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
240
// prefix.
241
int yr_parser_emit_pushes_for_rules(
242
    yyscan_t yyscanner,
243
    const char* prefix,
244
    int* count)
245
952
{
246
952
  YR_COMPILER* compiler = yyget_extra(yyscanner);
247
248
  // Make sure the compiler is parsing a rule
249
952
  assert(compiler->current_rule_idx != UINT32_MAX);
250
251
952
  YR_RULE* rule;
252
952
  int matching = 0;
253
254
952
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
255
952
      compiler->arena,
256
952
      YR_NAMESPACES_TABLE,
257
952
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
258
259
  // Can't use yr_rules_foreach here as that requires the rules to have been
260
  // finalized (inserting a NULL rule at the end). This is done when
261
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
262
  // into the current position in the code arena. Obviously we aren't done
263
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
264
  // this I'm manually walking all the currently compiled rules (up to the
265
  // current rule index) and comparing identifiers to see if it is one we should
266
  // use.
267
  //
268
  // Further, we have to get compiler->current_rule_idx before we start because
269
  // if we emit an OP_PUSH_RULE
270
952
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
271
272
3.40k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
273
2.45k
  {
274
    // Is rule->identifier prefixed by prefix?
275
2.45k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
276
921
    {
277
921
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
278
921
          compiler->rules_table, rule->identifier, ns->name);
279
280
921
      if (rule_idx != UINT32_MAX)
281
921
      {
282
921
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
283
921
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
284
921
        matching++;
285
921
      }
286
921
    }
287
288
2.45k
    rule++;
289
2.45k
  }
290
291
952
  if (count != NULL)
292
952
  {
293
952
    *count = matching;
294
952
  }
295
296
952
  if (matching == 0)
297
89
  {
298
89
    yr_compiler_set_error_extra_info(compiler, prefix);
299
89
    return ERROR_UNDEFINED_IDENTIFIER;
300
89
  }
301
302
863
  return ERROR_SUCCESS;
303
952
}
304
305
// Emits the shortest push instruction able to represent the given constant:
//
//   YR_UNDEFINED      -> OP_PUSH_U   (no operand)
//   <= 0xff           -> OP_PUSH_8   (1-byte operand)
//   <= 0xffff         -> OP_PUSH_16  (2-byte operand)
//   <= 0xffffffff     -> OP_PUSH_32  (4-byte operand)
//   anything else     -> OP_PUSH     (8-byte operand)
//
// Operands are copied in the host's native byte order. The opcode and its
// operand are written to the code section with a single yr_arena_write_data
// call, whose result is returned.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus the largest possible operand.
  uint8_t encoded[1 + sizeof(uint64_t)];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t narrowed = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &narrowed, sizeof(narrowed));
    encoded_len += sizeof(narrowed);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t narrowed = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &narrowed, sizeof(narrowed));
    encoded_len += sizeof(narrowed);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  return yr_arena_write_data(
      compiler->arena, YR_CODE_SECTION, encoded, encoded_len, NULL);
}
344
345
int yr_parser_check_types(
346
    YR_COMPILER* compiler,
347
    YR_OBJECT_FUNCTION* function,
348
    const char* actual_args_fmt)
349
320
{
350
320
  int i;
351
352
639
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
353
639
  {
354
639
    if (function->prototypes[i].arguments_fmt == NULL)
355
4
      break;
356
357
635
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
358
316
      return ERROR_SUCCESS;
359
635
  }
360
361
4
  yr_compiler_set_error_extra_info(compiler, function->identifier)
362
363
4
      return ERROR_WRONG_ARGUMENTS;
364
320
}
365
366
int yr_parser_lookup_string(
367
    yyscan_t yyscanner,
368
    const char* identifier,
369
    YR_STRING** string)
370
2.18k
{
371
2.18k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
372
373
2.18k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
374
2.18k
      compiler, compiler->current_rule_idx);
375
376
2.18k
  yr_rule_strings_foreach(current_rule, *string)
377
3.40k
  {
378
    // If some string $a gets fragmented into multiple chained
379
    // strings, all those fragments have the same $a identifier
380
    // but we are interested in the heading fragment, which is
381
    // that with chained_to == NULL
382
383
3.40k
    if ((*string)->chained_to == NULL &&
384
3.40k
        strcmp((*string)->identifier, identifier) == 0)
385
2.12k
    {
386
2.12k
      return ERROR_SUCCESS;
387
2.12k
    }
388
3.40k
  }
389
390
55
  yr_compiler_set_error_extra_info(compiler, identifier)
391
392
55
      * string = NULL;
393
394
55
  return ERROR_UNDEFINED_STRING;
395
2.18k
}
396
397
////////////////////////////////////////////////////////////////////////////////
398
// Searches for a variable with the given identifier in the scope of the current
399
// "for" loop. In case of nested "for" loops the identifier is searched starting
400
// at the top-level loop and going down thorough the nested loops until the
401
// current one. This is ok because inner loops can not re-define an identifier
402
// already defined by an outer loop.
403
//
404
// If the variable is found, the return value is the position that the variable
405
// occupies among all the currently defined variables. If the variable doesn't
406
// exist the return value is -1.
407
//
408
// The function can receive a pointer to a YR_EXPRESSION that will populated
409
// with information about the variable if found. This pointer can be NULL if
410
// the caller is not interested in getting that information.
411
//
412
int yr_parser_lookup_loop_variable(
413
    yyscan_t yyscanner,
414
    const char* identifier,
415
    YR_EXPRESSION* expr)
416
12.3k
{
417
12.3k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
418
12.3k
  int i, j;
419
12.3k
  int var_offset = 0;
420
421
17.4k
  for (i = 0; i <= compiler->loop_index; i++)
422
8.50k
  {
423
8.50k
    var_offset += compiler->loop[i].vars_internal_count;
424
425
14.3k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
426
9.23k
    {
427
9.23k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
428
9.23k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
429
3.37k
      {
430
3.37k
        if (expr != NULL)
431
3.34k
          *expr = compiler->loop[i].vars[j];
432
433
3.37k
        return var_offset + j;
434
3.37k
      }
435
9.23k
    }
436
437
5.13k
    var_offset += compiler->loop[i].vars_count;
438
5.13k
  }
439
440
8.98k
  return -1;
441
12.3k
}
442
443
static int _yr_parser_write_string(
444
    const char* identifier,
445
    YR_MODIFIER modifier,
446
    YR_COMPILER* compiler,
447
    SIZED_STRING* str,
448
    RE_AST* re_ast,
449
    YR_ARENA_REF* string_ref,
450
    int* min_atom_quality,
451
    int* num_atom)
452
33.8k
{
453
33.8k
  SIZED_STRING* literal_string;
454
33.8k
  YR_ATOM_LIST_ITEM* atom;
455
33.8k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
456
457
33.8k
  int c, result;
458
33.8k
  int max_string_len;
459
33.8k
  bool free_literal = false;
460
461
33.8k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
462
33.8k
      compiler->arena,
463
33.8k
      YR_STRINGS_TABLE,
464
33.8k
      sizeof(YR_STRING),
465
33.8k
      string_ref,
466
33.8k
      offsetof(YR_STRING, identifier),
467
33.8k
      offsetof(YR_STRING, string),
468
33.8k
      offsetof(YR_STRING, chained_to),
469
33.8k
      EOL));
470
471
33.8k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
472
33.8k
      compiler->arena, string_ref);
473
474
33.8k
  YR_ARENA_REF ref;
475
476
33.8k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
477
478
33.8k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
479
33.8k
  string->rule_idx = compiler->current_rule_idx;
480
33.8k
  string->idx = compiler->current_string_idx;
481
33.8k
  string->fixed_offset = YR_UNDEFINED;
482
483
33.8k
  compiler->current_string_idx++;
484
485
33.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
486
33.8k
      modifier.flags & STRING_FLAGS_REGEXP ||
487
33.8k
      modifier.flags & STRING_FLAGS_BASE64 ||
488
33.8k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
489
31.8k
  {
490
31.8k
    literal_string = yr_re_ast_extract_literal(re_ast);
491
492
31.8k
    if (literal_string != NULL)
493
23.9k
      free_literal = true;
494
31.8k
  }
495
1.94k
  else
496
1.94k
  {
497
1.94k
    literal_string = str;
498
1.94k
  }
499
500
33.8k
  if (literal_string != NULL)
501
25.9k
  {
502
25.9k
    modifier.flags |= STRING_FLAGS_LITERAL;
503
504
25.9k
    result = _yr_compiler_store_data(
505
25.9k
        compiler,
506
25.9k
        literal_string->c_string,
507
25.9k
        literal_string->length + 1,  // +1 to include terminating NULL
508
25.9k
        &ref);
509
510
25.9k
    if (result != ERROR_SUCCESS)
511
0
      goto cleanup;
512
513
25.9k
    string->length = (uint32_t) literal_string->length;
514
25.9k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
515
516
25.9k
    if (modifier.flags & STRING_FLAGS_WIDE)
517
1.56k
      max_string_len = string->length * 2;
518
24.3k
    else
519
24.3k
      max_string_len = string->length;
520
521
25.9k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
522
19.5k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
523
524
25.9k
    result = yr_atoms_extract_from_string(
525
25.9k
        &compiler->atoms_config,
526
25.9k
        (uint8_t*) literal_string->c_string,
527
25.9k
        (int32_t) literal_string->length,
528
25.9k
        modifier,
529
25.9k
        &atom_list,
530
25.9k
        min_atom_quality);
531
532
25.9k
    if (result != ERROR_SUCCESS)
533
0
      goto cleanup;
534
25.9k
  }
535
7.86k
  else
536
7.86k
  {
537
    // Non-literal strings can't be marked as fixed offset because once we
538
    // find a string atom in the scanned data we don't know the offset where
539
    // the string should start, as the non-literal strings can contain
540
    // variable-length portions.
541
7.86k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
542
543
    // Save the position where the RE forward code starts for later reference.
544
7.86k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
545
7.86k
        compiler->arena, YR_RE_CODE_SECTION);
546
547
    // Emit forwards code
548
7.86k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
549
550
7.86k
    if (result != ERROR_SUCCESS)
551
123
      goto cleanup;
552
553
    // Emit backwards code
554
7.74k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
555
556
7.74k
    if (result != ERROR_SUCCESS)
557
7
      goto cleanup;
558
559
    // Extract atoms from the regular expression.
560
7.73k
    result = yr_atoms_extract_from_re(
561
7.73k
        &compiler->atoms_config,
562
7.73k
        re_ast,
563
7.73k
        modifier,
564
7.73k
        &atom_list,
565
7.73k
        min_atom_quality);
566
567
7.73k
    if (result != ERROR_SUCCESS)
568
0
      goto cleanup;
569
570
    // If no atom was extracted let's add a zero-length atom.
571
7.73k
    if (atom_list == NULL)
572
3.27k
    {
573
3.27k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
574
575
3.27k
      if (atom_list == NULL)
576
0
      {
577
0
        result = ERROR_INSUFFICIENT_MEMORY;
578
0
        goto cleanup;
579
0
      }
580
581
3.27k
      atom_list->atom.length = 0;
582
3.27k
      atom_list->backtrack = 0;
583
3.27k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
584
3.27k
      atom_list->next = NULL;
585
586
3.27k
      yr_arena_ptr_to_ref(
587
3.27k
          compiler->arena,
588
3.27k
          yr_arena_get_ptr(
589
3.27k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
590
3.27k
          &(atom_list->forward_code_ref));
591
3.27k
    }
592
7.73k
  }
593
594
33.6k
  string->flags = modifier.flags;
595
596
  // Add the string to Aho-Corasick automaton.
597
33.6k
  result = yr_ac_add_string(
598
33.6k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
599
600
33.6k
  if (result != ERROR_SUCCESS)
601
0
    goto cleanup;
602
603
33.6k
  atom = atom_list;
604
33.6k
  c = 0;
605
606
3.61M
  while (atom != NULL)
607
3.58M
  {
608
3.58M
    atom = atom->next;
609
3.58M
    c++;
610
3.58M
  }
611
612
33.6k
  (*num_atom) += c;
613
614
33.8k
cleanup:
615
33.8k
  if (free_literal)
616
23.9k
    yr_free(literal_string);
617
618
33.8k
  if (atom_list != NULL)
619
33.6k
    yr_atoms_list_destroy(atom_list);
620
621
33.8k
  return result;
622
33.6k
}
623
624
static int _yr_parser_check_string_modifiers(
625
    yyscan_t yyscanner,
626
    YR_MODIFIER modifier)
627
28.6k
{
628
28.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
629
630
  // xor and nocase together is not implemented.
631
28.6k
  if (modifier.flags & STRING_FLAGS_XOR &&
632
28.6k
      modifier.flags & STRING_FLAGS_NO_CASE)
633
4
  {
634
4
    yr_compiler_set_error_extra_info(
635
4
        compiler, "invalid modifier combination: xor nocase");
636
4
    return ERROR_INVALID_MODIFIER;
637
4
  }
638
639
  // base64 and nocase together is not implemented.
640
28.6k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
641
28.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
642
5.67k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
643
7
  {
644
7
    yr_compiler_set_error_extra_info(
645
7
        compiler,
646
7
        modifier.flags & STRING_FLAGS_BASE64
647
7
            ? "invalid modifier combination: base64 nocase"
648
7
            : "invalid modifier combination: base64wide nocase");
649
7
    return ERROR_INVALID_MODIFIER;
650
7
  }
651
652
  // base64 and fullword together is not implemented.
653
28.6k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
654
28.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
655
144
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
656
0
  {
657
0
    yr_compiler_set_error_extra_info(
658
0
        compiler,
659
0
        modifier.flags & STRING_FLAGS_BASE64
660
0
            ? "invalid modifier combination: base64 fullword"
661
0
            : "invalid modifier combination: base64wide fullword");
662
0
    return ERROR_INVALID_MODIFIER;
663
0
  }
664
665
  // base64 and xor together is not implemented.
666
28.6k
  if (modifier.flags & STRING_FLAGS_XOR &&
667
28.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
668
707
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
669
9
  {
670
9
    yr_compiler_set_error_extra_info(
671
9
        compiler,
672
9
        modifier.flags & STRING_FLAGS_BASE64
673
9
            ? "invalid modifier combination: base64 xor"
674
9
            : "invalid modifier combination: base64wide xor");
675
9
    return ERROR_INVALID_MODIFIER;
676
9
  }
677
678
28.6k
  return ERROR_SUCCESS;
679
28.6k
}
680
681
int yr_parser_reduce_string_declaration(
682
    yyscan_t yyscanner,
683
    YR_MODIFIER modifier,
684
    const char* identifier,
685
    SIZED_STRING* str,
686
    YR_ARENA_REF* string_ref)
687
28.6k
{
688
28.6k
  int result = ERROR_SUCCESS;
689
28.6k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
690
28.6k
  int atom_quality;
691
692
28.6k
  char message[512];
693
694
28.6k
  int32_t min_gap = 0;
695
28.6k
  int32_t max_gap = 0;
696
697
28.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
698
699
28.6k
  RE_AST* re_ast = NULL;
700
28.6k
  RE_AST* remainder_re_ast = NULL;
701
28.6k
  RE_ERROR re_error;
702
703
28.6k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
704
28.6k
      compiler, compiler->current_rule_idx);
705
706
  // Determine if a string with the same identifier was already defined
707
  // by searching for the identifier in strings_table.
708
28.6k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
709
28.6k
      compiler->strings_table, identifier, NULL);
710
711
  // The string was already defined, return an error.
712
28.6k
  if (string_idx != UINT32_MAX)
713
14
  {
714
14
    yr_compiler_set_error_extra_info(compiler, identifier);
715
14
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
716
14
  }
717
718
  // Empty strings are not allowed.
719
28.6k
  if (str->length == 0)
720
0
  {
721
0
    yr_compiler_set_error_extra_info(compiler, identifier);
722
0
    return ERROR_EMPTY_STRING;
723
0
  }
724
725
28.6k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
726
5.35k
    modifier.flags |= STRING_FLAGS_NO_CASE;
727
728
28.6k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
729
324
    modifier.flags |= STRING_FLAGS_DOT_ALL;
730
731
  // Hex strings are always handled as DOT_ALL regexps.
732
28.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
733
1.16k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
734
735
28.6k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
736
28.6k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
737
27.5k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
738
26.3k
  {
739
26.3k
    modifier.flags |= STRING_FLAGS_ASCII;
740
26.3k
  }
741
742
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
743
  // a single match for the string is enough. This is true in
744
  // most cases, except when the string count (#) and string offset (@)
745
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
746
  // initially, and unmarked later if required.
747
28.6k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
748
749
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
750
  // need to be searched all over the file because the user is using the
751
  // "at" operator. The string must be searched at a fixed offset in the
752
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
753
  // and unmarked later if required.
754
28.6k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
755
756
  // If string identifier is $ this is an anonymous string, if not add the
757
  // identifier to strings_table.
758
28.6k
  if (strcmp(identifier, "$") == 0)
759
27.0k
  {
760
27.0k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
761
27.0k
  }
762
1.63k
  else
763
1.63k
  {
764
1.63k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
765
1.63k
        compiler->strings_table,
766
1.63k
        identifier,
767
1.63k
        NULL,
768
1.63k
        compiler->current_string_idx));
769
1.63k
  }
770
771
  // Make sure that the the string does not have an invalid combination of
772
  // modifiers.
773
28.6k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
774
775
28.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
776
28.6k
      modifier.flags & STRING_FLAGS_REGEXP ||
777
28.6k
      modifier.flags & STRING_FLAGS_BASE64 ||
778
28.6k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
779
26.7k
  {
780
26.7k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
781
1.16k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
782
25.5k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
783
24.1k
    {
784
24.1k
      int flags = RE_PARSER_FLAG_NONE;
785
24.1k
      if (compiler->strict_escape)
786
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
787
24.1k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
788
24.1k
    }
789
1.39k
    else
790
1.39k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
791
792
26.7k
    if (result != ERROR_SUCCESS)
793
754
    {
794
754
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
795
0
      {
796
0
        yywarning(yyscanner, "unknown escape sequence");
797
0
      }
798
754
      else
799
754
      {
800
754
        snprintf(
801
754
            message,
802
754
            sizeof(message),
803
754
            "invalid %s \"%s\": %s",
804
754
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
805
754
                                                        : "regular expression",
806
754
            identifier,
807
754
            re_error.message);
808
809
754
        yr_compiler_set_error_extra_info(compiler, message);
810
754
        goto _exit;
811
754
      }
812
754
    }
813
814
25.9k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
815
774
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
816
817
25.9k
    if (re_ast->flags & RE_FLAGS_GREEDY)
818
978
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
819
820
    // Regular expressions in the strings section can't mix greedy and
821
    // ungreedy quantifiers like .* and .*?. That's because these regular
822
    // expressions can be matched forwards and/or backwards depending on the
823
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
824
    // be able to properly calculate the length of the match.
825
826
25.9k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
827
25.9k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
828
6
    {
829
6
      result = ERROR_INVALID_REGULAR_EXPRESSION;
830
831
6
      yr_compiler_set_error_extra_info(
832
6
          compiler,
833
6
          "greedy and ungreedy quantifiers can't be mixed in a regular "
834
6
          "expression");
835
836
6
      goto _exit;
837
6
    }
838
839
25.9k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
840
1.43k
    {
841
1.43k
      yywarning(
842
1.43k
          yyscanner,
843
1.43k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
844
1.43k
          "with a reasonable value for N",
845
1.43k
          identifier);
846
1.43k
    }
847
848
25.9k
    if (compiler->re_ast_callback != NULL)
849
0
    {
850
0
      compiler->re_ast_callback(
851
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
852
0
    }
853
854
25.9k
    *string_ref = YR_ARENA_NULL_REF;
855
856
57.6k
    while (re_ast != NULL)
857
31.8k
    {
858
31.8k
      YR_ARENA_REF ref;
859
860
31.8k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
861
862
31.8k
      int32_t prev_min_gap = min_gap;
863
31.8k
      int32_t prev_max_gap = max_gap;
864
865
31.8k
      result = yr_re_ast_split_at_chaining_point(
866
31.8k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
867
868
31.8k
      if (result != ERROR_SUCCESS)
869
0
        goto _exit;
870
871
31.8k
      result = _yr_parser_write_string(
872
31.8k
          identifier,
873
31.8k
          modifier,
874
31.8k
          compiler,
875
31.8k
          NULL,
876
31.8k
          re_ast,
877
31.8k
          &ref,
878
31.8k
          &atom_quality,
879
31.8k
          &current_rule->num_atoms);
880
881
31.8k
      if (result != ERROR_SUCCESS)
882
130
        goto _exit;
883
884
31.7k
      if (atom_quality < min_atom_quality)
885
24.1k
        min_atom_quality = atom_quality;
886
887
31.7k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
888
25.8k
      {
889
        // This is the first string in the chain, the string reference
890
        // returned by this function must point to this string.
891
25.8k
        *string_ref = ref;
892
25.8k
      }
893
5.91k
      else
894
5.91k
      {
895
        // This is not the first string in the chain, set the appropriate
896
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
897
        // fields.
898
5.91k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
899
5.91k
            compiler->arena,
900
5.91k
            YR_STRINGS_TABLE,
901
5.91k
            prev_string_idx * sizeof(YR_STRING));
902
903
5.91k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
904
5.91k
            compiler->arena, &ref);
905
906
5.91k
        new_string->chained_to = prev_string;
907
5.91k
        new_string->chain_gap_min = prev_min_gap;
908
5.91k
        new_string->chain_gap_max = prev_max_gap;
909
910
        // A string chained to another one can't have a fixed offset, only the
911
        // head of the string chain can have a fixed offset.
912
5.91k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
913
914
        // There is a previous string, but that string wasn't marked as part
915
        // of a chain because we can't do that until knowing there will be
916
        // another string, let's flag it now the we know.
917
5.91k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
918
919
        // There is a previous string, so this string is part of a chain, but
920
        // there will be no more strings because there are no more AST to
921
        // split, which means that this is the chain's tail.
922
5.91k
        if (remainder_re_ast == NULL)
923
1.08k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
924
1.08k
                               STRING_FLAGS_CHAIN_TAIL;
925
5.91k
      }
926
927
31.7k
      yr_re_ast_destroy(re_ast);
928
31.7k
      re_ast = remainder_re_ast;
929
31.7k
    }
930
25.9k
  }
931
1.94k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
932
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
933
1.94k
  {
934
1.94k
    result = _yr_parser_write_string(
935
1.94k
        identifier,
936
1.94k
        modifier,
937
1.94k
        compiler,
938
1.94k
        str,
939
1.94k
        NULL,
940
1.94k
        string_ref,
941
1.94k
        &min_atom_quality,
942
1.94k
        &current_rule->num_atoms);
943
944
1.94k
    if (result != ERROR_SUCCESS)
945
0
      goto _exit;
946
1.94k
  }
947
948
27.7k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
949
7.17k
  {
950
7.17k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
951
7.17k
  }
952
953
28.6k
_exit:
954
955
28.6k
  if (re_ast != NULL)
956
464
    yr_re_ast_destroy(re_ast);
957
958
28.6k
  if (remainder_re_ast != NULL)
959
2
    yr_re_ast_destroy(remainder_re_ast);
960
961
28.6k
  return result;
962
27.7k
}
963
964
static int wildcard_iterator(
965
    void* prefix,
966
    size_t prefix_len,
967
    void* _value,
968
    void* data)
969
3.40k
{
970
3.40k
  const char* identifier = (const char*) data;
971
972
  // If the identifier is prefixed by prefix, then it matches the wildcard.
973
3.40k
  if (!strncmp(prefix, identifier, prefix_len))
974
160
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
975
976
3.24k
  return ERROR_SUCCESS;
977
3.40k
}
978
979
int yr_parser_reduce_rule_declaration_phase_1(
980
    yyscan_t yyscanner,
981
    int32_t flags,
982
    const char* identifier,
983
    YR_ARENA_REF* rule_ref)
984
26.2k
{
985
26.2k
  int result;
986
26.2k
  YR_FIXUP* fixup;
987
26.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
988
989
26.2k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
990
26.2k
      compiler->arena,
991
26.2k
      YR_NAMESPACES_TABLE,
992
26.2k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
993
994
26.2k
  if (yr_hash_table_lookup_uint32(
995
26.2k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
996
26.2k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
997
15.2k
  {
998
    // A rule or variable with the same identifier already exists, return the
999
    // appropriate error.
1000
1001
15.2k
    yr_compiler_set_error_extra_info(compiler, identifier);
1002
15.2k
    return ERROR_DUPLICATED_IDENTIFIER;
1003
15.2k
  }
1004
1005
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1006
  // any of them are a prefix of the identifier being declared. If so, return
1007
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1008
10.9k
  result = yr_hash_table_iterate(
1009
10.9k
      compiler->wildcard_identifiers_table,
1010
10.9k
      ns->name,
1011
10.9k
      wildcard_iterator,
1012
10.9k
      (void*) identifier);
1013
1014
10.9k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1015
160
  {
1016
    // This rule matches an existing wildcard rule set.
1017
160
    yr_compiler_set_error_extra_info(compiler, identifier);
1018
160
  }
1019
1020
10.9k
  FAIL_ON_ERROR(result);
1021
1022
10.8k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1023
10.8k
      compiler->arena,
1024
10.8k
      YR_RULES_TABLE,
1025
10.8k
      sizeof(YR_RULE),
1026
10.8k
      rule_ref,
1027
10.8k
      offsetof(YR_RULE, identifier),
1028
10.8k
      offsetof(YR_RULE, tags),
1029
10.8k
      offsetof(YR_RULE, strings),
1030
10.8k
      offsetof(YR_RULE, metas),
1031
10.8k
      offsetof(YR_RULE, ns),
1032
10.8k
      EOL));
1033
1034
10.8k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1035
1036
10.8k
  YR_ARENA_REF ref;
1037
1038
10.8k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1039
1040
10.8k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1041
10.8k
  rule->flags = flags;
1042
10.8k
  rule->ns = ns;
1043
10.8k
  rule->num_atoms = 0;
1044
1045
10.8k
  YR_ARENA_REF jmp_offset_ref;
1046
1047
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1048
10.8k
  compiler->current_rule_idx = compiler->next_rule_idx;
1049
10.8k
  compiler->next_rule_idx++;
1050
1051
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1052
  // disabled it skips over the rule's code and go straight to the next rule's
1053
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1054
  // is set to 0 as we don't know the jump target yet. When we finish
1055
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1056
  // the jump offset is set to its final value.
1057
1058
10.8k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1059
10.8k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1060
1061
10.8k
  FAIL_ON_ERROR(yr_arena_write_data(
1062
10.8k
      compiler->arena,
1063
10.8k
      YR_CODE_SECTION,
1064
10.8k
      &compiler->current_rule_idx,
1065
10.8k
      sizeof(compiler->current_rule_idx),
1066
10.8k
      NULL));
1067
1068
  // Create a fixup entry for the jump and push it in the stack
1069
10.8k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1070
1071
10.8k
  if (fixup == NULL)
1072
0
    return ERROR_INSUFFICIENT_MEMORY;
1073
1074
10.8k
  fixup->ref = jmp_offset_ref;
1075
10.8k
  fixup->next = compiler->fixup_stack_head;
1076
10.8k
  compiler->fixup_stack_head = fixup;
1077
1078
  // Clean strings_table as we are starting to parse a new rule.
1079
10.8k
  yr_hash_table_clean(compiler->strings_table, NULL);
1080
1081
10.8k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1082
10.8k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1083
1084
10.8k
  return ERROR_SUCCESS;
1085
10.8k
}
1086
1087
int yr_parser_reduce_rule_declaration_phase_2(
1088
    yyscan_t yyscanner,
1089
    YR_ARENA_REF* rule_ref)
1090
763
{
1091
763
  uint32_t max_strings_per_rule;
1092
763
  uint32_t strings_in_rule = 0;
1093
1094
763
  YR_FIXUP* fixup;
1095
763
  YR_STRING* string;
1096
763
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1097
1098
763
  yr_get_configuration_uint32(
1099
763
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1100
1101
763
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1102
1103
  // Show warning if the rule is generating too many atoms. The warning is
1104
  // shown if the number of atoms is greater than 20 times the maximum number
1105
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1106
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1107
1108
763
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1109
27
  {
1110
27
    yywarning(yyscanner, "rule is slowing down scanning");
1111
27
  }
1112
1113
763
  yr_rule_strings_foreach(rule, string)
1114
4.52k
  {
1115
    // Only the heading fragment in a chain of strings (the one with
1116
    // chained_to == NULL) must be referenced. All other fragments
1117
    // are never marked as referenced.
1118
    //
1119
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1120
    // strings must always be referenced.
1121
1122
4.52k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1123
4.52k
        (STRING_IS_ANONYMOUS(string) ||
1124
177
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1125
17
    {
1126
17
      yr_compiler_set_error_extra_info(
1127
17
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1128
17
    }
1129
1130
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1131
    // that it will match anywhere.
1132
4.50k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1133
4.50k
        STRING_IS_FIXED_OFFSET(string))
1134
109
    {
1135
109
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1136
109
    }
1137
1138
4.50k
    strings_in_rule++;
1139
1140
4.50k
    if (strings_in_rule > max_strings_per_rule)
1141
0
    {
1142
0
      yr_compiler_set_error_extra_info(
1143
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1144
0
    }
1145
4.50k
  }
1146
1147
746
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1148
746
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1149
1150
746
  fixup = compiler->fixup_stack_head;
1151
1152
746
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1153
746
      compiler->arena, &fixup->ref);
1154
1155
746
  int32_t jmp_offset = yr_arena_get_current_offset(
1156
746
                           compiler->arena, YR_CODE_SECTION) -
1157
746
                       fixup->ref.offset + 1;
1158
1159
746
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1160
1161
  // Remove fixup from the stack.
1162
746
  compiler->fixup_stack_head = fixup->next;
1163
746
  yr_free(fixup);
1164
1165
  // We have finished parsing the current rule set current_rule_idx to
1166
  // UINT32_MAX indicating that we are not currently parsing a rule.
1167
746
  compiler->current_rule_idx = UINT32_MAX;
1168
1169
746
  return ERROR_SUCCESS;
1170
746
}
1171
1172
int yr_parser_reduce_string_identifier(
1173
    yyscan_t yyscanner,
1174
    const char* identifier,
1175
    uint8_t instruction,
1176
    uint64_t at_offset)
1177
14.2k
{
1178
14.2k
  YR_STRING* string;
1179
14.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1180
1181
14.2k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1182
12.0k
  {
1183
12.0k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1184
11.9k
    {
1185
11.9k
      yr_parser_emit_with_arg(
1186
11.9k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1187
1188
11.9k
      yr_parser_emit(yyscanner, instruction, NULL);
1189
1190
11.9k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1191
11.9k
          compiler, compiler->current_rule_idx);
1192
1193
11.9k
      yr_rule_strings_foreach(current_rule, string)
1194
647k
      {
1195
647k
        if (instruction != OP_FOUND)
1196
647k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1197
1198
647k
        if (instruction == OP_FOUND_AT)
1199
394
        {
1200
          // Avoid overwriting any previous fixed offset
1201
394
          if (string->fixed_offset == YR_UNDEFINED)
1202
38
            string->fixed_offset = at_offset;
1203
1204
          // If a previous fixed offset was different, disable
1205
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1206
          // have room to store a single fixed offset value
1207
394
          if (string->fixed_offset != at_offset)
1208
353
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1209
394
        }
1210
647k
        else
1211
647k
        {
1212
647k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1213
647k
        }
1214
647k
      }
1215
11.9k
    }
1216
132
    else
1217
132
    {
1218
      // Anonymous strings not allowed outside of a loop
1219
132
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1220
132
    }
1221
12.0k
  }
1222
2.18k
  else
1223
2.18k
  {
1224
2.18k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1225
1226
2.12k
    FAIL_ON_ERROR(
1227
2.12k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1228
1229
2.12k
    if (instruction != OP_FOUND)
1230
1.75k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1231
1232
2.12k
    if (instruction == OP_FOUND_AT)
1233
429
    {
1234
      // Avoid overwriting any previous fixed offset
1235
1236
429
      if (string->fixed_offset == YR_UNDEFINED)
1237
189
        string->fixed_offset = at_offset;
1238
1239
      // If a previous fixed offset was different, disable
1240
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1241
      // have room to store a single fixed offset value
1242
1243
429
      if (string->fixed_offset == YR_UNDEFINED ||
1244
429
          string->fixed_offset != at_offset)
1245
345
      {
1246
345
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1247
345
      }
1248
429
    }
1249
1.69k
    else
1250
1.69k
    {
1251
1.69k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1252
1.69k
    }
1253
1254
2.12k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1255
1256
2.12k
    string->flags |= STRING_FLAGS_REFERENCED;
1257
2.12k
  }
1258
1259
14.0k
  return ERROR_SUCCESS;
1260
14.2k
}
1261
1262
int yr_parser_reduce_meta_declaration(
1263
    yyscan_t yyscanner,
1264
    int32_t type,
1265
    const char* identifier,
1266
    const char* string,
1267
    int64_t integer,
1268
    YR_ARENA_REF* meta_ref)
1269
713
{
1270
713
  YR_ARENA_REF ref;
1271
713
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1272
1273
713
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1274
713
      compiler->arena,
1275
713
      YR_METAS_TABLE,
1276
713
      sizeof(YR_META),
1277
713
      meta_ref,
1278
713
      offsetof(YR_META, identifier),
1279
713
      offsetof(YR_META, string),
1280
713
      EOL));
1281
1282
713
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1283
1284
713
  meta->type = type;
1285
713
  meta->integer = integer;
1286
1287
713
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1288
1289
713
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1290
1291
713
  if (string != NULL)
1292
95
  {
1293
95
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1294
1295
95
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1296
95
  }
1297
618
  else
1298
618
  {
1299
618
    meta->string = NULL;
1300
618
  }
1301
1302
713
  compiler->current_meta_idx++;
1303
1304
713
  return ERROR_SUCCESS;
1305
713
}
1306
1307
// Tells whether `module_name` can be used as a module name.
//
// The name must be non-empty and strlen() of its c_string must equal the
// recorded length, which rules out names with a NUL byte before the end.
//
// Returns true (non-zero) for a valid name and false (zero) otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1317
1318
// Handles an `import` statement for the module called `module_name`.
//
//   yyscanner    Scanner whose extra data is the YR_COMPILER in use.
//   module_name  Name of the module; must pass _yr_parser_valid_module_name.
//
// When the module isn't registered yet in the current namespace, a structure
// object is created for it, added to the compiler's objects table, populated
// by yr_modules_do_declarations() and an OP_IMPORT instruction carrying the
// module name is emitted. Importing an already known module does nothing.
//
// Returns ERROR_SUCCESS, ERROR_INVALID_MODULE_NAME, or the error reported by
// any of the helpers (ERROR_UNKNOWN_MODULE also sets the error extra info).
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into the table, so this function is the only
    // holder of the pointer and has to release it before bailing out.
    // NOTE(review): assumes yr_hash_table_add() does not retain the value
    // when it fails -- confirm against the hash table implementation.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the object is reachable through objects_table, so the error
  // paths below don't release it themselves.
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1375
1376
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1377
27.7k
{
1378
27.7k
  int opcode = 0;
1379
1380
27.7k
  switch (expression_type)
1381
27.7k
  {
1382
19.9k
  case EXPRESSION_TYPE_INTEGER:
1383
19.9k
    opcode = OP_INT_BEGIN;
1384
19.9k
    break;
1385
6.42k
  case EXPRESSION_TYPE_FLOAT:
1386
6.42k
    opcode = OP_DBL_BEGIN;
1387
6.42k
    break;
1388
1.32k
  case EXPRESSION_TYPE_STRING:
1389
1.32k
    opcode = OP_STR_BEGIN;
1390
1.32k
    break;
1391
0
  default:
1392
0
    assert(false);
1393
27.7k
  }
1394
1395
27.7k
  if (op[0] == '<')
1396
1.46k
  {
1397
1.46k
    if (op[1] == '=')
1398
868
      opcode += _OP_LE;
1399
599
    else
1400
599
      opcode += _OP_LT;
1401
1.46k
  }
1402
26.2k
  else if (op[0] == '>')
1403
918
  {
1404
918
    if (op[1] == '=')
1405
400
      opcode += _OP_GE;
1406
518
    else
1407
518
      opcode += _OP_GT;
1408
918
  }
1409
25.3k
  else if (op[1] == '=')
1410
1.07k
  {
1411
1.07k
    if (op[0] == '=')
1412
435
      opcode += _OP_EQ;
1413
637
    else
1414
637
      opcode += _OP_NEQ;
1415
1.07k
  }
1416
24.2k
  else if (op[0] == '+')
1417
9.31k
  {
1418
9.31k
    opcode += _OP_ADD;
1419
9.31k
  }
1420
14.9k
  else if (op[0] == '-')
1421
8.98k
  {
1422
8.98k
    opcode += _OP_SUB;
1423
8.98k
  }
1424
5.97k
  else if (op[0] == '*')
1425
3.77k
  {
1426
3.77k
    opcode += _OP_MUL;
1427
3.77k
  }
1428
2.19k
  else if (op[0] == '\\')
1429
2.19k
  {
1430
2.19k
    opcode += _OP_DIV;
1431
2.19k
  }
1432
1433
27.7k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1434
27.7k
  {
1435
27.7k
    return opcode;
1436
27.7k
  }
1437
1438
0
  return OP_ERROR;
1439
27.7k
}
1440
1441
int yr_parser_reduce_operation(
1442
    yyscan_t yyscanner,
1443
    const char* op,
1444
    YR_EXPRESSION left_operand,
1445
    YR_EXPRESSION right_operand)
1446
27.9k
{
1447
27.9k
  int expression_type;
1448
1449
27.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1450
1451
27.9k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1452
27.9k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1453
27.9k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1454
26.4k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1455
26.4k
  {
1456
26.4k
    if (left_operand.type != right_operand.type)
1457
4.42k
    {
1458
      // One operand is double and the other is integer,
1459
      // cast the integer to double
1460
1461
4.42k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1462
4.42k
          yyscanner,
1463
4.42k
          OP_INT_TO_DBL,
1464
4.42k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1465
4.42k
          NULL,
1466
4.42k
          NULL));
1467
4.42k
    }
1468
1469
26.4k
    expression_type = EXPRESSION_TYPE_FLOAT;
1470
1471
26.4k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1472
26.4k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1473
19.9k
    {
1474
19.9k
      expression_type = EXPRESSION_TYPE_INTEGER;
1475
19.9k
    }
1476
1477
26.4k
    FAIL_ON_ERROR(yr_parser_emit(
1478
26.4k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1479
26.4k
  }
1480
1.58k
  else if (
1481
1.58k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1482
1.58k
      right_operand.type == EXPRESSION_TYPE_STRING)
1483
1.32k
  {
1484
1.32k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1485
1486
1.32k
    if (opcode != OP_ERROR)
1487
1.32k
    {
1488
1.32k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1489
1.32k
    }
1490
0
    else
1491
0
    {
1492
0
      yr_compiler_set_error_extra_info_fmt(
1493
0
          compiler, "strings don't support \"%s\" operation", op);
1494
1495
0
      return ERROR_WRONG_TYPE;
1496
0
    }
1497
1.32k
  }
1498
258
  else
1499
258
  {
1500
258
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1501
1502
258
    return ERROR_WRONG_TYPE;
1503
258
  }
1504
1505
27.7k
  return ERROR_SUCCESS;
1506
27.9k
}