Coverage Report

Created: 2025-04-11 06:09

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts a hexadecimal digit character into its numeric value.
//
// Only the characters '0'-'9' and uppercase 'A'-'F' produce a meaningful
// result; any other character is treated as if it were a decimal digit.
// The argument is evaluated more than once, so it must not have side effects.
//
// Both the argument and the whole expansion are parenthesized so the macro
// can be embedded safely in larger expressions, e.g. (todigit(hi) << 4) |
// todigit(lo) or 1 + todigit(c).
#define todigit(x)                                           \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
100k
{
56
100k
  return yr_arena_write_data(
57
100k
      yyget_extra(yyscanner)->arena,
58
100k
      YR_CODE_SECTION,
59
100k
      &instruction,
60
100k
      sizeof(uint8_t),
61
100k
      instruction_ref);
62
100k
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
3.75k
{
71
3.75k
  int result = yr_arena_write_data(
72
3.75k
      yyget_extra(yyscanner)->arena,
73
3.75k
      YR_CODE_SECTION,
74
3.75k
      &instruction,
75
3.75k
      sizeof(uint8_t),
76
3.75k
      instruction_ref);
77
78
3.75k
  if (result == ERROR_SUCCESS)
79
3.75k
    result = yr_arena_write_data(
80
3.75k
        yyget_extra(yyscanner)->arena,
81
3.75k
        YR_CODE_SECTION,
82
3.75k
        &argument,
83
3.75k
        sizeof(double),
84
3.75k
        argument_ref);
85
86
3.75k
  return result;
87
3.75k
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
22.6k
{
96
22.6k
  int result = yr_arena_write_data(
97
22.6k
      yyget_extra(yyscanner)->arena,
98
22.6k
      YR_CODE_SECTION,
99
22.6k
      &instruction,
100
22.6k
      sizeof(uint8_t),
101
22.6k
      instruction_ref);
102
103
22.6k
  if (result == ERROR_SUCCESS)
104
22.6k
    result = yr_arena_write_data(
105
22.6k
        yyget_extra(yyscanner)->arena,
106
22.6k
        YR_CODE_SECTION,
107
22.6k
        &argument,
108
22.6k
        sizeof(int32_t),
109
22.6k
        argument_ref);
110
111
22.6k
  return result;
112
22.6k
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
46.2k
{
121
46.2k
  int result = yr_arena_write_data(
122
46.2k
      yyget_extra(yyscanner)->arena,
123
46.2k
      YR_CODE_SECTION,
124
46.2k
      &instruction,
125
46.2k
      sizeof(uint8_t),
126
46.2k
      instruction_ref);
127
128
46.2k
  if (result == ERROR_SUCCESS)
129
46.2k
    result = yr_arena_write_data(
130
46.2k
        yyget_extra(yyscanner)->arena,
131
46.2k
        YR_CODE_SECTION,
132
46.2k
        &argument,
133
46.2k
        sizeof(int64_t),
134
46.2k
        argument_ref);
135
136
46.2k
  return result;
137
46.2k
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
749k
{
146
749k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
749k
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
749k
  memset(&arg, 0, sizeof(arg));
151
749k
  arg.ptr = argument;
152
153
749k
  int result = yr_arena_write_data(
154
749k
      yyget_extra(yyscanner)->arena,
155
749k
      YR_CODE_SECTION,
156
749k
      &instruction,
157
749k
      sizeof(uint8_t),
158
749k
      instruction_ref);
159
160
749k
  if (result == ERROR_SUCCESS)
161
749k
    result = yr_arena_write_data(
162
749k
        yyget_extra(yyscanner)->arena,
163
749k
        YR_CODE_SECTION,
164
749k
        &arg,
165
749k
        sizeof(arg),
166
749k
        &ref);
167
168
749k
  if (result == ERROR_SUCCESS)
169
749k
    result = yr_arena_make_ptr_relocatable(
170
749k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
749k
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
749k
  return result;
176
749k
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
11.3k
{
183
11.3k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
11.3k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
11.3k
      compiler, compiler->current_rule_idx);
187
188
11.3k
  YR_STRING* string;
189
190
11.3k
  const char* string_identifier;
191
11.3k
  const char* target_identifier;
192
193
11.3k
  int matching = 0;
194
195
11.3k
  yr_rule_strings_foreach(current_rule, string)
196
749k
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
749k
    if (string->chained_to == NULL)
201
745k
    {
202
745k
      string_identifier = string->identifier;
203
745k
      target_identifier = identifier;
204
205
1.49M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
1.49M
             *target_identifier == *string_identifier)
207
745k
      {
208
745k
        target_identifier++;
209
745k
        string_identifier++;
210
745k
      }
211
212
745k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
745k
          *target_identifier == '*')
214
739k
      {
215
739k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
739k
        string->flags |= STRING_FLAGS_REFERENCED;
218
739k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
739k
        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
220
739k
        matching++;
221
739k
      }
222
745k
    }
223
749k
  }
224
225
11.3k
  if (count != NULL)
226
11.3k
  {
227
11.3k
    *count = matching;
228
11.3k
  }
229
230
11.3k
  if (matching == 0)
231
18
  {
232
18
    yr_compiler_set_error_extra_info(
233
18
        compiler, identifier) return ERROR_UNDEFINED_STRING;
234
18
  }
235
236
11.3k
  return ERROR_SUCCESS;
237
11.3k
}
238
239
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
240
// prefix.
241
int yr_parser_emit_pushes_for_rules(
242
    yyscan_t yyscanner,
243
    const char* prefix,
244
    int* count)
245
1.17k
{
246
1.17k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
247
248
  // Make sure the compiler is parsing a rule
249
1.17k
  assert(compiler->current_rule_idx != UINT32_MAX);
250
251
1.17k
  YR_RULE* rule;
252
1.17k
  int matching = 0;
253
254
1.17k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
255
1.17k
      compiler->arena,
256
1.17k
      YR_NAMESPACES_TABLE,
257
1.17k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
258
259
  // Can't use yr_rules_foreach here as that requires the rules to have been
260
  // finalized (inserting a NULL rule at the end). This is done when
261
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
262
  // into the current position in the code arena. Obviously we aren't done
263
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
264
  // this I'm manually walking all the currently compiled rules (up to the
265
  // current rule index) and comparing identifiers to see if it is one we should
266
  // use.
267
  //
268
  // Further, we have to get compiler->current_rule_idx before we start because
269
  // if we emit an OP_PUSH_RULE
270
1.17k
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
271
272
3.95k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
273
2.78k
  {
274
    // Is rule->identifier prefixed by prefix?
275
2.78k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
276
1.25k
    {
277
1.25k
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
278
1.25k
          compiler->rules_table, rule->identifier, ns->name);
279
280
1.25k
      if (rule_idx != UINT32_MAX)
281
1.25k
      {
282
1.25k
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
283
1.25k
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
284
1.25k
        matching++;
285
1.25k
      }
286
1.25k
    }
287
288
2.78k
    rule++;
289
2.78k
  }
290
291
1.17k
  if (count != NULL)
292
1.17k
  {
293
1.17k
    *count = matching;
294
1.17k
  }
295
296
1.17k
  if (matching == 0)
297
84
  {
298
84
    yr_compiler_set_error_extra_info(compiler, prefix);
299
84
    return ERROR_UNDEFINED_IDENTIFIER;
300
84
  }
301
302
1.08k
  return ERROR_SUCCESS;
303
1.17k
}
304
305
// Emits the shortest push instruction able to hold the given constant.
//
// YR_UNDEFINED is encoded as a lone OP_PUSH_U. Any other value is encoded
// as OP_PUSH_8, OP_PUSH_16, OP_PUSH_32 or OP_PUSH followed by the value
// stored in 1, 2, 4 or 8 bytes respectively (copied with the host's byte
// order). The encoded bytes are written to the code section of the
// compiler's arena in one call, whose result is returned.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One byte for the opcode plus room for the widest possible operand.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    uint16_t narrowed = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(encoded + 1, &narrowed, sizeof(uint16_t));
    encoded_len += sizeof(uint16_t);
  }
  else if (argument <= 0xffffffff)
  {
    uint32_t narrowed = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(encoded + 1, &narrowed, sizeof(uint32_t));
    encoded_len += sizeof(uint32_t);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(encoded + 1, &argument, sizeof(uint64_t));
    encoded_len += sizeof(uint64_t);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      encoded_len,
      NULL);
}
344
345
int yr_parser_check_types(
346
    YR_COMPILER* compiler,
347
    YR_OBJECT_FUNCTION* function,
348
    const char* actual_args_fmt)
349
11
{
350
11
  int i;
351
352
50
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
353
50
  {
354
50
    if (function->prototypes[i].arguments_fmt == NULL)
355
5
      break;
356
357
45
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
358
6
      return ERROR_SUCCESS;
359
45
  }
360
361
5
  yr_compiler_set_error_extra_info(compiler, function->identifier)
362
363
5
      return ERROR_WRONG_ARGUMENTS;
364
11
}
365
366
int yr_parser_lookup_string(
367
    yyscan_t yyscanner,
368
    const char* identifier,
369
    YR_STRING** string)
370
2.20k
{
371
2.20k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
372
373
2.20k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
374
2.20k
      compiler, compiler->current_rule_idx);
375
376
2.20k
  yr_rule_strings_foreach(current_rule, *string)
377
3.09k
  {
378
    // If some string $a gets fragmented into multiple chained
379
    // strings, all those fragments have the same $a identifier
380
    // but we are interested in the heading fragment, which is
381
    // that with chained_to == NULL
382
383
3.09k
    if ((*string)->chained_to == NULL &&
384
3.09k
        strcmp((*string)->identifier, identifier) == 0)
385
2.16k
    {
386
2.16k
      return ERROR_SUCCESS;
387
2.16k
    }
388
3.09k
  }
389
390
40
  yr_compiler_set_error_extra_info(compiler, identifier)
391
392
40
      * string = NULL;
393
394
40
  return ERROR_UNDEFINED_STRING;
395
2.20k
}
396
397
////////////////////////////////////////////////////////////////////////////////
398
// Searches for a variable with the given identifier in the scope of the current
399
// "for" loop. In case of nested "for" loops the identifier is searched starting
400
// at the top-level loop and going down thorough the nested loops until the
401
// current one. This is ok because inner loops can not re-define an identifier
402
// already defined by an outer loop.
403
//
404
// If the variable is found, the return value is the position that the variable
405
// occupies among all the currently defined variables. If the variable doesn't
406
// exist the return value is -1.
407
//
408
// The function can receive a pointer to a YR_EXPRESSION that will populated
409
// with information about the variable if found. This pointer can be NULL if
410
// the caller is not interested in getting that information.
411
//
412
int yr_parser_lookup_loop_variable(
413
    yyscan_t yyscanner,
414
    const char* identifier,
415
    YR_EXPRESSION* expr)
416
17.7k
{
417
17.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
418
17.7k
  int i, j;
419
17.7k
  int var_offset = 0;
420
421
22.1k
  for (i = 0; i <= compiler->loop_index; i++)
422
12.3k
  {
423
12.3k
    var_offset += compiler->loop[i].vars_internal_count;
424
425
20.3k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
426
16.0k
    {
427
16.0k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
428
16.0k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
429
8.03k
      {
430
8.03k
        if (expr != NULL)
431
8.00k
          *expr = compiler->loop[i].vars[j];
432
433
8.03k
        return var_offset + j;
434
8.03k
      }
435
16.0k
    }
436
437
4.33k
    var_offset += compiler->loop[i].vars_count;
438
4.33k
  }
439
440
9.73k
  return -1;
441
17.7k
}
442
443
static int _yr_parser_write_string(
444
    const char* identifier,
445
    YR_MODIFIER modifier,
446
    YR_COMPILER* compiler,
447
    SIZED_STRING* str,
448
    RE_AST* re_ast,
449
    YR_ARENA_REF* string_ref,
450
    int* min_atom_quality,
451
    int* num_atom)
452
22.4k
{
453
22.4k
  SIZED_STRING* literal_string;
454
22.4k
  YR_ATOM_LIST_ITEM* atom;
455
22.4k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
456
457
22.4k
  int c, result;
458
22.4k
  int max_string_len;
459
22.4k
  bool free_literal = false;
460
461
22.4k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
462
22.4k
      compiler->arena,
463
22.4k
      YR_STRINGS_TABLE,
464
22.4k
      sizeof(YR_STRING),
465
22.4k
      string_ref,
466
22.4k
      offsetof(YR_STRING, identifier),
467
22.4k
      offsetof(YR_STRING, string),
468
22.4k
      offsetof(YR_STRING, chained_to),
469
22.4k
      EOL));
470
471
22.4k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
472
22.4k
      compiler->arena, string_ref);
473
474
22.4k
  YR_ARENA_REF ref;
475
476
22.4k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
477
478
22.4k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
479
22.4k
  string->rule_idx = compiler->current_rule_idx;
480
22.4k
  string->idx = compiler->current_string_idx;
481
22.4k
  string->fixed_offset = YR_UNDEFINED;
482
483
22.4k
  compiler->current_string_idx++;
484
485
22.4k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
486
22.4k
      modifier.flags & STRING_FLAGS_REGEXP ||
487
22.4k
      modifier.flags & STRING_FLAGS_BASE64 ||
488
22.4k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
489
19.7k
  {
490
19.7k
    literal_string = yr_re_ast_extract_literal(re_ast);
491
492
19.7k
    if (literal_string != NULL)
493
10.8k
      free_literal = true;
494
19.7k
  }
495
2.67k
  else
496
2.67k
  {
497
2.67k
    literal_string = str;
498
2.67k
  }
499
500
22.4k
  if (literal_string != NULL)
501
13.5k
  {
502
13.5k
    modifier.flags |= STRING_FLAGS_LITERAL;
503
504
13.5k
    result = _yr_compiler_store_data(
505
13.5k
        compiler,
506
13.5k
        literal_string->c_string,
507
13.5k
        literal_string->length + 1,  // +1 to include terminating NULL
508
13.5k
        &ref);
509
510
13.5k
    if (result != ERROR_SUCCESS)
511
0
      goto cleanup;
512
513
13.5k
    string->length = (uint32_t) literal_string->length;
514
13.5k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
515
516
13.5k
    if (modifier.flags & STRING_FLAGS_WIDE)
517
1.49k
      max_string_len = string->length * 2;
518
12.0k
    else
519
12.0k
      max_string_len = string->length;
520
521
13.5k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
522
9.81k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
523
524
13.5k
    result = yr_atoms_extract_from_string(
525
13.5k
        &compiler->atoms_config,
526
13.5k
        (uint8_t*) literal_string->c_string,
527
13.5k
        (int32_t) literal_string->length,
528
13.5k
        modifier,
529
13.5k
        &atom_list,
530
13.5k
        min_atom_quality);
531
532
13.5k
    if (result != ERROR_SUCCESS)
533
0
      goto cleanup;
534
13.5k
  }
535
8.90k
  else
536
8.90k
  {
537
    // Non-literal strings can't be marked as fixed offset because once we
538
    // find a string atom in the scanned data we don't know the offset where
539
    // the string should start, as the non-literal strings can contain
540
    // variable-length portions.
541
8.90k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
542
543
    // Save the position where the RE forward code starts for later reference.
544
8.90k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
545
8.90k
        compiler->arena, YR_RE_CODE_SECTION);
546
547
    // Emit forwards code
548
8.90k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
549
550
8.90k
    if (result != ERROR_SUCCESS)
551
115
      goto cleanup;
552
553
    // Emit backwards code
554
8.78k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
555
556
8.78k
    if (result != ERROR_SUCCESS)
557
3
      goto cleanup;
558
559
    // Extract atoms from the regular expression.
560
8.78k
    result = yr_atoms_extract_from_re(
561
8.78k
        &compiler->atoms_config,
562
8.78k
        re_ast,
563
8.78k
        modifier,
564
8.78k
        &atom_list,
565
8.78k
        min_atom_quality);
566
567
8.78k
    if (result != ERROR_SUCCESS)
568
0
      goto cleanup;
569
570
    // If no atom was extracted let's add a zero-length atom.
571
8.78k
    if (atom_list == NULL)
572
3.46k
    {
573
3.46k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
574
575
3.46k
      if (atom_list == NULL)
576
0
      {
577
0
        result = ERROR_INSUFFICIENT_MEMORY;
578
0
        goto cleanup;
579
0
      }
580
581
3.46k
      atom_list->atom.length = 0;
582
3.46k
      atom_list->backtrack = 0;
583
3.46k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
584
3.46k
      atom_list->next = NULL;
585
586
3.46k
      yr_arena_ptr_to_ref(
587
3.46k
          compiler->arena,
588
3.46k
          yr_arena_get_ptr(
589
3.46k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
590
3.46k
          &(atom_list->forward_code_ref));
591
3.46k
    }
592
8.78k
  }
593
594
22.2k
  string->flags = modifier.flags;
595
596
  // Add the string to Aho-Corasick automaton.
597
22.2k
  result = yr_ac_add_string(
598
22.2k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
599
600
22.2k
  if (result != ERROR_SUCCESS)
601
0
    goto cleanup;
602
603
22.2k
  atom = atom_list;
604
22.2k
  c = 0;
605
606
4.84M
  while (atom != NULL)
607
4.82M
  {
608
4.82M
    atom = atom->next;
609
4.82M
    c++;
610
4.82M
  }
611
612
22.2k
  (*num_atom) += c;
613
614
22.4k
cleanup:
615
22.4k
  if (free_literal)
616
10.8k
    yr_free(literal_string);
617
618
22.4k
  if (atom_list != NULL)
619
22.2k
    yr_atoms_list_destroy(atom_list);
620
621
22.4k
  return result;
622
22.2k
}
623
624
static int _yr_parser_check_string_modifiers(
625
    yyscan_t yyscanner,
626
    YR_MODIFIER modifier)
627
18.1k
{
628
18.1k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
629
630
  // xor and nocase together is not implemented.
631
18.1k
  if (modifier.flags & STRING_FLAGS_XOR &&
632
18.1k
      modifier.flags & STRING_FLAGS_NO_CASE)
633
0
  {
634
0
    yr_compiler_set_error_extra_info(
635
0
        compiler, "invalid modifier combination: xor nocase");
636
0
    return ERROR_INVALID_MODIFIER;
637
0
  }
638
639
  // base64 and nocase together is not implemented.
640
18.1k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
641
18.1k
      (modifier.flags & STRING_FLAGS_BASE64 ||
642
2.48k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
643
0
  {
644
0
    yr_compiler_set_error_extra_info(
645
0
        compiler,
646
0
        modifier.flags & STRING_FLAGS_BASE64
647
0
            ? "invalid modifier combination: base64 nocase"
648
0
            : "invalid modifier combination: base64wide nocase");
649
0
    return ERROR_INVALID_MODIFIER;
650
0
  }
651
652
  // base64 and fullword together is not implemented.
653
18.1k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
654
18.1k
      (modifier.flags & STRING_FLAGS_BASE64 ||
655
270
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
656
0
  {
657
0
    yr_compiler_set_error_extra_info(
658
0
        compiler,
659
0
        modifier.flags & STRING_FLAGS_BASE64
660
0
            ? "invalid modifier combination: base64 fullword"
661
0
            : "invalid modifier combination: base64wide fullword");
662
0
    return ERROR_INVALID_MODIFIER;
663
0
  }
664
665
  // base64 and xor together is not implemented.
666
18.1k
  if (modifier.flags & STRING_FLAGS_XOR &&
667
18.1k
      (modifier.flags & STRING_FLAGS_BASE64 ||
668
1.51k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
669
8
  {
670
8
    yr_compiler_set_error_extra_info(
671
8
        compiler,
672
8
        modifier.flags & STRING_FLAGS_BASE64
673
8
            ? "invalid modifier combination: base64 xor"
674
8
            : "invalid modifier combination: base64wide xor");
675
8
    return ERROR_INVALID_MODIFIER;
676
8
  }
677
678
18.1k
  return ERROR_SUCCESS;
679
18.1k
}
680
681
int yr_parser_reduce_string_declaration(
682
    yyscan_t yyscanner,
683
    YR_MODIFIER modifier,
684
    const char* identifier,
685
    SIZED_STRING* str,
686
    YR_ARENA_REF* string_ref)
687
18.1k
{
688
18.1k
  int result = ERROR_SUCCESS;
689
18.1k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
690
18.1k
  int atom_quality;
691
692
18.1k
  char message[512];
693
694
18.1k
  int32_t min_gap = 0;
695
18.1k
  int32_t max_gap = 0;
696
697
18.1k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
698
699
18.1k
  RE_AST* re_ast = NULL;
700
18.1k
  RE_AST* remainder_re_ast = NULL;
701
18.1k
  RE_ERROR re_error;
702
703
18.1k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
704
18.1k
      compiler, compiler->current_rule_idx);
705
706
  // Determine if a string with the same identifier was already defined
707
  // by searching for the identifier in strings_table.
708
18.1k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
709
18.1k
      compiler->strings_table, identifier, NULL);
710
711
  // The string was already defined, return an error.
712
18.1k
  if (string_idx != UINT32_MAX)
713
13
  {
714
13
    yr_compiler_set_error_extra_info(compiler, identifier);
715
13
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
716
13
  }
717
718
  // Empty strings are not allowed.
719
18.1k
  if (str->length == 0)
720
0
  {
721
0
    yr_compiler_set_error_extra_info(compiler, identifier);
722
0
    return ERROR_EMPTY_STRING;
723
0
  }
724
725
18.1k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
726
2.08k
    modifier.flags |= STRING_FLAGS_NO_CASE;
727
728
18.1k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
729
302
    modifier.flags |= STRING_FLAGS_DOT_ALL;
730
731
  // Hex strings are always handled as DOT_ALL regexps.
732
18.1k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
733
1.27k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
734
735
18.1k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
736
18.1k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
737
16.7k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
738
15.2k
  {
739
15.2k
    modifier.flags |= STRING_FLAGS_ASCII;
740
15.2k
  }
741
742
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
743
  // a single match for the string is enough. This is true in
744
  // most cases, except when the string count (#) and string offset (@)
745
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
746
  // initially, and unmarked later if required.
747
18.1k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
748
749
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
750
  // need to be searched all over the file because the user is using the
751
  // "at" operator. The string must be searched at a fixed offset in the
752
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
753
  // and unmarked later if required.
754
18.1k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
755
756
  // If string identifier is $ this is an anonymous string, if not add the
757
  // identifier to strings_table.
758
18.1k
  if (strcmp(identifier, "$") == 0)
759
17.0k
  {
760
17.0k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
761
17.0k
  }
762
1.12k
  else
763
1.12k
  {
764
1.12k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
765
1.12k
        compiler->strings_table,
766
1.12k
        identifier,
767
1.12k
        NULL,
768
1.12k
        compiler->current_string_idx));
769
1.12k
  }
770
771
  // Make sure that the the string does not have an invalid combination of
772
  // modifiers.
773
18.1k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
774
775
18.1k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
776
18.1k
      modifier.flags & STRING_FLAGS_REGEXP ||
777
18.1k
      modifier.flags & STRING_FLAGS_BASE64 ||
778
18.1k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
779
15.5k
  {
780
15.5k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
781
1.27k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
782
14.2k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
783
12.2k
    {
784
12.2k
      int flags = RE_PARSER_FLAG_NONE;
785
12.2k
      if (compiler->strict_escape)
786
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
787
12.2k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
788
12.2k
    }
789
1.95k
    else
790
1.95k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
791
792
15.5k
    if (result != ERROR_SUCCESS)
793
756
    {
794
756
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
795
0
      {
796
0
        yywarning(yyscanner, "unknown escape sequence");
797
0
      }
798
756
      else
799
756
      {
800
756
        snprintf(
801
756
            message,
802
756
            sizeof(message),
803
756
            "invalid %s \"%s\": %s",
804
756
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
805
756
                                                        : "regular expression",
806
756
            identifier,
807
756
            re_error.message);
808
809
756
        yr_compiler_set_error_extra_info(compiler, message);
810
756
        goto _exit;
811
756
      }
812
756
    }
813
814
14.7k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
815
997
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
816
817
14.7k
    if (re_ast->flags & RE_FLAGS_GREEDY)
818
897
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
819
820
    // Regular expressions in the strings section can't mix greedy and
821
    // ungreedy quantifiers like .* and .*?. That's because these regular
822
    // expressions can be matched forwards and/or backwards depending on the
823
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
824
    // be able to properly calculate the length of the match.
825
826
14.7k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
827
14.7k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
828
11
    {
829
11
      result = ERROR_INVALID_REGULAR_EXPRESSION;
830
831
11
      yr_compiler_set_error_extra_info(
832
11
          compiler,
833
11
          "greedy and ungreedy quantifiers can't be mixed in a regular "
834
11
          "expression");
835
836
11
      goto _exit;
837
11
    }
838
839
14.7k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
840
972
    {
841
972
      yywarning(
842
972
          yyscanner,
843
972
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
844
972
          "with a reasonable value for N",
845
972
          identifier);
846
972
    }
847
848
14.7k
    if (compiler->re_ast_callback != NULL)
849
0
    {
850
0
      compiler->re_ast_callback(
851
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
852
0
    }
853
854
14.7k
    *string_ref = YR_ARENA_NULL_REF;
855
856
34.3k
    while (re_ast != NULL)
857
19.7k
    {
858
19.7k
      YR_ARENA_REF ref;
859
860
19.7k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
861
862
19.7k
      int32_t prev_min_gap = min_gap;
863
19.7k
      int32_t prev_max_gap = max_gap;
864
865
19.7k
      result = yr_re_ast_split_at_chaining_point(
866
19.7k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
867
868
19.7k
      if (result != ERROR_SUCCESS)
869
0
        goto _exit;
870
871
19.7k
      result = _yr_parser_write_string(
872
19.7k
          identifier,
873
19.7k
          modifier,
874
19.7k
          compiler,
875
19.7k
          NULL,
876
19.7k
          re_ast,
877
19.7k
          &ref,
878
19.7k
          &atom_quality,
879
19.7k
          &current_rule->num_atoms);
880
881
19.7k
      if (result != ERROR_SUCCESS)
882
118
        goto _exit;
883
884
19.6k
      if (atom_quality < min_atom_quality)
885
13.8k
        min_atom_quality = atom_quality;
886
887
19.6k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
888
14.6k
      {
889
        // This is the first string in the chain, the string reference
890
        // returned by this function must point to this string.
891
14.6k
        *string_ref = ref;
892
14.6k
      }
893
5.00k
      else
894
5.00k
      {
895
        // This is not the first string in the chain, set the appropriate
896
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
897
        // fields.
898
5.00k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
899
5.00k
            compiler->arena,
900
5.00k
            YR_STRINGS_TABLE,
901
5.00k
            prev_string_idx * sizeof(YR_STRING));
902
903
5.00k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
904
5.00k
            compiler->arena, &ref);
905
906
5.00k
        new_string->chained_to = prev_string;
907
5.00k
        new_string->chain_gap_min = prev_min_gap;
908
5.00k
        new_string->chain_gap_max = prev_max_gap;
909
910
        // A string chained to another one can't have a fixed offset, only the
911
        // head of the string chain can have a fixed offset.
912
5.00k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
913
914
        // There is a previous string, but that string wasn't marked as part
915
        // of a chain because we can't do that until knowing there will be
916
        // another string, let's flag it now the we know.
917
5.00k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
918
919
        // There is a previous string, so this string is part of a chain, but
920
        // there will be no more strings because there are no more AST to
921
        // split, which means that this is the chain's tail.
922
5.00k
        if (remainder_re_ast == NULL)
923
582
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
924
582
                               STRING_FLAGS_CHAIN_TAIL;
925
5.00k
      }
926
927
19.6k
      yr_re_ast_destroy(re_ast);
928
19.6k
      re_ast = remainder_re_ast;
929
19.6k
    }
930
14.7k
  }
931
2.67k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
932
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
933
2.67k
  {
934
2.67k
    result = _yr_parser_write_string(
935
2.67k
        identifier,
936
2.67k
        modifier,
937
2.67k
        compiler,
938
2.67k
        str,
939
2.67k
        NULL,
940
2.67k
        string_ref,
941
2.67k
        &min_atom_quality,
942
2.67k
        &current_rule->num_atoms);
943
944
2.67k
    if (result != ERROR_SUCCESS)
945
0
      goto _exit;
946
2.67k
  }
947
948
17.2k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
949
7.20k
  {
950
7.20k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
951
7.20k
  }
952
953
18.1k
_exit:
954
955
18.1k
  if (re_ast != NULL)
956
372
    yr_re_ast_destroy(re_ast);
957
958
18.1k
  if (remainder_re_ast != NULL)
959
1
    yr_re_ast_destroy(remainder_re_ast);
960
961
18.1k
  return result;
962
17.2k
}
963
964
static int wildcard_iterator(
965
    void* prefix,
966
    size_t prefix_len,
967
    void* _value,
968
    void* data)
969
1.27k
{
970
1.27k
  const char* identifier = (const char*) data;
971
972
  // If the identifier is prefixed by prefix, then it matches the wildcard.
973
1.27k
  if (!strncmp(prefix, identifier, prefix_len))
974
180
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
975
976
1.09k
  return ERROR_SUCCESS;
977
1.27k
}
978
979
int yr_parser_reduce_rule_declaration_phase_1(
980
    yyscan_t yyscanner,
981
    int32_t flags,
982
    const char* identifier,
983
    YR_ARENA_REF* rule_ref)
984
25.6k
{
985
25.6k
  int result;
986
25.6k
  YR_FIXUP* fixup;
987
25.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
988
989
25.6k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
990
25.6k
      compiler->arena,
991
25.6k
      YR_NAMESPACES_TABLE,
992
25.6k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
993
994
25.6k
  if (yr_hash_table_lookup_uint32(
995
25.6k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
996
25.6k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
997
15.5k
  {
998
    // A rule or variable with the same identifier already exists, return the
999
    // appropriate error.
1000
1001
15.5k
    yr_compiler_set_error_extra_info(compiler, identifier);
1002
15.5k
    return ERROR_DUPLICATED_IDENTIFIER;
1003
15.5k
  }
1004
1005
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1006
  // any of them are a prefix of the identifier being declared. If so, return
1007
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1008
10.0k
  result = yr_hash_table_iterate(
1009
10.0k
      compiler->wildcard_identifiers_table,
1010
10.0k
      ns->name,
1011
10.0k
      wildcard_iterator,
1012
10.0k
      (void*) identifier);
1013
1014
10.0k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1015
180
  {
1016
    // This rule matches an existing wildcard rule set.
1017
180
    yr_compiler_set_error_extra_info(compiler, identifier);
1018
180
  }
1019
1020
10.0k
  FAIL_ON_ERROR(result);
1021
1022
9.88k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1023
9.88k
      compiler->arena,
1024
9.88k
      YR_RULES_TABLE,
1025
9.88k
      sizeof(YR_RULE),
1026
9.88k
      rule_ref,
1027
9.88k
      offsetof(YR_RULE, identifier),
1028
9.88k
      offsetof(YR_RULE, tags),
1029
9.88k
      offsetof(YR_RULE, strings),
1030
9.88k
      offsetof(YR_RULE, metas),
1031
9.88k
      offsetof(YR_RULE, ns),
1032
9.88k
      EOL));
1033
1034
9.88k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1035
1036
9.88k
  YR_ARENA_REF ref;
1037
1038
9.88k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1039
1040
9.88k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1041
9.88k
  rule->flags = flags;
1042
9.88k
  rule->ns = ns;
1043
9.88k
  rule->num_atoms = 0;
1044
1045
9.88k
  YR_ARENA_REF jmp_offset_ref;
1046
1047
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1048
9.88k
  compiler->current_rule_idx = compiler->next_rule_idx;
1049
9.88k
  compiler->next_rule_idx++;
1050
1051
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1052
  // disabled it skips over the rule's code and go straight to the next rule's
1053
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1054
  // is set to 0 as we don't know the jump target yet. When we finish
1055
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1056
  // the jump offset is set to its final value.
1057
1058
9.88k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1059
9.88k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1060
1061
9.88k
  FAIL_ON_ERROR(yr_arena_write_data(
1062
9.88k
      compiler->arena,
1063
9.88k
      YR_CODE_SECTION,
1064
9.88k
      &compiler->current_rule_idx,
1065
9.88k
      sizeof(compiler->current_rule_idx),
1066
9.88k
      NULL));
1067
1068
  // Create a fixup entry for the jump and push it in the stack
1069
9.88k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1070
1071
9.88k
  if (fixup == NULL)
1072
0
    return ERROR_INSUFFICIENT_MEMORY;
1073
1074
9.88k
  fixup->ref = jmp_offset_ref;
1075
9.88k
  fixup->next = compiler->fixup_stack_head;
1076
9.88k
  compiler->fixup_stack_head = fixup;
1077
1078
  // Clean strings_table as we are starting to parse a new rule.
1079
9.88k
  yr_hash_table_clean(compiler->strings_table, NULL);
1080
1081
9.88k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1082
9.88k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1083
1084
9.88k
  return ERROR_SUCCESS;
1085
9.88k
}
1086
1087
int yr_parser_reduce_rule_declaration_phase_2(
1088
    yyscan_t yyscanner,
1089
    YR_ARENA_REF* rule_ref)
1090
388
{
1091
388
  uint32_t max_strings_per_rule;
1092
388
  uint32_t strings_in_rule = 0;
1093
1094
388
  YR_FIXUP* fixup;
1095
388
  YR_STRING* string;
1096
388
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1097
1098
388
  yr_get_configuration_uint32(
1099
388
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1100
1101
388
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1102
1103
  // Show warning if the rule is generating too many atoms. The warning is
1104
  // shown if the number of atoms is greater than 20 times the maximum number
1105
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1106
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1107
1108
388
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1109
20
  {
1110
20
    yywarning(yyscanner, "rule is slowing down scanning");
1111
20
  }
1112
1113
388
  yr_rule_strings_foreach(rule, string)
1114
1.55k
  {
1115
    // Only the heading fragment in a chain of strings (the one with
1116
    // chained_to == NULL) must be referenced. All other fragments
1117
    // are never marked as referenced.
1118
    //
1119
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1120
    // strings must always be referenced.
1121
1122
1.55k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1123
1.55k
        (STRING_IS_ANONYMOUS(string) ||
1124
301
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1125
20
    {
1126
20
      yr_compiler_set_error_extra_info(
1127
20
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1128
20
    }
1129
1130
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1131
    // that it will match anywhere.
1132
1.53k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1133
1.53k
        STRING_IS_FIXED_OFFSET(string))
1134
186
    {
1135
186
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1136
186
    }
1137
1138
1.53k
    strings_in_rule++;
1139
1140
1.53k
    if (strings_in_rule > max_strings_per_rule)
1141
0
    {
1142
0
      yr_compiler_set_error_extra_info(
1143
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1144
0
    }
1145
1.53k
  }
1146
1147
368
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1148
368
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1149
1150
368
  fixup = compiler->fixup_stack_head;
1151
1152
368
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1153
368
      compiler->arena, &fixup->ref);
1154
1155
368
  int32_t jmp_offset = yr_arena_get_current_offset(
1156
368
                           compiler->arena, YR_CODE_SECTION) -
1157
368
                       fixup->ref.offset + 1;
1158
1159
368
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1160
1161
  // Remove fixup from the stack.
1162
368
  compiler->fixup_stack_head = fixup->next;
1163
368
  yr_free(fixup);
1164
1165
  // We have finished parsing the current rule set current_rule_idx to
1166
  // UINT32_MAX indicating that we are not currently parsing a rule.
1167
368
  compiler->current_rule_idx = UINT32_MAX;
1168
1169
368
  return ERROR_SUCCESS;
1170
368
}
1171
1172
int yr_parser_reduce_string_identifier(
1173
    yyscan_t yyscanner,
1174
    const char* identifier,
1175
    uint8_t instruction,
1176
    uint64_t at_offset)
1177
18.8k
{
1178
18.8k
  YR_STRING* string;
1179
18.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1180
1181
18.8k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1182
16.6k
  {
1183
16.6k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1184
16.5k
    {
1185
16.5k
      yr_parser_emit_with_arg(
1186
16.5k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1187
1188
16.5k
      yr_parser_emit(yyscanner, instruction, NULL);
1189
1190
16.5k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1191
16.5k
          compiler, compiler->current_rule_idx);
1192
1193
16.5k
      yr_rule_strings_foreach(current_rule, string)
1194
750k
      {
1195
750k
        if (instruction != OP_FOUND)
1196
749k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1197
1198
750k
        if (instruction == OP_FOUND_AT)
1199
539
        {
1200
          // Avoid overwriting any previous fixed offset
1201
539
          if (string->fixed_offset == YR_UNDEFINED)
1202
281
            string->fixed_offset = at_offset;
1203
1204
          // If a previous fixed offset was different, disable
1205
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1206
          // have room to store a single fixed offset value
1207
539
          if (string->fixed_offset != at_offset)
1208
258
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1209
539
        }
1210
749k
        else
1211
749k
        {
1212
749k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1213
749k
        }
1214
750k
      }
1215
16.5k
    }
1216
130
    else
1217
130
    {
1218
      // Anonymous strings not allowed outside of a loop
1219
130
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1220
130
    }
1221
16.6k
  }
1222
2.20k
  else
1223
2.20k
  {
1224
2.20k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1225
1226
2.16k
    FAIL_ON_ERROR(
1227
2.16k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1228
1229
2.16k
    if (instruction != OP_FOUND)
1230
1.94k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1231
1232
2.16k
    if (instruction == OP_FOUND_AT)
1233
532
    {
1234
      // Avoid overwriting any previous fixed offset
1235
1236
532
      if (string->fixed_offset == YR_UNDEFINED)
1237
209
        string->fixed_offset = at_offset;
1238
1239
      // If a previous fixed offset was different, disable
1240
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1241
      // have room to store a single fixed offset value
1242
1243
532
      if (string->fixed_offset == YR_UNDEFINED ||
1244
532
          string->fixed_offset != at_offset)
1245
368
      {
1246
368
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1247
368
      }
1248
532
    }
1249
1.63k
    else
1250
1.63k
    {
1251
1.63k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1252
1.63k
    }
1253
1254
2.16k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1255
1256
2.16k
    string->flags |= STRING_FLAGS_REFERENCED;
1257
2.16k
  }
1258
1259
18.7k
  return ERROR_SUCCESS;
1260
18.8k
}
1261
1262
int yr_parser_reduce_meta_declaration(
1263
    yyscan_t yyscanner,
1264
    int32_t type,
1265
    const char* identifier,
1266
    const char* string,
1267
    int64_t integer,
1268
    YR_ARENA_REF* meta_ref)
1269
1.06k
{
1270
1.06k
  YR_ARENA_REF ref;
1271
1.06k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1272
1273
1.06k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1274
1.06k
      compiler->arena,
1275
1.06k
      YR_METAS_TABLE,
1276
1.06k
      sizeof(YR_META),
1277
1.06k
      meta_ref,
1278
1.06k
      offsetof(YR_META, identifier),
1279
1.06k
      offsetof(YR_META, string),
1280
1.06k
      EOL));
1281
1282
1.06k
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1283
1284
1.06k
  meta->type = type;
1285
1.06k
  meta->integer = integer;
1286
1287
1.06k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1288
1289
1.06k
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1290
1291
1.06k
  if (string != NULL)
1292
326
  {
1293
326
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1294
1295
326
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1296
326
  }
1297
738
  else
1298
738
  {
1299
738
    meta->string = NULL;
1300
738
  }
1301
1302
1.06k
  compiler->current_meta_idx++;
1303
1304
1.06k
  return ERROR_SUCCESS;
1305
1.06k
}
1306
1307
// Tells whether `module_name` is acceptable as a module name.
//
// A name is valid when it is not empty and the length reported by strlen()
// on its c_string equals its recorded length, i.e. no NUL byte appears
// before the end of the sized string.
//
// Returns true for a valid name, false otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  // strlen() is only evaluated for non-empty names.
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1317
1318
// Handles an import statement.
//
// The module name is validated; if no object with that name exists yet in
// the current namespace, a structure object is created for the module,
// registered in the compiler's objects table and populated with the
// module's declarations. Finally an OP_IMPORT instruction referencing the
// stored module name is emitted.
//
// Args:
//   yyscanner: Scanner whose extra data is the YR_COMPILER.
//   module_name: Name of the module as it appears in the import statement.
//
// Returns:
//   ERROR_SUCCESS (also when the module was already imported),
//   ERROR_INVALID_MODULE_NAME for an empty name or one with embedded NUL
//   bytes, the error returned by yr_modules_do_declarations (for
//   ERROR_UNKNOWN_MODULE the module name is recorded as extra error
//   information), or any error propagated from the object, hash table,
//   string storage and emit helpers.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object was never registered in the table, so nothing else holds a
    // pointer to it; release it here instead of leaking it.
    // NOTE(review): relies on a failed yr_hash_table_add not keeping the
    // value — confirm against the hash table implementation.
    yr_object_destroy(module_structure);

    return result;
  }

  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1375
1376
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1377
23.4k
{
1378
23.4k
  int opcode = 0;
1379
1380
23.4k
  switch (expression_type)
1381
23.4k
  {
1382
15.0k
  case EXPRESSION_TYPE_INTEGER:
1383
15.0k
    opcode = OP_INT_BEGIN;
1384
15.0k
    break;
1385
5.09k
  case EXPRESSION_TYPE_FLOAT:
1386
5.09k
    opcode = OP_DBL_BEGIN;
1387
5.09k
    break;
1388
3.28k
  case EXPRESSION_TYPE_STRING:
1389
3.28k
    opcode = OP_STR_BEGIN;
1390
3.28k
    break;
1391
0
  default:
1392
0
    assert(false);
1393
23.4k
  }
1394
1395
23.4k
  if (op[0] == '<')
1396
5.05k
  {
1397
5.05k
    if (op[1] == '=')
1398
3.77k
      opcode += _OP_LE;
1399
1.27k
    else
1400
1.27k
      opcode += _OP_LT;
1401
5.05k
  }
1402
18.3k
  else if (op[0] == '>')
1403
1.67k
  {
1404
1.67k
    if (op[1] == '=')
1405
660
      opcode += _OP_GE;
1406
1.01k
    else
1407
1.01k
      opcode += _OP_GT;
1408
1.67k
  }
1409
16.6k
  else if (op[1] == '=')
1410
1.46k
  {
1411
1.46k
    if (op[0] == '=')
1412
798
      opcode += _OP_EQ;
1413
670
    else
1414
670
      opcode += _OP_NEQ;
1415
1.46k
  }
1416
15.2k
  else if (op[0] == '+')
1417
4.84k
  {
1418
4.84k
    opcode += _OP_ADD;
1419
4.84k
  }
1420
10.3k
  else if (op[0] == '-')
1421
5.69k
  {
1422
5.69k
    opcode += _OP_SUB;
1423
5.69k
  }
1424
4.66k
  else if (op[0] == '*')
1425
2.78k
  {
1426
2.78k
    opcode += _OP_MUL;
1427
2.78k
  }
1428
1.88k
  else if (op[0] == '\\')
1429
1.88k
  {
1430
1.88k
    opcode += _OP_DIV;
1431
1.88k
  }
1432
1433
23.4k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1434
23.4k
  {
1435
23.4k
    return opcode;
1436
23.4k
  }
1437
1438
1
  return OP_ERROR;
1439
23.4k
}
1440
1441
int yr_parser_reduce_operation(
1442
    yyscan_t yyscanner,
1443
    const char* op,
1444
    YR_EXPRESSION left_operand,
1445
    YR_EXPRESSION right_operand)
1446
23.6k
{
1447
23.6k
  int expression_type;
1448
1449
23.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1450
1451
23.6k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1452
23.6k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1453
23.6k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1454
20.1k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1455
20.1k
  {
1456
20.1k
    if (left_operand.type != right_operand.type)
1457
4.08k
    {
1458
      // One operand is double and the other is integer,
1459
      // cast the integer to double
1460
1461
4.08k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1462
4.08k
          yyscanner,
1463
4.08k
          OP_INT_TO_DBL,
1464
4.08k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1465
4.08k
          NULL,
1466
4.08k
          NULL));
1467
4.08k
    }
1468
1469
20.1k
    expression_type = EXPRESSION_TYPE_FLOAT;
1470
1471
20.1k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1472
20.1k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1473
15.0k
    {
1474
15.0k
      expression_type = EXPRESSION_TYPE_INTEGER;
1475
15.0k
    }
1476
1477
20.1k
    FAIL_ON_ERROR(yr_parser_emit(
1478
20.1k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1479
20.1k
  }
1480
3.56k
  else if (
1481
3.56k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1482
3.56k
      right_operand.type == EXPRESSION_TYPE_STRING)
1483
3.28k
  {
1484
3.28k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1485
1486
3.28k
    if (opcode != OP_ERROR)
1487
3.28k
    {
1488
3.28k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1489
3.28k
    }
1490
1
    else
1491
1
    {
1492
1
      yr_compiler_set_error_extra_info_fmt(
1493
1
          compiler, "strings don't support \"%s\" operation", op);
1494
1495
1
      return ERROR_WRONG_TYPE;
1496
1
    }
1497
3.28k
  }
1498
277
  else
1499
277
  {
1500
277
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1501
1502
277
    return ERROR_WRONG_TYPE;
1503
277
  }
1504
1505
23.4k
  return ERROR_SUCCESS;
1506
23.6k
}