Coverage Report

Created: 2025-07-23 06:46

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts a hexadecimal digit character into its numeric value. Upper-case
// letters 'A'-'F' map to 10-15; every other character is handled by the
// (x) - '0' branch, so only '0'-'9' give a meaningful result there (lower-case
// letters are not recognized).
//
// The whole expansion and every use of the argument are parenthesized so the
// macro can be used inside larger expressions (e.g. todigit(c) * 16) without
// the conditional operator capturing the surrounding operands. The argument
// is evaluated more than once and must not have side effects.
#define todigit(x)                                               \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10))     \
                              : ((uint8_t) ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
98.7k
{
56
98.7k
  return yr_arena_write_data(
57
98.7k
      yyget_extra(yyscanner)->arena,
58
98.7k
      YR_CODE_SECTION,
59
98.7k
      &instruction,
60
98.7k
      sizeof(uint8_t),
61
98.7k
      instruction_ref);
62
98.7k
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
5.70k
{
71
5.70k
  int result = yr_arena_write_data(
72
5.70k
      yyget_extra(yyscanner)->arena,
73
5.70k
      YR_CODE_SECTION,
74
5.70k
      &instruction,
75
5.70k
      sizeof(uint8_t),
76
5.70k
      instruction_ref);
77
78
5.70k
  if (result == ERROR_SUCCESS)
79
5.70k
    result = yr_arena_write_data(
80
5.70k
        yyget_extra(yyscanner)->arena,
81
5.70k
        YR_CODE_SECTION,
82
5.70k
        &argument,
83
5.70k
        sizeof(double),
84
5.70k
        argument_ref);
85
86
5.70k
  return result;
87
5.70k
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
23.7k
{
96
23.7k
  int result = yr_arena_write_data(
97
23.7k
      yyget_extra(yyscanner)->arena,
98
23.7k
      YR_CODE_SECTION,
99
23.7k
      &instruction,
100
23.7k
      sizeof(uint8_t),
101
23.7k
      instruction_ref);
102
103
23.7k
  if (result == ERROR_SUCCESS)
104
23.7k
    result = yr_arena_write_data(
105
23.7k
        yyget_extra(yyscanner)->arena,
106
23.7k
        YR_CODE_SECTION,
107
23.7k
        &argument,
108
23.7k
        sizeof(int32_t),
109
23.7k
        argument_ref);
110
111
23.7k
  return result;
112
23.7k
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
41.6k
{
121
41.6k
  int result = yr_arena_write_data(
122
41.6k
      yyget_extra(yyscanner)->arena,
123
41.6k
      YR_CODE_SECTION,
124
41.6k
      &instruction,
125
41.6k
      sizeof(uint8_t),
126
41.6k
      instruction_ref);
127
128
41.6k
  if (result == ERROR_SUCCESS)
129
41.6k
    result = yr_arena_write_data(
130
41.6k
        yyget_extra(yyscanner)->arena,
131
41.6k
        YR_CODE_SECTION,
132
41.6k
        &argument,
133
41.6k
        sizeof(int64_t),
134
41.6k
        argument_ref);
135
136
41.6k
  return result;
137
41.6k
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
1.16M
{
146
1.16M
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
1.16M
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
1.16M
  memset(&arg, 0, sizeof(arg));
151
1.16M
  arg.ptr = argument;
152
153
1.16M
  int result = yr_arena_write_data(
154
1.16M
      yyget_extra(yyscanner)->arena,
155
1.16M
      YR_CODE_SECTION,
156
1.16M
      &instruction,
157
1.16M
      sizeof(uint8_t),
158
1.16M
      instruction_ref);
159
160
1.16M
  if (result == ERROR_SUCCESS)
161
1.16M
    result = yr_arena_write_data(
162
1.16M
        yyget_extra(yyscanner)->arena,
163
1.16M
        YR_CODE_SECTION,
164
1.16M
        &arg,
165
1.16M
        sizeof(arg),
166
1.16M
        &ref);
167
168
1.16M
  if (result == ERROR_SUCCESS)
169
1.16M
    result = yr_arena_make_ptr_relocatable(
170
1.16M
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
1.16M
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
1.16M
  return result;
176
1.16M
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
9.42k
{
183
9.42k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
9.42k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
9.42k
      compiler, compiler->current_rule_idx);
187
188
9.42k
  YR_STRING* string;
189
190
9.42k
  const char* string_identifier;
191
9.42k
  const char* target_identifier;
192
193
9.42k
  int matching = 0;
194
195
9.42k
  yr_rule_strings_foreach(current_rule, string)
196
1.16M
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
1.16M
    if (string->chained_to == NULL)
201
1.16M
    {
202
1.16M
      string_identifier = string->identifier;
203
1.16M
      target_identifier = identifier;
204
205
2.32M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
2.32M
             *target_identifier == *string_identifier)
207
1.16M
      {
208
1.16M
        target_identifier++;
209
1.16M
        string_identifier++;
210
1.16M
      }
211
212
1.16M
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
1.16M
          *target_identifier == '*')
214
1.15M
      {
215
1.15M
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
1.15M
        string->flags |= STRING_FLAGS_REFERENCED;
218
1.15M
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
1.15M
        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
220
1.15M
        matching++;
221
1.15M
      }
222
1.16M
    }
223
1.16M
  }
224
225
9.42k
  if (count != NULL)
226
9.42k
  {
227
9.42k
    *count = matching;
228
9.42k
  }
229
230
9.42k
  if (matching == 0)
231
54
  {
232
54
    yr_compiler_set_error_extra_info(
233
54
        compiler, identifier) return ERROR_UNDEFINED_STRING;
234
54
  }
235
236
9.36k
  return ERROR_SUCCESS;
237
9.42k
}
238
239
// Emit OP_PUSH_RULE instructions for all rules whose identifier has the given
240
// prefix.
241
int yr_parser_emit_pushes_for_rules(
242
    yyscan_t yyscanner,
243
    const char* prefix,
244
    int* count)
245
748
{
246
748
  YR_COMPILER* compiler = yyget_extra(yyscanner);
247
248
  // Make sure the compiler is parsing a rule
249
748
  assert(compiler->current_rule_idx != UINT32_MAX);
250
251
748
  YR_RULE* rule;
252
748
  int matching = 0;
253
254
748
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
255
748
      compiler->arena,
256
748
      YR_NAMESPACES_TABLE,
257
748
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
258
259
  // Can't use yr_rules_foreach here as that requires the rules to have been
260
  // finalized (inserting a NULL rule at the end). This is done when
261
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
262
  // into the current position in the code arena. Obviously we aren't done
263
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
264
  // this I'm manually walking all the currently compiled rules (up to the
265
  // current rule index) and comparing identifiers to see if it is one we should
266
  // use.
267
  //
268
  // Further, we have to get compiler->current_rule_idx before we start because
269
  // if we emit an OP_PUSH_RULE
270
748
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
271
272
3.67k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
273
2.92k
  {
274
    // Is rule->identifier prefixed by prefix?
275
2.92k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
276
755
    {
277
755
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
278
755
          compiler->rules_table, rule->identifier, ns->name);
279
280
755
      if (rule_idx != UINT32_MAX)
281
755
      {
282
755
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
283
755
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
284
755
        matching++;
285
755
      }
286
755
    }
287
288
2.92k
    rule++;
289
2.92k
  }
290
291
748
  if (count != NULL)
292
748
  {
293
748
    *count = matching;
294
748
  }
295
296
748
  if (matching == 0)
297
88
  {
298
88
    yr_compiler_set_error_extra_info(compiler, prefix);
299
88
    return ERROR_UNDEFINED_IDENTIFIER;
300
88
  }
301
302
660
  return ERROR_SUCCESS;
303
748
}
304
305
// Emits the shortest push instruction able to represent the given constant:
// OP_PUSH_U for YR_UNDEFINED (no operand), OP_PUSH_8 / OP_PUSH_16 /
// OP_PUSH_32 for values fitting in 8, 16 or 32 bits, and OP_PUSH with a
// full 64-bit operand otherwise. The opcode and its operand are written to
// the code section with a single arena write, whose result is returned.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus room for the widest (64-bit) operand.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len = 1 + (int) sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t value16 = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &value16, sizeof(value16));
    encoded_len = 1 + (int) sizeof(value16);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t value32 = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &value32, sizeof(value32));
    encoded_len = 1 + (int) sizeof(value32);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len = 1 + (int) sizeof(argument);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      encoded_len,
      NULL);
}
344
345
int yr_parser_check_types(
346
    YR_COMPILER* compiler,
347
    YR_OBJECT_FUNCTION* function,
348
    const char* actual_args_fmt)
349
554
{
350
554
  int i;
351
352
1.03k
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
353
1.03k
  {
354
1.03k
    if (function->prototypes[i].arguments_fmt == NULL)
355
4
      break;
356
357
1.03k
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
358
550
      return ERROR_SUCCESS;
359
1.03k
  }
360
361
4
  yr_compiler_set_error_extra_info(compiler, function->identifier)
362
363
4
      return ERROR_WRONG_ARGUMENTS;
364
554
}
365
366
int yr_parser_lookup_string(
367
    yyscan_t yyscanner,
368
    const char* identifier,
369
    YR_STRING** string)
370
1.83k
{
371
1.83k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
372
373
1.83k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
374
1.83k
      compiler, compiler->current_rule_idx);
375
376
1.83k
  yr_rule_strings_foreach(current_rule, *string)
377
2.35k
  {
378
    // If some string $a gets fragmented into multiple chained
379
    // strings, all those fragments have the same $a identifier
380
    // but we are interested in the heading fragment, which is
381
    // that with chained_to == NULL
382
383
2.35k
    if ((*string)->chained_to == NULL &&
384
2.35k
        strcmp((*string)->identifier, identifier) == 0)
385
1.78k
    {
386
1.78k
      return ERROR_SUCCESS;
387
1.78k
    }
388
2.35k
  }
389
390
47
  yr_compiler_set_error_extra_info(compiler, identifier)
391
392
47
      * string = NULL;
393
394
47
  return ERROR_UNDEFINED_STRING;
395
1.83k
}
396
397
////////////////////////////////////////////////////////////////////////////////
398
// Searches for a variable with the given identifier in the scope of the current
399
// "for" loop. In case of nested "for" loops the identifier is searched starting
400
// at the top-level loop and going down through the nested loops until the
401
// current one. This is ok because inner loops can not re-define an identifier
402
// already defined by an outer loop.
403
//
404
// If the variable is found, the return value is the position that the variable
405
// occupies among all the currently defined variables. If the variable doesn't
406
// exist the return value is -1.
407
//
408
// The function can receive a pointer to a YR_EXPRESSION that will be populated
409
// with information about the variable if found. This pointer can be NULL if
410
// the caller is not interested in getting that information.
411
//
412
int yr_parser_lookup_loop_variable(
413
    yyscan_t yyscanner,
414
    const char* identifier,
415
    YR_EXPRESSION* expr)
416
16.3k
{
417
16.3k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
418
16.3k
  int i, j;
419
16.3k
  int var_offset = 0;
420
421
21.6k
  for (i = 0; i <= compiler->loop_index; i++)
422
12.5k
  {
423
12.5k
    var_offset += compiler->loop[i].vars_internal_count;
424
425
21.2k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
426
16.0k
    {
427
16.0k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
428
16.0k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
429
7.24k
      {
430
7.24k
        if (expr != NULL)
431
7.21k
          *expr = compiler->loop[i].vars[j];
432
433
7.24k
        return var_offset + j;
434
7.24k
      }
435
16.0k
    }
436
437
5.26k
    var_offset += compiler->loop[i].vars_count;
438
5.26k
  }
439
440
9.11k
  return -1;
441
16.3k
}
442
443
static int _yr_parser_write_string(
444
    const char* identifier,
445
    YR_MODIFIER modifier,
446
    YR_COMPILER* compiler,
447
    SIZED_STRING* str,
448
    RE_AST* re_ast,
449
    YR_ARENA_REF* string_ref,
450
    int* min_atom_quality,
451
    int* num_atom)
452
35.6k
{
453
35.6k
  SIZED_STRING* literal_string;
454
35.6k
  YR_ATOM_LIST_ITEM* atom;
455
35.6k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
456
457
35.6k
  int c, result;
458
35.6k
  int max_string_len;
459
35.6k
  bool free_literal = false;
460
461
35.6k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
462
35.6k
      compiler->arena,
463
35.6k
      YR_STRINGS_TABLE,
464
35.6k
      sizeof(YR_STRING),
465
35.6k
      string_ref,
466
35.6k
      offsetof(YR_STRING, identifier),
467
35.6k
      offsetof(YR_STRING, string),
468
35.6k
      offsetof(YR_STRING, chained_to),
469
35.6k
      EOL));
470
471
35.6k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
472
35.6k
      compiler->arena, string_ref);
473
474
35.6k
  YR_ARENA_REF ref;
475
476
35.6k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
477
478
35.6k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
479
35.6k
  string->rule_idx = compiler->current_rule_idx;
480
35.6k
  string->idx = compiler->current_string_idx;
481
35.6k
  string->fixed_offset = YR_UNDEFINED;
482
483
35.6k
  compiler->current_string_idx++;
484
485
35.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
486
35.6k
      modifier.flags & STRING_FLAGS_REGEXP ||
487
35.6k
      modifier.flags & STRING_FLAGS_BASE64 ||
488
35.6k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
489
33.5k
  {
490
33.5k
    literal_string = yr_re_ast_extract_literal(re_ast);
491
492
33.5k
    if (literal_string != NULL)
493
25.3k
      free_literal = true;
494
33.5k
  }
495
2.14k
  else
496
2.14k
  {
497
2.14k
    literal_string = str;
498
2.14k
  }
499
500
35.6k
  if (literal_string != NULL)
501
27.5k
  {
502
27.5k
    modifier.flags |= STRING_FLAGS_LITERAL;
503
504
27.5k
    result = _yr_compiler_store_data(
505
27.5k
        compiler,
506
27.5k
        literal_string->c_string,
507
27.5k
        literal_string->length + 1,  // +1 to include terminating NULL
508
27.5k
        &ref);
509
510
27.5k
    if (result != ERROR_SUCCESS)
511
0
      goto cleanup;
512
513
27.5k
    string->length = (uint32_t) literal_string->length;
514
27.5k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
515
516
27.5k
    if (modifier.flags & STRING_FLAGS_WIDE)
517
1.33k
      max_string_len = string->length * 2;
518
26.1k
    else
519
26.1k
      max_string_len = string->length;
520
521
27.5k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
522
21.2k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
523
524
27.5k
    result = yr_atoms_extract_from_string(
525
27.5k
        &compiler->atoms_config,
526
27.5k
        (uint8_t*) literal_string->c_string,
527
27.5k
        (int32_t) literal_string->length,
528
27.5k
        modifier,
529
27.5k
        &atom_list,
530
27.5k
        min_atom_quality);
531
532
27.5k
    if (result != ERROR_SUCCESS)
533
0
      goto cleanup;
534
27.5k
  }
535
8.16k
  else
536
8.16k
  {
537
    // Non-literal strings can't be marked as fixed offset because once we
538
    // find a string atom in the scanned data we don't know the offset where
539
    // the string should start, as the non-literal strings can contain
540
    // variable-length portions.
541
8.16k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
542
543
    // Save the position where the RE forward code starts for later reference.
544
8.16k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
545
8.16k
        compiler->arena, YR_RE_CODE_SECTION);
546
547
    // Emit forwards code
548
8.16k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
549
550
8.16k
    if (result != ERROR_SUCCESS)
551
123
      goto cleanup;
552
553
    // Emit backwards code
554
8.04k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
555
556
8.04k
    if (result != ERROR_SUCCESS)
557
7
      goto cleanup;
558
559
    // Extract atoms from the regular expression.
560
8.03k
    result = yr_atoms_extract_from_re(
561
8.03k
        &compiler->atoms_config,
562
8.03k
        re_ast,
563
8.03k
        modifier,
564
8.03k
        &atom_list,
565
8.03k
        min_atom_quality);
566
567
8.03k
    if (result != ERROR_SUCCESS)
568
0
      goto cleanup;
569
570
    // If no atom was extracted let's add a zero-length atom.
571
8.03k
    if (atom_list == NULL)
572
2.90k
    {
573
2.90k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
574
575
2.90k
      if (atom_list == NULL)
576
0
      {
577
0
        result = ERROR_INSUFFICIENT_MEMORY;
578
0
        goto cleanup;
579
0
      }
580
581
2.90k
      atom_list->atom.length = 0;
582
2.90k
      atom_list->backtrack = 0;
583
2.90k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
584
2.90k
      atom_list->next = NULL;
585
586
2.90k
      yr_arena_ptr_to_ref(
587
2.90k
          compiler->arena,
588
2.90k
          yr_arena_get_ptr(
589
2.90k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
590
2.90k
          &(atom_list->forward_code_ref));
591
2.90k
    }
592
8.03k
  }
593
594
35.5k
  string->flags = modifier.flags;
595
596
  // Add the string to Aho-Corasick automaton.
597
35.5k
  result = yr_ac_add_string(
598
35.5k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
599
600
35.5k
  if (result != ERROR_SUCCESS)
601
0
    goto cleanup;
602
603
35.5k
  atom = atom_list;
604
35.5k
  c = 0;
605
606
2.76M
  while (atom != NULL)
607
2.72M
  {
608
2.72M
    atom = atom->next;
609
2.72M
    c++;
610
2.72M
  }
611
612
35.5k
  (*num_atom) += c;
613
614
35.6k
cleanup:
615
35.6k
  if (free_literal)
616
25.3k
    yr_free(literal_string);
617
618
35.6k
  if (atom_list != NULL)
619
35.5k
    yr_atoms_list_destroy(atom_list);
620
621
35.6k
  return result;
622
35.5k
}
623
624
static int _yr_parser_check_string_modifiers(
625
    yyscan_t yyscanner,
626
    YR_MODIFIER modifier)
627
30.6k
{
628
30.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
629
630
  // xor and nocase together is not implemented.
631
30.6k
  if (modifier.flags & STRING_FLAGS_XOR &&
632
30.6k
      modifier.flags & STRING_FLAGS_NO_CASE)
633
4
  {
634
4
    yr_compiler_set_error_extra_info(
635
4
        compiler, "invalid modifier combination: xor nocase");
636
4
    return ERROR_INVALID_MODIFIER;
637
4
  }
638
639
  // base64 and nocase together is not implemented.
640
30.6k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
641
30.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
642
5.90k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
643
7
  {
644
7
    yr_compiler_set_error_extra_info(
645
7
        compiler,
646
7
        modifier.flags & STRING_FLAGS_BASE64
647
7
            ? "invalid modifier combination: base64 nocase"
648
7
            : "invalid modifier combination: base64wide nocase");
649
7
    return ERROR_INVALID_MODIFIER;
650
7
  }
651
652
  // base64 and fullword together is not implemented.
653
30.6k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
654
30.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
655
123
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
656
0
  {
657
0
    yr_compiler_set_error_extra_info(
658
0
        compiler,
659
0
        modifier.flags & STRING_FLAGS_BASE64
660
0
            ? "invalid modifier combination: base64 fullword"
661
0
            : "invalid modifier combination: base64wide fullword");
662
0
    return ERROR_INVALID_MODIFIER;
663
0
  }
664
665
  // base64 and xor together is not implemented.
666
30.6k
  if (modifier.flags & STRING_FLAGS_XOR &&
667
30.6k
      (modifier.flags & STRING_FLAGS_BASE64 ||
668
812
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
669
7
  {
670
7
    yr_compiler_set_error_extra_info(
671
7
        compiler,
672
7
        modifier.flags & STRING_FLAGS_BASE64
673
7
            ? "invalid modifier combination: base64 xor"
674
7
            : "invalid modifier combination: base64wide xor");
675
7
    return ERROR_INVALID_MODIFIER;
676
7
  }
677
678
30.6k
  return ERROR_SUCCESS;
679
30.6k
}
680
681
int yr_parser_reduce_string_declaration(
682
    yyscan_t yyscanner,
683
    YR_MODIFIER modifier,
684
    const char* identifier,
685
    SIZED_STRING* str,
686
    YR_ARENA_REF* string_ref)
687
30.6k
{
688
30.6k
  int result = ERROR_SUCCESS;
689
30.6k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
690
30.6k
  int atom_quality;
691
692
30.6k
  char message[512];
693
694
30.6k
  int32_t min_gap = 0;
695
30.6k
  int32_t max_gap = 0;
696
697
30.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
698
699
30.6k
  RE_AST* re_ast = NULL;
700
30.6k
  RE_AST* remainder_re_ast = NULL;
701
30.6k
  RE_ERROR re_error;
702
703
30.6k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
704
30.6k
      compiler, compiler->current_rule_idx);
705
706
  // Determine if a string with the same identifier was already defined
707
  // by searching for the identifier in strings_table.
708
30.6k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
709
30.6k
      compiler->strings_table, identifier, NULL);
710
711
  // The string was already defined, return an error.
712
30.6k
  if (string_idx != UINT32_MAX)
713
14
  {
714
14
    yr_compiler_set_error_extra_info(compiler, identifier);
715
14
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
716
14
  }
717
718
  // Empty strings are not allowed.
719
30.6k
  if (str->length == 0)
720
1
  {
721
1
    yr_compiler_set_error_extra_info(compiler, identifier);
722
1
    return ERROR_EMPTY_STRING;
723
1
  }
724
725
30.6k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
726
5.42k
    modifier.flags |= STRING_FLAGS_NO_CASE;
727
728
30.6k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
729
311
    modifier.flags |= STRING_FLAGS_DOT_ALL;
730
731
  // Hex strings are always handled as DOT_ALL regexps.
732
30.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
733
1.35k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
734
735
30.6k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
736
30.6k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
737
29.4k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
738
28.0k
  {
739
28.0k
    modifier.flags |= STRING_FLAGS_ASCII;
740
28.0k
  }
741
742
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
743
  // a single match for the string is enough. This is true in
744
  // most cases, except when the string count (#) and string offset (@)
745
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
746
  // initially, and unmarked later if required.
747
30.6k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
748
749
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
750
  // need to be searched all over the file because the user is using the
751
  // "at" operator. The string must be searched at a fixed offset in the
752
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
753
  // and unmarked later if required.
754
30.6k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
755
756
  // If string identifier is $ this is an anonymous string, if not add the
757
  // identifier to strings_table.
758
30.6k
  if (strcmp(identifier, "$") == 0)
759
29.1k
  {
760
29.1k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
761
29.1k
  }
762
1.54k
  else
763
1.54k
  {
764
1.54k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
765
1.54k
        compiler->strings_table,
766
1.54k
        identifier,
767
1.54k
        NULL,
768
1.54k
        compiler->current_string_idx));
769
1.54k
  }
770
771
  // Make sure that the the string does not have an invalid combination of
772
  // modifiers.
773
30.6k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
774
775
30.6k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
776
30.6k
      modifier.flags & STRING_FLAGS_REGEXP ||
777
30.6k
      modifier.flags & STRING_FLAGS_BASE64 ||
778
30.6k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
779
28.5k
  {
780
28.5k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
781
1.35k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
782
27.1k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
783
25.2k
    {
784
25.2k
      int flags = RE_PARSER_FLAG_NONE;
785
25.2k
      if (compiler->strict_escape)
786
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
787
25.2k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
788
25.2k
    }
789
1.91k
    else
790
1.91k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
791
792
28.5k
    if (result != ERROR_SUCCESS)
793
930
    {
794
930
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
795
0
      {
796
0
        yywarning(yyscanner, "unknown escape sequence");
797
0
      }
798
930
      else
799
930
      {
800
930
        snprintf(
801
930
            message,
802
930
            sizeof(message),
803
930
            "invalid %s \"%s\": %s",
804
930
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
805
930
                                                        : "regular expression",
806
930
            identifier,
807
930
            re_error.message);
808
809
930
        yr_compiler_set_error_extra_info(compiler, message);
810
930
        goto _exit;
811
930
      }
812
930
    }
813
814
27.5k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
815
954
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
816
817
27.5k
    if (re_ast->flags & RE_FLAGS_GREEDY)
818
1.08k
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
819
820
    // Regular expressions in the strings section can't mix greedy and
821
    // ungreedy quantifiers like .* and .*?. That's because these regular
822
    // expressions can be matched forwards and/or backwards depending on the
823
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
824
    // be able to properly calculate the length of the match.
825
826
27.5k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
827
27.5k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
828
6
    {
829
6
      result = ERROR_INVALID_REGULAR_EXPRESSION;
830
831
6
      yr_compiler_set_error_extra_info(
832
6
          compiler,
833
6
          "greedy and ungreedy quantifiers can't be mixed in a regular "
834
6
          "expression");
835
836
6
      goto _exit;
837
6
    }
838
839
27.5k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
840
2.11k
    {
841
2.11k
      yywarning(
842
2.11k
          yyscanner,
843
2.11k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
844
2.11k
          "with a reasonable value for N",
845
2.11k
          identifier);
846
2.11k
    }
847
848
27.5k
    if (compiler->re_ast_callback != NULL)
849
0
    {
850
0
      compiler->re_ast_callback(
851
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
852
0
    }
853
854
27.5k
    *string_ref = YR_ARENA_NULL_REF;
855
856
60.9k
    while (re_ast != NULL)
857
33.5k
    {
858
33.5k
      YR_ARENA_REF ref;
859
860
33.5k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
861
862
33.5k
      int32_t prev_min_gap = min_gap;
863
33.5k
      int32_t prev_max_gap = max_gap;
864
865
33.5k
      result = yr_re_ast_split_at_chaining_point(
866
33.5k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
867
868
33.5k
      if (result != ERROR_SUCCESS)
869
0
        goto _exit;
870
871
33.5k
      result = _yr_parser_write_string(
872
33.5k
          identifier,
873
33.5k
          modifier,
874
33.5k
          compiler,
875
33.5k
          NULL,
876
33.5k
          re_ast,
877
33.5k
          &ref,
878
33.5k
          &atom_quality,
879
33.5k
          &current_rule->num_atoms);
880
881
33.5k
      if (result != ERROR_SUCCESS)
882
130
        goto _exit;
883
884
33.3k
      if (atom_quality < min_atom_quality)
885
26.3k
        min_atom_quality = atom_quality;
886
887
33.3k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
888
27.4k
      {
889
        // This is the first string in the chain, the string reference
890
        // returned by this function must point to this string.
891
27.4k
        *string_ref = ref;
892
27.4k
      }
893
5.94k
      else
894
5.94k
      {
895
        // This is not the first string in the chain, set the appropriate
896
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
897
        // fields.
898
5.94k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
899
5.94k
            compiler->arena,
900
5.94k
            YR_STRINGS_TABLE,
901
5.94k
            prev_string_idx * sizeof(YR_STRING));
902
903
5.94k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
904
5.94k
            compiler->arena, &ref);
905
906
5.94k
        new_string->chained_to = prev_string;
907
5.94k
        new_string->chain_gap_min = prev_min_gap;
908
5.94k
        new_string->chain_gap_max = prev_max_gap;
909
910
        // A string chained to another one can't have a fixed offset, only the
911
        // head of the string chain can have a fixed offset.
912
5.94k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
913
914
        // There is a previous string, but that string wasn't marked as part
915
        // of a chain because we can't do that until knowing there will be
916
        // another string, let's flag it now the we know.
917
5.94k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
918
919
        // There is a previous string, so this string is part of a chain, but
920
        // there will be no more strings because there are no more AST to
921
        // split, which means that this is the chain's tail.
922
5.94k
        if (remainder_re_ast == NULL)
923
1.71k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
924
1.71k
                               STRING_FLAGS_CHAIN_TAIL;
925
5.94k
      }
926
927
33.3k
      yr_re_ast_destroy(re_ast);
928
33.3k
      re_ast = remainder_re_ast;
929
33.3k
    }
930
27.5k
  }
931
2.14k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
932
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
933
2.14k
  {
934
2.14k
    result = _yr_parser_write_string(
935
2.14k
        identifier,
936
2.14k
        modifier,
937
2.14k
        compiler,
938
2.14k
        str,
939
2.14k
        NULL,
940
2.14k
        string_ref,
941
2.14k
        &min_atom_quality,
942
2.14k
        &current_rule->num_atoms);
943
944
2.14k
    if (result != ERROR_SUCCESS)
945
0
      goto _exit;
946
2.14k
  }
947
948
29.5k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
949
7.94k
  {
950
7.94k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
951
7.94k
  }
952
953
30.6k
_exit:
954
955
30.6k
  if (re_ast != NULL)
956
478
    yr_re_ast_destroy(re_ast);
957
958
30.6k
  if (remainder_re_ast != NULL)
959
1
    yr_re_ast_destroy(remainder_re_ast);
960
961
30.6k
  return result;
962
29.5k
}
963
964
static int wildcard_iterator(
965
    void* prefix,
966
    size_t prefix_len,
967
    void* _value,
968
    void* data)
969
8.13k
{
970
8.13k
  const char* identifier = (const char*) data;
971
972
  // If the identifier is prefixed by prefix, then it matches the wildcard.
973
8.13k
  if (!strncmp(prefix, identifier, prefix_len))
974
268
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
975
976
7.86k
  return ERROR_SUCCESS;
977
8.13k
}
978
979
int yr_parser_reduce_rule_declaration_phase_1(
980
    yyscan_t yyscanner,
981
    int32_t flags,
982
    const char* identifier,
983
    YR_ARENA_REF* rule_ref)
984
39.2k
{
985
39.2k
  int result;
986
39.2k
  YR_FIXUP* fixup;
987
39.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
988
989
39.2k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
990
39.2k
      compiler->arena,
991
39.2k
      YR_NAMESPACES_TABLE,
992
39.2k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
993
994
39.2k
  if (yr_hash_table_lookup_uint32(
995
39.2k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
996
39.2k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
997
27.1k
  {
998
    // A rule or variable with the same identifier already exists, return the
999
    // appropriate error.
1000
1001
27.1k
    yr_compiler_set_error_extra_info(compiler, identifier);
1002
27.1k
    return ERROR_DUPLICATED_IDENTIFIER;
1003
27.1k
  }
1004
1005
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1006
  // any of them are a prefix of the identifier being declared. If so, return
1007
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1008
12.0k
  result = yr_hash_table_iterate(
1009
12.0k
      compiler->wildcard_identifiers_table,
1010
12.0k
      ns->name,
1011
12.0k
      wildcard_iterator,
1012
12.0k
      (void*) identifier);
1013
1014
12.0k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1015
268
  {
1016
    // This rule matches an existing wildcard rule set.
1017
268
    yr_compiler_set_error_extra_info(compiler, identifier);
1018
268
  }
1019
1020
12.0k
  FAIL_ON_ERROR(result);
1021
1022
11.8k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1023
11.8k
      compiler->arena,
1024
11.8k
      YR_RULES_TABLE,
1025
11.8k
      sizeof(YR_RULE),
1026
11.8k
      rule_ref,
1027
11.8k
      offsetof(YR_RULE, identifier),
1028
11.8k
      offsetof(YR_RULE, tags),
1029
11.8k
      offsetof(YR_RULE, strings),
1030
11.8k
      offsetof(YR_RULE, metas),
1031
11.8k
      offsetof(YR_RULE, ns),
1032
11.8k
      EOL));
1033
1034
11.8k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1035
1036
11.8k
  YR_ARENA_REF ref;
1037
1038
11.8k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1039
1040
11.8k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1041
11.8k
  rule->flags = flags;
1042
11.8k
  rule->ns = ns;
1043
11.8k
  rule->num_atoms = 0;
1044
1045
11.8k
  YR_ARENA_REF jmp_offset_ref;
1046
1047
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1048
11.8k
  compiler->current_rule_idx = compiler->next_rule_idx;
1049
11.8k
  compiler->next_rule_idx++;
1050
1051
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1052
  // disabled it skips over the rule's code and go straight to the next rule's
1053
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1054
  // is set to 0 as we don't know the jump target yet. When we finish
1055
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1056
  // the jump offset is set to its final value.
1057
1058
11.8k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1059
11.8k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1060
1061
11.8k
  FAIL_ON_ERROR(yr_arena_write_data(
1062
11.8k
      compiler->arena,
1063
11.8k
      YR_CODE_SECTION,
1064
11.8k
      &compiler->current_rule_idx,
1065
11.8k
      sizeof(compiler->current_rule_idx),
1066
11.8k
      NULL));
1067
1068
  // Create a fixup entry for the jump and push it in the stack
1069
11.8k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1070
1071
11.8k
  if (fixup == NULL)
1072
0
    return ERROR_INSUFFICIENT_MEMORY;
1073
1074
11.8k
  fixup->ref = jmp_offset_ref;
1075
11.8k
  fixup->next = compiler->fixup_stack_head;
1076
11.8k
  compiler->fixup_stack_head = fixup;
1077
1078
  // Clean strings_table as we are starting to parse a new rule.
1079
11.8k
  yr_hash_table_clean(compiler->strings_table, NULL);
1080
1081
11.8k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1082
11.8k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1083
1084
11.8k
  return ERROR_SUCCESS;
1085
11.8k
}
1086
1087
int yr_parser_reduce_rule_declaration_phase_2(
1088
    yyscan_t yyscanner,
1089
    YR_ARENA_REF* rule_ref)
1090
685
{
1091
685
  uint32_t max_strings_per_rule;
1092
685
  uint32_t strings_in_rule = 0;
1093
1094
685
  YR_FIXUP* fixup;
1095
685
  YR_STRING* string;
1096
685
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1097
1098
685
  yr_get_configuration_uint32(
1099
685
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1100
1101
685
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1102
1103
  // Show warning if the rule is generating too many atoms. The warning is
1104
  // shown if the number of atoms is greater than 20 times the maximum number
1105
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1106
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1107
1108
685
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1109
22
  {
1110
22
    yywarning(yyscanner, "rule is slowing down scanning");
1111
22
  }
1112
1113
685
  yr_rule_strings_foreach(rule, string)
1114
3.40k
  {
1115
    // Only the heading fragment in a chain of strings (the one with
1116
    // chained_to == NULL) must be referenced. All other fragments
1117
    // are never marked as referenced.
1118
    //
1119
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1120
    // strings must always be referenced.
1121
1122
3.40k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1123
3.40k
        (STRING_IS_ANONYMOUS(string) ||
1124
269
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1125
18
    {
1126
18
      yr_compiler_set_error_extra_info(
1127
18
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1128
18
    }
1129
1130
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1131
    // that it will match anywhere.
1132
3.38k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1133
3.38k
        STRING_IS_FIXED_OFFSET(string))
1134
174
    {
1135
174
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1136
174
    }
1137
1138
3.38k
    strings_in_rule++;
1139
1140
3.38k
    if (strings_in_rule > max_strings_per_rule)
1141
0
    {
1142
0
      yr_compiler_set_error_extra_info(
1143
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1144
0
    }
1145
3.38k
  }
1146
1147
667
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1148
667
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1149
1150
667
  fixup = compiler->fixup_stack_head;
1151
1152
667
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1153
667
      compiler->arena, &fixup->ref);
1154
1155
667
  int32_t jmp_offset = yr_arena_get_current_offset(
1156
667
                           compiler->arena, YR_CODE_SECTION) -
1157
667
                       fixup->ref.offset + 1;
1158
1159
667
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1160
1161
  // Remove fixup from the stack.
1162
667
  compiler->fixup_stack_head = fixup->next;
1163
667
  yr_free(fixup);
1164
1165
  // We have finished parsing the current rule set current_rule_idx to
1166
  // UINT32_MAX indicating that we are not currently parsing a rule.
1167
667
  compiler->current_rule_idx = UINT32_MAX;
1168
1169
667
  return ERROR_SUCCESS;
1170
667
}
1171
1172
int yr_parser_reduce_string_identifier(
1173
    yyscan_t yyscanner,
1174
    const char* identifier,
1175
    uint8_t instruction,
1176
    uint64_t at_offset)
1177
16.2k
{
1178
16.2k
  YR_STRING* string;
1179
16.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1180
1181
16.2k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1182
14.3k
  {
1183
14.3k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1184
14.2k
    {
1185
14.2k
      yr_parser_emit_with_arg(
1186
14.2k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1187
1188
14.2k
      yr_parser_emit(yyscanner, instruction, NULL);
1189
1190
14.2k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1191
14.2k
          compiler, compiler->current_rule_idx);
1192
1193
14.2k
      yr_rule_strings_foreach(current_rule, string)
1194
1.17M
      {
1195
1.17M
        if (instruction != OP_FOUND)
1196
1.17M
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1197
1198
1.17M
        if (instruction == OP_FOUND_AT)
1199
478
        {
1200
          // Avoid overwriting any previous fixed offset
1201
478
          if (string->fixed_offset == YR_UNDEFINED)
1202
35
            string->fixed_offset = at_offset;
1203
1204
          // If a previous fixed offset was different, disable
1205
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1206
          // have room to store a single fixed offset value
1207
478
          if (string->fixed_offset != at_offset)
1208
389
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1209
478
        }
1210
1.17M
        else
1211
1.17M
        {
1212
1.17M
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1213
1.17M
        }
1214
1.17M
      }
1215
14.2k
    }
1216
138
    else
1217
138
    {
1218
      // Anonymous strings not allowed outside of a loop
1219
138
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1220
138
    }
1221
14.3k
  }
1222
1.83k
  else
1223
1.83k
  {
1224
1.83k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1225
1226
1.78k
    FAIL_ON_ERROR(
1227
1.78k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1228
1229
1.78k
    if (instruction != OP_FOUND)
1230
1.42k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1231
1232
1.78k
    if (instruction == OP_FOUND_AT)
1233
548
    {
1234
      // Avoid overwriting any previous fixed offset
1235
1236
548
      if (string->fixed_offset == YR_UNDEFINED)
1237
188
        string->fixed_offset = at_offset;
1238
1239
      // If a previous fixed offset was different, disable
1240
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1241
      // have room to store a single fixed offset value
1242
1243
548
      if (string->fixed_offset == YR_UNDEFINED ||
1244
548
          string->fixed_offset != at_offset)
1245
369
      {
1246
369
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1247
369
      }
1248
548
    }
1249
1.23k
    else
1250
1.23k
    {
1251
1.23k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1252
1.23k
    }
1253
1254
1.78k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1255
1256
1.78k
    string->flags |= STRING_FLAGS_REFERENCED;
1257
1.78k
  }
1258
1259
16.0k
  return ERROR_SUCCESS;
1260
16.2k
}
1261
1262
int yr_parser_reduce_meta_declaration(
1263
    yyscan_t yyscanner,
1264
    int32_t type,
1265
    const char* identifier,
1266
    const char* string,
1267
    int64_t integer,
1268
    YR_ARENA_REF* meta_ref)
1269
887
{
1270
887
  YR_ARENA_REF ref;
1271
887
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1272
1273
887
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1274
887
      compiler->arena,
1275
887
      YR_METAS_TABLE,
1276
887
      sizeof(YR_META),
1277
887
      meta_ref,
1278
887
      offsetof(YR_META, identifier),
1279
887
      offsetof(YR_META, string),
1280
887
      EOL));
1281
1282
887
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1283
1284
887
  meta->type = type;
1285
887
  meta->integer = integer;
1286
1287
887
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1288
1289
887
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1290
1291
887
  if (string != NULL)
1292
244
  {
1293
244
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1294
1295
244
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1296
244
  }
1297
643
  else
1298
643
  {
1299
643
    meta->string = NULL;
1300
643
  }
1301
1302
887
  compiler->current_meta_idx++;
1303
1304
887
  return ERROR_SUCCESS;
1305
887
}
1306
1307
// Tells whether module_name can be used as a module name: it must be
// non-empty and strlen() of its c_string must be equal to its recorded
// length (which is not the case, for example, when the string contains a NUL
// byte before its end). Returns non-zero when valid, zero otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1317
1318
// Handles an import of the module named by module_name.
//
// Names rejected by _yr_parser_valid_module_name produce
// ERROR_INVALID_MODULE_NAME. If the objects table already has an entry for
// the name in the current namespace nothing else is done. Otherwise a
// structure object is created for the module, added to the objects table,
// populated by yr_modules_do_declarations, and an OP_IMPORT instruction
// pointing at a stored copy of the name is emitted.
//
// Returns ERROR_SUCCESS or the error code of the step that failed; for
// ERROR_INVALID_MODULE_NAME and ERROR_UNKNOWN_MODULE the module name is also
// recorded as the compiler's extra error information.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;
  YR_ARENA_REF name_ref;

  const char* name = module_name->c_string;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, name);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* current_ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, name, current_ns->name);

  // An existing entry means the module was already imported.
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(
      yr_object_create(OBJECT_TYPE_STRUCTURE, name, NULL, &module_structure));

  // NOTE(review): if adding to the table fails, module_structure does not
  // appear to be released on this path - confirm who owns it.
  FAIL_ON_ERROR(yr_hash_table_add(
      compiler->objects_table, name, current_ns->name, module_structure));

  const int declarations_result = yr_modules_do_declarations(
      name, module_structure);

  if (declarations_result != ERROR_SUCCESS)
  {
    if (declarations_result == ERROR_UNKNOWN_MODULE)
    {
      yr_compiler_set_error_extra_info(compiler, name);
    }

    return declarations_result;
  }

  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, name, &name_ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &name_ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1375
1376
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1377
20.9k
{
1378
20.9k
  int opcode = 0;
1379
1380
20.9k
  switch (expression_type)
1381
20.9k
  {
1382
13.4k
  case EXPRESSION_TYPE_INTEGER:
1383
13.4k
    opcode = OP_INT_BEGIN;
1384
13.4k
    break;
1385
4.64k
  case EXPRESSION_TYPE_FLOAT:
1386
4.64k
    opcode = OP_DBL_BEGIN;
1387
4.64k
    break;
1388
2.92k
  case EXPRESSION_TYPE_STRING:
1389
2.92k
    opcode = OP_STR_BEGIN;
1390
2.92k
    break;
1391
0
  default:
1392
0
    assert(false);
1393
20.9k
  }
1394
1395
20.9k
  if (op[0] == '<')
1396
1.81k
  {
1397
1.81k
    if (op[1] == '=')
1398
784
      opcode += _OP_LE;
1399
1.03k
    else
1400
1.03k
      opcode += _OP_LT;
1401
1.81k
  }
1402
19.1k
  else if (op[0] == '>')
1403
1.69k
  {
1404
1.69k
    if (op[1] == '=')
1405
579
      opcode += _OP_GE;
1406
1.11k
    else
1407
1.11k
      opcode += _OP_GT;
1408
1.69k
  }
1409
17.4k
  else if (op[1] == '=')
1410
1.33k
  {
1411
1.33k
    if (op[0] == '=')
1412
664
      opcode += _OP_EQ;
1413
671
    else
1414
671
      opcode += _OP_NEQ;
1415
1.33k
  }
1416
16.1k
  else if (op[0] == '+')
1417
4.63k
  {
1418
4.63k
    opcode += _OP_ADD;
1419
4.63k
  }
1420
11.4k
  else if (op[0] == '-')
1421
7.74k
  {
1422
7.74k
    opcode += _OP_SUB;
1423
7.74k
  }
1424
3.73k
  else if (op[0] == '*')
1425
2.09k
  {
1426
2.09k
    opcode += _OP_MUL;
1427
2.09k
  }
1428
1.63k
  else if (op[0] == '\\')
1429
1.63k
  {
1430
1.63k
    opcode += _OP_DIV;
1431
1.63k
  }
1432
1433
20.9k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1434
20.9k
  {
1435
20.9k
    return opcode;
1436
20.9k
  }
1437
1438
1
  return OP_ERROR;
1439
20.9k
}
1440
1441
int yr_parser_reduce_operation(
1442
    yyscan_t yyscanner,
1443
    const char* op,
1444
    YR_EXPRESSION left_operand,
1445
    YR_EXPRESSION right_operand)
1446
21.2k
{
1447
21.2k
  int expression_type;
1448
1449
21.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1450
1451
21.2k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1452
21.2k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1453
21.2k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1454
18.0k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1455
18.0k
  {
1456
18.0k
    if (left_operand.type != right_operand.type)
1457
3.64k
    {
1458
      // One operand is double and the other is integer,
1459
      // cast the integer to double
1460
1461
3.64k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1462
3.64k
          yyscanner,
1463
3.64k
          OP_INT_TO_DBL,
1464
3.64k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1465
3.64k
          NULL,
1466
3.64k
          NULL));
1467
3.64k
    }
1468
1469
18.0k
    expression_type = EXPRESSION_TYPE_FLOAT;
1470
1471
18.0k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1472
18.0k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1473
13.4k
    {
1474
13.4k
      expression_type = EXPRESSION_TYPE_INTEGER;
1475
13.4k
    }
1476
1477
18.0k
    FAIL_ON_ERROR(yr_parser_emit(
1478
18.0k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1479
18.0k
  }
1480
3.20k
  else if (
1481
3.20k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1482
3.20k
      right_operand.type == EXPRESSION_TYPE_STRING)
1483
2.92k
  {
1484
2.92k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1485
1486
2.92k
    if (opcode != OP_ERROR)
1487
2.92k
    {
1488
2.92k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1489
2.92k
    }
1490
1
    else
1491
1
    {
1492
1
      yr_compiler_set_error_extra_info_fmt(
1493
1
          compiler, "strings don't support \"%s\" operation", op);
1494
1495
1
      return ERROR_WRONG_TYPE;
1496
1
    }
1497
2.92k
  }
1498
279
  else
1499
279
  {
1500
279
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1501
1502
279
    return ERROR_WRONG_TYPE;
1503
279
  }
1504
1505
20.9k
  return ERROR_SUCCESS;
1506
21.2k
}