Coverage Report

Created: 2023-09-25 07:13

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts an uppercase hexadecimal digit character ('0'-'9' or 'A'-'F') into
// its numeric value, as a uint8_t. Any character outside 'A'-'F' is treated as
// a decimal digit, so lowercase 'a'-'f' are NOT handled.
//
// The whole expansion and every use of the argument are parenthesized so the
// macro can be used safely inside larger expressions. The argument is
// evaluated more than once, so it must not have side effects.
#define todigit(x) \
  ((uint8_t) (((x) >= 'A' && (x) <= 'F') ? ((x) - 'A' + 10) : ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
58.4k
{
56
58.4k
  return yr_arena_write_data(
57
58.4k
      yyget_extra(yyscanner)->arena,
58
58.4k
      YR_CODE_SECTION,
59
58.4k
      &instruction,
60
58.4k
      sizeof(uint8_t),
61
58.4k
      instruction_ref);
62
58.4k
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
1.24k
{
71
1.24k
  int result = yr_arena_write_data(
72
1.24k
      yyget_extra(yyscanner)->arena,
73
1.24k
      YR_CODE_SECTION,
74
1.24k
      &instruction,
75
1.24k
      sizeof(uint8_t),
76
1.24k
      instruction_ref);
77
78
1.24k
  if (result == ERROR_SUCCESS)
79
1.24k
    result = yr_arena_write_data(
80
1.24k
        yyget_extra(yyscanner)->arena,
81
1.24k
        YR_CODE_SECTION,
82
1.24k
        &argument,
83
1.24k
        sizeof(double),
84
1.24k
        argument_ref);
85
86
1.24k
  return result;
87
1.24k
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
17.3k
{
96
17.3k
  int result = yr_arena_write_data(
97
17.3k
      yyget_extra(yyscanner)->arena,
98
17.3k
      YR_CODE_SECTION,
99
17.3k
      &instruction,
100
17.3k
      sizeof(uint8_t),
101
17.3k
      instruction_ref);
102
103
17.3k
  if (result == ERROR_SUCCESS)
104
17.3k
    result = yr_arena_write_data(
105
17.3k
        yyget_extra(yyscanner)->arena,
106
17.3k
        YR_CODE_SECTION,
107
17.3k
        &argument,
108
17.3k
        sizeof(int32_t),
109
17.3k
        argument_ref);
110
111
17.3k
  return result;
112
17.3k
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
36.6k
{
121
36.6k
  int result = yr_arena_write_data(
122
36.6k
      yyget_extra(yyscanner)->arena,
123
36.6k
      YR_CODE_SECTION,
124
36.6k
      &instruction,
125
36.6k
      sizeof(uint8_t),
126
36.6k
      instruction_ref);
127
128
36.6k
  if (result == ERROR_SUCCESS)
129
36.6k
    result = yr_arena_write_data(
130
36.6k
        yyget_extra(yyscanner)->arena,
131
36.6k
        YR_CODE_SECTION,
132
36.6k
        &argument,
133
36.6k
        sizeof(int64_t),
134
36.6k
        argument_ref);
135
136
36.6k
  return result;
137
36.6k
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
412k
{
146
412k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
412k
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
412k
  memset(&arg, 0, sizeof(arg));
151
412k
  arg.ptr = argument;
152
153
412k
  int result = yr_arena_write_data(
154
412k
      yyget_extra(yyscanner)->arena,
155
412k
      YR_CODE_SECTION,
156
412k
      &instruction,
157
412k
      sizeof(uint8_t),
158
412k
      instruction_ref);
159
160
412k
  if (result == ERROR_SUCCESS)
161
412k
    result = yr_arena_write_data(
162
412k
        yyget_extra(yyscanner)->arena,
163
412k
        YR_CODE_SECTION,
164
412k
        &arg,
165
412k
        sizeof(arg),
166
412k
        &ref);
167
168
412k
  if (result == ERROR_SUCCESS)
169
412k
    result = yr_arena_make_ptr_relocatable(
170
412k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
412k
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
412k
  return result;
176
412k
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
8.15k
{
183
8.15k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
8.15k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
8.15k
      compiler, compiler->current_rule_idx);
187
188
8.15k
  YR_STRING* string;
189
190
8.15k
  const char* string_identifier;
191
8.15k
  const char* target_identifier;
192
193
8.15k
  int matching = 0;
194
195
8.15k
  yr_rule_strings_foreach(current_rule, string)
196
412k
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
412k
    if (string->chained_to == NULL)
201
410k
    {
202
410k
      string_identifier = string->identifier;
203
410k
      target_identifier = identifier;
204
205
820k
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
820k
             *target_identifier == *string_identifier)
207
410k
      {
208
410k
        target_identifier++;
209
410k
        string_identifier++;
210
410k
      }
211
212
410k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
410k
          *target_identifier == '*')
214
408k
      {
215
408k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
408k
        string->flags |= STRING_FLAGS_REFERENCED;
218
408k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
408k
        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
220
408k
        matching++;
221
408k
      }
222
410k
    }
223
412k
  }
224
225
8.15k
  if (count != NULL)
226
8.15k
  {
227
8.15k
    *count = matching;
228
8.15k
  }
229
230
8.15k
  if (matching == 0)
231
5
  {
232
5
    yr_compiler_set_error_extra_info(
233
5
        compiler, identifier) return ERROR_UNDEFINED_STRING;
234
5
  }
235
236
8.15k
  return ERROR_SUCCESS;
237
8.15k
}
238
239
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
240
// prefix.
241
int yr_parser_emit_pushes_for_rules(
242
    yyscan_t yyscanner,
243
    const char* prefix,
244
    int* count)
245
633
{
246
633
  YR_COMPILER* compiler = yyget_extra(yyscanner);
247
248
  // Make sure the compiler is parsing a rule
249
633
  assert(compiler->current_rule_idx != UINT32_MAX);
250
251
633
  YR_RULE* rule;
252
633
  int matching = 0;
253
254
633
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
255
633
      compiler->arena,
256
633
      YR_NAMESPACES_TABLE,
257
633
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
258
259
  // Can't use yr_rules_foreach here as that requires the rules to have been
260
  // finalized (inserting a NULL rule at the end). This is done when
261
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
262
  // into the current position in the code arena. Obviously we aren't done
263
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
264
  // this I'm manually walking all the currently compiled rules (up to the
265
  // current rule index) and comparing identifiers to see if it is one we should
266
  // use.
267
  //
268
  // Further, we have to get compiler->current_rule_idx before we start because
269
  // if we emit an OP_PUSH_RULE
270
633
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
271
272
4.39k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
273
3.75k
  {
274
    // Is rule->identifier prefixed by prefix?
275
3.75k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
276
1.30k
    {
277
1.30k
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
278
1.30k
          compiler->rules_table, rule->identifier, ns->name);
279
280
1.30k
      if (rule_idx != UINT32_MAX)
281
1.30k
      {
282
1.30k
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
283
1.30k
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
284
1.30k
        matching++;
285
1.30k
      }
286
1.30k
    }
287
288
3.75k
    rule++;
289
3.75k
  }
290
291
633
  if (count != NULL)
292
633
  {
293
633
    *count = matching;
294
633
  }
295
296
633
  if (matching == 0)
297
39
  {
298
39
    yr_compiler_set_error_extra_info(compiler, prefix);
299
39
    return ERROR_UNDEFINED_IDENTIFIER;
300
39
  }
301
302
594
  return ERROR_SUCCESS;
303
633
}
304
305
// Emits the shortest push instruction able to hold the given constant.
//
// YR_UNDEFINED is encoded as a lone OP_PUSH_U. Any other value is encoded as
// OP_PUSH_8, OP_PUSH_16, OP_PUSH_32 or OP_PUSH followed by the value stored
// in 1, 2, 4 or 8 bytes respectively, picking the narrowest width that fits.
// Multi-byte operands are copied with memcpy, so they are stored in the
// host's byte order.
//
// Args:
//   yyscanner: Scanner whose extra data is the compiler in use.
//   argument: Constant to push.
//
// Returns:
//   Whatever yr_arena_write_data returns.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte followed by an operand of at most eight bytes.
  uint8_t encoded[1 + sizeof(uint64_t)];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    const uint8_t operand = (uint8_t) argument;

    encoded[0] = OP_PUSH_8;
    encoded[1] = operand;
    encoded_len += sizeof(operand);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t operand = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t operand = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand, sizeof(operand));
    encoded_len += sizeof(operand);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      encoded_len,
      NULL);
}
344
345
int yr_parser_check_types(
346
    YR_COMPILER* compiler,
347
    YR_OBJECT_FUNCTION* function,
348
    const char* actual_args_fmt)
349
299
{
350
299
  int i;
351
352
525
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
353
525
  {
354
525
    if (function->prototypes[i].arguments_fmt == NULL)
355
0
      break;
356
357
525
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
358
299
      return ERROR_SUCCESS;
359
525
  }
360
361
0
  yr_compiler_set_error_extra_info(compiler, function->identifier)
362
363
0
      return ERROR_WRONG_ARGUMENTS;
364
299
}
365
366
int yr_parser_lookup_string(
367
    yyscan_t yyscanner,
368
    const char* identifier,
369
    YR_STRING** string)
370
373
{
371
373
  YR_COMPILER* compiler = yyget_extra(yyscanner);
372
373
373
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
374
373
      compiler, compiler->current_rule_idx);
375
376
373
  yr_rule_strings_foreach(current_rule, *string)
377
485
  {
378
    // If some string $a gets fragmented into multiple chained
379
    // strings, all those fragments have the same $a identifier
380
    // but we are interested in the heading fragment, which is
381
    // that with chained_to == NULL
382
383
485
    if ((*string)->chained_to == NULL &&
384
485
        strcmp((*string)->identifier, identifier) == 0)
385
343
    {
386
343
      return ERROR_SUCCESS;
387
343
    }
388
485
  }
389
390
30
  yr_compiler_set_error_extra_info(compiler, identifier)
391
392
30
      * string = NULL;
393
394
30
  return ERROR_UNDEFINED_STRING;
395
373
}
396
397
////////////////////////////////////////////////////////////////////////////////
398
// Searches for a variable with the given identifier in the scope of the current
399
// "for" loop. In case of nested "for" loops the identifier is searched starting
400
// at the top-level loop and going down thorough the nested loops until the
401
// current one. This is ok because inner loops can not re-define an identifier
402
// already defined by an outer loop.
403
//
404
// If the variable is found, the return value is the position that the variable
405
// occupies among all the currently defined variables. If the variable doesn't
406
// exist the return value is -1.
407
//
408
// The function can receive a pointer to a YR_EXPRESSION that will populated
409
// with information about the variable if found. This pointer can be NULL if
410
// the caller is not interested in getting that information.
411
//
412
int yr_parser_lookup_loop_variable(
413
    yyscan_t yyscanner,
414
    const char* identifier,
415
    YR_EXPRESSION* expr)
416
15.0k
{
417
15.0k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
418
15.0k
  int i, j;
419
15.0k
  int var_offset = 0;
420
421
18.3k
  for (i = 0; i <= compiler->loop_index; i++)
422
12.7k
  {
423
12.7k
    var_offset += compiler->loop[i].vars_internal_count;
424
425
22.2k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
426
18.9k
    {
427
18.9k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
428
18.9k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
429
9.45k
      {
430
9.45k
        if (expr != NULL)
431
9.35k
          *expr = compiler->loop[i].vars[j];
432
433
9.45k
        return var_offset + j;
434
9.45k
      }
435
18.9k
    }
436
437
3.30k
    var_offset += compiler->loop[i].vars_count;
438
3.30k
  }
439
440
5.59k
  return -1;
441
15.0k
}
442
443
static int _yr_parser_write_string(
444
    const char* identifier,
445
    YR_MODIFIER modifier,
446
    YR_COMPILER* compiler,
447
    SIZED_STRING* str,
448
    RE_AST* re_ast,
449
    YR_ARENA_REF* string_ref,
450
    int* min_atom_quality,
451
    int* num_atom)
452
20.1k
{
453
20.1k
  SIZED_STRING* literal_string;
454
20.1k
  YR_ATOM_LIST_ITEM* atom;
455
20.1k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
456
457
20.1k
  int c, result;
458
20.1k
  int max_string_len;
459
20.1k
  bool free_literal = false;
460
461
20.1k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
462
20.1k
      compiler->arena,
463
20.1k
      YR_STRINGS_TABLE,
464
20.1k
      sizeof(YR_STRING),
465
20.1k
      string_ref,
466
20.1k
      offsetof(YR_STRING, identifier),
467
20.1k
      offsetof(YR_STRING, string),
468
20.1k
      offsetof(YR_STRING, chained_to),
469
20.1k
      EOL));
470
471
20.1k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
472
20.1k
      compiler->arena, string_ref);
473
474
20.1k
  YR_ARENA_REF ref;
475
476
20.1k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
477
478
20.1k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
479
480
20.1k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
481
20.1k
      modifier.flags & STRING_FLAGS_REGEXP ||
482
20.1k
      modifier.flags & STRING_FLAGS_BASE64 ||
483
20.1k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
484
19.2k
  {
485
19.2k
    literal_string = yr_re_ast_extract_literal(re_ast);
486
487
19.2k
    if (literal_string != NULL)
488
13.8k
      free_literal = true;
489
19.2k
  }
490
948
  else
491
948
  {
492
948
    literal_string = str;
493
948
  }
494
495
20.1k
  if (literal_string != NULL)
496
14.7k
  {
497
14.7k
    modifier.flags |= STRING_FLAGS_LITERAL;
498
499
14.7k
    result = _yr_compiler_store_data(
500
14.7k
        compiler,
501
14.7k
        literal_string->c_string,
502
14.7k
        literal_string->length + 1,  // +1 to include terminating NULL
503
14.7k
        &ref);
504
505
14.7k
    string->length = (uint32_t) literal_string->length;
506
14.7k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
507
508
14.7k
    if (result == ERROR_SUCCESS)
509
14.7k
    {
510
14.7k
      result = yr_atoms_extract_from_string(
511
14.7k
          &compiler->atoms_config,
512
14.7k
          (uint8_t*) literal_string->c_string,
513
14.7k
          (int32_t) literal_string->length,
514
14.7k
          modifier,
515
14.7k
          &atom_list,
516
14.7k
          min_atom_quality);
517
14.7k
    }
518
14.7k
  }
519
5.37k
  else
520
5.37k
  {
521
    // Non-literal strings can't be marked as fixed offset because once we
522
    // find a string atom in the scanned data we don't know the offset where
523
    // the string should start, as the non-literal strings can contain
524
    // variable-length portions.
525
5.37k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
526
527
    // Emit forwards code
528
5.37k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
529
530
    // Emit backwards code
531
5.37k
    if (result == ERROR_SUCCESS)
532
5.30k
      result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
533
534
5.37k
    if (result == ERROR_SUCCESS)
535
5.30k
      result = yr_atoms_extract_from_re(
536
5.30k
          &compiler->atoms_config,
537
5.30k
          re_ast,
538
5.30k
          modifier,
539
5.30k
          &atom_list,
540
5.30k
          min_atom_quality);
541
5.37k
  }
542
543
20.1k
  string->flags = modifier.flags;
544
20.1k
  string->rule_idx = compiler->current_rule_idx;
545
20.1k
  string->idx = compiler->current_string_idx;
546
20.1k
  string->fixed_offset = YR_UNDEFINED;
547
548
20.1k
  if (result == ERROR_SUCCESS)
549
20.1k
  {
550
    // Add the string to Aho-Corasick automaton.
551
20.1k
    result = yr_ac_add_string(
552
20.1k
        compiler->automaton,
553
20.1k
        string,
554
20.1k
        compiler->current_string_idx,
555
20.1k
        atom_list,
556
20.1k
        compiler->arena);
557
20.1k
  }
558
559
20.1k
  if (modifier.flags & STRING_FLAGS_LITERAL)
560
14.7k
  {
561
14.7k
    if (modifier.flags & STRING_FLAGS_WIDE)
562
2.07k
      max_string_len = string->length * 2;
563
12.7k
    else
564
12.7k
      max_string_len = string->length;
565
566
14.7k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
567
10.7k
      string->flags |= STRING_FLAGS_FITS_IN_ATOM;
568
14.7k
  }
569
570
20.1k
  atom = atom_list;
571
20.1k
  c = 0;
572
573
5.39M
  while (atom != NULL)
574
5.37M
  {
575
5.37M
    atom = atom->next;
576
5.37M
    c++;
577
5.37M
  }
578
579
20.1k
  (*num_atom) += c;
580
581
20.1k
  compiler->current_string_idx++;
582
583
20.1k
  if (free_literal)
584
13.8k
    yr_free(literal_string);
585
586
20.1k
  if (atom_list != NULL)
587
20.1k
    yr_atoms_list_destroy(atom_list);
588
589
20.1k
  return result;
590
20.1k
}
591
592
static int _yr_parser_check_string_modifiers(
593
    yyscan_t yyscanner,
594
    YR_MODIFIER modifier)
595
16.8k
{
596
16.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
597
598
  // xor and nocase together is not implemented.
599
16.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
600
16.8k
      modifier.flags & STRING_FLAGS_NO_CASE)
601
0
  {
602
0
    yr_compiler_set_error_extra_info(
603
0
        compiler, "invalid modifier combination: xor nocase");
604
0
    return ERROR_INVALID_MODIFIER;
605
0
  }
606
607
  // base64 and nocase together is not implemented.
608
16.8k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
609
16.8k
      (modifier.flags & STRING_FLAGS_BASE64 ||
610
2.84k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
611
0
  {
612
0
    yr_compiler_set_error_extra_info(
613
0
        compiler,
614
0
        modifier.flags & STRING_FLAGS_BASE64
615
0
            ? "invalid modifier combination: base64 nocase"
616
0
            : "invalid modifier combination: base64wide nocase");
617
0
    return ERROR_INVALID_MODIFIER;
618
0
  }
619
620
  // base64 and fullword together is not implemented.
621
16.8k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
622
16.8k
      (modifier.flags & STRING_FLAGS_BASE64 ||
623
58
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
624
2
  {
625
2
    yr_compiler_set_error_extra_info(
626
2
        compiler,
627
2
        modifier.flags & STRING_FLAGS_BASE64
628
2
            ? "invalid modifier combination: base64 fullword"
629
2
            : "invalid modifier combination: base64wide fullword");
630
2
    return ERROR_INVALID_MODIFIER;
631
2
  }
632
633
  // base64 and xor together is not implemented.
634
16.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
635
16.8k
      (modifier.flags & STRING_FLAGS_BASE64 ||
636
413
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
637
0
  {
638
0
    yr_compiler_set_error_extra_info(
639
0
        compiler,
640
0
        modifier.flags & STRING_FLAGS_BASE64
641
0
            ? "invalid modifier combination: base64 xor"
642
0
            : "invalid modifier combination: base64wide xor");
643
0
    return ERROR_INVALID_MODIFIER;
644
0
  }
645
646
16.8k
  return ERROR_SUCCESS;
647
16.8k
}
648
649
int yr_parser_reduce_string_declaration(
650
    yyscan_t yyscanner,
651
    YR_MODIFIER modifier,
652
    const char* identifier,
653
    SIZED_STRING* str,
654
    YR_ARENA_REF* string_ref)
655
16.8k
{
656
16.8k
  int result = ERROR_SUCCESS;
657
16.8k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
658
16.8k
  int atom_quality;
659
660
16.8k
  char message[512];
661
662
16.8k
  int32_t min_gap = 0;
663
16.8k
  int32_t max_gap = 0;
664
665
16.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
666
667
16.8k
  RE_AST* re_ast = NULL;
668
16.8k
  RE_AST* remainder_re_ast = NULL;
669
16.8k
  RE_ERROR re_error;
670
671
16.8k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
672
16.8k
      compiler, compiler->current_rule_idx);
673
674
  // Determine if a string with the same identifier was already defined
675
  // by searching for the identifier in strings_table.
676
16.8k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
677
16.8k
      compiler->strings_table, identifier, NULL);
678
679
  // The string was already defined, return an error.
680
16.8k
  if (string_idx != UINT32_MAX)
681
10
  {
682
10
    yr_compiler_set_error_extra_info(compiler, identifier);
683
10
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
684
10
  }
685
686
  // Empty strings are not allowed.
687
16.8k
  if (str->length == 0)
688
2
  {
689
2
    yr_compiler_set_error_extra_info(compiler, identifier);
690
2
    return ERROR_EMPTY_STRING;
691
2
  }
692
693
16.8k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
694
2.58k
    modifier.flags |= STRING_FLAGS_NO_CASE;
695
696
16.8k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
697
422
    modifier.flags |= STRING_FLAGS_DOT_ALL;
698
699
  // Hex strings are always handled as DOT_ALL regexps.
700
16.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
701
440
    modifier.flags |= STRING_FLAGS_DOT_ALL;
702
703
16.8k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
704
16.8k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
705
15.7k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
706
14.8k
  {
707
14.8k
    modifier.flags |= STRING_FLAGS_ASCII;
708
14.8k
  }
709
710
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
711
  // a single match for the string is enough. This is true in
712
  // most cases, except when the string count (#) and string offset (@)
713
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
714
  // initially, and unmarked later if required.
715
16.8k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
716
717
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
718
  // need to be searched all over the file because the user is using the
719
  // "at" operator. The string must be searched at a fixed offset in the
720
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
721
  // and unmarked later if required.
722
16.8k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
723
724
  // If string identifier is $ this is an anonymous string, if not add the
725
  // identifier to strings_table.
726
16.8k
  if (strcmp(identifier, "$") == 0)
727
16.3k
  {
728
16.3k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
729
16.3k
  }
730
465
  else
731
465
  {
732
465
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
733
465
        compiler->strings_table,
734
465
        identifier,
735
465
        NULL,
736
465
        compiler->current_string_idx));
737
465
  }
738
739
  // Make sure that the the string does not have an invalid combination of
740
  // modifiers.
741
16.8k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
742
743
16.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
744
16.8k
      modifier.flags & STRING_FLAGS_REGEXP ||
745
16.8k
      modifier.flags & STRING_FLAGS_BASE64 ||
746
16.8k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
747
15.8k
  {
748
15.8k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
749
440
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
750
15.4k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
751
13.9k
    {
752
13.9k
      int flags = RE_PARSER_FLAG_NONE;
753
13.9k
      if (compiler->strict_escape)
754
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
755
13.9k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
756
13.9k
    }
757
1.44k
    else
758
1.44k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
759
760
15.8k
    if (result != ERROR_SUCCESS)
761
446
    {
762
446
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
763
0
      {
764
0
        yywarning(
765
0
          yyscanner,
766
0
          "unknown escape sequence");
767
0
      }
768
446
      else 
769
446
      {
770
446
        snprintf(
771
446
            message,
772
446
            sizeof(message),
773
446
            "invalid %s \"%s\": %s",
774
446
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
775
446
                                                        : "regular expression",
776
446
            identifier,
777
446
            re_error.message);
778
779
446
        yr_compiler_set_error_extra_info(compiler, message);
780
446
        goto _exit;
781
446
      }
782
446
    }
783
784
15.4k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
785
165
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
786
787
15.4k
    if (re_ast->flags & RE_FLAGS_GREEDY)
788
787
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
789
790
    // Regular expressions in the strings section can't mix greedy and
791
    // ungreedy quantifiers like .* and .*?. That's because these regular
792
    // expressions can be matched forwards and/or backwards depending on the
793
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
794
    // be able to properly calculate the length of the match.
795
796
15.4k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
797
15.4k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
798
6
    {
799
6
      result = ERROR_INVALID_REGULAR_EXPRESSION;
800
801
6
      yr_compiler_set_error_extra_info(
802
6
          compiler,
803
6
          "greedy and ungreedy quantifiers can't be mixed in a regular "
804
6
          "expression");
805
806
6
      goto _exit;
807
6
    }
808
809
15.4k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
810
592
    {
811
592
      yywarning(
812
592
          yyscanner,
813
592
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
814
592
          "with a reasonable value for N",
815
592
          identifier);
816
592
    }
817
818
15.4k
    if (compiler->re_ast_callback != NULL)
819
0
    {
820
0
      compiler->re_ast_callback(
821
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
822
0
    }
823
824
15.4k
    *string_ref = YR_ARENA_NULL_REF;
825
826
34.5k
    while (re_ast != NULL)
827
19.2k
    {
828
19.2k
      YR_ARENA_REF ref;
829
830
19.2k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
831
832
19.2k
      int32_t prev_min_gap = min_gap;
833
19.2k
      int32_t prev_max_gap = max_gap;
834
835
19.2k
      result = yr_re_ast_split_at_chaining_point(
836
19.2k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
837
838
19.2k
      if (result != ERROR_SUCCESS)
839
0
        goto _exit;
840
841
19.2k
      result = _yr_parser_write_string(
842
19.2k
          identifier,
843
19.2k
          modifier,
844
19.2k
          compiler,
845
19.2k
          NULL,
846
19.2k
          re_ast,
847
19.2k
          &ref,
848
19.2k
          &atom_quality,
849
19.2k
          &current_rule->num_atoms);
850
851
19.2k
      if (result != ERROR_SUCCESS)
852
74
        goto _exit;
853
854
19.1k
      if (atom_quality < min_atom_quality)
855
13.7k
        min_atom_quality = atom_quality;
856
857
19.1k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
858
15.3k
      {
859
        // This is the first string in the chain, the string reference
860
        // returned by this function must point to this string.
861
15.3k
        *string_ref = ref;
862
15.3k
      }
863
3.79k
      else
864
3.79k
      {
865
        // This is not the first string in the chain, set the appropriate
866
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
867
        // fields.
868
3.79k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
869
3.79k
            compiler->arena,
870
3.79k
            YR_STRINGS_TABLE,
871
3.79k
            prev_string_idx * sizeof(YR_STRING));
872
873
3.79k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
874
3.79k
            compiler->arena, &ref);
875
876
3.79k
        new_string->chained_to = prev_string;
877
3.79k
        new_string->chain_gap_min = prev_min_gap;
878
3.79k
        new_string->chain_gap_max = prev_max_gap;
879
880
        // A string chained to another one can't have a fixed offset, only the
881
        // head of the string chain can have a fixed offset.
882
3.79k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
883
884
        // There is a previous string, but that string wasn't marked as part
885
        // of a chain because we can't do that until knowing there will be
886
        // another string, let's flag it now the we know.
887
3.79k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
888
889
        // There is a previous string, so this string is part of a chain, but
890
        // there will be no more strings because there are no more AST to
891
        // split, which means that this is the chain's tail.
892
3.79k
        if (remainder_re_ast == NULL)
893
316
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
894
316
                               STRING_FLAGS_CHAIN_TAIL;
895
3.79k
      }
896
897
19.1k
      yr_re_ast_destroy(re_ast);
898
19.1k
      re_ast = remainder_re_ast;
899
19.1k
    }
900
15.4k
  }
901
948
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
902
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
903
948
  {
904
948
    result = _yr_parser_write_string(
905
948
        identifier,
906
948
        modifier,
907
948
        compiler,
908
948
        str,
909
948
        NULL,
910
948
        string_ref,
911
948
        &min_atom_quality,
912
948
        &current_rule->num_atoms);
913
914
948
    if (result != ERROR_SUCCESS)
915
0
      goto _exit;
916
948
  }
917
918
16.3k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
919
3.40k
  {
920
3.40k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
921
3.40k
  }
922
923
16.8k
_exit:
924
925
16.8k
  if (re_ast != NULL)
926
352
    yr_re_ast_destroy(re_ast);
927
928
16.8k
  if (remainder_re_ast != NULL)
929
0
    yr_re_ast_destroy(remainder_re_ast);
930
931
16.8k
  return result;
932
16.3k
}
933
934
static int wildcard_iterator(
935
    void* prefix,
936
    size_t prefix_len,
937
    void* _value,
938
    void* data)
939
1.19k
{
940
1.19k
  const char* identifier = (const char*) data;
941
942
  // If the identifier is prefixed by prefix, then it matches the wildcard.
943
1.19k
  if (!strncmp(prefix, identifier, prefix_len))
944
276
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
945
946
914
  return ERROR_SUCCESS;
947
1.19k
}
948
949
int yr_parser_reduce_rule_declaration_phase_1(
950
    yyscan_t yyscanner,
951
    int32_t flags,
952
    const char* identifier,
953
    YR_ARENA_REF* rule_ref)
954
18.0k
{
955
18.0k
  int result;
956
18.0k
  YR_FIXUP* fixup;
957
18.0k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
958
959
18.0k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
960
18.0k
      compiler->arena,
961
18.0k
      YR_NAMESPACES_TABLE,
962
18.0k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
963
964
18.0k
  if (yr_hash_table_lookup_uint32(
965
18.0k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
966
18.0k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
967
9.76k
  {
968
    // A rule or variable with the same identifier already exists, return the
969
    // appropriate error.
970
971
9.76k
    yr_compiler_set_error_extra_info(compiler, identifier);
972
9.76k
    return ERROR_DUPLICATED_IDENTIFIER;
973
9.76k
  }
974
975
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
976
  // any of them are a prefix of the identifier being declared. If so, return
977
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
978
8.31k
  result = yr_hash_table_iterate(
979
8.31k
      compiler->wildcard_identifiers_table,
980
8.31k
      ns->name,
981
8.31k
      wildcard_iterator,
982
8.31k
      (void*) identifier);
983
984
8.31k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
985
276
  {
986
    // This rule matches an existing wildcard rule set.
987
276
    yr_compiler_set_error_extra_info(compiler, identifier);
988
276
  }
989
990
8.31k
  FAIL_ON_ERROR(result);
991
992
8.04k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
993
8.04k
      compiler->arena,
994
8.04k
      YR_RULES_TABLE,
995
8.04k
      sizeof(YR_RULE),
996
8.04k
      rule_ref,
997
8.04k
      offsetof(YR_RULE, identifier),
998
8.04k
      offsetof(YR_RULE, tags),
999
8.04k
      offsetof(YR_RULE, strings),
1000
8.04k
      offsetof(YR_RULE, metas),
1001
8.04k
      offsetof(YR_RULE, ns),
1002
8.04k
      EOL));
1003
1004
8.04k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1005
1006
8.04k
  YR_ARENA_REF ref;
1007
1008
8.04k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1009
1010
8.04k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1011
8.04k
  rule->flags = flags;
1012
8.04k
  rule->ns = ns;
1013
8.04k
  rule->num_atoms = 0;
1014
1015
8.04k
  YR_ARENA_REF jmp_offset_ref;
1016
1017
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1018
8.04k
  compiler->current_rule_idx = compiler->next_rule_idx;
1019
8.04k
  compiler->next_rule_idx++;
1020
1021
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1022
  // disabled it skips over the rule's code and go straight to the next rule's
1023
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1024
  // is set to 0 as we don't know the jump target yet. When we finish
1025
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1026
  // the jump offset is set to its final value.
1027
1028
8.04k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1029
8.04k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1030
1031
8.04k
  FAIL_ON_ERROR(yr_arena_write_data(
1032
8.04k
      compiler->arena,
1033
8.04k
      YR_CODE_SECTION,
1034
8.04k
      &compiler->current_rule_idx,
1035
8.04k
      sizeof(compiler->current_rule_idx),
1036
8.04k
      NULL));
1037
1038
  // Create a fixup entry for the jump and push it in the stack
1039
8.04k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1040
1041
8.04k
  if (fixup == NULL)
1042
0
    return ERROR_INSUFFICIENT_MEMORY;
1043
1044
8.04k
  fixup->ref = jmp_offset_ref;
1045
8.04k
  fixup->next = compiler->fixup_stack_head;
1046
8.04k
  compiler->fixup_stack_head = fixup;
1047
1048
  // Clean strings_table as we are starting to parse a new rule.
1049
8.04k
  yr_hash_table_clean(compiler->strings_table, NULL);
1050
1051
8.04k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1052
8.04k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1053
1054
8.04k
  return ERROR_SUCCESS;
1055
8.04k
}
1056
1057
int yr_parser_reduce_rule_declaration_phase_2(
1058
    yyscan_t yyscanner,
1059
    YR_ARENA_REF* rule_ref)
1060
171
{
1061
171
  uint32_t max_strings_per_rule;
1062
171
  uint32_t strings_in_rule = 0;
1063
1064
171
  YR_FIXUP* fixup;
1065
171
  YR_STRING* string;
1066
171
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1067
1068
171
  yr_get_configuration_uint32(
1069
171
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1070
1071
171
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1072
1073
  // Show warning if the rule is generating too many atoms. The warning is
1074
  // shown if the number of atoms is greater than 20 times the maximum number
1075
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1076
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1077
1078
171
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1079
37
  {
1080
37
    yywarning(yyscanner, "rule is slowing down scanning");
1081
37
  }
1082
1083
171
  yr_rule_strings_foreach(rule, string)
1084
1.08k
  {
1085
    // Only the heading fragment in a chain of strings (the one with
1086
    // chained_to == NULL) must be referenced. All other fragments
1087
    // are never marked as referenced.
1088
    //
1089
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1090
    // strings must always be referenced.
1091
1092
1.08k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1093
1.08k
        (STRING_IS_ANONYMOUS(string) ||
1094
28
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1095
16
    {
1096
16
      yr_compiler_set_error_extra_info(
1097
16
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1098
16
    }
1099
1100
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1101
    // that it will match anywhere.
1102
1.06k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1103
1.06k
        STRING_IS_FIXED_OFFSET(string))
1104
0
    {
1105
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1106
0
    }
1107
1108
1.06k
    strings_in_rule++;
1109
1110
1.06k
    if (strings_in_rule > max_strings_per_rule)
1111
0
    {
1112
0
      yr_compiler_set_error_extra_info(
1113
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1114
0
    }
1115
1.06k
  }
1116
1117
155
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1118
155
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1119
1120
155
  fixup = compiler->fixup_stack_head;
1121
1122
155
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1123
155
      compiler->arena, &fixup->ref);
1124
1125
155
  int32_t jmp_offset = yr_arena_get_current_offset(
1126
155
                           compiler->arena, YR_CODE_SECTION) -
1127
155
                       fixup->ref.offset + 1;
1128
1129
155
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1130
1131
  // Remove fixup from the stack.
1132
155
  compiler->fixup_stack_head = fixup->next;
1133
155
  yr_free(fixup);
1134
1135
  // We have finished parsing the current rule set current_rule_idx to
1136
  // UINT32_MAX indicating that we are not currently parsing a rule.
1137
155
  compiler->current_rule_idx = UINT32_MAX;
1138
1139
155
  return ERROR_SUCCESS;
1140
155
}
1141
1142
int yr_parser_reduce_string_identifier(
1143
    yyscan_t yyscanner,
1144
    const char* identifier,
1145
    uint8_t instruction,
1146
    uint64_t at_offset)
1147
14.4k
{
1148
14.4k
  YR_STRING* string;
1149
14.4k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1150
1151
14.4k
  if (strcmp(identifier, "$") == 0)            // is an anonymous string ?
1152
14.0k
  {
1153
14.0k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1154
14.0k
    {
1155
14.0k
      yr_parser_emit_with_arg(
1156
14.0k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1157
1158
14.0k
      yr_parser_emit(yyscanner, instruction, NULL);
1159
1160
14.0k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1161
14.0k
          compiler, compiler->current_rule_idx);
1162
1163
14.0k
      yr_rule_strings_foreach(current_rule, string)
1164
434k
      {
1165
434k
        if (instruction != OP_FOUND)
1166
434k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1167
1168
434k
        if (instruction == OP_FOUND_AT)
1169
624
        {
1170
          // Avoid overwriting any previous fixed offset
1171
624
          if (string->fixed_offset == YR_UNDEFINED)
1172
356
            string->fixed_offset = at_offset;
1173
1174
          // If a previous fixed offset was different, disable
1175
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1176
          // have room to store a single fixed offset value
1177
624
          if (string->fixed_offset != at_offset)
1178
268
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1179
624
        }
1180
433k
        else
1181
433k
        {
1182
433k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1183
433k
        }
1184
434k
      }
1185
14.0k
    }
1186
33
    else
1187
33
    {
1188
      // Anonymous strings not allowed outside of a loop
1189
33
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1190
33
    }
1191
14.0k
  }
1192
373
  else
1193
373
  {
1194
373
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1195
1196
343
    FAIL_ON_ERROR(
1197
343
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1198
1199
343
    if (instruction != OP_FOUND)
1200
257
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1201
1202
343
    if (instruction == OP_FOUND_AT)
1203
83
    {
1204
      // Avoid overwriting any previous fixed offset
1205
1206
83
      if (string->fixed_offset == YR_UNDEFINED)
1207
14
        string->fixed_offset = at_offset;
1208
1209
      // If a previous fixed offset was different, disable
1210
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1211
      // have room to store a single fixed offset value
1212
1213
83
      if (string->fixed_offset == YR_UNDEFINED ||
1214
83
          string->fixed_offset != at_offset)
1215
42
      {
1216
42
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1217
42
      }
1218
83
    }
1219
260
    else
1220
260
    {
1221
260
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1222
260
    }
1223
1224
343
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1225
1226
343
    string->flags |= STRING_FLAGS_REFERENCED;
1227
343
  }
1228
1229
14.4k
  return ERROR_SUCCESS;
1230
14.4k
}
1231
1232
int yr_parser_reduce_meta_declaration(
1233
    yyscan_t yyscanner,
1234
    int32_t type,
1235
    const char* identifier,
1236
    const char* string,
1237
    int64_t integer,
1238
    YR_ARENA_REF* meta_ref)
1239
1.18k
{
1240
1.18k
  YR_ARENA_REF ref;
1241
1.18k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1242
1243
1.18k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1244
1.18k
      compiler->arena,
1245
1.18k
      YR_METAS_TABLE,
1246
1.18k
      sizeof(YR_META),
1247
1.18k
      meta_ref,
1248
1.18k
      offsetof(YR_META, identifier),
1249
1.18k
      offsetof(YR_META, string),
1250
1.18k
      EOL));
1251
1252
1.18k
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1253
1254
1.18k
  meta->type = type;
1255
1.18k
  meta->integer = integer;
1256
1257
1.18k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1258
1259
1.18k
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1260
1261
1.18k
  if (string != NULL)
1262
20
  {
1263
20
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1264
1265
20
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1266
20
  }
1267
1.16k
  else
1268
1.16k
  {
1269
1.16k
    meta->string = NULL;
1270
1.16k
  }
1271
1272
1.18k
  compiler->current_meta_idx++;
1273
1274
1.18k
  return ERROR_SUCCESS;
1275
1.18k
}
1276
1277
// Tells whether `module_name` can be used as a module name: it must be
// non-empty and its C-string length must equal its recorded length (which
// rules out names containing embedded NUL bytes).
//
// Returns a non-zero value for a valid name and zero otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  // The length test comes first so strlen is not evaluated for empty names.
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1287
1288
// Handles an `import` statement for the module named `module_name`.
//
// If the module has not been imported yet in the current namespace, a
// structure object is created for it, registered in the compiler's
// objects_table, populated through yr_modules_do_declarations, and an
// OP_IMPORT instruction referencing the stored module name is emitted.
//
//   yyscanner    scanner whose extra data is the YR_COMPILER
//   module_name  name of the module being imported
//
// Returns ERROR_SUCCESS (also when the module was already imported),
// ERROR_INVALID_MODULE_NAME, or the error reported by the failing helper
// (e.g. ERROR_UNKNOWN_MODULE from yr_modules_do_declarations).
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object was never stored in objects_table, so nothing else holds a
    // pointer to it; release it here instead of leaking it.
    yr_object_destroy(module_structure);
    return result;
  }

  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  // From here on module_structure is reachable through objects_table, so it
  // is not destroyed on failure.
  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1345
1346
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1347
13.5k
{
1348
13.5k
  int opcode = 0;
1349
1350
13.5k
  switch (expression_type)
1351
13.5k
  {
1352
9.28k
  case EXPRESSION_TYPE_INTEGER:
1353
9.28k
    opcode = OP_INT_BEGIN;
1354
9.28k
    break;
1355
2.47k
  case EXPRESSION_TYPE_FLOAT:
1356
2.47k
    opcode = OP_DBL_BEGIN;
1357
2.47k
    break;
1358
1.83k
  case EXPRESSION_TYPE_STRING:
1359
1.83k
    opcode = OP_STR_BEGIN;
1360
1.83k
    break;
1361
0
  default:
1362
0
    assert(false);
1363
13.5k
  }
1364
1365
13.5k
  if (op[0] == '<')
1366
933
  {
1367
933
    if (op[1] == '=')
1368
176
      opcode += _OP_LE;
1369
757
    else
1370
757
      opcode += _OP_LT;
1371
933
  }
1372
12.6k
  else if (op[0] == '>')
1373
1.27k
  {
1374
1.27k
    if (op[1] == '=')
1375
190
      opcode += _OP_GE;
1376
1.08k
    else
1377
1.08k
      opcode += _OP_GT;
1378
1.27k
  }
1379
11.3k
  else if (op[1] == '=')
1380
231
  {
1381
231
    if (op[0] == '=')
1382
24
      opcode += _OP_EQ;
1383
207
    else
1384
207
      opcode += _OP_NEQ;
1385
231
  }
1386
11.1k
  else if (op[0] == '+')
1387
2.73k
  {
1388
2.73k
    opcode += _OP_ADD;
1389
2.73k
  }
1390
8.41k
  else if (op[0] == '-')
1391
5.22k
  {
1392
5.22k
    opcode += _OP_SUB;
1393
5.22k
  }
1394
3.18k
  else if (op[0] == '*')
1395
2.17k
  {
1396
2.17k
    opcode += _OP_MUL;
1397
2.17k
  }
1398
1.00k
  else if (op[0] == '\\')
1399
1.00k
  {
1400
1.00k
    opcode += _OP_DIV;
1401
1.00k
  }
1402
1403
13.5k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1404
13.5k
  {
1405
13.5k
    return opcode;
1406
13.5k
  }
1407
1408
3
  return OP_ERROR;
1409
13.5k
}
1410
1411
int yr_parser_reduce_operation(
1412
    yyscan_t yyscanner,
1413
    const char* op,
1414
    YR_EXPRESSION left_operand,
1415
    YR_EXPRESSION right_operand)
1416
13.6k
{
1417
13.6k
  int expression_type;
1418
1419
13.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1420
1421
13.6k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1422
13.6k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1423
13.6k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1424
11.7k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1425
11.7k
  {
1426
11.7k
    if (left_operand.type != right_operand.type)
1427
1.96k
    {
1428
      // One operand is double and the other is integer,
1429
      // cast the integer to double
1430
1431
1.96k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1432
1.96k
          yyscanner,
1433
1.96k
          OP_INT_TO_DBL,
1434
1.96k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1435
1.96k
          NULL,
1436
1.96k
          NULL));
1437
1.96k
    }
1438
1439
11.7k
    expression_type = EXPRESSION_TYPE_FLOAT;
1440
1441
11.7k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1442
11.7k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1443
9.28k
    {
1444
9.28k
      expression_type = EXPRESSION_TYPE_INTEGER;
1445
9.28k
    }
1446
1447
11.7k
    FAIL_ON_ERROR(yr_parser_emit(
1448
11.7k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1449
11.7k
  }
1450
1.88k
  else if (
1451
1.88k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1452
1.88k
      right_operand.type == EXPRESSION_TYPE_STRING)
1453
1.83k
  {
1454
1.83k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1455
1456
1.83k
    if (opcode != OP_ERROR)
1457
1.82k
    {
1458
1.82k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1459
1.82k
    }
1460
3
    else
1461
3
    {
1462
3
      yr_compiler_set_error_extra_info_fmt(
1463
3
          compiler, "strings don't support \"%s\" operation", op);
1464
1465
3
      return ERROR_WRONG_TYPE;
1466
3
    }
1467
1.83k
  }
1468
57
  else
1469
57
  {
1470
57
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1471
1472
57
    return ERROR_WRONG_TYPE;
1473
57
  }
1474
1475
13.5k
  return ERROR_SUCCESS;
1476
13.6k
}