Coverage Report

Created: 2025-07-11 06:08

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts a hexadecimal digit character into its numeric value. Characters
// in the range 'A'..'F' map to 10..15; any other character is treated as a
// decimal digit and mapped to (x - '0'). Lowercase 'a'..'f' are not handled.
//
// The whole expansion and every use of the argument are parenthesized so the
// macro can be used inside larger expressions. Note that the argument is
// evaluated more than once, so it must not have side effects.
#define todigit(x)                                            \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10))  \
                              : ((uint8_t) ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
4
{
56
4
  return yr_arena_write_data(
57
4
      yyget_extra(yyscanner)->arena,
58
4
      YR_CODE_SECTION,
59
4
      &instruction,
60
4
      sizeof(uint8_t),
61
4
      instruction_ref);
62
4
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
0
{
71
0
  int result = yr_arena_write_data(
72
0
      yyget_extra(yyscanner)->arena,
73
0
      YR_CODE_SECTION,
74
0
      &instruction,
75
0
      sizeof(uint8_t),
76
0
      instruction_ref);
77
78
0
  if (result == ERROR_SUCCESS)
79
0
    result = yr_arena_write_data(
80
0
        yyget_extra(yyscanner)->arena,
81
0
        YR_CODE_SECTION,
82
0
        &argument,
83
0
        sizeof(double),
84
0
        argument_ref);
85
86
0
  return result;
87
0
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
2
{
96
2
  int result = yr_arena_write_data(
97
2
      yyget_extra(yyscanner)->arena,
98
2
      YR_CODE_SECTION,
99
2
      &instruction,
100
2
      sizeof(uint8_t),
101
2
      instruction_ref);
102
103
2
  if (result == ERROR_SUCCESS)
104
2
    result = yr_arena_write_data(
105
2
        yyget_extra(yyscanner)->arena,
106
2
        YR_CODE_SECTION,
107
2
        &argument,
108
2
        sizeof(int32_t),
109
2
        argument_ref);
110
111
2
  return result;
112
2
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
2
{
121
2
  int result = yr_arena_write_data(
122
2
      yyget_extra(yyscanner)->arena,
123
2
      YR_CODE_SECTION,
124
2
      &instruction,
125
2
      sizeof(uint8_t),
126
2
      instruction_ref);
127
128
2
  if (result == ERROR_SUCCESS)
129
2
    result = yr_arena_write_data(
130
2
        yyget_extra(yyscanner)->arena,
131
2
        YR_CODE_SECTION,
132
2
        &argument,
133
2
        sizeof(int64_t),
134
2
        argument_ref);
135
136
2
  return result;
137
2
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
8
{
146
8
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
8
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
8
  memset(&arg, 0, sizeof(arg));
151
8
  arg.ptr = argument;
152
153
8
  int result = yr_arena_write_data(
154
8
      yyget_extra(yyscanner)->arena,
155
8
      YR_CODE_SECTION,
156
8
      &instruction,
157
8
      sizeof(uint8_t),
158
8
      instruction_ref);
159
160
8
  if (result == ERROR_SUCCESS)
161
8
    result = yr_arena_write_data(
162
8
        yyget_extra(yyscanner)->arena,
163
8
        YR_CODE_SECTION,
164
8
        &arg,
165
8
        sizeof(arg),
166
8
        &ref);
167
168
8
  if (result == ERROR_SUCCESS)
169
8
    result = yr_arena_make_ptr_relocatable(
170
8
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
8
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
8
  return result;
176
8
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
0
{
183
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
0
      compiler, compiler->current_rule_idx);
187
188
0
  YR_STRING* string;
189
190
0
  const char* string_identifier;
191
0
  const char* target_identifier;
192
193
0
  int matching = 0;
194
195
0
  yr_rule_strings_foreach(current_rule, string)
196
0
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
0
    if (string->chained_to == NULL)
201
0
    {
202
0
      string_identifier = string->identifier;
203
0
      target_identifier = identifier;
204
205
0
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
0
             *target_identifier == *string_identifier)
207
0
      {
208
0
        target_identifier++;
209
0
        string_identifier++;
210
0
      }
211
212
0
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
0
          *target_identifier == '*')
214
0
      {
215
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
0
        string->flags |= STRING_FLAGS_REFERENCED;
218
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
0
        string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
220
0
        matching++;
221
0
      }
222
0
    }
223
0
  }
224
225
0
  if (count != NULL)
226
0
  {
227
0
    *count = matching;
228
0
  }
229
230
0
  if (matching == 0)
231
0
  {
232
0
    yr_compiler_set_error_extra_info(
233
0
        compiler, identifier) return ERROR_UNDEFINED_STRING;
234
0
  }
235
236
0
  return ERROR_SUCCESS;
237
0
}
238
239
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
240
// prefix.
241
int yr_parser_emit_pushes_for_rules(
242
    yyscan_t yyscanner,
243
    const char* prefix,
244
    int* count)
245
0
{
246
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
247
248
  // Make sure the compiler is parsing a rule
249
0
  assert(compiler->current_rule_idx != UINT32_MAX);
250
251
0
  YR_RULE* rule;
252
0
  int matching = 0;
253
254
0
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
255
0
      compiler->arena,
256
0
      YR_NAMESPACES_TABLE,
257
0
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
258
259
  // Can't use yr_rules_foreach here as that requires the rules to have been
260
  // finalized (inserting a NULL rule at the end). This is done when
261
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
262
  // into the current position in the code arena. Obviously we aren't done
263
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
264
  // this I'm manually walking all the currently compiled rules (up to the
265
  // current rule index) and comparing identifiers to see if it is one we should
266
  // use.
267
  //
268
  // Further, we have to get compiler->current_rule_idx before we start because
269
  // if we emit an OP_PUSH_RULE
270
0
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
271
272
0
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
273
0
  {
274
    // Is rule->identifier prefixed by prefix?
275
0
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
276
0
    {
277
0
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
278
0
          compiler->rules_table, rule->identifier, ns->name);
279
280
0
      if (rule_idx != UINT32_MAX)
281
0
      {
282
0
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
283
0
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
284
0
        matching++;
285
0
      }
286
0
    }
287
288
0
    rule++;
289
0
  }
290
291
0
  if (count != NULL)
292
0
  {
293
0
    *count = matching;
294
0
  }
295
296
0
  if (matching == 0)
297
0
  {
298
0
    yr_compiler_set_error_extra_info(compiler, prefix);
299
0
    return ERROR_UNDEFINED_IDENTIFIER;
300
0
  }
301
302
0
  return ERROR_SUCCESS;
303
0
}
304
305
// Emits the shortest push instruction able to carry the given constant.
// YR_UNDEFINED is encoded as a bare OP_PUSH_U. Values up to 0xff, 0xffff and
// 0xffffffff use OP_PUSH_8, OP_PUSH_16 and OP_PUSH_32 with a 1, 2 or 4 byte
// operand respectively. Anything larger uses OP_PUSH with the full 8 bytes.
// The operand bytes are copied in the host's native byte order.
//
// Returns the status of the single yr_arena_write_data call.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte followed by at most eight operand bytes.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    uint16_t operand16 = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand16, sizeof(operand16));
    encoded_len += sizeof(operand16);
  }
  else if (argument <= 0xffffffff)
  {
    uint32_t operand32 = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand32, sizeof(operand32));
    encoded_len += sizeof(operand32);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      encoded_len,
      NULL);
}
344
345
int yr_parser_check_types(
346
    YR_COMPILER* compiler,
347
    YR_OBJECT_FUNCTION* function,
348
    const char* actual_args_fmt)
349
0
{
350
0
  int i;
351
352
0
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
353
0
  {
354
0
    if (function->prototypes[i].arguments_fmt == NULL)
355
0
      break;
356
357
0
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
358
0
      return ERROR_SUCCESS;
359
0
  }
360
361
0
  yr_compiler_set_error_extra_info(compiler, function->identifier)
362
363
0
      return ERROR_WRONG_ARGUMENTS;
364
0
}
365
366
int yr_parser_lookup_string(
367
    yyscan_t yyscanner,
368
    const char* identifier,
369
    YR_STRING** string)
370
0
{
371
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
372
373
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
374
0
      compiler, compiler->current_rule_idx);
375
376
0
  yr_rule_strings_foreach(current_rule, *string)
377
0
  {
378
    // If some string $a gets fragmented into multiple chained
379
    // strings, all those fragments have the same $a identifier
380
    // but we are interested in the heading fragment, which is
381
    // that with chained_to == NULL
382
383
0
    if ((*string)->chained_to == NULL &&
384
0
        strcmp((*string)->identifier, identifier) == 0)
385
0
    {
386
0
      return ERROR_SUCCESS;
387
0
    }
388
0
  }
389
390
0
  yr_compiler_set_error_extra_info(compiler, identifier)
391
392
0
      * string = NULL;
393
394
0
  return ERROR_UNDEFINED_STRING;
395
0
}
396
397
////////////////////////////////////////////////////////////////////////////////
398
// Searches for a variable with the given identifier in the scope of the current
399
// "for" loop. In case of nested "for" loops the identifier is searched starting
400
// at the top-level loop and going down through the nested loops until the
401
// current one. This is ok because inner loops can not re-define an identifier
402
// already defined by an outer loop.
403
//
404
// If the variable is found, the return value is the position that the variable
405
// occupies among all the currently defined variables. If the variable doesn't
406
// exist the return value is -1.
407
//
408
// The function can receive a pointer to a YR_EXPRESSION that will be populated
409
// with information about the variable if found. This pointer can be NULL if
410
// the caller is not interested in getting that information.
411
//
412
int yr_parser_lookup_loop_variable(
413
    yyscan_t yyscanner,
414
    const char* identifier,
415
    YR_EXPRESSION* expr)
416
2
{
417
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
418
2
  int i, j;
419
2
  int var_offset = 0;
420
421
2
  for (i = 0; i <= compiler->loop_index; i++)
422
0
  {
423
0
    var_offset += compiler->loop[i].vars_internal_count;
424
425
0
    for (j = 0; j < compiler->loop[i].vars_count; j++)
426
0
    {
427
0
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
428
0
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
429
0
      {
430
0
        if (expr != NULL)
431
0
          *expr = compiler->loop[i].vars[j];
432
433
0
        return var_offset + j;
434
0
      }
435
0
    }
436
437
0
    var_offset += compiler->loop[i].vars_count;
438
0
  }
439
440
2
  return -1;
441
2
}
442
443
static int _yr_parser_write_string(
444
    const char* identifier,
445
    YR_MODIFIER modifier,
446
    YR_COMPILER* compiler,
447
    SIZED_STRING* str,
448
    RE_AST* re_ast,
449
    YR_ARENA_REF* string_ref,
450
    int* min_atom_quality,
451
    int* num_atom)
452
0
{
453
0
  SIZED_STRING* literal_string;
454
0
  YR_ATOM_LIST_ITEM* atom;
455
0
  YR_ATOM_LIST_ITEM* atom_list = NULL;
456
457
0
  int c, result;
458
0
  int max_string_len;
459
0
  bool free_literal = false;
460
461
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
462
0
      compiler->arena,
463
0
      YR_STRINGS_TABLE,
464
0
      sizeof(YR_STRING),
465
0
      string_ref,
466
0
      offsetof(YR_STRING, identifier),
467
0
      offsetof(YR_STRING, string),
468
0
      offsetof(YR_STRING, chained_to),
469
0
      EOL));
470
471
0
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
472
0
      compiler->arena, string_ref);
473
474
0
  YR_ARENA_REF ref;
475
476
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
477
478
0
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
479
0
  string->rule_idx = compiler->current_rule_idx;
480
0
  string->idx = compiler->current_string_idx;
481
0
  string->fixed_offset = YR_UNDEFINED;
482
483
0
  compiler->current_string_idx++;
484
485
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
486
0
      modifier.flags & STRING_FLAGS_REGEXP ||
487
0
      modifier.flags & STRING_FLAGS_BASE64 ||
488
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
489
0
  {
490
0
    literal_string = yr_re_ast_extract_literal(re_ast);
491
492
0
    if (literal_string != NULL)
493
0
      free_literal = true;
494
0
  }
495
0
  else
496
0
  {
497
0
    literal_string = str;
498
0
  }
499
500
0
  if (literal_string != NULL)
501
0
  {
502
0
    modifier.flags |= STRING_FLAGS_LITERAL;
503
504
0
    result = _yr_compiler_store_data(
505
0
        compiler,
506
0
        literal_string->c_string,
507
0
        literal_string->length + 1,  // +1 to include terminating NULL
508
0
        &ref);
509
510
0
    if (result != ERROR_SUCCESS)
511
0
      goto cleanup;
512
513
0
    string->length = (uint32_t) literal_string->length;
514
0
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
515
516
0
    if (modifier.flags & STRING_FLAGS_WIDE)
517
0
      max_string_len = string->length * 2;
518
0
    else
519
0
      max_string_len = string->length;
520
521
0
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
522
0
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
523
524
0
    result = yr_atoms_extract_from_string(
525
0
        &compiler->atoms_config,
526
0
        (uint8_t*) literal_string->c_string,
527
0
        (int32_t) literal_string->length,
528
0
        modifier,
529
0
        &atom_list,
530
0
        min_atom_quality);
531
532
0
    if (result != ERROR_SUCCESS)
533
0
      goto cleanup;
534
0
  }
535
0
  else
536
0
  {
537
    // Non-literal strings can't be marked as fixed offset because once we
538
    // find a string atom in the scanned data we don't know the offset where
539
    // the string should start, as the non-literal strings can contain
540
    // variable-length portions.
541
0
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
542
543
    // Save the position where the RE forward code starts for later reference.
544
0
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
545
0
        compiler->arena, YR_RE_CODE_SECTION);
546
547
    // Emit forwards code
548
0
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
549
550
0
    if (result != ERROR_SUCCESS)
551
0
      goto cleanup;
552
553
    // Emit backwards code
554
0
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
555
556
0
    if (result != ERROR_SUCCESS)
557
0
      goto cleanup;
558
559
    // Extract atoms from the regular expression.
560
0
    result = yr_atoms_extract_from_re(
561
0
        &compiler->atoms_config,
562
0
        re_ast,
563
0
        modifier,
564
0
        &atom_list,
565
0
        min_atom_quality);
566
567
0
    if (result != ERROR_SUCCESS)
568
0
      goto cleanup;
569
570
    // If no atom was extracted let's add a zero-length atom.
571
0
    if (atom_list == NULL)
572
0
    {
573
0
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
574
575
0
      if (atom_list == NULL)
576
0
      {
577
0
        result = ERROR_INSUFFICIENT_MEMORY;
578
0
        goto cleanup;
579
0
      }
580
581
0
      atom_list->atom.length = 0;
582
0
      atom_list->backtrack = 0;
583
0
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
584
0
      atom_list->next = NULL;
585
586
0
      yr_arena_ptr_to_ref(
587
0
          compiler->arena,
588
0
          yr_arena_get_ptr(
589
0
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
590
0
          &(atom_list->forward_code_ref));
591
0
    }
592
0
  }
593
594
0
  string->flags = modifier.flags;
595
596
  // Add the string to Aho-Corasick automaton.
597
0
  result = yr_ac_add_string(
598
0
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
599
600
0
  if (result != ERROR_SUCCESS)
601
0
    goto cleanup;
602
603
0
  atom = atom_list;
604
0
  c = 0;
605
606
0
  while (atom != NULL)
607
0
  {
608
0
    atom = atom->next;
609
0
    c++;
610
0
  }
611
612
0
  (*num_atom) += c;
613
614
0
cleanup:
615
0
  if (free_literal)
616
0
    yr_free(literal_string);
617
618
0
  if (atom_list != NULL)
619
0
    yr_atoms_list_destroy(atom_list);
620
621
0
  return result;
622
0
}
623
624
static int _yr_parser_check_string_modifiers(
625
    yyscan_t yyscanner,
626
    YR_MODIFIER modifier)
627
0
{
628
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
629
630
  // xor and nocase together is not implemented.
631
0
  if (modifier.flags & STRING_FLAGS_XOR &&
632
0
      modifier.flags & STRING_FLAGS_NO_CASE)
633
0
  {
634
0
    yr_compiler_set_error_extra_info(
635
0
        compiler, "invalid modifier combination: xor nocase");
636
0
    return ERROR_INVALID_MODIFIER;
637
0
  }
638
639
  // base64 and nocase together is not implemented.
640
0
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
641
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
642
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
643
0
  {
644
0
    yr_compiler_set_error_extra_info(
645
0
        compiler,
646
0
        modifier.flags & STRING_FLAGS_BASE64
647
0
            ? "invalid modifier combination: base64 nocase"
648
0
            : "invalid modifier combination: base64wide nocase");
649
0
    return ERROR_INVALID_MODIFIER;
650
0
  }
651
652
  // base64 and fullword together is not implemented.
653
0
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
654
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
655
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
656
0
  {
657
0
    yr_compiler_set_error_extra_info(
658
0
        compiler,
659
0
        modifier.flags & STRING_FLAGS_BASE64
660
0
            ? "invalid modifier combination: base64 fullword"
661
0
            : "invalid modifier combination: base64wide fullword");
662
0
    return ERROR_INVALID_MODIFIER;
663
0
  }
664
665
  // base64 and xor together is not implemented.
666
0
  if (modifier.flags & STRING_FLAGS_XOR &&
667
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
668
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
669
0
  {
670
0
    yr_compiler_set_error_extra_info(
671
0
        compiler,
672
0
        modifier.flags & STRING_FLAGS_BASE64
673
0
            ? "invalid modifier combination: base64 xor"
674
0
            : "invalid modifier combination: base64wide xor");
675
0
    return ERROR_INVALID_MODIFIER;
676
0
  }
677
678
0
  return ERROR_SUCCESS;
679
0
}
680
681
int yr_parser_reduce_string_declaration(
682
    yyscan_t yyscanner,
683
    YR_MODIFIER modifier,
684
    const char* identifier,
685
    SIZED_STRING* str,
686
    YR_ARENA_REF* string_ref)
687
0
{
688
0
  int result = ERROR_SUCCESS;
689
0
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
690
0
  int atom_quality;
691
692
0
  char message[512];
693
694
0
  int32_t min_gap = 0;
695
0
  int32_t max_gap = 0;
696
697
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
698
699
0
  RE_AST* re_ast = NULL;
700
0
  RE_AST* remainder_re_ast = NULL;
701
0
  RE_ERROR re_error;
702
703
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
704
0
      compiler, compiler->current_rule_idx);
705
706
  // Determine if a string with the same identifier was already defined
707
  // by searching for the identifier in strings_table.
708
0
  uint32_t string_idx = yr_hash_table_lookup_uint32(
709
0
      compiler->strings_table, identifier, NULL);
710
711
  // The string was already defined, return an error.
712
0
  if (string_idx != UINT32_MAX)
713
0
  {
714
0
    yr_compiler_set_error_extra_info(compiler, identifier);
715
0
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
716
0
  }
717
718
  // Empty strings are not allowed.
719
0
  if (str->length == 0)
720
0
  {
721
0
    yr_compiler_set_error_extra_info(compiler, identifier);
722
0
    return ERROR_EMPTY_STRING;
723
0
  }
724
725
0
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
726
0
    modifier.flags |= STRING_FLAGS_NO_CASE;
727
728
0
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
729
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
730
731
  // Hex strings are always handled as DOT_ALL regexps.
732
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
733
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
734
735
0
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
736
0
      !(modifier.flags & STRING_FLAGS_BASE64 ||
737
0
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
738
0
  {
739
0
    modifier.flags |= STRING_FLAGS_ASCII;
740
0
  }
741
742
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
743
  // a single match for the string is enough. This is true in
744
  // most cases, except when the string count (#) and string offset (@)
745
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
746
  // initially, and unmarked later if required.
747
0
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
748
749
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
750
  // need to be searched all over the file because the user is using the
751
  // "at" operator. The string must be searched at a fixed offset in the
752
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
753
  // and unmarked later if required.
754
0
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
755
756
  // If string identifier is $ this is an anonymous string, if not add the
757
  // identifier to strings_table.
758
0
  if (strcmp(identifier, "$") == 0)
759
0
  {
760
0
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
761
0
  }
762
0
  else
763
0
  {
764
0
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
765
0
        compiler->strings_table,
766
0
        identifier,
767
0
        NULL,
768
0
        compiler->current_string_idx));
769
0
  }
770
771
  // Make sure that the the string does not have an invalid combination of
772
  // modifiers.
773
0
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
774
775
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
776
0
      modifier.flags & STRING_FLAGS_REGEXP ||
777
0
      modifier.flags & STRING_FLAGS_BASE64 ||
778
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
779
0
  {
780
0
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
781
0
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
782
0
    else if (modifier.flags & STRING_FLAGS_REGEXP)
783
0
    {
784
0
      int flags = RE_PARSER_FLAG_NONE;
785
0
      if (compiler->strict_escape)
786
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
787
0
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
788
0
    }
789
0
    else
790
0
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
791
792
0
    if (result != ERROR_SUCCESS)
793
0
    {
794
0
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
795
0
      {
796
0
        yywarning(yyscanner, "unknown escape sequence");
797
0
      }
798
0
      else
799
0
      {
800
0
        snprintf(
801
0
            message,
802
0
            sizeof(message),
803
0
            "invalid %s \"%s\": %s",
804
0
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
805
0
                                                        : "regular expression",
806
0
            identifier,
807
0
            re_error.message);
808
809
0
        yr_compiler_set_error_extra_info(compiler, message);
810
0
        goto _exit;
811
0
      }
812
0
    }
813
814
0
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
815
0
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
816
817
0
    if (re_ast->flags & RE_FLAGS_GREEDY)
818
0
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
819
820
    // Regular expressions in the strings section can't mix greedy and
821
    // ungreedy quantifiers like .* and .*?. That's because these regular
822
    // expressions can be matched forwards and/or backwards depending on the
823
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
824
    // be able to properly calculate the length of the match.
825
826
0
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
827
0
        (re_ast->flags & RE_FLAGS_UNGREEDY))
828
0
    {
829
0
      result = ERROR_INVALID_REGULAR_EXPRESSION;
830
831
0
      yr_compiler_set_error_extra_info(
832
0
          compiler,
833
0
          "greedy and ungreedy quantifiers can't be mixed in a regular "
834
0
          "expression");
835
836
0
      goto _exit;
837
0
    }
838
839
0
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
840
0
    {
841
0
      yywarning(
842
0
          yyscanner,
843
0
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
844
0
          "with a reasonable value for N",
845
0
          identifier);
846
0
    }
847
848
0
    if (compiler->re_ast_callback != NULL)
849
0
    {
850
0
      compiler->re_ast_callback(
851
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
852
0
    }
853
854
0
    *string_ref = YR_ARENA_NULL_REF;
855
856
0
    while (re_ast != NULL)
857
0
    {
858
0
      YR_ARENA_REF ref;
859
860
0
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
861
862
0
      int32_t prev_min_gap = min_gap;
863
0
      int32_t prev_max_gap = max_gap;
864
865
0
      result = yr_re_ast_split_at_chaining_point(
866
0
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
867
868
0
      if (result != ERROR_SUCCESS)
869
0
        goto _exit;
870
871
0
      result = _yr_parser_write_string(
872
0
          identifier,
873
0
          modifier,
874
0
          compiler,
875
0
          NULL,
876
0
          re_ast,
877
0
          &ref,
878
0
          &atom_quality,
879
0
          &current_rule->num_atoms);
880
881
0
      if (result != ERROR_SUCCESS)
882
0
        goto _exit;
883
884
0
      if (atom_quality < min_atom_quality)
885
0
        min_atom_quality = atom_quality;
886
887
0
      if (YR_ARENA_IS_NULL_REF(*string_ref))
888
0
      {
889
        // This is the first string in the chain, the string reference
890
        // returned by this function must point to this string.
891
0
        *string_ref = ref;
892
0
      }
893
0
      else
894
0
      {
895
        // This is not the first string in the chain, set the appropriate
896
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
897
        // fields.
898
0
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
899
0
            compiler->arena,
900
0
            YR_STRINGS_TABLE,
901
0
            prev_string_idx * sizeof(YR_STRING));
902
903
0
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
904
0
            compiler->arena, &ref);
905
906
0
        new_string->chained_to = prev_string;
907
0
        new_string->chain_gap_min = prev_min_gap;
908
0
        new_string->chain_gap_max = prev_max_gap;
909
910
        // A string chained to another one can't have a fixed offset, only the
911
        // head of the string chain can have a fixed offset.
912
0
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
913
914
        // There is a previous string, but that string wasn't marked as part
915
        // of a chain because we can't do that until knowing there will be
916
        // another string, let's flag it now the we know.
917
0
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
918
919
        // There is a previous string, so this string is part of a chain, but
920
        // there will be no more strings because there are no more AST to
921
        // split, which means that this is the chain's tail.
922
0
        if (remainder_re_ast == NULL)
923
0
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
924
0
                               STRING_FLAGS_CHAIN_TAIL;
925
0
      }
926
927
0
      yr_re_ast_destroy(re_ast);
928
0
      re_ast = remainder_re_ast;
929
0
    }
930
0
  }
931
0
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
932
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
933
0
  {
934
0
    result = _yr_parser_write_string(
935
0
        identifier,
936
0
        modifier,
937
0
        compiler,
938
0
        str,
939
0
        NULL,
940
0
        string_ref,
941
0
        &min_atom_quality,
942
0
        &current_rule->num_atoms);
943
944
0
    if (result != ERROR_SUCCESS)
945
0
      goto _exit;
946
0
  }
947
948
0
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
949
0
  {
950
0
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
951
0
  }
952
953
0
_exit:
954
955
0
  if (re_ast != NULL)
956
0
    yr_re_ast_destroy(re_ast);
957
958
0
  if (remainder_re_ast != NULL)
959
0
    yr_re_ast_destroy(remainder_re_ast);
960
961
0
  return result;
962
0
}
963
964
static int wildcard_iterator(
965
    void* prefix,
966
    size_t prefix_len,
967
    void* _value,
968
    void* data)
969
0
{
970
0
  const char* identifier = (const char*) data;
971
972
  // If the identifier is prefixed by prefix, then it matches the wildcard.
973
0
  if (!strncmp(prefix, identifier, prefix_len))
974
0
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
975
976
0
  return ERROR_SUCCESS;
977
0
}
978
979
int yr_parser_reduce_rule_declaration_phase_1(
980
    yyscan_t yyscanner,
981
    int32_t flags,
982
    const char* identifier,
983
    YR_ARENA_REF* rule_ref)
984
2
{
985
2
  int result;
986
2
  YR_FIXUP* fixup;
987
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
988
989
2
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
990
2
      compiler->arena,
991
2
      YR_NAMESPACES_TABLE,
992
2
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
993
994
2
  if (yr_hash_table_lookup_uint32(
995
2
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
996
2
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
997
0
  {
998
    // A rule or variable with the same identifier already exists, return the
999
    // appropriate error.
1000
1001
0
    yr_compiler_set_error_extra_info(compiler, identifier);
1002
0
    return ERROR_DUPLICATED_IDENTIFIER;
1003
0
  }
1004
1005
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1006
  // any of them are a prefix of the identifier being declared. If so, return
1007
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1008
2
  result = yr_hash_table_iterate(
1009
2
      compiler->wildcard_identifiers_table,
1010
2
      ns->name,
1011
2
      wildcard_iterator,
1012
2
      (void*) identifier);
1013
1014
2
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1015
0
  {
1016
    // This rule matches an existing wildcard rule set.
1017
0
    yr_compiler_set_error_extra_info(compiler, identifier);
1018
0
  }
1019
1020
2
  FAIL_ON_ERROR(result);
1021
1022
2
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1023
2
      compiler->arena,
1024
2
      YR_RULES_TABLE,
1025
2
      sizeof(YR_RULE),
1026
2
      rule_ref,
1027
2
      offsetof(YR_RULE, identifier),
1028
2
      offsetof(YR_RULE, tags),
1029
2
      offsetof(YR_RULE, strings),
1030
2
      offsetof(YR_RULE, metas),
1031
2
      offsetof(YR_RULE, ns),
1032
2
      EOL));
1033
1034
2
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1035
1036
2
  YR_ARENA_REF ref;
1037
1038
2
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1039
1040
2
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1041
2
  rule->flags = flags;
1042
2
  rule->ns = ns;
1043
2
  rule->num_atoms = 0;
1044
1045
2
  YR_ARENA_REF jmp_offset_ref;
1046
1047
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1048
2
  compiler->current_rule_idx = compiler->next_rule_idx;
1049
2
  compiler->next_rule_idx++;
1050
1051
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1052
  // disabled it skips over the rule's code and go straight to the next rule's
1053
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1054
  // is set to 0 as we don't know the jump target yet. When we finish
1055
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1056
  // the jump offset is set to its final value.
1057
1058
2
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1059
2
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1060
1061
2
  FAIL_ON_ERROR(yr_arena_write_data(
1062
2
      compiler->arena,
1063
2
      YR_CODE_SECTION,
1064
2
      &compiler->current_rule_idx,
1065
2
      sizeof(compiler->current_rule_idx),
1066
2
      NULL));
1067
1068
  // Create a fixup entry for the jump and push it in the stack
1069
2
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1070
1071
2
  if (fixup == NULL)
1072
0
    return ERROR_INSUFFICIENT_MEMORY;
1073
1074
2
  fixup->ref = jmp_offset_ref;
1075
2
  fixup->next = compiler->fixup_stack_head;
1076
2
  compiler->fixup_stack_head = fixup;
1077
1078
  // Clean strings_table as we are starting to parse a new rule.
1079
2
  yr_hash_table_clean(compiler->strings_table, NULL);
1080
1081
2
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1082
2
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1083
1084
2
  return ERROR_SUCCESS;
1085
2
}
1086
1087
int yr_parser_reduce_rule_declaration_phase_2(
1088
    yyscan_t yyscanner,
1089
    YR_ARENA_REF* rule_ref)
1090
2
{
1091
2
  uint32_t max_strings_per_rule;
1092
2
  uint32_t strings_in_rule = 0;
1093
1094
2
  YR_FIXUP* fixup;
1095
2
  YR_STRING* string;
1096
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1097
1098
2
  yr_get_configuration_uint32(
1099
2
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1100
1101
2
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1102
1103
  // Show warning if the rule is generating too many atoms. The warning is
1104
  // shown if the number of atoms is greater than 20 times the maximum number
1105
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1106
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1107
1108
2
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1109
0
  {
1110
0
    yywarning(yyscanner, "rule is slowing down scanning");
1111
0
  }
1112
1113
2
  yr_rule_strings_foreach(rule, string)
1114
0
  {
1115
    // Only the heading fragment in a chain of strings (the one with
1116
    // chained_to == NULL) must be referenced. All other fragments
1117
    // are never marked as referenced.
1118
    //
1119
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1120
    // strings must always be referenced.
1121
1122
0
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1123
0
        (STRING_IS_ANONYMOUS(string) ||
1124
0
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1125
0
    {
1126
0
      yr_compiler_set_error_extra_info(
1127
0
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1128
0
    }
1129
1130
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1131
    // that it will match anywhere.
1132
0
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1133
0
        STRING_IS_FIXED_OFFSET(string))
1134
0
    {
1135
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1136
0
    }
1137
1138
0
    strings_in_rule++;
1139
1140
0
    if (strings_in_rule > max_strings_per_rule)
1141
0
    {
1142
0
      yr_compiler_set_error_extra_info(
1143
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1144
0
    }
1145
0
  }
1146
1147
2
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1148
2
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1149
1150
2
  fixup = compiler->fixup_stack_head;
1151
1152
2
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1153
2
      compiler->arena, &fixup->ref);
1154
1155
2
  int32_t jmp_offset = yr_arena_get_current_offset(
1156
2
                           compiler->arena, YR_CODE_SECTION) -
1157
2
                       fixup->ref.offset + 1;
1158
1159
2
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1160
1161
  // Remove fixup from the stack.
1162
2
  compiler->fixup_stack_head = fixup->next;
1163
2
  yr_free(fixup);
1164
1165
  // We have finished parsing the current rule set current_rule_idx to
1166
  // UINT32_MAX indicating that we are not currently parsing a rule.
1167
2
  compiler->current_rule_idx = UINT32_MAX;
1168
1169
2
  return ERROR_SUCCESS;
1170
2
}
1171
1172
int yr_parser_reduce_string_identifier(
1173
    yyscan_t yyscanner,
1174
    const char* identifier,
1175
    uint8_t instruction,
1176
    uint64_t at_offset)
1177
0
{
1178
0
  YR_STRING* string;
1179
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1180
1181
0
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1182
0
  {
1183
0
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1184
0
    {
1185
0
      yr_parser_emit_with_arg(
1186
0
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1187
1188
0
      yr_parser_emit(yyscanner, instruction, NULL);
1189
1190
0
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1191
0
          compiler, compiler->current_rule_idx);
1192
1193
0
      yr_rule_strings_foreach(current_rule, string)
1194
0
      {
1195
0
        if (instruction != OP_FOUND)
1196
0
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1197
1198
0
        if (instruction == OP_FOUND_AT)
1199
0
        {
1200
          // Avoid overwriting any previous fixed offset
1201
0
          if (string->fixed_offset == YR_UNDEFINED)
1202
0
            string->fixed_offset = at_offset;
1203
1204
          // If a previous fixed offset was different, disable
1205
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1206
          // have room to store a single fixed offset value
1207
0
          if (string->fixed_offset != at_offset)
1208
0
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1209
0
        }
1210
0
        else
1211
0
        {
1212
0
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1213
0
        }
1214
0
      }
1215
0
    }
1216
0
    else
1217
0
    {
1218
      // Anonymous strings not allowed outside of a loop
1219
0
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1220
0
    }
1221
0
  }
1222
0
  else
1223
0
  {
1224
0
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1225
1226
0
    FAIL_ON_ERROR(
1227
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1228
1229
0
    if (instruction != OP_FOUND)
1230
0
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1231
1232
0
    if (instruction == OP_FOUND_AT)
1233
0
    {
1234
      // Avoid overwriting any previous fixed offset
1235
1236
0
      if (string->fixed_offset == YR_UNDEFINED)
1237
0
        string->fixed_offset = at_offset;
1238
1239
      // If a previous fixed offset was different, disable
1240
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1241
      // have room to store a single fixed offset value
1242
1243
0
      if (string->fixed_offset == YR_UNDEFINED ||
1244
0
          string->fixed_offset != at_offset)
1245
0
      {
1246
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1247
0
      }
1248
0
    }
1249
0
    else
1250
0
    {
1251
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1252
0
    }
1253
1254
0
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1255
1256
0
    string->flags |= STRING_FLAGS_REFERENCED;
1257
0
  }
1258
1259
0
  return ERROR_SUCCESS;
1260
0
}
1261
1262
int yr_parser_reduce_meta_declaration(
1263
    yyscan_t yyscanner,
1264
    int32_t type,
1265
    const char* identifier,
1266
    const char* string,
1267
    int64_t integer,
1268
    YR_ARENA_REF* meta_ref)
1269
0
{
1270
0
  YR_ARENA_REF ref;
1271
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1272
1273
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1274
0
      compiler->arena,
1275
0
      YR_METAS_TABLE,
1276
0
      sizeof(YR_META),
1277
0
      meta_ref,
1278
0
      offsetof(YR_META, identifier),
1279
0
      offsetof(YR_META, string),
1280
0
      EOL));
1281
1282
0
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1283
1284
0
  meta->type = type;
1285
0
  meta->integer = integer;
1286
1287
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1288
1289
0
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1290
1291
0
  if (string != NULL)
1292
0
  {
1293
0
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1294
1295
0
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1296
0
  }
1297
0
  else
1298
0
  {
1299
0
    meta->string = NULL;
1300
0
  }
1301
1302
0
  compiler->current_meta_idx++;
1303
1304
0
  return ERROR_SUCCESS;
1305
0
}
1306
1307
// Tells whether `module_name` is acceptable as a module name: it must be
// non-empty and its recorded length must equal strlen() of its contents,
// i.e. there is no NUL byte before the recorded end.
// Returns true when valid, false otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  const int is_valid =
      module_name->length != 0 &&
      strlen(module_name->c_string) == module_name->length;

  return is_valid ? true : false;
}
1317
1318
// Handles an `import` statement for the module named by `module_name`.
//
// Rejects invalid names with ERROR_INVALID_MODULE_NAME. If objects_table
// already has an entry for the module in the current namespace nothing else
// is done. Otherwise a structure object is created for the module, added to
// objects_table, populated through yr_modules_do_declarations, and an
// OP_IMPORT instruction referencing the stored module name is emitted.
//
// Returns ERROR_SUCCESS or the first error encountered; for
// ERROR_INVALID_MODULE_NAME and ERROR_UNKNOWN_MODULE the module name is
// recorded as extra error information.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int status;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing
  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  status = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (status != ERROR_SUCCESS)
  {
    // The object was never stored in objects_table, so nothing else holds a
    // pointer to it; destroy it here to avoid leaking it.
    yr_object_destroy(module_structure);
    return status;
  }

  status = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (status == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (status != ERROR_SUCCESS)
    return status;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1375
1376
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1377
2
{
1378
2
  int opcode = 0;
1379
1380
2
  switch (expression_type)
1381
2
  {
1382
0
  case EXPRESSION_TYPE_INTEGER:
1383
0
    opcode = OP_INT_BEGIN;
1384
0
    break;
1385
0
  case EXPRESSION_TYPE_FLOAT:
1386
0
    opcode = OP_DBL_BEGIN;
1387
0
    break;
1388
2
  case EXPRESSION_TYPE_STRING:
1389
2
    opcode = OP_STR_BEGIN;
1390
2
    break;
1391
0
  default:
1392
0
    assert(false);
1393
2
  }
1394
1395
2
  if (op[0] == '<')
1396
0
  {
1397
0
    if (op[1] == '=')
1398
0
      opcode += _OP_LE;
1399
0
    else
1400
0
      opcode += _OP_LT;
1401
0
  }
1402
2
  else if (op[0] == '>')
1403
0
  {
1404
0
    if (op[1] == '=')
1405
0
      opcode += _OP_GE;
1406
0
    else
1407
0
      opcode += _OP_GT;
1408
0
  }
1409
2
  else if (op[1] == '=')
1410
2
  {
1411
2
    if (op[0] == '=')
1412
2
      opcode += _OP_EQ;
1413
0
    else
1414
0
      opcode += _OP_NEQ;
1415
2
  }
1416
0
  else if (op[0] == '+')
1417
0
  {
1418
0
    opcode += _OP_ADD;
1419
0
  }
1420
0
  else if (op[0] == '-')
1421
0
  {
1422
0
    opcode += _OP_SUB;
1423
0
  }
1424
0
  else if (op[0] == '*')
1425
0
  {
1426
0
    opcode += _OP_MUL;
1427
0
  }
1428
0
  else if (op[0] == '\\')
1429
0
  {
1430
0
    opcode += _OP_DIV;
1431
0
  }
1432
1433
2
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1434
2
  {
1435
2
    return opcode;
1436
2
  }
1437
1438
0
  return OP_ERROR;
1439
2
}
1440
1441
int yr_parser_reduce_operation(
1442
    yyscan_t yyscanner,
1443
    const char* op,
1444
    YR_EXPRESSION left_operand,
1445
    YR_EXPRESSION right_operand)
1446
2
{
1447
2
  int expression_type;
1448
1449
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1450
1451
2
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1452
2
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1453
2
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1454
0
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1455
0
  {
1456
0
    if (left_operand.type != right_operand.type)
1457
0
    {
1458
      // One operand is double and the other is integer,
1459
      // cast the integer to double
1460
1461
0
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1462
0
          yyscanner,
1463
0
          OP_INT_TO_DBL,
1464
0
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1465
0
          NULL,
1466
0
          NULL));
1467
0
    }
1468
1469
0
    expression_type = EXPRESSION_TYPE_FLOAT;
1470
1471
0
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1472
0
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1473
0
    {
1474
0
      expression_type = EXPRESSION_TYPE_INTEGER;
1475
0
    }
1476
1477
0
    FAIL_ON_ERROR(yr_parser_emit(
1478
0
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1479
0
  }
1480
2
  else if (
1481
2
      left_operand.type == EXPRESSION_TYPE_STRING &&
1482
2
      right_operand.type == EXPRESSION_TYPE_STRING)
1483
2
  {
1484
2
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1485
1486
2
    if (opcode != OP_ERROR)
1487
2
    {
1488
2
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1489
2
    }
1490
0
    else
1491
0
    {
1492
0
      yr_compiler_set_error_extra_info_fmt(
1493
0
          compiler, "strings don't support \"%s\" operation", op);
1494
1495
0
      return ERROR_WRONG_TYPE;
1496
0
    }
1497
2
  }
1498
0
  else
1499
0
  {
1500
0
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1501
1502
0
    return ERROR_WRONG_TYPE;
1503
0
  }
1504
1505
2
  return ERROR_SUCCESS;
1506
2
}