Coverage Report

Created: 2025-11-24 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/parser.c
Line
Count
Source
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
#include "yara/compiler.h"
47
#include "yara/types.h"
48
49
// Converts a hexadecimal digit character to its numeric value. Characters in
// the range 'A'..'F' map to 10..15; every other character is treated as a
// decimal digit and mapped to (x - '0'). Lowercase 'a'..'f' are NOT handled.
//
// The whole expansion and every use of the argument are parenthesized so the
// macro can be embedded in larger expressions (e.g. todigit(a) * 16 +
// todigit(b)) without precedence surprises. The argument is evaluated more
// than once, so it must not have side effects.
#define todigit(x)                                          \
  ((uint8_t) (((x) >= 'A' && (x) <= 'F') ? ((x) - 'A' + 10) \
                                         : ((x) - '0')))
52
53
int yr_parser_emit(
54
    yyscan_t yyscanner,
55
    uint8_t instruction,
56
    YR_ARENA_REF* instruction_ref)
57
0
{
58
0
  return yr_arena_write_data(
59
0
      yyget_extra(yyscanner)->arena,
60
0
      YR_CODE_SECTION,
61
0
      &instruction,
62
0
      sizeof(uint8_t),
63
0
      instruction_ref);
64
0
}
65
66
int yr_parser_emit_with_arg_double(
67
    yyscan_t yyscanner,
68
    uint8_t instruction,
69
    double argument,
70
    YR_ARENA_REF* instruction_ref,
71
    YR_ARENA_REF* argument_ref)
72
0
{
73
0
  int result = yr_arena_write_data(
74
0
      yyget_extra(yyscanner)->arena,
75
0
      YR_CODE_SECTION,
76
0
      &instruction,
77
0
      sizeof(uint8_t),
78
0
      instruction_ref);
79
80
0
  if (result == ERROR_SUCCESS)
81
0
    result = yr_arena_write_data(
82
0
        yyget_extra(yyscanner)->arena,
83
0
        YR_CODE_SECTION,
84
0
        &argument,
85
0
        sizeof(double),
86
0
        argument_ref);
87
88
0
  return result;
89
0
}
90
91
int yr_parser_emit_with_arg_int32(
92
    yyscan_t yyscanner,
93
    uint8_t instruction,
94
    int32_t argument,
95
    YR_ARENA_REF* instruction_ref,
96
    YR_ARENA_REF* argument_ref)
97
0
{
98
0
  int result = yr_arena_write_data(
99
0
      yyget_extra(yyscanner)->arena,
100
0
      YR_CODE_SECTION,
101
0
      &instruction,
102
0
      sizeof(uint8_t),
103
0
      instruction_ref);
104
105
0
  if (result == ERROR_SUCCESS)
106
0
    result = yr_arena_write_data(
107
0
        yyget_extra(yyscanner)->arena,
108
0
        YR_CODE_SECTION,
109
0
        &argument,
110
0
        sizeof(int32_t),
111
0
        argument_ref);
112
113
0
  return result;
114
0
}
115
116
int yr_parser_emit_with_arg(
117
    yyscan_t yyscanner,
118
    uint8_t instruction,
119
    int64_t argument,
120
    YR_ARENA_REF* instruction_ref,
121
    YR_ARENA_REF* argument_ref)
122
0
{
123
0
  int result = yr_arena_write_data(
124
0
      yyget_extra(yyscanner)->arena,
125
0
      YR_CODE_SECTION,
126
0
      &instruction,
127
0
      sizeof(uint8_t),
128
0
      instruction_ref);
129
130
0
  if (result == ERROR_SUCCESS)
131
0
    result = yr_arena_write_data(
132
0
        yyget_extra(yyscanner)->arena,
133
0
        YR_CODE_SECTION,
134
0
        &argument,
135
0
        sizeof(int64_t),
136
0
        argument_ref);
137
138
0
  return result;
139
0
}
140
141
int yr_parser_emit_with_arg_reloc(
142
    yyscan_t yyscanner,
143
    uint8_t instruction,
144
    void* argument,
145
    YR_ARENA_REF* instruction_ref,
146
    YR_ARENA_REF* argument_ref)
147
1
{
148
1
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
149
150
1
  DECLARE_REFERENCE(void*, ptr) arg;
151
152
1
  memset(&arg, 0, sizeof(arg));
153
1
  arg.ptr = argument;
154
155
1
  int result = yr_arena_write_data(
156
1
      yyget_extra(yyscanner)->arena,
157
1
      YR_CODE_SECTION,
158
1
      &instruction,
159
1
      sizeof(uint8_t),
160
1
      instruction_ref);
161
162
1
  if (result == ERROR_SUCCESS)
163
1
    result = yr_arena_write_data(
164
1
        yyget_extra(yyscanner)->arena,
165
1
        YR_CODE_SECTION,
166
1
        &arg,
167
1
        sizeof(arg),
168
1
        &ref);
169
170
1
  if (result == ERROR_SUCCESS)
171
1
    result = yr_arena_make_ptr_relocatable(
172
1
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
173
174
1
  if (argument_ref != NULL)
175
0
    *argument_ref = ref;
176
177
1
  return result;
178
1
}
179
180
int yr_parser_emit_pushes_for_strings(
181
    yyscan_t yyscanner,
182
    const char* identifier,
183
    YR_STRING_SET* strings)
184
0
{
185
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
186
187
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
188
0
      compiler, compiler->current_rule_idx);
189
190
0
  YR_STRING* string;
191
192
0
  const char* string_identifier;
193
0
  const char* target_identifier;
194
195
0
  strings->count = 0;
196
0
  strings->head = NULL;
197
0
  YR_STRING_SET_ELEMENT** tail_ptr = &strings->head;
198
199
0
  yr_rule_strings_foreach(current_rule, string)
200
0
  {
201
    // Don't generate pushes for strings chained to another one, we are
202
    // only interested in non-chained strings or the head of the chain.
203
204
0
    if (string->chained_to == NULL)
205
0
    {
206
0
      string_identifier = string->identifier;
207
0
      target_identifier = identifier;
208
209
0
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
210
0
             *target_identifier == *string_identifier)
211
0
      {
212
0
        target_identifier++;
213
0
        string_identifier++;
214
0
      }
215
216
0
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
217
0
          *target_identifier == '*')
218
0
      {
219
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
220
221
0
        string->flags |= STRING_FLAGS_REFERENCED;
222
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
223
0
        strings->count++;
224
225
0
        *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT));
226
0
        yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element));
227
0
        (*tail_ptr)->next = NULL;
228
0
        tail_ptr = &(*tail_ptr)->next;
229
0
      }
230
0
    }
231
0
  }
232
233
0
  if (strings->count == 0)
234
0
  {
235
0
    yr_compiler_set_error_extra_info(
236
0
        compiler, identifier) return ERROR_UNDEFINED_STRING;
237
0
  }
238
239
0
  return ERROR_SUCCESS;
240
0
}
241
242
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
243
// prefix.
244
int yr_parser_emit_pushes_for_rules(
245
    yyscan_t yyscanner,
246
    const char* prefix,
247
    int* count)
248
0
{
249
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
250
251
  // Make sure the compiler is parsing a rule
252
0
  assert(compiler->current_rule_idx != UINT32_MAX);
253
254
0
  YR_RULE* rule;
255
0
  int matching = 0;
256
257
0
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
258
0
      compiler->arena,
259
0
      YR_NAMESPACES_TABLE,
260
0
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
261
262
  // Can't use yr_rules_foreach here as that requires the rules to have been
263
  // finalized (inserting a NULL rule at the end). This is done when
264
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
265
  // into the current position in the code arena. Obviously we aren't done
266
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
267
  // this I'm manually walking all the currently compiled rules (up to the
268
  // current rule index) and comparing identifiers to see if it is one we should
269
  // use.
270
  //
271
  // Further, we have to get compiler->current_rule_idx before we start because
272
  // if we emit an OP_PUSH_RULE
273
0
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
274
275
0
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
276
0
  {
277
    // Is rule->identifier prefixed by prefix?
278
0
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
279
0
    {
280
0
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
281
0
          compiler->rules_table, rule->identifier, ns->name);
282
283
0
      if (rule_idx != UINT32_MAX)
284
0
      {
285
0
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
286
0
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
287
0
        matching++;
288
0
      }
289
0
    }
290
291
0
    rule++;
292
0
  }
293
294
0
  if (count != NULL)
295
0
  {
296
0
    *count = matching;
297
0
  }
298
299
0
  if (matching == 0)
300
0
  {
301
0
    yr_compiler_set_error_extra_info(compiler, prefix);
302
0
    return ERROR_UNDEFINED_IDENTIFIER;
303
0
  }
304
305
0
  return ERROR_SUCCESS;
306
0
}
307
308
// Emits the shortest push instruction able to carry the given constant:
//
//   YR_UNDEFINED       -> OP_PUSH_U   (no operand)
//   <= 0xff            -> OP_PUSH_8   (1-byte operand)
//   <= 0xffff          -> OP_PUSH_16  (2-byte operand)
//   <= 0xffffffff      -> OP_PUSH_32  (4-byte operand)
//   anything else      -> OP_PUSH     (8-byte operand)
//
// Operands are copied with memcpy, i.e. in the byte order of the host. The
// opcode and its operand are written to YR_CODE_SECTION in a single
// yr_arena_write_data() call, whose result is returned.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus room for the widest (64-bit) operand.
  uint8_t encoded[9];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    uint16_t narrow = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &narrow, sizeof(narrow));
    encoded_len += sizeof(narrow);
  }
  else if (argument <= 0xffffffff)
  {
    uint32_t narrow = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &narrow, sizeof(narrow));
    encoded_len += sizeof(narrow);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    encoded_len += sizeof(argument);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      encoded_len,
      NULL);
}
347
348
int yr_parser_check_types(
349
    YR_COMPILER* compiler,
350
    YR_OBJECT_FUNCTION* function,
351
    const char* actual_args_fmt)
352
0
{
353
0
  int i;
354
355
0
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
356
0
  {
357
0
    if (function->prototypes[i].arguments_fmt == NULL)
358
0
      break;
359
360
0
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
361
0
      return ERROR_SUCCESS;
362
0
  }
363
364
0
  yr_compiler_set_error_extra_info(compiler, function->identifier)
365
366
0
      return ERROR_WRONG_ARGUMENTS;
367
0
}
368
369
int yr_parser_lookup_string(
370
    yyscan_t yyscanner,
371
    const char* identifier,
372
    YR_STRING** string)
373
0
{
374
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
375
376
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
377
0
      compiler, compiler->current_rule_idx);
378
379
0
  yr_rule_strings_foreach(current_rule, *string)
380
0
  {
381
    // If some string $a gets fragmented into multiple chained
382
    // strings, all those fragments have the same $a identifier
383
    // but we are interested in the heading fragment, which is
384
    // that with chained_to == NULL
385
386
0
    if ((*string)->chained_to == NULL &&
387
0
        strcmp((*string)->identifier, identifier) == 0)
388
0
    {
389
0
      return ERROR_SUCCESS;
390
0
    }
391
0
  }
392
393
0
  yr_compiler_set_error_extra_info(compiler, identifier)
394
395
0
      * string = NULL;
396
397
0
  return ERROR_UNDEFINED_STRING;
398
0
}
399
400
////////////////////////////////////////////////////////////////////////////////
401
// Searches for a variable with the given identifier in the scope of the current
402
// "for" loop. In case of nested "for" loops the identifier is searched starting
403
// at the top-level loop and going down through the nested loops until the
404
// current one. This is ok because inner loops can not re-define an identifier
405
// already defined by an outer loop.
406
//
407
// If the variable is found, the return value is the position that the variable
408
// occupies among all the currently defined variables. If the variable doesn't
409
// exist the return value is -1.
410
//
411
// The function can receive a pointer to a YR_EXPRESSION that will be populated
412
// with information about the variable if found. This pointer can be NULL if
413
// the caller is not interested in getting that information.
414
//
415
int yr_parser_lookup_loop_variable(
416
    yyscan_t yyscanner,
417
    const char* identifier,
418
    YR_EXPRESSION* expr)
419
0
{
420
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
421
0
  int i, j;
422
0
  int var_offset = 0;
423
424
0
  for (i = 0; i <= compiler->loop_index; i++)
425
0
  {
426
0
    var_offset += compiler->loop[i].vars_internal_count;
427
428
0
    for (j = 0; j < compiler->loop[i].vars_count; j++)
429
0
    {
430
0
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
431
0
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
432
0
      {
433
0
        if (expr != NULL)
434
0
          *expr = compiler->loop[i].vars[j];
435
436
0
        return var_offset + j;
437
0
      }
438
0
    }
439
440
0
    var_offset += compiler->loop[i].vars_count;
441
0
  }
442
443
0
  return -1;
444
0
}
445
446
static int _yr_parser_write_string(
447
    const char* identifier,
448
    YR_MODIFIER modifier,
449
    YR_COMPILER* compiler,
450
    SIZED_STRING* str,
451
    RE_AST* re_ast,
452
    YR_ARENA_REF* string_ref,
453
    int* min_atom_quality,
454
    int* num_atom)
455
0
{
456
0
  SIZED_STRING* literal_string;
457
0
  YR_ATOM_LIST_ITEM* atom;
458
0
  YR_ATOM_LIST_ITEM* atom_list = NULL;
459
460
0
  int c, result;
461
0
  int max_string_len;
462
0
  bool free_literal = false;
463
464
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
465
0
      compiler->arena,
466
0
      YR_STRINGS_TABLE,
467
0
      sizeof(YR_STRING),
468
0
      string_ref,
469
0
      offsetof(YR_STRING, identifier),
470
0
      offsetof(YR_STRING, string),
471
0
      offsetof(YR_STRING, chained_to),
472
0
      EOL));
473
474
0
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
475
0
      compiler->arena, string_ref);
476
477
0
  YR_ARENA_REF ref;
478
479
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
480
481
0
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
482
0
  string->rule_idx = compiler->current_rule_idx;
483
0
  string->idx = compiler->current_string_idx;
484
0
  string->fixed_offset = YR_UNDEFINED;
485
486
0
  compiler->current_string_idx++;
487
488
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
489
0
      modifier.flags & STRING_FLAGS_REGEXP ||
490
0
      modifier.flags & STRING_FLAGS_BASE64 ||
491
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
492
0
  {
493
0
    literal_string = yr_re_ast_extract_literal(re_ast);
494
495
0
    if (literal_string != NULL)
496
0
      free_literal = true;
497
0
  }
498
0
  else
499
0
  {
500
0
    literal_string = str;
501
0
  }
502
503
0
  if (literal_string != NULL)
504
0
  {
505
0
    modifier.flags |= STRING_FLAGS_LITERAL;
506
507
0
    result = _yr_compiler_store_data(
508
0
        compiler,
509
0
        literal_string->c_string,
510
0
        literal_string->length + 1,  // +1 to include terminating NULL
511
0
        &ref);
512
513
0
    if (result != ERROR_SUCCESS)
514
0
      goto cleanup;
515
516
0
    string->length = (uint32_t) literal_string->length;
517
0
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
518
519
0
    if (modifier.flags & STRING_FLAGS_WIDE)
520
0
      max_string_len = string->length * 2;
521
0
    else
522
0
      max_string_len = string->length;
523
524
0
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
525
0
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
526
527
0
    result = yr_atoms_extract_from_string(
528
0
        &compiler->atoms_config,
529
0
        (uint8_t*) literal_string->c_string,
530
0
        (int32_t) literal_string->length,
531
0
        modifier,
532
0
        &atom_list,
533
0
        min_atom_quality);
534
535
0
    if (result != ERROR_SUCCESS)
536
0
      goto cleanup;
537
0
  }
538
0
  else
539
0
  {
540
    // Non-literal strings can't be marked as fixed offset because once we
541
    // find a string atom in the scanned data we don't know the offset where
542
    // the string should start, as the non-literal strings can contain
543
    // variable-length portions.
544
0
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
545
546
    // Save the position where the RE forward code starts for later reference.
547
0
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
548
0
        compiler->arena, YR_RE_CODE_SECTION);
549
550
    // Emit forwards code
551
0
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
552
553
0
    if (result != ERROR_SUCCESS)
554
0
      goto cleanup;
555
556
    // Emit backwards code
557
0
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
558
559
0
    if (result != ERROR_SUCCESS)
560
0
      goto cleanup;
561
562
    // Extract atoms from the regular expression.
563
0
    result = yr_atoms_extract_from_re(
564
0
        &compiler->atoms_config,
565
0
        re_ast,
566
0
        modifier,
567
0
        &atom_list,
568
0
        min_atom_quality);
569
570
0
    if (result != ERROR_SUCCESS)
571
0
      goto cleanup;
572
573
    // If no atom was extracted let's add a zero-length atom.
574
0
    if (atom_list == NULL)
575
0
    {
576
0
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
577
578
0
      if (atom_list == NULL)
579
0
      {
580
0
        result = ERROR_INSUFFICIENT_MEMORY;
581
0
        goto cleanup;
582
0
      }
583
584
0
      atom_list->atom.length = 0;
585
0
      atom_list->backtrack = 0;
586
0
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
587
0
      atom_list->next = NULL;
588
589
0
      yr_arena_ptr_to_ref(
590
0
          compiler->arena,
591
0
          yr_arena_get_ptr(
592
0
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
593
0
          &(atom_list->forward_code_ref));
594
0
    }
595
0
  }
596
597
0
  string->flags = modifier.flags;
598
599
  // Add the string to Aho-Corasick automaton.
600
0
  result = yr_ac_add_string(
601
0
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
602
603
0
  if (result != ERROR_SUCCESS)
604
0
    goto cleanup;
605
606
0
  atom = atom_list;
607
0
  c = 0;
608
609
0
  while (atom != NULL)
610
0
  {
611
0
    atom = atom->next;
612
0
    c++;
613
0
  }
614
615
0
  (*num_atom) += c;
616
617
0
cleanup:
618
0
  if (free_literal)
619
0
    yr_free(literal_string);
620
621
0
  if (atom_list != NULL)
622
0
    yr_atoms_list_destroy(atom_list);
623
624
0
  return result;
625
0
}
626
627
static int _yr_parser_check_string_modifiers(
628
    yyscan_t yyscanner,
629
    YR_MODIFIER modifier)
630
0
{
631
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
632
633
  // xor and nocase together is not implemented.
634
0
  if (modifier.flags & STRING_FLAGS_XOR &&
635
0
      modifier.flags & STRING_FLAGS_NO_CASE)
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler, "invalid modifier combination: xor nocase");
639
0
    return ERROR_INVALID_MODIFIER;
640
0
  }
641
642
  // base64 and nocase together is not implemented.
643
0
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
644
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
645
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
646
0
  {
647
0
    yr_compiler_set_error_extra_info(
648
0
        compiler,
649
0
        modifier.flags & STRING_FLAGS_BASE64
650
0
            ? "invalid modifier combination: base64 nocase"
651
0
            : "invalid modifier combination: base64wide nocase");
652
0
    return ERROR_INVALID_MODIFIER;
653
0
  }
654
655
  // base64 and fullword together is not implemented.
656
0
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
657
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
658
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
659
0
  {
660
0
    yr_compiler_set_error_extra_info(
661
0
        compiler,
662
0
        modifier.flags & STRING_FLAGS_BASE64
663
0
            ? "invalid modifier combination: base64 fullword"
664
0
            : "invalid modifier combination: base64wide fullword");
665
0
    return ERROR_INVALID_MODIFIER;
666
0
  }
667
668
  // base64 and xor together is not implemented.
669
0
  if (modifier.flags & STRING_FLAGS_XOR &&
670
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
671
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
672
0
  {
673
0
    yr_compiler_set_error_extra_info(
674
0
        compiler,
675
0
        modifier.flags & STRING_FLAGS_BASE64
676
0
            ? "invalid modifier combination: base64 xor"
677
0
            : "invalid modifier combination: base64wide xor");
678
0
    return ERROR_INVALID_MODIFIER;
679
0
  }
680
681
0
  return ERROR_SUCCESS;
682
0
}
683
684
int yr_parser_reduce_string_declaration(
685
    yyscan_t yyscanner,
686
    YR_MODIFIER modifier,
687
    const char* identifier,
688
    SIZED_STRING* str,
689
    YR_ARENA_REF* string_ref)
690
0
{
691
0
  int result = ERROR_SUCCESS;
692
0
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
693
0
  int atom_quality;
694
695
0
  char message[512];
696
697
0
  int32_t min_gap = 0;
698
0
  int32_t max_gap = 0;
699
700
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
701
702
0
  RE_AST* re_ast = NULL;
703
0
  RE_AST* remainder_re_ast = NULL;
704
0
  RE_ERROR re_error;
705
706
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
707
0
      compiler, compiler->current_rule_idx);
708
709
  // Determine if a string with the same identifier was already defined
710
  // by searching for the identifier in strings_table.
711
0
  uint32_t string_idx = yr_hash_table_lookup_uint32(
712
0
      compiler->strings_table, identifier, NULL);
713
714
  // The string was already defined, return an error.
715
0
  if (string_idx != UINT32_MAX)
716
0
  {
717
0
    yr_compiler_set_error_extra_info(compiler, identifier);
718
0
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
719
0
  }
720
721
  // Empty strings are not allowed.
722
0
  if (str->length == 0)
723
0
  {
724
0
    yr_compiler_set_error_extra_info(compiler, identifier);
725
0
    return ERROR_EMPTY_STRING;
726
0
  }
727
728
0
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
729
0
    modifier.flags |= STRING_FLAGS_NO_CASE;
730
731
0
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
732
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
733
734
  // Hex strings are always handled as DOT_ALL regexps.
735
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
736
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
737
738
0
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
739
0
      !(modifier.flags & STRING_FLAGS_BASE64 ||
740
0
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
741
0
  {
742
0
    modifier.flags |= STRING_FLAGS_ASCII;
743
0
  }
744
745
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
746
  // a single match for the string is enough. This is true in
747
  // most cases, except when the string count (#) and string offset (@)
748
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
749
  // initially, and unmarked later if required.
750
0
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
751
752
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
753
  // need to be searched all over the file because the user is using the
754
  // "at" operator. The string must be searched at a fixed offset in the
755
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
756
  // and unmarked later if required.
757
0
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
758
759
  // If string identifier is $ this is an anonymous string, if not add the
760
  // identifier to strings_table.
761
0
  if (strcmp(identifier, "$") == 0)
762
0
  {
763
0
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
764
0
  }
765
0
  else
766
0
  {
767
0
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
768
0
        compiler->strings_table,
769
0
        identifier,
770
0
        NULL,
771
0
        compiler->current_string_idx));
772
0
  }
773
774
  // Make sure that the the string does not have an invalid combination of
775
  // modifiers.
776
0
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
777
778
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
779
0
      modifier.flags & STRING_FLAGS_REGEXP ||
780
0
      modifier.flags & STRING_FLAGS_BASE64 ||
781
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
782
0
  {
783
0
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
784
0
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
785
0
    else if (modifier.flags & STRING_FLAGS_REGEXP)
786
0
    {
787
0
      int flags = RE_PARSER_FLAG_NONE;
788
0
      if (compiler->strict_escape)
789
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
790
0
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
791
0
    }
792
0
    else
793
0
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
794
795
0
    if (result != ERROR_SUCCESS)
796
0
    {
797
0
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
798
0
      {
799
0
        yywarning(yyscanner, "unknown escape sequence");
800
0
      }
801
0
      else
802
0
      {
803
0
        snprintf(
804
0
            message,
805
0
            sizeof(message),
806
0
            "invalid %s \"%s\": %s",
807
0
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
808
0
                                                        : "regular expression",
809
0
            identifier,
810
0
            re_error.message);
811
812
0
        yr_compiler_set_error_extra_info(compiler, message);
813
0
        goto _exit;
814
0
      }
815
0
    }
816
817
0
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
818
0
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
819
820
0
    if (re_ast->flags & RE_FLAGS_GREEDY)
821
0
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
822
823
    // Regular expressions in the strings section can't mix greedy and
824
    // ungreedy quantifiers like .* and .*?. That's because these regular
825
    // expressions can be matched forwards and/or backwards depending on the
826
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
827
    // be able to properly calculate the length of the match.
828
829
0
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
830
0
        (re_ast->flags & RE_FLAGS_UNGREEDY))
831
0
    {
832
0
      result = ERROR_INVALID_REGULAR_EXPRESSION;
833
834
0
      yr_compiler_set_error_extra_info(
835
0
          compiler,
836
0
          "greedy and ungreedy quantifiers can't be mixed in a regular "
837
0
          "expression");
838
839
0
      goto _exit;
840
0
    }
841
842
0
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
843
0
    {
844
0
      yywarning(
845
0
          yyscanner,
846
0
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
847
0
          "with a reasonable value for N",
848
0
          identifier);
849
0
    }
850
851
0
    if (compiler->re_ast_callback != NULL)
852
0
    {
853
0
      compiler->re_ast_callback(
854
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
855
0
    }
856
857
0
    *string_ref = YR_ARENA_NULL_REF;
858
859
0
    while (re_ast != NULL)
860
0
    {
861
0
      YR_ARENA_REF ref;
862
863
0
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
864
865
0
      int32_t prev_min_gap = min_gap;
866
0
      int32_t prev_max_gap = max_gap;
867
868
0
      result = yr_re_ast_split_at_chaining_point(
869
0
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
870
871
0
      if (result != ERROR_SUCCESS)
872
0
        goto _exit;
873
874
0
      result = _yr_parser_write_string(
875
0
          identifier,
876
0
          modifier,
877
0
          compiler,
878
0
          NULL,
879
0
          re_ast,
880
0
          &ref,
881
0
          &atom_quality,
882
0
          &current_rule->num_atoms);
883
884
0
      if (result != ERROR_SUCCESS)
885
0
        goto _exit;
886
887
0
      if (atom_quality < min_atom_quality)
888
0
        min_atom_quality = atom_quality;
889
890
0
      if (YR_ARENA_IS_NULL_REF(*string_ref))
891
0
      {
892
        // This is the first string in the chain, the string reference
893
        // returned by this function must point to this string.
894
0
        *string_ref = ref;
895
0
      }
896
0
      else
897
0
      {
898
        // This is not the first string in the chain, set the appropriate
899
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
900
        // fields.
901
0
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
902
0
            compiler->arena,
903
0
            YR_STRINGS_TABLE,
904
0
            prev_string_idx * sizeof(YR_STRING));
905
906
0
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
907
0
            compiler->arena, &ref);
908
909
0
        new_string->chained_to = prev_string;
910
0
        new_string->chain_gap_min = prev_min_gap;
911
0
        new_string->chain_gap_max = prev_max_gap;
912
913
        // A string chained to another one can't have a fixed offset, only the
914
        // head of the string chain can have a fixed offset.
915
0
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
916
917
        // There is a previous string, but that string wasn't marked as part
918
        // of a chain because we can't do that until knowing there will be
919
        // another string, let's flag it now the we know.
920
0
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
921
922
        // There is a previous string, so this string is part of a chain, but
923
        // there will be no more strings because there are no more AST to
924
        // split, which means that this is the chain's tail.
925
0
        if (remainder_re_ast == NULL)
926
0
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
927
0
                               STRING_FLAGS_CHAIN_TAIL;
928
0
      }
929
930
0
      yr_re_ast_destroy(re_ast);
931
0
      re_ast = remainder_re_ast;
932
0
    }
933
0
  }
934
0
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
935
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
936
0
  {
937
0
    result = _yr_parser_write_string(
938
0
        identifier,
939
0
        modifier,
940
0
        compiler,
941
0
        str,
942
0
        NULL,
943
0
        string_ref,
944
0
        &min_atom_quality,
945
0
        &current_rule->num_atoms);
946
947
0
    if (result != ERROR_SUCCESS)
948
0
      goto _exit;
949
0
  }
950
951
0
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
952
0
  {
953
0
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
954
0
  }
955
956
0
_exit:
957
958
0
  if (re_ast != NULL)
959
0
    yr_re_ast_destroy(re_ast);
960
961
0
  if (remainder_re_ast != NULL)
962
0
    yr_re_ast_destroy(remainder_re_ast);
963
964
0
  return result;
965
0
}
966
967
static int wildcard_iterator(
968
    void* prefix,
969
    size_t prefix_len,
970
    void* _value,
971
    void* data)
972
0
{
973
0
  const char* identifier = (const char*) data;
974
975
  // If the identifier is prefixed by prefix, then it matches the wildcard.
976
0
  if (!strncmp(prefix, identifier, prefix_len))
977
0
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
978
979
0
  return ERROR_SUCCESS;
980
0
}
981
982
int yr_parser_reduce_rule_declaration_phase_1(
983
    yyscan_t yyscanner,
984
    int32_t flags,
985
    const char* identifier,
986
    YR_ARENA_REF* rule_ref)
987
0
{
988
0
  int result;
989
0
  YR_FIXUP* fixup;
990
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
991
992
0
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
993
0
      compiler->arena,
994
0
      YR_NAMESPACES_TABLE,
995
0
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
996
997
0
  if (yr_hash_table_lookup_uint32(
998
0
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
999
0
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
1000
0
  {
1001
    // A rule or variable with the same identifier already exists, return the
1002
    // appropriate error.
1003
1004
0
    yr_compiler_set_error_extra_info(compiler, identifier);
1005
0
    return ERROR_DUPLICATED_IDENTIFIER;
1006
0
  }
1007
1008
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1009
  // any of them are a prefix of the identifier being declared. If so, return
1010
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1011
0
  result = yr_hash_table_iterate(
1012
0
      compiler->wildcard_identifiers_table,
1013
0
      ns->name,
1014
0
      wildcard_iterator,
1015
0
      (void*) identifier);
1016
1017
0
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1018
0
  {
1019
    // This rule matches an existing wildcard rule set.
1020
0
    yr_compiler_set_error_extra_info(compiler, identifier);
1021
0
  }
1022
1023
0
  FAIL_ON_ERROR(result);
1024
1025
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1026
0
      compiler->arena,
1027
0
      YR_RULES_TABLE,
1028
0
      sizeof(YR_RULE),
1029
0
      rule_ref,
1030
0
      offsetof(YR_RULE, identifier),
1031
0
      offsetof(YR_RULE, tags),
1032
0
      offsetof(YR_RULE, strings),
1033
0
      offsetof(YR_RULE, metas),
1034
0
      offsetof(YR_RULE, ns),
1035
0
      EOL));
1036
1037
0
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1038
1039
0
  YR_ARENA_REF ref;
1040
1041
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1042
1043
0
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1044
0
  rule->flags = flags;
1045
0
  rule->ns = ns;
1046
0
  rule->num_atoms = 0;
1047
1048
0
  YR_ARENA_REF jmp_offset_ref;
1049
1050
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1051
0
  compiler->current_rule_idx = compiler->next_rule_idx;
1052
0
  compiler->next_rule_idx++;
1053
1054
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1055
  // disabled it skips over the rule's code and go straight to the next rule's
1056
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1057
  // is set to 0 as we don't know the jump target yet. When we finish
1058
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1059
  // the jump offset is set to its final value.
1060
1061
0
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1062
0
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1063
1064
0
  FAIL_ON_ERROR(yr_arena_write_data(
1065
0
      compiler->arena,
1066
0
      YR_CODE_SECTION,
1067
0
      &compiler->current_rule_idx,
1068
0
      sizeof(compiler->current_rule_idx),
1069
0
      NULL));
1070
1071
  // Create a fixup entry for the jump and push it in the stack
1072
0
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1073
1074
0
  if (fixup == NULL)
1075
0
    return ERROR_INSUFFICIENT_MEMORY;
1076
1077
0
  fixup->ref = jmp_offset_ref;
1078
0
  fixup->next = compiler->fixup_stack_head;
1079
0
  compiler->fixup_stack_head = fixup;
1080
1081
  // Clean strings_table as we are starting to parse a new rule.
1082
0
  yr_hash_table_clean(compiler->strings_table, NULL);
1083
1084
0
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1085
0
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1086
1087
0
  return ERROR_SUCCESS;
1088
0
}
1089
1090
int yr_parser_reduce_rule_declaration_phase_2(
1091
    yyscan_t yyscanner,
1092
    YR_ARENA_REF* rule_ref)
1093
0
{
1094
0
  uint32_t max_strings_per_rule;
1095
0
  uint32_t strings_in_rule = 0;
1096
1097
0
  YR_FIXUP* fixup;
1098
0
  YR_STRING* string;
1099
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1100
1101
0
  yr_get_configuration_uint32(
1102
0
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1103
1104
0
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1105
1106
  // Show warning if the rule is generating too many atoms. The warning is
1107
  // shown if the number of atoms is greater than 20 times the maximum number
1108
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1109
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1110
1111
0
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1112
0
  {
1113
0
    yywarning(yyscanner, "rule is slowing down scanning");
1114
0
  }
1115
1116
0
  yr_rule_strings_foreach(rule, string)
1117
0
  {
1118
    // Only the heading fragment in a chain of strings (the one with
1119
    // chained_to == NULL) must be referenced. All other fragments
1120
    // are never marked as referenced.
1121
    //
1122
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1123
    // strings must always be referenced.
1124
1125
0
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1126
0
        (STRING_IS_ANONYMOUS(string) ||
1127
0
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1128
0
    {
1129
0
      yr_compiler_set_error_extra_info(
1130
0
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1131
0
    }
1132
1133
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1134
    // that it will match anywhere.
1135
0
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1136
0
        STRING_IS_FIXED_OFFSET(string))
1137
0
    {
1138
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1139
0
    }
1140
1141
0
    strings_in_rule++;
1142
1143
0
    if (strings_in_rule > max_strings_per_rule)
1144
0
    {
1145
0
      yr_compiler_set_error_extra_info(
1146
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1147
0
    }
1148
0
  }
1149
1150
0
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1151
0
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1152
1153
0
  fixup = compiler->fixup_stack_head;
1154
1155
0
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1156
0
      compiler->arena, &fixup->ref);
1157
1158
0
  int32_t jmp_offset = yr_arena_get_current_offset(
1159
0
                           compiler->arena, YR_CODE_SECTION) -
1160
0
                       fixup->ref.offset + 1;
1161
1162
0
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1163
1164
  // Remove fixup from the stack.
1165
0
  compiler->fixup_stack_head = fixup->next;
1166
0
  yr_free(fixup);
1167
1168
  // We have finished parsing the current rule set current_rule_idx to
1169
  // UINT32_MAX indicating that we are not currently parsing a rule.
1170
0
  compiler->current_rule_idx = UINT32_MAX;
1171
1172
0
  return ERROR_SUCCESS;
1173
0
}
1174
1175
int yr_parser_reduce_string_identifier(
1176
    yyscan_t yyscanner,
1177
    const char* identifier,
1178
    uint8_t instruction,
1179
    uint64_t at_offset)
1180
0
{
1181
0
  YR_STRING* string;
1182
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1183
1184
0
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1185
0
  {
1186
0
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1187
0
    {
1188
0
      yr_parser_emit_with_arg(
1189
0
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1190
1191
0
      yr_parser_emit(yyscanner, instruction, NULL);
1192
1193
0
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1194
0
          compiler, compiler->current_rule_idx);
1195
1196
0
      yr_rule_strings_foreach(current_rule, string)
1197
0
      {
1198
0
        if (instruction != OP_FOUND)
1199
0
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1200
1201
0
        if (instruction == OP_FOUND_AT)
1202
0
        {
1203
          // Avoid overwriting any previous fixed offset
1204
0
          if (string->fixed_offset == YR_UNDEFINED)
1205
0
            string->fixed_offset = at_offset;
1206
1207
          // If a previous fixed offset was different, disable
1208
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1209
          // have room to store a single fixed offset value
1210
0
          if (string->fixed_offset != at_offset)
1211
0
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1212
0
        }
1213
0
        else
1214
0
        {
1215
0
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1216
0
        }
1217
0
      }
1218
0
    }
1219
0
    else
1220
0
    {
1221
      // Anonymous strings not allowed outside of a loop
1222
0
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1223
0
    }
1224
0
  }
1225
0
  else
1226
0
  {
1227
0
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1228
1229
0
    FAIL_ON_ERROR(
1230
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1231
1232
0
    if (instruction != OP_FOUND)
1233
0
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1234
1235
0
    if (instruction == OP_FOUND_AT)
1236
0
    {
1237
      // Avoid overwriting any previous fixed offset
1238
1239
0
      if (string->fixed_offset == YR_UNDEFINED)
1240
0
        string->fixed_offset = at_offset;
1241
1242
      // If a previous fixed offset was different, disable
1243
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1244
      // have room to store a single fixed offset value
1245
1246
0
      if (string->fixed_offset == YR_UNDEFINED ||
1247
0
          string->fixed_offset != at_offset)
1248
0
      {
1249
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1250
0
      }
1251
0
    }
1252
0
    else
1253
0
    {
1254
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1255
0
    }
1256
1257
0
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1258
1259
0
    string->flags |= STRING_FLAGS_REFERENCED;
1260
0
  }
1261
1262
0
  return ERROR_SUCCESS;
1263
0
}
1264
1265
int yr_parser_reduce_meta_declaration(
1266
    yyscan_t yyscanner,
1267
    int32_t type,
1268
    const char* identifier,
1269
    const char* string,
1270
    int64_t integer,
1271
    YR_ARENA_REF* meta_ref)
1272
0
{
1273
0
  YR_ARENA_REF ref;
1274
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1275
1276
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1277
0
      compiler->arena,
1278
0
      YR_METAS_TABLE,
1279
0
      sizeof(YR_META),
1280
0
      meta_ref,
1281
0
      offsetof(YR_META, identifier),
1282
0
      offsetof(YR_META, string),
1283
0
      EOL));
1284
1285
0
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1286
1287
0
  meta->type = type;
1288
0
  meta->integer = integer;
1289
1290
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1291
1292
0
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1293
1294
0
  if (string != NULL)
1295
0
  {
1296
0
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1297
1298
0
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1299
0
  }
1300
0
  else
1301
0
  {
1302
0
    meta->string = NULL;
1303
0
  }
1304
1305
0
  compiler->current_meta_idx++;
1306
1307
0
  return ERROR_SUCCESS;
1308
0
}
1309
1310
// Tells whether `module_name` is acceptable as a module name: it must be
// non-empty and its recorded length must equal the length of its C string,
// which rules out names containing an embedded null byte.
//
// Returns a non-zero value for valid names and zero otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1320
1321
// Handles an import statement: registers the module's root object in the
// compiler's objects table, runs the module declarations and emits an
// OP_IMPORT instruction carrying the module name.
//
// Args:
//   yyscanner:   Scanner whose extra data is the YR_COMPILER in use.
//   module_name: Name of the module being imported.
//
// Returns:
//   ERROR_SUCCESS (also when the module was already imported in the current
//   namespace), ERROR_INVALID_MODULE_NAME for an empty name or one with an
//   embedded null byte, or any error propagated from object creation, the
//   hash table, yr_modules_do_declarations or the emitter.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into the table, so this function still holds
    // the only pointer to it; release it instead of leaking it.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the object is reachable through objects_table.
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1378
1379
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1380
0
{
1381
0
  int opcode = 0;
1382
1383
0
  switch (expression_type)
1384
0
  {
1385
0
  case EXPRESSION_TYPE_INTEGER:
1386
0
    opcode = OP_INT_BEGIN;
1387
0
    break;
1388
0
  case EXPRESSION_TYPE_FLOAT:
1389
0
    opcode = OP_DBL_BEGIN;
1390
0
    break;
1391
0
  case EXPRESSION_TYPE_STRING:
1392
0
    opcode = OP_STR_BEGIN;
1393
0
    break;
1394
0
  default:
1395
0
    assert(false);
1396
0
  }
1397
1398
0
  if (op[0] == '<')
1399
0
  {
1400
0
    if (op[1] == '=')
1401
0
      opcode += _OP_LE;
1402
0
    else
1403
0
      opcode += _OP_LT;
1404
0
  }
1405
0
  else if (op[0] == '>')
1406
0
  {
1407
0
    if (op[1] == '=')
1408
0
      opcode += _OP_GE;
1409
0
    else
1410
0
      opcode += _OP_GT;
1411
0
  }
1412
0
  else if (op[1] == '=')
1413
0
  {
1414
0
    if (op[0] == '=')
1415
0
      opcode += _OP_EQ;
1416
0
    else
1417
0
      opcode += _OP_NEQ;
1418
0
  }
1419
0
  else if (op[0] == '+')
1420
0
  {
1421
0
    opcode += _OP_ADD;
1422
0
  }
1423
0
  else if (op[0] == '-')
1424
0
  {
1425
0
    opcode += _OP_SUB;
1426
0
  }
1427
0
  else if (op[0] == '*')
1428
0
  {
1429
0
    opcode += _OP_MUL;
1430
0
  }
1431
0
  else if (op[0] == '\\')
1432
0
  {
1433
0
    opcode += _OP_DIV;
1434
0
  }
1435
1436
0
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1437
0
  {
1438
0
    return opcode;
1439
0
  }
1440
1441
0
  return OP_ERROR;
1442
0
}
1443
1444
int yr_parser_reduce_operation(
1445
    yyscan_t yyscanner,
1446
    const char* op,
1447
    YR_EXPRESSION left_operand,
1448
    YR_EXPRESSION right_operand)
1449
0
{
1450
0
  int expression_type;
1451
1452
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1453
1454
0
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1455
0
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1456
0
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1457
0
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1458
0
  {
1459
0
    if (left_operand.type != right_operand.type)
1460
0
    {
1461
      // One operand is double and the other is integer,
1462
      // cast the integer to double
1463
1464
0
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1465
0
          yyscanner,
1466
0
          OP_INT_TO_DBL,
1467
0
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1468
0
          NULL,
1469
0
          NULL));
1470
0
    }
1471
1472
0
    expression_type = EXPRESSION_TYPE_FLOAT;
1473
1474
0
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1475
0
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1476
0
    {
1477
0
      expression_type = EXPRESSION_TYPE_INTEGER;
1478
0
    }
1479
1480
0
    FAIL_ON_ERROR(yr_parser_emit(
1481
0
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1482
0
  }
1483
0
  else if (
1484
0
      left_operand.type == EXPRESSION_TYPE_STRING &&
1485
0
      right_operand.type == EXPRESSION_TYPE_STRING)
1486
0
  {
1487
0
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1488
1489
0
    if (opcode != OP_ERROR)
1490
0
    {
1491
0
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1492
0
    }
1493
0
    else
1494
0
    {
1495
0
      yr_compiler_set_error_extra_info_fmt(
1496
0
          compiler, "strings don't support \"%s\" operation", op);
1497
1498
0
      return ERROR_WRONG_TYPE;
1499
0
    }
1500
0
  }
1501
0
  else
1502
0
  {
1503
0
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1504
1505
0
    return ERROR_WRONG_TYPE;
1506
0
  }
1507
1508
0
  return ERROR_SUCCESS;
1509
0
}
1510
1511
int yr_parser_mark_nonfast(
1512
   yyscan_t yyscanner,
1513
   YR_STRING_SET string_set
1514
0
) {
1515
0
 YR_COMPILER* compiler = yyget_extra(yyscanner);
1516
1517
0
 YR_STRING_SET_ELEMENT* head = string_set.head;
1518
0
  while (head != NULL) {
1519
0
    YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element);
1520
0
    string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1521
0
    head = head->next;
1522
0
  }
1523
0
  return ERROR_SUCCESS;
1524
0
}