Coverage Report

Created: 2023-06-07 07:18

/src/yara/libyara/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
47
// Converts an uppercase hexadecimal digit character ('0'-'9' or 'A'-'F') into
// its numeric value. Lowercase digits are not recognized. The whole expansion
// and every use of the argument are parenthesized so the macro can be used
// safely inside larger expressions (e.g. todigit(c) << 4). The argument is
// evaluated more than once, so it must not have side effects.
#define todigit(x)                                            \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
50
51
int yr_parser_emit(
52
    yyscan_t yyscanner,
53
    uint8_t instruction,
54
    YR_ARENA_REF* instruction_ref)
55
4
{
56
4
  return yr_arena_write_data(
57
4
      yyget_extra(yyscanner)->arena,
58
4
      YR_CODE_SECTION,
59
4
      &instruction,
60
4
      sizeof(uint8_t),
61
4
      instruction_ref);
62
4
}
63
64
int yr_parser_emit_with_arg_double(
65
    yyscan_t yyscanner,
66
    uint8_t instruction,
67
    double argument,
68
    YR_ARENA_REF* instruction_ref,
69
    YR_ARENA_REF* argument_ref)
70
0
{
71
0
  int result = yr_arena_write_data(
72
0
      yyget_extra(yyscanner)->arena,
73
0
      YR_CODE_SECTION,
74
0
      &instruction,
75
0
      sizeof(uint8_t),
76
0
      instruction_ref);
77
78
0
  if (result == ERROR_SUCCESS)
79
0
    result = yr_arena_write_data(
80
0
        yyget_extra(yyscanner)->arena,
81
0
        YR_CODE_SECTION,
82
0
        &argument,
83
0
        sizeof(double),
84
0
        argument_ref);
85
86
0
  return result;
87
0
}
88
89
int yr_parser_emit_with_arg_int32(
90
    yyscan_t yyscanner,
91
    uint8_t instruction,
92
    int32_t argument,
93
    YR_ARENA_REF* instruction_ref,
94
    YR_ARENA_REF* argument_ref)
95
2
{
96
2
  int result = yr_arena_write_data(
97
2
      yyget_extra(yyscanner)->arena,
98
2
      YR_CODE_SECTION,
99
2
      &instruction,
100
2
      sizeof(uint8_t),
101
2
      instruction_ref);
102
103
2
  if (result == ERROR_SUCCESS)
104
2
    result = yr_arena_write_data(
105
2
        yyget_extra(yyscanner)->arena,
106
2
        YR_CODE_SECTION,
107
2
        &argument,
108
2
        sizeof(int32_t),
109
2
        argument_ref);
110
111
2
  return result;
112
2
}
113
114
int yr_parser_emit_with_arg(
115
    yyscan_t yyscanner,
116
    uint8_t instruction,
117
    int64_t argument,
118
    YR_ARENA_REF* instruction_ref,
119
    YR_ARENA_REF* argument_ref)
120
2
{
121
2
  int result = yr_arena_write_data(
122
2
      yyget_extra(yyscanner)->arena,
123
2
      YR_CODE_SECTION,
124
2
      &instruction,
125
2
      sizeof(uint8_t),
126
2
      instruction_ref);
127
128
2
  if (result == ERROR_SUCCESS)
129
2
    result = yr_arena_write_data(
130
2
        yyget_extra(yyscanner)->arena,
131
2
        YR_CODE_SECTION,
132
2
        &argument,
133
2
        sizeof(int64_t),
134
2
        argument_ref);
135
136
2
  return result;
137
2
}
138
139
int yr_parser_emit_with_arg_reloc(
140
    yyscan_t yyscanner,
141
    uint8_t instruction,
142
    void* argument,
143
    YR_ARENA_REF* instruction_ref,
144
    YR_ARENA_REF* argument_ref)
145
8
{
146
8
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
147
148
8
  DECLARE_REFERENCE(void*, ptr) arg;
149
150
8
  memset(&arg, 0, sizeof(arg));
151
8
  arg.ptr = argument;
152
153
8
  int result = yr_arena_write_data(
154
8
      yyget_extra(yyscanner)->arena,
155
8
      YR_CODE_SECTION,
156
8
      &instruction,
157
8
      sizeof(uint8_t),
158
8
      instruction_ref);
159
160
8
  if (result == ERROR_SUCCESS)
161
8
    result = yr_arena_write_data(
162
8
        yyget_extra(yyscanner)->arena,
163
8
        YR_CODE_SECTION,
164
8
        &arg,
165
8
        sizeof(arg),
166
8
        &ref);
167
168
8
  if (result == ERROR_SUCCESS)
169
8
    result = yr_arena_make_ptr_relocatable(
170
8
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
171
172
8
  if (argument_ref != NULL)
173
0
    *argument_ref = ref;
174
175
8
  return result;
176
8
}
177
178
int yr_parser_emit_pushes_for_strings(
179
    yyscan_t yyscanner,
180
    const char* identifier,
181
    int* count)
182
0
{
183
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
184
185
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
186
0
      compiler, compiler->current_rule_idx);
187
188
0
  YR_STRING* string;
189
190
0
  const char* string_identifier;
191
0
  const char* target_identifier;
192
193
0
  int matching = 0;
194
195
0
  yr_rule_strings_foreach(current_rule, string)
196
0
  {
197
    // Don't generate pushes for strings chained to another one, we are
198
    // only interested in non-chained strings or the head of the chain.
199
200
0
    if (string->chained_to == NULL)
201
0
    {
202
0
      string_identifier = string->identifier;
203
0
      target_identifier = identifier;
204
205
0
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
206
0
             *target_identifier == *string_identifier)
207
0
      {
208
0
        target_identifier++;
209
0
        string_identifier++;
210
0
      }
211
212
0
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
213
0
          *target_identifier == '*')
214
0
      {
215
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
216
217
0
        string->flags |= STRING_FLAGS_REFERENCED;
218
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
219
0
        matching++;
220
0
      }
221
0
    }
222
0
  }
223
224
0
  if (count != NULL)
225
0
  {
226
0
    *count = matching;
227
0
  }
228
229
0
  if (matching == 0)
230
0
  {
231
0
    yr_compiler_set_error_extra_info(
232
0
        compiler, identifier) return ERROR_UNDEFINED_STRING;
233
0
  }
234
235
0
  return ERROR_SUCCESS;
236
0
}
237
238
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
239
// prefix.
240
int yr_parser_emit_pushes_for_rules(
241
    yyscan_t yyscanner,
242
    const char* prefix,
243
    int* count)
244
0
{
245
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
246
247
  // Make sure the compiler is parsing a rule
248
0
  assert(compiler->current_rule_idx != UINT32_MAX);
249
250
0
  YR_RULE* rule;
251
0
  int matching = 0;
252
253
0
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
254
0
      compiler->arena,
255
0
      YR_NAMESPACES_TABLE,
256
0
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
257
258
  // Can't use yr_rules_foreach here as that requires the rules to have been
259
  // finalized (inserting a NULL rule at the end). This is done when
260
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
261
  // into the current position in the code arena. Obviously we aren't done
262
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
263
  // this I'm manually walking all the currently compiled rules (up to the
264
  // current rule index) and comparing identifiers to see if it is one we should
265
  // use.
266
  //
267
  // Further, we have to get compiler->current_rule_idx before we start because
268
  // if we emit an OP_PUSH_RULE
269
0
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
270
271
0
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
272
0
  {
273
    // Is rule->identifier prefixed by prefix?
274
0
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
275
0
    {
276
0
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
277
0
          compiler->rules_table, rule->identifier, ns->name);
278
279
0
      if (rule_idx != UINT32_MAX)
280
0
      {
281
0
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
282
0
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
283
0
        matching++;
284
0
      }
285
0
    }
286
287
0
    rule++;
288
0
  }
289
290
0
  if (count != NULL)
291
0
  {
292
0
    *count = matching;
293
0
  }
294
295
0
  if (matching == 0)
296
0
  {
297
0
    yr_compiler_set_error_extra_info(compiler, prefix);
298
0
    return ERROR_UNDEFINED_IDENTIFIER;
299
0
  }
300
301
0
  return ERROR_SUCCESS;
302
0
}
303
304
// Emits a push instruction for a constant, choosing the shortest encoding
// able to hold the value:
//
//   YR_UNDEFINED          -> OP_PUSH_U, no operand
//   value <= 0xff         -> OP_PUSH_8,  1-byte operand
//   value <= 0xffff       -> OP_PUSH_16, 2-byte operand
//   value <= 0xffffffff   -> OP_PUSH_32, 4-byte operand
//   anything else         -> OP_PUSH,    8-byte operand
//
// Multi-byte operands are copied with memcpy from the native integer, so
// they are stored in host byte order. The opcode and its operand are written
// to the code section with a single yr_arena_write_data call, whose result is
// returned.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus the largest possible operand.
  uint8_t encoded[1 + sizeof(uint64_t)];
  int encoded_len = 1;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument > 0xffffffff)
  {
    encoded[0] = OP_PUSH;
    memcpy(encoded + 1, &argument, sizeof(uint64_t));
    encoded_len += (int) sizeof(uint64_t);
  }
  else if (argument > 0xffff)
  {
    uint32_t narrowed = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(encoded + 1, &narrowed, sizeof(uint32_t));
    encoded_len += (int) sizeof(uint32_t);
  }
  else if (argument > 0xff)
  {
    uint16_t narrowed = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(encoded + 1, &narrowed, sizeof(uint16_t));
    encoded_len += (int) sizeof(uint16_t);
  }
  else
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    encoded_len += (int) sizeof(uint8_t);
  }

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  return yr_arena_write_data(
      compiler->arena, YR_CODE_SECTION, encoded, encoded_len, NULL);
}
343
344
int yr_parser_check_types(
345
    YR_COMPILER* compiler,
346
    YR_OBJECT_FUNCTION* function,
347
    const char* actual_args_fmt)
348
0
{
349
0
  int i;
350
351
0
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
352
0
  {
353
0
    if (function->prototypes[i].arguments_fmt == NULL)
354
0
      break;
355
356
0
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
357
0
      return ERROR_SUCCESS;
358
0
  }
359
360
0
  yr_compiler_set_error_extra_info(compiler, function->identifier)
361
362
0
      return ERROR_WRONG_ARGUMENTS;
363
0
}
364
365
int yr_parser_lookup_string(
366
    yyscan_t yyscanner,
367
    const char* identifier,
368
    YR_STRING** string)
369
0
{
370
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
371
372
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
373
0
      compiler, compiler->current_rule_idx);
374
375
0
  yr_rule_strings_foreach(current_rule, *string)
376
0
  {
377
    // If some string $a gets fragmented into multiple chained
378
    // strings, all those fragments have the same $a identifier
379
    // but we are interested in the heading fragment, which is
380
    // that with chained_to == NULL
381
382
0
    if ((*string)->chained_to == NULL &&
383
0
        strcmp((*string)->identifier, identifier) == 0)
384
0
    {
385
0
      return ERROR_SUCCESS;
386
0
    }
387
0
  }
388
389
0
  yr_compiler_set_error_extra_info(compiler, identifier)
390
391
0
      * string = NULL;
392
393
0
  return ERROR_UNDEFINED_STRING;
394
0
}
395
396
////////////////////////////////////////////////////////////////////////////////
397
// Searches for a variable with the given identifier in the scope of the current
398
// "for" loop. In case of nested "for" loops the identifier is searched starting
399
// at the top-level loop and going down thorough the nested loops until the
400
// current one. This is ok because inner loops can not re-define an identifier
401
// already defined by an outer loop.
402
//
403
// If the variable is found, the return value is the position that the variable
404
// occupies among all the currently defined variables. If the variable doesn't
405
// exist the return value is -1.
406
//
407
// The function can receive a pointer to a YR_EXPRESSION that will populated
408
// with information about the variable if found. This pointer can be NULL if
409
// the caller is not interested in getting that information.
410
//
411
int yr_parser_lookup_loop_variable(
412
    yyscan_t yyscanner,
413
    const char* identifier,
414
    YR_EXPRESSION* expr)
415
2
{
416
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
417
2
  int i, j;
418
2
  int var_offset = 0;
419
420
2
  for (i = 0; i <= compiler->loop_index; i++)
421
0
  {
422
0
    var_offset += compiler->loop[i].vars_internal_count;
423
424
0
    for (j = 0; j < compiler->loop[i].vars_count; j++)
425
0
    {
426
0
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
427
0
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
428
0
      {
429
0
        if (expr != NULL)
430
0
          *expr = compiler->loop[i].vars[j];
431
432
0
        return var_offset + j;
433
0
      }
434
0
    }
435
436
0
    var_offset += compiler->loop[i].vars_count;
437
0
  }
438
439
2
  return -1;
440
2
}
441
442
static int _yr_parser_write_string(
443
    const char* identifier,
444
    YR_MODIFIER modifier,
445
    YR_COMPILER* compiler,
446
    SIZED_STRING* str,
447
    RE_AST* re_ast,
448
    YR_ARENA_REF* string_ref,
449
    int* min_atom_quality,
450
    int* num_atom)
451
0
{
452
0
  SIZED_STRING* literal_string;
453
0
  YR_ATOM_LIST_ITEM* atom;
454
0
  YR_ATOM_LIST_ITEM* atom_list = NULL;
455
456
0
  int c, result;
457
0
  int max_string_len;
458
0
  bool free_literal = false;
459
460
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
461
0
      compiler->arena,
462
0
      YR_STRINGS_TABLE,
463
0
      sizeof(YR_STRING),
464
0
      string_ref,
465
0
      offsetof(YR_STRING, identifier),
466
0
      offsetof(YR_STRING, string),
467
0
      offsetof(YR_STRING, chained_to),
468
0
      EOL));
469
470
0
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
471
0
      compiler->arena, string_ref);
472
473
0
  YR_ARENA_REF ref;
474
475
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
476
477
0
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
478
479
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
480
0
      modifier.flags & STRING_FLAGS_REGEXP ||
481
0
      modifier.flags & STRING_FLAGS_BASE64 ||
482
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
483
0
  {
484
0
    literal_string = yr_re_ast_extract_literal(re_ast);
485
486
0
    if (literal_string != NULL)
487
0
      free_literal = true;
488
0
  }
489
0
  else
490
0
  {
491
0
    literal_string = str;
492
0
  }
493
494
0
  if (literal_string != NULL)
495
0
  {
496
0
    modifier.flags |= STRING_FLAGS_LITERAL;
497
498
0
    result = _yr_compiler_store_data(
499
0
        compiler,
500
0
        literal_string->c_string,
501
0
        literal_string->length + 1,  // +1 to include terminating NULL
502
0
        &ref);
503
504
0
    string->length = (uint32_t) literal_string->length;
505
0
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
506
507
0
    if (result == ERROR_SUCCESS)
508
0
    {
509
0
      result = yr_atoms_extract_from_string(
510
0
          &compiler->atoms_config,
511
0
          (uint8_t*) literal_string->c_string,
512
0
          (int32_t) literal_string->length,
513
0
          modifier,
514
0
          &atom_list,
515
0
          min_atom_quality);
516
0
    }
517
0
  }
518
0
  else
519
0
  {
520
    // Non-literal strings can't be marked as fixed offset because once we
521
    // find a string atom in the scanned data we don't know the offset where
522
    // the string should start, as the non-literal strings can contain
523
    // variable-length portions.
524
0
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
525
526
    // Emit forwards code
527
0
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
528
529
    // Emit backwards code
530
0
    if (result == ERROR_SUCCESS)
531
0
      result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
532
533
0
    if (result == ERROR_SUCCESS)
534
0
      result = yr_atoms_extract_from_re(
535
0
          &compiler->atoms_config,
536
0
          re_ast,
537
0
          modifier,
538
0
          &atom_list,
539
0
          min_atom_quality);
540
0
  }
541
542
0
  string->flags = modifier.flags;
543
0
  string->rule_idx = compiler->current_rule_idx;
544
0
  string->idx = compiler->current_string_idx;
545
0
  string->fixed_offset = YR_UNDEFINED;
546
547
0
  if (result == ERROR_SUCCESS)
548
0
  {
549
    // Add the string to Aho-Corasick automaton.
550
0
    result = yr_ac_add_string(
551
0
        compiler->automaton,
552
0
        string,
553
0
        compiler->current_string_idx,
554
0
        atom_list,
555
0
        compiler->arena);
556
0
  }
557
558
0
  if (modifier.flags & STRING_FLAGS_LITERAL)
559
0
  {
560
0
    if (modifier.flags & STRING_FLAGS_WIDE)
561
0
      max_string_len = string->length * 2;
562
0
    else
563
0
      max_string_len = string->length;
564
565
0
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
566
0
      string->flags |= STRING_FLAGS_FITS_IN_ATOM;
567
0
  }
568
569
0
  atom = atom_list;
570
0
  c = 0;
571
572
0
  while (atom != NULL)
573
0
  {
574
0
    atom = atom->next;
575
0
    c++;
576
0
  }
577
578
0
  (*num_atom) += c;
579
580
0
  compiler->current_string_idx++;
581
582
0
  if (free_literal)
583
0
    yr_free(literal_string);
584
585
0
  if (atom_list != NULL)
586
0
    yr_atoms_list_destroy(atom_list);
587
588
0
  return result;
589
0
}
590
591
static int _yr_parser_check_string_modifiers(
592
    yyscan_t yyscanner,
593
    YR_MODIFIER modifier)
594
0
{
595
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
596
597
  // xor and nocase together is not implemented.
598
0
  if (modifier.flags & STRING_FLAGS_XOR &&
599
0
      modifier.flags & STRING_FLAGS_NO_CASE)
600
0
  {
601
0
    yr_compiler_set_error_extra_info(
602
0
        compiler, "invalid modifier combination: xor nocase");
603
0
    return ERROR_INVALID_MODIFIER;
604
0
  }
605
606
  // base64 and nocase together is not implemented.
607
0
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
608
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
609
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
610
0
  {
611
0
    yr_compiler_set_error_extra_info(
612
0
        compiler,
613
0
        modifier.flags & STRING_FLAGS_BASE64
614
0
            ? "invalid modifier combination: base64 nocase"
615
0
            : "invalid modifier combination: base64wide nocase");
616
0
    return ERROR_INVALID_MODIFIER;
617
0
  }
618
619
  // base64 and fullword together is not implemented.
620
0
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
621
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
622
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
623
0
  {
624
0
    yr_compiler_set_error_extra_info(
625
0
        compiler,
626
0
        modifier.flags & STRING_FLAGS_BASE64
627
0
            ? "invalid modifier combination: base64 fullword"
628
0
            : "invalid modifier combination: base64wide fullword");
629
0
    return ERROR_INVALID_MODIFIER;
630
0
  }
631
632
  // base64 and xor together is not implemented.
633
0
  if (modifier.flags & STRING_FLAGS_XOR &&
634
0
      (modifier.flags & STRING_FLAGS_BASE64 ||
635
0
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler,
639
0
        modifier.flags & STRING_FLAGS_BASE64
640
0
            ? "invalid modifier combination: base64 xor"
641
0
            : "invalid modifier combination: base64wide xor");
642
0
    return ERROR_INVALID_MODIFIER;
643
0
  }
644
645
0
  return ERROR_SUCCESS;
646
0
}
647
648
int yr_parser_reduce_string_declaration(
649
    yyscan_t yyscanner,
650
    YR_MODIFIER modifier,
651
    const char* identifier,
652
    SIZED_STRING* str,
653
    YR_ARENA_REF* string_ref)
654
0
{
655
0
  int result = ERROR_SUCCESS;
656
0
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
657
0
  int atom_quality;
658
659
0
  char message[512];
660
661
0
  int32_t min_gap = 0;
662
0
  int32_t max_gap = 0;
663
664
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
665
666
0
  RE_AST* re_ast = NULL;
667
0
  RE_AST* remainder_re_ast = NULL;
668
0
  RE_ERROR re_error;
669
670
0
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
671
0
      compiler, compiler->current_rule_idx);
672
673
  // Determine if a string with the same identifier was already defined
674
  // by searching for the identifier in strings_table.
675
0
  uint32_t string_idx = yr_hash_table_lookup_uint32(
676
0
      compiler->strings_table, identifier, NULL);
677
678
  // The string was already defined, return an error.
679
0
  if (string_idx != UINT32_MAX)
680
0
  {
681
0
    yr_compiler_set_error_extra_info(compiler, identifier);
682
0
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
683
0
  }
684
685
  // Empty strings are not allowed.
686
0
  if (str->length == 0)
687
0
  {
688
0
    yr_compiler_set_error_extra_info(compiler, identifier);
689
0
    return ERROR_EMPTY_STRING;
690
0
  }
691
692
0
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
693
0
    modifier.flags |= STRING_FLAGS_NO_CASE;
694
695
0
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
696
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
697
698
  // Hex strings are always handled as DOT_ALL regexps.
699
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
700
0
    modifier.flags |= STRING_FLAGS_DOT_ALL;
701
702
0
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
703
0
      !(modifier.flags & STRING_FLAGS_BASE64 ||
704
0
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
705
0
  {
706
0
    modifier.flags |= STRING_FLAGS_ASCII;
707
0
  }
708
709
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
710
  // a single match for the string is enough. This is true in
711
  // most cases, except when the string count (#) and string offset (@)
712
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
713
  // initially, and unmarked later if required.
714
0
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
715
716
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
717
  // need to be searched all over the file because the user is using the
718
  // "at" operator. The string must be searched at a fixed offset in the
719
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
720
  // and unmarked later if required.
721
0
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
722
723
  // If string identifier is $ this is an anonymous string, if not add the
724
  // identifier to strings_table.
725
0
  if (strcmp(identifier, "$") == 0)
726
0
  {
727
0
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
728
0
  }
729
0
  else
730
0
  {
731
0
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
732
0
        compiler->strings_table,
733
0
        identifier,
734
0
        NULL,
735
0
        compiler->current_string_idx));
736
0
  }
737
738
  // Make sure that the the string does not have an invalid combination of
739
  // modifiers.
740
0
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
741
742
0
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
743
0
      modifier.flags & STRING_FLAGS_REGEXP ||
744
0
      modifier.flags & STRING_FLAGS_BASE64 ||
745
0
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
746
0
  {
747
0
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
748
0
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
749
0
    else if (modifier.flags & STRING_FLAGS_REGEXP)
750
0
      result = yr_re_parse(str->c_string, &re_ast, &re_error);
751
0
    else
752
0
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
753
754
0
    if (result != ERROR_SUCCESS)
755
0
    {
756
0
      snprintf(
757
0
          message,
758
0
          sizeof(message),
759
0
          "invalid %s \"%s\": %s",
760
0
          (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
761
0
                                                      : "regular expression",
762
0
          identifier,
763
0
          re_error.message);
764
765
0
      yr_compiler_set_error_extra_info(compiler, message);
766
0
      goto _exit;
767
0
    }
768
769
0
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
770
0
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
771
772
0
    if (re_ast->flags & RE_FLAGS_GREEDY)
773
0
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
774
775
    // Regular expressions in the strings section can't mix greedy and
776
    // ungreedy quantifiers like .* and .*?. That's because these regular
777
    // expressions can be matched forwards and/or backwards depending on the
778
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
779
    // be able to properly calculate the length of the match.
780
781
0
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
782
0
        (re_ast->flags & RE_FLAGS_UNGREEDY))
783
0
    {
784
0
      result = ERROR_INVALID_REGULAR_EXPRESSION;
785
786
0
      yr_compiler_set_error_extra_info(
787
0
          compiler,
788
0
          "greedy and ungreedy quantifiers can't be mixed in a regular "
789
0
          "expression");
790
791
0
      goto _exit;
792
0
    }
793
794
0
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
795
0
    {
796
0
      yywarning(
797
0
          yyscanner,
798
0
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
799
0
          "with a reasonable value for N",
800
0
          identifier);
801
0
    }
802
803
0
    if (compiler->re_ast_callback != NULL)
804
0
    {
805
0
      compiler->re_ast_callback(
806
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
807
0
    }
808
809
0
    *string_ref = YR_ARENA_NULL_REF;
810
811
0
    while (re_ast != NULL)
812
0
    {
813
0
      YR_ARENA_REF ref;
814
815
0
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
816
817
0
      int32_t prev_min_gap = min_gap;
818
0
      int32_t prev_max_gap = max_gap;
819
820
0
      result = yr_re_ast_split_at_chaining_point(
821
0
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
822
823
0
      if (result != ERROR_SUCCESS)
824
0
        goto _exit;
825
826
0
      result = _yr_parser_write_string(
827
0
          identifier,
828
0
          modifier,
829
0
          compiler,
830
0
          NULL,
831
0
          re_ast,
832
0
          &ref,
833
0
          &atom_quality,
834
0
          &current_rule->num_atoms);
835
836
0
      if (result != ERROR_SUCCESS)
837
0
        goto _exit;
838
839
0
      if (atom_quality < min_atom_quality)
840
0
        min_atom_quality = atom_quality;
841
842
0
      if (YR_ARENA_IS_NULL_REF(*string_ref))
843
0
      {
844
        // This is the first string in the chain, the string reference
845
        // returned by this function must point to this string.
846
0
        *string_ref = ref;
847
0
      }
848
0
      else
849
0
      {
850
        // This is not the first string in the chain, set the appropriate
851
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
852
        // fields.
853
0
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
854
0
            compiler->arena,
855
0
            YR_STRINGS_TABLE,
856
0
            prev_string_idx * sizeof(YR_STRING));
857
858
0
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
859
0
            compiler->arena, &ref);
860
861
0
        new_string->chained_to = prev_string;
862
0
        new_string->chain_gap_min = prev_min_gap;
863
0
        new_string->chain_gap_max = prev_max_gap;
864
865
        // A string chained to another one can't have a fixed offset, only the
866
        // head of the string chain can have a fixed offset.
867
0
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
868
869
        // There is a previous string, but that string wasn't marked as part
870
        // of a chain because we can't do that until knowing there will be
871
        // another string, let's flag it now the we know.
872
0
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
873
874
        // There is a previous string, so this string is part of a chain, but
875
        // there will be no more strings because there are no more AST to
876
        // split, which means that this is the chain's tail.
877
0
        if (remainder_re_ast == NULL)
878
0
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
879
0
                               STRING_FLAGS_CHAIN_TAIL;
880
0
      }
881
882
0
      yr_re_ast_destroy(re_ast);
883
0
      re_ast = remainder_re_ast;
884
0
    }
885
0
  }
886
0
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
887
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
888
0
  {
889
0
    result = _yr_parser_write_string(
890
0
        identifier,
891
0
        modifier,
892
0
        compiler,
893
0
        str,
894
0
        NULL,
895
0
        string_ref,
896
0
        &min_atom_quality,
897
0
        &current_rule->num_atoms);
898
899
0
    if (result != ERROR_SUCCESS)
900
0
      goto _exit;
901
0
  }
902
903
0
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
904
0
  {
905
0
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
906
0
  }
907
908
0
_exit:
909
910
0
  if (re_ast != NULL)
911
0
    yr_re_ast_destroy(re_ast);
912
913
0
  if (remainder_re_ast != NULL)
914
0
    yr_re_ast_destroy(remainder_re_ast);
915
916
0
  return result;
917
0
}
918
919
static int wildcard_iterator(
920
    void* prefix,
921
    size_t prefix_len,
922
    void* _value,
923
    void* data)
924
0
{
925
0
  const char* identifier = (const char*) data;
926
927
  // If the identifier is prefixed by prefix, then it matches the wildcard.
928
0
  if (!strncmp(prefix, identifier, prefix_len))
929
0
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
930
931
0
  return ERROR_SUCCESS;
932
0
}
933
934
int yr_parser_reduce_rule_declaration_phase_1(
935
    yyscan_t yyscanner,
936
    int32_t flags,
937
    const char* identifier,
938
    YR_ARENA_REF* rule_ref)
939
2
{
940
2
  int result;
941
2
  YR_FIXUP* fixup;
942
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
943
944
2
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
945
2
      compiler->arena,
946
2
      YR_NAMESPACES_TABLE,
947
2
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
948
949
2
  if (yr_hash_table_lookup_uint32(
950
2
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
951
2
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
952
0
  {
953
    // A rule or variable with the same identifier already exists, return the
954
    // appropriate error.
955
956
0
    yr_compiler_set_error_extra_info(compiler, identifier);
957
0
    return ERROR_DUPLICATED_IDENTIFIER;
958
0
  }
959
960
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
961
  // any of them are a prefix of the identifier being declared. If so, return
962
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
963
2
  result = yr_hash_table_iterate(
964
2
      compiler->wildcard_identifiers_table,
965
2
      ns->name,
966
2
      wildcard_iterator,
967
2
      (void*) identifier);
968
969
2
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
970
0
  {
971
    // This rule matches an existing wildcard rule set.
972
0
    yr_compiler_set_error_extra_info(compiler, identifier);
973
0
  }
974
975
2
  FAIL_ON_ERROR(result);
976
977
2
  FAIL_ON_ERROR(yr_arena_allocate_struct(
978
2
      compiler->arena,
979
2
      YR_RULES_TABLE,
980
2
      sizeof(YR_RULE),
981
2
      rule_ref,
982
2
      offsetof(YR_RULE, identifier),
983
2
      offsetof(YR_RULE, tags),
984
2
      offsetof(YR_RULE, strings),
985
2
      offsetof(YR_RULE, metas),
986
2
      offsetof(YR_RULE, ns),
987
2
      EOL));
988
989
2
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
990
991
2
  YR_ARENA_REF ref;
992
993
2
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
994
995
2
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
996
2
  rule->flags = flags;
997
2
  rule->ns = ns;
998
2
  rule->num_atoms = 0;
999
1000
2
  YR_ARENA_REF jmp_offset_ref;
1001
1002
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1003
2
  compiler->current_rule_idx = compiler->next_rule_idx;
1004
2
  compiler->next_rule_idx++;
1005
1006
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1007
  // disabled it skips over the rule's code and go straight to the next rule's
1008
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1009
  // is set to 0 as we don't know the jump target yet. When we finish
1010
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1011
  // the jump offset is set to its final value.
1012
1013
2
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1014
2
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1015
1016
2
  FAIL_ON_ERROR(yr_arena_write_data(
1017
2
      compiler->arena,
1018
2
      YR_CODE_SECTION,
1019
2
      &compiler->current_rule_idx,
1020
2
      sizeof(compiler->current_rule_idx),
1021
2
      NULL));
1022
1023
  // Create a fixup entry for the jump and push it in the stack
1024
2
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1025
1026
2
  if (fixup == NULL)
1027
0
    return ERROR_INSUFFICIENT_MEMORY;
1028
1029
2
  fixup->ref = jmp_offset_ref;
1030
2
  fixup->next = compiler->fixup_stack_head;
1031
2
  compiler->fixup_stack_head = fixup;
1032
1033
  // Clean strings_table as we are starting to parse a new rule.
1034
2
  yr_hash_table_clean(compiler->strings_table, NULL);
1035
1036
2
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1037
2
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1038
1039
2
  return ERROR_SUCCESS;
1040
2
}
1041
1042
int yr_parser_reduce_rule_declaration_phase_2(
1043
    yyscan_t yyscanner,
1044
    YR_ARENA_REF* rule_ref)
1045
2
{
1046
2
  uint32_t max_strings_per_rule;
1047
2
  uint32_t strings_in_rule = 0;
1048
1049
2
  YR_FIXUP* fixup;
1050
2
  YR_STRING* string;
1051
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1052
1053
2
  yr_get_configuration_uint32(
1054
2
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1055
1056
2
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1057
1058
  // Show warning if the rule is generating too many atoms. The warning is
1059
  // shown if the number of atoms is greater than 20 times the maximum number
1060
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1061
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1062
1063
2
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1064
0
  {
1065
0
    yywarning(yyscanner, "rule is slowing down scanning");
1066
0
  }
1067
1068
2
  yr_rule_strings_foreach(rule, string)
1069
0
  {
1070
    // Only the heading fragment in a chain of strings (the one with
1071
    // chained_to == NULL) must be referenced. All other fragments
1072
    // are never marked as referenced.
1073
1074
0
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL)
1075
0
    {
1076
0
      yr_compiler_set_error_extra_info(
1077
0
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1078
0
    }
1079
1080
0
    strings_in_rule++;
1081
1082
0
    if (strings_in_rule > max_strings_per_rule)
1083
0
    {
1084
0
      yr_compiler_set_error_extra_info(
1085
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1086
0
    }
1087
0
  }
1088
1089
2
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1090
2
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1091
1092
2
  fixup = compiler->fixup_stack_head;
1093
1094
2
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1095
2
      compiler->arena, &fixup->ref);
1096
1097
2
  int32_t jmp_offset = yr_arena_get_current_offset(
1098
2
                           compiler->arena, YR_CODE_SECTION) -
1099
2
                       fixup->ref.offset + 1;
1100
1101
2
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1102
1103
  // Remove fixup from the stack.
1104
2
  compiler->fixup_stack_head = fixup->next;
1105
2
  yr_free(fixup);
1106
1107
  // We have finished parsing the current rule set current_rule_idx to
1108
  // UINT32_MAX indicating that we are not currently parsing a rule.
1109
2
  compiler->current_rule_idx = UINT32_MAX;
1110
1111
2
  return ERROR_SUCCESS;
1112
2
}
1113
1114
int yr_parser_reduce_string_identifier(
1115
    yyscan_t yyscanner,
1116
    const char* identifier,
1117
    uint8_t instruction,
1118
    uint64_t at_offset)
1119
0
{
1120
0
  YR_STRING* string;
1121
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1122
1123
0
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1124
0
  {
1125
0
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1126
0
    {
1127
0
      yr_parser_emit_with_arg(
1128
0
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1129
1130
0
      yr_parser_emit(yyscanner, instruction, NULL);
1131
1132
0
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1133
0
          compiler, compiler->current_rule_idx);
1134
1135
0
      yr_rule_strings_foreach(current_rule, string)
1136
0
      {
1137
0
        if (instruction != OP_FOUND)
1138
0
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1139
1140
0
        if (instruction == OP_FOUND_AT)
1141
0
        {
1142
          // Avoid overwriting any previous fixed offset
1143
0
          if (string->fixed_offset == YR_UNDEFINED)
1144
0
            string->fixed_offset = at_offset;
1145
1146
          // If a previous fixed offset was different, disable
1147
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1148
          // have room to store a single fixed offset value
1149
0
          if (string->fixed_offset != at_offset)
1150
0
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1151
0
        }
1152
0
        else
1153
0
        {
1154
0
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1155
0
        }
1156
0
      }
1157
0
    }
1158
0
    else
1159
0
    {
1160
      // Anonymous strings not allowed outside of a loop
1161
0
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1162
0
    }
1163
0
  }
1164
0
  else
1165
0
  {
1166
0
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1167
1168
0
    FAIL_ON_ERROR(
1169
0
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1170
1171
0
    if (instruction != OP_FOUND)
1172
0
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1173
1174
0
    if (instruction == OP_FOUND_AT)
1175
0
    {
1176
      // Avoid overwriting any previous fixed offset
1177
1178
0
      if (string->fixed_offset == YR_UNDEFINED)
1179
0
        string->fixed_offset = at_offset;
1180
1181
      // If a previous fixed offset was different, disable
1182
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1183
      // have room to store a single fixed offset value
1184
1185
0
      if (string->fixed_offset == YR_UNDEFINED ||
1186
0
          string->fixed_offset != at_offset)
1187
0
      {
1188
0
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1189
0
      }
1190
0
    }
1191
0
    else
1192
0
    {
1193
0
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1194
0
    }
1195
1196
0
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1197
1198
0
    string->flags |= STRING_FLAGS_REFERENCED;
1199
0
  }
1200
1201
0
  return ERROR_SUCCESS;
1202
0
}
1203
1204
int yr_parser_reduce_meta_declaration(
1205
    yyscan_t yyscanner,
1206
    int32_t type,
1207
    const char* identifier,
1208
    const char* string,
1209
    int64_t integer,
1210
    YR_ARENA_REF* meta_ref)
1211
0
{
1212
0
  YR_ARENA_REF ref;
1213
0
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1214
1215
0
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1216
0
      compiler->arena,
1217
0
      YR_METAS_TABLE,
1218
0
      sizeof(YR_META),
1219
0
      meta_ref,
1220
0
      offsetof(YR_META, identifier),
1221
0
      offsetof(YR_META, string),
1222
0
      EOL));
1223
1224
0
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1225
1226
0
  meta->type = type;
1227
0
  meta->integer = integer;
1228
1229
0
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1230
1231
0
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1232
1233
0
  if (string != NULL)
1234
0
  {
1235
0
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1236
1237
0
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1238
0
  }
1239
0
  else
1240
0
  {
1241
0
    meta->string = NULL;
1242
0
  }
1243
1244
0
  compiler->current_meta_idx++;
1245
1246
0
  return ERROR_SUCCESS;
1247
0
}
1248
1249
// Tells whether `module_name` can be used as a module name.
//
// The name is valid when it is not empty and strlen() of its c_string equals
// its recorded length, i.e. there is no NUL byte before the recorded end.
//
// Returns:
//   Non-zero (true) when valid, zero (false) otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  const bool is_empty = (module_name->length == 0);

  // strlen() is evaluated only for non-empty names.
  return !is_empty &&
         strlen(module_name->c_string) == module_name->length;
}
1259
1260
// Handles an import statement for the module named `module_name`.
//
// If the module has not been imported yet in the current namespace, a
// structure object is created for it, registered in compiler->objects_table,
// populated through yr_modules_do_declarations, and an OP_IMPORT instruction
// referencing the stored module name is emitted.
//
// Args:
//   yyscanner: Scanner whose extra data is the YR_COMPILER.
//   module_name: Name of the module being imported.
//
// Returns:
//   ERROR_SUCCESS (also when the module was already imported),
//   ERROR_INVALID_MODULE_NAME, or any error propagated from the helpers
//   called here. For ERROR_INVALID_MODULE_NAME and ERROR_UNKNOWN_MODULE the
//   module name is recorded as error extra info.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into objects_table, so nothing else holds a
    // pointer to it; release it here to avoid leaking it.
    yr_object_destroy(module_structure);

    return result;
  }

  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1317
1318
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1319
2
{
1320
2
  int opcode = 0;
1321
1322
2
  switch (expression_type)
1323
2
  {
1324
0
  case EXPRESSION_TYPE_INTEGER:
1325
0
    opcode = OP_INT_BEGIN;
1326
0
    break;
1327
0
  case EXPRESSION_TYPE_FLOAT:
1328
0
    opcode = OP_DBL_BEGIN;
1329
0
    break;
1330
2
  case EXPRESSION_TYPE_STRING:
1331
2
    opcode = OP_STR_BEGIN;
1332
2
    break;
1333
0
  default:
1334
0
    assert(false);
1335
2
  }
1336
1337
2
  if (op[0] == '<')
1338
0
  {
1339
0
    if (op[1] == '=')
1340
0
      opcode += _OP_LE;
1341
0
    else
1342
0
      opcode += _OP_LT;
1343
0
  }
1344
2
  else if (op[0] == '>')
1345
0
  {
1346
0
    if (op[1] == '=')
1347
0
      opcode += _OP_GE;
1348
0
    else
1349
0
      opcode += _OP_GT;
1350
0
  }
1351
2
  else if (op[1] == '=')
1352
2
  {
1353
2
    if (op[0] == '=')
1354
2
      opcode += _OP_EQ;
1355
0
    else
1356
0
      opcode += _OP_NEQ;
1357
2
  }
1358
0
  else if (op[0] == '+')
1359
0
  {
1360
0
    opcode += _OP_ADD;
1361
0
  }
1362
0
  else if (op[0] == '-')
1363
0
  {
1364
0
    opcode += _OP_SUB;
1365
0
  }
1366
0
  else if (op[0] == '*')
1367
0
  {
1368
0
    opcode += _OP_MUL;
1369
0
  }
1370
0
  else if (op[0] == '\\')
1371
0
  {
1372
0
    opcode += _OP_DIV;
1373
0
  }
1374
1375
2
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1376
2
  {
1377
2
    return opcode;
1378
2
  }
1379
1380
0
  return OP_ERROR;
1381
2
}
1382
1383
int yr_parser_reduce_operation(
1384
    yyscan_t yyscanner,
1385
    const char* op,
1386
    YR_EXPRESSION left_operand,
1387
    YR_EXPRESSION right_operand)
1388
2
{
1389
2
  int expression_type;
1390
1391
2
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1392
1393
2
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1394
2
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1395
2
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1396
0
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1397
0
  {
1398
0
    if (left_operand.type != right_operand.type)
1399
0
    {
1400
      // One operand is double and the other is integer,
1401
      // cast the integer to double
1402
1403
0
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1404
0
          yyscanner,
1405
0
          OP_INT_TO_DBL,
1406
0
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1407
0
          NULL,
1408
0
          NULL));
1409
0
    }
1410
1411
0
    expression_type = EXPRESSION_TYPE_FLOAT;
1412
1413
0
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1414
0
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1415
0
    {
1416
0
      expression_type = EXPRESSION_TYPE_INTEGER;
1417
0
    }
1418
1419
0
    FAIL_ON_ERROR(yr_parser_emit(
1420
0
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1421
0
  }
1422
2
  else if (
1423
2
      left_operand.type == EXPRESSION_TYPE_STRING &&
1424
2
      right_operand.type == EXPRESSION_TYPE_STRING)
1425
2
  {
1426
2
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1427
1428
2
    if (opcode != OP_ERROR)
1429
2
    {
1430
2
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1431
2
    }
1432
0
    else
1433
0
    {
1434
0
      yr_compiler_set_error_extra_info_fmt(
1435
0
          compiler, "strings don't support \"%s\" operation", op);
1436
1437
0
      return ERROR_WRONG_TYPE;
1438
0
    }
1439
2
  }
1440
0
  else
1441
0
  {
1442
0
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1443
1444
0
    return ERROR_WRONG_TYPE;
1445
0
  }
1446
1447
2
  return ERROR_SUCCESS;
1448
2
}