Coverage Report

Created: 2025-11-09 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/parser.c
Line
Count
Source
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
#include "yara/compiler.h"
47
#include "yara/types.h"
48
49
// Converts a hexadecimal digit character to its numeric value. Characters in
// the range 'A'-'F' map to 10-15; any other character is treated as a decimal
// digit and has '0' subtracted from it (lowercase 'a'-'f' are NOT handled).
//
// The argument is evaluated more than once, so it must not have side effects.
// The whole expansion and every use of the argument are parenthesized so the
// macro can be safely embedded in larger expressions.
#define todigit(x)                                            \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
52
53
int yr_parser_emit(
54
    yyscan_t yyscanner,
55
    uint8_t instruction,
56
    YR_ARENA_REF* instruction_ref)
57
105k
{
58
105k
  return yr_arena_write_data(
59
105k
      yyget_extra(yyscanner)->arena,
60
105k
      YR_CODE_SECTION,
61
105k
      &instruction,
62
105k
      sizeof(uint8_t),
63
105k
      instruction_ref);
64
105k
}
65
66
int yr_parser_emit_with_arg_double(
67
    yyscan_t yyscanner,
68
    uint8_t instruction,
69
    double argument,
70
    YR_ARENA_REF* instruction_ref,
71
    YR_ARENA_REF* argument_ref)
72
2.44k
{
73
2.44k
  int result = yr_arena_write_data(
74
2.44k
      yyget_extra(yyscanner)->arena,
75
2.44k
      YR_CODE_SECTION,
76
2.44k
      &instruction,
77
2.44k
      sizeof(uint8_t),
78
2.44k
      instruction_ref);
79
80
2.44k
  if (result == ERROR_SUCCESS)
81
2.44k
    result = yr_arena_write_data(
82
2.44k
        yyget_extra(yyscanner)->arena,
83
2.44k
        YR_CODE_SECTION,
84
2.44k
        &argument,
85
2.44k
        sizeof(double),
86
2.44k
        argument_ref);
87
88
2.44k
  return result;
89
2.44k
}
90
91
int yr_parser_emit_with_arg_int32(
92
    yyscan_t yyscanner,
93
    uint8_t instruction,
94
    int32_t argument,
95
    YR_ARENA_REF* instruction_ref,
96
    YR_ARENA_REF* argument_ref)
97
23.9k
{
98
23.9k
  int result = yr_arena_write_data(
99
23.9k
      yyget_extra(yyscanner)->arena,
100
23.9k
      YR_CODE_SECTION,
101
23.9k
      &instruction,
102
23.9k
      sizeof(uint8_t),
103
23.9k
      instruction_ref);
104
105
23.9k
  if (result == ERROR_SUCCESS)
106
23.9k
    result = yr_arena_write_data(
107
23.9k
        yyget_extra(yyscanner)->arena,
108
23.9k
        YR_CODE_SECTION,
109
23.9k
        &argument,
110
23.9k
        sizeof(int32_t),
111
23.9k
        argument_ref);
112
113
23.9k
  return result;
114
23.9k
}
115
116
int yr_parser_emit_with_arg(
117
    yyscan_t yyscanner,
118
    uint8_t instruction,
119
    int64_t argument,
120
    YR_ARENA_REF* instruction_ref,
121
    YR_ARENA_REF* argument_ref)
122
40.8k
{
123
40.8k
  int result = yr_arena_write_data(
124
40.8k
      yyget_extra(yyscanner)->arena,
125
40.8k
      YR_CODE_SECTION,
126
40.8k
      &instruction,
127
40.8k
      sizeof(uint8_t),
128
40.8k
      instruction_ref);
129
130
40.8k
  if (result == ERROR_SUCCESS)
131
40.8k
    result = yr_arena_write_data(
132
40.8k
        yyget_extra(yyscanner)->arena,
133
40.8k
        YR_CODE_SECTION,
134
40.8k
        &argument,
135
40.8k
        sizeof(int64_t),
136
40.8k
        argument_ref);
137
138
40.8k
  return result;
139
40.8k
}
140
141
int yr_parser_emit_with_arg_reloc(
142
    yyscan_t yyscanner,
143
    uint8_t instruction,
144
    void* argument,
145
    YR_ARENA_REF* instruction_ref,
146
    YR_ARENA_REF* argument_ref)
147
812k
{
148
812k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
149
150
812k
  DECLARE_REFERENCE(void*, ptr) arg;
151
152
812k
  memset(&arg, 0, sizeof(arg));
153
812k
  arg.ptr = argument;
154
155
812k
  int result = yr_arena_write_data(
156
812k
      yyget_extra(yyscanner)->arena,
157
812k
      YR_CODE_SECTION,
158
812k
      &instruction,
159
812k
      sizeof(uint8_t),
160
812k
      instruction_ref);
161
162
812k
  if (result == ERROR_SUCCESS)
163
812k
    result = yr_arena_write_data(
164
812k
        yyget_extra(yyscanner)->arena,
165
812k
        YR_CODE_SECTION,
166
812k
        &arg,
167
812k
        sizeof(arg),
168
812k
        &ref);
169
170
812k
  if (result == ERROR_SUCCESS)
171
812k
    result = yr_arena_make_ptr_relocatable(
172
812k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
173
174
812k
  if (argument_ref != NULL)
175
0
    *argument_ref = ref;
176
177
812k
  return result;
178
812k
}
179
180
int yr_parser_emit_pushes_for_strings(
181
    yyscan_t yyscanner,
182
    const char* identifier,
183
    YR_STRING_SET* strings)
184
9.58k
{
185
9.58k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
186
187
9.58k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
188
9.58k
      compiler, compiler->current_rule_idx);
189
190
9.58k
  YR_STRING* string;
191
192
9.58k
  const char* string_identifier;
193
9.58k
  const char* target_identifier;
194
195
9.58k
  strings->count = 0;
196
9.58k
  strings->head = NULL;
197
9.58k
  YR_STRING_SET_ELEMENT** tail_ptr = &strings->head;
198
199
9.58k
  yr_rule_strings_foreach(current_rule, string)
200
812k
  {
201
    // Don't generate pushes for strings chained to another one, we are
202
    // only interested in non-chained strings or the head of the chain.
203
204
812k
    if (string->chained_to == NULL)
205
808k
    {
206
808k
      string_identifier = string->identifier;
207
808k
      target_identifier = identifier;
208
209
1.61M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
210
808k
             *target_identifier == *string_identifier)
211
808k
      {
212
808k
        target_identifier++;
213
808k
        string_identifier++;
214
808k
      }
215
216
808k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
217
8.77k
          *target_identifier == '*')
218
801k
      {
219
801k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
220
221
801k
        string->flags |= STRING_FLAGS_REFERENCED;
222
801k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
223
801k
        strings->count++;
224
225
801k
        *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT));
226
801k
        yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element));
227
801k
        (*tail_ptr)->next = NULL;
228
801k
        tail_ptr = &(*tail_ptr)->next;
229
801k
      }
230
808k
    }
231
812k
  }
232
233
9.58k
  if (strings->count == 0)
234
45
  {
235
45
    yr_compiler_set_error_extra_info(
236
45
        compiler, identifier) return ERROR_UNDEFINED_STRING;
237
45
  }
238
239
9.54k
  return ERROR_SUCCESS;
240
9.58k
}
241
242
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
243
// prefix.
244
int yr_parser_emit_pushes_for_rules(
245
    yyscan_t yyscanner,
246
    const char* prefix,
247
    int* count)
248
1.18k
{
249
1.18k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
250
251
  // Make sure the compiler is parsing a rule
252
1.18k
  assert(compiler->current_rule_idx != UINT32_MAX);
253
254
1.18k
  YR_RULE* rule;
255
1.18k
  int matching = 0;
256
257
1.18k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
258
1.18k
      compiler->arena,
259
1.18k
      YR_NAMESPACES_TABLE,
260
1.18k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
261
262
  // Can't use yr_rules_foreach here as that requires the rules to have been
263
  // finalized (inserting a NULL rule at the end). This is done when
264
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
265
  // into the current position in the code arena. Obviously we aren't done
266
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
267
  // this I'm manually walking all the currently compiled rules (up to the
268
  // current rule index) and comparing identifiers to see if it is one we should
269
  // use.
270
  //
271
  // Further, we have to get compiler->current_rule_idx before we start because
272
  // if we emit an OP_PUSH_RULE
273
1.18k
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
274
275
5.86k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
276
4.68k
  {
277
    // Is rule->identifier prefixed by prefix?
278
4.68k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
279
1.18k
    {
280
1.18k
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
281
1.18k
          compiler->rules_table, rule->identifier, ns->name);
282
283
1.18k
      if (rule_idx != UINT32_MAX)
284
1.18k
      {
285
1.18k
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
286
1.18k
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
287
1.18k
        matching++;
288
1.18k
      }
289
1.18k
    }
290
291
4.68k
    rule++;
292
4.68k
  }
293
294
1.18k
  if (count != NULL)
295
1.18k
  {
296
1.18k
    *count = matching;
297
1.18k
  }
298
299
1.18k
  if (matching == 0)
300
126
  {
301
126
    yr_compiler_set_error_extra_info(compiler, prefix);
302
126
    return ERROR_UNDEFINED_IDENTIFIER;
303
126
  }
304
305
1.06k
  return ERROR_SUCCESS;
306
1.18k
}
307
308
// Emits the shortest push instruction able to represent `argument` and
// writes it into the code section of the compiler's arena:
//
//   YR_UNDEFINED        -> OP_PUSH_U, no operand
//   <= 0xff             -> OP_PUSH_8 + 1 operand byte
//   <= 0xffff           -> OP_PUSH_16 + 2 operand bytes
//   <= 0xffffffff       -> OP_PUSH_32 + 4 operand bytes
//   anything else       -> OP_PUSH + 8 operand bytes
//
// Multi-byte operands are copied with memcpy from an integer of the matching
// width. Returns the result of yr_arena_write_data.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte plus, at most, a 64-bit operand.
  uint8_t encoded[9];
  size_t operand_size = 0;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    encoded[0] = OP_PUSH_8;
    encoded[1] = (uint8_t) argument;
    operand_size = sizeof(uint8_t);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t operand = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand, sizeof(operand));
    operand_size = sizeof(operand);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t operand = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand, sizeof(operand));
    operand_size = sizeof(operand);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    operand_size = sizeof(argument);
  }

  YR_COMPILER* compiler = yyget_extra(yyscanner);

  return yr_arena_write_data(
      compiler->arena, YR_CODE_SECTION, encoded, 1 + operand_size, NULL);
}
347
348
int yr_parser_check_types(
349
    YR_COMPILER* compiler,
350
    YR_OBJECT_FUNCTION* function,
351
    const char* actual_args_fmt)
352
346
{
353
346
  int i;
354
355
488
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
356
488
  {
357
488
    if (function->prototypes[i].arguments_fmt == NULL)
358
13
      break;
359
360
475
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
361
333
      return ERROR_SUCCESS;
362
475
  }
363
364
13
  yr_compiler_set_error_extra_info(compiler, function->identifier)
365
366
13
      return ERROR_WRONG_ARGUMENTS;
367
346
}
368
369
int yr_parser_lookup_string(
370
    yyscan_t yyscanner,
371
    const char* identifier,
372
    YR_STRING** string)
373
3.00k
{
374
3.00k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
375
376
3.00k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
377
3.00k
      compiler, compiler->current_rule_idx);
378
379
3.00k
  yr_rule_strings_foreach(current_rule, *string)
380
3.35k
  {
381
    // If some string $a gets fragmented into multiple chained
382
    // strings, all those fragments have the same $a identifier
383
    // but we are interested in the heading fragment, which is
384
    // that with chained_to == NULL
385
386
3.35k
    if ((*string)->chained_to == NULL &&
387
3.19k
        strcmp((*string)->identifier, identifier) == 0)
388
2.96k
    {
389
2.96k
      return ERROR_SUCCESS;
390
2.96k
    }
391
3.35k
  }
392
393
40
  yr_compiler_set_error_extra_info(compiler, identifier)
394
395
40
      * string = NULL;
396
397
40
  return ERROR_UNDEFINED_STRING;
398
3.00k
}
399
400
////////////////////////////////////////////////////////////////////////////////
401
// Searches for a variable with the given identifier in the scope of the current
402
// "for" loop. In case of nested "for" loops the identifier is searched starting
403
// at the top-level loop and going down thorough the nested loops until the
404
// current one. This is ok because inner loops can not re-define an identifier
405
// already defined by an outer loop.
406
//
407
// If the variable is found, the return value is the position that the variable
408
// occupies among all the currently defined variables. If the variable doesn't
409
// exist the return value is -1.
410
//
411
// The function can receive a pointer to a YR_EXPRESSION that will populated
412
// with information about the variable if found. This pointer can be NULL if
413
// the caller is not interested in getting that information.
414
//
415
int yr_parser_lookup_loop_variable(
416
    yyscan_t yyscanner,
417
    const char* identifier,
418
    YR_EXPRESSION* expr)
419
15.7k
{
420
15.7k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
421
15.7k
  int i, j;
422
15.7k
  int var_offset = 0;
423
424
20.4k
  for (i = 0; i <= compiler->loop_index; i++)
425
12.6k
  {
426
12.6k
    var_offset += compiler->loop[i].vars_internal_count;
427
428
20.9k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
429
16.1k
    {
430
16.1k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
431
15.8k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
432
7.94k
      {
433
7.94k
        if (expr != NULL)
434
7.91k
          *expr = compiler->loop[i].vars[j];
435
436
7.94k
        return var_offset + j;
437
7.94k
      }
438
16.1k
    }
439
440
4.74k
    var_offset += compiler->loop[i].vars_count;
441
4.74k
  }
442
443
7.77k
  return -1;
444
15.7k
}
445
446
static int _yr_parser_write_string(
447
    const char* identifier,
448
    YR_MODIFIER modifier,
449
    YR_COMPILER* compiler,
450
    SIZED_STRING* str,
451
    RE_AST* re_ast,
452
    YR_ARENA_REF* string_ref,
453
    int* min_atom_quality,
454
    int* num_atom)
455
34.5k
{
456
34.5k
  SIZED_STRING* literal_string;
457
34.5k
  YR_ATOM_LIST_ITEM* atom;
458
34.5k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
459
460
34.5k
  int c, result;
461
34.5k
  int max_string_len;
462
34.5k
  bool free_literal = false;
463
464
34.5k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
465
34.5k
      compiler->arena,
466
34.5k
      YR_STRINGS_TABLE,
467
34.5k
      sizeof(YR_STRING),
468
34.5k
      string_ref,
469
34.5k
      offsetof(YR_STRING, identifier),
470
34.5k
      offsetof(YR_STRING, string),
471
34.5k
      offsetof(YR_STRING, chained_to),
472
34.5k
      EOL));
473
474
34.5k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
475
34.5k
      compiler->arena, string_ref);
476
477
34.5k
  YR_ARENA_REF ref;
478
479
34.5k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
480
481
34.5k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
482
34.5k
  string->rule_idx = compiler->current_rule_idx;
483
34.5k
  string->idx = compiler->current_string_idx;
484
34.5k
  string->fixed_offset = YR_UNDEFINED;
485
486
34.5k
  compiler->current_string_idx++;
487
488
34.5k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
489
32.7k
      modifier.flags & STRING_FLAGS_REGEXP ||
490
3.65k
      modifier.flags & STRING_FLAGS_BASE64 ||
491
2.29k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
492
32.8k
  {
493
32.8k
    literal_string = yr_re_ast_extract_literal(re_ast);
494
495
32.8k
    if (literal_string != NULL)
496
25.7k
      free_literal = true;
497
32.8k
  }
498
1.72k
  else
499
1.72k
  {
500
1.72k
    literal_string = str;
501
1.72k
  }
502
503
34.5k
  if (literal_string != NULL)
504
27.4k
  {
505
27.4k
    modifier.flags |= STRING_FLAGS_LITERAL;
506
507
27.4k
    result = _yr_compiler_store_data(
508
27.4k
        compiler,
509
27.4k
        literal_string->c_string,
510
27.4k
        literal_string->length + 1,  // +1 to include terminating NULL
511
27.4k
        &ref);
512
513
27.4k
    if (result != ERROR_SUCCESS)
514
0
      goto cleanup;
515
516
27.4k
    string->length = (uint32_t) literal_string->length;
517
27.4k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
518
519
27.4k
    if (modifier.flags & STRING_FLAGS_WIDE)
520
1.40k
      max_string_len = string->length * 2;
521
26.0k
    else
522
26.0k
      max_string_len = string->length;
523
524
27.4k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
525
21.0k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
526
527
27.4k
    result = yr_atoms_extract_from_string(
528
27.4k
        &compiler->atoms_config,
529
27.4k
        (uint8_t*) literal_string->c_string,
530
27.4k
        (int32_t) literal_string->length,
531
27.4k
        modifier,
532
27.4k
        &atom_list,
533
27.4k
        min_atom_quality);
534
535
27.4k
    if (result != ERROR_SUCCESS)
536
0
      goto cleanup;
537
27.4k
  }
538
7.04k
  else
539
7.04k
  {
540
    // Non-literal strings can't be marked as fixed offset because once we
541
    // find a string atom in the scanned data we don't know the offset where
542
    // the string should start, as the non-literal strings can contain
543
    // variable-length portions.
544
7.04k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
545
546
    // Save the position where the RE forward code starts for later reference.
547
7.04k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
548
7.04k
        compiler->arena, YR_RE_CODE_SECTION);
549
550
    // Emit forwards code
551
7.04k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
552
553
7.04k
    if (result != ERROR_SUCCESS)
554
136
      goto cleanup;
555
556
    // Emit backwards code
557
6.90k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
558
559
6.90k
    if (result != ERROR_SUCCESS)
560
7
      goto cleanup;
561
562
    // Extract atoms from the regular expression.
563
6.89k
    result = yr_atoms_extract_from_re(
564
6.89k
        &compiler->atoms_config,
565
6.89k
        re_ast,
566
6.89k
        modifier,
567
6.89k
        &atom_list,
568
6.89k
        min_atom_quality);
569
570
6.89k
    if (result != ERROR_SUCCESS)
571
0
      goto cleanup;
572
573
    // If no atom was extracted let's add a zero-length atom.
574
6.89k
    if (atom_list == NULL)
575
2.39k
    {
576
2.39k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
577
578
2.39k
      if (atom_list == NULL)
579
0
      {
580
0
        result = ERROR_INSUFFICIENT_MEMORY;
581
0
        goto cleanup;
582
0
      }
583
584
2.39k
      atom_list->atom.length = 0;
585
2.39k
      atom_list->backtrack = 0;
586
2.39k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
587
2.39k
      atom_list->next = NULL;
588
589
2.39k
      yr_arena_ptr_to_ref(
590
2.39k
          compiler->arena,
591
2.39k
          yr_arena_get_ptr(
592
2.39k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
593
2.39k
          &(atom_list->forward_code_ref));
594
2.39k
    }
595
6.89k
  }
596
597
34.3k
  string->flags = modifier.flags;
598
599
  // Add the string to Aho-Corasick automaton.
600
34.3k
  result = yr_ac_add_string(
601
34.3k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
602
603
34.3k
  if (result != ERROR_SUCCESS)
604
0
    goto cleanup;
605
606
34.3k
  atom = atom_list;
607
34.3k
  c = 0;
608
609
1.80M
  while (atom != NULL)
610
1.77M
  {
611
1.77M
    atom = atom->next;
612
1.77M
    c++;
613
1.77M
  }
614
615
34.3k
  (*num_atom) += c;
616
617
34.5k
cleanup:
618
34.5k
  if (free_literal)
619
25.7k
    yr_free(literal_string);
620
621
34.5k
  if (atom_list != NULL)
622
34.3k
    yr_atoms_list_destroy(atom_list);
623
624
34.5k
  return result;
625
34.3k
}
626
627
static int _yr_parser_check_string_modifiers(
628
    yyscan_t yyscanner,
629
    YR_MODIFIER modifier)
630
30.4k
{
631
30.4k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
632
633
  // xor and nocase together is not implemented.
634
30.4k
  if (modifier.flags & STRING_FLAGS_XOR &&
635
563
      modifier.flags & STRING_FLAGS_NO_CASE)
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler, "invalid modifier combination: xor nocase");
639
0
    return ERROR_INVALID_MODIFIER;
640
0
  }
641
642
  // base64 and nocase together is not implemented.
643
30.4k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
644
5.79k
      (modifier.flags & STRING_FLAGS_BASE64 ||
645
5.78k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
646
4
  {
647
4
    yr_compiler_set_error_extra_info(
648
4
        compiler,
649
4
        modifier.flags & STRING_FLAGS_BASE64
650
4
            ? "invalid modifier combination: base64 nocase"
651
4
            : "invalid modifier combination: base64wide nocase");
652
4
    return ERROR_INVALID_MODIFIER;
653
4
  }
654
655
  // base64 and fullword together is not implemented.
656
30.4k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
657
138
      (modifier.flags & STRING_FLAGS_BASE64 ||
658
138
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
659
0
  {
660
0
    yr_compiler_set_error_extra_info(
661
0
        compiler,
662
0
        modifier.flags & STRING_FLAGS_BASE64
663
0
            ? "invalid modifier combination: base64 fullword"
664
0
            : "invalid modifier combination: base64wide fullword");
665
0
    return ERROR_INVALID_MODIFIER;
666
0
  }
667
668
  // base64 and xor together is not implemented.
669
30.4k
  if (modifier.flags & STRING_FLAGS_XOR &&
670
563
      (modifier.flags & STRING_FLAGS_BASE64 ||
671
563
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
672
7
  {
673
7
    yr_compiler_set_error_extra_info(
674
7
        compiler,
675
7
        modifier.flags & STRING_FLAGS_BASE64
676
7
            ? "invalid modifier combination: base64 xor"
677
7
            : "invalid modifier combination: base64wide xor");
678
7
    return ERROR_INVALID_MODIFIER;
679
7
  }
680
681
30.4k
  return ERROR_SUCCESS;
682
30.4k
}
683
684
int yr_parser_reduce_string_declaration(
685
    yyscan_t yyscanner,
686
    YR_MODIFIER modifier,
687
    const char* identifier,
688
    SIZED_STRING* str,
689
    YR_ARENA_REF* string_ref)
690
30.4k
{
691
30.4k
  int result = ERROR_SUCCESS;
692
30.4k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
693
30.4k
  int atom_quality;
694
695
30.4k
  char message[512];
696
697
30.4k
  int32_t min_gap = 0;
698
30.4k
  int32_t max_gap = 0;
699
700
30.4k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
701
702
30.4k
  RE_AST* re_ast = NULL;
703
30.4k
  RE_AST* remainder_re_ast = NULL;
704
30.4k
  RE_ERROR re_error;
705
706
30.4k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
707
30.4k
      compiler, compiler->current_rule_idx);
708
709
  // Determine if a string with the same identifier was already defined
710
  // by searching for the identifier in strings_table.
711
30.4k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
712
30.4k
      compiler->strings_table, identifier, NULL);
713
714
  // The string was already defined, return an error.
715
30.4k
  if (string_idx != UINT32_MAX)
716
17
  {
717
17
    yr_compiler_set_error_extra_info(compiler, identifier);
718
17
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
719
17
  }
720
721
  // Empty strings are not allowed.
722
30.4k
  if (str->length == 0)
723
0
  {
724
0
    yr_compiler_set_error_extra_info(compiler, identifier);
725
0
    return ERROR_EMPTY_STRING;
726
0
  }
727
728
30.4k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
729
5.65k
    modifier.flags |= STRING_FLAGS_NO_CASE;
730
731
30.4k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
732
154
    modifier.flags |= STRING_FLAGS_DOT_ALL;
733
734
  // Hex strings are always handled as DOT_ALL regexps.
735
30.4k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
736
1.14k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
737
738
30.4k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
739
29.1k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
740
27.9k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
741
27.7k
  {
742
27.7k
    modifier.flags |= STRING_FLAGS_ASCII;
743
27.7k
  }
744
745
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
746
  // a single match for the string is enough. This is true in
747
  // most cases, except when the string count (#) and string offset (@)
748
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
749
  // initially, and unmarked later if required.
750
30.4k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
751
752
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
753
  // need to be searched all over the file because the user is using the
754
  // "at" operator. The string must be searched at a fixed offset in the
755
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
756
  // and unmarked later if required.
757
30.4k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
758
759
  // If string identifier is $ this is an anonymous string, if not add the
760
  // identifier to strings_table.
761
30.4k
  if (strcmp(identifier, "$") == 0)
762
29.2k
  {
763
29.2k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
764
29.2k
  }
765
1.24k
  else
766
1.24k
  {
767
1.24k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
768
1.24k
        compiler->strings_table,
769
1.24k
        identifier,
770
1.24k
        NULL,
771
1.24k
        compiler->current_string_idx));
772
1.24k
  }
773
774
  // Make sure that the the string does not have an invalid combination of
775
  // modifiers.
776
30.4k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
777
778
30.4k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
779
29.3k
      modifier.flags & STRING_FLAGS_REGEXP ||
780
3.65k
      modifier.flags & STRING_FLAGS_BASE64 ||
781
2.29k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
782
28.7k
  {
783
28.7k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
784
1.14k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
785
27.5k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
786
25.6k
    {
787
25.6k
      int flags = RE_PARSER_FLAG_NONE;
788
25.6k
      if (compiler->strict_escape)
789
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
790
25.6k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
791
25.6k
    }
792
1.92k
    else
793
1.92k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
794
795
28.7k
    if (result != ERROR_SUCCESS)
796
890
    {
797
890
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
798
0
      {
799
0
        yywarning(yyscanner, "unknown escape sequence");
800
0
      }
801
890
      else
802
890
      {
803
890
        snprintf(
804
890
            message,
805
890
            sizeof(message),
806
890
            "invalid %s \"%s\": %s",
807
890
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
808
890
                                                        : "regular expression",
809
890
            identifier,
810
890
            re_error.message);
811
812
890
        yr_compiler_set_error_extra_info(compiler, message);
813
890
        goto _exit;
814
890
      }
815
890
    }
816
817
27.8k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
818
722
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
819
820
27.8k
    if (re_ast->flags & RE_FLAGS_GREEDY)
821
977
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
822
823
    // Regular expressions in the strings section can't mix greedy and
824
    // ungreedy quantifiers like .* and .*?. That's because these regular
825
    // expressions can be matched forwards and/or backwards depending on the
826
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
827
    // be able to properly calculate the length of the match.
828
829
27.8k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
830
977
        (re_ast->flags & RE_FLAGS_UNGREEDY))
831
8
    {
832
8
      result = ERROR_INVALID_REGULAR_EXPRESSION;
833
834
8
      yr_compiler_set_error_extra_info(
835
8
          compiler,
836
8
          "greedy and ungreedy quantifiers can't be mixed in a regular "
837
8
          "expression");
838
839
8
      goto _exit;
840
8
    }
841
842
27.8k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
843
1.67k
    {
844
1.67k
      yywarning(
845
1.67k
          yyscanner,
846
1.67k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
847
1.67k
          "with a reasonable value for N",
848
1.67k
          identifier);
849
1.67k
    }
850
851
27.8k
    if (compiler->re_ast_callback != NULL)
852
0
    {
853
0
      compiler->re_ast_callback(
854
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
855
0
    }
856
857
27.8k
    *string_ref = YR_ARENA_NULL_REF;
858
859
60.5k
    while (re_ast != NULL)
860
32.8k
    {
861
32.8k
      YR_ARENA_REF ref;
862
863
32.8k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
864
865
32.8k
      int32_t prev_min_gap = min_gap;
866
32.8k
      int32_t prev_max_gap = max_gap;
867
868
32.8k
      result = yr_re_ast_split_at_chaining_point(
869
32.8k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
870
871
32.8k
      if (result != ERROR_SUCCESS)
872
0
        goto _exit;
873
874
32.8k
      result = _yr_parser_write_string(
875
32.8k
          identifier,
876
32.8k
          modifier,
877
32.8k
          compiler,
878
32.8k
          NULL,
879
32.8k
          re_ast,
880
32.8k
          &ref,
881
32.8k
          &atom_quality,
882
32.8k
          &current_rule->num_atoms);
883
884
32.8k
      if (result != ERROR_SUCCESS)
885
143
        goto _exit;
886
887
32.6k
      if (atom_quality < min_atom_quality)
888
25.9k
        min_atom_quality = atom_quality;
889
890
32.6k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
891
27.6k
      {
892
        // This is the first string in the chain, the string reference
893
        // returned by this function must point to this string.
894
27.6k
        *string_ref = ref;
895
27.6k
      }
896
4.96k
      else
897
4.96k
      {
898
        // This is not the first string in the chain, set the appropriate
899
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
900
        // fields.
901
4.96k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
902
4.96k
            compiler->arena,
903
4.96k
            YR_STRINGS_TABLE,
904
4.96k
            prev_string_idx * sizeof(YR_STRING));
905
906
4.96k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
907
4.96k
            compiler->arena, &ref);
908
909
4.96k
        new_string->chained_to = prev_string;
910
4.96k
        new_string->chain_gap_min = prev_min_gap;
911
4.96k
        new_string->chain_gap_max = prev_max_gap;
912
913
        // A string chained to another one can't have a fixed offset, only the
914
        // head of the string chain can have a fixed offset.
915
4.96k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
916
917
        // There is a previous string, but that string wasn't marked as part
918
        // of a chain because we can't do that until knowing there will be
919
        // another string, let's flag it now the we know.
920
4.96k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
921
922
        // There is a previous string, so this string is part of a chain, but
923
        // there will be no more strings because there are no more AST to
924
        // split, which means that this is the chain's tail.
925
4.96k
        if (remainder_re_ast == NULL)
926
1.23k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
927
1.23k
                               STRING_FLAGS_CHAIN_TAIL;
928
4.96k
      }
929
930
32.6k
      yr_re_ast_destroy(re_ast);
931
32.6k
      re_ast = remainder_re_ast;
932
32.6k
    }
933
27.8k
  }
934
1.72k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
935
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
936
1.72k
  {
937
1.72k
    result = _yr_parser_write_string(
938
1.72k
        identifier,
939
1.72k
        modifier,
940
1.72k
        compiler,
941
1.72k
        str,
942
1.72k
        NULL,
943
1.72k
        string_ref,
944
1.72k
        &min_atom_quality,
945
1.72k
        &current_rule->num_atoms);
946
947
1.72k
    if (result != ERROR_SUCCESS)
948
0
      goto _exit;
949
1.72k
  }
950
951
29.4k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
952
7.21k
  {
953
7.21k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
954
7.21k
  }
955
956
30.4k
_exit:
957
958
30.4k
  if (re_ast != NULL)
959
523
    yr_re_ast_destroy(re_ast);
960
961
30.4k
  if (remainder_re_ast != NULL)
962
1
    yr_re_ast_destroy(remainder_re_ast);
963
964
30.4k
  return result;
965
29.4k
}
966
967
static int wildcard_iterator(
968
    void* prefix,
969
    size_t prefix_len,
970
    void* _value,
971
    void* data)
972
5.84k
{
973
5.84k
  const char* identifier = (const char*) data;
974
975
  // If the identifier is prefixed by prefix, then it matches the wildcard.
976
5.84k
  if (!strncmp(prefix, identifier, prefix_len))
977
236
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
978
979
5.60k
  return ERROR_SUCCESS;
980
5.84k
}
981
982
int yr_parser_reduce_rule_declaration_phase_1(
983
    yyscan_t yyscanner,
984
    int32_t flags,
985
    const char* identifier,
986
    YR_ARENA_REF* rule_ref)
987
34.5k
{
988
34.5k
  int result;
989
34.5k
  YR_FIXUP* fixup;
990
34.5k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
991
992
34.5k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
993
34.5k
      compiler->arena,
994
34.5k
      YR_NAMESPACES_TABLE,
995
34.5k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
996
997
34.5k
  if (yr_hash_table_lookup_uint32(
998
34.5k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
999
12.4k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
1000
22.0k
  {
1001
    // A rule or variable with the same identifier already exists, return the
1002
    // appropriate error.
1003
1004
22.0k
    yr_compiler_set_error_extra_info(compiler, identifier);
1005
22.0k
    return ERROR_DUPLICATED_IDENTIFIER;
1006
22.0k
  }
1007
1008
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1009
  // any of them are a prefix of the identifier being declared. If so, return
1010
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1011
12.4k
  result = yr_hash_table_iterate(
1012
12.4k
      compiler->wildcard_identifiers_table,
1013
12.4k
      ns->name,
1014
12.4k
      wildcard_iterator,
1015
12.4k
      (void*) identifier);
1016
1017
12.4k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1018
236
  {
1019
    // This rule matches an existing wildcard rule set.
1020
236
    yr_compiler_set_error_extra_info(compiler, identifier);
1021
236
  }
1022
1023
12.4k
  FAIL_ON_ERROR(result);
1024
1025
12.2k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1026
12.2k
      compiler->arena,
1027
12.2k
      YR_RULES_TABLE,
1028
12.2k
      sizeof(YR_RULE),
1029
12.2k
      rule_ref,
1030
12.2k
      offsetof(YR_RULE, identifier),
1031
12.2k
      offsetof(YR_RULE, tags),
1032
12.2k
      offsetof(YR_RULE, strings),
1033
12.2k
      offsetof(YR_RULE, metas),
1034
12.2k
      offsetof(YR_RULE, ns),
1035
12.2k
      EOL));
1036
1037
12.2k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1038
1039
12.2k
  YR_ARENA_REF ref;
1040
1041
12.2k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1042
1043
12.2k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1044
12.2k
  rule->flags = flags;
1045
12.2k
  rule->ns = ns;
1046
12.2k
  rule->num_atoms = 0;
1047
1048
12.2k
  YR_ARENA_REF jmp_offset_ref;
1049
1050
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1051
12.2k
  compiler->current_rule_idx = compiler->next_rule_idx;
1052
12.2k
  compiler->next_rule_idx++;
1053
1054
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1055
  // disabled it skips over the rule's code and go straight to the next rule's
1056
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1057
  // is set to 0 as we don't know the jump target yet. When we finish
1058
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1059
  // the jump offset is set to its final value.
1060
1061
12.2k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1062
12.2k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1063
1064
12.2k
  FAIL_ON_ERROR(yr_arena_write_data(
1065
12.2k
      compiler->arena,
1066
12.2k
      YR_CODE_SECTION,
1067
12.2k
      &compiler->current_rule_idx,
1068
12.2k
      sizeof(compiler->current_rule_idx),
1069
12.2k
      NULL));
1070
1071
  // Create a fixup entry for the jump and push it in the stack
1072
12.2k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1073
1074
12.2k
  if (fixup == NULL)
1075
0
    return ERROR_INSUFFICIENT_MEMORY;
1076
1077
12.2k
  fixup->ref = jmp_offset_ref;
1078
12.2k
  fixup->next = compiler->fixup_stack_head;
1079
12.2k
  compiler->fixup_stack_head = fixup;
1080
1081
  // Clean strings_table as we are starting to parse a new rule.
1082
12.2k
  yr_hash_table_clean(compiler->strings_table, NULL);
1083
1084
12.2k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1085
12.2k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1086
1087
12.2k
  return ERROR_SUCCESS;
1088
12.2k
}
1089
1090
int yr_parser_reduce_rule_declaration_phase_2(
1091
    yyscan_t yyscanner,
1092
    YR_ARENA_REF* rule_ref)
1093
518
{
1094
518
  uint32_t max_strings_per_rule;
1095
518
  uint32_t strings_in_rule = 0;
1096
1097
518
  YR_FIXUP* fixup;
1098
518
  YR_STRING* string;
1099
518
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1100
1101
518
  yr_get_configuration_uint32(
1102
518
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1103
1104
518
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1105
1106
  // Show warning if the rule is generating too many atoms. The warning is
1107
  // shown if the number of atoms is greater than 20 times the maximum number
1108
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1109
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1110
1111
518
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1112
20
  {
1113
20
    yywarning(yyscanner, "rule is slowing down scanning");
1114
20
  }
1115
1116
518
  yr_rule_strings_foreach(rule, string)
1117
2.43k
  {
1118
    // Only the heading fragment in a chain of strings (the one with
1119
    // chained_to == NULL) must be referenced. All other fragments
1120
    // are never marked as referenced.
1121
    //
1122
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1123
    // strings must always be referenced.
1124
1125
2.43k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1126
191
        (STRING_IS_ANONYMOUS(string) ||
1127
184
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1128
21
    {
1129
21
      yr_compiler_set_error_extra_info(
1130
21
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1131
21
    }
1132
1133
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1134
    // that it will match anywhere.
1135
2.41k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1136
170
        STRING_IS_FIXED_OFFSET(string))
1137
110
    {
1138
110
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1139
110
    }
1140
1141
2.41k
    strings_in_rule++;
1142
1143
2.41k
    if (strings_in_rule > max_strings_per_rule)
1144
0
    {
1145
0
      yr_compiler_set_error_extra_info(
1146
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1147
0
    }
1148
2.41k
  }
1149
1150
497
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1151
497
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1152
1153
497
  fixup = compiler->fixup_stack_head;
1154
1155
497
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1156
497
      compiler->arena, &fixup->ref);
1157
1158
497
  int32_t jmp_offset = yr_arena_get_current_offset(
1159
497
                           compiler->arena, YR_CODE_SECTION) -
1160
497
                       fixup->ref.offset + 1;
1161
1162
497
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1163
1164
  // Remove fixup from the stack.
1165
497
  compiler->fixup_stack_head = fixup->next;
1166
497
  yr_free(fixup);
1167
1168
  // We have finished parsing the current rule set current_rule_idx to
1169
  // UINT32_MAX indicating that we are not currently parsing a rule.
1170
497
  compiler->current_rule_idx = UINT32_MAX;
1171
1172
497
  return ERROR_SUCCESS;
1173
497
}
1174
1175
int yr_parser_reduce_string_identifier(
1176
    yyscan_t yyscanner,
1177
    const char* identifier,
1178
    uint8_t instruction,
1179
    uint64_t at_offset)
1180
17.2k
{
1181
17.2k
  YR_STRING* string;
1182
17.2k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1183
1184
17.2k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1185
14.2k
  {
1186
14.2k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1187
14.1k
    {
1188
14.1k
      yr_parser_emit_with_arg(
1189
14.1k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1190
1191
14.1k
      yr_parser_emit(yyscanner, instruction, NULL);
1192
1193
14.1k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1194
14.1k
          compiler, compiler->current_rule_idx);
1195
1196
14.1k
      yr_rule_strings_foreach(current_rule, string)
1197
815k
      {
1198
815k
        if (instruction != OP_FOUND)
1199
815k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1200
1201
815k
        if (instruction == OP_FOUND_AT)
1202
1.09k
        {
1203
          // Avoid overwriting any previous fixed offset
1204
1.09k
          if (string->fixed_offset == YR_UNDEFINED)
1205
315
            string->fixed_offset = at_offset;
1206
1207
          // If a previous fixed offset was different, disable
1208
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1209
          // have room to store a single fixed offset value
1210
1.09k
          if (string->fixed_offset != at_offset)
1211
677
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1212
1.09k
        }
1213
814k
        else
1214
814k
        {
1215
814k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1216
814k
        }
1217
815k
      }
1218
14.1k
    }
1219
99
    else
1220
99
    {
1221
      // Anonymous strings not allowed outside of a loop
1222
99
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1223
99
    }
1224
14.2k
  }
1225
3.00k
  else
1226
3.00k
  {
1227
3.00k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1228
1229
2.96k
    FAIL_ON_ERROR(
1230
2.96k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1231
1232
2.96k
    if (instruction != OP_FOUND)
1233
2.54k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1234
1235
2.96k
    if (instruction == OP_FOUND_AT)
1236
589
    {
1237
      // Avoid overwriting any previous fixed offset
1238
1239
589
      if (string->fixed_offset == YR_UNDEFINED)
1240
196
        string->fixed_offset = at_offset;
1241
1242
      // If a previous fixed offset was different, disable
1243
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1244
      // have room to store a single fixed offset value
1245
1246
589
      if (string->fixed_offset == YR_UNDEFINED ||
1247
403
          string->fixed_offset != at_offset)
1248
353
      {
1249
353
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1250
353
      }
1251
589
    }
1252
2.37k
    else
1253
2.37k
    {
1254
2.37k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1255
2.37k
    }
1256
1257
2.96k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1258
1259
2.96k
    string->flags |= STRING_FLAGS_REFERENCED;
1260
2.96k
  }
1261
1262
17.1k
  return ERROR_SUCCESS;
1263
17.2k
}
1264
1265
int yr_parser_reduce_meta_declaration(
1266
    yyscan_t yyscanner,
1267
    int32_t type,
1268
    const char* identifier,
1269
    const char* string,
1270
    int64_t integer,
1271
    YR_ARENA_REF* meta_ref)
1272
1.07k
{
1273
1.07k
  YR_ARENA_REF ref;
1274
1.07k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1275
1276
1.07k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1277
1.07k
      compiler->arena,
1278
1.07k
      YR_METAS_TABLE,
1279
1.07k
      sizeof(YR_META),
1280
1.07k
      meta_ref,
1281
1.07k
      offsetof(YR_META, identifier),
1282
1.07k
      offsetof(YR_META, string),
1283
1.07k
      EOL));
1284
1285
1.07k
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1286
1287
1.07k
  meta->type = type;
1288
1.07k
  meta->integer = integer;
1289
1290
1.07k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1291
1292
1.07k
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1293
1294
1.07k
  if (string != NULL)
1295
269
  {
1296
269
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1297
1298
269
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1299
269
  }
1300
804
  else
1301
804
  {
1302
804
    meta->string = NULL;
1303
804
  }
1304
1305
1.07k
  compiler->current_meta_idx++;
1306
1307
1.07k
  return ERROR_SUCCESS;
1308
1.07k
}
1309
1310
// Tells whether `module_name` is acceptable as a module name: it must not be
// empty and its recorded length must be equal to the strlen() of its
// c_string, which rules out names with an embedded null character.
//
// Returns true for a valid name, false otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1320
1321
// Handles an import statement for the module called `module_name`.
//
// If the name is valid and no object with that name exists in the objects
// table for the current namespace, a structure object is created for the
// module, added to the objects table, populated by
// yr_modules_do_declarations, and an OP_IMPORT instruction referencing the
// stored module name is emitted. Importing an already imported module is a
// no-op.
//
// Returns ERROR_SUCCESS, ERROR_INVALID_MODULE_NAME (extra info: the module
// name), the error returned by yr_modules_do_declarations (with the module
// name as extra info for ERROR_UNKNOWN_MODULE), or any other error coming
// from the functions called here.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The object never made it into the table, so nothing else holds a
    // pointer to it. Destroy it here, otherwise it would leak.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the object is reachable through objects_table (presumably
  // released together with the table), so it is not destroyed on failure.
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1378
1379
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1380
20.7k
{
1381
20.7k
  int opcode = 0;
1382
1383
20.7k
  switch (expression_type)
1384
20.7k
  {
1385
14.5k
  case EXPRESSION_TYPE_INTEGER:
1386
14.5k
    opcode = OP_INT_BEGIN;
1387
14.5k
    break;
1388
3.45k
  case EXPRESSION_TYPE_FLOAT:
1389
3.45k
    opcode = OP_DBL_BEGIN;
1390
3.45k
    break;
1391
2.67k
  case EXPRESSION_TYPE_STRING:
1392
2.67k
    opcode = OP_STR_BEGIN;
1393
2.67k
    break;
1394
0
  default:
1395
0
    assert(false);
1396
20.7k
  }
1397
1398
20.7k
  if (op[0] == '<')
1399
1.77k
  {
1400
1.77k
    if (op[1] == '=')
1401
498
      opcode += _OP_LE;
1402
1.27k
    else
1403
1.27k
      opcode += _OP_LT;
1404
1.77k
  }
1405
18.9k
  else if (op[0] == '>')
1406
1.45k
  {
1407
1.45k
    if (op[1] == '=')
1408
498
      opcode += _OP_GE;
1409
960
    else
1410
960
      opcode += _OP_GT;
1411
1.45k
  }
1412
17.4k
  else if (op[1] == '=')
1413
1.43k
  {
1414
1.43k
    if (op[0] == '=')
1415
902
      opcode += _OP_EQ;
1416
531
    else
1417
531
      opcode += _OP_NEQ;
1418
1.43k
  }
1419
16.0k
  else if (op[0] == '+')
1420
3.99k
  {
1421
3.99k
    opcode += _OP_ADD;
1422
3.99k
  }
1423
12.0k
  else if (op[0] == '-')
1424
7.90k
  {
1425
7.90k
    opcode += _OP_SUB;
1426
7.90k
  }
1427
4.14k
  else if (op[0] == '*')
1428
2.68k
  {
1429
2.68k
    opcode += _OP_MUL;
1430
2.68k
  }
1431
1.45k
  else if (op[0] == '\\')
1432
1.45k
  {
1433
1.45k
    opcode += _OP_DIV;
1434
1.45k
  }
1435
1436
20.7k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1437
20.7k
  {
1438
20.7k
    return opcode;
1439
20.7k
  }
1440
1441
8
  return OP_ERROR;
1442
20.7k
}
1443
1444
int yr_parser_reduce_operation(
1445
    yyscan_t yyscanner,
1446
    const char* op,
1447
    YR_EXPRESSION left_operand,
1448
    YR_EXPRESSION right_operand)
1449
20.9k
{
1450
20.9k
  int expression_type;
1451
1452
20.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1453
1454
20.9k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1455
4.55k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1456
18.0k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1457
2.62k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1458
18.0k
  {
1459
18.0k
    if (left_operand.type != right_operand.type)
1460
2.60k
    {
1461
      // One operand is double and the other is integer,
1462
      // cast the integer to double
1463
1464
2.60k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1465
2.60k
          yyscanner,
1466
2.60k
          OP_INT_TO_DBL,
1467
2.60k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1468
2.60k
          NULL,
1469
2.60k
          NULL));
1470
2.60k
    }
1471
1472
18.0k
    expression_type = EXPRESSION_TYPE_FLOAT;
1473
1474
18.0k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1475
16.3k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1476
14.5k
    {
1477
14.5k
      expression_type = EXPRESSION_TYPE_INTEGER;
1478
14.5k
    }
1479
1480
18.0k
    FAIL_ON_ERROR(yr_parser_emit(
1481
18.0k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1482
18.0k
  }
1483
2.88k
  else if (
1484
2.88k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1485
2.73k
      right_operand.type == EXPRESSION_TYPE_STRING)
1486
2.67k
  {
1487
2.67k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1488
1489
2.67k
    if (opcode != OP_ERROR)
1490
2.66k
    {
1491
2.66k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1492
2.66k
    }
1493
8
    else
1494
8
    {
1495
8
      yr_compiler_set_error_extra_info_fmt(
1496
8
          compiler, "strings don't support \"%s\" operation", op);
1497
1498
8
      return ERROR_WRONG_TYPE;
1499
8
    }
1500
2.67k
  }
1501
205
  else
1502
205
  {
1503
205
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1504
1505
205
    return ERROR_WRONG_TYPE;
1506
205
  }
1507
1508
20.7k
  return ERROR_SUCCESS;
1509
20.9k
}
1510
1511
int yr_parser_mark_nonfast(
1512
   yyscan_t yyscanner,
1513
   YR_STRING_SET string_set
1514
6.64k
) {
1515
6.64k
 YR_COMPILER* compiler = yyget_extra(yyscanner);
1516
1517
6.64k
 YR_STRING_SET_ELEMENT* head = string_set.head;
1518
795k
  while (head != NULL) {
1519
789k
    YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element);
1520
789k
    string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1521
789k
    head = head->next;
1522
789k
  }
1523
6.64k
  return ERROR_SUCCESS;
1524
6.64k
}