Coverage Report

Created: 2026-04-01 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/yara/libyara/parser.c
Line
Count
Source
1
/*
2
Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <limits.h>
31
#include <stddef.h>
32
#include <string.h>
33
#include <yara/ahocorasick.h>
34
#include <yara/arena.h>
35
#include <yara/base64.h>
36
#include <yara/error.h>
37
#include <yara/exec.h>
38
#include <yara/integers.h>
39
#include <yara/mem.h>
40
#include <yara/modules.h>
41
#include <yara/object.h>
42
#include <yara/parser.h>
43
#include <yara/re.h>
44
#include <yara/strutils.h>
45
#include <yara/utils.h>
46
#include "yara/compiler.h"
47
#include "yara/types.h"
48
49
// Converts a hexadecimal digit character to its numeric value. Characters in
// the range 'A'-'F' map to 10-15, any other character is treated as a decimal
// digit and mapped to (x - '0'). Lowercase hex digits are not handled.
//
// The argument and the whole expansion are parenthesized so that the macro can
// be used safely inside larger expressions. The argument is evaluated more than
// once, so it must not have side effects.
#define todigit(x)                                            \
  ((uint8_t) (((x) >= 'A' && (x) <= 'F') ? ((x) - 'A' + 10) \
                                         : ((x) - '0')))
52
53
int yr_parser_emit(
54
    yyscan_t yyscanner,
55
    uint8_t instruction,
56
    YR_ARENA_REF* instruction_ref)
57
103k
{
58
103k
  return yr_arena_write_data(
59
103k
      yyget_extra(yyscanner)->arena,
60
103k
      YR_CODE_SECTION,
61
103k
      &instruction,
62
103k
      sizeof(uint8_t),
63
103k
      instruction_ref);
64
103k
}
65
66
int yr_parser_emit_with_arg_double(
67
    yyscan_t yyscanner,
68
    uint8_t instruction,
69
    double argument,
70
    YR_ARENA_REF* instruction_ref,
71
    YR_ARENA_REF* argument_ref)
72
2.79k
{
73
2.79k
  int result = yr_arena_write_data(
74
2.79k
      yyget_extra(yyscanner)->arena,
75
2.79k
      YR_CODE_SECTION,
76
2.79k
      &instruction,
77
2.79k
      sizeof(uint8_t),
78
2.79k
      instruction_ref);
79
80
2.79k
  if (result == ERROR_SUCCESS)
81
2.79k
    result = yr_arena_write_data(
82
2.79k
        yyget_extra(yyscanner)->arena,
83
2.79k
        YR_CODE_SECTION,
84
2.79k
        &argument,
85
2.79k
        sizeof(double),
86
2.79k
        argument_ref);
87
88
2.79k
  return result;
89
2.79k
}
90
91
int yr_parser_emit_with_arg_int32(
92
    yyscan_t yyscanner,
93
    uint8_t instruction,
94
    int32_t argument,
95
    YR_ARENA_REF* instruction_ref,
96
    YR_ARENA_REF* argument_ref)
97
20.2k
{
98
20.2k
  int result = yr_arena_write_data(
99
20.2k
      yyget_extra(yyscanner)->arena,
100
20.2k
      YR_CODE_SECTION,
101
20.2k
      &instruction,
102
20.2k
      sizeof(uint8_t),
103
20.2k
      instruction_ref);
104
105
20.2k
  if (result == ERROR_SUCCESS)
106
20.2k
    result = yr_arena_write_data(
107
20.2k
        yyget_extra(yyscanner)->arena,
108
20.2k
        YR_CODE_SECTION,
109
20.2k
        &argument,
110
20.2k
        sizeof(int32_t),
111
20.2k
        argument_ref);
112
113
20.2k
  return result;
114
20.2k
}
115
116
int yr_parser_emit_with_arg(
117
    yyscan_t yyscanner,
118
    uint8_t instruction,
119
    int64_t argument,
120
    YR_ARENA_REF* instruction_ref,
121
    YR_ARENA_REF* argument_ref)
122
36.9k
{
123
36.9k
  int result = yr_arena_write_data(
124
36.9k
      yyget_extra(yyscanner)->arena,
125
36.9k
      YR_CODE_SECTION,
126
36.9k
      &instruction,
127
36.9k
      sizeof(uint8_t),
128
36.9k
      instruction_ref);
129
130
36.9k
  if (result == ERROR_SUCCESS)
131
36.9k
    result = yr_arena_write_data(
132
36.9k
        yyget_extra(yyscanner)->arena,
133
36.9k
        YR_CODE_SECTION,
134
36.9k
        &argument,
135
36.9k
        sizeof(int64_t),
136
36.9k
        argument_ref);
137
138
36.9k
  return result;
139
36.9k
}
140
141
int yr_parser_emit_with_arg_reloc(
142
    yyscan_t yyscanner,
143
    uint8_t instruction,
144
    void* argument,
145
    YR_ARENA_REF* instruction_ref,
146
    YR_ARENA_REF* argument_ref)
147
921k
{
148
921k
  YR_ARENA_REF ref = YR_ARENA_NULL_REF;
149
150
921k
  DECLARE_REFERENCE(void*, ptr) arg;
151
152
921k
  memset(&arg, 0, sizeof(arg));
153
921k
  arg.ptr = argument;
154
155
921k
  int result = yr_arena_write_data(
156
921k
      yyget_extra(yyscanner)->arena,
157
921k
      YR_CODE_SECTION,
158
921k
      &instruction,
159
921k
      sizeof(uint8_t),
160
921k
      instruction_ref);
161
162
921k
  if (result == ERROR_SUCCESS)
163
921k
    result = yr_arena_write_data(
164
921k
        yyget_extra(yyscanner)->arena,
165
921k
        YR_CODE_SECTION,
166
921k
        &arg,
167
921k
        sizeof(arg),
168
921k
        &ref);
169
170
921k
  if (result == ERROR_SUCCESS)
171
921k
    result = yr_arena_make_ptr_relocatable(
172
921k
        yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL);
173
174
921k
  if (argument_ref != NULL)
175
0
    *argument_ref = ref;
176
177
921k
  return result;
178
921k
}
179
180
int yr_parser_emit_pushes_for_strings(
181
    yyscan_t yyscanner,
182
    const char* identifier,
183
    YR_STRING_SET* strings)
184
7.36k
{
185
7.36k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
186
187
7.36k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
188
7.36k
      compiler, compiler->current_rule_idx);
189
190
7.36k
  YR_STRING* string;
191
192
7.36k
  const char* string_identifier;
193
7.36k
  const char* target_identifier;
194
195
7.36k
  strings->count = 0;
196
7.36k
  strings->head = NULL;
197
7.36k
  YR_STRING_SET_ELEMENT** tail_ptr = &strings->head;
198
199
7.36k
  yr_rule_strings_foreach(current_rule, string)
200
919k
  {
201
    // Don't generate pushes for strings chained to another one, we are
202
    // only interested in non-chained strings or the head of the chain.
203
204
919k
    if (string->chained_to == NULL)
205
915k
    {
206
915k
      string_identifier = string->identifier;
207
915k
      target_identifier = identifier;
208
209
1.83M
      while (*target_identifier != '\0' && *string_identifier != '\0' &&
210
915k
             *target_identifier == *string_identifier)
211
915k
      {
212
915k
        target_identifier++;
213
915k
        string_identifier++;
214
915k
      }
215
216
915k
      if ((*target_identifier == '\0' && *string_identifier == '\0') ||
217
10.6k
          *target_identifier == '*')
218
908k
      {
219
908k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL);
220
221
908k
        string->flags |= STRING_FLAGS_REFERENCED;
222
908k
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
223
908k
        strings->count++;
224
225
908k
        *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT));
226
908k
        yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element));
227
908k
        (*tail_ptr)->next = NULL;
228
908k
        tail_ptr = &(*tail_ptr)->next;
229
908k
      }
230
915k
    }
231
919k
  }
232
233
7.36k
  if (strings->count == 0)
234
48
  {
235
48
    yr_compiler_set_error_extra_info(
236
48
        compiler, identifier) return ERROR_UNDEFINED_STRING;
237
48
  }
238
239
7.32k
  return ERROR_SUCCESS;
240
7.36k
}
241
242
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
243
// prefix.
244
int yr_parser_emit_pushes_for_rules(
245
    yyscan_t yyscanner,
246
    const char* prefix,
247
    int* count)
248
1.07k
{
249
1.07k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
250
251
  // Make sure the compiler is parsing a rule
252
1.07k
  assert(compiler->current_rule_idx != UINT32_MAX);
253
254
1.07k
  YR_RULE* rule;
255
1.07k
  int matching = 0;
256
257
1.07k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
258
1.07k
      compiler->arena,
259
1.07k
      YR_NAMESPACES_TABLE,
260
1.07k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
261
262
  // Can't use yr_rules_foreach here as that requires the rules to have been
263
  // finalized (inserting a NULL rule at the end). This is done when
264
  // yr_compiler_get_rules() is called, which also inserts a HALT instruction
265
  // into the current position in the code arena. Obviously we aren't done
266
  // compiling the rules yet so inserting a HALT is a bad idea. To deal with
267
  // this I'm manually walking all the currently compiled rules (up to the
268
  // current rule index) and comparing identifiers to see if it is one we should
269
  // use.
270
  //
271
  // Further, we have to get compiler->current_rule_idx before we start because
272
  // if we emit an OP_PUSH_RULE
273
1.07k
  rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0);
274
275
4.76k
  for (uint32_t i = 0; i <= compiler->current_rule_idx; i++)
276
3.69k
  {
277
    // Is rule->identifier prefixed by prefix?
278
3.69k
    if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0)
279
1.17k
    {
280
1.17k
      uint32_t rule_idx = yr_hash_table_lookup_uint32(
281
1.17k
          compiler->rules_table, rule->identifier, ns->name);
282
283
1.17k
      if (rule_idx != UINT32_MAX)
284
1.17k
      {
285
1.17k
        FAIL_ON_ERROR(yr_parser_emit_with_arg(
286
1.17k
            yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL));
287
1.17k
        matching++;
288
1.17k
      }
289
1.17k
    }
290
291
3.69k
    rule++;
292
3.69k
  }
293
294
1.07k
  if (count != NULL)
295
1.07k
  {
296
1.07k
    *count = matching;
297
1.07k
  }
298
299
1.07k
  if (matching == 0)
300
70
  {
301
70
    yr_compiler_set_error_extra_info(compiler, prefix);
302
70
    return ERROR_UNDEFINED_IDENTIFIER;
303
70
  }
304
305
1.00k
  return ERROR_SUCCESS;
306
1.07k
}
307
308
// Emits the shortest push instruction able to hold the given constant.
//
// YR_UNDEFINED is encoded as a lone OP_PUSH_U. Any other value is encoded as
// OP_PUSH_8, OP_PUSH_16, OP_PUSH_32 or OP_PUSH depending on whether it fits
// in 8, 16, 32 or 64 bits, followed by the value copied in the host's byte
// order using exactly that many bytes.
//
// Returns the result of writing the encoded instruction to the code section.
int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument)
{
  // One opcode byte followed by an operand of at most 8 bytes.
  uint8_t encoded[1 + sizeof(uint64_t)];
  size_t operand_size = 0;

  if (argument == YR_UNDEFINED)
  {
    encoded[0] = OP_PUSH_U;
  }
  else if (argument <= 0xff)
  {
    const uint8_t operand = (uint8_t) argument;

    encoded[0] = OP_PUSH_8;
    encoded[1] = operand;
    operand_size = sizeof(operand);
  }
  else if (argument <= 0xffff)
  {
    const uint16_t operand = (uint16_t) argument;

    encoded[0] = OP_PUSH_16;
    memcpy(&encoded[1], &operand, sizeof(operand));
    operand_size = sizeof(operand);
  }
  else if (argument <= 0xffffffff)
  {
    const uint32_t operand = (uint32_t) argument;

    encoded[0] = OP_PUSH_32;
    memcpy(&encoded[1], &operand, sizeof(operand));
    operand_size = sizeof(operand);
  }
  else
  {
    encoded[0] = OP_PUSH;
    memcpy(&encoded[1], &argument, sizeof(argument));
    operand_size = sizeof(argument);
  }

  return yr_arena_write_data(
      yyget_extra(yyscanner)->arena,
      YR_CODE_SECTION,
      encoded,
      1 + operand_size,
      NULL);
}
347
348
int yr_parser_check_types(
349
    YR_COMPILER* compiler,
350
    YR_OBJECT_FUNCTION* function,
351
    const char* actual_args_fmt)
352
473
{
353
473
  int i;
354
355
611
  for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++)
356
611
  {
357
611
    if (function->prototypes[i].arguments_fmt == NULL)
358
8
      break;
359
360
603
    if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0)
361
465
      return ERROR_SUCCESS;
362
603
  }
363
364
8
  yr_compiler_set_error_extra_info(compiler, function->identifier)
365
366
8
      return ERROR_WRONG_ARGUMENTS;
367
473
}
368
369
int yr_parser_lookup_string(
370
    yyscan_t yyscanner,
371
    const char* identifier,
372
    YR_STRING** string)
373
2.47k
{
374
2.47k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
375
376
2.47k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
377
2.47k
      compiler, compiler->current_rule_idx);
378
379
2.47k
  yr_rule_strings_foreach(current_rule, *string)
380
2.89k
  {
381
    // If some string $a gets fragmented into multiple chained
382
    // strings, all those fragments have the same $a identifier
383
    // but we are interested in the heading fragment, which is
384
    // that with chained_to == NULL
385
386
2.89k
    if ((*string)->chained_to == NULL &&
387
2.72k
        strcmp((*string)->identifier, identifier) == 0)
388
2.41k
    {
389
2.41k
      return ERROR_SUCCESS;
390
2.41k
    }
391
2.89k
  }
392
393
56
  yr_compiler_set_error_extra_info(compiler, identifier)
394
395
56
      * string = NULL;
396
397
56
  return ERROR_UNDEFINED_STRING;
398
2.47k
}
399
400
////////////////////////////////////////////////////////////////////////////////
401
// Searches for a variable with the given identifier in the scope of the current
402
// "for" loop. In case of nested "for" loops the identifier is searched starting
403
// at the top-level loop and going down thorough the nested loops until the
404
// current one. This is ok because inner loops can not re-define an identifier
405
// already defined by an outer loop.
406
//
407
// If the variable is found, the return value is the position that the variable
408
// occupies among all the currently defined variables. If the variable doesn't
409
// exist the return value is -1.
410
//
411
// The function can receive a pointer to a YR_EXPRESSION that will populated
412
// with information about the variable if found. This pointer can be NULL if
413
// the caller is not interested in getting that information.
414
//
415
int yr_parser_lookup_loop_variable(
416
    yyscan_t yyscanner,
417
    const char* identifier,
418
    YR_EXPRESSION* expr)
419
13.6k
{
420
13.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
421
13.6k
  int i, j;
422
13.6k
  int var_offset = 0;
423
424
18.2k
  for (i = 0; i <= compiler->loop_index; i++)
425
10.8k
  {
426
10.8k
    var_offset += compiler->loop[i].vars_internal_count;
427
428
17.6k
    for (j = 0; j < compiler->loop[i].vars_count; j++)
429
13.0k
    {
430
13.0k
      if (compiler->loop[i].vars[j].identifier.ptr != NULL &&
431
12.3k
          strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0)
432
6.18k
      {
433
6.18k
        if (expr != NULL)
434
6.16k
          *expr = compiler->loop[i].vars[j];
435
436
6.18k
        return var_offset + j;
437
6.18k
      }
438
13.0k
    }
439
440
4.62k
    var_offset += compiler->loop[i].vars_count;
441
4.62k
  }
442
443
7.44k
  return -1;
444
13.6k
}
445
446
static int _yr_parser_write_string(
447
    const char* identifier,
448
    YR_MODIFIER modifier,
449
    YR_COMPILER* compiler,
450
    SIZED_STRING* str,
451
    RE_AST* re_ast,
452
    YR_ARENA_REF* string_ref,
453
    int* min_atom_quality,
454
    int* num_atom)
455
30.1k
{
456
30.1k
  SIZED_STRING* literal_string;
457
30.1k
  YR_ATOM_LIST_ITEM* atom;
458
30.1k
  YR_ATOM_LIST_ITEM* atom_list = NULL;
459
460
30.1k
  int c, result;
461
30.1k
  int max_string_len;
462
30.1k
  bool free_literal = false;
463
464
30.1k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
465
30.1k
      compiler->arena,
466
30.1k
      YR_STRINGS_TABLE,
467
30.1k
      sizeof(YR_STRING),
468
30.1k
      string_ref,
469
30.1k
      offsetof(YR_STRING, identifier),
470
30.1k
      offsetof(YR_STRING, string),
471
30.1k
      offsetof(YR_STRING, chained_to),
472
30.1k
      EOL));
473
474
30.1k
  YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr(
475
30.1k
      compiler->arena, string_ref);
476
477
30.1k
  YR_ARENA_REF ref;
478
479
30.1k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
480
481
30.1k
  string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
482
30.1k
  string->rule_idx = compiler->current_rule_idx;
483
30.1k
  string->idx = compiler->current_string_idx;
484
30.1k
  string->fixed_offset = YR_UNDEFINED;
485
486
30.1k
  compiler->current_string_idx++;
487
488
30.1k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
489
29.1k
      modifier.flags & STRING_FLAGS_REGEXP ||
490
4.13k
      modifier.flags & STRING_FLAGS_BASE64 ||
491
2.49k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
492
28.1k
  {
493
28.1k
    literal_string = yr_re_ast_extract_literal(re_ast);
494
495
28.1k
    if (literal_string != NULL)
496
21.2k
      free_literal = true;
497
28.1k
  }
498
1.94k
  else
499
1.94k
  {
500
1.94k
    literal_string = str;
501
1.94k
  }
502
503
30.1k
  if (literal_string != NULL)
504
23.2k
  {
505
23.2k
    modifier.flags |= STRING_FLAGS_LITERAL;
506
507
23.2k
    result = _yr_compiler_store_data(
508
23.2k
        compiler,
509
23.2k
        literal_string->c_string,
510
23.2k
        literal_string->length + 1,  // +1 to include terminating NULL
511
23.2k
        &ref);
512
513
23.2k
    if (result != ERROR_SUCCESS)
514
0
      goto cleanup;
515
516
23.2k
    string->length = (uint32_t) literal_string->length;
517
23.2k
    string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
518
519
23.2k
    if (modifier.flags & STRING_FLAGS_WIDE)
520
1.74k
      max_string_len = string->length * 2;
521
21.4k
    else
522
21.4k
      max_string_len = string->length;
523
524
23.2k
    if (max_string_len <= YR_MAX_ATOM_LENGTH)
525
18.2k
      modifier.flags |= STRING_FLAGS_FITS_IN_ATOM;
526
527
23.2k
    result = yr_atoms_extract_from_string(
528
23.2k
        &compiler->atoms_config,
529
23.2k
        (uint8_t*) literal_string->c_string,
530
23.2k
        (int32_t) literal_string->length,
531
23.2k
        modifier,
532
23.2k
        &atom_list,
533
23.2k
        min_atom_quality);
534
535
23.2k
    if (result != ERROR_SUCCESS)
536
0
      goto cleanup;
537
23.2k
  }
538
6.90k
  else
539
6.90k
  {
540
    // Non-literal strings can't be marked as fixed offset because once we
541
    // find a string atom in the scanned data we don't know the offset where
542
    // the string should start, as the non-literal strings can contain
543
    // variable-length portions.
544
6.90k
    modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
545
546
    // Save the position where the RE forward code starts for later reference.
547
6.90k
    yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
548
6.90k
        compiler->arena, YR_RE_CODE_SECTION);
549
550
    // Emit forwards code
551
6.90k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
552
553
6.90k
    if (result != ERROR_SUCCESS)
554
134
      goto cleanup;
555
556
    // Emit backwards code
557
6.77k
    result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
558
559
6.77k
    if (result != ERROR_SUCCESS)
560
9
      goto cleanup;
561
562
    // Extract atoms from the regular expression.
563
6.76k
    result = yr_atoms_extract_from_re(
564
6.76k
        &compiler->atoms_config,
565
6.76k
        re_ast,
566
6.76k
        modifier,
567
6.76k
        &atom_list,
568
6.76k
        min_atom_quality);
569
570
6.76k
    if (result != ERROR_SUCCESS)
571
0
      goto cleanup;
572
573
    // If no atom was extracted let's add a zero-length atom.
574
6.76k
    if (atom_list == NULL)
575
1.48k
    {
576
1.48k
      atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
577
578
1.48k
      if (atom_list == NULL)
579
0
      {
580
0
        result = ERROR_INSUFFICIENT_MEMORY;
581
0
        goto cleanup;
582
0
      }
583
584
1.48k
      atom_list->atom.length = 0;
585
1.48k
      atom_list->backtrack = 0;
586
1.48k
      atom_list->backward_code_ref = YR_ARENA_NULL_REF;
587
1.48k
      atom_list->next = NULL;
588
589
1.48k
      yr_arena_ptr_to_ref(
590
1.48k
          compiler->arena,
591
1.48k
          yr_arena_get_ptr(
592
1.48k
              compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
593
1.48k
          &(atom_list->forward_code_ref));
594
1.48k
    }
595
6.76k
  }
596
597
29.9k
  string->flags = modifier.flags;
598
599
  // Add the string to Aho-Corasick automaton.
600
29.9k
  result = yr_ac_add_string(
601
29.9k
      compiler->automaton, string, string->idx, atom_list, compiler->arena);
602
603
29.9k
  if (result != ERROR_SUCCESS)
604
0
    goto cleanup;
605
606
29.9k
  atom = atom_list;
607
29.9k
  c = 0;
608
609
2.69M
  while (atom != NULL)
610
2.66M
  {
611
2.66M
    atom = atom->next;
612
2.66M
    c++;
613
2.66M
  }
614
615
29.9k
  (*num_atom) += c;
616
617
30.1k
cleanup:
618
30.1k
  if (free_literal)
619
21.2k
    yr_free(literal_string);
620
621
30.1k
  if (atom_list != NULL)
622
29.9k
    yr_atoms_list_destroy(atom_list);
623
624
30.1k
  return result;
625
29.9k
}
626
627
static int _yr_parser_check_string_modifiers(
628
    yyscan_t yyscanner,
629
    YR_MODIFIER modifier)
630
26.8k
{
631
26.8k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
632
633
  // xor and nocase together is not implemented.
634
26.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
635
755
      modifier.flags & STRING_FLAGS_NO_CASE)
636
0
  {
637
0
    yr_compiler_set_error_extra_info(
638
0
        compiler, "invalid modifier combination: xor nocase");
639
0
    return ERROR_INVALID_MODIFIER;
640
0
  }
641
642
  // base64 and nocase together is not implemented.
643
26.8k
  if (modifier.flags & STRING_FLAGS_NO_CASE &&
644
4.68k
      (modifier.flags & STRING_FLAGS_BASE64 ||
645
4.66k
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
646
14
  {
647
14
    yr_compiler_set_error_extra_info(
648
14
        compiler,
649
14
        modifier.flags & STRING_FLAGS_BASE64
650
14
            ? "invalid modifier combination: base64 nocase"
651
14
            : "invalid modifier combination: base64wide nocase");
652
14
    return ERROR_INVALID_MODIFIER;
653
14
  }
654
655
  // base64 and fullword together is not implemented.
656
26.8k
  if (modifier.flags & STRING_FLAGS_FULL_WORD &&
657
130
      (modifier.flags & STRING_FLAGS_BASE64 ||
658
130
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
659
19
  {
660
19
    yr_compiler_set_error_extra_info(
661
19
        compiler,
662
19
        modifier.flags & STRING_FLAGS_BASE64
663
19
            ? "invalid modifier combination: base64 fullword"
664
19
            : "invalid modifier combination: base64wide fullword");
665
19
    return ERROR_INVALID_MODIFIER;
666
19
  }
667
668
  // base64 and xor together is not implemented.
669
26.8k
  if (modifier.flags & STRING_FLAGS_XOR &&
670
755
      (modifier.flags & STRING_FLAGS_BASE64 ||
671
755
       modifier.flags & STRING_FLAGS_BASE64_WIDE))
672
7
  {
673
7
    yr_compiler_set_error_extra_info(
674
7
        compiler,
675
7
        modifier.flags & STRING_FLAGS_BASE64
676
7
            ? "invalid modifier combination: base64 xor"
677
7
            : "invalid modifier combination: base64wide xor");
678
7
    return ERROR_INVALID_MODIFIER;
679
7
  }
680
681
26.8k
  return ERROR_SUCCESS;
682
26.8k
}
683
684
int yr_parser_reduce_string_declaration(
685
    yyscan_t yyscanner,
686
    YR_MODIFIER modifier,
687
    const char* identifier,
688
    SIZED_STRING* str,
689
    YR_ARENA_REF* string_ref)
690
26.9k
{
691
26.9k
  int result = ERROR_SUCCESS;
692
26.9k
  int min_atom_quality = YR_MAX_ATOM_QUALITY;
693
26.9k
  int atom_quality;
694
695
26.9k
  char message[512];
696
697
26.9k
  int32_t min_gap = 0;
698
26.9k
  int32_t max_gap = 0;
699
700
26.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
701
702
26.9k
  RE_AST* re_ast = NULL;
703
26.9k
  RE_AST* remainder_re_ast = NULL;
704
26.9k
  RE_ERROR re_error;
705
706
26.9k
  YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
707
26.9k
      compiler, compiler->current_rule_idx);
708
709
  // Determine if a string with the same identifier was already defined
710
  // by searching for the identifier in strings_table.
711
26.9k
  uint32_t string_idx = yr_hash_table_lookup_uint32(
712
26.9k
      compiler->strings_table, identifier, NULL);
713
714
  // The string was already defined, return an error.
715
26.9k
  if (string_idx != UINT32_MAX)
716
8
  {
717
8
    yr_compiler_set_error_extra_info(compiler, identifier);
718
8
    return ERROR_DUPLICATED_STRING_IDENTIFIER;
719
8
  }
720
721
  // Empty strings are not allowed.
722
26.9k
  if (str->length == 0)
723
29
  {
724
29
    yr_compiler_set_error_extra_info(compiler, identifier);
725
29
    return ERROR_EMPTY_STRING;
726
29
  }
727
728
26.8k
  if (str->flags & SIZED_STRING_FLAGS_NO_CASE)
729
4.28k
    modifier.flags |= STRING_FLAGS_NO_CASE;
730
731
26.8k
  if (str->flags & SIZED_STRING_FLAGS_DOT_ALL)
732
307
    modifier.flags |= STRING_FLAGS_DOT_ALL;
733
734
  // Hex strings are always handled as DOT_ALL regexps.
735
26.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
736
1.17k
    modifier.flags |= STRING_FLAGS_DOT_ALL;
737
738
26.8k
  if (!(modifier.flags & STRING_FLAGS_WIDE) &&
739
25.2k
      !(modifier.flags & STRING_FLAGS_BASE64 ||
740
23.7k
        modifier.flags & STRING_FLAGS_BASE64_WIDE))
741
23.5k
  {
742
23.5k
    modifier.flags |= STRING_FLAGS_ASCII;
743
23.5k
  }
744
745
  // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding
746
  // a single match for the string is enough. This is true in
747
  // most cases, except when the string count (#) and string offset (@)
748
  // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH
749
  // initially, and unmarked later if required.
750
26.8k
  modifier.flags |= STRING_FLAGS_SINGLE_MATCH;
751
752
  // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't
753
  // need to be searched all over the file because the user is using the
754
  // "at" operator. The string must be searched at a fixed offset in the
755
  // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially,
756
  // and unmarked later if required.
757
26.8k
  modifier.flags |= STRING_FLAGS_FIXED_OFFSET;
758
759
  // If string identifier is $ this is an anonymous string, if not add the
760
  // identifier to strings_table.
761
26.8k
  if (strcmp(identifier, "$") == 0)
762
25.8k
  {
763
25.8k
    modifier.flags |= STRING_FLAGS_ANONYMOUS;
764
25.8k
  }
765
1.02k
  else
766
1.02k
  {
767
1.02k
    FAIL_ON_ERROR(yr_hash_table_add_uint32(
768
1.02k
        compiler->strings_table,
769
1.02k
        identifier,
770
1.02k
        NULL,
771
1.02k
        compiler->current_string_idx));
772
1.02k
  }
773
774
  // Make sure that the the string does not have an invalid combination of
775
  // modifiers.
776
26.8k
  FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier));
777
778
26.8k
  if (modifier.flags & STRING_FLAGS_HEXADECIMAL ||
779
25.6k
      modifier.flags & STRING_FLAGS_REGEXP ||
780
4.13k
      modifier.flags & STRING_FLAGS_BASE64 ||
781
2.49k
      modifier.flags & STRING_FLAGS_BASE64_WIDE)
782
24.9k
  {
783
24.9k
    if (modifier.flags & STRING_FLAGS_HEXADECIMAL)
784
1.17k
      result = yr_re_parse_hex(str->c_string, &re_ast, &re_error);
785
23.7k
    else if (modifier.flags & STRING_FLAGS_REGEXP)
786
21.5k
    {
787
21.5k
      int flags = RE_PARSER_FLAG_NONE;
788
21.5k
      if (compiler->strict_escape)
789
0
        flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES;
790
21.5k
      result = yr_re_parse(str->c_string, &re_ast, &re_error, flags);
791
21.5k
    }
792
2.18k
    else
793
2.18k
      result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error);
794
795
24.9k
    if (result != ERROR_SUCCESS)
796
1.07k
    {
797
1.07k
      if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
798
0
      {
799
0
        yywarning(yyscanner, "unknown escape sequence");
800
0
      }
801
1.07k
      else
802
1.07k
      {
803
1.07k
        snprintf(
804
1.07k
            message,
805
1.07k
            sizeof(message),
806
1.07k
            "invalid %s \"%s\": %s",
807
1.07k
            (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string"
808
1.07k
                                                        : "regular expression",
809
1.07k
            identifier,
810
1.07k
            re_error.message);
811
812
1.07k
        yr_compiler_set_error_extra_info(compiler, message);
813
1.07k
        goto _exit;
814
1.07k
      }
815
1.07k
    }
816
817
23.8k
    if (re_ast->flags & RE_FLAGS_FAST_REGEXP)
818
664
      modifier.flags |= STRING_FLAGS_FAST_REGEXP;
819
820
23.8k
    if (re_ast->flags & RE_FLAGS_GREEDY)
821
1.11k
      modifier.flags |= STRING_FLAGS_GREEDY_REGEXP;
822
823
    // Regular expressions in the strings section can't mix greedy and
824
    // ungreedy quantifiers like .* and .*?. That's because these regular
825
    // expressions can be matched forwards and/or backwards depending on the
826
    // atom found, and we need the regexp to be all-greedy or all-ungreedy to
827
    // be able to properly calculate the length of the match.
828
829
23.8k
    if ((re_ast->flags & RE_FLAGS_GREEDY) &&
830
1.11k
        (re_ast->flags & RE_FLAGS_UNGREEDY))
831
8
    {
832
8
      result = ERROR_INVALID_REGULAR_EXPRESSION;
833
834
8
      yr_compiler_set_error_extra_info(
835
8
          compiler,
836
8
          "greedy and ungreedy quantifiers can't be mixed in a regular "
837
8
          "expression");
838
839
8
      goto _exit;
840
8
    }
841
842
23.8k
    if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast))
843
2.03k
    {
844
2.03k
      yywarning(
845
2.03k
          yyscanner,
846
2.03k
          "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} "
847
2.03k
          "with a reasonable value for N",
848
2.03k
          identifier);
849
2.03k
    }
850
851
23.8k
    if (compiler->re_ast_callback != NULL)
852
0
    {
853
0
      compiler->re_ast_callback(
854
0
          current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data);
855
0
    }
856
857
23.8k
    *string_ref = YR_ARENA_NULL_REF;
858
859
51.8k
    while (re_ast != NULL)
860
28.1k
    {
861
28.1k
      YR_ARENA_REF ref;
862
863
28.1k
      uint32_t prev_string_idx = compiler->current_string_idx - 1;
864
865
28.1k
      int32_t prev_min_gap = min_gap;
866
28.1k
      int32_t prev_max_gap = max_gap;
867
868
28.1k
      result = yr_re_ast_split_at_chaining_point(
869
28.1k
          re_ast, &remainder_re_ast, &min_gap, &max_gap);
870
871
28.1k
      if (result != ERROR_SUCCESS)
872
0
        goto _exit;
873
874
28.1k
      result = _yr_parser_write_string(
875
28.1k
          identifier,
876
28.1k
          modifier,
877
28.1k
          compiler,
878
28.1k
          NULL,
879
28.1k
          re_ast,
880
28.1k
          &ref,
881
28.1k
          &atom_quality,
882
28.1k
          &current_rule->num_atoms);
883
884
28.1k
      if (result != ERROR_SUCCESS)
885
143
        goto _exit;
886
887
28.0k
      if (atom_quality < min_atom_quality)
888
22.9k
        min_atom_quality = atom_quality;
889
890
28.0k
      if (YR_ARENA_IS_NULL_REF(*string_ref))
891
23.6k
      {
892
        // This is the first string in the chain, the string reference
893
        // returned by this function must point to this string.
894
23.6k
        *string_ref = ref;
895
23.6k
      }
896
4.35k
      else
897
4.35k
      {
898
        // This is not the first string in the chain, set the appropriate
899
        // flags and fill the chained_to, chain_gap_min and chain_gap_max
900
        // fields.
901
4.35k
        YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr(
902
4.35k
            compiler->arena,
903
4.35k
            YR_STRINGS_TABLE,
904
4.35k
            prev_string_idx * sizeof(YR_STRING));
905
906
4.35k
        YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr(
907
4.35k
            compiler->arena, &ref);
908
909
4.35k
        new_string->chained_to = prev_string;
910
4.35k
        new_string->chain_gap_min = prev_min_gap;
911
4.35k
        new_string->chain_gap_max = prev_max_gap;
912
913
        // A string chained to another one can't have a fixed offset, only the
914
        // head of the string chain can have a fixed offset.
915
4.35k
        new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
916
917
        // There is a previous string, but that string wasn't marked as part
918
        // of a chain because we can't do that until knowing there will be
919
        // another string, let's flag it now the we know.
920
4.35k
        prev_string->flags |= STRING_FLAGS_CHAIN_PART;
921
922
        // There is a previous string, so this string is part of a chain, but
923
        // there will be no more strings because there are no more AST to
924
        // split, which means that this is the chain's tail.
925
4.35k
        if (remainder_re_ast == NULL)
926
1.55k
          new_string->flags |= STRING_FLAGS_CHAIN_PART |
927
1.55k
                               STRING_FLAGS_CHAIN_TAIL;
928
4.35k
      }
929
930
28.0k
      yr_re_ast_destroy(re_ast);
931
28.0k
      re_ast = remainder_re_ast;
932
28.0k
    }
933
23.8k
  }
934
1.94k
  else  // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or
935
        // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE
936
1.94k
  {
937
1.94k
    result = _yr_parser_write_string(
938
1.94k
        identifier,
939
1.94k
        modifier,
940
1.94k
        compiler,
941
1.94k
        str,
942
1.94k
        NULL,
943
1.94k
        string_ref,
944
1.94k
        &min_atom_quality,
945
1.94k
        &current_rule->num_atoms);
946
947
1.94k
    if (result != ERROR_SUCCESS)
948
0
      goto _exit;
949
1.94k
  }
950
951
25.6k
  if (min_atom_quality < compiler->atoms_config.quality_warning_threshold)
952
8.11k
  {
953
8.11k
    yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier);
954
8.11k
  }
955
956
26.8k
_exit:
957
958
26.8k
  if (re_ast != NULL)
959
632
    yr_re_ast_destroy(re_ast);
960
961
26.8k
  if (remainder_re_ast != NULL)
962
0
    yr_re_ast_destroy(remainder_re_ast);
963
964
26.8k
  return result;
965
25.6k
}
966
967
static int wildcard_iterator(
968
    void* prefix,
969
    size_t prefix_len,
970
    void* _value,
971
    void* data)
972
3.01k
{
973
3.01k
  const char* identifier = (const char*) data;
974
975
  // If the identifier is prefixed by prefix, then it matches the wildcard.
976
3.01k
  if (!strncmp(prefix, identifier, prefix_len))
977
156
    return ERROR_IDENTIFIER_MATCHES_WILDCARD;
978
979
2.86k
  return ERROR_SUCCESS;
980
3.01k
}
981
982
int yr_parser_reduce_rule_declaration_phase_1(
983
    yyscan_t yyscanner,
984
    int32_t flags,
985
    const char* identifier,
986
    YR_ARENA_REF* rule_ref)
987
33.0k
{
988
33.0k
  int result;
989
33.0k
  YR_FIXUP* fixup;
990
33.0k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
991
992
33.0k
  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
993
33.0k
      compiler->arena,
994
33.0k
      YR_NAMESPACES_TABLE,
995
33.0k
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));
996
997
33.0k
  if (yr_hash_table_lookup_uint32(
998
33.0k
          compiler->rules_table, identifier, ns->name) != UINT32_MAX ||
999
10.9k
      yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL)
1000
22.0k
  {
1001
    // A rule or variable with the same identifier already exists, return the
1002
    // appropriate error.
1003
1004
22.0k
    yr_compiler_set_error_extra_info(compiler, identifier);
1005
22.0k
    return ERROR_DUPLICATED_IDENTIFIER;
1006
22.0k
  }
1007
1008
  // Iterate over all identifiers in wildcard_identifiers_table, and check if
1009
  // any of them are a prefix of the identifier being declared. If so, return
1010
  // ERROR_IDENTIFIER_MATCHES_WILDCARD.
1011
10.9k
  result = yr_hash_table_iterate(
1012
10.9k
      compiler->wildcard_identifiers_table,
1013
10.9k
      ns->name,
1014
10.9k
      wildcard_iterator,
1015
10.9k
      (void*) identifier);
1016
1017
10.9k
  if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD)
1018
156
  {
1019
    // This rule matches an existing wildcard rule set.
1020
156
    yr_compiler_set_error_extra_info(compiler, identifier);
1021
156
  }
1022
1023
10.9k
  FAIL_ON_ERROR(result);
1024
1025
10.8k
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1026
10.8k
      compiler->arena,
1027
10.8k
      YR_RULES_TABLE,
1028
10.8k
      sizeof(YR_RULE),
1029
10.8k
      rule_ref,
1030
10.8k
      offsetof(YR_RULE, identifier),
1031
10.8k
      offsetof(YR_RULE, tags),
1032
10.8k
      offsetof(YR_RULE, strings),
1033
10.8k
      offsetof(YR_RULE, metas),
1034
10.8k
      offsetof(YR_RULE, ns),
1035
10.8k
      EOL));
1036
1037
10.8k
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1038
1039
10.8k
  YR_ARENA_REF ref;
1040
1041
10.8k
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1042
1043
10.8k
  rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1044
10.8k
  rule->flags = flags;
1045
10.8k
  rule->ns = ns;
1046
10.8k
  rule->num_atoms = 0;
1047
1048
10.8k
  YR_ARENA_REF jmp_offset_ref;
1049
1050
  // We are starting to parse a new rule, set current_rule_idx accordingly.
1051
10.8k
  compiler->current_rule_idx = compiler->next_rule_idx;
1052
10.8k
  compiler->next_rule_idx++;
1053
1054
  // The OP_INIT_RULE instruction behaves like a jump. When the rule is
1055
  // disabled it skips over the rule's code and go straight to the next rule's
1056
  // code. The jmp_offset_ref variable points to the jump's offset. The offset
1057
  // is set to 0 as we don't know the jump target yet. When we finish
1058
  // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2
1059
  // the jump offset is set to its final value.
1060
1061
10.8k
  FAIL_ON_ERROR(yr_parser_emit_with_arg_int32(
1062
10.8k
      yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref));
1063
1064
10.8k
  FAIL_ON_ERROR(yr_arena_write_data(
1065
10.8k
      compiler->arena,
1066
10.8k
      YR_CODE_SECTION,
1067
10.8k
      &compiler->current_rule_idx,
1068
10.8k
      sizeof(compiler->current_rule_idx),
1069
10.8k
      NULL));
1070
1071
  // Create a fixup entry for the jump and push it in the stack
1072
10.8k
  fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP));
1073
1074
10.8k
  if (fixup == NULL)
1075
0
    return ERROR_INSUFFICIENT_MEMORY;
1076
1077
10.8k
  fixup->ref = jmp_offset_ref;
1078
10.8k
  fixup->next = compiler->fixup_stack_head;
1079
10.8k
  compiler->fixup_stack_head = fixup;
1080
1081
  // Clean strings_table as we are starting to parse a new rule.
1082
10.8k
  yr_hash_table_clean(compiler->strings_table, NULL);
1083
1084
10.8k
  FAIL_ON_ERROR(yr_hash_table_add_uint32(
1085
10.8k
      compiler->rules_table, identifier, ns->name, compiler->current_rule_idx));
1086
1087
10.8k
  return ERROR_SUCCESS;
1088
10.8k
}
1089
1090
int yr_parser_reduce_rule_declaration_phase_2(
1091
    yyscan_t yyscanner,
1092
    YR_ARENA_REF* rule_ref)
1093
300
{
1094
300
  uint32_t max_strings_per_rule;
1095
300
  uint32_t strings_in_rule = 0;
1096
1097
300
  YR_FIXUP* fixup;
1098
300
  YR_STRING* string;
1099
300
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1100
1101
300
  yr_get_configuration_uint32(
1102
300
      YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1103
1104
300
  YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref);
1105
1106
  // Show warning if the rule is generating too many atoms. The warning is
1107
  // shown if the number of atoms is greater than 20 times the maximum number
1108
  // of strings allowed for a rule, as 20 is minimum number of atoms generated
1109
  // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously.
1110
1111
300
  if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD)
1112
20
  {
1113
20
    yywarning(yyscanner, "rule is slowing down scanning");
1114
20
  }
1115
1116
300
  yr_rule_strings_foreach(rule, string)
1117
4.54k
  {
1118
    // Only the heading fragment in a chain of strings (the one with
1119
    // chained_to == NULL) must be referenced. All other fragments
1120
    // are never marked as referenced.
1121
    //
1122
    // Any string identifier that starts with '_' can be unreferenced. Anonymous
1123
    // strings must always be referenced.
1124
1125
4.54k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1126
160
        (STRING_IS_ANONYMOUS(string) ||
1127
153
         (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_')))
1128
22
    {
1129
22
      yr_compiler_set_error_extra_info(
1130
22
          compiler, string->identifier) return ERROR_UNREFERENCED_STRING;
1131
22
    }
1132
1133
    // If a string is unreferenced we need to unset the FIXED_OFFSET flag so
1134
    // that it will match anywhere.
1135
4.51k
    if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL &&
1136
138
        STRING_IS_FIXED_OFFSET(string))
1137
94
    {
1138
94
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1139
94
    }
1140
1141
4.51k
    strings_in_rule++;
1142
1143
4.51k
    if (strings_in_rule > max_strings_per_rule)
1144
0
    {
1145
0
      yr_compiler_set_error_extra_info(
1146
0
          compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS;
1147
0
    }
1148
4.51k
  }
1149
1150
278
  FAIL_ON_ERROR(yr_parser_emit_with_arg(
1151
278
      yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL));
1152
1153
278
  fixup = compiler->fixup_stack_head;
1154
1155
278
  int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr(
1156
278
      compiler->arena, &fixup->ref);
1157
1158
278
  int32_t jmp_offset = yr_arena_get_current_offset(
1159
278
                           compiler->arena, YR_CODE_SECTION) -
1160
278
                       fixup->ref.offset + 1;
1161
1162
278
  memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset));
1163
1164
  // Remove fixup from the stack.
1165
278
  compiler->fixup_stack_head = fixup->next;
1166
278
  yr_free(fixup);
1167
1168
  // We have finished parsing the current rule set current_rule_idx to
1169
  // UINT32_MAX indicating that we are not currently parsing a rule.
1170
278
  compiler->current_rule_idx = UINT32_MAX;
1171
1172
278
  return ERROR_SUCCESS;
1173
278
}
1174
1175
int yr_parser_reduce_string_identifier(
1176
    yyscan_t yyscanner,
1177
    const char* identifier,
1178
    uint8_t instruction,
1179
    uint64_t at_offset)
1180
15.6k
{
1181
15.6k
  YR_STRING* string;
1182
15.6k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1183
1184
15.6k
  if (strcmp(identifier, "$") == 0)  // is an anonymous string ?
1185
13.1k
  {
1186
13.1k
    if (compiler->loop_for_of_var_index >= 0)  // inside a loop ?
1187
13.0k
    {
1188
13.0k
      yr_parser_emit_with_arg(
1189
13.0k
          yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL);
1190
1191
13.0k
      yr_parser_emit(yyscanner, instruction, NULL);
1192
1193
13.0k
      YR_RULE* current_rule = _yr_compiler_get_rule_by_idx(
1194
13.0k
          compiler, compiler->current_rule_idx);
1195
1196
13.0k
      yr_rule_strings_foreach(current_rule, string)
1197
920k
      {
1198
920k
        if (instruction != OP_FOUND)
1199
920k
          string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1200
1201
920k
        if (instruction == OP_FOUND_AT)
1202
920
        {
1203
          // Avoid overwriting any previous fixed offset
1204
920
          if (string->fixed_offset == YR_UNDEFINED)
1205
268
            string->fixed_offset = at_offset;
1206
1207
          // If a previous fixed offset was different, disable
1208
          // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1209
          // have room to store a single fixed offset value
1210
920
          if (string->fixed_offset != at_offset)
1211
497
            string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1212
920
        }
1213
919k
        else
1214
919k
        {
1215
919k
          string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1216
919k
        }
1217
920k
      }
1218
13.0k
    }
1219
105
    else
1220
105
    {
1221
      // Anonymous strings not allowed outside of a loop
1222
105
      return ERROR_MISPLACED_ANONYMOUS_STRING;
1223
105
    }
1224
13.1k
  }
1225
2.47k
  else
1226
2.47k
  {
1227
2.47k
    FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string));
1228
1229
2.41k
    FAIL_ON_ERROR(
1230
2.41k
        yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL));
1231
1232
2.41k
    if (instruction != OP_FOUND)
1233
2.12k
      string->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1234
1235
2.41k
    if (instruction == OP_FOUND_AT)
1236
514
    {
1237
      // Avoid overwriting any previous fixed offset
1238
1239
514
      if (string->fixed_offset == YR_UNDEFINED)
1240
52
        string->fixed_offset = at_offset;
1241
1242
      // If a previous fixed offset was different, disable
1243
      // the STRING_GFLAGS_FIXED_OFFSET flag because we only
1244
      // have room to store a single fixed offset value
1245
1246
514
      if (string->fixed_offset == YR_UNDEFINED ||
1247
476
          string->fixed_offset != at_offset)
1248
253
      {
1249
253
        string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1250
253
      }
1251
514
    }
1252
1.90k
    else
1253
1.90k
    {
1254
1.90k
      string->flags &= ~STRING_FLAGS_FIXED_OFFSET;
1255
1.90k
    }
1256
1257
2.41k
    FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL));
1258
1259
2.41k
    string->flags |= STRING_FLAGS_REFERENCED;
1260
2.41k
  }
1261
1262
15.4k
  return ERROR_SUCCESS;
1263
15.6k
}
1264
1265
int yr_parser_reduce_meta_declaration(
1266
    yyscan_t yyscanner,
1267
    int32_t type,
1268
    const char* identifier,
1269
    const char* string,
1270
    int64_t integer,
1271
    YR_ARENA_REF* meta_ref)
1272
908
{
1273
908
  YR_ARENA_REF ref;
1274
908
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1275
1276
908
  FAIL_ON_ERROR(yr_arena_allocate_struct(
1277
908
      compiler->arena,
1278
908
      YR_METAS_TABLE,
1279
908
      sizeof(YR_META),
1280
908
      meta_ref,
1281
908
      offsetof(YR_META, identifier),
1282
908
      offsetof(YR_META, string),
1283
908
      EOL));
1284
1285
908
  YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref);
1286
1287
908
  meta->type = type;
1288
908
  meta->integer = integer;
1289
1290
908
  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref));
1291
1292
908
  meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1293
1294
908
  if (string != NULL)
1295
261
  {
1296
261
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref));
1297
1298
261
    meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
1299
261
  }
1300
647
  else
1301
647
  {
1302
647
    meta->string = NULL;
1303
647
  }
1304
1305
908
  compiler->current_meta_idx++;
1306
1307
908
  return ERROR_SUCCESS;
1308
908
}
1309
1310
// Tells whether `module_name` is acceptable as the name in an import
// statement.
//
// The name is rejected when it is empty or when strlen() of its contents
// differs from its recorded length (e.g. a NUL byte appears before `length`
// bytes). Returns non-zero for a valid name and zero otherwise.
static int _yr_parser_valid_module_name(SIZED_STRING* module_name)
{
  // The length test comes first, so strlen() is never run for an empty name.
  return module_name->length != 0 &&
         strlen(module_name->c_string) == module_name->length;
}
1320
1321
// Handles an import statement: registers the module's structure in the
// compiler's objects table (once per namespace) and emits OP_IMPORT.
//
//   yyscanner    scanner whose extra data is the YR_COMPILER
//   module_name  name of the module being imported
//
// Returns ERROR_SUCCESS (also when the module was already imported in the
// current namespace, in which case nothing is emitted),
// ERROR_INVALID_MODULE_NAME if the name doesn't pass
// _yr_parser_valid_module_name(), or the error reported by any of the called
// functions. For ERROR_INVALID_MODULE_NAME and ERROR_UNKNOWN_MODULE the
// module name is stored as the error's extra information.
int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name)
{
  int result;

  YR_ARENA_REF ref;
  YR_COMPILER* compiler = yyget_extra(yyscanner);
  YR_OBJECT* module_structure;

  if (!_yr_parser_valid_module_name(module_name))
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);

    return ERROR_INVALID_MODULE_NAME;
  }

  YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE));

  module_structure = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, module_name->c_string, ns->name);

  // if module already imported, do nothing

  if (module_structure != NULL)
    return ERROR_SUCCESS;

  FAIL_ON_ERROR(yr_object_create(
      OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure));

  result = yr_hash_table_add(
      compiler->objects_table,
      module_name->c_string,
      ns->name,
      module_structure);

  if (result != ERROR_SUCCESS)
  {
    // The structure never made it into the table, so nothing else holds a
    // pointer to it; destroy it here instead of leaking it.
    yr_object_destroy(module_structure);

    return result;
  }

  // From here on the structure stays in objects_table, including when one of
  // the steps below fails.
  result = yr_modules_do_declarations(module_name->c_string, module_structure);

  if (result == ERROR_UNKNOWN_MODULE)
  {
    yr_compiler_set_error_extra_info(compiler, module_name->c_string);
  }

  if (result != ERROR_SUCCESS)
    return result;

  // OP_IMPORT takes as argument a pointer to the compiler's stored copy of
  // the module name.
  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, module_name->c_string, &ref));

  FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc(
      yyscanner,
      OP_IMPORT,
      yr_arena_ref_to_ptr(compiler->arena, &ref),
      NULL,
      NULL));

  return ERROR_SUCCESS;
}
1378
1379
static int _yr_parser_operator_to_opcode(const char* op, int expression_type)
1380
20.7k
{
1381
20.7k
  int opcode = 0;
1382
1383
20.7k
  switch (expression_type)
1384
20.7k
  {
1385
14.7k
  case EXPRESSION_TYPE_INTEGER:
1386
14.7k
    opcode = OP_INT_BEGIN;
1387
14.7k
    break;
1388
3.43k
  case EXPRESSION_TYPE_FLOAT:
1389
3.43k
    opcode = OP_DBL_BEGIN;
1390
3.43k
    break;
1391
2.52k
  case EXPRESSION_TYPE_STRING:
1392
2.52k
    opcode = OP_STR_BEGIN;
1393
2.52k
    break;
1394
0
  default:
1395
0
    assert(false);
1396
20.7k
  }
1397
1398
20.7k
  if (op[0] == '<')
1399
1.55k
  {
1400
1.55k
    if (op[1] == '=')
1401
570
      opcode += _OP_LE;
1402
987
    else
1403
987
      opcode += _OP_LT;
1404
1.55k
  }
1405
19.1k
  else if (op[0] == '>')
1406
1.50k
  {
1407
1.50k
    if (op[1] == '=')
1408
694
      opcode += _OP_GE;
1409
810
    else
1410
810
      opcode += _OP_GT;
1411
1.50k
  }
1412
17.6k
  else if (op[1] == '=')
1413
1.45k
  {
1414
1.45k
    if (op[0] == '=')
1415
835
      opcode += _OP_EQ;
1416
618
    else
1417
618
      opcode += _OP_NEQ;
1418
1.45k
  }
1419
16.2k
  else if (op[0] == '+')
1420
3.50k
  {
1421
3.50k
    opcode += _OP_ADD;
1422
3.50k
  }
1423
12.7k
  else if (op[0] == '-')
1424
8.14k
  {
1425
8.14k
    opcode += _OP_SUB;
1426
8.14k
  }
1427
4.58k
  else if (op[0] == '*')
1428
2.69k
  {
1429
2.69k
    opcode += _OP_MUL;
1430
2.69k
  }
1431
1.89k
  else if (op[0] == '\\')
1432
1.89k
  {
1433
1.89k
    opcode += _OP_DIV;
1434
1.89k
  }
1435
1436
20.7k
  if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode))
1437
20.7k
  {
1438
20.7k
    return opcode;
1439
20.7k
  }
1440
1441
24
  return OP_ERROR;
1442
20.7k
}
1443
1444
int yr_parser_reduce_operation(
1445
    yyscan_t yyscanner,
1446
    const char* op,
1447
    YR_EXPRESSION left_operand,
1448
    YR_EXPRESSION right_operand)
1449
20.9k
{
1450
20.9k
  int expression_type;
1451
1452
20.9k
  YR_COMPILER* compiler = yyget_extra(yyscanner);
1453
1454
20.9k
  if ((left_operand.type == EXPRESSION_TYPE_INTEGER ||
1455
4.19k
       left_operand.type == EXPRESSION_TYPE_FLOAT) &&
1456
18.2k
      (right_operand.type == EXPRESSION_TYPE_INTEGER ||
1457
2.77k
       right_operand.type == EXPRESSION_TYPE_FLOAT))
1458
18.2k
  {
1459
18.2k
    if (left_operand.type != right_operand.type)
1460
2.62k
    {
1461
      // One operand is double and the other is integer,
1462
      // cast the integer to double
1463
1464
2.62k
      FAIL_ON_ERROR(yr_parser_emit_with_arg(
1465
2.62k
          yyscanner,
1466
2.62k
          OP_INT_TO_DBL,
1467
2.62k
          (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1,
1468
2.62k
          NULL,
1469
2.62k
          NULL));
1470
2.62k
    }
1471
1472
18.2k
    expression_type = EXPRESSION_TYPE_FLOAT;
1473
1474
18.2k
    if (left_operand.type == EXPRESSION_TYPE_INTEGER &&
1475
16.7k
        right_operand.type == EXPRESSION_TYPE_INTEGER)
1476
14.7k
    {
1477
14.7k
      expression_type = EXPRESSION_TYPE_INTEGER;
1478
14.7k
    }
1479
1480
18.2k
    FAIL_ON_ERROR(yr_parser_emit(
1481
18.2k
        yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL));
1482
18.2k
  }
1483
2.71k
  else if (
1484
2.71k
      left_operand.type == EXPRESSION_TYPE_STRING &&
1485
2.57k
      right_operand.type == EXPRESSION_TYPE_STRING)
1486
2.52k
  {
1487
2.52k
    int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING);
1488
1489
2.52k
    if (opcode != OP_ERROR)
1490
2.50k
    {
1491
2.50k
      FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL));
1492
2.50k
    }
1493
24
    else
1494
24
    {
1495
24
      yr_compiler_set_error_extra_info_fmt(
1496
24
          compiler, "strings don't support \"%s\" operation", op);
1497
1498
24
      return ERROR_WRONG_TYPE;
1499
24
    }
1500
2.52k
  }
1501
181
  else
1502
181
  {
1503
181
    yr_compiler_set_error_extra_info(compiler, "type mismatch");
1504
1505
181
    return ERROR_WRONG_TYPE;
1506
181
  }
1507
1508
20.7k
  return ERROR_SUCCESS;
1509
20.9k
}
1510
1511
int yr_parser_mark_nonfast(
1512
   yyscan_t yyscanner,
1513
   YR_STRING_SET string_set
1514
4.90k
) {
1515
4.90k
 YR_COMPILER* compiler = yyget_extra(yyscanner);
1516
1517
4.90k
 YR_STRING_SET_ELEMENT* head = string_set.head;
1518
892k
  while (head != NULL) {
1519
887k
    YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element);
1520
887k
    string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH;
1521
887k
    head = head->next;
1522
887k
  }
1523
4.90k
  return ERROR_SUCCESS;
1524
4.90k
}