Coverage Report

Created: 2023-03-26 07:38

/src/yara/libyara/scan.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2014. The YARA Authors. All Rights Reserved.
3
4
Redistribution and use in source and binary forms, with or without modification,
5
are permitted provided that the following conditions are met:
6
7
1. Redistributions of source code must retain the above copyright notice, this
8
list of conditions and the following disclaimer.
9
10
2. Redistributions in binary form must reproduce the above copyright notice,
11
this list of conditions and the following disclaimer in the documentation and/or
12
other materials provided with the distribution.
13
14
3. Neither the name of the copyright holder nor the names of its contributors
15
may be used to endorse or promote products derived from this software without
16
specific prior written permission.
17
18
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#include <assert.h>
31
#include <ctype.h>
32
#include <stdio.h>
33
#include <stdlib.h>
34
#include <yara/bitmask.h>
35
#include <yara/error.h>
36
#include <yara/globals.h>
37
#include <yara/libyara.h>
38
#include <yara/limits.h>
39
#include <yara/re.h>
40
#include <yara/rules.h>
41
#include <yara/scan.h>
42
#include <yara/stopwatch.h>
43
#include <yara/strutils.h>
44
#include <yara/types.h>
45
#include <yara/utils.h>
46
47
typedef struct _CALLBACK_ARGS
48
{
49
  YR_STRING* string;
50
  YR_SCAN_CONTEXT* context;
51
52
  const uint8_t* data;
53
  size_t data_size;
54
  uint64_t data_base;
55
56
  int forward_matches;
57
  int full_word;
58
  int xor_key;
59
60
} CALLBACK_ARGS;
61
62
//
// _yr_scan_xor_compare
//
// Checks whether the first string_length bytes of "data" are equal to the
// bytes of "string" XORed with a single one-byte key. The key is derived from
// the first byte of both buffers.
//
// Returns string_length if every byte matches, and 0 otherwise (this includes
// the cases where data_size < string_length or string_length is 0). The key
// is stored in *xor_key only when the result is greater than zero.
//
static int _yr_scan_xor_compare(
    const uint8_t* data,
    size_t data_size,
    uint8_t* string,
    size_t string_length,
    uint8_t* xor_key)
{
  int result = 0;
  const uint8_t* s1 = data;
  const uint8_t* s2 = string;
  uint8_t k = 0;

  size_t i = 0;

  // An empty string can never produce a positive result, and deriving the key
  // below reads one byte from each buffer, which would be out of bounds if
  // the buffers are empty. Bail out before touching them.
  if (string_length == 0 || data_size < string_length)
    goto _exit;

  // Calculate the xor key to compare with. *s1 is the start of the string we
  // matched on and *s2 is the "plaintext" string, so *s1 ^ *s2 is the key to
  // every *s2 as we compare.
  k = *s1 ^ *s2;

  while (i < string_length && *s1++ == ((*s2++) ^ k)) i++;

  result = (int) ((i == string_length) ? i : 0);

_exit:;

  YR_DEBUG_FPRINTF(
      2,
      stderr,
      "- %s(data_size=%zu string_length=%zu) {} = %d\n",
      __FUNCTION__,
      data_size,
      string_length,
      result);

  if (result > 0)
    *xor_key = k;

  return result;
}
104
105
//
// _yr_scan_xor_wcompare
//
// Wide counterpart of the XOR comparison. "data" is read in two-byte units:
// the first byte of each unit must be equal to the corresponding byte of
// "string" XORed with the key, and the second byte must be equal to the key
// itself (i.e. a zero byte XORed with the key). The key is derived from the
// first byte of both buffers.
//
// Returns string_length * 2 if the whole string matches, and 0 otherwise
// (this includes the cases where data_size < string_length * 2 or
// string_length is 0). The key is stored in *xor_key only when the result is
// greater than zero.
//
static int _yr_scan_xor_wcompare(
    const uint8_t* data,
    size_t data_size,
    uint8_t* string,
    size_t string_length,
    uint8_t* xor_key)
{
  int result = 0;
  const uint8_t* s1 = data;
  const uint8_t* s2 = string;
  uint8_t k = 0;

  size_t i = 0;

  // An empty string can never produce a positive result, and deriving the key
  // below reads one byte from each buffer, which would be out of bounds if
  // the buffers are empty. Return before touching them.
  if (string_length == 0 || data_size < string_length * 2)
    return 0;

  // Calculate the xor key to compare with. *s1 is the start of the string we
  // matched on and *s2 is the "plaintext" string, so *s1 ^ *s2 is the key to
  // every *s2 as we compare.
  k = *s1 ^ *s2;

  while (i < string_length && *s1 == ((*s2) ^ k) && ((*(s1 + 1)) ^ k) == 0x00)
  {
    s1 += 2;
    s2++;
    i++;
  }

  result = (int) ((i == string_length) ? i * 2 : 0);

  if (result > 0)
    *xor_key = k;

  return result;
}
141
142
//
// _yr_scan_compare
//
// Byte-by-byte comparison of "string" against the beginning of "data".
//
// Returns string_length if the first string_length bytes of "data" are equal
// to "string", and 0 otherwise (including when data_size < string_length).
//
static int _yr_scan_compare(
    const uint8_t* data,
    size_t data_size,
    uint8_t* string,
    size_t string_length)
{
  size_t pos;

  if (data_size < string_length)
    return 0;

  for (pos = 0; pos < string_length; pos++)
  {
    // Stop at the first byte that differs.
    if (data[pos] != string[pos])
      return 0;
  }

  return (int) string_length;
}
160
161
static int _yr_scan_icompare(
162
    const uint8_t* data,
163
    size_t data_size,
164
    uint8_t* string,
165
    size_t string_length)
166
0
{
167
0
  const uint8_t* s1 = data;
168
0
  const uint8_t* s2 = string;
169
170
0
  size_t i = 0;
171
172
0
  if (data_size < string_length)
173
0
    return 0;
174
175
0
  while (i < string_length && yr_lowercase[*s1++] == yr_lowercase[*s2++]) i++;
176
177
0
  return (int) ((i == string_length) ? i : 0);
178
0
}
179
180
//
// _yr_scan_wcompare
//
// Compares "string" against the beginning of "data", where "data" is read in
// two-byte units: the first byte of each unit must be equal to the
// corresponding byte of "string" and the second byte must be zero.
//
// Returns string_length * 2 if the whole string matches, and 0 otherwise
// (including when data_size < string_length * 2).
//
static int _yr_scan_wcompare(
    const uint8_t* data,
    size_t data_size,
    uint8_t* string,
    size_t string_length)
{
  int result = 0;

  if (data_size >= string_length * 2)
  {
    size_t matched = 0;

    while (matched < string_length &&
           data[2 * matched] == string[matched] &&
           data[2 * matched + 1] == 0x00)
    {
      matched++;
    }

    if (matched == string_length)
      result = (int) (matched * 2);
  }

  YR_DEBUG_FPRINTF(
      2,
      stderr,
      "- %s(data_size=%zu string_length=%zu) {} = %d\n",
      __FUNCTION__,
      data_size,
      string_length,
      result);

  return result;
}
217
218
static int _yr_scan_wicompare(
219
    const uint8_t* data,
220
    size_t data_size,
221
    uint8_t* string,
222
    size_t string_length)
223
0
{
224
0
  int result = 0;
225
0
  const uint8_t* s1 = data;
226
0
  const uint8_t* s2 = string;
227
228
0
  size_t i = 0;
229
230
0
  if (data_size < string_length * 2)
231
0
    goto _exit;
232
233
0
  while (i < string_length && yr_lowercase[*s1] == yr_lowercase[*s2] &&
234
0
         *(s1 + 1) == 0x00)
235
0
  {
236
0
    s1 += 2;
237
0
    s2++;
238
0
    i++;
239
0
  }
240
241
0
  result = (int) ((i == string_length) ? i * 2 : 0);
242
243
0
_exit:;
244
245
0
  YR_DEBUG_FPRINTF(
246
0
      2,
247
0
      stderr,
248
0
      "- %s(data_size=%zu string_length=%zu) {} = %d\n",
249
0
      __FUNCTION__,
250
0
      data_size,
251
0
      string_length,
252
0
      result);
253
254
0
  return result;
255
0
}
256
257
static void _yr_scan_update_match_chain_length(
258
    YR_SCAN_CONTEXT* context,
259
    YR_STRING* string,
260
    YR_MATCH* match_to_update,
261
    int chain_length)
262
0
{
263
0
  YR_MATCH* match;
264
265
0
  if (match_to_update->chain_length == chain_length)
266
0
    return;
267
268
0
  match_to_update->chain_length = chain_length;
269
270
0
  if (string->chained_to == NULL)
271
0
    return;
272
273
0
  match = context->unconfirmed_matches[string->chained_to->idx].head;
274
275
0
  while (match != NULL)
276
0
  {
277
0
    int64_t ending_offset = match->offset + match->match_length;
278
279
0
    if (ending_offset + string->chain_gap_max >= match_to_update->offset &&
280
0
        ending_offset + string->chain_gap_min <= match_to_update->offset)
281
0
    {
282
0
      _yr_scan_update_match_chain_length(
283
0
          context, string->chained_to, match, chain_length + 1);
284
0
    }
285
286
0
    match = match->next;
287
0
  }
288
0
}
289
290
static int _yr_scan_add_match_to_list(
291
    YR_MATCH* match,
292
    YR_MATCHES* matches_list,
293
    int replace_if_exists)
294
0
{
295
0
  int result = ERROR_SUCCESS;
296
297
#if YR_DEBUG_VERBOSITY > 0
298
  int32_t count_orig = matches_list->count;
299
#endif
300
301
0
  YR_MATCH* insertion_point = matches_list->tail;
302
303
0
  if (matches_list->count == YR_MAX_STRING_MATCHES)
304
0
  {
305
0
    result = ERROR_TOO_MANY_MATCHES;
306
0
    goto _exit;
307
0
  }
308
309
0
  while (insertion_point != NULL)
310
0
  {
311
0
    if ((match->base + match->offset) ==
312
0
        (insertion_point->base + insertion_point->offset))
313
0
    {
314
0
      if (replace_if_exists)
315
0
      {
316
0
        insertion_point->match_length = match->match_length;
317
0
        insertion_point->data_length = match->data_length;
318
0
        insertion_point->data = match->data;
319
0
      }
320
321
0
      goto _exit;  // return ERROR_SUCCESS
322
0
    }
323
324
0
    if ((match->base + match->offset) >
325
0
        (insertion_point->base + insertion_point->offset))
326
0
      break;
327
328
0
    insertion_point = insertion_point->prev;
329
0
  }
330
331
0
  match->prev = insertion_point;
332
333
0
  if (insertion_point != NULL)
334
0
  {
335
0
    match->next = insertion_point->next;
336
0
    insertion_point->next = match;
337
0
  }
338
0
  else
339
0
  {
340
0
    match->next = matches_list->head;
341
0
    matches_list->head = match;
342
0
  }
343
344
0
  matches_list->count++;
345
346
0
  if (match->next != NULL)
347
0
    match->next->prev = match;
348
0
  else
349
0
    matches_list->tail = match;
350
351
0
_exit:;
352
353
0
  YR_DEBUG_FPRINTF(
354
0
      2,
355
0
      stderr,
356
0
      "- %s(replace_if_exists=%d) {} = %d //"
357
0
      " match->base=0x%" PRIx64 " match->offset=%" PRIi64
358
0
      " matches_list->count=%u += %u\n",
359
0
      __FUNCTION__,
360
0
      replace_if_exists,
361
0
      result,
362
0
      match->base,
363
0
      match->offset,
364
0
      count_orig,
365
0
      matches_list->count - count_orig);
366
367
0
  return result;
368
0
}
369
370
static void _yr_scan_remove_match_from_list(
371
    YR_MATCH* match,
372
    YR_MATCHES* matches_list)
373
0
{
374
0
  if (match->prev != NULL)
375
0
    match->prev->next = match->next;
376
377
0
  if (match->next != NULL)
378
0
    match->next->prev = match->prev;
379
380
0
  if (matches_list->head == match)
381
0
    matches_list->head = match->next;
382
383
0
  if (matches_list->tail == match)
384
0
    matches_list->tail = match->prev;
385
386
0
  matches_list->count--;
387
0
  match->next = NULL;
388
0
  match->prev = NULL;
389
0
}
390
391
//
392
// _yr_scan_verify_chained_string_match
393
//
394
// Given a string that is part of a string chain and is matching at some
395
// point in the scanned data, this function determines if the whole string
396
// chain is also matching. For example, if the string S was split and
397
// converted in a chain S1 <- S2 <- S3 (see yr_re_ast_split_at_chaining_point),
398
// and a match for S3 was found, this function finds out if there are matches
399
// for S1 and S2 that together with the match found for S3 form a match for
400
// the whole S.
401
//
402
// Notice that this function operates in a non-greedy fashion. Matches found
403
// for S will be the shortest possible ones.
404
//
405
406
static int _yr_scan_verify_chained_string_match(
407
    YR_STRING* matching_string,
408
    YR_SCAN_CONTEXT* context,
409
    const uint8_t* match_data,
410
    uint64_t match_base,
411
    uint64_t match_offset,
412
    int32_t match_length,
413
    uint8_t xor_key)
414
0
{
415
0
  YR_DEBUG_FPRINTF(
416
0
      2,
417
0
      stderr,
418
0
      "- %s (match_data=%p match_base=%" PRIx64 " match_offset=0x%" PRIx64
419
0
      " match_length=%'d) {} \n",
420
0
      __FUNCTION__,
421
0
      match_data,
422
0
      match_base,
423
0
      match_offset,
424
0
      match_length);
425
426
0
  YR_STRING* string;
427
0
  YR_MATCH* match;
428
0
  YR_MATCH* next_match;
429
0
  YR_MATCH* new_match;
430
431
0
  uint64_t lowest_offset;
432
0
  uint64_t ending_offset;
433
0
  int32_t full_chain_length;
434
435
0
  bool add_match = false;
436
437
0
  if (matching_string->chained_to == NULL)
438
0
  {
439
    // The matching string is the head of the chain, this match should be
440
    // added to the list of unconfirmed matches. The match will remain
441
    // unconfirmed until all the strings in the chain are found with the
442
    // correct distances between them.
443
0
    add_match = true;
444
0
  }
445
0
  else
446
0
  {
447
    // If some unconfirmed match exists, the lowest possible offset where the
448
    // whole string chain can match is the offset of the first string in the
449
    // list of unconfirmed matches. Unconfirmed matches are sorted in ascending
450
    // offset order. If no unconfirmed match exists, the lowest possible offset
451
    // is the offset of the current match.
452
0
    match = context->unconfirmed_matches[matching_string->idx].head;
453
454
0
    if (match != NULL)
455
0
      lowest_offset = match->offset;
456
0
    else
457
0
      lowest_offset = match_offset;
458
459
    // Iterate over the list of unconfirmed matches for the string that
460
    // precedes the currently matching string. If we have a string chain like:
461
    // S1 <- S2 <- S3, and we just found a match for S2, we are iterating the
462
    // list of unconfirmed matches of S1.
463
0
    match = context->unconfirmed_matches[matching_string->chained_to->idx].head;
464
465
0
    while (match != NULL)
466
0
    {
467
      // Store match->next so that we can use it later for advancing in the
468
      // list, if _yr_scan_remove_match_from_list is called, match->next is
469
      // set to NULL, that's why we store its current value before that happens.
470
0
      next_match = match->next;
471
472
      // The unconfirmed match starts at match->offset and finishes at
473
      // ending_offset.
474
0
      ending_offset = match->offset + match->match_length;
475
476
0
      if (ending_offset + matching_string->chain_gap_max < lowest_offset)
477
0
      {
478
        // If the current match is too far away from the unconfirmed match,
479
        // remove the unconfirmed match from the list because it has been
480
        // negatively confirmed (i.e: we can be sure that this unconfirmed
481
        // match can't be an actual match)
482
0
        _yr_scan_remove_match_from_list(
483
0
            match,
484
0
            &context->unconfirmed_matches[matching_string->chained_to->idx]);
485
0
      }
486
0
      else if (
487
0
          ending_offset + matching_string->chain_gap_max >= match_offset &&
488
0
          ending_offset + matching_string->chain_gap_min <= match_offset)
489
0
      {
490
        // If the distance between the end of the unconfirmed match and the
491
        // start of the current match is within the range specified in the
492
        // regexp or hex string, this could be an actual match.
493
0
        add_match = true;
494
0
        break;
495
0
      }
496
497
0
      match = next_match;
498
0
    }
499
0
  }
500
501
0
  if (add_match)
502
0
  {
503
0
    uint32_t max_match_data;
504
505
0
    FAIL_ON_ERROR(
506
0
        yr_get_configuration_uint32(YR_CONFIG_MAX_MATCH_DATA, &max_match_data))
507
508
0
    if (STRING_IS_CHAIN_TAIL(matching_string))
509
0
    {
510
      // The matching string is the tail of the string chain. It must be
511
      // chained to some other string.
512
0
      assert(matching_string->chained_to != NULL);
513
514
      // Iterate over the list of unconfirmed matches of the preceding string
515
      // in the chain and update the chain_length field for each of them. This
516
      // is a recursive operation that will update the chain_length field for
517
      // every unconfirmed match in all the strings in the chain up to the head
518
      // of the chain.
519
0
      match =
520
0
          context->unconfirmed_matches[matching_string->chained_to->idx].head;
521
522
0
      while (match != NULL)
523
0
      {
524
0
        ending_offset = match->offset + match->match_length;
525
526
0
        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
527
0
            ending_offset + matching_string->chain_gap_min <= match_offset)
528
0
        {
529
0
          _yr_scan_update_match_chain_length(
530
0
              context, matching_string->chained_to, match, 1);
531
0
        }
532
533
0
        match = match->next;
534
0
      }
535
536
0
      full_chain_length = 0;
537
0
      string = matching_string;
538
539
0
      while (string->chained_to != NULL)
540
0
      {
541
0
        full_chain_length++;
542
0
        string = string->chained_to;
543
0
      }
544
545
      // "string" points now to the head of the strings chain.
546
0
      match = context->unconfirmed_matches[string->idx].head;
547
548
      // Iterate over the list of unconfirmed matches of the head of the chain,
549
      // and move to the list of confirmed matches those with a chain_length
550
      // equal to full_chain_length, which means that the whole chain has been
551
      // confirmed to match.
552
0
      while (match != NULL)
553
0
      {
554
0
        next_match = match->next;
555
556
0
        if (match->chain_length == full_chain_length)
557
0
        {
558
0
          _yr_scan_remove_match_from_list(
559
0
              match, &context->unconfirmed_matches[string->idx]);
560
561
0
          match->match_length =
562
0
              (int32_t) (match_offset - match->offset + match_length);
563
564
0
          match->data_length = yr_min(
565
0
              match->match_length, (int32_t) max_match_data);
566
567
0
          match->data = yr_notebook_alloc(
568
0
              context->matches_notebook, match->data_length);
569
570
0
          if (match->data == NULL)
571
0
            return ERROR_INSUFFICIENT_MEMORY;
572
573
0
          memcpy(
574
0
              (void*) match->data,
575
0
              match_data - match_offset + match->offset,
576
0
              match->data_length);
577
578
0
          FAIL_ON_ERROR(_yr_scan_add_match_to_list(
579
0
              match, &context->matches[string->idx], false));
580
0
        }
581
582
0
        match = next_match;
583
0
      }
584
0
    }
585
0
    else  // It's a part of a chain, but not the tail.
586
0
    {
587
0
      new_match = yr_notebook_alloc(
588
0
          context->matches_notebook, sizeof(YR_MATCH));
589
590
0
      if (new_match == NULL)
591
0
        return ERROR_INSUFFICIENT_MEMORY;
592
593
0
      new_match->base = match_base;
594
0
      new_match->offset = match_offset;
595
0
      new_match->match_length = match_length;
596
0
      new_match->chain_length = 0;
597
0
      new_match->prev = NULL;
598
0
      new_match->next = NULL;
599
0
      new_match->is_private = STRING_IS_PRIVATE(matching_string);
600
0
      new_match->xor_key = xor_key;
601
602
      // A copy of the matching data is written to the matches_arena, the
603
      // amount of data copies is limited by YR_CONFIG_MAX_MATCH_DATA.
604
0
      new_match->data_length = yr_min(match_length, (int32_t) max_match_data);
605
606
0
      if (new_match->data_length > 0)
607
0
      {
608
0
        new_match->data = yr_notebook_alloc(
609
0
            context->matches_notebook, new_match->data_length);
610
611
0
        if (new_match->data == NULL)
612
0
          return ERROR_INSUFFICIENT_MEMORY;
613
614
0
        memcpy((void*) new_match->data, match_data, new_match->data_length);
615
0
      }
616
0
      else
617
0
      {
618
0
        new_match->data = NULL;
619
0
      }
620
621
      // Add the match to the list of unconfirmed matches because the string
622
      // is part of a chain but not its tail, so we can't be sure the this is
623
      // an actual match until finding the remaining parts of the chain.
624
0
      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
625
0
          new_match,
626
0
          &context->unconfirmed_matches[matching_string->idx],
627
0
          false));
628
0
    }
629
0
  }
630
631
0
  return ERROR_SUCCESS;
632
0
}
633
634
static int _yr_scan_match_callback(
635
    const uint8_t* match_data,
636
    int32_t match_length,
637
    int flags,
638
    void* args)
639
0
{
640
0
  CALLBACK_ARGS* callback_args = (CALLBACK_ARGS*) args;
641
642
0
  YR_STRING* string = callback_args->string;
643
0
  YR_MATCH* new_match;
644
645
0
  int result = ERROR_SUCCESS;
646
647
0
  size_t match_offset = match_data - callback_args->data;
648
649
0
  YR_DEBUG_FPRINTF(
650
0
      2,
651
0
      stderr,
652
0
      "+ %s(match_data=%p match_length=%d) { //"
653
0
      " match_offset=%zu args->data=%p args->string.length=%u"
654
0
      " args->data_base=0x%" PRIx64 " args->data_size=%zu"
655
0
      " args->forward_matches=%'u\n",
656
0
      __FUNCTION__,
657
0
      match_data,
658
0
      match_length,
659
0
      match_offset,
660
0
      callback_args->data,
661
0
      callback_args->string->length,
662
0
      callback_args->data_base,
663
0
      callback_args->data_size,
664
0
      callback_args->forward_matches);
665
666
  // total match length is the sum of backward and forward matches.
667
0
  match_length += callback_args->forward_matches;
668
669
  // make sure that match fits into the data.
670
0
  assert(match_offset + match_length <= callback_args->data_size);
671
672
0
  if (callback_args->full_word)
673
0
  {
674
0
    if (flags & RE_FLAGS_WIDE)
675
0
    {
676
0
      if (match_offset >= 2 && *(match_data - 1) == 0 &&
677
0
          yr_isalnum(match_data - 2))
678
0
        goto _exit;  // return ERROR_SUCCESS;
679
680
0
      if (match_offset + match_length + 1 < callback_args->data_size &&
681
0
          *(match_data + match_length + 1) == 0 &&
682
0
          yr_isalnum(match_data + match_length))
683
0
        goto _exit;  // return ERROR_SUCCESS;
684
0
    }
685
0
    else
686
0
    {
687
0
      if (match_offset >= 1 && yr_isalnum(match_data - 1))
688
0
        goto _exit;  // return ERROR_SUCCESS;
689
690
0
      if (match_offset + match_length < callback_args->data_size &&
691
0
          yr_isalnum(match_data + match_length))
692
0
        goto _exit;  // return ERROR_SUCCESS;
693
0
    }
694
0
  }
695
696
0
  if (STRING_IS_CHAIN_PART(string))
697
0
  {
698
0
    result = _yr_scan_verify_chained_string_match(
699
0
        string,
700
0
        callback_args->context,
701
0
        match_data,
702
0
        callback_args->data_base,
703
0
        match_offset,
704
0
        match_length,
705
0
        callback_args->xor_key);
706
0
  }
707
0
  else
708
0
  {
709
0
    uint32_t max_match_data;
710
711
0
    FAIL_ON_ERROR(
712
0
        yr_get_configuration_uint32(YR_CONFIG_MAX_MATCH_DATA, &max_match_data));
713
714
0
    new_match = yr_notebook_alloc(
715
0
        callback_args->context->matches_notebook, sizeof(YR_MATCH));
716
717
0
    if (new_match == NULL)
718
0
    {
719
0
      result = ERROR_INSUFFICIENT_MEMORY;
720
0
      goto _exit;
721
0
    }
722
723
0
    new_match->data_length = yr_min(match_length, (int32_t) max_match_data);
724
725
0
    if (new_match->data_length > 0)
726
0
    {
727
0
      new_match->data = yr_notebook_alloc(
728
0
          callback_args->context->matches_notebook, new_match->data_length);
729
730
0
      if (new_match->data == NULL)
731
0
      {
732
0
        result = ERROR_INSUFFICIENT_MEMORY;
733
0
        goto _exit;
734
0
      }
735
736
0
      memcpy((void*) new_match->data, match_data, new_match->data_length);
737
0
    }
738
0
    else
739
0
    {
740
0
      new_match->data = NULL;
741
0
    }
742
743
0
    if (result == ERROR_SUCCESS)
744
0
    {
745
0
      new_match->base = callback_args->data_base;
746
0
      new_match->offset = match_offset;
747
0
      new_match->match_length = match_length;
748
0
      new_match->prev = NULL;
749
0
      new_match->next = NULL;
750
0
      new_match->is_private = STRING_IS_PRIVATE(string);
751
0
      new_match->xor_key = callback_args->xor_key;
752
753
0
      FAIL_ON_ERROR(_yr_scan_add_match_to_list(
754
0
          new_match,
755
0
          &callback_args->context->matches[string->idx],
756
0
          STRING_IS_GREEDY_REGEXP(string)));
757
0
    }
758
0
  }
759
760
0
_exit:;
761
762
0
  YR_DEBUG_FPRINTF(2, stderr, "} = %d // %s()\n", result, __FUNCTION__);
763
764
0
  return result;
765
0
}
766
767
typedef int (*RE_EXEC_FUNC)(
768
    YR_SCAN_CONTEXT* context,
769
    const uint8_t* code,
770
    const uint8_t* input,
771
    size_t input_forwards_size,
772
    size_t input_backwards_size,
773
    int flags,
774
    RE_MATCH_CALLBACK_FUNC callback,
775
    void* callback_args,
776
    int* matches);
777
778
static int _yr_scan_verify_re_match(
779
    YR_SCAN_CONTEXT* context,
780
    YR_AC_MATCH* ac_match,
781
    const uint8_t* data,
782
    size_t data_size,
783
    uint64_t data_base,
784
    size_t offset)
785
0
{
786
0
  YR_DEBUG_FPRINTF(
787
0
      2,
788
0
      stderr,
789
0
      "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n",
790
0
      __FUNCTION__,
791
0
      data,
792
0
      data_size,
793
0
      data_base,
794
0
      offset);
795
796
0
  CALLBACK_ARGS callback_args;
797
0
  RE_EXEC_FUNC exec;
798
799
0
  int forward_matches = -1;
800
0
  int backward_matches = -1;
801
0
  int flags = 0;
802
803
0
  if (STRING_IS_GREEDY_REGEXP(ac_match->string))
804
0
    flags |= RE_FLAGS_GREEDY;
805
806
0
  if (STRING_IS_NO_CASE(ac_match->string))
807
0
    flags |= RE_FLAGS_NO_CASE;
808
809
0
  if (STRING_IS_DOT_ALL(ac_match->string))
810
0
    flags |= RE_FLAGS_DOT_ALL;
811
812
0
  if (STRING_IS_FAST_REGEXP(ac_match->string))
813
0
    exec = yr_re_fast_exec;
814
0
  else
815
0
    exec = yr_re_exec;
816
817
0
  if (STRING_IS_ASCII(ac_match->string) || STRING_IS_BASE64(ac_match->string) ||
818
0
      STRING_IS_BASE64_WIDE(ac_match->string))
819
0
  {
820
0
    FAIL_ON_ERROR(exec(
821
0
        context,
822
0
        ac_match->forward_code,
823
0
        data + offset,
824
0
        data_size - offset,
825
0
        offset,
826
0
        flags,
827
0
        NULL,
828
0
        NULL,
829
0
        &forward_matches));
830
0
  }
831
832
0
  if ((forward_matches == -1) && (STRING_IS_WIDE(ac_match->string) &&
833
0
                                  !(STRING_IS_BASE64(ac_match->string) ||
834
0
                                    STRING_IS_BASE64_WIDE(ac_match->string))))
835
0
  {
836
0
    flags |= RE_FLAGS_WIDE;
837
0
    FAIL_ON_ERROR(exec(
838
0
        context,
839
0
        ac_match->forward_code,
840
0
        data + offset,
841
0
        data_size - offset,
842
0
        offset,
843
0
        flags,
844
0
        NULL,
845
0
        NULL,
846
0
        &forward_matches));
847
0
  }
848
849
0
  if (forward_matches == -1)
850
0
    return ERROR_SUCCESS;
851
852
0
  if (forward_matches == 0 && ac_match->backward_code == NULL)
853
0
    return ERROR_SUCCESS;
854
855
0
  callback_args.string = ac_match->string;
856
0
  callback_args.context = context;
857
0
  callback_args.data = data;
858
0
  callback_args.data_size = data_size;
859
0
  callback_args.data_base = data_base;
860
0
  callback_args.forward_matches = forward_matches;
861
0
  callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string);
862
  // xor modifier is not valid for RE but set it so we don't leak stack values.
863
0
  callback_args.xor_key = 0;
864
865
0
  if (ac_match->backward_code != NULL)
866
0
  {
867
0
    FAIL_ON_ERROR(exec(
868
0
        context,
869
0
        ac_match->backward_code,
870
0
        data + offset,
871
0
        data_size - offset,
872
0
        offset,
873
0
        flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
874
0
        _yr_scan_match_callback,
875
0
        (void*) &callback_args,
876
0
        &backward_matches));
877
0
  }
878
0
  else
879
0
  {
880
0
    FAIL_ON_ERROR(
881
0
        _yr_scan_match_callback(data + offset, 0, flags, &callback_args));
882
0
  }
883
884
0
  return ERROR_SUCCESS;
885
0
}
886
887
static int _yr_scan_verify_literal_match(
888
    YR_SCAN_CONTEXT* context,
889
    YR_AC_MATCH* ac_match,
890
    const uint8_t* data,
891
    size_t data_size,
892
    uint64_t data_base,
893
    size_t offset)
894
0
{
895
0
  YR_DEBUG_FPRINTF(
896
0
      2,
897
0
      stderr,
898
0
      "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n",
899
0
      __FUNCTION__,
900
0
      data,
901
0
      data_size,
902
0
      data_base,
903
0
      offset);
904
905
0
  int flags = 0;
906
0
  int forward_matches = 0;
907
0
  uint8_t xor_key = 0;
908
909
0
  CALLBACK_ARGS callback_args;
910
0
  YR_STRING* string = ac_match->string;
911
912
0
  if (STRING_FITS_IN_ATOM(string))
913
0
  {
914
0
    forward_matches = ac_match->backtrack;
915
0
    if (STRING_IS_XOR(string))
916
0
    {
917
0
      if (STRING_IS_WIDE(string))
918
0
      {
919
0
        _yr_scan_xor_wcompare(
920
0
            data + offset,
921
0
            data_size - offset,
922
0
            string->string,
923
0
            string->length,
924
0
            &xor_key);
925
0
      }
926
927
0
      if (STRING_IS_ASCII(string))
928
0
      {
929
0
        _yr_scan_xor_compare(
930
0
            data + offset,
931
0
            data_size - offset,
932
0
            string->string,
933
0
            string->length,
934
0
            &xor_key);
935
0
      }
936
0
    }
937
0
  }
938
0
  else if (STRING_IS_NO_CASE(string))
939
0
  {
940
0
    if (STRING_IS_ASCII(string))
941
0
    {
942
0
      forward_matches = _yr_scan_icompare(
943
0
          data + offset, data_size - offset, string->string, string->length);
944
0
    }
945
946
0
    if (STRING_IS_WIDE(string) && forward_matches == 0)
947
0
    {
948
0
      forward_matches = _yr_scan_wicompare(
949
0
          data + offset, data_size - offset, string->string, string->length);
950
0
    }
951
0
  }
952
0
  else
953
0
  {
954
0
    if (STRING_IS_ASCII(string))
955
0
    {
956
0
      forward_matches = _yr_scan_compare(
957
0
          data + offset, data_size - offset, string->string, string->length);
958
0
    }
959
960
0
    if (STRING_IS_WIDE(string) && forward_matches == 0)
961
0
    {
962
0
      forward_matches = _yr_scan_wcompare(
963
0
          data + offset, data_size - offset, string->string, string->length);
964
0
    }
965
966
0
    if (STRING_IS_XOR(string) && forward_matches == 0)
967
0
    {
968
0
      if (STRING_IS_WIDE(string))
969
0
      {
970
0
        forward_matches = _yr_scan_xor_wcompare(
971
0
            data + offset,
972
0
            data_size - offset,
973
0
            string->string,
974
0
            string->length,
975
0
            &xor_key);
976
0
      }
977
978
0
      if (forward_matches == 0)
979
0
      {
980
0
        forward_matches = _yr_scan_xor_compare(
981
0
            data + offset,
982
0
            data_size - offset,
983
0
            string->string,
984
0
            string->length,
985
0
            &xor_key);
986
0
      }
987
0
    }
988
0
  }
989
990
0
  if (forward_matches == 0)
991
0
    return ERROR_SUCCESS;
992
993
0
  if (forward_matches == string->length * 2)
994
0
    flags |= RE_FLAGS_WIDE;
995
996
0
  if (STRING_IS_NO_CASE(string))
997
0
    flags |= RE_FLAGS_NO_CASE;
998
999
0
  callback_args.context = context;
1000
0
  callback_args.string = string;
1001
0
  callback_args.data = data;
1002
0
  callback_args.data_size = data_size;
1003
0
  callback_args.data_base = data_base;
1004
0
  callback_args.forward_matches = forward_matches;
1005
0
  callback_args.full_word = STRING_IS_FULL_WORD(string);
1006
0
  callback_args.xor_key = xor_key;
1007
1008
0
  FAIL_ON_ERROR(
1009
0
      _yr_scan_match_callback(data + offset, 0, flags, &callback_args));
1010
1011
0
  return ERROR_SUCCESS;
1012
0
}
1013
1014
int yr_scan_verify_match(
1015
    YR_SCAN_CONTEXT* context,
1016
    YR_AC_MATCH* ac_match,
1017
    const uint8_t* data,
1018
    size_t data_size,
1019
    uint64_t data_base,
1020
    size_t offset)
1021
0
{
1022
0
  YR_DEBUG_FPRINTF(
1023
0
      2,
1024
0
      stderr,
1025
0
      "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n",
1026
0
      __FUNCTION__,
1027
0
      data,
1028
0
      data_size,
1029
0
      data_base,
1030
0
      offset);
1031
1032
0
  YR_STRING* string = ac_match->string;
1033
0
  YR_CALLBACK_FUNC callback = context->callback;
1034
1035
0
  int result;
1036
1037
0
  if (data_size - offset <= 0)
1038
0
    return ERROR_SUCCESS;
1039
1040
0
  if (yr_bitmask_is_set(context->strings_temp_disabled, string->idx))
1041
0
    return ERROR_SUCCESS;
1042
1043
0
  if (context->flags & SCAN_FLAGS_FAST_MODE && STRING_IS_SINGLE_MATCH(string) &&
1044
0
      context->matches[string->idx].head != NULL)
1045
0
    return ERROR_SUCCESS;
1046
1047
0
  if (STRING_IS_FIXED_OFFSET(string) &&
1048
0
      string->fixed_offset != data_base + offset)
1049
0
    return ERROR_SUCCESS;
1050
1051
#ifdef YR_PROFILING_ENABLED
1052
  uint64_t start_time;
1053
  bool sample = context->profiling_info[string->rule_idx].atom_matches %
1054
                    YR_MATCH_VERIFICATION_PROFILING_RATE ==
1055
                0;
1056
1057
  if (sample)
1058
    start_time = yr_stopwatch_elapsed_ns(&context->stopwatch);
1059
#endif
1060
1061
0
  if (STRING_IS_LITERAL(string))
1062
0
  {
1063
0
    result = _yr_scan_verify_literal_match(
1064
0
        context, ac_match, data, data_size, data_base, offset);
1065
0
  }
1066
0
  else
1067
0
  {
1068
0
    result = _yr_scan_verify_re_match(
1069
0
        context, ac_match, data, data_size, data_base, offset);
1070
0
  }
1071
1072
  // If _yr_scan_verify_literal_match or _yr_scan_verify_re_match return
1073
  // ERROR_TOO_MANY_MATCHES call the callback with CALLBACK_MSG_TOO_MANY_MATCHES
1074
  // in order to ask what to do. If the callback returns CALLBACK_CONTINUE
1075
  // this error is ignored, if not, the error is propagated to the caller.
1076
0
  if (result == ERROR_TOO_MANY_MATCHES)
1077
0
  {
1078
0
    result = callback(
1079
0
        context,
1080
0
        CALLBACK_MSG_TOO_MANY_MATCHES,
1081
0
        (void*) string,
1082
0
        context->user_data);
1083
1084
0
    switch (result)
1085
0
    {
1086
0
    case CALLBACK_CONTINUE:
1087
0
      yr_bitmask_set(context->strings_temp_disabled, string->idx);
1088
0
      result = ERROR_SUCCESS;
1089
0
      break;
1090
1091
0
    default:
1092
0
      result = ERROR_TOO_MANY_MATCHES;
1093
0
      break;
1094
0
    }
1095
0
  }
1096
1097
#ifdef YR_PROFILING_ENABLED
1098
  if (sample)
1099
  {
1100
    uint64_t finish_time = yr_stopwatch_elapsed_ns(&context->stopwatch);
1101
    context->profiling_info[string->rule_idx].match_time +=
1102
        (finish_time - start_time);
1103
  }
1104
  context->profiling_info[string->rule_idx].atom_matches++;
1105
#endif
1106
1107
0
  if (result != ERROR_SUCCESS)
1108
0
    context->last_error_string = string;
1109
1110
0
  return result;
1111
0
}