Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2014. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <assert.h> |
31 | | #include <ctype.h> |
32 | | #include <stdio.h> |
33 | | #include <stdlib.h> |
34 | | #include <yara/bitmask.h> |
35 | | #include <yara/error.h> |
36 | | #include <yara/globals.h> |
37 | | #include <yara/libyara.h> |
38 | | #include <yara/limits.h> |
39 | | #include <yara/re.h> |
40 | | #include <yara/rules.h> |
41 | | #include <yara/scan.h> |
42 | | #include <yara/stopwatch.h> |
43 | | #include <yara/strutils.h> |
44 | | #include <yara/types.h> |
45 | | #include <yara/utils.h> |
46 | | |
47 | | typedef struct _CALLBACK_ARGS |
48 | | { |
49 | | YR_STRING* string; |
50 | | YR_SCAN_CONTEXT* context; |
51 | | |
52 | | const uint8_t* data; |
53 | | size_t data_size; |
54 | | uint64_t data_base; |
55 | | |
56 | | int forward_matches; |
57 | | int full_word; |
58 | | int xor_key; |
59 | | |
60 | | } CALLBACK_ARGS; |
61 | | |
62 | | static int _yr_scan_xor_compare( |
63 | | const uint8_t* data, |
64 | | size_t data_size, |
65 | | uint8_t* string, |
66 | | size_t string_length, |
67 | | uint8_t* xor_key) |
68 | 0 | { |
69 | 0 | int result = 0; |
70 | 0 | const uint8_t* s1 = data; |
71 | 0 | const uint8_t* s2 = string; |
72 | 0 | uint8_t k = 0; |
73 | |
|
74 | 0 | size_t i = 0; |
75 | |
|
76 | 0 | if (data_size < string_length) |
77 | 0 | goto _exit; |
78 | | |
79 | | // Calculate the xor key to compare with. *s1 is the start of the string we |
80 | | // matched on and *s2 is the "plaintext" string, so *s1 ^ *s2 is the key that |
81 | | // must be applied to every byte of *s2 as we compare. |
82 | 0 | k = *s1 ^ *s2; |
83 | |
|
84 | 0 | while (i < string_length && *s1++ == ((*s2++) ^ k)) i++; |
85 | |
|
86 | 0 | result = (int) ((i == string_length) ? i : 0); |
87 | |
|
88 | 0 | _exit:; |
89 | |
|
90 | 0 | YR_DEBUG_FPRINTF( |
91 | 0 | 2, |
92 | 0 | stderr, |
93 | 0 | "- %s(data_size=%zu string_length=%zu) {} = %d\n", |
94 | 0 | __FUNCTION__, |
95 | 0 | data_size, |
96 | 0 | string_length, |
97 | 0 | result); |
98 | |
|
99 | 0 | if (result > 0) |
100 | 0 | *xor_key = k; |
101 | |
|
102 | 0 | return result; |
103 | 0 | } |
104 | | |
105 | | static int _yr_scan_xor_wcompare( |
106 | | const uint8_t* data, |
107 | | size_t data_size, |
108 | | uint8_t* string, |
109 | | size_t string_length, |
110 | | uint8_t* xor_key) |
111 | 0 | { |
112 | 0 | int result = 0; |
113 | 0 | const uint8_t* s1 = data; |
114 | 0 | const uint8_t* s2 = string; |
115 | 0 | uint8_t k = 0; |
116 | |
|
117 | 0 | size_t i = 0; |
118 | |
|
119 | 0 | if (data_size < string_length * 2) |
120 | 0 | return 0; |
121 | | |
122 | | // Calculate the xor key to compare with. *s1 is the start of the string we |
123 | | // matched on and *s2 is the "plaintext" string, so *s1 ^ *s2 is the key that |
124 | | // must be applied to every byte of *s2 as we compare. |
125 | 0 | k = *s1 ^ *s2; |
126 | |
|
127 | 0 | while (i < string_length && *s1 == ((*s2) ^ k) && ((*(s1 + 1)) ^ k) == 0x00) |
128 | 0 | { |
129 | 0 | s1 += 2; |
130 | 0 | s2++; |
131 | 0 | i++; |
132 | 0 | } |
133 | |
|
134 | 0 | result = (int) ((i == string_length) ? i * 2 : 0); |
135 | |
|
136 | 0 | if (result > 0) |
137 | 0 | *xor_key = k; |
138 | |
|
139 | 0 | return result; |
140 | 0 | } |
141 | | |
142 | | static int _yr_scan_compare( |
143 | | const uint8_t* data, |
144 | | size_t data_size, |
145 | | uint8_t* string, |
146 | | size_t string_length) |
147 | 0 | { |
148 | 0 | const uint8_t* s1 = data; |
149 | 0 | const uint8_t* s2 = string; |
150 | |
|
151 | 0 | size_t i = 0; |
152 | |
|
153 | 0 | if (data_size < string_length) |
154 | 0 | return 0; |
155 | | |
156 | 0 | while (i < string_length && *s1++ == *s2++) i++; |
157 | |
|
158 | 0 | return (int) ((i == string_length) ? i : 0); |
159 | 0 | } |
160 | | |
161 | | static int _yr_scan_icompare( |
162 | | const uint8_t* data, |
163 | | size_t data_size, |
164 | | uint8_t* string, |
165 | | size_t string_length) |
166 | 0 | { |
167 | 0 | const uint8_t* s1 = data; |
168 | 0 | const uint8_t* s2 = string; |
169 | |
|
170 | 0 | size_t i = 0; |
171 | |
|
172 | 0 | if (data_size < string_length) |
173 | 0 | return 0; |
174 | | |
175 | 0 | while (i < string_length && yr_lowercase[*s1++] == yr_lowercase[*s2++]) i++; |
176 | |
|
177 | 0 | return (int) ((i == string_length) ? i : 0); |
178 | 0 | } |
179 | | |
180 | | static int _yr_scan_wcompare( |
181 | | const uint8_t* data, |
182 | | size_t data_size, |
183 | | uint8_t* string, |
184 | | size_t string_length) |
185 | 0 | { |
186 | 0 | int result = 0; |
187 | 0 | const uint8_t* s1 = data; |
188 | 0 | const uint8_t* s2 = string; |
189 | |
|
190 | 0 | size_t i = 0; |
191 | |
|
192 | 0 | if (data_size < string_length * 2) |
193 | 0 | goto _exit; |
194 | | |
195 | 0 | while (i < string_length && *s1 == *s2 && *(s1 + 1) == 0x00) |
196 | 0 | { |
197 | 0 | s1 += 2; |
198 | 0 | s2++; |
199 | 0 | i++; |
200 | 0 | } |
201 | |
|
202 | 0 | result = (int) ((i == string_length) ? i * 2 : 0); |
203 | |
|
204 | 0 | _exit:; |
205 | |
|
206 | 0 | YR_DEBUG_FPRINTF( |
207 | 0 | 2, |
208 | 0 | stderr, |
209 | 0 | "- %s(data_size=%zu string_length=%zu) {} = %d\n", |
210 | 0 | __FUNCTION__, |
211 | 0 | data_size, |
212 | 0 | string_length, |
213 | 0 | result); |
214 | |
|
215 | 0 | return result; |
216 | 0 | } |
217 | | |
218 | | static int _yr_scan_wicompare( |
219 | | const uint8_t* data, |
220 | | size_t data_size, |
221 | | uint8_t* string, |
222 | | size_t string_length) |
223 | 0 | { |
224 | 0 | int result = 0; |
225 | 0 | const uint8_t* s1 = data; |
226 | 0 | const uint8_t* s2 = string; |
227 | |
|
228 | 0 | size_t i = 0; |
229 | |
|
230 | 0 | if (data_size < string_length * 2) |
231 | 0 | goto _exit; |
232 | | |
233 | 0 | while (i < string_length && yr_lowercase[*s1] == yr_lowercase[*s2] && |
234 | 0 | *(s1 + 1) == 0x00) |
235 | 0 | { |
236 | 0 | s1 += 2; |
237 | 0 | s2++; |
238 | 0 | i++; |
239 | 0 | } |
240 | |
|
241 | 0 | result = (int) ((i == string_length) ? i * 2 : 0); |
242 | |
|
243 | 0 | _exit:; |
244 | |
|
245 | 0 | YR_DEBUG_FPRINTF( |
246 | 0 | 2, |
247 | 0 | stderr, |
248 | 0 | "- %s(data_size=%zu string_length=%zu) {} = %d\n", |
249 | 0 | __FUNCTION__, |
250 | 0 | data_size, |
251 | 0 | string_length, |
252 | 0 | result); |
253 | |
|
254 | 0 | return result; |
255 | 0 | } |
256 | | |
257 | | static void _yr_scan_update_match_chain_length( |
258 | | YR_SCAN_CONTEXT* context, |
259 | | YR_STRING* string, |
260 | | YR_MATCH* match_to_update, |
261 | | int chain_length) |
262 | 0 | { |
263 | 0 | YR_MATCH* match; |
264 | |
|
265 | 0 | if (match_to_update->chain_length == chain_length) |
266 | 0 | return; |
267 | | |
268 | 0 | match_to_update->chain_length = chain_length; |
269 | |
|
270 | 0 | if (string->chained_to == NULL) |
271 | 0 | return; |
272 | | |
273 | 0 | match = context->unconfirmed_matches[string->chained_to->idx].head; |
274 | |
|
275 | 0 | while (match != NULL) |
276 | 0 | { |
277 | 0 | int64_t ending_offset = match->offset + match->match_length; |
278 | |
|
279 | 0 | if (ending_offset + string->chain_gap_max >= match_to_update->offset && |
280 | 0 | ending_offset + string->chain_gap_min <= match_to_update->offset) |
281 | 0 | { |
282 | 0 | _yr_scan_update_match_chain_length( |
283 | 0 | context, string->chained_to, match, chain_length + 1); |
284 | 0 | } |
285 | |
|
286 | 0 | match = match->next; |
287 | 0 | } |
288 | 0 | } |
289 | | |
290 | | static int _yr_scan_add_match_to_list( |
291 | | YR_MATCH* match, |
292 | | YR_MATCHES* matches_list, |
293 | | int replace_if_exists) |
294 | 0 | { |
295 | 0 | int result = ERROR_SUCCESS; |
296 | |
|
297 | | #if YR_DEBUG_VERBOSITY > 0 |
298 | | int32_t count_orig = matches_list->count; |
299 | | #endif |
300 | |
|
301 | 0 | YR_MATCH* insertion_point = matches_list->tail; |
302 | |
|
303 | 0 | if (matches_list->count == YR_MAX_STRING_MATCHES) |
304 | 0 | { |
305 | 0 | result = ERROR_TOO_MANY_MATCHES; |
306 | 0 | goto _exit; |
307 | 0 | } |
308 | | |
309 | 0 | while (insertion_point != NULL) |
310 | 0 | { |
311 | 0 | if ((match->base + match->offset) == |
312 | 0 | (insertion_point->base + insertion_point->offset)) |
313 | 0 | { |
314 | 0 | if (replace_if_exists) |
315 | 0 | { |
316 | 0 | insertion_point->match_length = match->match_length; |
317 | 0 | insertion_point->data_length = match->data_length; |
318 | 0 | insertion_point->data = match->data; |
319 | 0 | } |
320 | |
|
321 | 0 | goto _exit; // return ERROR_SUCCESS |
322 | 0 | } |
323 | | |
324 | 0 | if ((match->base + match->offset) > |
325 | 0 | (insertion_point->base + insertion_point->offset)) |
326 | 0 | break; |
327 | | |
328 | 0 | insertion_point = insertion_point->prev; |
329 | 0 | } |
330 | | |
331 | 0 | match->prev = insertion_point; |
332 | |
|
333 | 0 | if (insertion_point != NULL) |
334 | 0 | { |
335 | 0 | match->next = insertion_point->next; |
336 | 0 | insertion_point->next = match; |
337 | 0 | } |
338 | 0 | else |
339 | 0 | { |
340 | 0 | match->next = matches_list->head; |
341 | 0 | matches_list->head = match; |
342 | 0 | } |
343 | |
|
344 | 0 | matches_list->count++; |
345 | |
|
346 | 0 | if (match->next != NULL) |
347 | 0 | match->next->prev = match; |
348 | 0 | else |
349 | 0 | matches_list->tail = match; |
350 | |
|
351 | 0 | _exit:; |
352 | |
|
353 | 0 | YR_DEBUG_FPRINTF( |
354 | 0 | 2, |
355 | 0 | stderr, |
356 | 0 | "- %s(replace_if_exists=%d) {} = %d //" |
357 | 0 | " match->base=0x%" PRIx64 " match->offset=%" PRIi64 |
358 | 0 | " matches_list->count=%u += %u\n", |
359 | 0 | __FUNCTION__, |
360 | 0 | replace_if_exists, |
361 | 0 | result, |
362 | 0 | match->base, |
363 | 0 | match->offset, |
364 | 0 | count_orig, |
365 | 0 | matches_list->count - count_orig); |
366 | |
|
367 | 0 | return result; |
368 | 0 | } |
369 | | |
370 | | static void _yr_scan_remove_match_from_list( |
371 | | YR_MATCH* match, |
372 | | YR_MATCHES* matches_list) |
373 | 0 | { |
374 | 0 | if (match->prev != NULL) |
375 | 0 | match->prev->next = match->next; |
376 | |
|
377 | 0 | if (match->next != NULL) |
378 | 0 | match->next->prev = match->prev; |
379 | |
|
380 | 0 | if (matches_list->head == match) |
381 | 0 | matches_list->head = match->next; |
382 | |
|
383 | 0 | if (matches_list->tail == match) |
384 | 0 | matches_list->tail = match->prev; |
385 | |
|
386 | 0 | matches_list->count--; |
387 | 0 | match->next = NULL; |
388 | 0 | match->prev = NULL; |
389 | 0 | } |
390 | | |
391 | | // |
392 | | // _yr_scan_verify_chained_string_match |
393 | | // |
394 | | // Given a string that is part of a string chain and is matching at some |
395 | | // point in the scanned data, this function determines if the whole string |
396 | | // chain is also matching. For example, if the string S was split and |
397 | | // converted into a chain S1 <- S2 <- S3 (see yr_re_ast_split_at_chaining_point), |
398 | | // and a match for S3 was found, this function finds out if there are matches |
399 | | // for S1 and S2 that, together with the match found for S3, form a match for |
400 | | // the whole S. |
401 | | // |
402 | | // Notice that this function operates in a non-greedy fashion. Matches found |
403 | | // for S will be the shortest possible ones. |
404 | | // |
405 | | |
406 | | static int _yr_scan_verify_chained_string_match( |
407 | | YR_STRING* matching_string, |
408 | | YR_SCAN_CONTEXT* context, |
409 | | const uint8_t* match_data, |
410 | | uint64_t match_base, |
411 | | uint64_t match_offset, |
412 | | int32_t match_length, |
413 | | uint8_t xor_key) |
414 | 0 | { |
415 | 0 | YR_DEBUG_FPRINTF( |
416 | 0 | 2, |
417 | 0 | stderr, |
418 | 0 | "- %s (match_data=%p match_base=%" PRIx64 " match_offset=0x%" PRIx64 |
419 | 0 | " match_length=%'d) {} \n", |
420 | 0 | __FUNCTION__, |
421 | 0 | match_data, |
422 | 0 | match_base, |
423 | 0 | match_offset, |
424 | 0 | match_length); |
425 | |
|
426 | 0 | YR_STRING* string; |
427 | 0 | YR_MATCH* match; |
428 | 0 | YR_MATCH* next_match; |
429 | 0 | YR_MATCH* new_match; |
430 | |
|
431 | 0 | uint64_t lowest_offset; |
432 | 0 | uint64_t ending_offset; |
433 | 0 | int32_t full_chain_length; |
434 | |
|
435 | 0 | bool add_match = false; |
436 | |
|
437 | 0 | if (matching_string->chained_to == NULL) |
438 | 0 | { |
439 | | // The matching string is the head of the chain, this match should be |
440 | | // added to the list of unconfirmed matches. The match will remain |
441 | | // unconfirmed until all the strings in the chain are found with the |
442 | | // correct distances between them. |
443 | 0 | add_match = true; |
444 | 0 | } |
445 | 0 | else |
446 | 0 | { |
447 | | // If some unconfirmed match exists, the lowest possible offset where the |
448 | | // whole string chain can match is the offset of the first string in the |
449 | | // list of unconfirmed matches. Unconfirmed matches are sorted in ascending |
450 | | // offset order. If no unconfirmed match exists, the lowest possible offset |
451 | | // is the offset of the current match. |
452 | 0 | match = context->unconfirmed_matches[matching_string->idx].head; |
453 | |
|
454 | 0 | if (match != NULL) |
455 | 0 | lowest_offset = match->offset; |
456 | 0 | else |
457 | 0 | lowest_offset = match_offset; |
458 | | |
459 | | // Iterate over the list of unconfirmed matches for the string that |
460 | | // precedes the currently matching string. If we have a string chain like: |
461 | | // S1 <- S2 <- S3, and we just found a match for S2, we are iterating the |
462 | | // list of unconfirmed matches of S1. |
463 | 0 | match = context->unconfirmed_matches[matching_string->chained_to->idx].head; |
464 | |
|
465 | 0 | while (match != NULL) |
466 | 0 | { |
467 | | // Store match->next so that we can use it later for advancing in the |
468 | | // list. If _yr_scan_remove_match_from_list is called, match->next is |
469 | | // set to NULL; that's why we store its current value before that happens. |
470 | 0 | next_match = match->next; |
471 | | |
472 | | // The unconfirmed match starts at match->offset and finishes at |
473 | | // ending_offset. |
474 | 0 | ending_offset = match->offset + match->match_length; |
475 | |
|
476 | 0 | if (ending_offset + matching_string->chain_gap_max < lowest_offset) |
477 | 0 | { |
478 | | // If the current match is too far away from the unconfirmed match, |
479 | | // remove the unconfirmed match from the list because it has been |
480 | | // negatively confirmed (i.e: we can be sure that this unconfirmed |
481 | | // match can't be an actual match) |
482 | 0 | _yr_scan_remove_match_from_list( |
483 | 0 | match, |
484 | 0 | &context->unconfirmed_matches[matching_string->chained_to->idx]); |
485 | 0 | } |
486 | 0 | else if ( |
487 | 0 | ending_offset + matching_string->chain_gap_max >= match_offset && |
488 | 0 | ending_offset + matching_string->chain_gap_min <= match_offset) |
489 | 0 | { |
490 | | // If the distance between the end of the unconfirmed match and the |
491 | | // start of the current match is within the range specified in the |
492 | | // regexp or hex string, this could be an actual match. |
493 | 0 | add_match = true; |
494 | 0 | break; |
495 | 0 | } |
496 | | |
497 | 0 | match = next_match; |
498 | 0 | } |
499 | 0 | } |
500 | |
|
501 | 0 | if (add_match) |
502 | 0 | { |
503 | 0 | uint32_t max_match_data; |
504 | |
|
505 | 0 | FAIL_ON_ERROR( |
506 | 0 | yr_get_configuration_uint32(YR_CONFIG_MAX_MATCH_DATA, &max_match_data)) |
507 | | |
508 | 0 | if (STRING_IS_CHAIN_TAIL(matching_string)) |
509 | 0 | { |
510 | | // The matching string is the tail of the string chain. It must be |
511 | | // chained to some other string. |
512 | 0 | assert(matching_string->chained_to != NULL); |
513 | | |
514 | | // Iterate over the list of unconfirmed matches of the preceding string |
515 | | // in the chain and update the chain_length field for each of them. This |
516 | | // is a recursive operation that will update the chain_length field for |
517 | | // every unconfirmed match in all the strings in the chain up to the head |
518 | | // of the chain. |
519 | 0 | match = |
520 | 0 | context->unconfirmed_matches[matching_string->chained_to->idx].head; |
521 | |
|
522 | 0 | while (match != NULL) |
523 | 0 | { |
524 | 0 | ending_offset = match->offset + match->match_length; |
525 | |
|
526 | 0 | if (ending_offset + matching_string->chain_gap_max >= match_offset && |
527 | 0 | ending_offset + matching_string->chain_gap_min <= match_offset) |
528 | 0 | { |
529 | 0 | _yr_scan_update_match_chain_length( |
530 | 0 | context, matching_string->chained_to, match, 1); |
531 | 0 | } |
532 | |
|
533 | 0 | match = match->next; |
534 | 0 | } |
535 | |
|
536 | 0 | full_chain_length = 0; |
537 | 0 | string = matching_string; |
538 | |
|
539 | 0 | while (string->chained_to != NULL) |
540 | 0 | { |
541 | 0 | full_chain_length++; |
542 | 0 | string = string->chained_to; |
543 | 0 | } |
544 | | |
545 | | // "string" points now to the head of the strings chain. |
546 | 0 | match = context->unconfirmed_matches[string->idx].head; |
547 | | |
548 | | // Iterate over the list of unconfirmed matches of the head of the chain, |
549 | | // and move to the list of confirmed matches those with a chain_length |
550 | | // equal to full_chain_length, which means that the whole chain has been |
551 | | // confirmed to match. |
552 | 0 | while (match != NULL) |
553 | 0 | { |
554 | 0 | next_match = match->next; |
555 | |
|
556 | 0 | if (match->chain_length == full_chain_length) |
557 | 0 | { |
558 | 0 | _yr_scan_remove_match_from_list( |
559 | 0 | match, &context->unconfirmed_matches[string->idx]); |
560 | |
|
561 | 0 | match->match_length = |
562 | 0 | (int32_t) (match_offset - match->offset + match_length); |
563 | |
|
564 | 0 | match->data_length = yr_min( |
565 | 0 | match->match_length, (int32_t) max_match_data); |
566 | |
|
567 | 0 | match->data = yr_notebook_alloc( |
568 | 0 | context->matches_notebook, match->data_length); |
569 | |
|
570 | 0 | if (match->data == NULL) |
571 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
572 | | |
573 | 0 | memcpy( |
574 | 0 | (void*) match->data, |
575 | 0 | match_data - match_offset + match->offset, |
576 | 0 | match->data_length); |
577 | |
|
578 | 0 | FAIL_ON_ERROR(_yr_scan_add_match_to_list( |
579 | 0 | match, &context->matches[string->idx], false)); |
580 | 0 | } |
581 | | |
582 | 0 | match = next_match; |
583 | 0 | } |
584 | 0 | } |
585 | 0 | else // It's a part of a chain, but not the tail. |
586 | 0 | { |
587 | 0 | new_match = yr_notebook_alloc( |
588 | 0 | context->matches_notebook, sizeof(YR_MATCH)); |
589 | |
|
590 | 0 | if (new_match == NULL) |
591 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
592 | | |
593 | 0 | new_match->base = match_base; |
594 | 0 | new_match->offset = match_offset; |
595 | 0 | new_match->match_length = match_length; |
596 | 0 | new_match->chain_length = 0; |
597 | 0 | new_match->prev = NULL; |
598 | 0 | new_match->next = NULL; |
599 | 0 | new_match->is_private = STRING_IS_PRIVATE(matching_string); |
600 | 0 | new_match->xor_key = xor_key; |
601 | | |
602 | | // A copy of the matching data is written to the matches notebook; the |
603 | | // amount of data copied is limited by YR_CONFIG_MAX_MATCH_DATA. |
604 | 0 | new_match->data_length = yr_min(match_length, (int32_t) max_match_data); |
605 | |
|
606 | 0 | if (new_match->data_length > 0) |
607 | 0 | { |
608 | 0 | new_match->data = yr_notebook_alloc( |
609 | 0 | context->matches_notebook, new_match->data_length); |
610 | |
|
611 | 0 | if (new_match->data == NULL) |
612 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
613 | | |
614 | 0 | memcpy((void*) new_match->data, match_data, new_match->data_length); |
615 | 0 | } |
616 | 0 | else |
617 | 0 | { |
618 | 0 | new_match->data = NULL; |
619 | 0 | } |
620 | | |
621 | | // Add the match to the list of unconfirmed matches because the string |
622 | | // is part of a chain but not its tail, so we can't be sure that this is |
623 | | // an actual match until finding the remaining parts of the chain. |
624 | 0 | FAIL_ON_ERROR(_yr_scan_add_match_to_list( |
625 | 0 | new_match, |
626 | 0 | &context->unconfirmed_matches[matching_string->idx], |
627 | 0 | false)); |
628 | 0 | } |
629 | 0 | } |
630 | | |
631 | 0 | return ERROR_SUCCESS; |
632 | 0 | } |
633 | | |
634 | | static int _yr_scan_match_callback( |
635 | | const uint8_t* match_data, |
636 | | int32_t match_length, |
637 | | int flags, |
638 | | void* args) |
639 | 0 | { |
640 | 0 | CALLBACK_ARGS* callback_args = (CALLBACK_ARGS*) args; |
641 | |
|
642 | 0 | YR_STRING* string = callback_args->string; |
643 | 0 | YR_MATCH* new_match; |
644 | |
|
645 | 0 | int result = ERROR_SUCCESS; |
646 | |
|
647 | 0 | size_t match_offset = match_data - callback_args->data; |
648 | |
|
649 | 0 | YR_DEBUG_FPRINTF( |
650 | 0 | 2, |
651 | 0 | stderr, |
652 | 0 | "+ %s(match_data=%p match_length=%d) { //" |
653 | 0 | " match_offset=%zu args->data=%p args->string.length=%u" |
654 | 0 | " args->data_base=0x%" PRIx64 " args->data_size=%zu" |
655 | 0 | " args->forward_matches=%'u\n", |
656 | 0 | __FUNCTION__, |
657 | 0 | match_data, |
658 | 0 | match_length, |
659 | 0 | match_offset, |
660 | 0 | callback_args->data, |
661 | 0 | callback_args->string->length, |
662 | 0 | callback_args->data_base, |
663 | 0 | callback_args->data_size, |
664 | 0 | callback_args->forward_matches); |
665 | | |
666 | | // total match length is the sum of backward and forward matches. |
667 | 0 | match_length += callback_args->forward_matches; |
668 | | |
669 | | // make sure that match fits into the data. |
670 | 0 | assert(match_offset + match_length <= callback_args->data_size); |
671 | | |
672 | 0 | if (callback_args->full_word) |
673 | 0 | { |
674 | 0 | if (flags & RE_FLAGS_WIDE) |
675 | 0 | { |
676 | 0 | if (match_offset >= 2 && *(match_data - 1) == 0 && |
677 | 0 | yr_isalnum(match_data - 2)) |
678 | 0 | goto _exit; // return ERROR_SUCCESS; |
679 | | |
680 | 0 | if (match_offset + match_length + 1 < callback_args->data_size && |
681 | 0 | *(match_data + match_length + 1) == 0 && |
682 | 0 | yr_isalnum(match_data + match_length)) |
683 | 0 | goto _exit; // return ERROR_SUCCESS; |
684 | 0 | } |
685 | 0 | else |
686 | 0 | { |
687 | 0 | if (match_offset >= 1 && yr_isalnum(match_data - 1)) |
688 | 0 | goto _exit; // return ERROR_SUCCESS; |
689 | | |
690 | 0 | if (match_offset + match_length < callback_args->data_size && |
691 | 0 | yr_isalnum(match_data + match_length)) |
692 | 0 | goto _exit; // return ERROR_SUCCESS; |
693 | 0 | } |
694 | 0 | } |
695 | | |
696 | 0 | if (STRING_IS_CHAIN_PART(string)) |
697 | 0 | { |
698 | 0 | result = _yr_scan_verify_chained_string_match( |
699 | 0 | string, |
700 | 0 | callback_args->context, |
701 | 0 | match_data, |
702 | 0 | callback_args->data_base, |
703 | 0 | match_offset, |
704 | 0 | match_length, |
705 | 0 | callback_args->xor_key); |
706 | 0 | } |
707 | 0 | else |
708 | 0 | { |
709 | 0 | uint32_t max_match_data; |
710 | |
|
711 | 0 | FAIL_ON_ERROR( |
712 | 0 | yr_get_configuration_uint32(YR_CONFIG_MAX_MATCH_DATA, &max_match_data)); |
713 | |
|
714 | 0 | new_match = yr_notebook_alloc( |
715 | 0 | callback_args->context->matches_notebook, sizeof(YR_MATCH)); |
716 | |
|
717 | 0 | if (new_match == NULL) |
718 | 0 | { |
719 | 0 | result = ERROR_INSUFFICIENT_MEMORY; |
720 | 0 | goto _exit; |
721 | 0 | } |
722 | | |
723 | 0 | new_match->data_length = yr_min(match_length, (int32_t) max_match_data); |
724 | |
|
725 | 0 | if (new_match->data_length > 0) |
726 | 0 | { |
727 | 0 | new_match->data = yr_notebook_alloc( |
728 | 0 | callback_args->context->matches_notebook, new_match->data_length); |
729 | |
|
730 | 0 | if (new_match->data == NULL) |
731 | 0 | { |
732 | 0 | result = ERROR_INSUFFICIENT_MEMORY; |
733 | 0 | goto _exit; |
734 | 0 | } |
735 | | |
736 | 0 | memcpy((void*) new_match->data, match_data, new_match->data_length); |
737 | 0 | } |
738 | 0 | else |
739 | 0 | { |
740 | 0 | new_match->data = NULL; |
741 | 0 | } |
742 | | |
743 | 0 | if (result == ERROR_SUCCESS) |
744 | 0 | { |
745 | 0 | new_match->base = callback_args->data_base; |
746 | 0 | new_match->offset = match_offset; |
747 | 0 | new_match->match_length = match_length; |
748 | 0 | new_match->prev = NULL; |
749 | 0 | new_match->next = NULL; |
750 | 0 | new_match->is_private = STRING_IS_PRIVATE(string); |
751 | 0 | new_match->xor_key = callback_args->xor_key; |
752 | |
|
753 | 0 | FAIL_ON_ERROR(_yr_scan_add_match_to_list( |
754 | 0 | new_match, |
755 | 0 | &callback_args->context->matches[string->idx], |
756 | 0 | STRING_IS_GREEDY_REGEXP(string))); |
757 | 0 | } |
758 | 0 | } |
759 | | |
760 | 0 | _exit:; |
761 | |
|
762 | 0 | YR_DEBUG_FPRINTF(2, stderr, "} = %d // %s()\n", result, __FUNCTION__); |
763 | |
|
764 | 0 | return result; |
765 | 0 | } |
766 | | |
767 | | typedef int (*RE_EXEC_FUNC)( |
768 | | YR_SCAN_CONTEXT* context, |
769 | | const uint8_t* code, |
770 | | const uint8_t* input, |
771 | | size_t input_forwards_size, |
772 | | size_t input_backwards_size, |
773 | | int flags, |
774 | | RE_MATCH_CALLBACK_FUNC callback, |
775 | | void* callback_args, |
776 | | int* matches); |
777 | | |
778 | | static int _yr_scan_verify_re_match( |
779 | | YR_SCAN_CONTEXT* context, |
780 | | YR_AC_MATCH* ac_match, |
781 | | const uint8_t* data, |
782 | | size_t data_size, |
783 | | uint64_t data_base, |
784 | | size_t offset) |
785 | 0 | { |
786 | 0 | YR_DEBUG_FPRINTF( |
787 | 0 | 2, |
788 | 0 | stderr, |
789 | 0 | "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n", |
790 | 0 | __FUNCTION__, |
791 | 0 | data, |
792 | 0 | data_size, |
793 | 0 | data_base, |
794 | 0 | offset); |
795 | |
|
796 | 0 | CALLBACK_ARGS callback_args; |
797 | 0 | RE_EXEC_FUNC exec; |
798 | |
|
799 | 0 | int forward_matches = -1; |
800 | 0 | int backward_matches = -1; |
801 | 0 | int flags = 0; |
802 | |
|
803 | 0 | if (STRING_IS_GREEDY_REGEXP(ac_match->string)) |
804 | 0 | flags |= RE_FLAGS_GREEDY; |
805 | |
|
806 | 0 | if (STRING_IS_NO_CASE(ac_match->string)) |
807 | 0 | flags |= RE_FLAGS_NO_CASE; |
808 | |
|
809 | 0 | if (STRING_IS_DOT_ALL(ac_match->string)) |
810 | 0 | flags |= RE_FLAGS_DOT_ALL; |
811 | |
|
812 | 0 | if (STRING_IS_FAST_REGEXP(ac_match->string)) |
813 | 0 | exec = yr_re_fast_exec; |
814 | 0 | else |
815 | 0 | exec = yr_re_exec; |
816 | |
|
817 | 0 | if (STRING_IS_ASCII(ac_match->string) || STRING_IS_BASE64(ac_match->string) || |
818 | 0 | STRING_IS_BASE64_WIDE(ac_match->string)) |
819 | 0 | { |
820 | 0 | FAIL_ON_ERROR(exec( |
821 | 0 | context, |
822 | 0 | ac_match->forward_code, |
823 | 0 | data + offset, |
824 | 0 | data_size - offset, |
825 | 0 | offset, |
826 | 0 | flags, |
827 | 0 | NULL, |
828 | 0 | NULL, |
829 | 0 | &forward_matches)); |
830 | 0 | } |
831 | | |
832 | 0 | if ((forward_matches == -1) && (STRING_IS_WIDE(ac_match->string) && |
833 | 0 | !(STRING_IS_BASE64(ac_match->string) || |
834 | 0 | STRING_IS_BASE64_WIDE(ac_match->string)))) |
835 | 0 | { |
836 | 0 | flags |= RE_FLAGS_WIDE; |
837 | 0 | FAIL_ON_ERROR(exec( |
838 | 0 | context, |
839 | 0 | ac_match->forward_code, |
840 | 0 | data + offset, |
841 | 0 | data_size - offset, |
842 | 0 | offset, |
843 | 0 | flags, |
844 | 0 | NULL, |
845 | 0 | NULL, |
846 | 0 | &forward_matches)); |
847 | 0 | } |
848 | | |
849 | 0 | if (forward_matches == -1) |
850 | 0 | return ERROR_SUCCESS; |
851 | | |
852 | 0 | if (forward_matches == 0 && ac_match->backward_code == NULL) |
853 | 0 | return ERROR_SUCCESS; |
854 | | |
855 | 0 | callback_args.string = ac_match->string; |
856 | 0 | callback_args.context = context; |
857 | 0 | callback_args.data = data; |
858 | 0 | callback_args.data_size = data_size; |
859 | 0 | callback_args.data_base = data_base; |
860 | 0 | callback_args.forward_matches = forward_matches; |
861 | 0 | callback_args.full_word = STRING_IS_FULL_WORD(ac_match->string); |
862 | | // xor modifier is not valid for RE but set it so we don't leak stack values. |
863 | 0 | callback_args.xor_key = 0; |
864 | |
|
865 | 0 | if (ac_match->backward_code != NULL) |
866 | 0 | { |
867 | 0 | FAIL_ON_ERROR(exec( |
868 | 0 | context, |
869 | 0 | ac_match->backward_code, |
870 | 0 | data + offset, |
871 | 0 | data_size - offset, |
872 | 0 | offset, |
873 | 0 | flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE, |
874 | 0 | _yr_scan_match_callback, |
875 | 0 | (void*) &callback_args, |
876 | 0 | &backward_matches)); |
877 | 0 | } |
878 | 0 | else |
879 | 0 | { |
880 | 0 | FAIL_ON_ERROR( |
881 | 0 | _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); |
882 | 0 | } |
883 | | |
884 | 0 | return ERROR_SUCCESS; |
885 | 0 | } |
886 | | |
887 | | static int _yr_scan_verify_literal_match( |
888 | | YR_SCAN_CONTEXT* context, |
889 | | YR_AC_MATCH* ac_match, |
890 | | const uint8_t* data, |
891 | | size_t data_size, |
892 | | uint64_t data_base, |
893 | | size_t offset) |
894 | 0 | { |
895 | 0 | YR_DEBUG_FPRINTF( |
896 | 0 | 2, |
897 | 0 | stderr, |
898 | 0 | "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n", |
899 | 0 | __FUNCTION__, |
900 | 0 | data, |
901 | 0 | data_size, |
902 | 0 | data_base, |
903 | 0 | offset); |
904 | |
|
905 | 0 | int flags = 0; |
906 | 0 | int forward_matches = 0; |
907 | 0 | uint8_t xor_key = 0; |
908 | |
|
909 | 0 | CALLBACK_ARGS callback_args; |
910 | 0 | YR_STRING* string = ac_match->string; |
911 | |
|
912 | 0 | if (STRING_FITS_IN_ATOM(string)) |
913 | 0 | { |
914 | 0 | forward_matches = ac_match->backtrack; |
915 | 0 | if (STRING_IS_XOR(string)) |
916 | 0 | { |
917 | 0 | if (STRING_IS_WIDE(string)) |
918 | 0 | { |
919 | 0 | _yr_scan_xor_wcompare( |
920 | 0 | data + offset, |
921 | 0 | data_size - offset, |
922 | 0 | string->string, |
923 | 0 | string->length, |
924 | 0 | &xor_key); |
925 | 0 | } |
926 | |
|
927 | 0 | if (STRING_IS_ASCII(string)) |
928 | 0 | { |
929 | 0 | _yr_scan_xor_compare( |
930 | 0 | data + offset, |
931 | 0 | data_size - offset, |
932 | 0 | string->string, |
933 | 0 | string->length, |
934 | 0 | &xor_key); |
935 | 0 | } |
936 | 0 | } |
937 | 0 | } |
938 | 0 | else if (STRING_IS_NO_CASE(string)) |
939 | 0 | { |
940 | 0 | if (STRING_IS_ASCII(string)) |
941 | 0 | { |
942 | 0 | forward_matches = _yr_scan_icompare( |
943 | 0 | data + offset, data_size - offset, string->string, string->length); |
944 | 0 | } |
945 | |
|
946 | 0 | if (STRING_IS_WIDE(string) && forward_matches == 0) |
947 | 0 | { |
948 | 0 | forward_matches = _yr_scan_wicompare( |
949 | 0 | data + offset, data_size - offset, string->string, string->length); |
950 | 0 | } |
951 | 0 | } |
952 | 0 | else |
953 | 0 | { |
954 | 0 | if (STRING_IS_ASCII(string)) |
955 | 0 | { |
956 | 0 | forward_matches = _yr_scan_compare( |
957 | 0 | data + offset, data_size - offset, string->string, string->length); |
958 | 0 | } |
959 | |
|
960 | 0 | if (STRING_IS_WIDE(string) && forward_matches == 0) |
961 | 0 | { |
962 | 0 | forward_matches = _yr_scan_wcompare( |
963 | 0 | data + offset, data_size - offset, string->string, string->length); |
964 | 0 | } |
965 | |
|
966 | 0 | if (STRING_IS_XOR(string) && forward_matches == 0) |
967 | 0 | { |
968 | 0 | if (STRING_IS_WIDE(string)) |
969 | 0 | { |
970 | 0 | forward_matches = _yr_scan_xor_wcompare( |
971 | 0 | data + offset, |
972 | 0 | data_size - offset, |
973 | 0 | string->string, |
974 | 0 | string->length, |
975 | 0 | &xor_key); |
976 | 0 | } |
977 | |
|
978 | 0 | if (forward_matches == 0) |
979 | 0 | { |
980 | 0 | forward_matches = _yr_scan_xor_compare( |
981 | 0 | data + offset, |
982 | 0 | data_size - offset, |
983 | 0 | string->string, |
984 | 0 | string->length, |
985 | 0 | &xor_key); |
986 | 0 | } |
987 | 0 | } |
988 | 0 | } |
989 | |
|
990 | 0 | if (forward_matches == 0) |
991 | 0 | return ERROR_SUCCESS; |
992 | | |
993 | 0 | if (forward_matches == string->length * 2) |
994 | 0 | flags |= RE_FLAGS_WIDE; |
995 | |
|
996 | 0 | if (STRING_IS_NO_CASE(string)) |
997 | 0 | flags |= RE_FLAGS_NO_CASE; |
998 | |
|
999 | 0 | callback_args.context = context; |
1000 | 0 | callback_args.string = string; |
1001 | 0 | callback_args.data = data; |
1002 | 0 | callback_args.data_size = data_size; |
1003 | 0 | callback_args.data_base = data_base; |
1004 | 0 | callback_args.forward_matches = forward_matches; |
1005 | 0 | callback_args.full_word = STRING_IS_FULL_WORD(string); |
1006 | 0 | callback_args.xor_key = xor_key; |
1007 | |
|
1008 | 0 | FAIL_ON_ERROR( |
1009 | 0 | _yr_scan_match_callback(data + offset, 0, flags, &callback_args)); |
1010 | |
|
1011 | 0 | return ERROR_SUCCESS; |
1012 | 0 | } |
1013 | | |
1014 | | int yr_scan_verify_match( |
1015 | | YR_SCAN_CONTEXT* context, |
1016 | | YR_AC_MATCH* ac_match, |
1017 | | const uint8_t* data, |
1018 | | size_t data_size, |
1019 | | uint64_t data_base, |
1020 | | size_t offset) |
1021 | 0 | { |
1022 | 0 | YR_DEBUG_FPRINTF( |
1023 | 0 | 2, |
1024 | 0 | stderr, |
1025 | 0 | "- %s(data=%p data_size=%zu data_base=0x%" PRIx64 " offset=%zu) {}\n", |
1026 | 0 | __FUNCTION__, |
1027 | 0 | data, |
1028 | 0 | data_size, |
1029 | 0 | data_base, |
1030 | 0 | offset); |
1031 | |
|
1032 | 0 | YR_STRING* string = ac_match->string; |
1033 | 0 | YR_CALLBACK_FUNC callback = context->callback; |
1034 | |
|
1035 | 0 | int result; |
1036 | |
|
1037 | 0 | if (data_size - offset <= 0) |
1038 | 0 | return ERROR_SUCCESS; |
1039 | | |
1040 | 0 | if (yr_bitmask_is_set(context->strings_temp_disabled, string->idx)) |
1041 | 0 | return ERROR_SUCCESS; |
1042 | | |
1043 | 0 | if (context->flags & SCAN_FLAGS_FAST_MODE && STRING_IS_SINGLE_MATCH(string) && |
1044 | 0 | context->matches[string->idx].head != NULL) |
1045 | 0 | return ERROR_SUCCESS; |
1046 | | |
1047 | 0 | if (STRING_IS_FIXED_OFFSET(string) && |
1048 | 0 | string->fixed_offset != data_base + offset) |
1049 | 0 | return ERROR_SUCCESS; |
1050 | | |
1051 | | #ifdef YR_PROFILING_ENABLED |
1052 | | uint64_t start_time; |
1053 | | bool sample = context->profiling_info[string->rule_idx].atom_matches % |
1054 | | YR_MATCH_VERIFICATION_PROFILING_RATE == |
1055 | | 0; |
1056 | | |
1057 | | if (sample) |
1058 | | start_time = yr_stopwatch_elapsed_ns(&context->stopwatch); |
1059 | | #endif |
1060 | | |
1061 | 0 | if (STRING_IS_LITERAL(string)) |
1062 | 0 | { |
1063 | 0 | result = _yr_scan_verify_literal_match( |
1064 | 0 | context, ac_match, data, data_size, data_base, offset); |
1065 | 0 | } |
1066 | 0 | else |
1067 | 0 | { |
1068 | 0 | result = _yr_scan_verify_re_match( |
1069 | 0 | context, ac_match, data, data_size, data_base, offset); |
1070 | 0 | } |
1071 | | |
1072 | | // If _yr_scan_verify_literal_match or _yr_scan_verify_re_match return |
1073 | | // ERROR_TOO_MANY_MATCHES call the callback with CALLBACK_MSG_TOO_MANY_MATCHES |
1074 | | // in order to ask what to do. If the callback returns CALLBACK_CONTINUE |
1075 | | // this error is ignored, if not, the error is propagated to the caller. |
1076 | 0 | if (result == ERROR_TOO_MANY_MATCHES) |
1077 | 0 | { |
1078 | 0 | result = callback( |
1079 | 0 | context, |
1080 | 0 | CALLBACK_MSG_TOO_MANY_MATCHES, |
1081 | 0 | (void*) string, |
1082 | 0 | context->user_data); |
1083 | |
|
1084 | 0 | switch (result) |
1085 | 0 | { |
1086 | 0 | case CALLBACK_CONTINUE: |
1087 | 0 | yr_bitmask_set(context->strings_temp_disabled, string->idx); |
1088 | 0 | result = ERROR_SUCCESS; |
1089 | 0 | break; |
1090 | | |
1091 | 0 | default: |
1092 | 0 | result = ERROR_TOO_MANY_MATCHES; |
1093 | 0 | break; |
1094 | 0 | } |
1095 | 0 | } |
1096 | | |
1097 | | #ifdef YR_PROFILING_ENABLED |
1098 | | if (sample) |
1099 | | { |
1100 | | uint64_t finish_time = yr_stopwatch_elapsed_ns(&context->stopwatch); |
1101 | | context->profiling_info[string->rule_idx].match_time += |
1102 | | (finish_time - start_time); |
1103 | | } |
1104 | | context->profiling_info[string->rule_idx].atom_matches++; |
1105 | | #endif |
1106 | | |
1107 | 0 | if (result != ERROR_SUCCESS) |
1108 | 0 | context->last_error_string = string; |
1109 | |
|
1110 | 0 | return result; |
1111 | 0 | } |