/src/yara/libyara/atoms.c
Line | Count | Source |
1 | | /* |
2 | | Copyright (c) 2013-2018. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | /* |
31 | | |
32 | | This module handles atom extraction from regexps and hex strings. Atoms are |
33 | | undivided substrings found in a regexps and hex strings. Let's consider this |
34 | | hex string: |
35 | | |
36 | | { 01 02 03 04 05 ?? 06 07 08 [1-2] 09 0A } |
37 | | |
38 | | In the above string, byte sequences 0102030405, 060708 and 090A are atoms. |
39 | | Similarly, in this regexp: |
40 | | |
41 | | /abc.*ed[0-9]+fgh/ |
42 | | |
43 | | The strings "abc", "ed" and "fgh" are atoms. |
44 | | |
45 | | When searching for regexps/hex strings matching a file, YARA uses these |
46 | | atoms to find locations inside the file where the regexp/hex string could |
47 | | match. If the atom "abc" is found somewhere inside the file, there is a chance |
48 | | for /abc.*ed[0-9]+fgh/ to match the file, if "abc" doesn't appear in the file |
49 | | there's no chance for the regexp to match. When the atom is found in the file |
50 | | YARA proceeds to fully evaluate the regexp/hex string to determine if it's |
51 | | actually a match. |
52 | | |
53 | | For each regexp/hex string YARA extracts one or more atoms. Sometimes a |
54 | | single atom is enough (like in the previous example "abc" is enough for finding |
55 | | /abc.*ed[0-9]+fgh/), but sometimes a single atom isn't enough like in the |
56 | | regexp /(abc|efg)/. In this case YARA must search for both "abc" AND "efg" and |
57 | | fully evaluate the regexp whenever one of these atoms is found. |
58 | | |
59 | | In the regexp /Look(at|into)this/ YARA can search for "Look", or search for |
60 | | "this", or search for both "at" and "into". This is what we call an atoms tree, |
61 | | because it can be represented by the following tree structure: |
62 | | |
63 | | -OR |
64 | | |- "Look" |
65 | | | |
66 | | |- AND |
67 | | | | |
68 | | | |- "at" |
69 | | | - "into" |
70 | | | |
71 | | - "this" |
72 | | |
73 | | From an atom tree YARA chooses the best combination, trying to minimize the |
74 | | number of required atoms, but also using high quality atoms (long atoms with |
75 | | not too many zeroes and a bit of byte diversity). In the previous example YARA |
76 | | will end up using the "Look" atom alone, but in /a(bcd|efg)h/ atoms "bcd" and |
77 | | "efg" will be used because "a" and "h" are too short. |
78 | | */ |
79 | | |
80 | | #include <assert.h> |
81 | | #include <string.h> |
82 | | #include <yara/atoms.h> |
83 | | #include <yara/error.h> |
84 | | #include <yara/globals.h> |
85 | | #include <yara/limits.h> |
86 | | #include <yara/mem.h> |
87 | | #include <yara/stack.h> |
88 | | #include <yara/types.h> |
89 | | #include <yara/utils.h> |
90 | | |
91 | | //////////////////////////////////////////////////////////////////////////////// |
92 | | // Returns a numeric value indicating the quality of an atom. The quality |
93 | | // depends on some characteristics of the atom, including its length, number |
94 | | // of very common bytes like 00 and FF and number of unique distinct bytes. |
95 | | // Atom 00 00 has a very low quality, because it's only two bytes long and |
96 | | // both bytes are zeroes. Atom 01 01 01 01 is better but still not optimal, |
97 | | // because the same byte is repeated. Atom 01 02 03 04 is an optimal one. |
98 | | // |
99 | | // Args: |
100 | | // config: Pointer to YR_ATOMS_CONFIG struct. |
101 | | // atom: Pointer to YR_ATOM struct. |
102 | | // |
103 | | // Returns: |
104 | | // An integer indicating the atom's quality |
105 | | // |
106 | | int yr_atoms_heuristic_quality(YR_ATOMS_CONFIG* config, YR_ATOM* atom) |
107 | 8.06M | { |
108 | 8.06M | YR_BITMASK seen_bytes[YR_BITMASK_SIZE(256)]; |
109 | | |
110 | 8.06M | int quality = 0; |
111 | 8.06M | int unique_bytes = 0; |
112 | | |
113 | 8.06M | assert(atom->length <= YR_MAX_ATOM_LENGTH); |
114 | | |
115 | 8.06M | yr_bitmask_clear_all(seen_bytes); |
116 | | |
117 | | // Each byte in the atom contributes a certain amount of points to the |
118 | | // quality. Bytes [a-zA-Z] contribute 18 points each, common bytes like |
119 | | // 0x00, 0x20 and 0xFF contribute only 12 points, and the rest of the |
120 | | // bytes contribute 20 points. The ?? mask substracts 10 points, and masks |
121 | | // X? and ?X contribute 4 points. An additional boost consisting in 2x the |
122 | | // number of unique bytes in the atom is added to the quality. This are |
123 | | // some examples of the quality of atoms: |
124 | | // |
125 | | // 01 02 03 04 quality = 20 + 20 + 20 + 20 + 8 = 88 |
126 | | // 01 ?? 03 04 quality = 20 - 10 + 20 + 20 + 6 = 56 |
127 | | // 01 0? 03 quality = 20 + 4 + 20 + 4 = 48 |
128 | | // 01 02 quality = 20 + 20 + 4 = 44 |
129 | | // 01 ?? ?3 04 quality = 20 - 10 + 2 + 20 + 4 = 36 |
130 | | // 61 62 quality = 18 + 18 + 4 = 40 |
131 | | // 61 61 quality = 18 + 18 + 2 = 38 <- warning threshold |
132 | | // 00 01 quality = 12 + 20 + 4 = 36 |
133 | | // 01 ?? 02 quality = 20 - 10 + 20 + 4 = 34 |
134 | | // 01 quality = 20 + 1 = 21 |
135 | | |
136 | 39.4M | for (int i = 0; i < atom->length; i++) |
137 | 31.4M | { |
138 | 31.4M | switch (atom->mask[i]) |
139 | 31.4M | { |
140 | 19.5k | case 0x00: |
141 | 19.5k | quality -= 10; |
142 | 19.5k | break; |
143 | 938 | case 0x0F: |
144 | 938 | quality += 4; |
145 | 938 | break; |
146 | 1.66k | case 0xF0: |
147 | 1.66k | quality += 4; |
148 | 1.66k | break; |
149 | 31.3M | case 0xFF: |
150 | 31.3M | switch (atom->bytes[i]) |
151 | 31.3M | { |
152 | 2.07M | case 0x00: |
153 | 2.84M | case 0x20: |
154 | 2.84M | case 0xCC: |
155 | 7.94M | case 0xFF: |
156 | | // Common bytes contribute less to the quality than the rest. |
157 | 7.94M | quality += 12; |
158 | 7.94M | break; |
159 | 23.4M | default: |
160 | | // Bytes in the a-z and A-Z ranges have a slightly lower quality |
161 | | // than the rest. We want to favor atoms that contain bytes outside |
162 | | // those ranges because they generate less additional atoms during |
163 | | // calls to _yr_atoms_case_combinations. |
164 | 23.4M | if (yr_lowercase[atom->bytes[i]] >= 'a' && |
165 | 17.4M | yr_lowercase[atom->bytes[i]] <= 'z') |
166 | 16.0M | quality += 18; |
167 | 7.42M | else |
168 | 7.42M | quality += 20; |
169 | 31.3M | } |
170 | | |
171 | 31.3M | if (!yr_bitmask_is_set(seen_bytes, atom->bytes[i])) |
172 | 24.5M | { |
173 | 24.5M | yr_bitmask_set(seen_bytes, atom->bytes[i]); |
174 | 24.5M | unique_bytes++; |
175 | 24.5M | } |
176 | 31.4M | } |
177 | 31.4M | } |
178 | | |
179 | | // If all the bytes in the atom are equal and very common, let's penalize |
180 | | // it heavily. |
181 | 8.06M | if (unique_bytes == 1 && (yr_bitmask_is_set(seen_bytes, 0x00) || |
182 | 1.39M | yr_bitmask_is_set(seen_bytes, 0x20) || |
183 | 1.38M | yr_bitmask_is_set(seen_bytes, 0x90) || |
184 | 1.38M | yr_bitmask_is_set(seen_bytes, 0xCC) || |
185 | 1.38M | yr_bitmask_is_set(seen_bytes, 0xFF))) |
186 | 989k | { |
187 | 989k | quality -= 10 * atom->length; |
188 | 989k | } |
189 | | // In general atoms with more unique bytes have a better quality, so let's |
190 | | // boost the quality in the amount of unique bytes. |
191 | 7.07M | else |
192 | 7.07M | { |
193 | 7.07M | quality += 2 * unique_bytes; |
194 | 7.07M | } |
195 | | |
196 | | // The final quality is not zero-based, we start at YR_MAX_ATOM_QUALITY |
197 | | // for the best possible atom and substract from there. The best possible |
198 | | // quality is 21 * YR_MAX_ATOM_LENGTH (20 points per byte + 2 additional point |
199 | | // per unique byte, with a maximum of 2*YR_MAX_ATOM_LENGTH unique bytes). |
200 | | |
201 | 8.06M | return YR_MAX_ATOM_QUALITY - 22 * YR_MAX_ATOM_LENGTH + quality; |
202 | 8.06M | } |
203 | | |
204 | | //////////////////////////////////////////////////////////////////////////////// |
205 | | // Compares the byte sequence in a1 with the YR_ATOM in a2, taking atom's mask |
206 | | // into account. |
207 | | // |
208 | | // Returns: |
209 | | // < 0 if the first byte that does not match has a lower value in a1 than |
210 | | // in a2. |
211 | | // > 0 if the first byte that does not match has a greater value in a1 than |
212 | | // in a2. |
213 | | // = 0 if a1 is equal or matches a2. |
214 | | // |
215 | | static int _yr_atoms_cmp(const uint8_t* a1, YR_ATOM* a2) |
216 | 0 | { |
217 | 0 | int result = 0; |
218 | 0 | int i = 0; |
219 | |
|
220 | 0 | while (result == 0 && i < a2->length) |
221 | 0 | { |
222 | 0 | switch (a2->mask[i]) |
223 | 0 | { |
224 | 0 | case 0xFF: |
225 | 0 | case 0x0F: |
226 | 0 | case 0xF0: |
227 | 0 | case 0x00: |
228 | 0 | result = (a1[i] & a2->mask[i]) - a2->bytes[i]; |
229 | 0 | break; |
230 | 0 | default: |
231 | 0 | assert(false); |
232 | 0 | } |
233 | | |
234 | 0 | i++; |
235 | 0 | } |
236 | | |
237 | 0 | return result; |
238 | 0 | } |
239 | | |
240 | | //////////////////////////////////////////////////////////////////////////////// |
241 | | // Returns a numeric value indicating the quality of an atom. The quality is |
242 | | // based in the atom quality table passed in "config". Very common atoms |
243 | | // (i.e: those with greater quality) have lower quality than those that are |
244 | | // uncommon. See the comment for yr_compiler_set_atom_quality_table for |
245 | | // details about the quality table's format. |
246 | | // |
247 | | // Args: |
248 | | // YR_ATOMS_CONFIG* config - Pointer to YR_ATOMS_CONFIG struct. |
249 | | // YR_ATOM* atom - Pointer to YR_ATOM struct. |
250 | | // |
251 | | // Returns: |
252 | | // An integer indicating the atom's quality |
253 | | // |
254 | | int yr_atoms_table_quality(YR_ATOMS_CONFIG* config, YR_ATOM* atom) |
255 | 0 | { |
256 | 0 | YR_ATOM_QUALITY_TABLE_ENTRY* table = config->quality_table; |
257 | |
|
258 | 0 | int begin = 0; |
259 | 0 | int end = config->quality_table_entries; |
260 | |
|
261 | 0 | assert(atom->length <= YR_MAX_ATOM_LENGTH); |
262 | |
|
263 | 0 | while (end > begin) |
264 | 0 | { |
265 | 0 | int middle = begin + (end - begin) / 2; |
266 | 0 | int c = _yr_atoms_cmp(table[middle].atom, atom); |
267 | |
|
268 | 0 | if (c < 0) |
269 | 0 | { |
270 | 0 | begin = middle + 1; |
271 | 0 | } |
272 | 0 | else if (c > 0) |
273 | 0 | { |
274 | 0 | end = middle; |
275 | 0 | } |
276 | 0 | else |
277 | 0 | { |
278 | 0 | int i = middle + 1; |
279 | 0 | int quality = table[middle].quality; |
280 | 0 | int min_quality = quality; |
281 | |
|
282 | 0 | while (i < end && _yr_atoms_cmp(table[i].atom, atom) == 0) |
283 | 0 | { |
284 | 0 | if (min_quality > table[i].quality) |
285 | 0 | min_quality = table[i].quality; |
286 | |
|
287 | 0 | i++; |
288 | 0 | } |
289 | |
|
290 | 0 | i = middle - 1; |
291 | |
|
292 | 0 | while (i >= begin && _yr_atoms_cmp(table[i].atom, atom) == 0) |
293 | 0 | { |
294 | 0 | if (min_quality > table[i].quality) |
295 | 0 | min_quality = table[i].quality; |
296 | |
|
297 | 0 | i--; |
298 | 0 | } |
299 | |
|
300 | 0 | return min_quality >> (YR_MAX_ATOM_LENGTH - atom->length); |
301 | 0 | } |
302 | 0 | } |
303 | | |
304 | 0 | return YR_MAX_ATOM_QUALITY; |
305 | 0 | } |
306 | | |
307 | | //////////////////////////////////////////////////////////////////////////////// |
308 | | // Returns the quality for the worst quality atom in a list. |
309 | | // |
310 | | int yr_atoms_min_quality(YR_ATOMS_CONFIG* config, YR_ATOM_LIST_ITEM* atom_list) |
311 | 0 | { |
312 | 0 | YR_ATOM_LIST_ITEM* atom; |
313 | |
|
314 | 0 | int quality; |
315 | 0 | int min_quality = YR_MAX_ATOM_QUALITY; |
316 | |
|
317 | 0 | if (atom_list == NULL) |
318 | 0 | return YR_MIN_ATOM_QUALITY; |
319 | | |
320 | 0 | atom = atom_list; |
321 | |
|
322 | 0 | while (atom != NULL) |
323 | 0 | { |
324 | 0 | quality = config->get_atom_quality(config, &atom->atom); |
325 | |
|
326 | 0 | if (quality < min_quality) |
327 | 0 | min_quality = quality; |
328 | |
|
329 | 0 | atom = atom->next; |
330 | 0 | } |
331 | |
|
332 | 0 | return min_quality; |
333 | 0 | } |
334 | | |
335 | | //////////////////////////////////////////////////////////////////////////////// |
336 | | // Creates a new node for an atoms tree. |
337 | | // |
338 | | static YR_ATOM_TREE_NODE* _yr_atoms_tree_node_create(uint8_t type) |
339 | 620k | { |
340 | 620k | YR_ATOM_TREE_NODE* new_node = (YR_ATOM_TREE_NODE*) yr_malloc( |
341 | 620k | sizeof(YR_ATOM_TREE_NODE)); |
342 | | |
343 | 620k | if (new_node != NULL) |
344 | 620k | { |
345 | 620k | new_node->type = type; |
346 | 620k | new_node->atom.length = 0; |
347 | 620k | new_node->next_sibling = NULL; |
348 | 620k | new_node->children_head = NULL; |
349 | 620k | new_node->children_tail = NULL; |
350 | 620k | } |
351 | | |
352 | 620k | return new_node; |
353 | 620k | } |
354 | | |
355 | | //////////////////////////////////////////////////////////////////////////////// |
356 | | // Destroys a node from an atoms tree. |
357 | | // |
358 | | static void _yr_atoms_tree_node_destroy(YR_ATOM_TREE_NODE* node) |
359 | 620k | { |
360 | 620k | YR_ATOM_TREE_NODE* child; |
361 | 620k | YR_ATOM_TREE_NODE* next_child; |
362 | | |
363 | 620k | if (node == NULL) |
364 | 0 | return; |
365 | | |
366 | 620k | if (node->type == ATOM_TREE_OR || node->type == ATOM_TREE_AND) |
367 | 33.6k | { |
368 | 33.6k | child = node->children_head; |
369 | | |
370 | 647k | while (child != NULL) |
371 | 613k | { |
372 | 613k | next_child = child->next_sibling; |
373 | 613k | _yr_atoms_tree_node_destroy(child); |
374 | 613k | child = next_child; |
375 | 613k | } |
376 | 33.6k | } |
377 | | |
378 | 620k | yr_free(node); |
379 | 620k | } |
380 | | |
381 | | //////////////////////////////////////////////////////////////////////////////// |
382 | | // Appends a new child node to another atoms tree node. |
383 | | // |
384 | | static void _yr_atoms_tree_node_append( |
385 | | YR_ATOM_TREE_NODE* dest, |
386 | | YR_ATOM_TREE_NODE* node) |
387 | 595k | { |
388 | 595k | if (dest->children_head == NULL) |
389 | 23.6k | dest->children_head = node; |
390 | | |
391 | 595k | if (dest->children_tail != NULL) |
392 | 572k | dest->children_tail->next_sibling = node; |
393 | | |
394 | 595k | dest->children_tail = node; |
395 | 595k | } |
396 | | |
397 | | //////////////////////////////////////////////////////////////////////////////// |
398 | | // Destroys an atoms tree. |
399 | | // |
400 | | static void _yr_atoms_tree_destroy(YR_ATOM_TREE* atom_tree) |
401 | 7.00k | { |
402 | 7.00k | _yr_atoms_tree_node_destroy(atom_tree->root_node); |
403 | 7.00k | yr_free(atom_tree); |
404 | 7.00k | } |
405 | | |
406 | | //////////////////////////////////////////////////////////////////////////////// |
407 | | // Destroys an atoms list. |
408 | | // |
409 | | void yr_atoms_list_destroy(YR_ATOM_LIST_ITEM* list_head) |
410 | 55.9k | { |
411 | 55.9k | YR_ATOM_LIST_ITEM* item = list_head; |
412 | 55.9k | YR_ATOM_LIST_ITEM* next; |
413 | | |
414 | 1.81M | while (item != NULL) |
415 | 1.75M | { |
416 | 1.75M | next = item->next; |
417 | 1.75M | yr_free(item); |
418 | 1.75M | item = next; |
419 | 1.75M | } |
420 | 55.9k | } |
421 | | |
422 | | //////////////////////////////////////////////////////////////////////////////// |
423 | | // Concats two atoms lists. |
424 | | // |
425 | | static YR_ATOM_LIST_ITEM* _yr_atoms_list_concat( |
426 | | YR_ATOM_LIST_ITEM* list1, |
427 | | YR_ATOM_LIST_ITEM* list2) |
428 | 8.64k | { |
429 | 8.64k | YR_ATOM_LIST_ITEM* item; |
430 | | |
431 | 8.64k | if (list1 == NULL) |
432 | 812 | return list2; |
433 | | |
434 | 7.83k | item = list1; |
435 | | |
436 | 209k | while (item->next != NULL) |
437 | 202k | { |
438 | 202k | item = item->next; |
439 | 202k | } |
440 | | |
441 | 7.83k | item->next = list2; |
442 | 7.83k | return list1; |
443 | 8.64k | } |
444 | | |
445 | | //////////////////////////////////////////////////////////////////////////////// |
446 | | // If the atom starts or ends with an unknown byte (mask == 0x00), trim |
447 | | // those bytes out of the atom. We don't want to expand an atom like |
448 | | // { ?? 01 02 } into { 00 01 02 }, { 01 01 02}, { 02 01 02} .. { FF 01 02} |
449 | | // in those cases it's better to simply have a shorter atom { 01 02 }. |
450 | | // |
451 | | // Args: |
452 | | // atom: Pointer to the YR_ATOM to be trimmed. |
453 | | // |
454 | | // Returns: |
455 | | // The number of bytes that were trimmed from the beginning of the atom. |
456 | | // |
457 | | int _yr_atoms_trim(YR_ATOM* atom) |
458 | 7.73M | { |
459 | 7.73M | int mask_00 = 0; |
460 | 7.73M | int mask_ff = 0; |
461 | | |
462 | 7.73M | int trim_left = 0; |
463 | | |
464 | 7.81M | while (trim_left < atom->length && atom->mask[trim_left] == 0) trim_left++; |
465 | | |
466 | 7.74M | while (atom->length > trim_left && atom->mask[atom->length - 1] == 0) |
467 | 9.28k | atom->length--; |
468 | | |
469 | 7.73M | atom->length -= trim_left; |
470 | | |
471 | 7.73M | if (atom->length == 0) |
472 | 76.0k | return 0; |
473 | | |
474 | | // The trimmed atom goes from trim_left to trim_left + atom->length and the |
475 | | // first and last byte in the atom are known (mask == 0xFF). Now count the |
476 | | // number of known and unknown bytes in the atom (mask == 0xFF and |
477 | | // mask == 0x00 respectively). |
478 | | |
479 | 38.0M | for (int i = 0; i < atom->length; i++) |
480 | 30.4M | { |
481 | 30.4M | if (atom->mask[trim_left + i] == 0xFF) |
482 | 30.3M | mask_ff++; |
483 | 23.1k | else if (atom->mask[trim_left + i] == 0x00) |
484 | 19.5k | mask_00++; |
485 | 30.4M | } |
486 | | |
487 | | // If the number of unknown bytes is >= than the number of known bytes |
488 | | // it doesn't make sense the to use this atom, so we use a single byte atom |
489 | | // containing the first known byte. If YR_MAX_ATOM_LENGTH == 4 this happens |
490 | | // only when the atom is like { XX ?? ?? YY }, so using the first known |
491 | | // byte is good enough. For larger values of YR_MAX_ATOM_LENGTH this is not |
492 | | // the most efficient solution, as better atoms could be choosen. For |
493 | | // example, in { XX ?? ?? ?? YY ZZ } the best atom is { YY ZZ } not { XX }. |
494 | | // But let's keep it like this for simplicity. |
495 | | |
496 | 7.65M | if (mask_00 >= mask_ff) |
497 | 1.72k | atom->length = 1; |
498 | | |
499 | 7.65M | if (trim_left == 0) |
500 | 7.64M | return 0; |
501 | | |
502 | | // Shift bytes and mask trim_left positions to the left. |
503 | | |
504 | 37.5k | for (int i = 0; i < YR_MAX_ATOM_LENGTH - trim_left; i++) |
505 | 28.1k | { |
506 | 28.1k | atom->bytes[i] = atom->bytes[trim_left + i]; |
507 | 28.1k | atom->mask[i] = atom->mask[trim_left + i]; |
508 | 28.1k | } |
509 | | |
510 | 9.40k | return trim_left; |
511 | 7.65M | } |
512 | | |
513 | | //////////////////////////////////////////////////////////////////////////////// |
514 | | // This function receives an atom tree and returns a list of atoms to be added |
515 | | // to the Aho-Corasick automaton. |
516 | | // |
517 | | static int _yr_atoms_choose( |
518 | | YR_ATOMS_CONFIG* config, |
519 | | YR_ATOM_TREE_NODE* node, |
520 | | YR_ATOM_LIST_ITEM** chosen_atoms, |
521 | | int* atoms_quality) |
522 | 50.5k | { |
523 | 50.5k | YR_ATOM_TREE_NODE* child; |
524 | 50.5k | YR_ATOM_LIST_ITEM* item; |
525 | 50.5k | YR_ATOM_LIST_ITEM* tail; |
526 | | |
527 | 50.5k | int shift, quality; |
528 | | |
529 | 50.5k | int max_quality = YR_MIN_ATOM_QUALITY; |
530 | 50.5k | int min_quality = YR_MAX_ATOM_QUALITY; |
531 | | |
532 | 50.5k | *chosen_atoms = NULL; |
533 | 50.5k | *atoms_quality = YR_MIN_ATOM_QUALITY; |
534 | | |
535 | 50.5k | switch (node->type) |
536 | 50.5k | { |
537 | 16.8k | case ATOM_TREE_LEAF: |
538 | | |
539 | 16.8k | item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
540 | | |
541 | 16.8k | if (item == NULL) |
542 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
543 | | |
544 | 16.8k | memcpy(&item->atom, &node->atom, sizeof(YR_ATOM)); |
545 | | |
546 | 16.8k | shift = _yr_atoms_trim(&item->atom); |
547 | | |
548 | 16.8k | if (item->atom.length > 0) |
549 | 16.1k | { |
550 | 16.1k | item->forward_code_ref = node->re_nodes[shift]->forward_code_ref; |
551 | 16.1k | item->backward_code_ref = node->re_nodes[shift]->backward_code_ref; |
552 | 16.1k | item->backtrack = 0; |
553 | 16.1k | item->next = NULL; |
554 | | |
555 | 16.1k | *chosen_atoms = item; |
556 | 16.1k | *atoms_quality = config->get_atom_quality(config, &item->atom); |
557 | 16.1k | } |
558 | 722 | else |
559 | 722 | { |
560 | 722 | yr_free(item); |
561 | 722 | } |
562 | | |
563 | 16.8k | break; |
564 | | |
565 | 24.8k | case ATOM_TREE_OR: |
566 | | |
567 | | // The choosen nodes are those coming from the highest quality child. |
568 | | |
569 | 24.8k | child = node->children_head; |
570 | | |
571 | 50.0k | while (child != NULL) |
572 | 25.7k | { |
573 | 25.7k | FAIL_ON_ERROR(_yr_atoms_choose(config, child, &item, &quality)); |
574 | | |
575 | 25.7k | if (quality > max_quality) |
576 | 22.8k | { |
577 | 22.8k | max_quality = quality; |
578 | 22.8k | yr_atoms_list_destroy(*chosen_atoms); |
579 | 22.8k | *chosen_atoms = item; |
580 | 22.8k | } |
581 | 2.94k | else |
582 | 2.94k | { |
583 | 2.94k | yr_atoms_list_destroy(item); |
584 | 2.94k | } |
585 | | |
586 | 25.7k | if (max_quality == YR_MAX_ATOM_QUALITY) |
587 | 595 | break; |
588 | | |
589 | 25.2k | child = child->next_sibling; |
590 | 25.2k | } |
591 | | |
592 | 24.8k | *atoms_quality = max_quality; |
593 | 24.8k | break; |
594 | | |
595 | 8.89k | case ATOM_TREE_AND: |
596 | | |
597 | | // The choosen nodes are the concatenation of the the nodes choosen from |
598 | | // all the children. |
599 | | |
600 | 8.89k | child = node->children_head; |
601 | | |
602 | 26.6k | while (child != NULL) |
603 | 17.7k | { |
604 | 17.7k | FAIL_ON_ERROR(_yr_atoms_choose(config, child, &item, &quality)); |
605 | | |
606 | 17.7k | if (quality < min_quality) |
607 | 9.81k | min_quality = quality; |
608 | | |
609 | 17.7k | if (item != NULL) |
610 | 16.8k | { |
611 | 16.8k | tail = item; |
612 | 85.1k | while (tail->next != NULL) tail = tail->next; |
613 | | |
614 | 16.8k | tail->next = *chosen_atoms; |
615 | 16.8k | *chosen_atoms = item; |
616 | 16.8k | } |
617 | | |
618 | 17.7k | child = child->next_sibling; |
619 | 17.7k | } |
620 | | |
621 | 8.89k | *atoms_quality = min_quality; |
622 | 8.89k | break; |
623 | 50.5k | } |
624 | | |
625 | 50.5k | return ERROR_SUCCESS; |
626 | 50.5k | } |
627 | | |
628 | | //////////////////////////////////////////////////////////////////////////////// |
629 | | // Returns all combinations of lower and upper cases for a given atom. For |
630 | | // atom "abc" the output would be "abc" "abC" "aBC" and so on. Resulting |
631 | | // atoms are written into the output buffer in this format: |
632 | | // |
633 | | // [size of atom 1] [atom 1] ... [size of atom N] [atom N] [0] |
634 | | // |
635 | | // Notice the zero at the end to indicate where the output ends. |
636 | | // |
637 | | // The caller is responsible of providing a buffer large enough to hold the |
638 | | // returned atoms. |
639 | | // |
640 | | static uint8_t* _yr_atoms_case_combinations( |
641 | | uint8_t* atom, |
642 | | int atom_length, |
643 | | int atom_offset, |
644 | | uint8_t* output_buffer) |
645 | 1.71M | { |
646 | 1.71M | uint8_t c; |
647 | 1.71M | uint8_t* new_atom; |
648 | | |
649 | 1.71M | if (atom_offset + 1 < atom_length) |
650 | 968k | output_buffer = _yr_atoms_case_combinations( |
651 | 968k | atom, atom_length, atom_offset + 1, output_buffer); |
652 | | |
653 | 1.71M | c = atom[atom_offset]; |
654 | | |
655 | 1.71M | if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) |
656 | 1.20M | { |
657 | | // Write atom length. |
658 | 1.20M | *output_buffer = atom_length; |
659 | 1.20M | output_buffer++; |
660 | | |
661 | 1.20M | memcpy(output_buffer, atom, atom_length); |
662 | | |
663 | 1.20M | new_atom = output_buffer; |
664 | 1.20M | output_buffer += atom_length; |
665 | | |
666 | | // Swap character case. |
667 | 1.20M | if (c >= 'a' && c <= 'z') |
668 | 1.09M | new_atom[atom_offset] -= 32; |
669 | 110k | else |
670 | 110k | new_atom[atom_offset] += 32; |
671 | | |
672 | 1.20M | if (atom_offset + 1 < atom_length) |
673 | 558k | output_buffer = _yr_atoms_case_combinations( |
674 | 558k | new_atom, atom_length, atom_offset + 1, output_buffer); |
675 | 1.20M | } |
676 | | |
677 | 1.71M | if (atom_offset == 0) |
678 | 188k | *output_buffer = 0; |
679 | | |
680 | 1.71M | return output_buffer; |
681 | 1.71M | } |
682 | | |
683 | | // Size of buffer used in _yr_atoms_case_insensitive for storing the all |
684 | | // the possible combinations for an atom. Each atom has up to YR_MAX_ATOM_LENGTH |
685 | | // characters and each character has two possible values (upper and lower case). |
686 | | // That means 2 ^ YR_MAX_ATOM_LENGTH combinations for an atom, where each atom |
687 | | // occupies YR_MAX_ATOM_LENGTH + 1 bytes (the atom itself +1 byte for its |
688 | | // length). One extra bytes is allocated for the zero value indicating the end. |
689 | | |
690 | | #define CASE_COMBINATIONS_BUFFER_SIZE \ |
691 | | (1 << YR_MAX_ATOM_LENGTH) * (YR_MAX_ATOM_LENGTH + 1) + 1 |
692 | | |
693 | | //////////////////////////////////////////////////////////////////////////////// |
694 | | // For a given list of atoms returns another list of atoms |
695 | | // with every case combination. |
696 | | // |
697 | | static int _yr_atoms_case_insensitive( |
698 | | YR_ATOM_LIST_ITEM* atoms, |
699 | | YR_ATOM_LIST_ITEM** case_insensitive_atoms) |
700 | 6.88k | { |
701 | 6.88k | YR_ATOM_LIST_ITEM* atom; |
702 | 6.88k | YR_ATOM_LIST_ITEM* new_atom; |
703 | | |
704 | 6.88k | uint8_t buffer[CASE_COMBINATIONS_BUFFER_SIZE]; |
705 | 6.88k | uint8_t atom_length; |
706 | 6.88k | uint8_t* atoms_cursor; |
707 | | |
708 | 6.88k | int i; |
709 | | |
710 | 6.88k | *case_insensitive_atoms = NULL; |
711 | 6.88k | atom = atoms; |
712 | | |
713 | 195k | while (atom != NULL) |
714 | 188k | { |
715 | 188k | _yr_atoms_case_combinations(atom->atom.bytes, atom->atom.length, 0, buffer); |
716 | | |
717 | 188k | atoms_cursor = buffer; |
718 | 188k | atom_length = *atoms_cursor; |
719 | 188k | atoms_cursor++; |
720 | | |
721 | 1.39M | while (atom_length != 0) |
722 | 1.20M | { |
723 | 1.20M | new_atom = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
724 | | |
725 | 1.20M | if (new_atom == NULL) |
726 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
727 | | |
728 | 6.00M | for (i = 0; i < atom_length; i++) |
729 | 4.79M | { |
730 | 4.79M | new_atom->atom.bytes[i] = atoms_cursor[i]; |
731 | 4.79M | new_atom->atom.mask[i] = 0xFF; |
732 | 4.79M | } |
733 | | |
734 | 1.20M | new_atom->atom.length = atom_length; |
735 | 1.20M | new_atom->forward_code_ref = atom->forward_code_ref; |
736 | 1.20M | new_atom->backward_code_ref = atom->backward_code_ref; |
737 | 1.20M | new_atom->backtrack = atom->backtrack; |
738 | 1.20M | new_atom->next = *case_insensitive_atoms; |
739 | | |
740 | 1.20M | *case_insensitive_atoms = new_atom; |
741 | | |
742 | 1.20M | atoms_cursor += atom_length; |
743 | 1.20M | atom_length = *atoms_cursor; |
744 | 1.20M | atoms_cursor++; |
745 | 1.20M | } |
746 | | |
747 | 188k | atom = atom->next; |
748 | 188k | } |
749 | | |
750 | 6.88k | return ERROR_SUCCESS; |
751 | 6.88k | } |
752 | | |
753 | | //////////////////////////////////////////////////////////////////////////////// |
754 | | // For a given list of atoms returns another list after a single byte xor |
755 | | // has been applied to it. |
756 | | // |
757 | | static int _yr_atoms_xor( |
758 | | YR_ATOM_LIST_ITEM* atoms, |
759 | | uint8_t min, |
760 | | uint8_t max, |
761 | | YR_ATOM_LIST_ITEM** xor_atoms) |
762 | 857 | { |
763 | 857 | YR_ATOM_LIST_ITEM* atom; |
764 | 857 | YR_ATOM_LIST_ITEM* new_atom; |
765 | | |
766 | 857 | int i, j; |
767 | 857 | *xor_atoms = NULL; |
768 | 857 | atom = atoms; |
769 | | |
770 | 2.11k | while (atom != NULL) |
771 | 1.25k | { |
772 | 230k | for (j = min; j <= max; j++) |
773 | 229k | { |
774 | 229k | new_atom = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
775 | | |
776 | 229k | if (new_atom == NULL) |
777 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
778 | | |
779 | 892k | for (i = 0; i < atom->atom.length; i++) |
780 | 663k | { |
781 | 663k | new_atom->atom.bytes[i] = atom->atom.bytes[i] ^ j; |
782 | 663k | new_atom->atom.mask[i] = 0xFF; |
783 | 663k | } |
784 | | |
785 | 229k | new_atom->atom.length = yr_min(atom->atom.length, YR_MAX_ATOM_LENGTH); |
786 | 229k | new_atom->forward_code_ref = atom->forward_code_ref; |
787 | 229k | new_atom->backward_code_ref = atom->backward_code_ref; |
788 | 229k | new_atom->backtrack = atom->backtrack; |
789 | 229k | new_atom->next = *xor_atoms; |
790 | | |
791 | 229k | *xor_atoms = new_atom; |
792 | 229k | } |
793 | | |
794 | 1.25k | atom = atom->next; |
795 | 1.25k | } |
796 | 857 | return ERROR_SUCCESS; |
797 | 857 | } |
798 | | |
799 | | //////////////////////////////////////////////////////////////////////////////// |
800 | | // For a given list of atoms returns another list with the corresponding |
801 | | // wide atoms. Wide atoms are just the original atoms with interleaved zeroes, |
802 | | // for example: 01 02 -> 01 00 02 00 |
803 | | // |
804 | | static int _yr_atoms_wide( |
805 | | YR_ATOM_LIST_ITEM* atoms, |
806 | | YR_ATOM_LIST_ITEM** wide_atoms) |
807 | 3.29k | { |
808 | 3.29k | YR_ATOM_LIST_ITEM* atom; |
809 | 3.29k | YR_ATOM_LIST_ITEM* new_atom; |
810 | | |
811 | 3.29k | int i; |
812 | | |
813 | 3.29k | *wide_atoms = NULL; |
814 | 3.29k | atom = atoms; |
815 | | |
816 | 31.8k | while (atom != NULL) |
817 | 28.5k | { |
818 | 28.5k | new_atom = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
819 | | |
820 | 28.5k | if (new_atom == NULL) |
821 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
822 | | |
823 | 142k | for (i = 0; i < YR_MAX_ATOM_LENGTH; i++) |
824 | 114k | { |
825 | 114k | new_atom->atom.bytes[i] = 0; |
826 | 114k | new_atom->atom.mask[i] = 0xFF; |
827 | 114k | } |
828 | | |
829 | 83.4k | for (i = 0; i < atom->atom.length; i++) |
830 | 80.8k | { |
831 | 80.8k | if (i * 2 < YR_MAX_ATOM_LENGTH) |
832 | 54.9k | new_atom->atom.bytes[i * 2] = atom->atom.bytes[i]; |
833 | 25.8k | else |
834 | 25.8k | break; |
835 | 80.8k | } |
836 | | |
837 | 28.5k | new_atom->atom.length = yr_min(atom->atom.length * 2, YR_MAX_ATOM_LENGTH); |
838 | 28.5k | new_atom->forward_code_ref = atom->forward_code_ref; |
839 | 28.5k | new_atom->backward_code_ref = atom->backward_code_ref; |
840 | 28.5k | new_atom->backtrack = atom->backtrack * 2; |
841 | 28.5k | new_atom->next = *wide_atoms; |
842 | | |
843 | 28.5k | *wide_atoms = new_atom; |
844 | | |
845 | 28.5k | atom = atom->next; |
846 | 28.5k | } |
847 | | |
848 | 3.29k | return ERROR_SUCCESS; |
849 | 3.29k | } |
850 | | |
851 | | struct STACK_ITEM |
852 | | { |
853 | | RE_NODE* re_node; |
854 | | YR_ATOM_TREE_NODE* new_appending_node; |
855 | | }; |
856 | | |
857 | | #define make_atom_from_re_nodes(atom, nodes_length, nodes) \ |
858 | 7.71M | { \ |
859 | 7.71M | atom.length = nodes_length; \ |
860 | 38.1M | for (i = 0; i < atom.length; i++) \ |
861 | 30.4M | { \ |
862 | 30.4M | atom.bytes[i] = (uint8_t) (recent_re_nodes)[i]->value; \ |
863 | 30.4M | atom.mask[i] = (uint8_t) (recent_re_nodes)[i]->mask; \ |
864 | 30.4M | } \ |
865 | 7.71M | } |
866 | | |
867 | | //////////////////////////////////////////////////////////////////////////////// |
868 | | // Extract atoms from a regular expression. This is a helper function used by |
869 | | // yr_atoms_extract_from_re that receives the abstract syntax tree for a regexp |
870 | | // (or hex pattern) and builds an atom tree. The appending_node argument is a |
871 | | // pointer to the ATOM_TREE_OR node at the root of the atom tree. This function |
872 | | // creates the tree by appending new nodes to it. |
873 | | // |
874 | | static int _yr_atoms_extract_from_re( |
875 | | YR_ATOMS_CONFIG* config, |
876 | | RE_AST* re_ast, |
877 | | YR_ATOM_TREE_NODE* appending_node) |
878 | 7.00k | { |
879 | 7.00k | YR_STACK* stack; |
880 | 7.00k | RE_NODE* re_node; |
881 | | |
882 | 7.00k | YR_ATOM atom = {0}; |
883 | 7.00k | YR_ATOM best_atom = {0}; |
884 | | |
885 | 7.00k | struct STACK_ITEM si; |
886 | | |
887 | 7.00k | int i, shift; |
888 | 7.00k | int quality; |
889 | 7.00k | int best_quality = -1; |
890 | 7.00k | int n = 0; |
891 | | |
892 | 7.00k | YR_ATOM_TREE_NODE* and_node; |
893 | 7.00k | YR_ATOM_TREE_NODE* left_node; |
894 | 7.00k | YR_ATOM_TREE_NODE* right_node; |
895 | | |
896 | | // The RE_NODEs most recently visited that can conform an atom (ie: |
897 | | // RE_NODE_LITERAL, RE_NODE_MASKED_LITERAL and RE_NODE_ANY). The number of |
898 | | // items in this array is n. |
899 | 7.00k | RE_NODE* recent_re_nodes[YR_MAX_ATOM_LENGTH]; |
900 | | |
901 | | // The RE_NODEs corresponding to the best atom found so far for the current |
902 | | // appending node. |
903 | 7.00k | RE_NODE* best_atom_re_nodes[YR_MAX_ATOM_LENGTH]; |
904 | | |
905 | | // This holds the ATOM_TREE_OR node where leaves (ATOM_TREE_LEAF) are |
906 | | // currently being appended. |
907 | 7.00k | YR_ATOM_TREE_NODE* current_appending_node = NULL; |
908 | | |
909 | | // This holds the ATOM_TREE_LEAF node whose atom is currently being updated. |
910 | 7.00k | YR_ATOM_TREE_NODE* leaf = NULL; |
911 | | |
912 | 7.00k | FAIL_ON_ERROR(yr_stack_create(1024, sizeof(si), &stack)); |
913 | | |
914 | | // This first item pushed in the stack is the last one to be poped out, the |
915 | | // sole purpose of this item is forcing that any pending leaf is appended to |
916 | | // appending_node during the last iteration of the loop. |
917 | 7.00k | si.re_node = NULL; |
918 | 7.00k | si.new_appending_node = appending_node; |
919 | | |
920 | 7.00k | FAIL_ON_ERROR_WITH_CLEANUP( |
921 | 7.00k | yr_stack_push(stack, (void*) &si), yr_stack_destroy(stack)); |
922 | | |
923 | | // Start processing the root node. |
924 | 7.00k | si.re_node = re_ast->root_node; |
925 | | |
926 | | // Leaf nodes are initially appended to the node passed in the appending_node, |
927 | | // argument which is the root ATOM_TREE_OR node that is empty at this point. |
928 | 7.00k | si.new_appending_node = appending_node; |
929 | | |
930 | 7.00k | FAIL_ON_ERROR_WITH_CLEANUP( |
931 | 7.00k | yr_stack_push(stack, (void*) &si), yr_stack_destroy(stack)); |
932 | | |
933 | 17.0M | while (yr_stack_pop(stack, (void*) &si)) |
934 | 17.0M | { |
935 | | // Change the appending node if the item poped from the stack says so. |
936 | 17.0M | if (si.new_appending_node != NULL) |
937 | 924k | { |
938 | | // Before changing the appending node let's append any pending leaf to |
939 | | // the current appending node. |
940 | 924k | if (n > 0) |
941 | 586k | { |
942 | 586k | make_atom_from_re_nodes(atom, n, recent_re_nodes); |
943 | 586k | shift = _yr_atoms_trim(&atom); |
944 | 586k | quality = config->get_atom_quality(config, &atom); |
945 | | |
946 | 586k | FAIL_ON_NULL_WITH_CLEANUP( |
947 | 586k | leaf = _yr_atoms_tree_node_create(ATOM_TREE_LEAF), |
948 | 586k | yr_stack_destroy(stack)); |
949 | | |
950 | 586k | if (quality > best_quality) |
951 | 196k | { |
952 | 196k | memcpy(&leaf->atom, &atom, sizeof(atom)); |
953 | 196k | memcpy( |
954 | 196k | &leaf->re_nodes, |
955 | 196k | &recent_re_nodes[shift], |
956 | 196k | sizeof(recent_re_nodes) - shift * sizeof(recent_re_nodes[0])); |
957 | 196k | } |
958 | 390k | else |
959 | 390k | { |
960 | 390k | memcpy(&leaf->atom, &best_atom, sizeof(best_atom)); |
961 | 390k | memcpy( |
962 | 390k | &leaf->re_nodes, &best_atom_re_nodes, sizeof(best_atom_re_nodes)); |
963 | 390k | } |
964 | | |
965 | 586k | _yr_atoms_tree_node_append(current_appending_node, leaf); |
966 | 586k | n = 0; |
967 | 586k | } |
968 | | |
969 | 924k | current_appending_node = si.new_appending_node; |
970 | 924k | best_quality = -1; |
971 | 924k | } |
972 | | |
973 | 17.0M | if (si.re_node != NULL) |
974 | 16.1M | { |
975 | 16.1M | switch (si.re_node->type) |
976 | 16.1M | { |
977 | 14.5M | case RE_NODE_LITERAL: |
978 | 14.5M | case RE_NODE_MASKED_LITERAL: |
979 | 14.6M | case RE_NODE_ANY: |
980 | | |
981 | 14.6M | if (n < YR_MAX_ATOM_LENGTH) |
982 | 1.95M | { |
983 | 1.95M | recent_re_nodes[n] = si.re_node; |
984 | 1.95M | best_atom_re_nodes[n] = si.re_node; |
985 | 1.95M | best_atom.bytes[n] = (uint8_t) si.re_node->value; |
986 | 1.95M | best_atom.mask[n] = (uint8_t) si.re_node->mask; |
987 | 1.95M | best_atom.length = ++n; |
988 | 1.95M | } |
989 | 12.7M | else if (best_quality < YR_MAX_ATOM_QUALITY) |
990 | 7.12M | { |
991 | 7.12M | make_atom_from_re_nodes(atom, n, recent_re_nodes); |
992 | 7.12M | shift = _yr_atoms_trim(&atom); |
993 | 7.12M | quality = config->get_atom_quality(config, &atom); |
994 | | |
995 | 7.12M | if (quality > best_quality) |
996 | 1.09M | { |
997 | 5.46M | for (i = 0; i < atom.length; i++) |
998 | 4.36M | { |
999 | 4.36M | best_atom.bytes[i] = atom.bytes[i]; |
1000 | 4.36M | best_atom.mask[i] = atom.mask[i]; |
1001 | 4.36M | best_atom_re_nodes[i] = recent_re_nodes[i + shift]; |
1002 | 4.36M | } |
1003 | | |
1004 | 1.09M | best_atom.length = atom.length; |
1005 | 1.09M | best_quality = quality; |
1006 | 1.09M | } |
1007 | | |
1008 | 28.5M | for (i = 1; i < YR_MAX_ATOM_LENGTH; i++) |
1009 | 21.3M | recent_re_nodes[i - 1] = recent_re_nodes[i]; |
1010 | | |
1011 | 7.12M | recent_re_nodes[YR_MAX_ATOM_LENGTH - 1] = si.re_node; |
1012 | 7.12M | } |
1013 | | |
1014 | 14.6M | break; |
1015 | | |
1016 | 566k | case RE_NODE_CONCAT: |
1017 | | |
1018 | 566k | re_node = si.re_node->children_tail; |
1019 | | |
1020 | | // Push children right to left, they are poped left to right. |
1021 | 16.3M | while (re_node != NULL) |
1022 | 15.7M | { |
1023 | 15.7M | si.new_appending_node = NULL; |
1024 | 15.7M | si.re_node = re_node; |
1025 | | |
1026 | 15.7M | FAIL_ON_ERROR_WITH_CLEANUP( |
1027 | 15.7M | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1028 | | |
1029 | 15.7M | re_node = re_node->prev_sibling; |
1030 | 15.7M | } |
1031 | | |
1032 | 566k | break; |
1033 | | |
1034 | 566k | case RE_NODE_ALT: |
1035 | | |
1036 | | // Create ATOM_TREE_AND node with two ATOM_TREE_OR children nodes. |
1037 | 8.89k | and_node = _yr_atoms_tree_node_create(ATOM_TREE_AND); |
1038 | 8.89k | left_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); |
1039 | 8.89k | right_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); |
1040 | | |
1041 | 8.89k | if (and_node == NULL || left_node == NULL || right_node == NULL) |
1042 | 0 | { |
1043 | 0 | _yr_atoms_tree_node_destroy(and_node); |
1044 | 0 | _yr_atoms_tree_node_destroy(left_node); |
1045 | 0 | _yr_atoms_tree_node_destroy(right_node); |
1046 | |
|
1047 | 0 | yr_stack_destroy(stack); |
1048 | |
|
1049 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1050 | 0 | } |
1051 | | |
1052 | 8.89k | and_node->children_head = left_node; |
1053 | 8.89k | and_node->children_tail = right_node; |
1054 | 8.89k | left_node->next_sibling = right_node; |
1055 | | |
1056 | | // Add the ATOM_TREE_AND as children of the current node. |
1057 | 8.89k | _yr_atoms_tree_node_append(current_appending_node, and_node); |
1058 | | |
1059 | 8.89k | re_node = si.re_node; |
1060 | | |
1061 | 8.89k | si.new_appending_node = current_appending_node; |
1062 | 8.89k | si.re_node = NULL; |
1063 | | |
1064 | 8.89k | FAIL_ON_ERROR_WITH_CLEANUP( |
1065 | 8.89k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1066 | | |
1067 | | // RE_NODE_ALT nodes has only two children, so children_head is the |
1068 | | // left one, and children_tail is right one. |
1069 | 8.89k | si.new_appending_node = right_node; |
1070 | 8.89k | si.re_node = re_node->children_tail; |
1071 | | |
1072 | 8.89k | FAIL_ON_ERROR_WITH_CLEANUP( |
1073 | 8.89k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1074 | | |
1075 | 8.89k | si.new_appending_node = left_node; |
1076 | 8.89k | si.re_node = re_node->children_head; |
1077 | | |
1078 | 8.89k | FAIL_ON_ERROR_WITH_CLEANUP( |
1079 | 8.89k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1080 | | |
1081 | 8.89k | break; |
1082 | | |
1083 | 532 | case RE_NODE_PLUS: |
1084 | | |
1085 | 532 | re_node = si.re_node; |
1086 | | |
1087 | 532 | si.new_appending_node = current_appending_node; |
1088 | 532 | si.re_node = NULL; |
1089 | | |
1090 | 532 | FAIL_ON_ERROR_WITH_CLEANUP( |
1091 | 532 | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1092 | | |
1093 | 532 | si.new_appending_node = NULL; |
1094 | | // RE_NODE_PLUS nodes has a single child, which is children_head. |
1095 | 532 | si.re_node = re_node->children_head; |
1096 | | |
1097 | 532 | FAIL_ON_ERROR_WITH_CLEANUP( |
1098 | 532 | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1099 | | |
1100 | 532 | break; |
1101 | | |
1102 | 106k | case RE_NODE_RANGE: |
1103 | | |
1104 | 106k | re_node = si.re_node; |
1105 | | |
1106 | 106k | si.new_appending_node = current_appending_node; |
1107 | 106k | si.re_node = NULL; |
1108 | | |
1109 | 106k | FAIL_ON_ERROR_WITH_CLEANUP( |
1110 | 106k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1111 | | |
1112 | 106k | si.new_appending_node = NULL; |
1113 | | |
1114 | | // RE_NODE_RANGE nodes has a single child, which is children_head. |
1115 | 106k | si.re_node = re_node->children_head; |
1116 | | |
1117 | | // In a regexp like /a{10,20}/ the optimal atom is 'aaaa' (assuming |
1118 | | // that YR_MAX_ATOM_LENGTH = 4) because the 'a' character must appear |
1119 | | // at least 10 times in the matching string. Each call in the loop |
1120 | | // will append one 'a' to the atom, so YR_MAX_ATOM_LENGTH iterations |
1121 | | // are enough. |
1122 | | |
1123 | 434k | for (i = 0; i < yr_min(re_node->start, YR_MAX_ATOM_LENGTH); i++) |
1124 | 328k | { |
1125 | 328k | FAIL_ON_ERROR_WITH_CLEANUP( |
1126 | 328k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1127 | 328k | } |
1128 | | |
1129 | 106k | break; |
1130 | | |
1131 | 106k | case RE_NODE_RANGE_ANY: |
1132 | 2.03k | case RE_NODE_STAR: |
1133 | 2.47k | case RE_NODE_CLASS: |
1134 | 2.61k | case RE_NODE_WORD_CHAR: |
1135 | 2.80k | case RE_NODE_NON_WORD_CHAR: |
1136 | 2.80k | case RE_NODE_SPACE: |
1137 | 2.95k | case RE_NODE_NON_SPACE: |
1138 | 3.13k | case RE_NODE_DIGIT: |
1139 | 3.13k | case RE_NODE_NON_DIGIT: |
1140 | 3.58k | case RE_NODE_EMPTY: |
1141 | 450k | case RE_NODE_ANCHOR_START: |
1142 | 775k | case RE_NODE_ANCHOR_END: |
1143 | 775k | case RE_NODE_WORD_BOUNDARY: |
1144 | 775k | case RE_NODE_NON_WORD_BOUNDARY: |
1145 | 776k | case RE_NODE_NOT_LITERAL: |
1146 | 776k | case RE_NODE_MASKED_NOT_LITERAL: |
1147 | | |
1148 | 776k | si.new_appending_node = current_appending_node; |
1149 | 776k | si.re_node = NULL; |
1150 | | |
1151 | 776k | FAIL_ON_ERROR_WITH_CLEANUP( |
1152 | 776k | yr_stack_push(stack, &si), yr_stack_destroy(stack)); |
1153 | | |
1154 | 776k | break; |
1155 | | |
1156 | 0 | default: |
1157 | 0 | assert(false); |
1158 | 16.1M | } |
1159 | 16.1M | } |
1160 | 17.0M | } |
1161 | | |
1162 | 7.00k | yr_stack_destroy(stack); |
1163 | | |
1164 | 7.00k | return ERROR_SUCCESS; |
1165 | 7.00k | } |
1166 | | |
1167 | | //////////////////////////////////////////////////////////////////////////////// |
1168 | | // Makes an exact copy of an YR_ATOM_LIST_ITEM. |
1169 | | // |
1170 | | static YR_ATOM_LIST_ITEM* _yr_atoms_clone_list_item(YR_ATOM_LIST_ITEM* item) |
1171 | 251k | { |
1172 | 251k | YR_ATOM_LIST_ITEM* clone = (YR_ATOM_LIST_ITEM*) yr_malloc( |
1173 | 251k | sizeof(YR_ATOM_LIST_ITEM)); |
1174 | | |
1175 | 251k | if (clone == NULL) |
1176 | 0 | return NULL; |
1177 | | |
1178 | 251k | memcpy(clone, item, sizeof(YR_ATOM_LIST_ITEM)); |
1179 | | |
1180 | 251k | return clone; |
1181 | 251k | } |
1182 | | |
1183 | | //////////////////////////////////////////////////////////////////////////////// |
1184 | | // Given list of atoms that may contain wildcards, replace those wildcarded |
1185 | | // atoms with a list of non-wildcarded atoms covering all the combinations |
1186 | | // allowed by the wildcarded atom. For example, the atom {01 ?2 03} will be |
1187 | | // replaced by {01 02 03}, {01 12 03}, {01 22 03} .. {01 F2 03}. The list |
1188 | | // is modified in-place. |
1189 | | // |
1190 | | // Args: |
1191 | | // atoms: Pointer to first element of the list. |
1192 | | // |
1193 | | // Returns: |
1194 | | // ERROR_SUCCESS or ERROR_INSUFFICIENT_MEMORY. |
1195 | | // |
1196 | | static int _yr_atoms_expand_wildcards(YR_ATOM_LIST_ITEM* atoms) |
1197 | 7.00k | { |
1198 | 7.00k | int i; |
1199 | | |
1200 | 7.00k | YR_ATOM_LIST_ITEM* atom = atoms; |
1201 | 7.00k | YR_ATOM_LIST_ITEM* new_atom; |
1202 | 7.00k | YR_ATOM_LIST_ITEM* prev_atom; |
1203 | 7.00k | YR_ATOM_LIST_ITEM* next_atom; |
1204 | | |
1205 | 276k | while (atom != NULL) |
1206 | 269k | { |
1207 | 269k | bool expanded = false; |
1208 | | |
1209 | 1.27M | for (i = 0; i < atom->atom.length; i++) |
1210 | 1.00M | { |
1211 | 1.00M | uint16_t a, s, e, incr = 1; |
1212 | | |
1213 | 1.00M | switch (atom->atom.mask[i]) |
1214 | 1.00M | { |
1215 | 823 | case 0x00: |
1216 | 823 | expanded = true; |
1217 | 823 | s = 0x00; |
1218 | 823 | e = 0xFF; |
1219 | 823 | break; |
1220 | | |
1221 | 926 | case 0x0F: |
1222 | 926 | expanded = true; |
1223 | 926 | s = atom->atom.bytes[i]; |
1224 | 926 | e = atom->atom.bytes[i] | 0xF0; |
1225 | 926 | incr = 0x10; |
1226 | 926 | break; |
1227 | | |
1228 | 1.87k | case 0xF0: |
1229 | 1.87k | expanded = true; |
1230 | 1.87k | s = atom->atom.bytes[i]; |
1231 | 1.87k | e = atom->atom.bytes[i] | 0x0F; |
1232 | 1.87k | break; |
1233 | | |
1234 | 998k | default: |
1235 | 998k | s = 0; |
1236 | 998k | e = 0; |
1237 | 1.00M | } |
1238 | | |
1239 | 1.00M | if (s != e) |
1240 | 3.61k | { |
1241 | 3.61k | atom->atom.bytes[i] = (uint8_t) s; |
1242 | 3.61k | atom->atom.mask[i] = 0xFF; |
1243 | 3.61k | } |
1244 | | |
1245 | 1.00M | prev_atom = atom; |
1246 | 1.00M | next_atom = atom->next; |
1247 | | |
1248 | 1.25M | for (a = s + incr; a <= e; a += incr) |
1249 | 251k | { |
1250 | 251k | new_atom = _yr_atoms_clone_list_item(atom); |
1251 | | |
1252 | 251k | if (new_atom == NULL) |
1253 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1254 | | |
1255 | 251k | new_atom->atom.bytes[i] = (uint8_t) a; |
1256 | 251k | new_atom->atom.mask[i] = 0xFF; |
1257 | 251k | new_atom->next = next_atom; |
1258 | 251k | prev_atom->next = new_atom; |
1259 | 251k | prev_atom = new_atom; |
1260 | 251k | } |
1261 | 1.00M | } |
1262 | | |
1263 | 269k | if (!expanded) |
1264 | 265k | atom = atom->next; |
1265 | 269k | } |
1266 | | |
1267 | 7.00k | return ERROR_SUCCESS; |
1268 | 7.00k | } |
1269 | | |
1270 | | //////////////////////////////////////////////////////////////////////////////// |
1271 | | // Extract atoms from a regular expression. This function receives the abstract |
1272 | | // syntax tree for a regexp (or hex pattern) and returns a list of atoms that |
1273 | | // should be added to the Aho-Corasick automaton. |
1274 | | // |
1275 | | int yr_atoms_extract_from_re( |
1276 | | YR_ATOMS_CONFIG* config, |
1277 | | RE_AST* re_ast, |
1278 | | YR_MODIFIER modifier, |
1279 | | YR_ATOM_LIST_ITEM** atoms, |
1280 | | int* min_atom_quality) |
1281 | 7.00k | { |
1282 | 7.00k | YR_ATOM_TREE* atom_tree = (YR_ATOM_TREE*) yr_malloc(sizeof(YR_ATOM_TREE)); |
1283 | | |
1284 | 7.00k | YR_ATOM_LIST_ITEM* wide_atoms; |
1285 | 7.00k | YR_ATOM_LIST_ITEM* case_insensitive_atoms; |
1286 | | |
1287 | 7.00k | if (atom_tree == NULL) |
1288 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1289 | | |
1290 | 7.00k | atom_tree->root_node = _yr_atoms_tree_node_create(ATOM_TREE_OR); |
1291 | | |
1292 | 7.00k | if (atom_tree->root_node == NULL) |
1293 | 0 | { |
1294 | 0 | _yr_atoms_tree_destroy(atom_tree); |
1295 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1296 | 0 | } |
1297 | | |
1298 | 7.00k | FAIL_ON_ERROR_WITH_CLEANUP( |
1299 | 7.00k | _yr_atoms_extract_from_re(config, re_ast, atom_tree->root_node), |
1300 | 7.00k | _yr_atoms_tree_destroy(atom_tree)); |
1301 | | |
1302 | | // Initialize atom list |
1303 | 7.00k | *atoms = NULL; |
1304 | | |
1305 | | // Choose the atoms that will be used. |
1306 | 7.00k | FAIL_ON_ERROR_WITH_CLEANUP( |
1307 | 7.00k | _yr_atoms_choose(config, atom_tree->root_node, atoms, min_atom_quality), |
1308 | 7.00k | _yr_atoms_tree_destroy(atom_tree)); |
1309 | | |
1310 | 7.00k | _yr_atoms_tree_destroy(atom_tree); |
1311 | | |
1312 | 7.00k | FAIL_ON_ERROR_WITH_CLEANUP( |
1313 | 7.00k | _yr_atoms_expand_wildcards(*atoms), |
1314 | 7.00k | { // Cleanup |
1315 | 7.00k | yr_atoms_list_destroy(*atoms); |
1316 | 7.00k | *atoms = NULL; |
1317 | 7.00k | }); |
1318 | | |
1319 | | // Don't do convert atoms to wide here if either base64 modifier is used. |
1320 | | // This is to avoid the situation where we have "base64 wide" because |
1321 | | // the wide has already been applied BEFORE the base64 encoding. |
1322 | 7.00k | if (modifier.flags & STRING_FLAGS_WIDE && |
1323 | 1.89k | !(modifier.flags & STRING_FLAGS_BASE64 || |
1324 | 1.77k | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
1325 | 1.40k | { |
1326 | 1.40k | FAIL_ON_ERROR_WITH_CLEANUP( |
1327 | 1.40k | _yr_atoms_wide(*atoms, &wide_atoms), |
1328 | 1.40k | { // Cleanup |
1329 | 1.40k | yr_atoms_list_destroy(*atoms); |
1330 | 1.40k | yr_atoms_list_destroy(wide_atoms); |
1331 | 1.40k | *atoms = NULL; |
1332 | 1.40k | }); |
1333 | | |
1334 | 1.40k | if (modifier.flags & STRING_FLAGS_ASCII) |
1335 | 571 | { |
1336 | 571 | *atoms = _yr_atoms_list_concat(*atoms, wide_atoms); |
1337 | 571 | } |
1338 | 832 | else |
1339 | 832 | { |
1340 | 832 | yr_atoms_list_destroy(*atoms); |
1341 | 832 | *atoms = wide_atoms; |
1342 | 832 | } |
1343 | 1.40k | } |
1344 | | |
1345 | 7.00k | if (modifier.flags & STRING_FLAGS_NO_CASE) |
1346 | 1.77k | { |
1347 | 1.77k | FAIL_ON_ERROR_WITH_CLEANUP( |
1348 | 1.77k | _yr_atoms_case_insensitive(*atoms, &case_insensitive_atoms), |
1349 | 1.77k | { // Cleanup |
1350 | 1.77k | yr_atoms_list_destroy(*atoms); |
1351 | 1.77k | yr_atoms_list_destroy(case_insensitive_atoms); |
1352 | 1.77k | *atoms = NULL; |
1353 | 1.77k | }); |
1354 | | |
1355 | 1.77k | *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms); |
1356 | 1.77k | } |
1357 | | |
1358 | 7.00k | return ERROR_SUCCESS; |
1359 | 7.00k | } |
1360 | | |
1361 | | //////////////////////////////////////////////////////////////////////////////// |
1362 | | // Extract atoms from a string. |
1363 | | // |
1364 | | int yr_atoms_extract_from_string( |
1365 | | YR_ATOMS_CONFIG* config, |
1366 | | uint8_t* string, |
1367 | | int32_t string_length, |
1368 | | YR_MODIFIER modifier, |
1369 | | YR_ATOM_LIST_ITEM** atoms, |
1370 | | int* min_atom_quality) |
1371 | 20.7k | { |
1372 | 20.7k | YR_ATOM_LIST_ITEM* item; |
1373 | 20.7k | YR_ATOM_LIST_ITEM* case_insensitive_atoms; |
1374 | 20.7k | YR_ATOM_LIST_ITEM* xor_atoms; |
1375 | 20.7k | YR_ATOM_LIST_ITEM* wide_atoms; |
1376 | | |
1377 | 20.7k | YR_ATOM atom; |
1378 | | |
1379 | 20.7k | int quality, max_quality; |
1380 | 20.7k | int i; |
1381 | | |
1382 | 20.7k | item = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
1383 | | |
1384 | 20.7k | if (item == NULL) |
1385 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1386 | | |
1387 | 20.7k | item->forward_code_ref = YR_ARENA_NULL_REF; |
1388 | 20.7k | item->backward_code_ref = YR_ARENA_NULL_REF; |
1389 | 20.7k | item->next = NULL; |
1390 | 20.7k | item->backtrack = 0; |
1391 | | |
1392 | 20.7k | item->atom.length = yr_min(string_length, YR_MAX_ATOM_LENGTH); |
1393 | | |
1394 | 72.7k | for (i = 0; i < item->atom.length; i++) |
1395 | 52.0k | { |
1396 | 52.0k | item->atom.bytes[i] = string[i]; |
1397 | 52.0k | item->atom.mask[i] = 0xFF; |
1398 | 52.0k | } |
1399 | | |
1400 | 20.7k | max_quality = config->get_atom_quality(config, &item->atom); |
1401 | | |
1402 | 20.7k | atom.length = YR_MAX_ATOM_LENGTH; |
1403 | 20.7k | memset(atom.mask, 0xFF, atom.length); |
1404 | | |
1405 | 20.7k | for (i = YR_MAX_ATOM_LENGTH; |
1406 | 71.5k | i < string_length && max_quality < YR_MAX_ATOM_QUALITY; |
1407 | 50.8k | i++) |
1408 | 50.8k | { |
1409 | 50.8k | atom.length = YR_MAX_ATOM_LENGTH; |
1410 | 50.8k | memcpy(atom.bytes, string + i - YR_MAX_ATOM_LENGTH + 1, atom.length); |
1411 | | |
1412 | 50.8k | quality = config->get_atom_quality(config, &atom); |
1413 | | |
1414 | 50.8k | if (quality > max_quality) |
1415 | 4.40k | { |
1416 | 4.40k | memcpy(&item->atom, &atom, sizeof(atom)); |
1417 | 4.40k | item->backtrack = i - YR_MAX_ATOM_LENGTH + 1; |
1418 | 4.40k | max_quality = quality; |
1419 | 4.40k | } |
1420 | 50.8k | } |
1421 | | |
1422 | 20.7k | *atoms = item; |
1423 | 20.7k | *min_atom_quality = max_quality; |
1424 | | |
1425 | 20.7k | if (modifier.flags & STRING_FLAGS_WIDE) |
1426 | 1.89k | { |
1427 | 1.89k | FAIL_ON_ERROR_WITH_CLEANUP( |
1428 | 1.89k | _yr_atoms_wide(*atoms, &wide_atoms), |
1429 | 1.89k | { // Cleanup |
1430 | 1.89k | yr_atoms_list_destroy(*atoms); |
1431 | 1.89k | yr_atoms_list_destroy(wide_atoms); |
1432 | 1.89k | *atoms = NULL; |
1433 | 1.89k | }); |
1434 | | |
1435 | 1.89k | if (modifier.flags & STRING_FLAGS_ASCII) |
1436 | 1.18k | { |
1437 | 1.18k | *atoms = _yr_atoms_list_concat(*atoms, wide_atoms); |
1438 | 1.18k | } |
1439 | 712 | else |
1440 | 712 | { |
1441 | 712 | yr_atoms_list_destroy(*atoms); |
1442 | 712 | *atoms = wide_atoms; |
1443 | 712 | } |
1444 | 1.89k | } |
1445 | | |
1446 | 20.7k | if (modifier.flags & STRING_FLAGS_NO_CASE) |
1447 | 5.11k | { |
1448 | 5.11k | FAIL_ON_ERROR_WITH_CLEANUP( |
1449 | 5.11k | _yr_atoms_case_insensitive(*atoms, &case_insensitive_atoms), |
1450 | 5.11k | { // Cleanup |
1451 | 5.11k | yr_atoms_list_destroy(*atoms); |
1452 | 5.11k | yr_atoms_list_destroy(case_insensitive_atoms); |
1453 | 5.11k | *atoms = NULL; |
1454 | 5.11k | }); |
1455 | | |
1456 | 5.11k | *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms); |
1457 | 5.11k | } |
1458 | | |
1459 | 20.7k | if (modifier.flags & STRING_FLAGS_XOR) |
1460 | 857 | { |
1461 | 857 | FAIL_ON_ERROR_WITH_CLEANUP( |
1462 | 857 | _yr_atoms_xor(*atoms, modifier.xor_min, modifier.xor_max, &xor_atoms), |
1463 | 857 | { // Cleanup |
1464 | 857 | yr_atoms_list_destroy(*atoms); |
1465 | 857 | yr_atoms_list_destroy(xor_atoms); |
1466 | 857 | *atoms = NULL; |
1467 | 857 | }); |
1468 | | |
1469 | 857 | yr_atoms_list_destroy(*atoms); |
1470 | 857 | *atoms = xor_atoms; |
1471 | 857 | } |
1472 | | |
1473 | | // Recheck the atom quality, in case we have just generated some poor atoms. |
1474 | | // https://github.com/VirusTotal/yara/issues/1172 |
1475 | 278k | for (item = *atoms; item != NULL; item = item->next) |
1476 | 257k | { |
1477 | 257k | quality = config->get_atom_quality(config, &item->atom); |
1478 | 257k | if (quality < *min_atom_quality) |
1479 | 1.55k | *min_atom_quality = quality; |
1480 | 257k | } |
1481 | | |
1482 | 20.7k | return ERROR_SUCCESS; |
1483 | 20.7k | } |
1484 | | |
1485 | | //////////////////////////////////////////////////////////////////////////////// |
1486 | | // Prints an atom tree node. Used only for debugging purposes. |
1487 | | // |
1488 | | void yr_atoms_tree_node_print(YR_ATOM_TREE_NODE* node) |
1489 | 0 | { |
1490 | 0 | YR_ATOM_TREE_NODE* child; |
1491 | |
|
1492 | 0 | if (node == NULL) |
1493 | 0 | { |
1494 | 0 | printf("Empty tree node\n"); |
1495 | 0 | return; |
1496 | 0 | } |
1497 | | |
1498 | 0 | switch (node->type) |
1499 | 0 | { |
1500 | 0 | case ATOM_TREE_LEAF: |
1501 | 0 | for (int i = 0; i < node->atom.length; i++) |
1502 | 0 | printf("%02X", node->atom.bytes[i]); |
1503 | 0 | break; |
1504 | | |
1505 | 0 | case ATOM_TREE_AND: |
1506 | 0 | case ATOM_TREE_OR: |
1507 | 0 | if (node->type == ATOM_TREE_AND) |
1508 | 0 | printf("AND"); |
1509 | 0 | else |
1510 | 0 | printf("OR"); |
1511 | 0 | printf("("); |
1512 | 0 | child = node->children_head; |
1513 | 0 | while (child != NULL) |
1514 | 0 | { |
1515 | 0 | yr_atoms_tree_node_print(child); |
1516 | 0 | child = child->next_sibling; |
1517 | 0 | if (child != NULL) |
1518 | 0 | printf(","); |
1519 | 0 | } |
1520 | 0 | printf(")"); |
1521 | 0 | break; |
1522 | 0 | } |
1523 | 0 | } |