/src/yara/libyara/parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2013. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <limits.h> |
31 | | #include <stddef.h> |
32 | | #include <string.h> |
33 | | #include <yara/ahocorasick.h> |
34 | | #include <yara/arena.h> |
35 | | #include <yara/base64.h> |
36 | | #include <yara/error.h> |
37 | | #include <yara/exec.h> |
38 | | #include <yara/integers.h> |
39 | | #include <yara/mem.h> |
40 | | #include <yara/modules.h> |
41 | | #include <yara/object.h> |
42 | | #include <yara/parser.h> |
43 | | #include <yara/re.h> |
44 | | #include <yara/strutils.h> |
45 | | #include <yara/utils.h> |
46 | | |
// Converts one hexadecimal digit character to its numeric value as a uint8_t.
// Only '0'-'9' and upper-case 'A'-'F' are mapped to 0-15; any other character
// (including lower-case 'a'-'f') falls into the "minus '0'" branch and yields
// a meaningless value.
//
// Every use of the argument and the expansion as a whole are parenthesized so
// the macro can be embedded in larger expressions, e.g. todigit(c) << 4, and
// can take compound arguments. The argument is evaluated more than once, so it
// must not have side effects.
#define todigit(x)                                          \
  ((uint8_t) (((x) >= 'A' && (x) <= 'F') ? ((x) - 'A' + 10) \
                                         : ((x) - '0')))
50 | | |
51 | | int yr_parser_emit( |
52 | | yyscan_t yyscanner, |
53 | | uint8_t instruction, |
54 | | YR_ARENA_REF* instruction_ref) |
55 | 58.4k | { |
56 | 58.4k | return yr_arena_write_data( |
57 | 58.4k | yyget_extra(yyscanner)->arena, |
58 | 58.4k | YR_CODE_SECTION, |
59 | 58.4k | &instruction, |
60 | 58.4k | sizeof(uint8_t), |
61 | 58.4k | instruction_ref); |
62 | 58.4k | } |
63 | | |
64 | | int yr_parser_emit_with_arg_double( |
65 | | yyscan_t yyscanner, |
66 | | uint8_t instruction, |
67 | | double argument, |
68 | | YR_ARENA_REF* instruction_ref, |
69 | | YR_ARENA_REF* argument_ref) |
70 | 1.24k | { |
71 | 1.24k | int result = yr_arena_write_data( |
72 | 1.24k | yyget_extra(yyscanner)->arena, |
73 | 1.24k | YR_CODE_SECTION, |
74 | 1.24k | &instruction, |
75 | 1.24k | sizeof(uint8_t), |
76 | 1.24k | instruction_ref); |
77 | | |
78 | 1.24k | if (result == ERROR_SUCCESS) |
79 | 1.24k | result = yr_arena_write_data( |
80 | 1.24k | yyget_extra(yyscanner)->arena, |
81 | 1.24k | YR_CODE_SECTION, |
82 | 1.24k | &argument, |
83 | 1.24k | sizeof(double), |
84 | 1.24k | argument_ref); |
85 | | |
86 | 1.24k | return result; |
87 | 1.24k | } |
88 | | |
89 | | int yr_parser_emit_with_arg_int32( |
90 | | yyscan_t yyscanner, |
91 | | uint8_t instruction, |
92 | | int32_t argument, |
93 | | YR_ARENA_REF* instruction_ref, |
94 | | YR_ARENA_REF* argument_ref) |
95 | 17.3k | { |
96 | 17.3k | int result = yr_arena_write_data( |
97 | 17.3k | yyget_extra(yyscanner)->arena, |
98 | 17.3k | YR_CODE_SECTION, |
99 | 17.3k | &instruction, |
100 | 17.3k | sizeof(uint8_t), |
101 | 17.3k | instruction_ref); |
102 | | |
103 | 17.3k | if (result == ERROR_SUCCESS) |
104 | 17.3k | result = yr_arena_write_data( |
105 | 17.3k | yyget_extra(yyscanner)->arena, |
106 | 17.3k | YR_CODE_SECTION, |
107 | 17.3k | &argument, |
108 | 17.3k | sizeof(int32_t), |
109 | 17.3k | argument_ref); |
110 | | |
111 | 17.3k | return result; |
112 | 17.3k | } |
113 | | |
114 | | int yr_parser_emit_with_arg( |
115 | | yyscan_t yyscanner, |
116 | | uint8_t instruction, |
117 | | int64_t argument, |
118 | | YR_ARENA_REF* instruction_ref, |
119 | | YR_ARENA_REF* argument_ref) |
120 | 36.6k | { |
121 | 36.6k | int result = yr_arena_write_data( |
122 | 36.6k | yyget_extra(yyscanner)->arena, |
123 | 36.6k | YR_CODE_SECTION, |
124 | 36.6k | &instruction, |
125 | 36.6k | sizeof(uint8_t), |
126 | 36.6k | instruction_ref); |
127 | | |
128 | 36.6k | if (result == ERROR_SUCCESS) |
129 | 36.6k | result = yr_arena_write_data( |
130 | 36.6k | yyget_extra(yyscanner)->arena, |
131 | 36.6k | YR_CODE_SECTION, |
132 | 36.6k | &argument, |
133 | 36.6k | sizeof(int64_t), |
134 | 36.6k | argument_ref); |
135 | | |
136 | 36.6k | return result; |
137 | 36.6k | } |
138 | | |
139 | | int yr_parser_emit_with_arg_reloc( |
140 | | yyscan_t yyscanner, |
141 | | uint8_t instruction, |
142 | | void* argument, |
143 | | YR_ARENA_REF* instruction_ref, |
144 | | YR_ARENA_REF* argument_ref) |
145 | 412k | { |
146 | 412k | YR_ARENA_REF ref = YR_ARENA_NULL_REF; |
147 | | |
148 | 412k | DECLARE_REFERENCE(void*, ptr) arg; |
149 | | |
150 | 412k | memset(&arg, 0, sizeof(arg)); |
151 | 412k | arg.ptr = argument; |
152 | | |
153 | 412k | int result = yr_arena_write_data( |
154 | 412k | yyget_extra(yyscanner)->arena, |
155 | 412k | YR_CODE_SECTION, |
156 | 412k | &instruction, |
157 | 412k | sizeof(uint8_t), |
158 | 412k | instruction_ref); |
159 | | |
160 | 412k | if (result == ERROR_SUCCESS) |
161 | 412k | result = yr_arena_write_data( |
162 | 412k | yyget_extra(yyscanner)->arena, |
163 | 412k | YR_CODE_SECTION, |
164 | 412k | &arg, |
165 | 412k | sizeof(arg), |
166 | 412k | &ref); |
167 | | |
168 | 412k | if (result == ERROR_SUCCESS) |
169 | 412k | result = yr_arena_make_ptr_relocatable( |
170 | 412k | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL); |
171 | | |
172 | 412k | if (argument_ref != NULL) |
173 | 0 | *argument_ref = ref; |
174 | | |
175 | 412k | return result; |
176 | 412k | } |
177 | | |
178 | | int yr_parser_emit_pushes_for_strings( |
179 | | yyscan_t yyscanner, |
180 | | const char* identifier, |
181 | | int* count) |
182 | 8.15k | { |
183 | 8.15k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
184 | | |
185 | 8.15k | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
186 | 8.15k | compiler, compiler->current_rule_idx); |
187 | | |
188 | 8.15k | YR_STRING* string; |
189 | | |
190 | 8.15k | const char* string_identifier; |
191 | 8.15k | const char* target_identifier; |
192 | | |
193 | 8.15k | int matching = 0; |
194 | | |
195 | 8.15k | yr_rule_strings_foreach(current_rule, string) |
196 | 412k | { |
197 | | // Don't generate pushes for strings chained to another one, we are |
198 | | // only interested in non-chained strings or the head of the chain. |
199 | | |
200 | 412k | if (string->chained_to == NULL) |
201 | 410k | { |
202 | 410k | string_identifier = string->identifier; |
203 | 410k | target_identifier = identifier; |
204 | | |
205 | 820k | while (*target_identifier != '\0' && *string_identifier != '\0' && |
206 | 820k | *target_identifier == *string_identifier) |
207 | 410k | { |
208 | 410k | target_identifier++; |
209 | 410k | string_identifier++; |
210 | 410k | } |
211 | | |
212 | 410k | if ((*target_identifier == '\0' && *string_identifier == '\0') || |
213 | 410k | *target_identifier == '*') |
214 | 408k | { |
215 | 408k | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL); |
216 | | |
217 | 408k | string->flags |= STRING_FLAGS_REFERENCED; |
218 | 408k | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
219 | 408k | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
220 | 408k | matching++; |
221 | 408k | } |
222 | 410k | } |
223 | 412k | } |
224 | | |
225 | 8.15k | if (count != NULL) |
226 | 8.15k | { |
227 | 8.15k | *count = matching; |
228 | 8.15k | } |
229 | | |
230 | 8.15k | if (matching == 0) |
231 | 5 | { |
232 | 5 | yr_compiler_set_error_extra_info( |
233 | 5 | compiler, identifier) return ERROR_UNDEFINED_STRING; |
234 | 5 | } |
235 | | |
236 | 8.15k | return ERROR_SUCCESS; |
237 | 8.15k | } |
238 | | |
239 | | // Emit OP_PUSH_RULE instructions for all rules whose identifier has given |
240 | | // prefix. |
241 | | int yr_parser_emit_pushes_for_rules( |
242 | | yyscan_t yyscanner, |
243 | | const char* prefix, |
244 | | int* count) |
245 | 633 | { |
246 | 633 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
247 | | |
248 | | // Make sure the compiler is parsing a rule |
249 | 633 | assert(compiler->current_rule_idx != UINT32_MAX); |
250 | | |
251 | 633 | YR_RULE* rule; |
252 | 633 | int matching = 0; |
253 | | |
254 | 633 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
255 | 633 | compiler->arena, |
256 | 633 | YR_NAMESPACES_TABLE, |
257 | 633 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
258 | | |
259 | | // Can't use yr_rules_foreach here as that requires the rules to have been |
260 | | // finalized (inserting a NULL rule at the end). This is done when |
261 | | // yr_compiler_get_rules() is called, which also inserts a HALT instruction |
262 | | // into the current position in the code arena. Obviously we aren't done |
263 | | // compiling the rules yet so inserting a HALT is a bad idea. To deal with |
264 | | // this I'm manually walking all the currently compiled rules (up to the |
265 | | // current rule index) and comparing identifiers to see if it is one we should |
266 | | // use. |
267 | | // |
268 | | // Further, we have to get compiler->current_rule_idx before we start because |
269 | | // if we emit an OP_PUSH_RULE |
270 | 633 | rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0); |
271 | | |
272 | 4.39k | for (uint32_t i = 0; i <= compiler->current_rule_idx; i++) |
273 | 3.75k | { |
274 | | // Is rule->identifier prefixed by prefix? |
275 | 3.75k | if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0) |
276 | 1.30k | { |
277 | 1.30k | uint32_t rule_idx = yr_hash_table_lookup_uint32( |
278 | 1.30k | compiler->rules_table, rule->identifier, ns->name); |
279 | | |
280 | 1.30k | if (rule_idx != UINT32_MAX) |
281 | 1.30k | { |
282 | 1.30k | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
283 | 1.30k | yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL)); |
284 | 1.30k | matching++; |
285 | 1.30k | } |
286 | 1.30k | } |
287 | | |
288 | 3.75k | rule++; |
289 | 3.75k | } |
290 | | |
291 | 633 | if (count != NULL) |
292 | 633 | { |
293 | 633 | *count = matching; |
294 | 633 | } |
295 | | |
296 | 633 | if (matching == 0) |
297 | 39 | { |
298 | 39 | yr_compiler_set_error_extra_info(compiler, prefix); |
299 | 39 | return ERROR_UNDEFINED_IDENTIFIER; |
300 | 39 | } |
301 | | |
302 | 594 | return ERROR_SUCCESS; |
303 | 633 | } |
304 | | |
305 | | int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument) |
306 | 45.9k | { |
307 | 45.9k | uint8_t opcode[9]; |
308 | 45.9k | int opcode_len = 1; |
309 | | |
310 | 45.9k | if (argument == YR_UNDEFINED) |
311 | 14.6k | { |
312 | 14.6k | opcode[0] = OP_PUSH_U; |
313 | 14.6k | } |
314 | 31.3k | else if (argument <= 0xff) |
315 | 28.6k | { |
316 | 28.6k | opcode[0] = OP_PUSH_8; |
317 | 28.6k | opcode[1] = (uint8_t) argument; |
318 | 28.6k | opcode_len += sizeof(uint8_t); |
319 | 28.6k | } |
320 | 2.68k | else if (argument <= 0xffff) |
321 | 1.03k | { |
322 | 1.03k | opcode[0] = OP_PUSH_16; |
323 | 1.03k | uint16_t u = (uint16_t) argument; |
324 | 1.03k | memcpy(opcode + 1, &u, sizeof(uint16_t)); |
325 | 1.03k | opcode_len += sizeof(uint16_t); |
326 | 1.03k | } |
327 | 1.64k | else if (argument <= 0xffffffff) |
328 | 842 | { |
329 | 842 | opcode[0] = OP_PUSH_32; |
330 | 842 | uint32_t u = (uint32_t) argument; |
331 | 842 | memcpy(opcode + 1, &u, sizeof(uint32_t)); |
332 | 842 | opcode_len += sizeof(uint32_t); |
333 | 842 | } |
334 | 805 | else |
335 | 805 | { |
336 | 805 | opcode[0] = OP_PUSH; |
337 | 805 | memcpy(opcode + 1, &argument, sizeof(uint64_t)); |
338 | 805 | opcode_len += sizeof(uint64_t); |
339 | 805 | } |
340 | | |
341 | 45.9k | return yr_arena_write_data( |
342 | 45.9k | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, opcode, opcode_len, NULL); |
343 | 45.9k | } |
344 | | |
345 | | int yr_parser_check_types( |
346 | | YR_COMPILER* compiler, |
347 | | YR_OBJECT_FUNCTION* function, |
348 | | const char* actual_args_fmt) |
349 | 299 | { |
350 | 299 | int i; |
351 | | |
352 | 525 | for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) |
353 | 525 | { |
354 | 525 | if (function->prototypes[i].arguments_fmt == NULL) |
355 | 0 | break; |
356 | | |
357 | 525 | if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0) |
358 | 299 | return ERROR_SUCCESS; |
359 | 525 | } |
360 | | |
361 | 0 | yr_compiler_set_error_extra_info(compiler, function->identifier) |
362 | |
|
363 | 0 | return ERROR_WRONG_ARGUMENTS; |
364 | 299 | } |
365 | | |
366 | | int yr_parser_lookup_string( |
367 | | yyscan_t yyscanner, |
368 | | const char* identifier, |
369 | | YR_STRING** string) |
370 | 373 | { |
371 | 373 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
372 | | |
373 | 373 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
374 | 373 | compiler, compiler->current_rule_idx); |
375 | | |
376 | 373 | yr_rule_strings_foreach(current_rule, *string) |
377 | 485 | { |
378 | | // If some string $a gets fragmented into multiple chained |
379 | | // strings, all those fragments have the same $a identifier |
380 | | // but we are interested in the heading fragment, which is |
381 | | // that with chained_to == NULL |
382 | | |
383 | 485 | if ((*string)->chained_to == NULL && |
384 | 485 | strcmp((*string)->identifier, identifier) == 0) |
385 | 343 | { |
386 | 343 | return ERROR_SUCCESS; |
387 | 343 | } |
388 | 485 | } |
389 | | |
390 | 30 | yr_compiler_set_error_extra_info(compiler, identifier) |
391 | | |
392 | 30 | * string = NULL; |
393 | | |
394 | 30 | return ERROR_UNDEFINED_STRING; |
395 | 373 | } |
396 | | |
397 | | //////////////////////////////////////////////////////////////////////////////// |
398 | | // Searches for a variable with the given identifier in the scope of the current |
399 | | // "for" loop. In case of nested "for" loops the identifier is searched starting |
400 | | // at the top-level loop and going down through the nested loops until the |
401 | | // current one. This is ok because inner loops can not re-define an identifier |
402 | | // already defined by an outer loop. |
403 | | // |
404 | | // If the variable is found, the return value is the position that the variable |
405 | | // occupies among all the currently defined variables. If the variable doesn't |
406 | | // exist the return value is -1. |
407 | | // |
408 | | // The function can receive a pointer to a YR_EXPRESSION that will be populated |
409 | | // with information about the variable if found. This pointer can be NULL if |
410 | | // the caller is not interested in getting that information. |
411 | | // |
412 | | int yr_parser_lookup_loop_variable( |
413 | | yyscan_t yyscanner, |
414 | | const char* identifier, |
415 | | YR_EXPRESSION* expr) |
416 | 15.0k | { |
417 | 15.0k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
418 | 15.0k | int i, j; |
419 | 15.0k | int var_offset = 0; |
420 | | |
421 | 18.3k | for (i = 0; i <= compiler->loop_index; i++) |
422 | 12.7k | { |
423 | 12.7k | var_offset += compiler->loop[i].vars_internal_count; |
424 | | |
425 | 22.2k | for (j = 0; j < compiler->loop[i].vars_count; j++) |
426 | 18.9k | { |
427 | 18.9k | if (compiler->loop[i].vars[j].identifier.ptr != NULL && |
428 | 18.9k | strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0) |
429 | 9.45k | { |
430 | 9.45k | if (expr != NULL) |
431 | 9.35k | *expr = compiler->loop[i].vars[j]; |
432 | | |
433 | 9.45k | return var_offset + j; |
434 | 9.45k | } |
435 | 18.9k | } |
436 | | |
437 | 3.30k | var_offset += compiler->loop[i].vars_count; |
438 | 3.30k | } |
439 | | |
440 | 5.59k | return -1; |
441 | 15.0k | } |
442 | | |
443 | | static int _yr_parser_write_string( |
444 | | const char* identifier, |
445 | | YR_MODIFIER modifier, |
446 | | YR_COMPILER* compiler, |
447 | | SIZED_STRING* str, |
448 | | RE_AST* re_ast, |
449 | | YR_ARENA_REF* string_ref, |
450 | | int* min_atom_quality, |
451 | | int* num_atom) |
452 | 20.1k | { |
453 | 20.1k | SIZED_STRING* literal_string; |
454 | 20.1k | YR_ATOM_LIST_ITEM* atom; |
455 | 20.1k | YR_ATOM_LIST_ITEM* atom_list = NULL; |
456 | | |
457 | 20.1k | int c, result; |
458 | 20.1k | int max_string_len; |
459 | 20.1k | bool free_literal = false; |
460 | | |
461 | 20.1k | FAIL_ON_ERROR(yr_arena_allocate_struct( |
462 | 20.1k | compiler->arena, |
463 | 20.1k | YR_STRINGS_TABLE, |
464 | 20.1k | sizeof(YR_STRING), |
465 | 20.1k | string_ref, |
466 | 20.1k | offsetof(YR_STRING, identifier), |
467 | 20.1k | offsetof(YR_STRING, string), |
468 | 20.1k | offsetof(YR_STRING, chained_to), |
469 | 20.1k | EOL)); |
470 | | |
471 | 20.1k | YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr( |
472 | 20.1k | compiler->arena, string_ref); |
473 | | |
474 | 20.1k | YR_ARENA_REF ref; |
475 | | |
476 | 20.1k | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
477 | | |
478 | 20.1k | string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
479 | | |
480 | 20.1k | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
481 | 20.1k | modifier.flags & STRING_FLAGS_REGEXP || |
482 | 20.1k | modifier.flags & STRING_FLAGS_BASE64 || |
483 | 20.1k | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
484 | 19.2k | { |
485 | 19.2k | literal_string = yr_re_ast_extract_literal(re_ast); |
486 | | |
487 | 19.2k | if (literal_string != NULL) |
488 | 13.8k | free_literal = true; |
489 | 19.2k | } |
490 | 948 | else |
491 | 948 | { |
492 | 948 | literal_string = str; |
493 | 948 | } |
494 | | |
495 | 20.1k | if (literal_string != NULL) |
496 | 14.7k | { |
497 | 14.7k | modifier.flags |= STRING_FLAGS_LITERAL; |
498 | | |
499 | 14.7k | result = _yr_compiler_store_data( |
500 | 14.7k | compiler, |
501 | 14.7k | literal_string->c_string, |
502 | 14.7k | literal_string->length + 1, // +1 to include terminating NULL |
503 | 14.7k | &ref); |
504 | | |
505 | 14.7k | string->length = (uint32_t) literal_string->length; |
506 | 14.7k | string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
507 | | |
508 | 14.7k | if (result == ERROR_SUCCESS) |
509 | 14.7k | { |
510 | 14.7k | result = yr_atoms_extract_from_string( |
511 | 14.7k | &compiler->atoms_config, |
512 | 14.7k | (uint8_t*) literal_string->c_string, |
513 | 14.7k | (int32_t) literal_string->length, |
514 | 14.7k | modifier, |
515 | 14.7k | &atom_list, |
516 | 14.7k | min_atom_quality); |
517 | 14.7k | } |
518 | 14.7k | } |
519 | 5.37k | else |
520 | 5.37k | { |
521 | | // Non-literal strings can't be marked as fixed offset because once we |
522 | | // find a string atom in the scanned data we don't know the offset where |
523 | | // the string should start, as the non-literal strings can contain |
524 | | // variable-length portions. |
525 | 5.37k | modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET; |
526 | | |
527 | | // Emit forwards code |
528 | 5.37k | result = yr_re_ast_emit_code(re_ast, compiler->arena, false); |
529 | | |
530 | | // Emit backwards code |
531 | 5.37k | if (result == ERROR_SUCCESS) |
532 | 5.30k | result = yr_re_ast_emit_code(re_ast, compiler->arena, true); |
533 | | |
534 | 5.37k | if (result == ERROR_SUCCESS) |
535 | 5.30k | result = yr_atoms_extract_from_re( |
536 | 5.30k | &compiler->atoms_config, |
537 | 5.30k | re_ast, |
538 | 5.30k | modifier, |
539 | 5.30k | &atom_list, |
540 | 5.30k | min_atom_quality); |
541 | 5.37k | } |
542 | | |
543 | 20.1k | string->flags = modifier.flags; |
544 | 20.1k | string->rule_idx = compiler->current_rule_idx; |
545 | 20.1k | string->idx = compiler->current_string_idx; |
546 | 20.1k | string->fixed_offset = YR_UNDEFINED; |
547 | | |
548 | 20.1k | if (result == ERROR_SUCCESS) |
549 | 20.1k | { |
550 | | // Add the string to Aho-Corasick automaton. |
551 | 20.1k | result = yr_ac_add_string( |
552 | 20.1k | compiler->automaton, |
553 | 20.1k | string, |
554 | 20.1k | compiler->current_string_idx, |
555 | 20.1k | atom_list, |
556 | 20.1k | compiler->arena); |
557 | 20.1k | } |
558 | | |
559 | 20.1k | if (modifier.flags & STRING_FLAGS_LITERAL) |
560 | 14.7k | { |
561 | 14.7k | if (modifier.flags & STRING_FLAGS_WIDE) |
562 | 2.07k | max_string_len = string->length * 2; |
563 | 12.7k | else |
564 | 12.7k | max_string_len = string->length; |
565 | | |
566 | 14.7k | if (max_string_len <= YR_MAX_ATOM_LENGTH) |
567 | 10.7k | string->flags |= STRING_FLAGS_FITS_IN_ATOM; |
568 | 14.7k | } |
569 | | |
570 | 20.1k | atom = atom_list; |
571 | 20.1k | c = 0; |
572 | | |
573 | 5.39M | while (atom != NULL) |
574 | 5.37M | { |
575 | 5.37M | atom = atom->next; |
576 | 5.37M | c++; |
577 | 5.37M | } |
578 | | |
579 | 20.1k | (*num_atom) += c; |
580 | | |
581 | 20.1k | compiler->current_string_idx++; |
582 | | |
583 | 20.1k | if (free_literal) |
584 | 13.8k | yr_free(literal_string); |
585 | | |
586 | 20.1k | if (atom_list != NULL) |
587 | 20.1k | yr_atoms_list_destroy(atom_list); |
588 | | |
589 | 20.1k | return result; |
590 | 20.1k | } |
591 | | |
592 | | static int _yr_parser_check_string_modifiers( |
593 | | yyscan_t yyscanner, |
594 | | YR_MODIFIER modifier) |
595 | 16.8k | { |
596 | 16.8k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
597 | | |
598 | | // xor and nocase together is not implemented. |
599 | 16.8k | if (modifier.flags & STRING_FLAGS_XOR && |
600 | 16.8k | modifier.flags & STRING_FLAGS_NO_CASE) |
601 | 0 | { |
602 | 0 | yr_compiler_set_error_extra_info( |
603 | 0 | compiler, "invalid modifier combination: xor nocase"); |
604 | 0 | return ERROR_INVALID_MODIFIER; |
605 | 0 | } |
606 | | |
607 | | // base64 and nocase together is not implemented. |
608 | 16.8k | if (modifier.flags & STRING_FLAGS_NO_CASE && |
609 | 16.8k | (modifier.flags & STRING_FLAGS_BASE64 || |
610 | 2.84k | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
611 | 0 | { |
612 | 0 | yr_compiler_set_error_extra_info( |
613 | 0 | compiler, |
614 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
615 | 0 | ? "invalid modifier combination: base64 nocase" |
616 | 0 | : "invalid modifier combination: base64wide nocase"); |
617 | 0 | return ERROR_INVALID_MODIFIER; |
618 | 0 | } |
619 | | |
620 | | // base64 and fullword together is not implemented. |
621 | 16.8k | if (modifier.flags & STRING_FLAGS_FULL_WORD && |
622 | 16.8k | (modifier.flags & STRING_FLAGS_BASE64 || |
623 | 58 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
624 | 2 | { |
625 | 2 | yr_compiler_set_error_extra_info( |
626 | 2 | compiler, |
627 | 2 | modifier.flags & STRING_FLAGS_BASE64 |
628 | 2 | ? "invalid modifier combination: base64 fullword" |
629 | 2 | : "invalid modifier combination: base64wide fullword"); |
630 | 2 | return ERROR_INVALID_MODIFIER; |
631 | 2 | } |
632 | | |
633 | | // base64 and xor together is not implemented. |
634 | 16.8k | if (modifier.flags & STRING_FLAGS_XOR && |
635 | 16.8k | (modifier.flags & STRING_FLAGS_BASE64 || |
636 | 413 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
637 | 0 | { |
638 | 0 | yr_compiler_set_error_extra_info( |
639 | 0 | compiler, |
640 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
641 | 0 | ? "invalid modifier combination: base64 xor" |
642 | 0 | : "invalid modifier combination: base64wide xor"); |
643 | 0 | return ERROR_INVALID_MODIFIER; |
644 | 0 | } |
645 | | |
646 | 16.8k | return ERROR_SUCCESS; |
647 | 16.8k | } |
648 | | |
649 | | int yr_parser_reduce_string_declaration( |
650 | | yyscan_t yyscanner, |
651 | | YR_MODIFIER modifier, |
652 | | const char* identifier, |
653 | | SIZED_STRING* str, |
654 | | YR_ARENA_REF* string_ref) |
655 | 16.8k | { |
656 | 16.8k | int result = ERROR_SUCCESS; |
657 | 16.8k | int min_atom_quality = YR_MAX_ATOM_QUALITY; |
658 | 16.8k | int atom_quality; |
659 | | |
660 | 16.8k | char message[512]; |
661 | | |
662 | 16.8k | int32_t min_gap = 0; |
663 | 16.8k | int32_t max_gap = 0; |
664 | | |
665 | 16.8k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
666 | | |
667 | 16.8k | RE_AST* re_ast = NULL; |
668 | 16.8k | RE_AST* remainder_re_ast = NULL; |
669 | 16.8k | RE_ERROR re_error; |
670 | | |
671 | 16.8k | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
672 | 16.8k | compiler, compiler->current_rule_idx); |
673 | | |
674 | | // Determine if a string with the same identifier was already defined |
675 | | // by searching for the identifier in strings_table. |
676 | 16.8k | uint32_t string_idx = yr_hash_table_lookup_uint32( |
677 | 16.8k | compiler->strings_table, identifier, NULL); |
678 | | |
679 | | // The string was already defined, return an error. |
680 | 16.8k | if (string_idx != UINT32_MAX) |
681 | 10 | { |
682 | 10 | yr_compiler_set_error_extra_info(compiler, identifier); |
683 | 10 | return ERROR_DUPLICATED_STRING_IDENTIFIER; |
684 | 10 | } |
685 | | |
686 | | // Empty strings are not allowed. |
687 | 16.8k | if (str->length == 0) |
688 | 2 | { |
689 | 2 | yr_compiler_set_error_extra_info(compiler, identifier); |
690 | 2 | return ERROR_EMPTY_STRING; |
691 | 2 | } |
692 | | |
693 | 16.8k | if (str->flags & SIZED_STRING_FLAGS_NO_CASE) |
694 | 2.58k | modifier.flags |= STRING_FLAGS_NO_CASE; |
695 | | |
696 | 16.8k | if (str->flags & SIZED_STRING_FLAGS_DOT_ALL) |
697 | 422 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
698 | | |
699 | | // Hex strings are always handled as DOT_ALL regexps. |
700 | 16.8k | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
701 | 440 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
702 | | |
703 | 16.8k | if (!(modifier.flags & STRING_FLAGS_WIDE) && |
704 | 16.8k | !(modifier.flags & STRING_FLAGS_BASE64 || |
705 | 15.7k | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
706 | 14.8k | { |
707 | 14.8k | modifier.flags |= STRING_FLAGS_ASCII; |
708 | 14.8k | } |
709 | | |
710 | | // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding |
711 | | // a single match for the string is enough. This is true in |
712 | | // most cases, except when the string count (#) and string offset (@) |
713 | | // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH |
714 | | // initially, and unmarked later if required. |
715 | 16.8k | modifier.flags |= STRING_FLAGS_SINGLE_MATCH; |
716 | | |
717 | | // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't |
718 | | // need to be searched all over the file because the user is using the |
719 | | // "at" operator. The string must be searched at a fixed offset in the |
720 | | // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially, |
721 | | // and unmarked later if required. |
722 | 16.8k | modifier.flags |= STRING_FLAGS_FIXED_OFFSET; |
723 | | |
724 | | // If string identifier is $ this is an anonymous string, if not add the |
725 | | // identifier to strings_table. |
726 | 16.8k | if (strcmp(identifier, "$") == 0) |
727 | 16.3k | { |
728 | 16.3k | modifier.flags |= STRING_FLAGS_ANONYMOUS; |
729 | 16.3k | } |
730 | 465 | else |
731 | 465 | { |
732 | 465 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
733 | 465 | compiler->strings_table, |
734 | 465 | identifier, |
735 | 465 | NULL, |
736 | 465 | compiler->current_string_idx)); |
737 | 465 | } |
738 | | |
739 | | // Make sure that the string does not have an invalid combination of |
740 | | // modifiers. |
741 | 16.8k | FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier)); |
742 | | |
743 | 16.8k | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
744 | 16.8k | modifier.flags & STRING_FLAGS_REGEXP || |
745 | 16.8k | modifier.flags & STRING_FLAGS_BASE64 || |
746 | 16.8k | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
747 | 15.8k | { |
748 | 15.8k | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
749 | 440 | result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); |
750 | 15.4k | else if (modifier.flags & STRING_FLAGS_REGEXP) |
751 | 13.9k | { |
752 | 13.9k | int flags = RE_PARSER_FLAG_NONE; |
753 | 13.9k | if (compiler->strict_escape) |
754 | 0 | flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; |
755 | 13.9k | result = yr_re_parse(str->c_string, &re_ast, &re_error, flags); |
756 | 13.9k | } |
757 | 1.44k | else |
758 | 1.44k | result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); |
759 | | |
760 | 15.8k | if (result != ERROR_SUCCESS) |
761 | 446 | { |
762 | 446 | if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) |
763 | 0 | { |
764 | 0 | yywarning( |
765 | 0 | yyscanner, |
766 | 0 | "unknown escape sequence"); |
767 | 0 | } |
768 | 446 | else |
769 | 446 | { |
770 | 446 | snprintf( |
771 | 446 | message, |
772 | 446 | sizeof(message), |
773 | 446 | "invalid %s \"%s\": %s", |
774 | 446 | (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" |
775 | 446 | : "regular expression", |
776 | 446 | identifier, |
777 | 446 | re_error.message); |
778 | | |
779 | 446 | yr_compiler_set_error_extra_info(compiler, message); |
780 | 446 | goto _exit; |
781 | 446 | } |
782 | 446 | } |
783 | | |
784 | 15.4k | if (re_ast->flags & RE_FLAGS_FAST_REGEXP) |
785 | 165 | modifier.flags |= STRING_FLAGS_FAST_REGEXP; |
786 | | |
787 | 15.4k | if (re_ast->flags & RE_FLAGS_GREEDY) |
788 | 787 | modifier.flags |= STRING_FLAGS_GREEDY_REGEXP; |
789 | | |
790 | | // Regular expressions in the strings section can't mix greedy and |
791 | | // ungreedy quantifiers like .* and .*?. That's because these regular |
792 | | // expressions can be matched forwards and/or backwards depending on the |
793 | | // atom found, and we need the regexp to be all-greedy or all-ungreedy to |
794 | | // be able to properly calculate the length of the match. |
795 | | |
796 | 15.4k | if ((re_ast->flags & RE_FLAGS_GREEDY) && |
797 | 15.4k | (re_ast->flags & RE_FLAGS_UNGREEDY)) |
798 | 6 | { |
799 | 6 | result = ERROR_INVALID_REGULAR_EXPRESSION; |
800 | | |
801 | 6 | yr_compiler_set_error_extra_info( |
802 | 6 | compiler, |
803 | 6 | "greedy and ungreedy quantifiers can't be mixed in a regular " |
804 | 6 | "expression"); |
805 | | |
806 | 6 | goto _exit; |
807 | 6 | } |
808 | | |
809 | 15.4k | if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast)) |
810 | 592 | { |
811 | 592 | yywarning( |
812 | 592 | yyscanner, |
813 | 592 | "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} " |
814 | 592 | "with a reasonable value for N", |
815 | 592 | identifier); |
816 | 592 | } |
817 | | |
818 | 15.4k | if (compiler->re_ast_callback != NULL) |
819 | 0 | { |
820 | 0 | compiler->re_ast_callback( |
821 | 0 | current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data); |
822 | 0 | } |
823 | | |
824 | 15.4k | *string_ref = YR_ARENA_NULL_REF; |
825 | | |
826 | 34.5k | while (re_ast != NULL) |
827 | 19.2k | { |
828 | 19.2k | YR_ARENA_REF ref; |
829 | | |
830 | 19.2k | uint32_t prev_string_idx = compiler->current_string_idx - 1; |
831 | | |
832 | 19.2k | int32_t prev_min_gap = min_gap; |
833 | 19.2k | int32_t prev_max_gap = max_gap; |
834 | | |
835 | 19.2k | result = yr_re_ast_split_at_chaining_point( |
836 | 19.2k | re_ast, &remainder_re_ast, &min_gap, &max_gap); |
837 | | |
838 | 19.2k | if (result != ERROR_SUCCESS) |
839 | 0 | goto _exit; |
840 | | |
841 | 19.2k | result = _yr_parser_write_string( |
842 | 19.2k | identifier, |
843 | 19.2k | modifier, |
844 | 19.2k | compiler, |
845 | 19.2k | NULL, |
846 | 19.2k | re_ast, |
847 | 19.2k | &ref, |
848 | 19.2k | &atom_quality, |
849 | 19.2k | ¤t_rule->num_atoms); |
850 | | |
851 | 19.2k | if (result != ERROR_SUCCESS) |
852 | 74 | goto _exit; |
853 | | |
854 | 19.1k | if (atom_quality < min_atom_quality) |
855 | 13.7k | min_atom_quality = atom_quality; |
856 | | |
857 | 19.1k | if (YR_ARENA_IS_NULL_REF(*string_ref)) |
858 | 15.3k | { |
859 | | // This is the first string in the chain, the string reference |
860 | | // returned by this function must point to this string. |
861 | 15.3k | *string_ref = ref; |
862 | 15.3k | } |
863 | 3.79k | else |
864 | 3.79k | { |
865 | | // This is not the first string in the chain, set the appropriate |
866 | | // flags and fill the chained_to, chain_gap_min and chain_gap_max |
867 | | // fields. |
868 | 3.79k | YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr( |
869 | 3.79k | compiler->arena, |
870 | 3.79k | YR_STRINGS_TABLE, |
871 | 3.79k | prev_string_idx * sizeof(YR_STRING)); |
872 | | |
873 | 3.79k | YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr( |
874 | 3.79k | compiler->arena, &ref); |
875 | | |
876 | 3.79k | new_string->chained_to = prev_string; |
877 | 3.79k | new_string->chain_gap_min = prev_min_gap; |
878 | 3.79k | new_string->chain_gap_max = prev_max_gap; |
879 | | |
880 | | // A string chained to another one can't have a fixed offset, only the |
881 | | // head of the string chain can have a fixed offset. |
882 | 3.79k | new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
883 | | |
884 | | // There is a previous string, but that string wasn't marked as part |
885 | | // of a chain because we can't do that until knowing there will be |
886 | | // another string, let's flag it now the we know. |
887 | 3.79k | prev_string->flags |= STRING_FLAGS_CHAIN_PART; |
888 | | |
889 | | // There is a previous string, so this string is part of a chain, but |
890 | | // there will be no more strings because there are no more AST to |
891 | | // split, which means that this is the chain's tail. |
892 | 3.79k | if (remainder_re_ast == NULL) |
893 | 316 | new_string->flags |= STRING_FLAGS_CHAIN_PART | |
894 | 316 | STRING_FLAGS_CHAIN_TAIL; |
895 | 3.79k | } |
896 | | |
897 | 19.1k | yr_re_ast_destroy(re_ast); |
898 | 19.1k | re_ast = remainder_re_ast; |
899 | 19.1k | } |
900 | 15.4k | } |
901 | 948 | else // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or |
902 | | // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE |
903 | 948 | { |
904 | 948 | result = _yr_parser_write_string( |
905 | 948 | identifier, |
906 | 948 | modifier, |
907 | 948 | compiler, |
908 | 948 | str, |
909 | 948 | NULL, |
910 | 948 | string_ref, |
911 | 948 | &min_atom_quality, |
912 | 948 | ¤t_rule->num_atoms); |
913 | | |
914 | 948 | if (result != ERROR_SUCCESS) |
915 | 0 | goto _exit; |
916 | 948 | } |
917 | | |
918 | 16.3k | if (min_atom_quality < compiler->atoms_config.quality_warning_threshold) |
919 | 3.40k | { |
920 | 3.40k | yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier); |
921 | 3.40k | } |
922 | | |
923 | 16.8k | _exit: |
924 | | |
925 | 16.8k | if (re_ast != NULL) |
926 | 352 | yr_re_ast_destroy(re_ast); |
927 | | |
928 | 16.8k | if (remainder_re_ast != NULL) |
929 | 0 | yr_re_ast_destroy(remainder_re_ast); |
930 | | |
931 | 16.8k | return result; |
932 | 16.3k | } |
933 | | |
934 | | static int wildcard_iterator( |
935 | | void* prefix, |
936 | | size_t prefix_len, |
937 | | void* _value, |
938 | | void* data) |
939 | 1.19k | { |
940 | 1.19k | const char* identifier = (const char*) data; |
941 | | |
942 | | // If the identifier is prefixed by prefix, then it matches the wildcard. |
943 | 1.19k | if (!strncmp(prefix, identifier, prefix_len)) |
944 | 276 | return ERROR_IDENTIFIER_MATCHES_WILDCARD; |
945 | | |
946 | 914 | return ERROR_SUCCESS; |
947 | 1.19k | } |
948 | | |
949 | | int yr_parser_reduce_rule_declaration_phase_1( |
950 | | yyscan_t yyscanner, |
951 | | int32_t flags, |
952 | | const char* identifier, |
953 | | YR_ARENA_REF* rule_ref) |
954 | 18.0k | { |
955 | 18.0k | int result; |
956 | 18.0k | YR_FIXUP* fixup; |
957 | 18.0k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
958 | | |
959 | 18.0k | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
960 | 18.0k | compiler->arena, |
961 | 18.0k | YR_NAMESPACES_TABLE, |
962 | 18.0k | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
963 | | |
964 | 18.0k | if (yr_hash_table_lookup_uint32( |
965 | 18.0k | compiler->rules_table, identifier, ns->name) != UINT32_MAX || |
966 | 18.0k | yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL) |
967 | 9.76k | { |
968 | | // A rule or variable with the same identifier already exists, return the |
969 | | // appropriate error. |
970 | | |
971 | 9.76k | yr_compiler_set_error_extra_info(compiler, identifier); |
972 | 9.76k | return ERROR_DUPLICATED_IDENTIFIER; |
973 | 9.76k | } |
974 | | |
975 | | // Iterate over all identifiers in wildcard_identifiers_table, and check if |
976 | | // any of them are a prefix of the identifier being declared. If so, return |
977 | | // ERROR_IDENTIFIER_MATCHES_WILDCARD. |
978 | 8.31k | result = yr_hash_table_iterate( |
979 | 8.31k | compiler->wildcard_identifiers_table, |
980 | 8.31k | ns->name, |
981 | 8.31k | wildcard_iterator, |
982 | 8.31k | (void*) identifier); |
983 | | |
984 | 8.31k | if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD) |
985 | 276 | { |
986 | | // This rule matches an existing wildcard rule set. |
987 | 276 | yr_compiler_set_error_extra_info(compiler, identifier); |
988 | 276 | } |
989 | | |
990 | 8.31k | FAIL_ON_ERROR(result); |
991 | | |
992 | 8.04k | FAIL_ON_ERROR(yr_arena_allocate_struct( |
993 | 8.04k | compiler->arena, |
994 | 8.04k | YR_RULES_TABLE, |
995 | 8.04k | sizeof(YR_RULE), |
996 | 8.04k | rule_ref, |
997 | 8.04k | offsetof(YR_RULE, identifier), |
998 | 8.04k | offsetof(YR_RULE, tags), |
999 | 8.04k | offsetof(YR_RULE, strings), |
1000 | 8.04k | offsetof(YR_RULE, metas), |
1001 | 8.04k | offsetof(YR_RULE, ns), |
1002 | 8.04k | EOL)); |
1003 | | |
1004 | 8.04k | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1005 | | |
1006 | 8.04k | YR_ARENA_REF ref; |
1007 | | |
1008 | 8.04k | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1009 | | |
1010 | 8.04k | rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1011 | 8.04k | rule->flags = flags; |
1012 | 8.04k | rule->ns = ns; |
1013 | 8.04k | rule->num_atoms = 0; |
1014 | | |
1015 | 8.04k | YR_ARENA_REF jmp_offset_ref; |
1016 | | |
1017 | | // We are starting to parse a new rule, set current_rule_idx accordingly. |
1018 | 8.04k | compiler->current_rule_idx = compiler->next_rule_idx; |
1019 | 8.04k | compiler->next_rule_idx++; |
1020 | | |
1021 | | // The OP_INIT_RULE instruction behaves like a jump. When the rule is |
1022 | | // disabled it skips over the rule's code and go straight to the next rule's |
1023 | | // code. The jmp_offset_ref variable points to the jump's offset. The offset |
1024 | | // is set to 0 as we don't know the jump target yet. When we finish |
1025 | | // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2 |
1026 | | // the jump offset is set to its final value. |
1027 | | |
1028 | 8.04k | FAIL_ON_ERROR(yr_parser_emit_with_arg_int32( |
1029 | 8.04k | yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref)); |
1030 | | |
1031 | 8.04k | FAIL_ON_ERROR(yr_arena_write_data( |
1032 | 8.04k | compiler->arena, |
1033 | 8.04k | YR_CODE_SECTION, |
1034 | 8.04k | &compiler->current_rule_idx, |
1035 | 8.04k | sizeof(compiler->current_rule_idx), |
1036 | 8.04k | NULL)); |
1037 | | |
1038 | | // Create a fixup entry for the jump and push it in the stack |
1039 | 8.04k | fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP)); |
1040 | | |
1041 | 8.04k | if (fixup == NULL) |
1042 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1043 | | |
1044 | 8.04k | fixup->ref = jmp_offset_ref; |
1045 | 8.04k | fixup->next = compiler->fixup_stack_head; |
1046 | 8.04k | compiler->fixup_stack_head = fixup; |
1047 | | |
1048 | | // Clean strings_table as we are starting to parse a new rule. |
1049 | 8.04k | yr_hash_table_clean(compiler->strings_table, NULL); |
1050 | | |
1051 | 8.04k | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
1052 | 8.04k | compiler->rules_table, identifier, ns->name, compiler->current_rule_idx)); |
1053 | | |
1054 | 8.04k | return ERROR_SUCCESS; |
1055 | 8.04k | } |
1056 | | |
1057 | | int yr_parser_reduce_rule_declaration_phase_2( |
1058 | | yyscan_t yyscanner, |
1059 | | YR_ARENA_REF* rule_ref) |
1060 | 171 | { |
1061 | 171 | uint32_t max_strings_per_rule; |
1062 | 171 | uint32_t strings_in_rule = 0; |
1063 | | |
1064 | 171 | YR_FIXUP* fixup; |
1065 | 171 | YR_STRING* string; |
1066 | 171 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1067 | | |
1068 | 171 | yr_get_configuration_uint32( |
1069 | 171 | YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule); |
1070 | | |
1071 | 171 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1072 | | |
1073 | | // Show warning if the rule is generating too many atoms. The warning is |
1074 | | // shown if the number of atoms is greater than 20 times the maximum number |
1075 | | // of strings allowed for a rule, as 20 is minimum number of atoms generated |
1076 | | // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously. |
1077 | | |
1078 | 171 | if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD) |
1079 | 37 | { |
1080 | 37 | yywarning(yyscanner, "rule is slowing down scanning"); |
1081 | 37 | } |
1082 | | |
1083 | 171 | yr_rule_strings_foreach(rule, string) |
1084 | 1.08k | { |
1085 | | // Only the heading fragment in a chain of strings (the one with |
1086 | | // chained_to == NULL) must be referenced. All other fragments |
1087 | | // are never marked as referenced. |
1088 | | // |
1089 | | // Any string identifier that starts with '_' can be unreferenced. Anonymous |
1090 | | // strings must always be referenced. |
1091 | | |
1092 | 1.08k | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1093 | 1.08k | (STRING_IS_ANONYMOUS(string) || |
1094 | 28 | (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_'))) |
1095 | 16 | { |
1096 | 16 | yr_compiler_set_error_extra_info( |
1097 | 16 | compiler, string->identifier) return ERROR_UNREFERENCED_STRING; |
1098 | 16 | } |
1099 | | |
1100 | | // If a string is unreferenced we need to unset the FIXED_OFFSET flag so |
1101 | | // that it will match anywhere. |
1102 | 1.06k | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1103 | 1.06k | STRING_IS_FIXED_OFFSET(string)) |
1104 | 0 | { |
1105 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1106 | 0 | } |
1107 | | |
1108 | 1.06k | strings_in_rule++; |
1109 | | |
1110 | 1.06k | if (strings_in_rule > max_strings_per_rule) |
1111 | 0 | { |
1112 | 0 | yr_compiler_set_error_extra_info( |
1113 | 0 | compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS; |
1114 | 0 | } |
1115 | 1.06k | } |
1116 | | |
1117 | 155 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1118 | 155 | yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL)); |
1119 | | |
1120 | 155 | fixup = compiler->fixup_stack_head; |
1121 | | |
1122 | 155 | int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr( |
1123 | 155 | compiler->arena, &fixup->ref); |
1124 | | |
1125 | 155 | int32_t jmp_offset = yr_arena_get_current_offset( |
1126 | 155 | compiler->arena, YR_CODE_SECTION) - |
1127 | 155 | fixup->ref.offset + 1; |
1128 | | |
1129 | 155 | memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset)); |
1130 | | |
1131 | | // Remove fixup from the stack. |
1132 | 155 | compiler->fixup_stack_head = fixup->next; |
1133 | 155 | yr_free(fixup); |
1134 | | |
1135 | | // We have finished parsing the current rule set current_rule_idx to |
1136 | | // UINT32_MAX indicating that we are not currently parsing a rule. |
1137 | 155 | compiler->current_rule_idx = UINT32_MAX; |
1138 | | |
1139 | 155 | return ERROR_SUCCESS; |
1140 | 155 | } |
1141 | | |
1142 | | int yr_parser_reduce_string_identifier( |
1143 | | yyscan_t yyscanner, |
1144 | | const char* identifier, |
1145 | | uint8_t instruction, |
1146 | | uint64_t at_offset) |
1147 | 14.4k | { |
1148 | 14.4k | YR_STRING* string; |
1149 | 14.4k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1150 | | |
1151 | 14.4k | if (strcmp(identifier, "$") == 0) // is an anonymous string ? |
1152 | 14.0k | { |
1153 | 14.0k | if (compiler->loop_for_of_var_index >= 0) // inside a loop ? |
1154 | 14.0k | { |
1155 | 14.0k | yr_parser_emit_with_arg( |
1156 | 14.0k | yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL); |
1157 | | |
1158 | 14.0k | yr_parser_emit(yyscanner, instruction, NULL); |
1159 | | |
1160 | 14.0k | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
1161 | 14.0k | compiler, compiler->current_rule_idx); |
1162 | | |
1163 | 14.0k | yr_rule_strings_foreach(current_rule, string) |
1164 | 434k | { |
1165 | 434k | if (instruction != OP_FOUND) |
1166 | 434k | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1167 | | |
1168 | 434k | if (instruction == OP_FOUND_AT) |
1169 | 624 | { |
1170 | | // Avoid overwriting any previous fixed offset |
1171 | 624 | if (string->fixed_offset == YR_UNDEFINED) |
1172 | 356 | string->fixed_offset = at_offset; |
1173 | | |
1174 | | // If a previous fixed offset was different, disable |
1175 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1176 | | // have room to store a single fixed offset value |
1177 | 624 | if (string->fixed_offset != at_offset) |
1178 | 268 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1179 | 624 | } |
1180 | 433k | else |
1181 | 433k | { |
1182 | 433k | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1183 | 433k | } |
1184 | 434k | } |
1185 | 14.0k | } |
1186 | 33 | else |
1187 | 33 | { |
1188 | | // Anonymous strings not allowed outside of a loop |
1189 | 33 | return ERROR_MISPLACED_ANONYMOUS_STRING; |
1190 | 33 | } |
1191 | 14.0k | } |
1192 | 373 | else |
1193 | 373 | { |
1194 | 373 | FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string)); |
1195 | | |
1196 | 343 | FAIL_ON_ERROR( |
1197 | 343 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL)); |
1198 | | |
1199 | 343 | if (instruction != OP_FOUND) |
1200 | 257 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1201 | | |
1202 | 343 | if (instruction == OP_FOUND_AT) |
1203 | 83 | { |
1204 | | // Avoid overwriting any previous fixed offset |
1205 | | |
1206 | 83 | if (string->fixed_offset == YR_UNDEFINED) |
1207 | 14 | string->fixed_offset = at_offset; |
1208 | | |
1209 | | // If a previous fixed offset was different, disable |
1210 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1211 | | // have room to store a single fixed offset value |
1212 | | |
1213 | 83 | if (string->fixed_offset == YR_UNDEFINED || |
1214 | 83 | string->fixed_offset != at_offset) |
1215 | 42 | { |
1216 | 42 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1217 | 42 | } |
1218 | 83 | } |
1219 | 260 | else |
1220 | 260 | { |
1221 | 260 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1222 | 260 | } |
1223 | | |
1224 | 343 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL)); |
1225 | | |
1226 | 343 | string->flags |= STRING_FLAGS_REFERENCED; |
1227 | 343 | } |
1228 | | |
1229 | 14.4k | return ERROR_SUCCESS; |
1230 | 14.4k | } |
1231 | | |
1232 | | int yr_parser_reduce_meta_declaration( |
1233 | | yyscan_t yyscanner, |
1234 | | int32_t type, |
1235 | | const char* identifier, |
1236 | | const char* string, |
1237 | | int64_t integer, |
1238 | | YR_ARENA_REF* meta_ref) |
1239 | 1.18k | { |
1240 | 1.18k | YR_ARENA_REF ref; |
1241 | 1.18k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1242 | | |
1243 | 1.18k | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1244 | 1.18k | compiler->arena, |
1245 | 1.18k | YR_METAS_TABLE, |
1246 | 1.18k | sizeof(YR_META), |
1247 | 1.18k | meta_ref, |
1248 | 1.18k | offsetof(YR_META, identifier), |
1249 | 1.18k | offsetof(YR_META, string), |
1250 | 1.18k | EOL)); |
1251 | | |
1252 | 1.18k | YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref); |
1253 | | |
1254 | 1.18k | meta->type = type; |
1255 | 1.18k | meta->integer = integer; |
1256 | | |
1257 | 1.18k | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1258 | | |
1259 | 1.18k | meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1260 | | |
1261 | 1.18k | if (string != NULL) |
1262 | 20 | { |
1263 | 20 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref)); |
1264 | | |
1265 | 20 | meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1266 | 20 | } |
1267 | 1.16k | else |
1268 | 1.16k | { |
1269 | 1.16k | meta->string = NULL; |
1270 | 1.16k | } |
1271 | | |
1272 | 1.18k | compiler->current_meta_idx++; |
1273 | | |
1274 | 1.18k | return ERROR_SUCCESS; |
1275 | 1.18k | } |
1276 | | |
1277 | | static int _yr_parser_valid_module_name(SIZED_STRING* module_name) |
1278 | 1.33k | { |
1279 | 1.33k | if (module_name->length == 0) |
1280 | 174 | return false; |
1281 | | |
1282 | 1.15k | if (strlen(module_name->c_string) != module_name->length) |
1283 | 45 | return false; |
1284 | | |
1285 | 1.11k | return true; |
1286 | 1.15k | } |
1287 | | |
1288 | | int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name) |
1289 | 1.33k | { |
1290 | 1.33k | int result; |
1291 | | |
1292 | 1.33k | YR_ARENA_REF ref; |
1293 | 1.33k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1294 | 1.33k | YR_OBJECT* module_structure; |
1295 | | |
1296 | 1.33k | if (!_yr_parser_valid_module_name(module_name)) |
1297 | 219 | { |
1298 | 219 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1299 | | |
1300 | 219 | return ERROR_INVALID_MODULE_NAME; |
1301 | 219 | } |
1302 | | |
1303 | 1.11k | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
1304 | 1.11k | compiler->arena, |
1305 | 1.11k | YR_NAMESPACES_TABLE, |
1306 | 1.11k | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
1307 | | |
1308 | 1.11k | module_structure = (YR_OBJECT*) yr_hash_table_lookup( |
1309 | 1.11k | compiler->objects_table, module_name->c_string, ns->name); |
1310 | | |
1311 | | // if module already imported, do nothing |
1312 | | |
1313 | 1.11k | if (module_structure != NULL) |
1314 | 900 | return ERROR_SUCCESS; |
1315 | | |
1316 | 212 | FAIL_ON_ERROR(yr_object_create( |
1317 | 212 | OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure)); |
1318 | | |
1319 | 212 | FAIL_ON_ERROR(yr_hash_table_add( |
1320 | 212 | compiler->objects_table, |
1321 | 212 | module_name->c_string, |
1322 | 212 | ns->name, |
1323 | 212 | module_structure)); |
1324 | | |
1325 | 212 | result = yr_modules_do_declarations(module_name->c_string, module_structure); |
1326 | | |
1327 | 212 | if (result == ERROR_UNKNOWN_MODULE) |
1328 | 105 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1329 | | |
1330 | 212 | if (result != ERROR_SUCCESS) |
1331 | 105 | return result; |
1332 | | |
1333 | 107 | FAIL_ON_ERROR( |
1334 | 107 | _yr_compiler_store_string(compiler, module_name->c_string, &ref)); |
1335 | | |
1336 | 107 | FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc( |
1337 | 107 | yyscanner, |
1338 | 107 | OP_IMPORT, |
1339 | 107 | yr_arena_ref_to_ptr(compiler->arena, &ref), |
1340 | 107 | NULL, |
1341 | 107 | NULL)); |
1342 | | |
1343 | 107 | return ERROR_SUCCESS; |
1344 | 107 | } |
1345 | | |
1346 | | static int _yr_parser_operator_to_opcode(const char* op, int expression_type) |
1347 | 13.5k | { |
1348 | 13.5k | int opcode = 0; |
1349 | | |
1350 | 13.5k | switch (expression_type) |
1351 | 13.5k | { |
1352 | 9.28k | case EXPRESSION_TYPE_INTEGER: |
1353 | 9.28k | opcode = OP_INT_BEGIN; |
1354 | 9.28k | break; |
1355 | 2.47k | case EXPRESSION_TYPE_FLOAT: |
1356 | 2.47k | opcode = OP_DBL_BEGIN; |
1357 | 2.47k | break; |
1358 | 1.83k | case EXPRESSION_TYPE_STRING: |
1359 | 1.83k | opcode = OP_STR_BEGIN; |
1360 | 1.83k | break; |
1361 | 0 | default: |
1362 | 0 | assert(false); |
1363 | 13.5k | } |
1364 | | |
1365 | 13.5k | if (op[0] == '<') |
1366 | 933 | { |
1367 | 933 | if (op[1] == '=') |
1368 | 176 | opcode += _OP_LE; |
1369 | 757 | else |
1370 | 757 | opcode += _OP_LT; |
1371 | 933 | } |
1372 | 12.6k | else if (op[0] == '>') |
1373 | 1.27k | { |
1374 | 1.27k | if (op[1] == '=') |
1375 | 190 | opcode += _OP_GE; |
1376 | 1.08k | else |
1377 | 1.08k | opcode += _OP_GT; |
1378 | 1.27k | } |
1379 | 11.3k | else if (op[1] == '=') |
1380 | 231 | { |
1381 | 231 | if (op[0] == '=') |
1382 | 24 | opcode += _OP_EQ; |
1383 | 207 | else |
1384 | 207 | opcode += _OP_NEQ; |
1385 | 231 | } |
1386 | 11.1k | else if (op[0] == '+') |
1387 | 2.73k | { |
1388 | 2.73k | opcode += _OP_ADD; |
1389 | 2.73k | } |
1390 | 8.41k | else if (op[0] == '-') |
1391 | 5.22k | { |
1392 | 5.22k | opcode += _OP_SUB; |
1393 | 5.22k | } |
1394 | 3.18k | else if (op[0] == '*') |
1395 | 2.17k | { |
1396 | 2.17k | opcode += _OP_MUL; |
1397 | 2.17k | } |
1398 | 1.00k | else if (op[0] == '\\') |
1399 | 1.00k | { |
1400 | 1.00k | opcode += _OP_DIV; |
1401 | 1.00k | } |
1402 | | |
1403 | 13.5k | if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode)) |
1404 | 13.5k | { |
1405 | 13.5k | return opcode; |
1406 | 13.5k | } |
1407 | | |
1408 | 3 | return OP_ERROR; |
1409 | 13.5k | } |
1410 | | |
1411 | | int yr_parser_reduce_operation( |
1412 | | yyscan_t yyscanner, |
1413 | | const char* op, |
1414 | | YR_EXPRESSION left_operand, |
1415 | | YR_EXPRESSION right_operand) |
1416 | 13.6k | { |
1417 | 13.6k | int expression_type; |
1418 | | |
1419 | 13.6k | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1420 | | |
1421 | 13.6k | if ((left_operand.type == EXPRESSION_TYPE_INTEGER || |
1422 | 13.6k | left_operand.type == EXPRESSION_TYPE_FLOAT) && |
1423 | 13.6k | (right_operand.type == EXPRESSION_TYPE_INTEGER || |
1424 | 11.7k | right_operand.type == EXPRESSION_TYPE_FLOAT)) |
1425 | 11.7k | { |
1426 | 11.7k | if (left_operand.type != right_operand.type) |
1427 | 1.96k | { |
1428 | | // One operand is double and the other is integer, |
1429 | | // cast the integer to double |
1430 | | |
1431 | 1.96k | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1432 | 1.96k | yyscanner, |
1433 | 1.96k | OP_INT_TO_DBL, |
1434 | 1.96k | (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1, |
1435 | 1.96k | NULL, |
1436 | 1.96k | NULL)); |
1437 | 1.96k | } |
1438 | | |
1439 | 11.7k | expression_type = EXPRESSION_TYPE_FLOAT; |
1440 | | |
1441 | 11.7k | if (left_operand.type == EXPRESSION_TYPE_INTEGER && |
1442 | 11.7k | right_operand.type == EXPRESSION_TYPE_INTEGER) |
1443 | 9.28k | { |
1444 | 9.28k | expression_type = EXPRESSION_TYPE_INTEGER; |
1445 | 9.28k | } |
1446 | | |
1447 | 11.7k | FAIL_ON_ERROR(yr_parser_emit( |
1448 | 11.7k | yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL)); |
1449 | 11.7k | } |
1450 | 1.88k | else if ( |
1451 | 1.88k | left_operand.type == EXPRESSION_TYPE_STRING && |
1452 | 1.88k | right_operand.type == EXPRESSION_TYPE_STRING) |
1453 | 1.83k | { |
1454 | 1.83k | int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING); |
1455 | | |
1456 | 1.83k | if (opcode != OP_ERROR) |
1457 | 1.82k | { |
1458 | 1.82k | FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL)); |
1459 | 1.82k | } |
1460 | 3 | else |
1461 | 3 | { |
1462 | 3 | yr_compiler_set_error_extra_info_fmt( |
1463 | 3 | compiler, "strings don't support \"%s\" operation", op); |
1464 | | |
1465 | 3 | return ERROR_WRONG_TYPE; |
1466 | 3 | } |
1467 | 1.83k | } |
1468 | 57 | else |
1469 | 57 | { |
1470 | 57 | yr_compiler_set_error_extra_info(compiler, "type mismatch"); |
1471 | | |
1472 | 57 | return ERROR_WRONG_TYPE; |
1473 | 57 | } |
1474 | | |
1475 | 13.5k | return ERROR_SUCCESS; |
1476 | 13.6k | } |