/src/yara/libyara/parser.c
Line | Count | Source |
1 | | /* |
2 | | Copyright (c) 2013. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <limits.h> |
31 | | #include <stddef.h> |
32 | | #include <string.h> |
33 | | #include <yara/ahocorasick.h> |
34 | | #include <yara/arena.h> |
35 | | #include <yara/base64.h> |
36 | | #include <yara/error.h> |
37 | | #include <yara/exec.h> |
38 | | #include <yara/integers.h> |
39 | | #include <yara/mem.h> |
40 | | #include <yara/modules.h> |
41 | | #include <yara/object.h> |
42 | | #include <yara/parser.h> |
43 | | #include <yara/re.h> |
44 | | #include <yara/strutils.h> |
45 | | #include <yara/utils.h> |
46 | | #include "yara/compiler.h" |
47 | | #include "yara/types.h" |
48 | | |
// Converts a single hexadecimal digit character into its numeric value.
// Only '0'-'9' and upper-case 'A'-'F' are handled; any other input (including
// lower-case 'a'-'f') falls into the second branch and yields (x - '0')
// truncated to uint8_t.
//
// The argument and the whole expansion are parenthesized so the macro can be
// used safely inside larger expressions, e.g. (todigit(hi) << 4) | todigit(lo).
// Note that the argument is evaluated more than once, so it must not have
// side effects.
#define todigit(x)                                           \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
52 | | |
53 | | int yr_parser_emit( |
54 | | yyscan_t yyscanner, |
55 | | uint8_t instruction, |
56 | | YR_ARENA_REF* instruction_ref) |
57 | 0 | { |
58 | 0 | return yr_arena_write_data( |
59 | 0 | yyget_extra(yyscanner)->arena, |
60 | 0 | YR_CODE_SECTION, |
61 | 0 | &instruction, |
62 | 0 | sizeof(uint8_t), |
63 | 0 | instruction_ref); |
64 | 0 | } |
65 | | |
66 | | int yr_parser_emit_with_arg_double( |
67 | | yyscan_t yyscanner, |
68 | | uint8_t instruction, |
69 | | double argument, |
70 | | YR_ARENA_REF* instruction_ref, |
71 | | YR_ARENA_REF* argument_ref) |
72 | 0 | { |
73 | 0 | int result = yr_arena_write_data( |
74 | 0 | yyget_extra(yyscanner)->arena, |
75 | 0 | YR_CODE_SECTION, |
76 | 0 | &instruction, |
77 | 0 | sizeof(uint8_t), |
78 | 0 | instruction_ref); |
79 | |
|
80 | 0 | if (result == ERROR_SUCCESS) |
81 | 0 | result = yr_arena_write_data( |
82 | 0 | yyget_extra(yyscanner)->arena, |
83 | 0 | YR_CODE_SECTION, |
84 | 0 | &argument, |
85 | 0 | sizeof(double), |
86 | 0 | argument_ref); |
87 | |
|
88 | 0 | return result; |
89 | 0 | } |
90 | | |
91 | | int yr_parser_emit_with_arg_int32( |
92 | | yyscan_t yyscanner, |
93 | | uint8_t instruction, |
94 | | int32_t argument, |
95 | | YR_ARENA_REF* instruction_ref, |
96 | | YR_ARENA_REF* argument_ref) |
97 | 0 | { |
98 | 0 | int result = yr_arena_write_data( |
99 | 0 | yyget_extra(yyscanner)->arena, |
100 | 0 | YR_CODE_SECTION, |
101 | 0 | &instruction, |
102 | 0 | sizeof(uint8_t), |
103 | 0 | instruction_ref); |
104 | |
|
105 | 0 | if (result == ERROR_SUCCESS) |
106 | 0 | result = yr_arena_write_data( |
107 | 0 | yyget_extra(yyscanner)->arena, |
108 | 0 | YR_CODE_SECTION, |
109 | 0 | &argument, |
110 | 0 | sizeof(int32_t), |
111 | 0 | argument_ref); |
112 | |
|
113 | 0 | return result; |
114 | 0 | } |
115 | | |
116 | | int yr_parser_emit_with_arg( |
117 | | yyscan_t yyscanner, |
118 | | uint8_t instruction, |
119 | | int64_t argument, |
120 | | YR_ARENA_REF* instruction_ref, |
121 | | YR_ARENA_REF* argument_ref) |
122 | 0 | { |
123 | 0 | int result = yr_arena_write_data( |
124 | 0 | yyget_extra(yyscanner)->arena, |
125 | 0 | YR_CODE_SECTION, |
126 | 0 | &instruction, |
127 | 0 | sizeof(uint8_t), |
128 | 0 | instruction_ref); |
129 | |
|
130 | 0 | if (result == ERROR_SUCCESS) |
131 | 0 | result = yr_arena_write_data( |
132 | 0 | yyget_extra(yyscanner)->arena, |
133 | 0 | YR_CODE_SECTION, |
134 | 0 | &argument, |
135 | 0 | sizeof(int64_t), |
136 | 0 | argument_ref); |
137 | |
|
138 | 0 | return result; |
139 | 0 | } |
140 | | |
141 | | int yr_parser_emit_with_arg_reloc( |
142 | | yyscan_t yyscanner, |
143 | | uint8_t instruction, |
144 | | void* argument, |
145 | | YR_ARENA_REF* instruction_ref, |
146 | | YR_ARENA_REF* argument_ref) |
147 | 1 | { |
148 | 1 | YR_ARENA_REF ref = YR_ARENA_NULL_REF; |
149 | | |
150 | 1 | DECLARE_REFERENCE(void*, ptr) arg; |
151 | | |
152 | 1 | memset(&arg, 0, sizeof(arg)); |
153 | 1 | arg.ptr = argument; |
154 | | |
155 | 1 | int result = yr_arena_write_data( |
156 | 1 | yyget_extra(yyscanner)->arena, |
157 | 1 | YR_CODE_SECTION, |
158 | 1 | &instruction, |
159 | 1 | sizeof(uint8_t), |
160 | 1 | instruction_ref); |
161 | | |
162 | 1 | if (result == ERROR_SUCCESS) |
163 | 1 | result = yr_arena_write_data( |
164 | 1 | yyget_extra(yyscanner)->arena, |
165 | 1 | YR_CODE_SECTION, |
166 | 1 | &arg, |
167 | 1 | sizeof(arg), |
168 | 1 | &ref); |
169 | | |
170 | 1 | if (result == ERROR_SUCCESS) |
171 | 1 | result = yr_arena_make_ptr_relocatable( |
172 | 1 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL); |
173 | | |
174 | 1 | if (argument_ref != NULL) |
175 | 0 | *argument_ref = ref; |
176 | | |
177 | 1 | return result; |
178 | 1 | } |
179 | | |
180 | | int yr_parser_emit_pushes_for_strings( |
181 | | yyscan_t yyscanner, |
182 | | const char* identifier, |
183 | | YR_STRING_SET* strings) |
184 | 0 | { |
185 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
186 | |
|
187 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
188 | 0 | compiler, compiler->current_rule_idx); |
189 | |
|
190 | 0 | YR_STRING* string; |
191 | |
|
192 | 0 | const char* string_identifier; |
193 | 0 | const char* target_identifier; |
194 | |
|
195 | 0 | strings->count = 0; |
196 | 0 | strings->head = NULL; |
197 | 0 | YR_STRING_SET_ELEMENT** tail_ptr = &strings->head; |
198 | |
|
199 | 0 | yr_rule_strings_foreach(current_rule, string) |
200 | 0 | { |
201 | | // Don't generate pushes for strings chained to another one, we are |
202 | | // only interested in non-chained strings or the head of the chain. |
203 | |
|
204 | 0 | if (string->chained_to == NULL) |
205 | 0 | { |
206 | 0 | string_identifier = string->identifier; |
207 | 0 | target_identifier = identifier; |
208 | |
|
209 | 0 | while (*target_identifier != '\0' && *string_identifier != '\0' && |
210 | 0 | *target_identifier == *string_identifier) |
211 | 0 | { |
212 | 0 | target_identifier++; |
213 | 0 | string_identifier++; |
214 | 0 | } |
215 | |
|
216 | 0 | if ((*target_identifier == '\0' && *string_identifier == '\0') || |
217 | 0 | *target_identifier == '*') |
218 | 0 | { |
219 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL); |
220 | |
|
221 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
222 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
223 | 0 | strings->count++; |
224 | |
|
225 | 0 | *tail_ptr = yr_malloc(sizeof(YR_STRING_SET_ELEMENT)); |
226 | 0 | yr_arena_ptr_to_ref(compiler->arena, string, &((*tail_ptr)->element)); |
227 | 0 | (*tail_ptr)->next = NULL; |
228 | 0 | tail_ptr = &(*tail_ptr)->next; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | |
|
233 | 0 | if (strings->count == 0) |
234 | 0 | { |
235 | 0 | yr_compiler_set_error_extra_info( |
236 | 0 | compiler, identifier) return ERROR_UNDEFINED_STRING; |
237 | 0 | } |
238 | | |
239 | 0 | return ERROR_SUCCESS; |
240 | 0 | } |
241 | | |
242 | | // Emit OP_PUSH_RULE instructions for all rules whose identifier has given |
243 | | // prefix. |
244 | | int yr_parser_emit_pushes_for_rules( |
245 | | yyscan_t yyscanner, |
246 | | const char* prefix, |
247 | | int* count) |
248 | 0 | { |
249 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
250 | | |
251 | | // Make sure the compiler is parsing a rule |
252 | 0 | assert(compiler->current_rule_idx != UINT32_MAX); |
253 | |
|
254 | 0 | YR_RULE* rule; |
255 | 0 | int matching = 0; |
256 | |
|
257 | 0 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
258 | 0 | compiler->arena, |
259 | 0 | YR_NAMESPACES_TABLE, |
260 | 0 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
261 | | |
262 | | // Can't use yr_rules_foreach here as that requires the rules to have been |
263 | | // finalized (inserting a NULL rule at the end). This is done when |
264 | | // yr_compiler_get_rules() is called, which also inserts a HALT instruction |
265 | | // into the current position in the code arena. Obviously we aren't done |
266 | | // compiling the rules yet so inserting a HALT is a bad idea. To deal with |
267 | | // this I'm manually walking all the currently compiled rules (up to the |
268 | | // current rule index) and comparing identifiers to see if it is one we should |
269 | | // use. |
270 | | // |
271 | | // Further, we have to get compiler->current_rule_idx before we start because |
272 | | // if we emit an OP_PUSH_RULE |
273 | 0 | rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0); |
274 | |
|
275 | 0 | for (uint32_t i = 0; i <= compiler->current_rule_idx; i++) |
276 | 0 | { |
277 | | // Is rule->identifier prefixed by prefix? |
278 | 0 | if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0) |
279 | 0 | { |
280 | 0 | uint32_t rule_idx = yr_hash_table_lookup_uint32( |
281 | 0 | compiler->rules_table, rule->identifier, ns->name); |
282 | |
|
283 | 0 | if (rule_idx != UINT32_MAX) |
284 | 0 | { |
285 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
286 | 0 | yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL)); |
287 | 0 | matching++; |
288 | 0 | } |
289 | 0 | } |
290 | | |
291 | 0 | rule++; |
292 | 0 | } |
293 | | |
294 | 0 | if (count != NULL) |
295 | 0 | { |
296 | 0 | *count = matching; |
297 | 0 | } |
298 | |
|
299 | 0 | if (matching == 0) |
300 | 0 | { |
301 | 0 | yr_compiler_set_error_extra_info(compiler, prefix); |
302 | 0 | return ERROR_UNDEFINED_IDENTIFIER; |
303 | 0 | } |
304 | | |
305 | 0 | return ERROR_SUCCESS; |
306 | 0 | } |
307 | | |
308 | | int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument) |
309 | 0 | { |
310 | 0 | uint8_t opcode[9]; |
311 | 0 | int opcode_len = 1; |
312 | |
|
313 | 0 | if (argument == YR_UNDEFINED) |
314 | 0 | { |
315 | 0 | opcode[0] = OP_PUSH_U; |
316 | 0 | } |
317 | 0 | else if (argument <= 0xff) |
318 | 0 | { |
319 | 0 | opcode[0] = OP_PUSH_8; |
320 | 0 | opcode[1] = (uint8_t) argument; |
321 | 0 | opcode_len += sizeof(uint8_t); |
322 | 0 | } |
323 | 0 | else if (argument <= 0xffff) |
324 | 0 | { |
325 | 0 | opcode[0] = OP_PUSH_16; |
326 | 0 | uint16_t u = (uint16_t) argument; |
327 | 0 | memcpy(opcode + 1, &u, sizeof(uint16_t)); |
328 | 0 | opcode_len += sizeof(uint16_t); |
329 | 0 | } |
330 | 0 | else if (argument <= 0xffffffff) |
331 | 0 | { |
332 | 0 | opcode[0] = OP_PUSH_32; |
333 | 0 | uint32_t u = (uint32_t) argument; |
334 | 0 | memcpy(opcode + 1, &u, sizeof(uint32_t)); |
335 | 0 | opcode_len += sizeof(uint32_t); |
336 | 0 | } |
337 | 0 | else |
338 | 0 | { |
339 | 0 | opcode[0] = OP_PUSH; |
340 | 0 | memcpy(opcode + 1, &argument, sizeof(uint64_t)); |
341 | 0 | opcode_len += sizeof(uint64_t); |
342 | 0 | } |
343 | |
|
344 | 0 | return yr_arena_write_data( |
345 | 0 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, opcode, opcode_len, NULL); |
346 | 0 | } |
347 | | |
348 | | int yr_parser_check_types( |
349 | | YR_COMPILER* compiler, |
350 | | YR_OBJECT_FUNCTION* function, |
351 | | const char* actual_args_fmt) |
352 | 0 | { |
353 | 0 | int i; |
354 | |
|
355 | 0 | for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) |
356 | 0 | { |
357 | 0 | if (function->prototypes[i].arguments_fmt == NULL) |
358 | 0 | break; |
359 | | |
360 | 0 | if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0) |
361 | 0 | return ERROR_SUCCESS; |
362 | 0 | } |
363 | | |
364 | 0 | yr_compiler_set_error_extra_info(compiler, function->identifier) |
365 | |
|
366 | 0 | return ERROR_WRONG_ARGUMENTS; |
367 | 0 | } |
368 | | |
369 | | int yr_parser_lookup_string( |
370 | | yyscan_t yyscanner, |
371 | | const char* identifier, |
372 | | YR_STRING** string) |
373 | 0 | { |
374 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
375 | |
|
376 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
377 | 0 | compiler, compiler->current_rule_idx); |
378 | |
|
379 | 0 | yr_rule_strings_foreach(current_rule, *string) |
380 | 0 | { |
381 | | // If some string $a gets fragmented into multiple chained |
382 | | // strings, all those fragments have the same $a identifier |
383 | | // but we are interested in the heading fragment, which is |
384 | | // that with chained_to == NULL |
385 | |
|
386 | 0 | if ((*string)->chained_to == NULL && |
387 | 0 | strcmp((*string)->identifier, identifier) == 0) |
388 | 0 | { |
389 | 0 | return ERROR_SUCCESS; |
390 | 0 | } |
391 | 0 | } |
392 | | |
393 | 0 | yr_compiler_set_error_extra_info(compiler, identifier) |
394 | |
|
395 | 0 | * string = NULL; |
396 | |
|
397 | 0 | return ERROR_UNDEFINED_STRING; |
398 | 0 | } |
399 | | |
400 | | //////////////////////////////////////////////////////////////////////////////// |
401 | | // Searches for a variable with the given identifier in the scope of the current |
402 | | // "for" loop. In case of nested "for" loops the identifier is searched starting |
403 | | // at the top-level loop and going down thorough the nested loops until the |
404 | | // current one. This is ok because inner loops can not re-define an identifier |
405 | | // already defined by an outer loop. |
406 | | // |
407 | | // If the variable is found, the return value is the position that the variable |
408 | | // occupies among all the currently defined variables. If the variable doesn't |
409 | | // exist the return value is -1. |
410 | | // |
411 | | // The function can receive a pointer to a YR_EXPRESSION that will populated |
412 | | // with information about the variable if found. This pointer can be NULL if |
413 | | // the caller is not interested in getting that information. |
414 | | // |
415 | | int yr_parser_lookup_loop_variable( |
416 | | yyscan_t yyscanner, |
417 | | const char* identifier, |
418 | | YR_EXPRESSION* expr) |
419 | 0 | { |
420 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
421 | 0 | int i, j; |
422 | 0 | int var_offset = 0; |
423 | |
|
424 | 0 | for (i = 0; i <= compiler->loop_index; i++) |
425 | 0 | { |
426 | 0 | var_offset += compiler->loop[i].vars_internal_count; |
427 | |
|
428 | 0 | for (j = 0; j < compiler->loop[i].vars_count; j++) |
429 | 0 | { |
430 | 0 | if (compiler->loop[i].vars[j].identifier.ptr != NULL && |
431 | 0 | strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0) |
432 | 0 | { |
433 | 0 | if (expr != NULL) |
434 | 0 | *expr = compiler->loop[i].vars[j]; |
435 | |
|
436 | 0 | return var_offset + j; |
437 | 0 | } |
438 | 0 | } |
439 | | |
440 | 0 | var_offset += compiler->loop[i].vars_count; |
441 | 0 | } |
442 | | |
443 | 0 | return -1; |
444 | 0 | } |
445 | | |
446 | | static int _yr_parser_write_string( |
447 | | const char* identifier, |
448 | | YR_MODIFIER modifier, |
449 | | YR_COMPILER* compiler, |
450 | | SIZED_STRING* str, |
451 | | RE_AST* re_ast, |
452 | | YR_ARENA_REF* string_ref, |
453 | | int* min_atom_quality, |
454 | | int* num_atom) |
455 | 0 | { |
456 | 0 | SIZED_STRING* literal_string; |
457 | 0 | YR_ATOM_LIST_ITEM* atom; |
458 | 0 | YR_ATOM_LIST_ITEM* atom_list = NULL; |
459 | |
|
460 | 0 | int c, result; |
461 | 0 | int max_string_len; |
462 | 0 | bool free_literal = false; |
463 | |
|
464 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
465 | 0 | compiler->arena, |
466 | 0 | YR_STRINGS_TABLE, |
467 | 0 | sizeof(YR_STRING), |
468 | 0 | string_ref, |
469 | 0 | offsetof(YR_STRING, identifier), |
470 | 0 | offsetof(YR_STRING, string), |
471 | 0 | offsetof(YR_STRING, chained_to), |
472 | 0 | EOL)); |
473 | |
|
474 | 0 | YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr( |
475 | 0 | compiler->arena, string_ref); |
476 | |
|
477 | 0 | YR_ARENA_REF ref; |
478 | |
|
479 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
480 | |
|
481 | 0 | string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
482 | 0 | string->rule_idx = compiler->current_rule_idx; |
483 | 0 | string->idx = compiler->current_string_idx; |
484 | 0 | string->fixed_offset = YR_UNDEFINED; |
485 | |
|
486 | 0 | compiler->current_string_idx++; |
487 | |
|
488 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
489 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
490 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
491 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
492 | 0 | { |
493 | 0 | literal_string = yr_re_ast_extract_literal(re_ast); |
494 | |
|
495 | 0 | if (literal_string != NULL) |
496 | 0 | free_literal = true; |
497 | 0 | } |
498 | 0 | else |
499 | 0 | { |
500 | 0 | literal_string = str; |
501 | 0 | } |
502 | |
|
503 | 0 | if (literal_string != NULL) |
504 | 0 | { |
505 | 0 | modifier.flags |= STRING_FLAGS_LITERAL; |
506 | |
|
507 | 0 | result = _yr_compiler_store_data( |
508 | 0 | compiler, |
509 | 0 | literal_string->c_string, |
510 | 0 | literal_string->length + 1, // +1 to include terminating NULL |
511 | 0 | &ref); |
512 | |
|
513 | 0 | if (result != ERROR_SUCCESS) |
514 | 0 | goto cleanup; |
515 | | |
516 | 0 | string->length = (uint32_t) literal_string->length; |
517 | 0 | string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
518 | |
|
519 | 0 | if (modifier.flags & STRING_FLAGS_WIDE) |
520 | 0 | max_string_len = string->length * 2; |
521 | 0 | else |
522 | 0 | max_string_len = string->length; |
523 | |
|
524 | 0 | if (max_string_len <= YR_MAX_ATOM_LENGTH) |
525 | 0 | modifier.flags |= STRING_FLAGS_FITS_IN_ATOM; |
526 | |
|
527 | 0 | result = yr_atoms_extract_from_string( |
528 | 0 | &compiler->atoms_config, |
529 | 0 | (uint8_t*) literal_string->c_string, |
530 | 0 | (int32_t) literal_string->length, |
531 | 0 | modifier, |
532 | 0 | &atom_list, |
533 | 0 | min_atom_quality); |
534 | |
|
535 | 0 | if (result != ERROR_SUCCESS) |
536 | 0 | goto cleanup; |
537 | 0 | } |
538 | 0 | else |
539 | 0 | { |
540 | | // Non-literal strings can't be marked as fixed offset because once we |
541 | | // find a string atom in the scanned data we don't know the offset where |
542 | | // the string should start, as the non-literal strings can contain |
543 | | // variable-length portions. |
544 | 0 | modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET; |
545 | | |
546 | | // Save the position where the RE forward code starts for later reference. |
547 | 0 | yr_arena_off_t forward_code_start = yr_arena_get_current_offset( |
548 | 0 | compiler->arena, YR_RE_CODE_SECTION); |
549 | | |
550 | | // Emit forwards code |
551 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, false); |
552 | |
|
553 | 0 | if (result != ERROR_SUCCESS) |
554 | 0 | goto cleanup; |
555 | | |
556 | | // Emit backwards code |
557 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, true); |
558 | |
|
559 | 0 | if (result != ERROR_SUCCESS) |
560 | 0 | goto cleanup; |
561 | | |
562 | | // Extract atoms from the regular expression. |
563 | 0 | result = yr_atoms_extract_from_re( |
564 | 0 | &compiler->atoms_config, |
565 | 0 | re_ast, |
566 | 0 | modifier, |
567 | 0 | &atom_list, |
568 | 0 | min_atom_quality); |
569 | |
|
570 | 0 | if (result != ERROR_SUCCESS) |
571 | 0 | goto cleanup; |
572 | | |
573 | | // If no atom was extracted let's add a zero-length atom. |
574 | 0 | if (atom_list == NULL) |
575 | 0 | { |
576 | 0 | atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
577 | |
|
578 | 0 | if (atom_list == NULL) |
579 | 0 | { |
580 | 0 | result = ERROR_INSUFFICIENT_MEMORY; |
581 | 0 | goto cleanup; |
582 | 0 | } |
583 | | |
584 | 0 | atom_list->atom.length = 0; |
585 | 0 | atom_list->backtrack = 0; |
586 | 0 | atom_list->backward_code_ref = YR_ARENA_NULL_REF; |
587 | 0 | atom_list->next = NULL; |
588 | |
|
589 | 0 | yr_arena_ptr_to_ref( |
590 | 0 | compiler->arena, |
591 | 0 | yr_arena_get_ptr( |
592 | 0 | compiler->arena, YR_RE_CODE_SECTION, forward_code_start), |
593 | 0 | &(atom_list->forward_code_ref)); |
594 | 0 | } |
595 | 0 | } |
596 | | |
597 | 0 | string->flags = modifier.flags; |
598 | | |
599 | | // Add the string to Aho-Corasick automaton. |
600 | 0 | result = yr_ac_add_string( |
601 | 0 | compiler->automaton, string, string->idx, atom_list, compiler->arena); |
602 | |
|
603 | 0 | if (result != ERROR_SUCCESS) |
604 | 0 | goto cleanup; |
605 | | |
606 | 0 | atom = atom_list; |
607 | 0 | c = 0; |
608 | |
|
609 | 0 | while (atom != NULL) |
610 | 0 | { |
611 | 0 | atom = atom->next; |
612 | 0 | c++; |
613 | 0 | } |
614 | |
|
615 | 0 | (*num_atom) += c; |
616 | |
|
617 | 0 | cleanup: |
618 | 0 | if (free_literal) |
619 | 0 | yr_free(literal_string); |
620 | |
|
621 | 0 | if (atom_list != NULL) |
622 | 0 | yr_atoms_list_destroy(atom_list); |
623 | |
|
624 | 0 | return result; |
625 | 0 | } |
626 | | |
627 | | static int _yr_parser_check_string_modifiers( |
628 | | yyscan_t yyscanner, |
629 | | YR_MODIFIER modifier) |
630 | 0 | { |
631 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
632 | | |
633 | | // xor and nocase together is not implemented. |
634 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
635 | 0 | modifier.flags & STRING_FLAGS_NO_CASE) |
636 | 0 | { |
637 | 0 | yr_compiler_set_error_extra_info( |
638 | 0 | compiler, "invalid modifier combination: xor nocase"); |
639 | 0 | return ERROR_INVALID_MODIFIER; |
640 | 0 | } |
641 | | |
642 | | // base64 and nocase together is not implemented. |
643 | 0 | if (modifier.flags & STRING_FLAGS_NO_CASE && |
644 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
645 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
646 | 0 | { |
647 | 0 | yr_compiler_set_error_extra_info( |
648 | 0 | compiler, |
649 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
650 | 0 | ? "invalid modifier combination: base64 nocase" |
651 | 0 | : "invalid modifier combination: base64wide nocase"); |
652 | 0 | return ERROR_INVALID_MODIFIER; |
653 | 0 | } |
654 | | |
655 | | // base64 and fullword together is not implemented. |
656 | 0 | if (modifier.flags & STRING_FLAGS_FULL_WORD && |
657 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
658 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
659 | 0 | { |
660 | 0 | yr_compiler_set_error_extra_info( |
661 | 0 | compiler, |
662 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
663 | 0 | ? "invalid modifier combination: base64 fullword" |
664 | 0 | : "invalid modifier combination: base64wide fullword"); |
665 | 0 | return ERROR_INVALID_MODIFIER; |
666 | 0 | } |
667 | | |
668 | | // base64 and xor together is not implemented. |
669 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
670 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
671 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
672 | 0 | { |
673 | 0 | yr_compiler_set_error_extra_info( |
674 | 0 | compiler, |
675 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
676 | 0 | ? "invalid modifier combination: base64 xor" |
677 | 0 | : "invalid modifier combination: base64wide xor"); |
678 | 0 | return ERROR_INVALID_MODIFIER; |
679 | 0 | } |
680 | | |
681 | 0 | return ERROR_SUCCESS; |
682 | 0 | } |
683 | | |
684 | | int yr_parser_reduce_string_declaration( |
685 | | yyscan_t yyscanner, |
686 | | YR_MODIFIER modifier, |
687 | | const char* identifier, |
688 | | SIZED_STRING* str, |
689 | | YR_ARENA_REF* string_ref) |
690 | 0 | { |
691 | 0 | int result = ERROR_SUCCESS; |
692 | 0 | int min_atom_quality = YR_MAX_ATOM_QUALITY; |
693 | 0 | int atom_quality; |
694 | |
|
695 | 0 | char message[512]; |
696 | |
|
697 | 0 | int32_t min_gap = 0; |
698 | 0 | int32_t max_gap = 0; |
699 | |
|
700 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
701 | |
|
702 | 0 | RE_AST* re_ast = NULL; |
703 | 0 | RE_AST* remainder_re_ast = NULL; |
704 | 0 | RE_ERROR re_error; |
705 | |
|
706 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
707 | 0 | compiler, compiler->current_rule_idx); |
708 | | |
709 | | // Determine if a string with the same identifier was already defined |
710 | | // by searching for the identifier in strings_table. |
711 | 0 | uint32_t string_idx = yr_hash_table_lookup_uint32( |
712 | 0 | compiler->strings_table, identifier, NULL); |
713 | | |
714 | | // The string was already defined, return an error. |
715 | 0 | if (string_idx != UINT32_MAX) |
716 | 0 | { |
717 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
718 | 0 | return ERROR_DUPLICATED_STRING_IDENTIFIER; |
719 | 0 | } |
720 | | |
721 | | // Empty strings are not allowed. |
722 | 0 | if (str->length == 0) |
723 | 0 | { |
724 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
725 | 0 | return ERROR_EMPTY_STRING; |
726 | 0 | } |
727 | | |
728 | 0 | if (str->flags & SIZED_STRING_FLAGS_NO_CASE) |
729 | 0 | modifier.flags |= STRING_FLAGS_NO_CASE; |
730 | |
|
731 | 0 | if (str->flags & SIZED_STRING_FLAGS_DOT_ALL) |
732 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
733 | | |
734 | | // Hex strings are always handled as DOT_ALL regexps. |
735 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
736 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
737 | |
|
738 | 0 | if (!(modifier.flags & STRING_FLAGS_WIDE) && |
739 | 0 | !(modifier.flags & STRING_FLAGS_BASE64 || |
740 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
741 | 0 | { |
742 | 0 | modifier.flags |= STRING_FLAGS_ASCII; |
743 | 0 | } |
744 | | |
745 | | // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding |
746 | | // a single match for the string is enough. This is true in |
747 | | // most cases, except when the string count (#) and string offset (@) |
748 | | // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH |
749 | | // initially, and unmarked later if required. |
750 | 0 | modifier.flags |= STRING_FLAGS_SINGLE_MATCH; |
751 | | |
752 | | // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't |
753 | | // need to be searched all over the file because the user is using the |
754 | | // "at" operator. The string must be searched at a fixed offset in the |
755 | | // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially, |
756 | | // and unmarked later if required. |
757 | 0 | modifier.flags |= STRING_FLAGS_FIXED_OFFSET; |
758 | | |
759 | | // If string identifier is $ this is an anonymous string, if not add the |
760 | | // identifier to strings_table. |
761 | 0 | if (strcmp(identifier, "$") == 0) |
762 | 0 | { |
763 | 0 | modifier.flags |= STRING_FLAGS_ANONYMOUS; |
764 | 0 | } |
765 | 0 | else |
766 | 0 | { |
767 | 0 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
768 | 0 | compiler->strings_table, |
769 | 0 | identifier, |
770 | 0 | NULL, |
771 | 0 | compiler->current_string_idx)); |
772 | 0 | } |
773 | | |
774 | | // Make sure that the the string does not have an invalid combination of |
775 | | // modifiers. |
776 | 0 | FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier)); |
777 | |
|
778 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
779 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
780 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
781 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
782 | 0 | { |
783 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
784 | 0 | result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); |
785 | 0 | else if (modifier.flags & STRING_FLAGS_REGEXP) |
786 | 0 | { |
787 | 0 | int flags = RE_PARSER_FLAG_NONE; |
788 | 0 | if (compiler->strict_escape) |
789 | 0 | flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; |
790 | 0 | result = yr_re_parse(str->c_string, &re_ast, &re_error, flags); |
791 | 0 | } |
792 | 0 | else |
793 | 0 | result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); |
794 | |
|
795 | 0 | if (result != ERROR_SUCCESS) |
796 | 0 | { |
797 | 0 | if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) |
798 | 0 | { |
799 | 0 | yywarning(yyscanner, "unknown escape sequence"); |
800 | 0 | } |
801 | 0 | else |
802 | 0 | { |
803 | 0 | snprintf( |
804 | 0 | message, |
805 | 0 | sizeof(message), |
806 | 0 | "invalid %s \"%s\": %s", |
807 | 0 | (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" |
808 | 0 | : "regular expression", |
809 | 0 | identifier, |
810 | 0 | re_error.message); |
811 | |
|
812 | 0 | yr_compiler_set_error_extra_info(compiler, message); |
813 | 0 | goto _exit; |
814 | 0 | } |
815 | 0 | } |
816 | | |
817 | 0 | if (re_ast->flags & RE_FLAGS_FAST_REGEXP) |
818 | 0 | modifier.flags |= STRING_FLAGS_FAST_REGEXP; |
819 | |
|
820 | 0 | if (re_ast->flags & RE_FLAGS_GREEDY) |
821 | 0 | modifier.flags |= STRING_FLAGS_GREEDY_REGEXP; |
822 | | |
823 | | // Regular expressions in the strings section can't mix greedy and |
824 | | // ungreedy quantifiers like .* and .*?. That's because these regular |
825 | | // expressions can be matched forwards and/or backwards depending on the |
826 | | // atom found, and we need the regexp to be all-greedy or all-ungreedy to |
827 | | // be able to properly calculate the length of the match. |
828 | |
|
829 | 0 | if ((re_ast->flags & RE_FLAGS_GREEDY) && |
830 | 0 | (re_ast->flags & RE_FLAGS_UNGREEDY)) |
831 | 0 | { |
832 | 0 | result = ERROR_INVALID_REGULAR_EXPRESSION; |
833 | |
|
834 | 0 | yr_compiler_set_error_extra_info( |
835 | 0 | compiler, |
836 | 0 | "greedy and ungreedy quantifiers can't be mixed in a regular " |
837 | 0 | "expression"); |
838 | |
|
839 | 0 | goto _exit; |
840 | 0 | } |
841 | | |
842 | 0 | if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast)) |
843 | 0 | { |
844 | 0 | yywarning( |
845 | 0 | yyscanner, |
846 | 0 | "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} " |
847 | 0 | "with a reasonable value for N", |
848 | 0 | identifier); |
849 | 0 | } |
850 | |
|
851 | 0 | if (compiler->re_ast_callback != NULL) |
852 | 0 | { |
853 | 0 | compiler->re_ast_callback( |
854 | 0 | current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data); |
855 | 0 | } |
856 | |
|
857 | 0 | *string_ref = YR_ARENA_NULL_REF; |
858 | |
|
859 | 0 | while (re_ast != NULL) |
860 | 0 | { |
861 | 0 | YR_ARENA_REF ref; |
862 | |
|
863 | 0 | uint32_t prev_string_idx = compiler->current_string_idx - 1; |
864 | |
|
865 | 0 | int32_t prev_min_gap = min_gap; |
866 | 0 | int32_t prev_max_gap = max_gap; |
867 | |
|
868 | 0 | result = yr_re_ast_split_at_chaining_point( |
869 | 0 | re_ast, &remainder_re_ast, &min_gap, &max_gap); |
870 | |
|
871 | 0 | if (result != ERROR_SUCCESS) |
872 | 0 | goto _exit; |
873 | | |
874 | 0 | result = _yr_parser_write_string( |
875 | 0 | identifier, |
876 | 0 | modifier, |
877 | 0 | compiler, |
878 | 0 | NULL, |
879 | 0 | re_ast, |
880 | 0 | &ref, |
881 | 0 | &atom_quality, |
882 | 0 | ¤t_rule->num_atoms); |
883 | |
|
884 | 0 | if (result != ERROR_SUCCESS) |
885 | 0 | goto _exit; |
886 | | |
887 | 0 | if (atom_quality < min_atom_quality) |
888 | 0 | min_atom_quality = atom_quality; |
889 | |
|
890 | 0 | if (YR_ARENA_IS_NULL_REF(*string_ref)) |
891 | 0 | { |
892 | | // This is the first string in the chain, the string reference |
893 | | // returned by this function must point to this string. |
894 | 0 | *string_ref = ref; |
895 | 0 | } |
896 | 0 | else |
897 | 0 | { |
898 | | // This is not the first string in the chain, set the appropriate |
899 | | // flags and fill the chained_to, chain_gap_min and chain_gap_max |
900 | | // fields. |
901 | 0 | YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr( |
902 | 0 | compiler->arena, |
903 | 0 | YR_STRINGS_TABLE, |
904 | 0 | prev_string_idx * sizeof(YR_STRING)); |
905 | |
|
906 | 0 | YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr( |
907 | 0 | compiler->arena, &ref); |
908 | |
|
909 | 0 | new_string->chained_to = prev_string; |
910 | 0 | new_string->chain_gap_min = prev_min_gap; |
911 | 0 | new_string->chain_gap_max = prev_max_gap; |
912 | | |
913 | | // A string chained to another one can't have a fixed offset, only the |
914 | | // head of the string chain can have a fixed offset. |
915 | 0 | new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
916 | | |
917 | | // There is a previous string, but that string wasn't marked as part |
918 | | // of a chain because we can't do that until knowing there will be |
919 | | // another string, let's flag it now that we know.
920 | 0 | prev_string->flags |= STRING_FLAGS_CHAIN_PART; |
921 | | |
922 | | // There is a previous string, so this string is part of a chain, but |
923 | | // there will be no more strings because there are no more AST to |
924 | | // split, which means that this is the chain's tail. |
925 | 0 | if (remainder_re_ast == NULL) |
926 | 0 | new_string->flags |= STRING_FLAGS_CHAIN_PART | |
927 | 0 | STRING_FLAGS_CHAIN_TAIL; |
928 | 0 | } |
929 | |
|
930 | 0 | yr_re_ast_destroy(re_ast); |
931 | 0 | re_ast = remainder_re_ast; |
932 | 0 | } |
933 | 0 | } |
934 | 0 | else // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or |
935 | | // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE |
936 | 0 | { |
937 | 0 | result = _yr_parser_write_string( |
938 | 0 | identifier, |
939 | 0 | modifier, |
940 | 0 | compiler, |
941 | 0 | str, |
942 | 0 | NULL, |
943 | 0 | string_ref, |
944 | 0 | &min_atom_quality, |
945 | 0 | ¤t_rule->num_atoms); |
946 | |
|
947 | 0 | if (result != ERROR_SUCCESS) |
948 | 0 | goto _exit; |
949 | 0 | } |
950 | | |
951 | 0 | if (min_atom_quality < compiler->atoms_config.quality_warning_threshold) |
952 | 0 | { |
953 | 0 | yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier); |
954 | 0 | } |
955 | |
|
956 | 0 | _exit: |
957 | |
|
958 | 0 | if (re_ast != NULL) |
959 | 0 | yr_re_ast_destroy(re_ast); |
960 | |
|
961 | 0 | if (remainder_re_ast != NULL) |
962 | 0 | yr_re_ast_destroy(remainder_re_ast); |
963 | |
|
964 | 0 | return result; |
965 | 0 | } |
966 | | |
967 | | static int wildcard_iterator( |
968 | | void* prefix, |
969 | | size_t prefix_len, |
970 | | void* _value, |
971 | | void* data) |
972 | 0 | { |
973 | 0 | const char* identifier = (const char*) data; |
974 | | |
975 | | // If the identifier is prefixed by prefix, then it matches the wildcard. |
976 | 0 | if (!strncmp(prefix, identifier, prefix_len)) |
977 | 0 | return ERROR_IDENTIFIER_MATCHES_WILDCARD; |
978 | | |
979 | 0 | return ERROR_SUCCESS; |
980 | 0 | } |
981 | | |
982 | | int yr_parser_reduce_rule_declaration_phase_1( |
983 | | yyscan_t yyscanner, |
984 | | int32_t flags, |
985 | | const char* identifier, |
986 | | YR_ARENA_REF* rule_ref) |
987 | 0 | { |
988 | 0 | int result; |
989 | 0 | YR_FIXUP* fixup; |
990 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
991 | |
|
992 | 0 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
993 | 0 | compiler->arena, |
994 | 0 | YR_NAMESPACES_TABLE, |
995 | 0 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
996 | |
|
997 | 0 | if (yr_hash_table_lookup_uint32( |
998 | 0 | compiler->rules_table, identifier, ns->name) != UINT32_MAX || |
999 | 0 | yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL) |
1000 | 0 | { |
1001 | | // A rule or variable with the same identifier already exists, return the |
1002 | | // appropriate error. |
1003 | |
|
1004 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
1005 | 0 | return ERROR_DUPLICATED_IDENTIFIER; |
1006 | 0 | } |
1007 | | |
1008 | | // Iterate over all identifiers in wildcard_identifiers_table, and check if |
1009 | | // any of them are a prefix of the identifier being declared. If so, return |
1010 | | // ERROR_IDENTIFIER_MATCHES_WILDCARD. |
1011 | 0 | result = yr_hash_table_iterate( |
1012 | 0 | compiler->wildcard_identifiers_table, |
1013 | 0 | ns->name, |
1014 | 0 | wildcard_iterator, |
1015 | 0 | (void*) identifier); |
1016 | |
|
1017 | 0 | if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD) |
1018 | 0 | { |
1019 | | // This rule matches an existing wildcard rule set. |
1020 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
1021 | 0 | } |
1022 | |
|
1023 | 0 | FAIL_ON_ERROR(result); |
1024 | |
|
1025 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1026 | 0 | compiler->arena, |
1027 | 0 | YR_RULES_TABLE, |
1028 | 0 | sizeof(YR_RULE), |
1029 | 0 | rule_ref, |
1030 | 0 | offsetof(YR_RULE, identifier), |
1031 | 0 | offsetof(YR_RULE, tags), |
1032 | 0 | offsetof(YR_RULE, strings), |
1033 | 0 | offsetof(YR_RULE, metas), |
1034 | 0 | offsetof(YR_RULE, ns), |
1035 | 0 | EOL)); |
1036 | |
|
1037 | 0 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1038 | |
|
1039 | 0 | YR_ARENA_REF ref; |
1040 | |
|
1041 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1042 | |
|
1043 | 0 | rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1044 | 0 | rule->flags = flags; |
1045 | 0 | rule->ns = ns; |
1046 | 0 | rule->num_atoms = 0; |
1047 | |
|
1048 | 0 | YR_ARENA_REF jmp_offset_ref; |
1049 | | |
1050 | | // We are starting to parse a new rule, set current_rule_idx accordingly. |
1051 | 0 | compiler->current_rule_idx = compiler->next_rule_idx; |
1052 | 0 | compiler->next_rule_idx++; |
1053 | | |
1054 | | // The OP_INIT_RULE instruction behaves like a jump. When the rule is |
1055 | | // disabled it skips over the rule's code and go straight to the next rule's |
1056 | | // code. The jmp_offset_ref variable points to the jump's offset. The offset |
1057 | | // is set to 0 as we don't know the jump target yet. When we finish |
1058 | | // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2 |
1059 | | // the jump offset is set to its final value. |
1060 | |
|
1061 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg_int32( |
1062 | 0 | yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref)); |
1063 | |
|
1064 | 0 | FAIL_ON_ERROR(yr_arena_write_data( |
1065 | 0 | compiler->arena, |
1066 | 0 | YR_CODE_SECTION, |
1067 | 0 | &compiler->current_rule_idx, |
1068 | 0 | sizeof(compiler->current_rule_idx), |
1069 | 0 | NULL)); |
1070 | | |
1071 | | // Create a fixup entry for the jump and push it in the stack |
1072 | 0 | fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP)); |
1073 | |
|
1074 | 0 | if (fixup == NULL) |
1075 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1076 | | |
1077 | 0 | fixup->ref = jmp_offset_ref; |
1078 | 0 | fixup->next = compiler->fixup_stack_head; |
1079 | 0 | compiler->fixup_stack_head = fixup; |
1080 | | |
1081 | | // Clean strings_table as we are starting to parse a new rule. |
1082 | 0 | yr_hash_table_clean(compiler->strings_table, NULL); |
1083 | |
|
1084 | 0 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
1085 | 0 | compiler->rules_table, identifier, ns->name, compiler->current_rule_idx)); |
1086 | |
|
1087 | 0 | return ERROR_SUCCESS; |
1088 | 0 | } |
1089 | | |
1090 | | int yr_parser_reduce_rule_declaration_phase_2( |
1091 | | yyscan_t yyscanner, |
1092 | | YR_ARENA_REF* rule_ref) |
1093 | 0 | { |
1094 | 0 | uint32_t max_strings_per_rule; |
1095 | 0 | uint32_t strings_in_rule = 0; |
1096 | |
|
1097 | 0 | YR_FIXUP* fixup; |
1098 | 0 | YR_STRING* string; |
1099 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1100 | |
|
1101 | 0 | yr_get_configuration_uint32( |
1102 | 0 | YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule); |
1103 | |
|
1104 | 0 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1105 | | |
1106 | | // Show warning if the rule is generating too many atoms. The warning is |
1107 | | // shown if the number of atoms is greater than 20 times the maximum number |
1108 | | // of strings allowed for a rule, as 20 is minimum number of atoms generated |
1109 | | // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously. |
1110 | |
|
1111 | 0 | if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD) |
1112 | 0 | { |
1113 | 0 | yywarning(yyscanner, "rule is slowing down scanning"); |
1114 | 0 | } |
1115 | |
|
1116 | 0 | yr_rule_strings_foreach(rule, string) |
1117 | 0 | { |
1118 | | // Only the heading fragment in a chain of strings (the one with |
1119 | | // chained_to == NULL) must be referenced. All other fragments |
1120 | | // are never marked as referenced. |
1121 | | // |
1122 | | // Any string identifier that starts with '_' can be unreferenced. Anonymous |
1123 | | // strings must always be referenced. |
1124 | |
|
1125 | 0 | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1126 | 0 | (STRING_IS_ANONYMOUS(string) || |
1127 | 0 | (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_'))) |
1128 | 0 | { |
1129 | 0 | yr_compiler_set_error_extra_info( |
1130 | 0 | compiler, string->identifier) return ERROR_UNREFERENCED_STRING; |
1131 | 0 | } |
1132 | | |
1133 | | // If a string is unreferenced we need to unset the FIXED_OFFSET flag so |
1134 | | // that it will match anywhere. |
1135 | 0 | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1136 | 0 | STRING_IS_FIXED_OFFSET(string)) |
1137 | 0 | { |
1138 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1139 | 0 | } |
1140 | |
|
1141 | 0 | strings_in_rule++; |
1142 | |
|
1143 | 0 | if (strings_in_rule > max_strings_per_rule) |
1144 | 0 | { |
1145 | 0 | yr_compiler_set_error_extra_info( |
1146 | 0 | compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS; |
1147 | 0 | } |
1148 | 0 | } |
1149 | | |
1150 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1151 | 0 | yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL)); |
1152 | |
|
1153 | 0 | fixup = compiler->fixup_stack_head; |
1154 | |
|
1155 | 0 | int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr( |
1156 | 0 | compiler->arena, &fixup->ref); |
1157 | |
|
1158 | 0 | int32_t jmp_offset = yr_arena_get_current_offset( |
1159 | 0 | compiler->arena, YR_CODE_SECTION) - |
1160 | 0 | fixup->ref.offset + 1; |
1161 | |
|
1162 | 0 | memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset)); |
1163 | | |
1164 | | // Remove fixup from the stack. |
1165 | 0 | compiler->fixup_stack_head = fixup->next; |
1166 | 0 | yr_free(fixup); |
1167 | | |
1168 | | // We have finished parsing the current rule set current_rule_idx to |
1169 | | // UINT32_MAX indicating that we are not currently parsing a rule. |
1170 | 0 | compiler->current_rule_idx = UINT32_MAX; |
1171 | |
|
1172 | 0 | return ERROR_SUCCESS; |
1173 | 0 | } |
1174 | | |
1175 | | int yr_parser_reduce_string_identifier( |
1176 | | yyscan_t yyscanner, |
1177 | | const char* identifier, |
1178 | | uint8_t instruction, |
1179 | | uint64_t at_offset) |
1180 | 0 | { |
1181 | 0 | YR_STRING* string; |
1182 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1183 | |
|
1184 | 0 | if (strcmp(identifier, "$") == 0) // is an anonymous string ? |
1185 | 0 | { |
1186 | 0 | if (compiler->loop_for_of_var_index >= 0) // inside a loop ? |
1187 | 0 | { |
1188 | 0 | yr_parser_emit_with_arg( |
1189 | 0 | yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL); |
1190 | |
|
1191 | 0 | yr_parser_emit(yyscanner, instruction, NULL); |
1192 | |
|
1193 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
1194 | 0 | compiler, compiler->current_rule_idx); |
1195 | |
|
1196 | 0 | yr_rule_strings_foreach(current_rule, string) |
1197 | 0 | { |
1198 | 0 | if (instruction != OP_FOUND) |
1199 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1200 | |
|
1201 | 0 | if (instruction == OP_FOUND_AT) |
1202 | 0 | { |
1203 | | // Avoid overwriting any previous fixed offset |
1204 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1205 | 0 | string->fixed_offset = at_offset; |
1206 | | |
1207 | | // If a previous fixed offset was different, disable |
1208 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1209 | | // have room to store a single fixed offset value |
1210 | 0 | if (string->fixed_offset != at_offset) |
1211 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1212 | 0 | } |
1213 | 0 | else |
1214 | 0 | { |
1215 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1216 | 0 | } |
1217 | 0 | } |
1218 | 0 | } |
1219 | 0 | else |
1220 | 0 | { |
1221 | | // Anonymous strings not allowed outside of a loop |
1222 | 0 | return ERROR_MISPLACED_ANONYMOUS_STRING; |
1223 | 0 | } |
1224 | 0 | } |
1225 | 0 | else |
1226 | 0 | { |
1227 | 0 | FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string)); |
1228 | |
|
1229 | 0 | FAIL_ON_ERROR( |
1230 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL)); |
1231 | |
|
1232 | 0 | if (instruction != OP_FOUND) |
1233 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1234 | |
|
1235 | 0 | if (instruction == OP_FOUND_AT) |
1236 | 0 | { |
1237 | | // Avoid overwriting any previous fixed offset |
1238 | |
|
1239 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1240 | 0 | string->fixed_offset = at_offset; |
1241 | | |
1242 | | // If a previous fixed offset was different, disable |
1243 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1244 | | // have room to store a single fixed offset value |
1245 | |
|
1246 | 0 | if (string->fixed_offset == YR_UNDEFINED || |
1247 | 0 | string->fixed_offset != at_offset) |
1248 | 0 | { |
1249 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1250 | 0 | } |
1251 | 0 | } |
1252 | 0 | else |
1253 | 0 | { |
1254 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1255 | 0 | } |
1256 | |
|
1257 | 0 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL)); |
1258 | |
|
1259 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
1260 | 0 | } |
1261 | | |
1262 | 0 | return ERROR_SUCCESS; |
1263 | 0 | } |
1264 | | |
1265 | | int yr_parser_reduce_meta_declaration( |
1266 | | yyscan_t yyscanner, |
1267 | | int32_t type, |
1268 | | const char* identifier, |
1269 | | const char* string, |
1270 | | int64_t integer, |
1271 | | YR_ARENA_REF* meta_ref) |
1272 | 0 | { |
1273 | 0 | YR_ARENA_REF ref; |
1274 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1275 | |
|
1276 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1277 | 0 | compiler->arena, |
1278 | 0 | YR_METAS_TABLE, |
1279 | 0 | sizeof(YR_META), |
1280 | 0 | meta_ref, |
1281 | 0 | offsetof(YR_META, identifier), |
1282 | 0 | offsetof(YR_META, string), |
1283 | 0 | EOL)); |
1284 | |
|
1285 | 0 | YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref); |
1286 | |
|
1287 | 0 | meta->type = type; |
1288 | 0 | meta->integer = integer; |
1289 | |
|
1290 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1291 | |
|
1292 | 0 | meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1293 | |
|
1294 | 0 | if (string != NULL) |
1295 | 0 | { |
1296 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref)); |
1297 | |
|
1298 | 0 | meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1299 | 0 | } |
1300 | 0 | else |
1301 | 0 | { |
1302 | 0 | meta->string = NULL; |
1303 | 0 | } |
1304 | | |
1305 | 0 | compiler->current_meta_idx++; |
1306 | |
|
1307 | 0 | return ERROR_SUCCESS; |
1308 | 0 | } |
1309 | | |
1310 | | static int _yr_parser_valid_module_name(SIZED_STRING* module_name) |
1311 | 1 | { |
1312 | 1 | if (module_name->length == 0) |
1313 | 0 | return false; |
1314 | | |
1315 | 1 | if (strlen(module_name->c_string) != module_name->length) |
1316 | 0 | return false; |
1317 | | |
1318 | 1 | return true; |
1319 | 1 | } |
1320 | | |
1321 | | int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name) |
1322 | 1 | { |
1323 | 1 | int result; |
1324 | | |
1325 | 1 | YR_ARENA_REF ref; |
1326 | 1 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1327 | 1 | YR_OBJECT* module_structure; |
1328 | | |
1329 | 1 | if (!_yr_parser_valid_module_name(module_name)) |
1330 | 0 | { |
1331 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1332 | |
|
1333 | 0 | return ERROR_INVALID_MODULE_NAME; |
1334 | 0 | } |
1335 | | |
1336 | 1 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
1337 | 1 | compiler->arena, |
1338 | 1 | YR_NAMESPACES_TABLE, |
1339 | 1 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
1340 | | |
1341 | 1 | module_structure = (YR_OBJECT*) yr_hash_table_lookup( |
1342 | 1 | compiler->objects_table, module_name->c_string, ns->name); |
1343 | | |
1344 | | // if module already imported, do nothing |
1345 | | |
1346 | 1 | if (module_structure != NULL) |
1347 | 0 | return ERROR_SUCCESS; |
1348 | | |
1349 | 1 | FAIL_ON_ERROR(yr_object_create( |
1350 | 1 | OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure)); |
1351 | | |
1352 | 1 | FAIL_ON_ERROR(yr_hash_table_add( |
1353 | 1 | compiler->objects_table, |
1354 | 1 | module_name->c_string, |
1355 | 1 | ns->name, |
1356 | 1 | module_structure)); |
1357 | | |
1358 | 1 | result = yr_modules_do_declarations(module_name->c_string, module_structure); |
1359 | | |
1360 | 1 | if (result == ERROR_UNKNOWN_MODULE) |
1361 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1362 | | |
1363 | 1 | if (result != ERROR_SUCCESS) |
1364 | 0 | return result; |
1365 | | |
1366 | 1 | FAIL_ON_ERROR( |
1367 | 1 | _yr_compiler_store_string(compiler, module_name->c_string, &ref)); |
1368 | | |
1369 | 1 | FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc( |
1370 | 1 | yyscanner, |
1371 | 1 | OP_IMPORT, |
1372 | 1 | yr_arena_ref_to_ptr(compiler->arena, &ref), |
1373 | 1 | NULL, |
1374 | 1 | NULL)); |
1375 | | |
1376 | 1 | return ERROR_SUCCESS; |
1377 | 1 | } |
1378 | | |
1379 | | static int _yr_parser_operator_to_opcode(const char* op, int expression_type) |
1380 | 0 | { |
1381 | 0 | int opcode = 0; |
1382 | |
|
1383 | 0 | switch (expression_type) |
1384 | 0 | { |
1385 | 0 | case EXPRESSION_TYPE_INTEGER: |
1386 | 0 | opcode = OP_INT_BEGIN; |
1387 | 0 | break; |
1388 | 0 | case EXPRESSION_TYPE_FLOAT: |
1389 | 0 | opcode = OP_DBL_BEGIN; |
1390 | 0 | break; |
1391 | 0 | case EXPRESSION_TYPE_STRING: |
1392 | 0 | opcode = OP_STR_BEGIN; |
1393 | 0 | break; |
1394 | 0 | default: |
1395 | 0 | assert(false); |
1396 | 0 | } |
1397 | | |
1398 | 0 | if (op[0] == '<') |
1399 | 0 | { |
1400 | 0 | if (op[1] == '=') |
1401 | 0 | opcode += _OP_LE; |
1402 | 0 | else |
1403 | 0 | opcode += _OP_LT; |
1404 | 0 | } |
1405 | 0 | else if (op[0] == '>') |
1406 | 0 | { |
1407 | 0 | if (op[1] == '=') |
1408 | 0 | opcode += _OP_GE; |
1409 | 0 | else |
1410 | 0 | opcode += _OP_GT; |
1411 | 0 | } |
1412 | 0 | else if (op[1] == '=') |
1413 | 0 | { |
1414 | 0 | if (op[0] == '=') |
1415 | 0 | opcode += _OP_EQ; |
1416 | 0 | else |
1417 | 0 | opcode += _OP_NEQ; |
1418 | 0 | } |
1419 | 0 | else if (op[0] == '+') |
1420 | 0 | { |
1421 | 0 | opcode += _OP_ADD; |
1422 | 0 | } |
1423 | 0 | else if (op[0] == '-') |
1424 | 0 | { |
1425 | 0 | opcode += _OP_SUB; |
1426 | 0 | } |
1427 | 0 | else if (op[0] == '*') |
1428 | 0 | { |
1429 | 0 | opcode += _OP_MUL; |
1430 | 0 | } |
1431 | 0 | else if (op[0] == '\\') |
1432 | 0 | { |
1433 | 0 | opcode += _OP_DIV; |
1434 | 0 | } |
1435 | |
|
1436 | 0 | if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode)) |
1437 | 0 | { |
1438 | 0 | return opcode; |
1439 | 0 | } |
1440 | | |
1441 | 0 | return OP_ERROR; |
1442 | 0 | } |
1443 | | |
1444 | | int yr_parser_reduce_operation( |
1445 | | yyscan_t yyscanner, |
1446 | | const char* op, |
1447 | | YR_EXPRESSION left_operand, |
1448 | | YR_EXPRESSION right_operand) |
1449 | 0 | { |
1450 | 0 | int expression_type; |
1451 | |
|
1452 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1453 | |
|
1454 | 0 | if ((left_operand.type == EXPRESSION_TYPE_INTEGER || |
1455 | 0 | left_operand.type == EXPRESSION_TYPE_FLOAT) && |
1456 | 0 | (right_operand.type == EXPRESSION_TYPE_INTEGER || |
1457 | 0 | right_operand.type == EXPRESSION_TYPE_FLOAT)) |
1458 | 0 | { |
1459 | 0 | if (left_operand.type != right_operand.type) |
1460 | 0 | { |
1461 | | // One operand is double and the other is integer, |
1462 | | // cast the integer to double |
1463 | |
|
1464 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1465 | 0 | yyscanner, |
1466 | 0 | OP_INT_TO_DBL, |
1467 | 0 | (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1, |
1468 | 0 | NULL, |
1469 | 0 | NULL)); |
1470 | 0 | } |
1471 | | |
1472 | 0 | expression_type = EXPRESSION_TYPE_FLOAT; |
1473 | |
|
1474 | 0 | if (left_operand.type == EXPRESSION_TYPE_INTEGER && |
1475 | 0 | right_operand.type == EXPRESSION_TYPE_INTEGER) |
1476 | 0 | { |
1477 | 0 | expression_type = EXPRESSION_TYPE_INTEGER; |
1478 | 0 | } |
1479 | |
|
1480 | 0 | FAIL_ON_ERROR(yr_parser_emit( |
1481 | 0 | yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL)); |
1482 | 0 | } |
1483 | 0 | else if ( |
1484 | 0 | left_operand.type == EXPRESSION_TYPE_STRING && |
1485 | 0 | right_operand.type == EXPRESSION_TYPE_STRING) |
1486 | 0 | { |
1487 | 0 | int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING); |
1488 | |
|
1489 | 0 | if (opcode != OP_ERROR) |
1490 | 0 | { |
1491 | 0 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL)); |
1492 | 0 | } |
1493 | 0 | else |
1494 | 0 | { |
1495 | 0 | yr_compiler_set_error_extra_info_fmt( |
1496 | 0 | compiler, "strings don't support \"%s\" operation", op); |
1497 | |
|
1498 | 0 | return ERROR_WRONG_TYPE; |
1499 | 0 | } |
1500 | 0 | } |
1501 | 0 | else |
1502 | 0 | { |
1503 | 0 | yr_compiler_set_error_extra_info(compiler, "type mismatch"); |
1504 | |
|
1505 | 0 | return ERROR_WRONG_TYPE; |
1506 | 0 | } |
1507 | | |
1508 | 0 | return ERROR_SUCCESS; |
1509 | 0 | } |
1510 | | |
1511 | | int yr_parser_mark_nonfast( |
1512 | | yyscan_t yyscanner, |
1513 | | YR_STRING_SET string_set |
1514 | 0 | ) { |
1515 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1516 | |
|
1517 | 0 | YR_STRING_SET_ELEMENT* head = string_set.head; |
1518 | 0 | while (head != NULL) { |
1519 | 0 | YR_STRING* string_ptr = yr_arena_ref_to_ptr(compiler->arena, &head->element); |
1520 | 0 | string_ptr->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1521 | 0 | head = head->next; |
1522 | 0 | } |
1523 | 0 | return ERROR_SUCCESS; |
1524 | 0 | } |