/src/yara/libyara/parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2013. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <limits.h> |
31 | | #include <stddef.h> |
32 | | #include <string.h> |
33 | | #include <yara/ahocorasick.h> |
34 | | #include <yara/arena.h> |
35 | | #include <yara/base64.h> |
36 | | #include <yara/error.h> |
37 | | #include <yara/exec.h> |
38 | | #include <yara/integers.h> |
39 | | #include <yara/mem.h> |
40 | | #include <yara/modules.h> |
41 | | #include <yara/object.h> |
42 | | #include <yara/parser.h> |
43 | | #include <yara/re.h> |
44 | | #include <yara/strutils.h> |
45 | | #include <yara/utils.h> |
46 | | |
// Converts a hexadecimal digit character to its numeric value as a uint8_t.
// Only the decimal digits '0'-'9' and the uppercase letters 'A'-'F' are
// handled: any character outside 'A'-'F' goes through the decimal branch.
//
// The argument and the whole expansion are parenthesized so the macro can be
// used safely inside larger expressions. The argument is evaluated more than
// once, so it must not have side effects.
#define todigit(x)                                           \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
50 | | |
51 | | int yr_parser_emit( |
52 | | yyscan_t yyscanner, |
53 | | uint8_t instruction, |
54 | | YR_ARENA_REF* instruction_ref) |
55 | 4 | { |
56 | 4 | return yr_arena_write_data( |
57 | 4 | yyget_extra(yyscanner)->arena, |
58 | 4 | YR_CODE_SECTION, |
59 | 4 | &instruction, |
60 | 4 | sizeof(uint8_t), |
61 | 4 | instruction_ref); |
62 | 4 | } |
63 | | |
64 | | int yr_parser_emit_with_arg_double( |
65 | | yyscan_t yyscanner, |
66 | | uint8_t instruction, |
67 | | double argument, |
68 | | YR_ARENA_REF* instruction_ref, |
69 | | YR_ARENA_REF* argument_ref) |
70 | 0 | { |
71 | 0 | int result = yr_arena_write_data( |
72 | 0 | yyget_extra(yyscanner)->arena, |
73 | 0 | YR_CODE_SECTION, |
74 | 0 | &instruction, |
75 | 0 | sizeof(uint8_t), |
76 | 0 | instruction_ref); |
77 | |
|
78 | 0 | if (result == ERROR_SUCCESS) |
79 | 0 | result = yr_arena_write_data( |
80 | 0 | yyget_extra(yyscanner)->arena, |
81 | 0 | YR_CODE_SECTION, |
82 | 0 | &argument, |
83 | 0 | sizeof(double), |
84 | 0 | argument_ref); |
85 | |
|
86 | 0 | return result; |
87 | 0 | } |
88 | | |
89 | | int yr_parser_emit_with_arg_int32( |
90 | | yyscan_t yyscanner, |
91 | | uint8_t instruction, |
92 | | int32_t argument, |
93 | | YR_ARENA_REF* instruction_ref, |
94 | | YR_ARENA_REF* argument_ref) |
95 | 2 | { |
96 | 2 | int result = yr_arena_write_data( |
97 | 2 | yyget_extra(yyscanner)->arena, |
98 | 2 | YR_CODE_SECTION, |
99 | 2 | &instruction, |
100 | 2 | sizeof(uint8_t), |
101 | 2 | instruction_ref); |
102 | | |
103 | 2 | if (result == ERROR_SUCCESS) |
104 | 2 | result = yr_arena_write_data( |
105 | 2 | yyget_extra(yyscanner)->arena, |
106 | 2 | YR_CODE_SECTION, |
107 | 2 | &argument, |
108 | 2 | sizeof(int32_t), |
109 | 2 | argument_ref); |
110 | | |
111 | 2 | return result; |
112 | 2 | } |
113 | | |
114 | | int yr_parser_emit_with_arg( |
115 | | yyscan_t yyscanner, |
116 | | uint8_t instruction, |
117 | | int64_t argument, |
118 | | YR_ARENA_REF* instruction_ref, |
119 | | YR_ARENA_REF* argument_ref) |
120 | 2 | { |
121 | 2 | int result = yr_arena_write_data( |
122 | 2 | yyget_extra(yyscanner)->arena, |
123 | 2 | YR_CODE_SECTION, |
124 | 2 | &instruction, |
125 | 2 | sizeof(uint8_t), |
126 | 2 | instruction_ref); |
127 | | |
128 | 2 | if (result == ERROR_SUCCESS) |
129 | 2 | result = yr_arena_write_data( |
130 | 2 | yyget_extra(yyscanner)->arena, |
131 | 2 | YR_CODE_SECTION, |
132 | 2 | &argument, |
133 | 2 | sizeof(int64_t), |
134 | 2 | argument_ref); |
135 | | |
136 | 2 | return result; |
137 | 2 | } |
138 | | |
139 | | int yr_parser_emit_with_arg_reloc( |
140 | | yyscan_t yyscanner, |
141 | | uint8_t instruction, |
142 | | void* argument, |
143 | | YR_ARENA_REF* instruction_ref, |
144 | | YR_ARENA_REF* argument_ref) |
145 | 8 | { |
146 | 8 | YR_ARENA_REF ref = YR_ARENA_NULL_REF; |
147 | | |
148 | 8 | DECLARE_REFERENCE(void*, ptr) arg; |
149 | | |
150 | 8 | memset(&arg, 0, sizeof(arg)); |
151 | 8 | arg.ptr = argument; |
152 | | |
153 | 8 | int result = yr_arena_write_data( |
154 | 8 | yyget_extra(yyscanner)->arena, |
155 | 8 | YR_CODE_SECTION, |
156 | 8 | &instruction, |
157 | 8 | sizeof(uint8_t), |
158 | 8 | instruction_ref); |
159 | | |
160 | 8 | if (result == ERROR_SUCCESS) |
161 | 8 | result = yr_arena_write_data( |
162 | 8 | yyget_extra(yyscanner)->arena, |
163 | 8 | YR_CODE_SECTION, |
164 | 8 | &arg, |
165 | 8 | sizeof(arg), |
166 | 8 | &ref); |
167 | | |
168 | 8 | if (result == ERROR_SUCCESS) |
169 | 8 | result = yr_arena_make_ptr_relocatable( |
170 | 8 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL); |
171 | | |
172 | 8 | if (argument_ref != NULL) |
173 | 0 | *argument_ref = ref; |
174 | | |
175 | 8 | return result; |
176 | 8 | } |
177 | | |
178 | | int yr_parser_emit_pushes_for_strings( |
179 | | yyscan_t yyscanner, |
180 | | const char* identifier, |
181 | | int* count) |
182 | 0 | { |
183 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
184 | |
|
185 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
186 | 0 | compiler, compiler->current_rule_idx); |
187 | |
|
188 | 0 | YR_STRING* string; |
189 | |
|
190 | 0 | const char* string_identifier; |
191 | 0 | const char* target_identifier; |
192 | |
|
193 | 0 | int matching = 0; |
194 | |
|
195 | 0 | yr_rule_strings_foreach(current_rule, string) |
196 | 0 | { |
197 | | // Don't generate pushes for strings chained to another one, we are |
198 | | // only interested in non-chained strings or the head of the chain. |
199 | |
|
200 | 0 | if (string->chained_to == NULL) |
201 | 0 | { |
202 | 0 | string_identifier = string->identifier; |
203 | 0 | target_identifier = identifier; |
204 | |
|
205 | 0 | while (*target_identifier != '\0' && *string_identifier != '\0' && |
206 | 0 | *target_identifier == *string_identifier) |
207 | 0 | { |
208 | 0 | target_identifier++; |
209 | 0 | string_identifier++; |
210 | 0 | } |
211 | |
|
212 | 0 | if ((*target_identifier == '\0' && *string_identifier == '\0') || |
213 | 0 | *target_identifier == '*') |
214 | 0 | { |
215 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL); |
216 | |
|
217 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
218 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
219 | 0 | matching++; |
220 | 0 | } |
221 | 0 | } |
222 | 0 | } |
223 | |
|
224 | 0 | if (count != NULL) |
225 | 0 | { |
226 | 0 | *count = matching; |
227 | 0 | } |
228 | |
|
229 | 0 | if (matching == 0) |
230 | 0 | { |
231 | 0 | yr_compiler_set_error_extra_info( |
232 | 0 | compiler, identifier) return ERROR_UNDEFINED_STRING; |
233 | 0 | } |
234 | | |
235 | 0 | return ERROR_SUCCESS; |
236 | 0 | } |
237 | | |
238 | | // Emit OP_PUSH_RULE instructions for all rules whose identifier has given |
239 | | // prefix. |
240 | | int yr_parser_emit_pushes_for_rules( |
241 | | yyscan_t yyscanner, |
242 | | const char* prefix, |
243 | | int* count) |
244 | 0 | { |
245 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
246 | | |
247 | | // Make sure the compiler is parsing a rule |
248 | 0 | assert(compiler->current_rule_idx != UINT32_MAX); |
249 | | |
250 | 0 | YR_RULE* rule; |
251 | 0 | int matching = 0; |
252 | |
|
253 | 0 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
254 | 0 | compiler->arena, |
255 | 0 | YR_NAMESPACES_TABLE, |
256 | 0 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
257 | | |
258 | | // Can't use yr_rules_foreach here as that requires the rules to have been |
259 | | // finalized (inserting a NULL rule at the end). This is done when |
260 | | // yr_compiler_get_rules() is called, which also inserts a HALT instruction |
261 | | // into the current position in the code arena. Obviously we aren't done |
262 | | // compiling the rules yet so inserting a HALT is a bad idea. To deal with |
263 | | // this I'm manually walking all the currently compiled rules (up to the |
264 | | // current rule index) and comparing identifiers to see if it is one we should |
265 | | // use. |
266 | | // |
267 | | // Further, we have to get compiler->current_rule_idx before we start because |
268 | | // if we emit an OP_PUSH_RULE |
269 | 0 | rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0); |
270 | |
|
271 | 0 | for (uint32_t i = 0; i <= compiler->current_rule_idx; i++) |
272 | 0 | { |
273 | | // Is rule->identifier prefixed by prefix? |
274 | 0 | if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0) |
275 | 0 | { |
276 | 0 | uint32_t rule_idx = yr_hash_table_lookup_uint32( |
277 | 0 | compiler->rules_table, rule->identifier, ns->name); |
278 | |
|
279 | 0 | if (rule_idx != UINT32_MAX) |
280 | 0 | { |
281 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
282 | 0 | yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL)); |
283 | 0 | matching++; |
284 | 0 | } |
285 | 0 | } |
286 | | |
287 | 0 | rule++; |
288 | 0 | } |
289 | | |
290 | 0 | if (count != NULL) |
291 | 0 | { |
292 | 0 | *count = matching; |
293 | 0 | } |
294 | |
|
295 | 0 | if (matching == 0) |
296 | 0 | { |
297 | 0 | yr_compiler_set_error_extra_info(compiler, prefix); |
298 | 0 | return ERROR_UNDEFINED_IDENTIFIER; |
299 | 0 | } |
300 | | |
301 | 0 | return ERROR_SUCCESS; |
302 | 0 | } |
303 | | |
304 | | int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument) |
305 | 0 | { |
306 | 0 | uint8_t opcode[9]; |
307 | 0 | int opcode_len = 1; |
308 | |
|
309 | 0 | if (argument == YR_UNDEFINED) |
310 | 0 | { |
311 | 0 | opcode[0] = OP_PUSH_U; |
312 | 0 | } |
313 | 0 | else if (argument <= 0xff) |
314 | 0 | { |
315 | 0 | opcode[0] = OP_PUSH_8; |
316 | 0 | opcode[1] = (uint8_t) argument; |
317 | 0 | opcode_len += sizeof(uint8_t); |
318 | 0 | } |
319 | 0 | else if (argument <= 0xffff) |
320 | 0 | { |
321 | 0 | opcode[0] = OP_PUSH_16; |
322 | 0 | uint16_t u = (uint16_t) argument; |
323 | 0 | memcpy(opcode + 1, &u, sizeof(uint16_t)); |
324 | 0 | opcode_len += sizeof(uint16_t); |
325 | 0 | } |
326 | 0 | else if (argument <= 0xffffffff) |
327 | 0 | { |
328 | 0 | opcode[0] = OP_PUSH_32; |
329 | 0 | uint32_t u = (uint32_t) argument; |
330 | 0 | memcpy(opcode + 1, &u, sizeof(uint32_t)); |
331 | 0 | opcode_len += sizeof(uint32_t); |
332 | 0 | } |
333 | 0 | else |
334 | 0 | { |
335 | 0 | opcode[0] = OP_PUSH; |
336 | 0 | memcpy(opcode + 1, &argument, sizeof(uint64_t)); |
337 | 0 | opcode_len += sizeof(uint64_t); |
338 | 0 | } |
339 | |
|
340 | 0 | return yr_arena_write_data( |
341 | 0 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, opcode, opcode_len, NULL); |
342 | 0 | } |
343 | | |
344 | | int yr_parser_check_types( |
345 | | YR_COMPILER* compiler, |
346 | | YR_OBJECT_FUNCTION* function, |
347 | | const char* actual_args_fmt) |
348 | 0 | { |
349 | 0 | int i; |
350 | |
|
351 | 0 | for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) |
352 | 0 | { |
353 | 0 | if (function->prototypes[i].arguments_fmt == NULL) |
354 | 0 | break; |
355 | | |
356 | 0 | if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0) |
357 | 0 | return ERROR_SUCCESS; |
358 | 0 | } |
359 | | |
360 | 0 | yr_compiler_set_error_extra_info(compiler, function->identifier) |
361 | |
|
362 | 0 | return ERROR_WRONG_ARGUMENTS; |
363 | 0 | } |
364 | | |
365 | | int yr_parser_lookup_string( |
366 | | yyscan_t yyscanner, |
367 | | const char* identifier, |
368 | | YR_STRING** string) |
369 | 0 | { |
370 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
371 | |
|
372 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
373 | 0 | compiler, compiler->current_rule_idx); |
374 | |
|
375 | 0 | yr_rule_strings_foreach(current_rule, *string) |
376 | 0 | { |
377 | | // If some string $a gets fragmented into multiple chained |
378 | | // strings, all those fragments have the same $a identifier |
379 | | // but we are interested in the heading fragment, which is |
380 | | // that with chained_to == NULL |
381 | |
|
382 | 0 | if ((*string)->chained_to == NULL && |
383 | 0 | strcmp((*string)->identifier, identifier) == 0) |
384 | 0 | { |
385 | 0 | return ERROR_SUCCESS; |
386 | 0 | } |
387 | 0 | } |
388 | | |
389 | 0 | yr_compiler_set_error_extra_info(compiler, identifier) |
390 | |
|
391 | 0 | * string = NULL; |
392 | |
|
393 | 0 | return ERROR_UNDEFINED_STRING; |
394 | 0 | } |
395 | | |
396 | | //////////////////////////////////////////////////////////////////////////////// |
397 | | // Searches for a variable with the given identifier in the scope of the current |
398 | | // "for" loop. In case of nested "for" loops the identifier is searched starting |
399 | | // at the top-level loop and going down thorough the nested loops until the |
400 | | // current one. This is ok because inner loops can not re-define an identifier |
401 | | // already defined by an outer loop. |
402 | | // |
403 | | // If the variable is found, the return value is the position that the variable |
404 | | // occupies among all the currently defined variables. If the variable doesn't |
405 | | // exist the return value is -1. |
406 | | // |
407 | | // The function can receive a pointer to a YR_EXPRESSION that will populated |
408 | | // with information about the variable if found. This pointer can be NULL if |
409 | | // the caller is not interested in getting that information. |
410 | | // |
411 | | int yr_parser_lookup_loop_variable( |
412 | | yyscan_t yyscanner, |
413 | | const char* identifier, |
414 | | YR_EXPRESSION* expr) |
415 | 2 | { |
416 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
417 | 2 | int i, j; |
418 | 2 | int var_offset = 0; |
419 | | |
420 | 2 | for (i = 0; i <= compiler->loop_index; i++) |
421 | 0 | { |
422 | 0 | var_offset += compiler->loop[i].vars_internal_count; |
423 | |
|
424 | 0 | for (j = 0; j < compiler->loop[i].vars_count; j++) |
425 | 0 | { |
426 | 0 | if (compiler->loop[i].vars[j].identifier.ptr != NULL && |
427 | 0 | strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0) |
428 | 0 | { |
429 | 0 | if (expr != NULL) |
430 | 0 | *expr = compiler->loop[i].vars[j]; |
431 | |
|
432 | 0 | return var_offset + j; |
433 | 0 | } |
434 | 0 | } |
435 | | |
436 | 0 | var_offset += compiler->loop[i].vars_count; |
437 | 0 | } |
438 | | |
439 | 2 | return -1; |
440 | 2 | } |
441 | | |
442 | | static int _yr_parser_write_string( |
443 | | const char* identifier, |
444 | | YR_MODIFIER modifier, |
445 | | YR_COMPILER* compiler, |
446 | | SIZED_STRING* str, |
447 | | RE_AST* re_ast, |
448 | | YR_ARENA_REF* string_ref, |
449 | | int* min_atom_quality, |
450 | | int* num_atom) |
451 | 0 | { |
452 | 0 | SIZED_STRING* literal_string; |
453 | 0 | YR_ATOM_LIST_ITEM* atom; |
454 | 0 | YR_ATOM_LIST_ITEM* atom_list = NULL; |
455 | |
|
456 | 0 | int c, result; |
457 | 0 | int max_string_len; |
458 | 0 | bool free_literal = false; |
459 | |
|
460 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
461 | 0 | compiler->arena, |
462 | 0 | YR_STRINGS_TABLE, |
463 | 0 | sizeof(YR_STRING), |
464 | 0 | string_ref, |
465 | 0 | offsetof(YR_STRING, identifier), |
466 | 0 | offsetof(YR_STRING, string), |
467 | 0 | offsetof(YR_STRING, chained_to), |
468 | 0 | EOL)); |
469 | |
|
470 | 0 | YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr( |
471 | 0 | compiler->arena, string_ref); |
472 | |
|
473 | 0 | YR_ARENA_REF ref; |
474 | |
|
475 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
476 | |
|
477 | 0 | string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
478 | |
|
479 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
480 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
481 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
482 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
483 | 0 | { |
484 | 0 | literal_string = yr_re_ast_extract_literal(re_ast); |
485 | |
|
486 | 0 | if (literal_string != NULL) |
487 | 0 | free_literal = true; |
488 | 0 | } |
489 | 0 | else |
490 | 0 | { |
491 | 0 | literal_string = str; |
492 | 0 | } |
493 | |
|
494 | 0 | if (literal_string != NULL) |
495 | 0 | { |
496 | 0 | modifier.flags |= STRING_FLAGS_LITERAL; |
497 | |
|
498 | 0 | result = _yr_compiler_store_data( |
499 | 0 | compiler, |
500 | 0 | literal_string->c_string, |
501 | 0 | literal_string->length + 1, // +1 to include terminating NULL |
502 | 0 | &ref); |
503 | |
|
504 | 0 | string->length = (uint32_t) literal_string->length; |
505 | 0 | string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
506 | |
|
507 | 0 | if (result == ERROR_SUCCESS) |
508 | 0 | { |
509 | 0 | result = yr_atoms_extract_from_string( |
510 | 0 | &compiler->atoms_config, |
511 | 0 | (uint8_t*) literal_string->c_string, |
512 | 0 | (int32_t) literal_string->length, |
513 | 0 | modifier, |
514 | 0 | &atom_list, |
515 | 0 | min_atom_quality); |
516 | 0 | } |
517 | 0 | } |
518 | 0 | else |
519 | 0 | { |
520 | | // Non-literal strings can't be marked as fixed offset because once we |
521 | | // find a string atom in the scanned data we don't know the offset where |
522 | | // the string should start, as the non-literal strings can contain |
523 | | // variable-length portions. |
524 | 0 | modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET; |
525 | | |
526 | | // Emit forwards code |
527 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, false); |
528 | | |
529 | | // Emit backwards code |
530 | 0 | if (result == ERROR_SUCCESS) |
531 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, true); |
532 | |
|
533 | 0 | if (result == ERROR_SUCCESS) |
534 | 0 | result = yr_atoms_extract_from_re( |
535 | 0 | &compiler->atoms_config, |
536 | 0 | re_ast, |
537 | 0 | modifier, |
538 | 0 | &atom_list, |
539 | 0 | min_atom_quality); |
540 | 0 | } |
541 | |
|
542 | 0 | string->flags = modifier.flags; |
543 | 0 | string->rule_idx = compiler->current_rule_idx; |
544 | 0 | string->idx = compiler->current_string_idx; |
545 | 0 | string->fixed_offset = YR_UNDEFINED; |
546 | |
|
547 | 0 | if (result == ERROR_SUCCESS) |
548 | 0 | { |
549 | | // Add the string to Aho-Corasick automaton. |
550 | 0 | result = yr_ac_add_string( |
551 | 0 | compiler->automaton, |
552 | 0 | string, |
553 | 0 | compiler->current_string_idx, |
554 | 0 | atom_list, |
555 | 0 | compiler->arena); |
556 | 0 | } |
557 | |
|
558 | 0 | if (modifier.flags & STRING_FLAGS_LITERAL) |
559 | 0 | { |
560 | 0 | if (modifier.flags & STRING_FLAGS_WIDE) |
561 | 0 | max_string_len = string->length * 2; |
562 | 0 | else |
563 | 0 | max_string_len = string->length; |
564 | |
|
565 | 0 | if (max_string_len <= YR_MAX_ATOM_LENGTH) |
566 | 0 | string->flags |= STRING_FLAGS_FITS_IN_ATOM; |
567 | 0 | } |
568 | |
|
569 | 0 | atom = atom_list; |
570 | 0 | c = 0; |
571 | |
|
572 | 0 | while (atom != NULL) |
573 | 0 | { |
574 | 0 | atom = atom->next; |
575 | 0 | c++; |
576 | 0 | } |
577 | |
|
578 | 0 | (*num_atom) += c; |
579 | |
|
580 | 0 | compiler->current_string_idx++; |
581 | |
|
582 | 0 | if (free_literal) |
583 | 0 | yr_free(literal_string); |
584 | |
|
585 | 0 | if (atom_list != NULL) |
586 | 0 | yr_atoms_list_destroy(atom_list); |
587 | |
|
588 | 0 | return result; |
589 | 0 | } |
590 | | |
591 | | static int _yr_parser_check_string_modifiers( |
592 | | yyscan_t yyscanner, |
593 | | YR_MODIFIER modifier) |
594 | 0 | { |
595 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
596 | | |
597 | | // xor and nocase together is not implemented. |
598 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
599 | 0 | modifier.flags & STRING_FLAGS_NO_CASE) |
600 | 0 | { |
601 | 0 | yr_compiler_set_error_extra_info( |
602 | 0 | compiler, "invalid modifier combination: xor nocase"); |
603 | 0 | return ERROR_INVALID_MODIFIER; |
604 | 0 | } |
605 | | |
606 | | // base64 and nocase together is not implemented. |
607 | 0 | if (modifier.flags & STRING_FLAGS_NO_CASE && |
608 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
609 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
610 | 0 | { |
611 | 0 | yr_compiler_set_error_extra_info( |
612 | 0 | compiler, |
613 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
614 | 0 | ? "invalid modifier combination: base64 nocase" |
615 | 0 | : "invalid modifier combination: base64wide nocase"); |
616 | 0 | return ERROR_INVALID_MODIFIER; |
617 | 0 | } |
618 | | |
619 | | // base64 and fullword together is not implemented. |
620 | 0 | if (modifier.flags & STRING_FLAGS_FULL_WORD && |
621 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
622 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
623 | 0 | { |
624 | 0 | yr_compiler_set_error_extra_info( |
625 | 0 | compiler, |
626 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
627 | 0 | ? "invalid modifier combination: base64 fullword" |
628 | 0 | : "invalid modifier combination: base64wide fullword"); |
629 | 0 | return ERROR_INVALID_MODIFIER; |
630 | 0 | } |
631 | | |
632 | | // base64 and xor together is not implemented. |
633 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
634 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
635 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
636 | 0 | { |
637 | 0 | yr_compiler_set_error_extra_info( |
638 | 0 | compiler, |
639 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
640 | 0 | ? "invalid modifier combination: base64 xor" |
641 | 0 | : "invalid modifier combination: base64wide xor"); |
642 | 0 | return ERROR_INVALID_MODIFIER; |
643 | 0 | } |
644 | | |
645 | 0 | return ERROR_SUCCESS; |
646 | 0 | } |
647 | | |
648 | | int yr_parser_reduce_string_declaration( |
649 | | yyscan_t yyscanner, |
650 | | YR_MODIFIER modifier, |
651 | | const char* identifier, |
652 | | SIZED_STRING* str, |
653 | | YR_ARENA_REF* string_ref) |
654 | 0 | { |
655 | 0 | int result = ERROR_SUCCESS; |
656 | 0 | int min_atom_quality = YR_MAX_ATOM_QUALITY; |
657 | 0 | int atom_quality; |
658 | |
|
659 | 0 | char message[512]; |
660 | |
|
661 | 0 | int32_t min_gap = 0; |
662 | 0 | int32_t max_gap = 0; |
663 | |
|
664 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
665 | |
|
666 | 0 | RE_AST* re_ast = NULL; |
667 | 0 | RE_AST* remainder_re_ast = NULL; |
668 | 0 | RE_ERROR re_error; |
669 | |
|
670 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
671 | 0 | compiler, compiler->current_rule_idx); |
672 | | |
673 | | // Determine if a string with the same identifier was already defined |
674 | | // by searching for the identifier in strings_table. |
675 | 0 | uint32_t string_idx = yr_hash_table_lookup_uint32( |
676 | 0 | compiler->strings_table, identifier, NULL); |
677 | | |
678 | | // The string was already defined, return an error. |
679 | 0 | if (string_idx != UINT32_MAX) |
680 | 0 | { |
681 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
682 | 0 | return ERROR_DUPLICATED_STRING_IDENTIFIER; |
683 | 0 | } |
684 | | |
685 | | // Empty strings are not allowed. |
686 | 0 | if (str->length == 0) |
687 | 0 | { |
688 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
689 | 0 | return ERROR_EMPTY_STRING; |
690 | 0 | } |
691 | | |
692 | 0 | if (str->flags & SIZED_STRING_FLAGS_NO_CASE) |
693 | 0 | modifier.flags |= STRING_FLAGS_NO_CASE; |
694 | |
|
695 | 0 | if (str->flags & SIZED_STRING_FLAGS_DOT_ALL) |
696 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
697 | | |
698 | | // Hex strings are always handled as DOT_ALL regexps. |
699 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
700 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
701 | |
|
702 | 0 | if (!(modifier.flags & STRING_FLAGS_WIDE) && |
703 | 0 | !(modifier.flags & STRING_FLAGS_BASE64 || |
704 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
705 | 0 | { |
706 | 0 | modifier.flags |= STRING_FLAGS_ASCII; |
707 | 0 | } |
708 | | |
709 | | // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding |
710 | | // a single match for the string is enough. This is true in |
711 | | // most cases, except when the string count (#) and string offset (@) |
712 | | // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH |
713 | | // initially, and unmarked later if required. |
714 | 0 | modifier.flags |= STRING_FLAGS_SINGLE_MATCH; |
715 | | |
716 | | // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't |
717 | | // need to be searched all over the file because the user is using the |
718 | | // "at" operator. The string must be searched at a fixed offset in the |
719 | | // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially, |
720 | | // and unmarked later if required. |
721 | 0 | modifier.flags |= STRING_FLAGS_FIXED_OFFSET; |
722 | | |
723 | | // If string identifier is $ this is an anonymous string, if not add the |
724 | | // identifier to strings_table. |
725 | 0 | if (strcmp(identifier, "$") == 0) |
726 | 0 | { |
727 | 0 | modifier.flags |= STRING_FLAGS_ANONYMOUS; |
728 | 0 | } |
729 | 0 | else |
730 | 0 | { |
731 | 0 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
732 | 0 | compiler->strings_table, |
733 | 0 | identifier, |
734 | 0 | NULL, |
735 | 0 | compiler->current_string_idx)); |
736 | 0 | } |
737 | | |
738 | | // Make sure that the the string does not have an invalid combination of |
739 | | // modifiers. |
740 | 0 | FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier)); |
741 | |
|
742 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
743 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
744 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
745 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
746 | 0 | { |
747 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
748 | 0 | result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); |
749 | 0 | else if (modifier.flags & STRING_FLAGS_REGEXP) |
750 | 0 | result = yr_re_parse(str->c_string, &re_ast, &re_error); |
751 | 0 | else |
752 | 0 | result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); |
753 | |
|
754 | 0 | if (result != ERROR_SUCCESS) |
755 | 0 | { |
756 | 0 | snprintf( |
757 | 0 | message, |
758 | 0 | sizeof(message), |
759 | 0 | "invalid %s \"%s\": %s", |
760 | 0 | (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" |
761 | 0 | : "regular expression", |
762 | 0 | identifier, |
763 | 0 | re_error.message); |
764 | |
|
765 | 0 | yr_compiler_set_error_extra_info(compiler, message); |
766 | 0 | goto _exit; |
767 | 0 | } |
768 | | |
769 | 0 | if (re_ast->flags & RE_FLAGS_FAST_REGEXP) |
770 | 0 | modifier.flags |= STRING_FLAGS_FAST_REGEXP; |
771 | |
|
772 | 0 | if (re_ast->flags & RE_FLAGS_GREEDY) |
773 | 0 | modifier.flags |= STRING_FLAGS_GREEDY_REGEXP; |
774 | | |
775 | | // Regular expressions in the strings section can't mix greedy and |
776 | | // ungreedy quantifiers like .* and .*?. That's because these regular |
777 | | // expressions can be matched forwards and/or backwards depending on the |
778 | | // atom found, and we need the regexp to be all-greedy or all-ungreedy to |
779 | | // be able to properly calculate the length of the match. |
780 | |
|
781 | 0 | if ((re_ast->flags & RE_FLAGS_GREEDY) && |
782 | 0 | (re_ast->flags & RE_FLAGS_UNGREEDY)) |
783 | 0 | { |
784 | 0 | result = ERROR_INVALID_REGULAR_EXPRESSION; |
785 | |
|
786 | 0 | yr_compiler_set_error_extra_info( |
787 | 0 | compiler, |
788 | 0 | "greedy and ungreedy quantifiers can't be mixed in a regular " |
789 | 0 | "expression"); |
790 | |
|
791 | 0 | goto _exit; |
792 | 0 | } |
793 | | |
794 | 0 | if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast)) |
795 | 0 | { |
796 | 0 | yywarning( |
797 | 0 | yyscanner, |
798 | 0 | "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} " |
799 | 0 | "with a reasonable value for N", |
800 | 0 | identifier); |
801 | 0 | } |
802 | |
|
803 | 0 | if (compiler->re_ast_callback != NULL) |
804 | 0 | { |
805 | 0 | compiler->re_ast_callback( |
806 | 0 | current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data); |
807 | 0 | } |
808 | |
|
809 | 0 | *string_ref = YR_ARENA_NULL_REF; |
810 | |
|
811 | 0 | while (re_ast != NULL) |
812 | 0 | { |
813 | 0 | YR_ARENA_REF ref; |
814 | |
|
815 | 0 | uint32_t prev_string_idx = compiler->current_string_idx - 1; |
816 | |
|
817 | 0 | int32_t prev_min_gap = min_gap; |
818 | 0 | int32_t prev_max_gap = max_gap; |
819 | |
|
820 | 0 | result = yr_re_ast_split_at_chaining_point( |
821 | 0 | re_ast, &remainder_re_ast, &min_gap, &max_gap); |
822 | |
|
823 | 0 | if (result != ERROR_SUCCESS) |
824 | 0 | goto _exit; |
825 | | |
826 | 0 | result = _yr_parser_write_string( |
827 | 0 | identifier, |
828 | 0 | modifier, |
829 | 0 | compiler, |
830 | 0 | NULL, |
831 | 0 | re_ast, |
832 | 0 | &ref, |
833 | 0 | &atom_quality, |
834 | 0 | ¤t_rule->num_atoms); |
835 | |
|
836 | 0 | if (result != ERROR_SUCCESS) |
837 | 0 | goto _exit; |
838 | | |
839 | 0 | if (atom_quality < min_atom_quality) |
840 | 0 | min_atom_quality = atom_quality; |
841 | |
|
842 | 0 | if (YR_ARENA_IS_NULL_REF(*string_ref)) |
843 | 0 | { |
844 | | // This is the first string in the chain, the string reference |
845 | | // returned by this function must point to this string. |
846 | 0 | *string_ref = ref; |
847 | 0 | } |
848 | 0 | else |
849 | 0 | { |
850 | | // This is not the first string in the chain, set the appropriate |
851 | | // flags and fill the chained_to, chain_gap_min and chain_gap_max |
852 | | // fields. |
853 | 0 | YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr( |
854 | 0 | compiler->arena, |
855 | 0 | YR_STRINGS_TABLE, |
856 | 0 | prev_string_idx * sizeof(YR_STRING)); |
857 | |
|
858 | 0 | YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr( |
859 | 0 | compiler->arena, &ref); |
860 | |
|
861 | 0 | new_string->chained_to = prev_string; |
862 | 0 | new_string->chain_gap_min = prev_min_gap; |
863 | 0 | new_string->chain_gap_max = prev_max_gap; |
864 | | |
865 | | // A string chained to another one can't have a fixed offset, only the |
866 | | // head of the string chain can have a fixed offset. |
867 | 0 | new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
868 | | |
869 | | // There is a previous string, but that string wasn't marked as part |
870 | | // of a chain because we can't do that until knowing there will be |
871 | | // another string, let's flag it now the we know. |
872 | 0 | prev_string->flags |= STRING_FLAGS_CHAIN_PART; |
873 | | |
874 | | // There is a previous string, so this string is part of a chain, but |
875 | | // there will be no more strings because there are no more AST to |
876 | | // split, which means that this is the chain's tail. |
877 | 0 | if (remainder_re_ast == NULL) |
878 | 0 | new_string->flags |= STRING_FLAGS_CHAIN_PART | |
879 | 0 | STRING_FLAGS_CHAIN_TAIL; |
880 | 0 | } |
881 | |
|
882 | 0 | yr_re_ast_destroy(re_ast); |
883 | 0 | re_ast = remainder_re_ast; |
884 | 0 | } |
885 | 0 | } |
886 | 0 | else // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or |
887 | | // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE |
888 | 0 | { |
889 | 0 | result = _yr_parser_write_string( |
890 | 0 | identifier, |
891 | 0 | modifier, |
892 | 0 | compiler, |
893 | 0 | str, |
894 | 0 | NULL, |
895 | 0 | string_ref, |
896 | 0 | &min_atom_quality, |
897 | 0 | ¤t_rule->num_atoms); |
898 | |
|
899 | 0 | if (result != ERROR_SUCCESS) |
900 | 0 | goto _exit; |
901 | 0 | } |
902 | | |
903 | 0 | if (min_atom_quality < compiler->atoms_config.quality_warning_threshold) |
904 | 0 | { |
905 | 0 | yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier); |
906 | 0 | } |
907 | |
|
908 | 0 | _exit: |
909 | |
|
910 | 0 | if (re_ast != NULL) |
911 | 0 | yr_re_ast_destroy(re_ast); |
912 | |
|
913 | 0 | if (remainder_re_ast != NULL) |
914 | 0 | yr_re_ast_destroy(remainder_re_ast); |
915 | |
|
916 | 0 | return result; |
917 | 0 | } |
918 | | |
919 | | static int wildcard_iterator( |
920 | | void* prefix, |
921 | | size_t prefix_len, |
922 | | void* _value, |
923 | | void* data) |
924 | 0 | { |
925 | 0 | const char* identifier = (const char*) data; |
926 | | |
927 | | // If the identifier is prefixed by prefix, then it matches the wildcard. |
928 | 0 | if (!strncmp(prefix, identifier, prefix_len)) |
929 | 0 | return ERROR_IDENTIFIER_MATCHES_WILDCARD; |
930 | | |
931 | 0 | return ERROR_SUCCESS; |
932 | 0 | } |
933 | | |
934 | | int yr_parser_reduce_rule_declaration_phase_1( |
935 | | yyscan_t yyscanner, |
936 | | int32_t flags, |
937 | | const char* identifier, |
938 | | YR_ARENA_REF* rule_ref) |
939 | 2 | { |
940 | 2 | int result; |
941 | 2 | YR_FIXUP* fixup; |
942 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
943 | | |
944 | 2 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
945 | 2 | compiler->arena, |
946 | 2 | YR_NAMESPACES_TABLE, |
947 | 2 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
948 | | |
949 | 2 | if (yr_hash_table_lookup_uint32( |
950 | 2 | compiler->rules_table, identifier, ns->name) != UINT32_MAX || |
951 | 2 | yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL) |
952 | 0 | { |
953 | | // A rule or variable with the same identifier already exists, return the |
954 | | // appropriate error. |
955 | |
|
956 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
957 | 0 | return ERROR_DUPLICATED_IDENTIFIER; |
958 | 0 | } |
959 | | |
960 | | // Iterate over all identifiers in wildcard_identifiers_table, and check if |
961 | | // any of them are a prefix of the identifier being declared. If so, return |
962 | | // ERROR_IDENTIFIER_MATCHES_WILDCARD. |
963 | 2 | result = yr_hash_table_iterate( |
964 | 2 | compiler->wildcard_identifiers_table, |
965 | 2 | ns->name, |
966 | 2 | wildcard_iterator, |
967 | 2 | (void*) identifier); |
968 | | |
969 | 2 | if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD) |
970 | 0 | { |
971 | | // This rule matches an existing wildcard rule set. |
972 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
973 | 0 | } |
974 | | |
975 | 2 | FAIL_ON_ERROR(result); |
976 | | |
977 | 2 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
978 | 2 | compiler->arena, |
979 | 2 | YR_RULES_TABLE, |
980 | 2 | sizeof(YR_RULE), |
981 | 2 | rule_ref, |
982 | 2 | offsetof(YR_RULE, identifier), |
983 | 2 | offsetof(YR_RULE, tags), |
984 | 2 | offsetof(YR_RULE, strings), |
985 | 2 | offsetof(YR_RULE, metas), |
986 | 2 | offsetof(YR_RULE, ns), |
987 | 2 | EOL)); |
988 | | |
989 | 2 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
990 | | |
991 | 2 | YR_ARENA_REF ref; |
992 | | |
993 | 2 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
994 | | |
995 | 2 | rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
996 | 2 | rule->flags = flags; |
997 | 2 | rule->ns = ns; |
998 | 2 | rule->num_atoms = 0; |
999 | | |
1000 | 2 | YR_ARENA_REF jmp_offset_ref; |
1001 | | |
1002 | | // We are starting to parse a new rule, set current_rule_idx accordingly. |
1003 | 2 | compiler->current_rule_idx = compiler->next_rule_idx; |
1004 | 2 | compiler->next_rule_idx++; |
1005 | | |
1006 | | // The OP_INIT_RULE instruction behaves like a jump. When the rule is |
1007 | | // disabled it skips over the rule's code and go straight to the next rule's |
1008 | | // code. The jmp_offset_ref variable points to the jump's offset. The offset |
1009 | | // is set to 0 as we don't know the jump target yet. When we finish |
1010 | | // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2 |
1011 | | // the jump offset is set to its final value. |
1012 | | |
1013 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg_int32( |
1014 | 2 | yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref)); |
1015 | | |
1016 | 2 | FAIL_ON_ERROR(yr_arena_write_data( |
1017 | 2 | compiler->arena, |
1018 | 2 | YR_CODE_SECTION, |
1019 | 2 | &compiler->current_rule_idx, |
1020 | 2 | sizeof(compiler->current_rule_idx), |
1021 | 2 | NULL)); |
1022 | | |
1023 | | // Create a fixup entry for the jump and push it in the stack |
1024 | 2 | fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP)); |
1025 | | |
1026 | 2 | if (fixup == NULL) |
1027 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1028 | | |
1029 | 2 | fixup->ref = jmp_offset_ref; |
1030 | 2 | fixup->next = compiler->fixup_stack_head; |
1031 | 2 | compiler->fixup_stack_head = fixup; |
1032 | | |
1033 | | // Clean strings_table as we are starting to parse a new rule. |
1034 | 2 | yr_hash_table_clean(compiler->strings_table, NULL); |
1035 | | |
1036 | 2 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
1037 | 2 | compiler->rules_table, identifier, ns->name, compiler->current_rule_idx)); |
1038 | | |
1039 | 2 | return ERROR_SUCCESS; |
1040 | 2 | } |
1041 | | |
1042 | | int yr_parser_reduce_rule_declaration_phase_2( |
1043 | | yyscan_t yyscanner, |
1044 | | YR_ARENA_REF* rule_ref) |
1045 | 2 | { |
1046 | 2 | uint32_t max_strings_per_rule; |
1047 | 2 | uint32_t strings_in_rule = 0; |
1048 | | |
1049 | 2 | YR_FIXUP* fixup; |
1050 | 2 | YR_STRING* string; |
1051 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1052 | | |
1053 | 2 | yr_get_configuration_uint32( |
1054 | 2 | YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule); |
1055 | | |
1056 | 2 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1057 | | |
1058 | | // Show warning if the rule is generating too many atoms. The warning is |
1059 | | // shown if the number of atoms is greater than 20 times the maximum number |
1060 | | // of strings allowed for a rule, as 20 is minimum number of atoms generated |
1061 | | // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously. |
1062 | | |
1063 | 2 | if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD) |
1064 | 0 | { |
1065 | 0 | yywarning(yyscanner, "rule is slowing down scanning"); |
1066 | 0 | } |
1067 | | |
1068 | 2 | yr_rule_strings_foreach(rule, string) |
1069 | 0 | { |
1070 | | // Only the heading fragment in a chain of strings (the one with |
1071 | | // chained_to == NULL) must be referenced. All other fragments |
1072 | | // are never marked as referenced. |
1073 | |
|
1074 | 0 | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL) |
1075 | 0 | { |
1076 | 0 | yr_compiler_set_error_extra_info( |
1077 | 0 | compiler, string->identifier) return ERROR_UNREFERENCED_STRING; |
1078 | 0 | } |
1079 | | |
1080 | 0 | strings_in_rule++; |
1081 | |
|
1082 | 0 | if (strings_in_rule > max_strings_per_rule) |
1083 | 0 | { |
1084 | 0 | yr_compiler_set_error_extra_info( |
1085 | 0 | compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS; |
1086 | 0 | } |
1087 | 0 | } |
1088 | | |
1089 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1090 | 2 | yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL)); |
1091 | | |
1092 | 2 | fixup = compiler->fixup_stack_head; |
1093 | | |
1094 | 2 | int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr( |
1095 | 2 | compiler->arena, &fixup->ref); |
1096 | | |
1097 | 2 | int32_t jmp_offset = yr_arena_get_current_offset( |
1098 | 2 | compiler->arena, YR_CODE_SECTION) - |
1099 | 2 | fixup->ref.offset + 1; |
1100 | | |
1101 | 2 | memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset)); |
1102 | | |
1103 | | // Remove fixup from the stack. |
1104 | 2 | compiler->fixup_stack_head = fixup->next; |
1105 | 2 | yr_free(fixup); |
1106 | | |
1107 | | // We have finished parsing the current rule set current_rule_idx to |
1108 | | // UINT32_MAX indicating that we are not currently parsing a rule. |
1109 | 2 | compiler->current_rule_idx = UINT32_MAX; |
1110 | | |
1111 | 2 | return ERROR_SUCCESS; |
1112 | 2 | } |
1113 | | |
1114 | | int yr_parser_reduce_string_identifier( |
1115 | | yyscan_t yyscanner, |
1116 | | const char* identifier, |
1117 | | uint8_t instruction, |
1118 | | uint64_t at_offset) |
1119 | 0 | { |
1120 | 0 | YR_STRING* string; |
1121 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1122 | |
|
1123 | 0 | if (strcmp(identifier, "$") == 0) // is an anonymous string ? |
1124 | 0 | { |
1125 | 0 | if (compiler->loop_for_of_var_index >= 0) // inside a loop ? |
1126 | 0 | { |
1127 | 0 | yr_parser_emit_with_arg( |
1128 | 0 | yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL); |
1129 | |
|
1130 | 0 | yr_parser_emit(yyscanner, instruction, NULL); |
1131 | |
|
1132 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
1133 | 0 | compiler, compiler->current_rule_idx); |
1134 | |
|
1135 | 0 | yr_rule_strings_foreach(current_rule, string) |
1136 | 0 | { |
1137 | 0 | if (instruction != OP_FOUND) |
1138 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1139 | |
|
1140 | 0 | if (instruction == OP_FOUND_AT) |
1141 | 0 | { |
1142 | | // Avoid overwriting any previous fixed offset |
1143 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1144 | 0 | string->fixed_offset = at_offset; |
1145 | | |
1146 | | // If a previous fixed offset was different, disable |
1147 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1148 | | // have room to store a single fixed offset value |
1149 | 0 | if (string->fixed_offset != at_offset) |
1150 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1151 | 0 | } |
1152 | 0 | else |
1153 | 0 | { |
1154 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1155 | 0 | } |
1156 | 0 | } |
1157 | 0 | } |
1158 | 0 | else |
1159 | 0 | { |
1160 | | // Anonymous strings not allowed outside of a loop |
1161 | 0 | return ERROR_MISPLACED_ANONYMOUS_STRING; |
1162 | 0 | } |
1163 | 0 | } |
1164 | 0 | else |
1165 | 0 | { |
1166 | 0 | FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string)); |
1167 | |
|
1168 | 0 | FAIL_ON_ERROR( |
1169 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL)); |
1170 | |
|
1171 | 0 | if (instruction != OP_FOUND) |
1172 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1173 | |
|
1174 | 0 | if (instruction == OP_FOUND_AT) |
1175 | 0 | { |
1176 | | // Avoid overwriting any previous fixed offset |
1177 | |
|
1178 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1179 | 0 | string->fixed_offset = at_offset; |
1180 | | |
1181 | | // If a previous fixed offset was different, disable |
1182 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1183 | | // have room to store a single fixed offset value |
1184 | |
|
1185 | 0 | if (string->fixed_offset == YR_UNDEFINED || |
1186 | 0 | string->fixed_offset != at_offset) |
1187 | 0 | { |
1188 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1189 | 0 | } |
1190 | 0 | } |
1191 | 0 | else |
1192 | 0 | { |
1193 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1194 | 0 | } |
1195 | |
|
1196 | 0 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL)); |
1197 | |
|
1198 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
1199 | 0 | } |
1200 | | |
1201 | 0 | return ERROR_SUCCESS; |
1202 | 0 | } |
1203 | | |
1204 | | int yr_parser_reduce_meta_declaration( |
1205 | | yyscan_t yyscanner, |
1206 | | int32_t type, |
1207 | | const char* identifier, |
1208 | | const char* string, |
1209 | | int64_t integer, |
1210 | | YR_ARENA_REF* meta_ref) |
1211 | 0 | { |
1212 | 0 | YR_ARENA_REF ref; |
1213 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1214 | |
|
1215 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1216 | 0 | compiler->arena, |
1217 | 0 | YR_METAS_TABLE, |
1218 | 0 | sizeof(YR_META), |
1219 | 0 | meta_ref, |
1220 | 0 | offsetof(YR_META, identifier), |
1221 | 0 | offsetof(YR_META, string), |
1222 | 0 | EOL)); |
1223 | |
|
1224 | 0 | YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref); |
1225 | |
|
1226 | 0 | meta->type = type; |
1227 | 0 | meta->integer = integer; |
1228 | |
|
1229 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1230 | |
|
1231 | 0 | meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1232 | |
|
1233 | 0 | if (string != NULL) |
1234 | 0 | { |
1235 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref)); |
1236 | |
|
1237 | 0 | meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1238 | 0 | } |
1239 | 0 | else |
1240 | 0 | { |
1241 | 0 | meta->string = NULL; |
1242 | 0 | } |
1243 | | |
1244 | 0 | compiler->current_meta_idx++; |
1245 | |
|
1246 | 0 | return ERROR_SUCCESS; |
1247 | 0 | } |
1248 | | |
1249 | | static int _yr_parser_valid_module_name(SIZED_STRING* module_name) |
1250 | 2 | { |
1251 | 2 | if (module_name->length == 0) |
1252 | 0 | return false; |
1253 | | |
1254 | 2 | if (strlen(module_name->c_string) != module_name->length) |
1255 | 0 | return false; |
1256 | | |
1257 | 2 | return true; |
1258 | 2 | } |
1259 | | |
1260 | | int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name) |
1261 | 2 | { |
1262 | 2 | int result; |
1263 | | |
1264 | 2 | YR_ARENA_REF ref; |
1265 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1266 | 2 | YR_OBJECT* module_structure; |
1267 | | |
1268 | 2 | if (!_yr_parser_valid_module_name(module_name)) |
1269 | 0 | { |
1270 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1271 | |
|
1272 | 0 | return ERROR_INVALID_MODULE_NAME; |
1273 | 0 | } |
1274 | | |
1275 | 2 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
1276 | 2 | compiler->arena, |
1277 | 2 | YR_NAMESPACES_TABLE, |
1278 | 2 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
1279 | | |
1280 | 2 | module_structure = (YR_OBJECT*) yr_hash_table_lookup( |
1281 | 2 | compiler->objects_table, module_name->c_string, ns->name); |
1282 | | |
1283 | | // if module already imported, do nothing |
1284 | | |
1285 | 2 | if (module_structure != NULL) |
1286 | 0 | return ERROR_SUCCESS; |
1287 | | |
1288 | 2 | FAIL_ON_ERROR(yr_object_create( |
1289 | 2 | OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure)); |
1290 | | |
1291 | 2 | FAIL_ON_ERROR(yr_hash_table_add( |
1292 | 2 | compiler->objects_table, |
1293 | 2 | module_name->c_string, |
1294 | 2 | ns->name, |
1295 | 2 | module_structure)); |
1296 | | |
1297 | 2 | result = yr_modules_do_declarations(module_name->c_string, module_structure); |
1298 | | |
1299 | 2 | if (result == ERROR_UNKNOWN_MODULE) |
1300 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1301 | | |
1302 | 2 | if (result != ERROR_SUCCESS) |
1303 | 0 | return result; |
1304 | | |
1305 | 2 | FAIL_ON_ERROR( |
1306 | 2 | _yr_compiler_store_string(compiler, module_name->c_string, &ref)); |
1307 | | |
1308 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc( |
1309 | 2 | yyscanner, |
1310 | 2 | OP_IMPORT, |
1311 | 2 | yr_arena_ref_to_ptr(compiler->arena, &ref), |
1312 | 2 | NULL, |
1313 | 2 | NULL)); |
1314 | | |
1315 | 2 | return ERROR_SUCCESS; |
1316 | 2 | } |
1317 | | |
1318 | | static int _yr_parser_operator_to_opcode(const char* op, int expression_type) |
1319 | 2 | { |
1320 | 2 | int opcode = 0; |
1321 | | |
1322 | 2 | switch (expression_type) |
1323 | 2 | { |
1324 | 0 | case EXPRESSION_TYPE_INTEGER: |
1325 | 0 | opcode = OP_INT_BEGIN; |
1326 | 0 | break; |
1327 | 0 | case EXPRESSION_TYPE_FLOAT: |
1328 | 0 | opcode = OP_DBL_BEGIN; |
1329 | 0 | break; |
1330 | 2 | case EXPRESSION_TYPE_STRING: |
1331 | 2 | opcode = OP_STR_BEGIN; |
1332 | 2 | break; |
1333 | 0 | default: |
1334 | 0 | assert(false); |
1335 | 2 | } |
1336 | | |
1337 | 2 | if (op[0] == '<') |
1338 | 0 | { |
1339 | 0 | if (op[1] == '=') |
1340 | 0 | opcode += _OP_LE; |
1341 | 0 | else |
1342 | 0 | opcode += _OP_LT; |
1343 | 0 | } |
1344 | 2 | else if (op[0] == '>') |
1345 | 0 | { |
1346 | 0 | if (op[1] == '=') |
1347 | 0 | opcode += _OP_GE; |
1348 | 0 | else |
1349 | 0 | opcode += _OP_GT; |
1350 | 0 | } |
1351 | 2 | else if (op[1] == '=') |
1352 | 2 | { |
1353 | 2 | if (op[0] == '=') |
1354 | 2 | opcode += _OP_EQ; |
1355 | 0 | else |
1356 | 0 | opcode += _OP_NEQ; |
1357 | 2 | } |
1358 | 0 | else if (op[0] == '+') |
1359 | 0 | { |
1360 | 0 | opcode += _OP_ADD; |
1361 | 0 | } |
1362 | 0 | else if (op[0] == '-') |
1363 | 0 | { |
1364 | 0 | opcode += _OP_SUB; |
1365 | 0 | } |
1366 | 0 | else if (op[0] == '*') |
1367 | 0 | { |
1368 | 0 | opcode += _OP_MUL; |
1369 | 0 | } |
1370 | 0 | else if (op[0] == '\\') |
1371 | 0 | { |
1372 | 0 | opcode += _OP_DIV; |
1373 | 0 | } |
1374 | | |
1375 | 2 | if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode)) |
1376 | 2 | { |
1377 | 2 | return opcode; |
1378 | 2 | } |
1379 | | |
1380 | 0 | return OP_ERROR; |
1381 | 2 | } |
1382 | | |
1383 | | int yr_parser_reduce_operation( |
1384 | | yyscan_t yyscanner, |
1385 | | const char* op, |
1386 | | YR_EXPRESSION left_operand, |
1387 | | YR_EXPRESSION right_operand) |
1388 | 2 | { |
1389 | 2 | int expression_type; |
1390 | | |
1391 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1392 | | |
1393 | 2 | if ((left_operand.type == EXPRESSION_TYPE_INTEGER || |
1394 | 2 | left_operand.type == EXPRESSION_TYPE_FLOAT) && |
1395 | 2 | (right_operand.type == EXPRESSION_TYPE_INTEGER || |
1396 | 0 | right_operand.type == EXPRESSION_TYPE_FLOAT)) |
1397 | 0 | { |
1398 | 0 | if (left_operand.type != right_operand.type) |
1399 | 0 | { |
1400 | | // One operand is double and the other is integer, |
1401 | | // cast the integer to double |
1402 | |
|
1403 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1404 | 0 | yyscanner, |
1405 | 0 | OP_INT_TO_DBL, |
1406 | 0 | (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1, |
1407 | 0 | NULL, |
1408 | 0 | NULL)); |
1409 | 0 | } |
1410 | | |
1411 | 0 | expression_type = EXPRESSION_TYPE_FLOAT; |
1412 | |
|
1413 | 0 | if (left_operand.type == EXPRESSION_TYPE_INTEGER && |
1414 | 0 | right_operand.type == EXPRESSION_TYPE_INTEGER) |
1415 | 0 | { |
1416 | 0 | expression_type = EXPRESSION_TYPE_INTEGER; |
1417 | 0 | } |
1418 | |
|
1419 | 0 | FAIL_ON_ERROR(yr_parser_emit( |
1420 | 0 | yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL)); |
1421 | 0 | } |
1422 | 2 | else if ( |
1423 | 2 | left_operand.type == EXPRESSION_TYPE_STRING && |
1424 | 2 | right_operand.type == EXPRESSION_TYPE_STRING) |
1425 | 2 | { |
1426 | 2 | int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING); |
1427 | | |
1428 | 2 | if (opcode != OP_ERROR) |
1429 | 2 | { |
1430 | 2 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL)); |
1431 | 2 | } |
1432 | 0 | else |
1433 | 0 | { |
1434 | 0 | yr_compiler_set_error_extra_info_fmt( |
1435 | 0 | compiler, "strings don't support \"%s\" operation", op); |
1436 | |
|
1437 | 0 | return ERROR_WRONG_TYPE; |
1438 | 0 | } |
1439 | 2 | } |
1440 | 0 | else |
1441 | 0 | { |
1442 | 0 | yr_compiler_set_error_extra_info(compiler, "type mismatch"); |
1443 | |
|
1444 | 0 | return ERROR_WRONG_TYPE; |
1445 | 0 | } |
1446 | | |
1447 | 2 | return ERROR_SUCCESS; |
1448 | 2 | } |