/src/yara/libyara/parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2013. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <limits.h> |
31 | | #include <stddef.h> |
32 | | #include <string.h> |
33 | | #include <yara/ahocorasick.h> |
34 | | #include <yara/arena.h> |
35 | | #include <yara/base64.h> |
36 | | #include <yara/error.h> |
37 | | #include <yara/exec.h> |
38 | | #include <yara/integers.h> |
39 | | #include <yara/mem.h> |
40 | | #include <yara/modules.h> |
41 | | #include <yara/object.h> |
42 | | #include <yara/parser.h> |
43 | | #include <yara/re.h> |
44 | | #include <yara/strutils.h> |
45 | | #include <yara/utils.h> |
46 | | |
// Converts a hexadecimal digit character to its numeric value. Only the
// decimal digits '0'-'9' and the uppercase letters 'A'-'F' are recognised;
// any other character falls through to the "x - '0'" branch.
//
// The whole expansion and every use of the argument are parenthesized so that
// the macro can be used safely inside larger expressions, for example
// (todigit(hi) << 4) | todigit(lo). The argument is evaluated more than once,
// so it must not have side effects.
#define todigit(x)                                            \
  (((x) >= 'A' && (x) <= 'F') ? ((uint8_t) ((x) - 'A' + 10)) \
                              : ((uint8_t) ((x) - '0')))
50 | | |
51 | | int yr_parser_emit( |
52 | | yyscan_t yyscanner, |
53 | | uint8_t instruction, |
54 | | YR_ARENA_REF* instruction_ref) |
55 | 4 | { |
56 | 4 | return yr_arena_write_data( |
57 | 4 | yyget_extra(yyscanner)->arena, |
58 | 4 | YR_CODE_SECTION, |
59 | 4 | &instruction, |
60 | 4 | sizeof(uint8_t), |
61 | 4 | instruction_ref); |
62 | 4 | } |
63 | | |
64 | | int yr_parser_emit_with_arg_double( |
65 | | yyscan_t yyscanner, |
66 | | uint8_t instruction, |
67 | | double argument, |
68 | | YR_ARENA_REF* instruction_ref, |
69 | | YR_ARENA_REF* argument_ref) |
70 | 0 | { |
71 | 0 | int result = yr_arena_write_data( |
72 | 0 | yyget_extra(yyscanner)->arena, |
73 | 0 | YR_CODE_SECTION, |
74 | 0 | &instruction, |
75 | 0 | sizeof(uint8_t), |
76 | 0 | instruction_ref); |
77 | |
|
78 | 0 | if (result == ERROR_SUCCESS) |
79 | 0 | result = yr_arena_write_data( |
80 | 0 | yyget_extra(yyscanner)->arena, |
81 | 0 | YR_CODE_SECTION, |
82 | 0 | &argument, |
83 | 0 | sizeof(double), |
84 | 0 | argument_ref); |
85 | |
|
86 | 0 | return result; |
87 | 0 | } |
88 | | |
89 | | int yr_parser_emit_with_arg_int32( |
90 | | yyscan_t yyscanner, |
91 | | uint8_t instruction, |
92 | | int32_t argument, |
93 | | YR_ARENA_REF* instruction_ref, |
94 | | YR_ARENA_REF* argument_ref) |
95 | 2 | { |
96 | 2 | int result = yr_arena_write_data( |
97 | 2 | yyget_extra(yyscanner)->arena, |
98 | 2 | YR_CODE_SECTION, |
99 | 2 | &instruction, |
100 | 2 | sizeof(uint8_t), |
101 | 2 | instruction_ref); |
102 | | |
103 | 2 | if (result == ERROR_SUCCESS) |
104 | 2 | result = yr_arena_write_data( |
105 | 2 | yyget_extra(yyscanner)->arena, |
106 | 2 | YR_CODE_SECTION, |
107 | 2 | &argument, |
108 | 2 | sizeof(int32_t), |
109 | 2 | argument_ref); |
110 | | |
111 | 2 | return result; |
112 | 2 | } |
113 | | |
114 | | int yr_parser_emit_with_arg( |
115 | | yyscan_t yyscanner, |
116 | | uint8_t instruction, |
117 | | int64_t argument, |
118 | | YR_ARENA_REF* instruction_ref, |
119 | | YR_ARENA_REF* argument_ref) |
120 | 2 | { |
121 | 2 | int result = yr_arena_write_data( |
122 | 2 | yyget_extra(yyscanner)->arena, |
123 | 2 | YR_CODE_SECTION, |
124 | 2 | &instruction, |
125 | 2 | sizeof(uint8_t), |
126 | 2 | instruction_ref); |
127 | | |
128 | 2 | if (result == ERROR_SUCCESS) |
129 | 2 | result = yr_arena_write_data( |
130 | 2 | yyget_extra(yyscanner)->arena, |
131 | 2 | YR_CODE_SECTION, |
132 | 2 | &argument, |
133 | 2 | sizeof(int64_t), |
134 | 2 | argument_ref); |
135 | | |
136 | 2 | return result; |
137 | 2 | } |
138 | | |
139 | | int yr_parser_emit_with_arg_reloc( |
140 | | yyscan_t yyscanner, |
141 | | uint8_t instruction, |
142 | | void* argument, |
143 | | YR_ARENA_REF* instruction_ref, |
144 | | YR_ARENA_REF* argument_ref) |
145 | 8 | { |
146 | 8 | YR_ARENA_REF ref = YR_ARENA_NULL_REF; |
147 | | |
148 | 8 | DECLARE_REFERENCE(void*, ptr) arg; |
149 | | |
150 | 8 | memset(&arg, 0, sizeof(arg)); |
151 | 8 | arg.ptr = argument; |
152 | | |
153 | 8 | int result = yr_arena_write_data( |
154 | 8 | yyget_extra(yyscanner)->arena, |
155 | 8 | YR_CODE_SECTION, |
156 | 8 | &instruction, |
157 | 8 | sizeof(uint8_t), |
158 | 8 | instruction_ref); |
159 | | |
160 | 8 | if (result == ERROR_SUCCESS) |
161 | 8 | result = yr_arena_write_data( |
162 | 8 | yyget_extra(yyscanner)->arena, |
163 | 8 | YR_CODE_SECTION, |
164 | 8 | &arg, |
165 | 8 | sizeof(arg), |
166 | 8 | &ref); |
167 | | |
168 | 8 | if (result == ERROR_SUCCESS) |
169 | 8 | result = yr_arena_make_ptr_relocatable( |
170 | 8 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, ref.offset, EOL); |
171 | | |
172 | 8 | if (argument_ref != NULL) |
173 | 0 | *argument_ref = ref; |
174 | | |
175 | 8 | return result; |
176 | 8 | } |
177 | | |
178 | | int yr_parser_emit_pushes_for_strings( |
179 | | yyscan_t yyscanner, |
180 | | const char* identifier, |
181 | | int* count) |
182 | 0 | { |
183 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
184 | |
|
185 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
186 | 0 | compiler, compiler->current_rule_idx); |
187 | |
|
188 | 0 | YR_STRING* string; |
189 | |
|
190 | 0 | const char* string_identifier; |
191 | 0 | const char* target_identifier; |
192 | |
|
193 | 0 | int matching = 0; |
194 | |
|
195 | 0 | yr_rule_strings_foreach(current_rule, string) |
196 | 0 | { |
197 | | // Don't generate pushes for strings chained to another one, we are |
198 | | // only interested in non-chained strings or the head of the chain. |
199 | |
|
200 | 0 | if (string->chained_to == NULL) |
201 | 0 | { |
202 | 0 | string_identifier = string->identifier; |
203 | 0 | target_identifier = identifier; |
204 | |
|
205 | 0 | while (*target_identifier != '\0' && *string_identifier != '\0' && |
206 | 0 | *target_identifier == *string_identifier) |
207 | 0 | { |
208 | 0 | target_identifier++; |
209 | 0 | string_identifier++; |
210 | 0 | } |
211 | |
|
212 | 0 | if ((*target_identifier == '\0' && *string_identifier == '\0') || |
213 | 0 | *target_identifier == '*') |
214 | 0 | { |
215 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL); |
216 | |
|
217 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
218 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
219 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
220 | 0 | matching++; |
221 | 0 | } |
222 | 0 | } |
223 | 0 | } |
224 | |
|
225 | 0 | if (count != NULL) |
226 | 0 | { |
227 | 0 | *count = matching; |
228 | 0 | } |
229 | |
|
230 | 0 | if (matching == 0) |
231 | 0 | { |
232 | 0 | yr_compiler_set_error_extra_info( |
233 | 0 | compiler, identifier) return ERROR_UNDEFINED_STRING; |
234 | 0 | } |
235 | | |
236 | 0 | return ERROR_SUCCESS; |
237 | 0 | } |
238 | | |
239 | | // Emit OP_PUSH_RULE instructions for all rules whose identifier has given |
240 | | // prefix. |
241 | | int yr_parser_emit_pushes_for_rules( |
242 | | yyscan_t yyscanner, |
243 | | const char* prefix, |
244 | | int* count) |
245 | 0 | { |
246 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
247 | | |
248 | | // Make sure the compiler is parsing a rule |
249 | 0 | assert(compiler->current_rule_idx != UINT32_MAX); |
250 | | |
251 | 0 | YR_RULE* rule; |
252 | 0 | int matching = 0; |
253 | |
|
254 | 0 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
255 | 0 | compiler->arena, |
256 | 0 | YR_NAMESPACES_TABLE, |
257 | 0 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
258 | | |
259 | | // Can't use yr_rules_foreach here as that requires the rules to have been |
260 | | // finalized (inserting a NULL rule at the end). This is done when |
261 | | // yr_compiler_get_rules() is called, which also inserts a HALT instruction |
262 | | // into the current position in the code arena. Obviously we aren't done |
263 | | // compiling the rules yet so inserting a HALT is a bad idea. To deal with |
264 | | // this I'm manually walking all the currently compiled rules (up to the |
265 | | // current rule index) and comparing identifiers to see if it is one we should |
266 | | // use. |
267 | | // |
268 | | // Further, we have to get compiler->current_rule_idx before we start because |
269 | | // if we emit an OP_PUSH_RULE |
270 | 0 | rule = yr_arena_get_ptr(compiler->arena, YR_RULES_TABLE, 0); |
271 | |
|
272 | 0 | for (uint32_t i = 0; i <= compiler->current_rule_idx; i++) |
273 | 0 | { |
274 | | // Is rule->identifier prefixed by prefix? |
275 | 0 | if (strncmp(prefix, rule->identifier, strlen(prefix)) == 0) |
276 | 0 | { |
277 | 0 | uint32_t rule_idx = yr_hash_table_lookup_uint32( |
278 | 0 | compiler->rules_table, rule->identifier, ns->name); |
279 | |
|
280 | 0 | if (rule_idx != UINT32_MAX) |
281 | 0 | { |
282 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
283 | 0 | yyscanner, OP_PUSH_RULE, rule_idx, NULL, NULL)); |
284 | 0 | matching++; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | 0 | rule++; |
289 | 0 | } |
290 | | |
291 | 0 | if (count != NULL) |
292 | 0 | { |
293 | 0 | *count = matching; |
294 | 0 | } |
295 | |
|
296 | 0 | if (matching == 0) |
297 | 0 | { |
298 | 0 | yr_compiler_set_error_extra_info(compiler, prefix); |
299 | 0 | return ERROR_UNDEFINED_IDENTIFIER; |
300 | 0 | } |
301 | | |
302 | 0 | return ERROR_SUCCESS; |
303 | 0 | } |
304 | | |
305 | | int yr_parser_emit_push_const(yyscan_t yyscanner, uint64_t argument) |
306 | 0 | { |
307 | 0 | uint8_t opcode[9]; |
308 | 0 | int opcode_len = 1; |
309 | |
|
310 | 0 | if (argument == YR_UNDEFINED) |
311 | 0 | { |
312 | 0 | opcode[0] = OP_PUSH_U; |
313 | 0 | } |
314 | 0 | else if (argument <= 0xff) |
315 | 0 | { |
316 | 0 | opcode[0] = OP_PUSH_8; |
317 | 0 | opcode[1] = (uint8_t) argument; |
318 | 0 | opcode_len += sizeof(uint8_t); |
319 | 0 | } |
320 | 0 | else if (argument <= 0xffff) |
321 | 0 | { |
322 | 0 | opcode[0] = OP_PUSH_16; |
323 | 0 | uint16_t u = (uint16_t) argument; |
324 | 0 | memcpy(opcode + 1, &u, sizeof(uint16_t)); |
325 | 0 | opcode_len += sizeof(uint16_t); |
326 | 0 | } |
327 | 0 | else if (argument <= 0xffffffff) |
328 | 0 | { |
329 | 0 | opcode[0] = OP_PUSH_32; |
330 | 0 | uint32_t u = (uint32_t) argument; |
331 | 0 | memcpy(opcode + 1, &u, sizeof(uint32_t)); |
332 | 0 | opcode_len += sizeof(uint32_t); |
333 | 0 | } |
334 | 0 | else |
335 | 0 | { |
336 | 0 | opcode[0] = OP_PUSH; |
337 | 0 | memcpy(opcode + 1, &argument, sizeof(uint64_t)); |
338 | 0 | opcode_len += sizeof(uint64_t); |
339 | 0 | } |
340 | |
|
341 | 0 | return yr_arena_write_data( |
342 | 0 | yyget_extra(yyscanner)->arena, YR_CODE_SECTION, opcode, opcode_len, NULL); |
343 | 0 | } |
344 | | |
345 | | int yr_parser_check_types( |
346 | | YR_COMPILER* compiler, |
347 | | YR_OBJECT_FUNCTION* function, |
348 | | const char* actual_args_fmt) |
349 | 0 | { |
350 | 0 | int i; |
351 | |
|
352 | 0 | for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) |
353 | 0 | { |
354 | 0 | if (function->prototypes[i].arguments_fmt == NULL) |
355 | 0 | break; |
356 | | |
357 | 0 | if (strcmp(function->prototypes[i].arguments_fmt, actual_args_fmt) == 0) |
358 | 0 | return ERROR_SUCCESS; |
359 | 0 | } |
360 | | |
361 | 0 | yr_compiler_set_error_extra_info(compiler, function->identifier) |
362 | |
|
363 | 0 | return ERROR_WRONG_ARGUMENTS; |
364 | 0 | } |
365 | | |
366 | | int yr_parser_lookup_string( |
367 | | yyscan_t yyscanner, |
368 | | const char* identifier, |
369 | | YR_STRING** string) |
370 | 0 | { |
371 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
372 | |
|
373 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
374 | 0 | compiler, compiler->current_rule_idx); |
375 | |
|
376 | 0 | yr_rule_strings_foreach(current_rule, *string) |
377 | 0 | { |
378 | | // If some string $a gets fragmented into multiple chained |
379 | | // strings, all those fragments have the same $a identifier |
380 | | // but we are interested in the heading fragment, which is |
381 | | // that with chained_to == NULL |
382 | |
|
383 | 0 | if ((*string)->chained_to == NULL && |
384 | 0 | strcmp((*string)->identifier, identifier) == 0) |
385 | 0 | { |
386 | 0 | return ERROR_SUCCESS; |
387 | 0 | } |
388 | 0 | } |
389 | | |
390 | 0 | yr_compiler_set_error_extra_info(compiler, identifier) |
391 | |
|
392 | 0 | * string = NULL; |
393 | |
|
394 | 0 | return ERROR_UNDEFINED_STRING; |
395 | 0 | } |
396 | | |
397 | | //////////////////////////////////////////////////////////////////////////////// |
398 | | // Searches for a variable with the given identifier in the scope of the current |
399 | | // "for" loop. In case of nested "for" loops the identifier is searched starting |
400 | | // at the top-level loop and going down thorough the nested loops until the |
401 | | // current one. This is ok because inner loops can not re-define an identifier |
402 | | // already defined by an outer loop. |
403 | | // |
404 | | // If the variable is found, the return value is the position that the variable |
405 | | // occupies among all the currently defined variables. If the variable doesn't |
406 | | // exist the return value is -1. |
407 | | // |
408 | | // The function can receive a pointer to a YR_EXPRESSION that will populated |
409 | | // with information about the variable if found. This pointer can be NULL if |
410 | | // the caller is not interested in getting that information. |
411 | | // |
412 | | int yr_parser_lookup_loop_variable( |
413 | | yyscan_t yyscanner, |
414 | | const char* identifier, |
415 | | YR_EXPRESSION* expr) |
416 | 2 | { |
417 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
418 | 2 | int i, j; |
419 | 2 | int var_offset = 0; |
420 | | |
421 | 2 | for (i = 0; i <= compiler->loop_index; i++) |
422 | 0 | { |
423 | 0 | var_offset += compiler->loop[i].vars_internal_count; |
424 | |
|
425 | 0 | for (j = 0; j < compiler->loop[i].vars_count; j++) |
426 | 0 | { |
427 | 0 | if (compiler->loop[i].vars[j].identifier.ptr != NULL && |
428 | 0 | strcmp(identifier, compiler->loop[i].vars[j].identifier.ptr) == 0) |
429 | 0 | { |
430 | 0 | if (expr != NULL) |
431 | 0 | *expr = compiler->loop[i].vars[j]; |
432 | |
|
433 | 0 | return var_offset + j; |
434 | 0 | } |
435 | 0 | } |
436 | | |
437 | 0 | var_offset += compiler->loop[i].vars_count; |
438 | 0 | } |
439 | | |
440 | 2 | return -1; |
441 | 2 | } |
442 | | |
443 | | static int _yr_parser_write_string( |
444 | | const char* identifier, |
445 | | YR_MODIFIER modifier, |
446 | | YR_COMPILER* compiler, |
447 | | SIZED_STRING* str, |
448 | | RE_AST* re_ast, |
449 | | YR_ARENA_REF* string_ref, |
450 | | int* min_atom_quality, |
451 | | int* num_atom) |
452 | 0 | { |
453 | 0 | SIZED_STRING* literal_string; |
454 | 0 | YR_ATOM_LIST_ITEM* atom; |
455 | 0 | YR_ATOM_LIST_ITEM* atom_list = NULL; |
456 | |
|
457 | 0 | int c, result; |
458 | 0 | int max_string_len; |
459 | 0 | bool free_literal = false; |
460 | |
|
461 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
462 | 0 | compiler->arena, |
463 | 0 | YR_STRINGS_TABLE, |
464 | 0 | sizeof(YR_STRING), |
465 | 0 | string_ref, |
466 | 0 | offsetof(YR_STRING, identifier), |
467 | 0 | offsetof(YR_STRING, string), |
468 | 0 | offsetof(YR_STRING, chained_to), |
469 | 0 | EOL)); |
470 | |
|
471 | 0 | YR_STRING* string = (YR_STRING*) yr_arena_ref_to_ptr( |
472 | 0 | compiler->arena, string_ref); |
473 | |
|
474 | 0 | YR_ARENA_REF ref; |
475 | |
|
476 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
477 | |
|
478 | 0 | string->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
479 | 0 | string->rule_idx = compiler->current_rule_idx; |
480 | 0 | string->idx = compiler->current_string_idx; |
481 | 0 | string->fixed_offset = YR_UNDEFINED; |
482 | |
|
483 | 0 | compiler->current_string_idx++; |
484 | |
|
485 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
486 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
487 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
488 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
489 | 0 | { |
490 | 0 | literal_string = yr_re_ast_extract_literal(re_ast); |
491 | |
|
492 | 0 | if (literal_string != NULL) |
493 | 0 | free_literal = true; |
494 | 0 | } |
495 | 0 | else |
496 | 0 | { |
497 | 0 | literal_string = str; |
498 | 0 | } |
499 | |
|
500 | 0 | if (literal_string != NULL) |
501 | 0 | { |
502 | 0 | modifier.flags |= STRING_FLAGS_LITERAL; |
503 | |
|
504 | 0 | result = _yr_compiler_store_data( |
505 | 0 | compiler, |
506 | 0 | literal_string->c_string, |
507 | 0 | literal_string->length + 1, // +1 to include terminating NULL |
508 | 0 | &ref); |
509 | |
|
510 | 0 | if (result != ERROR_SUCCESS) |
511 | 0 | goto cleanup; |
512 | | |
513 | 0 | string->length = (uint32_t) literal_string->length; |
514 | 0 | string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
515 | |
|
516 | 0 | if (modifier.flags & STRING_FLAGS_WIDE) |
517 | 0 | max_string_len = string->length * 2; |
518 | 0 | else |
519 | 0 | max_string_len = string->length; |
520 | |
|
521 | 0 | if (max_string_len <= YR_MAX_ATOM_LENGTH) |
522 | 0 | modifier.flags |= STRING_FLAGS_FITS_IN_ATOM; |
523 | |
|
524 | 0 | result = yr_atoms_extract_from_string( |
525 | 0 | &compiler->atoms_config, |
526 | 0 | (uint8_t*) literal_string->c_string, |
527 | 0 | (int32_t) literal_string->length, |
528 | 0 | modifier, |
529 | 0 | &atom_list, |
530 | 0 | min_atom_quality); |
531 | |
|
532 | 0 | if (result != ERROR_SUCCESS) |
533 | 0 | goto cleanup; |
534 | 0 | } |
535 | 0 | else |
536 | 0 | { |
537 | | // Non-literal strings can't be marked as fixed offset because once we |
538 | | // find a string atom in the scanned data we don't know the offset where |
539 | | // the string should start, as the non-literal strings can contain |
540 | | // variable-length portions. |
541 | 0 | modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET; |
542 | | |
543 | | // Save the position where the RE forward code starts for later reference. |
544 | 0 | yr_arena_off_t forward_code_start = yr_arena_get_current_offset( |
545 | 0 | compiler->arena, YR_RE_CODE_SECTION); |
546 | | |
547 | | // Emit forwards code |
548 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, false); |
549 | |
|
550 | 0 | if (result != ERROR_SUCCESS) |
551 | 0 | goto cleanup; |
552 | | |
553 | | // Emit backwards code |
554 | 0 | result = yr_re_ast_emit_code(re_ast, compiler->arena, true); |
555 | |
|
556 | 0 | if (result != ERROR_SUCCESS) |
557 | 0 | goto cleanup; |
558 | | |
559 | | // Extract atoms from the regular expression. |
560 | 0 | result = yr_atoms_extract_from_re( |
561 | 0 | &compiler->atoms_config, |
562 | 0 | re_ast, |
563 | 0 | modifier, |
564 | 0 | &atom_list, |
565 | 0 | min_atom_quality); |
566 | |
|
567 | 0 | if (result != ERROR_SUCCESS) |
568 | 0 | goto cleanup; |
569 | | |
570 | | // If no atom was extracted let's add a zero-length atom. |
571 | 0 | if (atom_list == NULL) |
572 | 0 | { |
573 | 0 | atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); |
574 | |
|
575 | 0 | if (atom_list == NULL) |
576 | 0 | { |
577 | 0 | result = ERROR_INSUFFICIENT_MEMORY; |
578 | 0 | goto cleanup; |
579 | 0 | } |
580 | | |
581 | 0 | atom_list->atom.length = 0; |
582 | 0 | atom_list->backtrack = 0; |
583 | 0 | atom_list->backward_code_ref = YR_ARENA_NULL_REF; |
584 | 0 | atom_list->next = NULL; |
585 | |
|
586 | 0 | yr_arena_ptr_to_ref( |
587 | 0 | compiler->arena, |
588 | 0 | yr_arena_get_ptr( |
589 | 0 | compiler->arena, YR_RE_CODE_SECTION, forward_code_start), |
590 | 0 | &(atom_list->forward_code_ref)); |
591 | 0 | } |
592 | 0 | } |
593 | | |
594 | 0 | string->flags = modifier.flags; |
595 | | |
596 | | // Add the string to Aho-Corasick automaton. |
597 | 0 | result = yr_ac_add_string( |
598 | 0 | compiler->automaton, string, string->idx, atom_list, compiler->arena); |
599 | |
|
600 | 0 | if (result != ERROR_SUCCESS) |
601 | 0 | goto cleanup; |
602 | | |
603 | 0 | atom = atom_list; |
604 | 0 | c = 0; |
605 | |
|
606 | 0 | while (atom != NULL) |
607 | 0 | { |
608 | 0 | atom = atom->next; |
609 | 0 | c++; |
610 | 0 | } |
611 | |
|
612 | 0 | (*num_atom) += c; |
613 | |
|
614 | 0 | cleanup: |
615 | 0 | if (free_literal) |
616 | 0 | yr_free(literal_string); |
617 | |
|
618 | 0 | if (atom_list != NULL) |
619 | 0 | yr_atoms_list_destroy(atom_list); |
620 | |
|
621 | 0 | return result; |
622 | 0 | } |
623 | | |
624 | | static int _yr_parser_check_string_modifiers( |
625 | | yyscan_t yyscanner, |
626 | | YR_MODIFIER modifier) |
627 | 0 | { |
628 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
629 | | |
630 | | // xor and nocase together is not implemented. |
631 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
632 | 0 | modifier.flags & STRING_FLAGS_NO_CASE) |
633 | 0 | { |
634 | 0 | yr_compiler_set_error_extra_info( |
635 | 0 | compiler, "invalid modifier combination: xor nocase"); |
636 | 0 | return ERROR_INVALID_MODIFIER; |
637 | 0 | } |
638 | | |
639 | | // base64 and nocase together is not implemented. |
640 | 0 | if (modifier.flags & STRING_FLAGS_NO_CASE && |
641 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
642 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
643 | 0 | { |
644 | 0 | yr_compiler_set_error_extra_info( |
645 | 0 | compiler, |
646 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
647 | 0 | ? "invalid modifier combination: base64 nocase" |
648 | 0 | : "invalid modifier combination: base64wide nocase"); |
649 | 0 | return ERROR_INVALID_MODIFIER; |
650 | 0 | } |
651 | | |
652 | | // base64 and fullword together is not implemented. |
653 | 0 | if (modifier.flags & STRING_FLAGS_FULL_WORD && |
654 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
655 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
656 | 0 | { |
657 | 0 | yr_compiler_set_error_extra_info( |
658 | 0 | compiler, |
659 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
660 | 0 | ? "invalid modifier combination: base64 fullword" |
661 | 0 | : "invalid modifier combination: base64wide fullword"); |
662 | 0 | return ERROR_INVALID_MODIFIER; |
663 | 0 | } |
664 | | |
665 | | // base64 and xor together is not implemented. |
666 | 0 | if (modifier.flags & STRING_FLAGS_XOR && |
667 | 0 | (modifier.flags & STRING_FLAGS_BASE64 || |
668 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
669 | 0 | { |
670 | 0 | yr_compiler_set_error_extra_info( |
671 | 0 | compiler, |
672 | 0 | modifier.flags & STRING_FLAGS_BASE64 |
673 | 0 | ? "invalid modifier combination: base64 xor" |
674 | 0 | : "invalid modifier combination: base64wide xor"); |
675 | 0 | return ERROR_INVALID_MODIFIER; |
676 | 0 | } |
677 | | |
678 | 0 | return ERROR_SUCCESS; |
679 | 0 | } |
680 | | |
681 | | int yr_parser_reduce_string_declaration( |
682 | | yyscan_t yyscanner, |
683 | | YR_MODIFIER modifier, |
684 | | const char* identifier, |
685 | | SIZED_STRING* str, |
686 | | YR_ARENA_REF* string_ref) |
687 | 0 | { |
688 | 0 | int result = ERROR_SUCCESS; |
689 | 0 | int min_atom_quality = YR_MAX_ATOM_QUALITY; |
690 | 0 | int atom_quality; |
691 | |
|
692 | 0 | char message[512]; |
693 | |
|
694 | 0 | int32_t min_gap = 0; |
695 | 0 | int32_t max_gap = 0; |
696 | |
|
697 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
698 | |
|
699 | 0 | RE_AST* re_ast = NULL; |
700 | 0 | RE_AST* remainder_re_ast = NULL; |
701 | 0 | RE_ERROR re_error; |
702 | |
|
703 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
704 | 0 | compiler, compiler->current_rule_idx); |
705 | | |
706 | | // Determine if a string with the same identifier was already defined |
707 | | // by searching for the identifier in strings_table. |
708 | 0 | uint32_t string_idx = yr_hash_table_lookup_uint32( |
709 | 0 | compiler->strings_table, identifier, NULL); |
710 | | |
711 | | // The string was already defined, return an error. |
712 | 0 | if (string_idx != UINT32_MAX) |
713 | 0 | { |
714 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
715 | 0 | return ERROR_DUPLICATED_STRING_IDENTIFIER; |
716 | 0 | } |
717 | | |
718 | | // Empty strings are not allowed. |
719 | 0 | if (str->length == 0) |
720 | 0 | { |
721 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
722 | 0 | return ERROR_EMPTY_STRING; |
723 | 0 | } |
724 | | |
725 | 0 | if (str->flags & SIZED_STRING_FLAGS_NO_CASE) |
726 | 0 | modifier.flags |= STRING_FLAGS_NO_CASE; |
727 | |
|
728 | 0 | if (str->flags & SIZED_STRING_FLAGS_DOT_ALL) |
729 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
730 | | |
731 | | // Hex strings are always handled as DOT_ALL regexps. |
732 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
733 | 0 | modifier.flags |= STRING_FLAGS_DOT_ALL; |
734 | |
|
735 | 0 | if (!(modifier.flags & STRING_FLAGS_WIDE) && |
736 | 0 | !(modifier.flags & STRING_FLAGS_BASE64 || |
737 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE)) |
738 | 0 | { |
739 | 0 | modifier.flags |= STRING_FLAGS_ASCII; |
740 | 0 | } |
741 | | |
742 | | // The STRING_FLAGS_SINGLE_MATCH flag indicates that finding |
743 | | // a single match for the string is enough. This is true in |
744 | | // most cases, except when the string count (#) and string offset (@) |
745 | | // operators are used. All strings are marked STRING_FLAGS_SINGLE_MATCH |
746 | | // initially, and unmarked later if required. |
747 | 0 | modifier.flags |= STRING_FLAGS_SINGLE_MATCH; |
748 | | |
749 | | // The STRING_FLAGS_FIXED_OFFSET indicates that the string doesn't |
750 | | // need to be searched all over the file because the user is using the |
751 | | // "at" operator. The string must be searched at a fixed offset in the |
752 | | // file. All strings are marked STRING_FLAGS_FIXED_OFFSET initially, |
753 | | // and unmarked later if required. |
754 | 0 | modifier.flags |= STRING_FLAGS_FIXED_OFFSET; |
755 | | |
756 | | // If string identifier is $ this is an anonymous string, if not add the |
757 | | // identifier to strings_table. |
758 | 0 | if (strcmp(identifier, "$") == 0) |
759 | 0 | { |
760 | 0 | modifier.flags |= STRING_FLAGS_ANONYMOUS; |
761 | 0 | } |
762 | 0 | else |
763 | 0 | { |
764 | 0 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
765 | 0 | compiler->strings_table, |
766 | 0 | identifier, |
767 | 0 | NULL, |
768 | 0 | compiler->current_string_idx)); |
769 | 0 | } |
770 | | |
771 | | // Make sure that the the string does not have an invalid combination of |
772 | | // modifiers. |
773 | 0 | FAIL_ON_ERROR(_yr_parser_check_string_modifiers(yyscanner, modifier)); |
774 | |
|
775 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL || |
776 | 0 | modifier.flags & STRING_FLAGS_REGEXP || |
777 | 0 | modifier.flags & STRING_FLAGS_BASE64 || |
778 | 0 | modifier.flags & STRING_FLAGS_BASE64_WIDE) |
779 | 0 | { |
780 | 0 | if (modifier.flags & STRING_FLAGS_HEXADECIMAL) |
781 | 0 | result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); |
782 | 0 | else if (modifier.flags & STRING_FLAGS_REGEXP) |
783 | 0 | { |
784 | 0 | int flags = RE_PARSER_FLAG_NONE; |
785 | 0 | if (compiler->strict_escape) |
786 | 0 | flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; |
787 | 0 | result = yr_re_parse(str->c_string, &re_ast, &re_error, flags); |
788 | 0 | } |
789 | 0 | else |
790 | 0 | result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); |
791 | |
|
792 | 0 | if (result != ERROR_SUCCESS) |
793 | 0 | { |
794 | 0 | if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) |
795 | 0 | { |
796 | 0 | yywarning(yyscanner, "unknown escape sequence"); |
797 | 0 | } |
798 | 0 | else |
799 | 0 | { |
800 | 0 | snprintf( |
801 | 0 | message, |
802 | 0 | sizeof(message), |
803 | 0 | "invalid %s \"%s\": %s", |
804 | 0 | (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" |
805 | 0 | : "regular expression", |
806 | 0 | identifier, |
807 | 0 | re_error.message); |
808 | |
|
809 | 0 | yr_compiler_set_error_extra_info(compiler, message); |
810 | 0 | goto _exit; |
811 | 0 | } |
812 | 0 | } |
813 | | |
814 | 0 | if (re_ast->flags & RE_FLAGS_FAST_REGEXP) |
815 | 0 | modifier.flags |= STRING_FLAGS_FAST_REGEXP; |
816 | |
|
817 | 0 | if (re_ast->flags & RE_FLAGS_GREEDY) |
818 | 0 | modifier.flags |= STRING_FLAGS_GREEDY_REGEXP; |
819 | | |
820 | | // Regular expressions in the strings section can't mix greedy and |
821 | | // ungreedy quantifiers like .* and .*?. That's because these regular |
822 | | // expressions can be matched forwards and/or backwards depending on the |
823 | | // atom found, and we need the regexp to be all-greedy or all-ungreedy to |
824 | | // be able to properly calculate the length of the match. |
825 | |
|
826 | 0 | if ((re_ast->flags & RE_FLAGS_GREEDY) && |
827 | 0 | (re_ast->flags & RE_FLAGS_UNGREEDY)) |
828 | 0 | { |
829 | 0 | result = ERROR_INVALID_REGULAR_EXPRESSION; |
830 | |
|
831 | 0 | yr_compiler_set_error_extra_info( |
832 | 0 | compiler, |
833 | 0 | "greedy and ungreedy quantifiers can't be mixed in a regular " |
834 | 0 | "expression"); |
835 | |
|
836 | 0 | goto _exit; |
837 | 0 | } |
838 | | |
839 | 0 | if (yr_re_ast_has_unbounded_quantifier_for_dot(re_ast)) |
840 | 0 | { |
841 | 0 | yywarning( |
842 | 0 | yyscanner, |
843 | 0 | "%s contains .*, .+ or .{x,} consider using .{,N}, .{1,N} or {x,N} " |
844 | 0 | "with a reasonable value for N", |
845 | 0 | identifier); |
846 | 0 | } |
847 | |
|
848 | 0 | if (compiler->re_ast_callback != NULL) |
849 | 0 | { |
850 | 0 | compiler->re_ast_callback( |
851 | 0 | current_rule, identifier, re_ast, compiler->re_ast_clbk_user_data); |
852 | 0 | } |
853 | |
|
854 | 0 | *string_ref = YR_ARENA_NULL_REF; |
855 | |
|
856 | 0 | while (re_ast != NULL) |
857 | 0 | { |
858 | 0 | YR_ARENA_REF ref; |
859 | |
|
860 | 0 | uint32_t prev_string_idx = compiler->current_string_idx - 1; |
861 | |
|
862 | 0 | int32_t prev_min_gap = min_gap; |
863 | 0 | int32_t prev_max_gap = max_gap; |
864 | |
|
865 | 0 | result = yr_re_ast_split_at_chaining_point( |
866 | 0 | re_ast, &remainder_re_ast, &min_gap, &max_gap); |
867 | |
|
868 | 0 | if (result != ERROR_SUCCESS) |
869 | 0 | goto _exit; |
870 | | |
871 | 0 | result = _yr_parser_write_string( |
872 | 0 | identifier, |
873 | 0 | modifier, |
874 | 0 | compiler, |
875 | 0 | NULL, |
876 | 0 | re_ast, |
877 | 0 | &ref, |
878 | 0 | &atom_quality, |
879 | 0 | ¤t_rule->num_atoms); |
880 | |
|
881 | 0 | if (result != ERROR_SUCCESS) |
882 | 0 | goto _exit; |
883 | | |
884 | 0 | if (atom_quality < min_atom_quality) |
885 | 0 | min_atom_quality = atom_quality; |
886 | |
|
887 | 0 | if (YR_ARENA_IS_NULL_REF(*string_ref)) |
888 | 0 | { |
889 | | // This is the first string in the chain, the string reference |
890 | | // returned by this function must point to this string. |
891 | 0 | *string_ref = ref; |
892 | 0 | } |
893 | 0 | else |
894 | 0 | { |
895 | | // This is not the first string in the chain, set the appropriate |
896 | | // flags and fill the chained_to, chain_gap_min and chain_gap_max |
897 | | // fields. |
898 | 0 | YR_STRING* prev_string = (YR_STRING*) yr_arena_get_ptr( |
899 | 0 | compiler->arena, |
900 | 0 | YR_STRINGS_TABLE, |
901 | 0 | prev_string_idx * sizeof(YR_STRING)); |
902 | |
|
903 | 0 | YR_STRING* new_string = (YR_STRING*) yr_arena_ref_to_ptr( |
904 | 0 | compiler->arena, &ref); |
905 | |
|
906 | 0 | new_string->chained_to = prev_string; |
907 | 0 | new_string->chain_gap_min = prev_min_gap; |
908 | 0 | new_string->chain_gap_max = prev_max_gap; |
909 | | |
910 | | // A string chained to another one can't have a fixed offset, only the |
911 | | // head of the string chain can have a fixed offset. |
912 | 0 | new_string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
913 | | |
914 | | // There is a previous string, but that string wasn't marked as part |
915 | | // of a chain because we can't do that until knowing there will be |
916 | | // another string, let's flag it now the we know. |
917 | 0 | prev_string->flags |= STRING_FLAGS_CHAIN_PART; |
918 | | |
919 | | // There is a previous string, so this string is part of a chain, but |
920 | | // there will be no more strings because there are no more AST to |
921 | | // split, which means that this is the chain's tail. |
922 | 0 | if (remainder_re_ast == NULL) |
923 | 0 | new_string->flags |= STRING_FLAGS_CHAIN_PART | |
924 | 0 | STRING_FLAGS_CHAIN_TAIL; |
925 | 0 | } |
926 | |
|
927 | 0 | yr_re_ast_destroy(re_ast); |
928 | 0 | re_ast = remainder_re_ast; |
929 | 0 | } |
930 | 0 | } |
931 | 0 | else // not a STRING_FLAGS_HEXADECIMAL or STRING_FLAGS_REGEXP or |
932 | | // STRING_FLAGS_BASE64 or STRING_FLAGS_BASE64_WIDE |
933 | 0 | { |
934 | 0 | result = _yr_parser_write_string( |
935 | 0 | identifier, |
936 | 0 | modifier, |
937 | 0 | compiler, |
938 | 0 | str, |
939 | 0 | NULL, |
940 | 0 | string_ref, |
941 | 0 | &min_atom_quality, |
942 | 0 | ¤t_rule->num_atoms); |
943 | |
|
944 | 0 | if (result != ERROR_SUCCESS) |
945 | 0 | goto _exit; |
946 | 0 | } |
947 | | |
948 | 0 | if (min_atom_quality < compiler->atoms_config.quality_warning_threshold) |
949 | 0 | { |
950 | 0 | yywarning(yyscanner, "string \"%s\" may slow down scanning", identifier); |
951 | 0 | } |
952 | |
|
953 | 0 | _exit: |
954 | |
|
955 | 0 | if (re_ast != NULL) |
956 | 0 | yr_re_ast_destroy(re_ast); |
957 | |
|
958 | 0 | if (remainder_re_ast != NULL) |
959 | 0 | yr_re_ast_destroy(remainder_re_ast); |
960 | |
|
961 | 0 | return result; |
962 | 0 | } |
963 | | |
964 | | static int wildcard_iterator( |
965 | | void* prefix, |
966 | | size_t prefix_len, |
967 | | void* _value, |
968 | | void* data) |
969 | 0 | { |
970 | 0 | const char* identifier = (const char*) data; |
971 | | |
972 | | // If the identifier is prefixed by prefix, then it matches the wildcard. |
973 | 0 | if (!strncmp(prefix, identifier, prefix_len)) |
974 | 0 | return ERROR_IDENTIFIER_MATCHES_WILDCARD; |
975 | | |
976 | 0 | return ERROR_SUCCESS; |
977 | 0 | } |
978 | | |
979 | | int yr_parser_reduce_rule_declaration_phase_1( |
980 | | yyscan_t yyscanner, |
981 | | int32_t flags, |
982 | | const char* identifier, |
983 | | YR_ARENA_REF* rule_ref) |
984 | 2 | { |
985 | 2 | int result; |
986 | 2 | YR_FIXUP* fixup; |
987 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
988 | | |
989 | 2 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
990 | 2 | compiler->arena, |
991 | 2 | YR_NAMESPACES_TABLE, |
992 | 2 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
993 | | |
994 | 2 | if (yr_hash_table_lookup_uint32( |
995 | 2 | compiler->rules_table, identifier, ns->name) != UINT32_MAX || |
996 | 2 | yr_hash_table_lookup(compiler->objects_table, identifier, NULL) != NULL) |
997 | 0 | { |
998 | | // A rule or variable with the same identifier already exists, return the |
999 | | // appropriate error. |
1000 | |
|
1001 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
1002 | 0 | return ERROR_DUPLICATED_IDENTIFIER; |
1003 | 0 | } |
1004 | | |
1005 | | // Iterate over all identifiers in wildcard_identifiers_table, and check if |
1006 | | // any of them are a prefix of the identifier being declared. If so, return |
1007 | | // ERROR_IDENTIFIER_MATCHES_WILDCARD. |
1008 | 2 | result = yr_hash_table_iterate( |
1009 | 2 | compiler->wildcard_identifiers_table, |
1010 | 2 | ns->name, |
1011 | 2 | wildcard_iterator, |
1012 | 2 | (void*) identifier); |
1013 | | |
1014 | 2 | if (result == ERROR_IDENTIFIER_MATCHES_WILDCARD) |
1015 | 0 | { |
1016 | | // This rule matches an existing wildcard rule set. |
1017 | 0 | yr_compiler_set_error_extra_info(compiler, identifier); |
1018 | 0 | } |
1019 | | |
1020 | 2 | FAIL_ON_ERROR(result); |
1021 | | |
1022 | 2 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1023 | 2 | compiler->arena, |
1024 | 2 | YR_RULES_TABLE, |
1025 | 2 | sizeof(YR_RULE), |
1026 | 2 | rule_ref, |
1027 | 2 | offsetof(YR_RULE, identifier), |
1028 | 2 | offsetof(YR_RULE, tags), |
1029 | 2 | offsetof(YR_RULE, strings), |
1030 | 2 | offsetof(YR_RULE, metas), |
1031 | 2 | offsetof(YR_RULE, ns), |
1032 | 2 | EOL)); |
1033 | | |
1034 | 2 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1035 | | |
1036 | 2 | YR_ARENA_REF ref; |
1037 | | |
1038 | 2 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1039 | | |
1040 | 2 | rule->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1041 | 2 | rule->flags = flags; |
1042 | 2 | rule->ns = ns; |
1043 | 2 | rule->num_atoms = 0; |
1044 | | |
1045 | 2 | YR_ARENA_REF jmp_offset_ref; |
1046 | | |
1047 | | // We are starting to parse a new rule, set current_rule_idx accordingly. |
1048 | 2 | compiler->current_rule_idx = compiler->next_rule_idx; |
1049 | 2 | compiler->next_rule_idx++; |
1050 | | |
1051 | | // The OP_INIT_RULE instruction behaves like a jump. When the rule is |
1052 | | // disabled it skips over the rule's code and go straight to the next rule's |
1053 | | // code. The jmp_offset_ref variable points to the jump's offset. The offset |
1054 | | // is set to 0 as we don't know the jump target yet. When we finish |
1055 | | // generating the rule's code in yr_parser_reduce_rule_declaration_phase_2 |
1056 | | // the jump offset is set to its final value. |
1057 | | |
1058 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg_int32( |
1059 | 2 | yyscanner, OP_INIT_RULE, 0, NULL, &jmp_offset_ref)); |
1060 | | |
1061 | 2 | FAIL_ON_ERROR(yr_arena_write_data( |
1062 | 2 | compiler->arena, |
1063 | 2 | YR_CODE_SECTION, |
1064 | 2 | &compiler->current_rule_idx, |
1065 | 2 | sizeof(compiler->current_rule_idx), |
1066 | 2 | NULL)); |
1067 | | |
1068 | | // Create a fixup entry for the jump and push it in the stack |
1069 | 2 | fixup = (YR_FIXUP*) yr_malloc(sizeof(YR_FIXUP)); |
1070 | | |
1071 | 2 | if (fixup == NULL) |
1072 | 0 | return ERROR_INSUFFICIENT_MEMORY; |
1073 | | |
1074 | 2 | fixup->ref = jmp_offset_ref; |
1075 | 2 | fixup->next = compiler->fixup_stack_head; |
1076 | 2 | compiler->fixup_stack_head = fixup; |
1077 | | |
1078 | | // Clean strings_table as we are starting to parse a new rule. |
1079 | 2 | yr_hash_table_clean(compiler->strings_table, NULL); |
1080 | | |
1081 | 2 | FAIL_ON_ERROR(yr_hash_table_add_uint32( |
1082 | 2 | compiler->rules_table, identifier, ns->name, compiler->current_rule_idx)); |
1083 | | |
1084 | 2 | return ERROR_SUCCESS; |
1085 | 2 | } |
1086 | | |
1087 | | int yr_parser_reduce_rule_declaration_phase_2( |
1088 | | yyscan_t yyscanner, |
1089 | | YR_ARENA_REF* rule_ref) |
1090 | 2 | { |
1091 | 2 | uint32_t max_strings_per_rule; |
1092 | 2 | uint32_t strings_in_rule = 0; |
1093 | | |
1094 | 2 | YR_FIXUP* fixup; |
1095 | 2 | YR_STRING* string; |
1096 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1097 | | |
1098 | 2 | yr_get_configuration_uint32( |
1099 | 2 | YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule); |
1100 | | |
1101 | 2 | YR_RULE* rule = (YR_RULE*) yr_arena_ref_to_ptr(compiler->arena, rule_ref); |
1102 | | |
1103 | | // Show warning if the rule is generating too many atoms. The warning is |
1104 | | // shown if the number of atoms is greater than 20 times the maximum number |
1105 | | // of strings allowed for a rule, as 20 is minimum number of atoms generated |
1106 | | // for a string using *nocase*, *ascii* and *wide* modifiers simultaneously. |
1107 | | |
1108 | 2 | if (rule->num_atoms > YR_ATOMS_PER_RULE_WARNING_THRESHOLD) |
1109 | 0 | { |
1110 | 0 | yywarning(yyscanner, "rule is slowing down scanning"); |
1111 | 0 | } |
1112 | | |
1113 | 2 | yr_rule_strings_foreach(rule, string) |
1114 | 0 | { |
1115 | | // Only the heading fragment in a chain of strings (the one with |
1116 | | // chained_to == NULL) must be referenced. All other fragments |
1117 | | // are never marked as referenced. |
1118 | | // |
1119 | | // Any string identifier that starts with '_' can be unreferenced. Anonymous |
1120 | | // strings must always be referenced. |
1121 | |
|
1122 | 0 | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1123 | 0 | (STRING_IS_ANONYMOUS(string) || |
1124 | 0 | (!STRING_IS_ANONYMOUS(string) && string->identifier[1] != '_'))) |
1125 | 0 | { |
1126 | 0 | yr_compiler_set_error_extra_info( |
1127 | 0 | compiler, string->identifier) return ERROR_UNREFERENCED_STRING; |
1128 | 0 | } |
1129 | | |
1130 | | // If a string is unreferenced we need to unset the FIXED_OFFSET flag so |
1131 | | // that it will match anywhere. |
1132 | 0 | if (!STRING_IS_REFERENCED(string) && string->chained_to == NULL && |
1133 | 0 | STRING_IS_FIXED_OFFSET(string)) |
1134 | 0 | { |
1135 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1136 | 0 | } |
1137 | |
|
1138 | 0 | strings_in_rule++; |
1139 | |
|
1140 | 0 | if (strings_in_rule > max_strings_per_rule) |
1141 | 0 | { |
1142 | 0 | yr_compiler_set_error_extra_info( |
1143 | 0 | compiler, rule->identifier) return ERROR_TOO_MANY_STRINGS; |
1144 | 0 | } |
1145 | 0 | } |
1146 | | |
1147 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1148 | 2 | yyscanner, OP_MATCH_RULE, compiler->current_rule_idx, NULL, NULL)); |
1149 | | |
1150 | 2 | fixup = compiler->fixup_stack_head; |
1151 | | |
1152 | 2 | int32_t* jmp_offset_addr = (int32_t*) yr_arena_ref_to_ptr( |
1153 | 2 | compiler->arena, &fixup->ref); |
1154 | | |
1155 | 2 | int32_t jmp_offset = yr_arena_get_current_offset( |
1156 | 2 | compiler->arena, YR_CODE_SECTION) - |
1157 | 2 | fixup->ref.offset + 1; |
1158 | | |
1159 | 2 | memcpy(jmp_offset_addr, &jmp_offset, sizeof(jmp_offset)); |
1160 | | |
1161 | | // Remove fixup from the stack. |
1162 | 2 | compiler->fixup_stack_head = fixup->next; |
1163 | 2 | yr_free(fixup); |
1164 | | |
1165 | | // We have finished parsing the current rule set current_rule_idx to |
1166 | | // UINT32_MAX indicating that we are not currently parsing a rule. |
1167 | 2 | compiler->current_rule_idx = UINT32_MAX; |
1168 | | |
1169 | 2 | return ERROR_SUCCESS; |
1170 | 2 | } |
1171 | | |
1172 | | int yr_parser_reduce_string_identifier( |
1173 | | yyscan_t yyscanner, |
1174 | | const char* identifier, |
1175 | | uint8_t instruction, |
1176 | | uint64_t at_offset) |
1177 | 0 | { |
1178 | 0 | YR_STRING* string; |
1179 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1180 | |
|
1181 | 0 | if (strcmp(identifier, "$") == 0) // is an anonymous string ? |
1182 | 0 | { |
1183 | 0 | if (compiler->loop_for_of_var_index >= 0) // inside a loop ? |
1184 | 0 | { |
1185 | 0 | yr_parser_emit_with_arg( |
1186 | 0 | yyscanner, OP_PUSH_M, compiler->loop_for_of_var_index, NULL, NULL); |
1187 | |
|
1188 | 0 | yr_parser_emit(yyscanner, instruction, NULL); |
1189 | |
|
1190 | 0 | YR_RULE* current_rule = _yr_compiler_get_rule_by_idx( |
1191 | 0 | compiler, compiler->current_rule_idx); |
1192 | |
|
1193 | 0 | yr_rule_strings_foreach(current_rule, string) |
1194 | 0 | { |
1195 | 0 | if (instruction != OP_FOUND) |
1196 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1197 | |
|
1198 | 0 | if (instruction == OP_FOUND_AT) |
1199 | 0 | { |
1200 | | // Avoid overwriting any previous fixed offset |
1201 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1202 | 0 | string->fixed_offset = at_offset; |
1203 | | |
1204 | | // If a previous fixed offset was different, disable |
1205 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1206 | | // have room to store a single fixed offset value |
1207 | 0 | if (string->fixed_offset != at_offset) |
1208 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1209 | 0 | } |
1210 | 0 | else |
1211 | 0 | { |
1212 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1213 | 0 | } |
1214 | 0 | } |
1215 | 0 | } |
1216 | 0 | else |
1217 | 0 | { |
1218 | | // Anonymous strings not allowed outside of a loop |
1219 | 0 | return ERROR_MISPLACED_ANONYMOUS_STRING; |
1220 | 0 | } |
1221 | 0 | } |
1222 | 0 | else |
1223 | 0 | { |
1224 | 0 | FAIL_ON_ERROR(yr_parser_lookup_string(yyscanner, identifier, &string)); |
1225 | |
|
1226 | 0 | FAIL_ON_ERROR( |
1227 | 0 | yr_parser_emit_with_arg_reloc(yyscanner, OP_PUSH, string, NULL, NULL)); |
1228 | |
|
1229 | 0 | if (instruction != OP_FOUND) |
1230 | 0 | string->flags &= ~STRING_FLAGS_SINGLE_MATCH; |
1231 | |
|
1232 | 0 | if (instruction == OP_FOUND_AT) |
1233 | 0 | { |
1234 | | // Avoid overwriting any previous fixed offset |
1235 | |
|
1236 | 0 | if (string->fixed_offset == YR_UNDEFINED) |
1237 | 0 | string->fixed_offset = at_offset; |
1238 | | |
1239 | | // If a previous fixed offset was different, disable |
1240 | | // the STRING_GFLAGS_FIXED_OFFSET flag because we only |
1241 | | // have room to store a single fixed offset value |
1242 | |
|
1243 | 0 | if (string->fixed_offset == YR_UNDEFINED || |
1244 | 0 | string->fixed_offset != at_offset) |
1245 | 0 | { |
1246 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1247 | 0 | } |
1248 | 0 | } |
1249 | 0 | else |
1250 | 0 | { |
1251 | 0 | string->flags &= ~STRING_FLAGS_FIXED_OFFSET; |
1252 | 0 | } |
1253 | |
|
1254 | 0 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, instruction, NULL)); |
1255 | |
|
1256 | 0 | string->flags |= STRING_FLAGS_REFERENCED; |
1257 | 0 | } |
1258 | | |
1259 | 0 | return ERROR_SUCCESS; |
1260 | 0 | } |
1261 | | |
1262 | | int yr_parser_reduce_meta_declaration( |
1263 | | yyscan_t yyscanner, |
1264 | | int32_t type, |
1265 | | const char* identifier, |
1266 | | const char* string, |
1267 | | int64_t integer, |
1268 | | YR_ARENA_REF* meta_ref) |
1269 | 0 | { |
1270 | 0 | YR_ARENA_REF ref; |
1271 | 0 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1272 | |
|
1273 | 0 | FAIL_ON_ERROR(yr_arena_allocate_struct( |
1274 | 0 | compiler->arena, |
1275 | 0 | YR_METAS_TABLE, |
1276 | 0 | sizeof(YR_META), |
1277 | 0 | meta_ref, |
1278 | 0 | offsetof(YR_META, identifier), |
1279 | 0 | offsetof(YR_META, string), |
1280 | 0 | EOL)); |
1281 | |
|
1282 | 0 | YR_META* meta = (YR_META*) yr_arena_ref_to_ptr(compiler->arena, meta_ref); |
1283 | |
|
1284 | 0 | meta->type = type; |
1285 | 0 | meta->integer = integer; |
1286 | |
|
1287 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, identifier, &ref)); |
1288 | |
|
1289 | 0 | meta->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1290 | |
|
1291 | 0 | if (string != NULL) |
1292 | 0 | { |
1293 | 0 | FAIL_ON_ERROR(_yr_compiler_store_string(compiler, string, &ref)); |
1294 | |
|
1295 | 0 | meta->string = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref); |
1296 | 0 | } |
1297 | 0 | else |
1298 | 0 | { |
1299 | 0 | meta->string = NULL; |
1300 | 0 | } |
1301 | | |
1302 | 0 | compiler->current_meta_idx++; |
1303 | |
|
1304 | 0 | return ERROR_SUCCESS; |
1305 | 0 | } |
1306 | | |
1307 | | static int _yr_parser_valid_module_name(SIZED_STRING* module_name) |
1308 | 2 | { |
1309 | 2 | if (module_name->length == 0) |
1310 | 0 | return false; |
1311 | | |
1312 | 2 | if (strlen(module_name->c_string) != module_name->length) |
1313 | 0 | return false; |
1314 | | |
1315 | 2 | return true; |
1316 | 2 | } |
1317 | | |
1318 | | int yr_parser_reduce_import(yyscan_t yyscanner, SIZED_STRING* module_name) |
1319 | 2 | { |
1320 | 2 | int result; |
1321 | | |
1322 | 2 | YR_ARENA_REF ref; |
1323 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1324 | 2 | YR_OBJECT* module_structure; |
1325 | | |
1326 | 2 | if (!_yr_parser_valid_module_name(module_name)) |
1327 | 0 | { |
1328 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1329 | |
|
1330 | 0 | return ERROR_INVALID_MODULE_NAME; |
1331 | 0 | } |
1332 | | |
1333 | 2 | YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( |
1334 | 2 | compiler->arena, |
1335 | 2 | YR_NAMESPACES_TABLE, |
1336 | 2 | compiler->current_namespace_idx * sizeof(struct YR_NAMESPACE)); |
1337 | | |
1338 | 2 | module_structure = (YR_OBJECT*) yr_hash_table_lookup( |
1339 | 2 | compiler->objects_table, module_name->c_string, ns->name); |
1340 | | |
1341 | | // if module already imported, do nothing |
1342 | | |
1343 | 2 | if (module_structure != NULL) |
1344 | 0 | return ERROR_SUCCESS; |
1345 | | |
1346 | 2 | FAIL_ON_ERROR(yr_object_create( |
1347 | 2 | OBJECT_TYPE_STRUCTURE, module_name->c_string, NULL, &module_structure)); |
1348 | | |
1349 | 2 | FAIL_ON_ERROR(yr_hash_table_add( |
1350 | 2 | compiler->objects_table, |
1351 | 2 | module_name->c_string, |
1352 | 2 | ns->name, |
1353 | 2 | module_structure)); |
1354 | | |
1355 | 2 | result = yr_modules_do_declarations(module_name->c_string, module_structure); |
1356 | | |
1357 | 2 | if (result == ERROR_UNKNOWN_MODULE) |
1358 | 0 | yr_compiler_set_error_extra_info(compiler, module_name->c_string); |
1359 | | |
1360 | 2 | if (result != ERROR_SUCCESS) |
1361 | 0 | return result; |
1362 | | |
1363 | 2 | FAIL_ON_ERROR( |
1364 | 2 | _yr_compiler_store_string(compiler, module_name->c_string, &ref)); |
1365 | | |
1366 | 2 | FAIL_ON_ERROR(yr_parser_emit_with_arg_reloc( |
1367 | 2 | yyscanner, |
1368 | 2 | OP_IMPORT, |
1369 | 2 | yr_arena_ref_to_ptr(compiler->arena, &ref), |
1370 | 2 | NULL, |
1371 | 2 | NULL)); |
1372 | | |
1373 | 2 | return ERROR_SUCCESS; |
1374 | 2 | } |
1375 | | |
1376 | | static int _yr_parser_operator_to_opcode(const char* op, int expression_type) |
1377 | 2 | { |
1378 | 2 | int opcode = 0; |
1379 | | |
1380 | 2 | switch (expression_type) |
1381 | 2 | { |
1382 | 0 | case EXPRESSION_TYPE_INTEGER: |
1383 | 0 | opcode = OP_INT_BEGIN; |
1384 | 0 | break; |
1385 | 0 | case EXPRESSION_TYPE_FLOAT: |
1386 | 0 | opcode = OP_DBL_BEGIN; |
1387 | 0 | break; |
1388 | 2 | case EXPRESSION_TYPE_STRING: |
1389 | 2 | opcode = OP_STR_BEGIN; |
1390 | 2 | break; |
1391 | 0 | default: |
1392 | 0 | assert(false); |
1393 | 2 | } |
1394 | | |
1395 | 2 | if (op[0] == '<') |
1396 | 0 | { |
1397 | 0 | if (op[1] == '=') |
1398 | 0 | opcode += _OP_LE; |
1399 | 0 | else |
1400 | 0 | opcode += _OP_LT; |
1401 | 0 | } |
1402 | 2 | else if (op[0] == '>') |
1403 | 0 | { |
1404 | 0 | if (op[1] == '=') |
1405 | 0 | opcode += _OP_GE; |
1406 | 0 | else |
1407 | 0 | opcode += _OP_GT; |
1408 | 0 | } |
1409 | 2 | else if (op[1] == '=') |
1410 | 2 | { |
1411 | 2 | if (op[0] == '=') |
1412 | 2 | opcode += _OP_EQ; |
1413 | 0 | else |
1414 | 0 | opcode += _OP_NEQ; |
1415 | 2 | } |
1416 | 0 | else if (op[0] == '+') |
1417 | 0 | { |
1418 | 0 | opcode += _OP_ADD; |
1419 | 0 | } |
1420 | 0 | else if (op[0] == '-') |
1421 | 0 | { |
1422 | 0 | opcode += _OP_SUB; |
1423 | 0 | } |
1424 | 0 | else if (op[0] == '*') |
1425 | 0 | { |
1426 | 0 | opcode += _OP_MUL; |
1427 | 0 | } |
1428 | 0 | else if (op[0] == '\\') |
1429 | 0 | { |
1430 | 0 | opcode += _OP_DIV; |
1431 | 0 | } |
1432 | | |
1433 | 2 | if (IS_INT_OP(opcode) || IS_DBL_OP(opcode) || IS_STR_OP(opcode)) |
1434 | 2 | { |
1435 | 2 | return opcode; |
1436 | 2 | } |
1437 | | |
1438 | 0 | return OP_ERROR; |
1439 | 2 | } |
1440 | | |
1441 | | int yr_parser_reduce_operation( |
1442 | | yyscan_t yyscanner, |
1443 | | const char* op, |
1444 | | YR_EXPRESSION left_operand, |
1445 | | YR_EXPRESSION right_operand) |
1446 | 2 | { |
1447 | 2 | int expression_type; |
1448 | | |
1449 | 2 | YR_COMPILER* compiler = yyget_extra(yyscanner); |
1450 | | |
1451 | 2 | if ((left_operand.type == EXPRESSION_TYPE_INTEGER || |
1452 | 2 | left_operand.type == EXPRESSION_TYPE_FLOAT) && |
1453 | 2 | (right_operand.type == EXPRESSION_TYPE_INTEGER || |
1454 | 0 | right_operand.type == EXPRESSION_TYPE_FLOAT)) |
1455 | 0 | { |
1456 | 0 | if (left_operand.type != right_operand.type) |
1457 | 0 | { |
1458 | | // One operand is double and the other is integer, |
1459 | | // cast the integer to double |
1460 | |
|
1461 | 0 | FAIL_ON_ERROR(yr_parser_emit_with_arg( |
1462 | 0 | yyscanner, |
1463 | 0 | OP_INT_TO_DBL, |
1464 | 0 | (left_operand.type == EXPRESSION_TYPE_INTEGER) ? 2 : 1, |
1465 | 0 | NULL, |
1466 | 0 | NULL)); |
1467 | 0 | } |
1468 | | |
1469 | 0 | expression_type = EXPRESSION_TYPE_FLOAT; |
1470 | |
|
1471 | 0 | if (left_operand.type == EXPRESSION_TYPE_INTEGER && |
1472 | 0 | right_operand.type == EXPRESSION_TYPE_INTEGER) |
1473 | 0 | { |
1474 | 0 | expression_type = EXPRESSION_TYPE_INTEGER; |
1475 | 0 | } |
1476 | |
|
1477 | 0 | FAIL_ON_ERROR(yr_parser_emit( |
1478 | 0 | yyscanner, _yr_parser_operator_to_opcode(op, expression_type), NULL)); |
1479 | 0 | } |
1480 | 2 | else if ( |
1481 | 2 | left_operand.type == EXPRESSION_TYPE_STRING && |
1482 | 2 | right_operand.type == EXPRESSION_TYPE_STRING) |
1483 | 2 | { |
1484 | 2 | int opcode = _yr_parser_operator_to_opcode(op, EXPRESSION_TYPE_STRING); |
1485 | | |
1486 | 2 | if (opcode != OP_ERROR) |
1487 | 2 | { |
1488 | 2 | FAIL_ON_ERROR(yr_parser_emit(yyscanner, opcode, NULL)); |
1489 | 2 | } |
1490 | 0 | else |
1491 | 0 | { |
1492 | 0 | yr_compiler_set_error_extra_info_fmt( |
1493 | 0 | compiler, "strings don't support \"%s\" operation", op); |
1494 | |
|
1495 | 0 | return ERROR_WRONG_TYPE; |
1496 | 0 | } |
1497 | 2 | } |
1498 | 0 | else |
1499 | 0 | { |
1500 | 0 | yr_compiler_set_error_extra_info(compiler, "type mismatch"); |
1501 | |
|
1502 | 0 | return ERROR_WRONG_TYPE; |
1503 | 0 | } |
1504 | | |
1505 | 2 | return ERROR_SUCCESS; |
1506 | 2 | } |