/src/yara/libyara/modules/math/math.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2014-2021. The YARA Authors. All Rights Reserved. |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without modification, |
5 | | are permitted provided that the following conditions are met: |
6 | | |
7 | | 1. Redistributions of source code must retain the above copyright notice, this |
8 | | list of conditions and the following disclaimer. |
9 | | |
10 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
11 | | this list of conditions and the following disclaimer in the documentation and/or |
12 | | other materials provided with the distribution. |
13 | | |
14 | | 3. Neither the name of the copyright holder nor the names of its contributors |
15 | | may be used to endorse or promote products derived from this software without |
16 | | specific prior written permission. |
17 | | |
18 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
19 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
20 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
21 | | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR |
22 | | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
23 | | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
25 | | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
27 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | | */ |
29 | | |
30 | | #include <stdlib.h> |
31 | | #include <math.h> |
32 | | #include <yara/mem.h> |
33 | | #include <yara/modules.h> |
34 | | #include <yara/strutils.h> |
35 | | #include <yara/utils.h> |
36 | | |
37 | | #define MODULE_NAME math |
38 | | |
39 | | #define PI 3.141592653589793 |
40 | | // This is more than enough space to hold the maximum signed 64bit integer as a |
41 | | // string in decimal, hex or octal, including the sign and NULL terminator. |
42 | 0 | #define INT64_MAX_STRING 30 |
43 | | |
44 | | // log2 is not defined by math.h in VC++ |
45 | | |
#if defined(_MSC_VER) && _MSC_VER < 1800
// Fallback used only when the guard above selects an old MSVC toolchain
// whose math.h does not provide log2: the base-2 logarithm is derived from
// the natural logarithm.
static double log2(double n)
{
  const double ln_of_two = log(2.0);

  return log(n) / ln_of_two;
}
#endif
52 | | |
53 | 0 | uint32_t* get_distribution(int64_t offset, int64_t length, YR_SCAN_CONTEXT* context) { |
54 | 0 | bool past_first_block = false; |
55 | |
|
56 | 0 | size_t i; |
57 | |
|
58 | 0 | uint32_t* data = (uint32_t*) yr_calloc(256, sizeof(uint32_t)); |
59 | |
|
60 | 0 | if (data == NULL) { |
61 | 0 | return NULL; |
62 | 0 | } |
63 | | |
64 | 0 | YR_MEMORY_BLOCK* block = first_memory_block(context); |
65 | 0 | YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; |
66 | |
|
67 | 0 | if (offset < 0 || length < 0 || offset < block->base) |
68 | 0 | { |
69 | 0 | yr_free(data); |
70 | 0 | return NULL; |
71 | 0 | } |
72 | | |
73 | 0 | foreach_memory_block(iterator, block) |
74 | 0 | { |
75 | 0 | if (offset >= block->base && offset < block->base + block->size) |
76 | 0 | { |
77 | 0 | size_t data_offset = (size_t)(offset - block->base); |
78 | 0 | size_t data_len = (size_t) yr_min( |
79 | 0 | length, (size_t)(block->size - data_offset)); |
80 | |
|
81 | 0 | const uint8_t* block_data = block->fetch_data(block); |
82 | |
|
83 | 0 | if (block_data == NULL) |
84 | 0 | { |
85 | 0 | yr_free(data); |
86 | 0 | return NULL; |
87 | 0 | } |
88 | | |
89 | 0 | offset += data_len; |
90 | 0 | length -= data_len; |
91 | |
|
92 | 0 | for (i = 0; i < data_len; i++) |
93 | 0 | { |
94 | 0 | uint8_t c = *(block_data + data_offset + i); |
95 | 0 | data[c]++; |
96 | 0 | } |
97 | |
|
98 | 0 | past_first_block = true; |
99 | 0 | } |
100 | 0 | else if (past_first_block) |
101 | 0 | { |
102 | | // If offset is not within current block and we already |
103 | | // past the first block then the we are trying to compute |
104 | | // the distribution over a range of non contiguous blocks. As |
105 | | // range contains gaps of undefined data the distribution is |
106 | | // undefined. |
107 | |
|
108 | 0 | yr_free(data); |
109 | 0 | return NULL; |
110 | 0 | } |
111 | | |
112 | 0 | if (block->base + block->size > offset + length) |
113 | 0 | break; |
114 | 0 | } |
115 | | |
116 | 0 | if (!past_first_block) |
117 | 0 | { |
118 | 0 | yr_free(data); |
119 | 0 | return NULL; |
120 | 0 | } |
121 | 0 | return data; |
122 | 0 | } |
123 | | |
124 | 0 | uint32_t* get_distribution_global(YR_SCAN_CONTEXT* context) { |
125 | |
|
126 | 0 | size_t i; |
127 | |
|
128 | 0 | int64_t expected_next_offset = 0; |
129 | |
|
130 | 0 | uint32_t* data = (uint32_t*) yr_calloc(256, sizeof(uint32_t)); |
131 | |
|
132 | 0 | if (data == NULL) |
133 | 0 | return NULL; |
134 | | |
135 | 0 | YR_MEMORY_BLOCK* block = first_memory_block(context); |
136 | 0 | YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; |
137 | |
|
138 | 0 | foreach_memory_block(iterator, block) |
139 | 0 | { |
140 | 0 | if (expected_next_offset != block->base) |
141 | 0 | { |
142 | | // If offset is not directly after the current block then |
143 | | // we are trying to compute the distribution over a range of non |
144 | | // contiguous blocks. As the range contains gaps of |
145 | | // undefined data the distribution is undefined. |
146 | 0 | yr_free(data); |
147 | 0 | return NULL; |
148 | 0 | } |
149 | 0 | const uint8_t* block_data = block->fetch_data(block); |
150 | |
|
151 | 0 | if (block_data == NULL) |
152 | 0 | { |
153 | 0 | yr_free(data); |
154 | 0 | return NULL; |
155 | 0 | } |
156 | | |
157 | 0 | for (i = 0; i < block->size; i++) |
158 | 0 | { |
159 | 0 | uint8_t c = *(block_data + i); |
160 | 0 | data[c] += 1; |
161 | 0 | } |
162 | 0 | expected_next_offset = block->base + block->size; |
163 | 0 | } |
164 | 0 | return data; |
165 | 0 | } |
166 | | |
167 | | define_function(string_entropy) |
168 | 0 | { |
169 | 0 | size_t i; |
170 | 0 | double entropy = 0.0; |
171 | |
|
172 | 0 | SIZED_STRING* s = sized_string_argument(1); |
173 | |
|
174 | 0 | uint32_t* data = (uint32_t*) yr_calloc(256, sizeof(uint32_t)); |
175 | |
|
176 | 0 | if (data == NULL) |
177 | 0 | return_float(YR_UNDEFINED); |
178 | |
|
179 | 0 | for (i = 0; i < s->length; i++) |
180 | 0 | { |
181 | 0 | uint8_t c = s->c_string[i]; |
182 | 0 | data[c] += 1; |
183 | 0 | } |
184 | |
|
185 | 0 | for (i = 0; i < 256; i++) |
186 | 0 | { |
187 | 0 | if (data[i] != 0) |
188 | 0 | { |
189 | 0 | double x = (double) (data[i]) / s->length; |
190 | 0 | entropy -= x * log2(x); |
191 | 0 | } |
192 | 0 | } |
193 | |
|
194 | 0 | yr_free(data); |
195 | 0 | return_float(entropy); |
196 | 0 | } |
197 | | |
198 | | define_function(data_entropy) |
199 | 0 | { |
200 | 0 | double entropy = 0.0; |
201 | |
|
202 | 0 | int64_t offset = integer_argument(1); // offset where to start |
203 | 0 | int64_t length = integer_argument(2); // length of bytes we want entropy on |
204 | |
|
205 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
206 | |
|
207 | 0 | size_t i; |
208 | |
|
209 | 0 | size_t total_len = 0; |
210 | |
|
211 | 0 | uint32_t* data = get_distribution(offset, length, context); |
212 | 0 | if (data == NULL) |
213 | 0 | return_float(YR_UNDEFINED); |
214 | |
|
215 | 0 | for (i = 0; i < 256; i++) |
216 | 0 | { |
217 | 0 | total_len += data[i]; |
218 | 0 | } |
219 | |
|
220 | 0 | for (i = 0; i < 256; i++) |
221 | 0 | { |
222 | 0 | if (data[i] != 0) |
223 | 0 | { |
224 | 0 | double x = (double) (data[i]) / total_len; |
225 | 0 | entropy -= x * log2(x); |
226 | 0 | } |
227 | 0 | } |
228 | |
|
229 | 0 | yr_free(data); |
230 | 0 | return_float(entropy); |
231 | 0 | } |
232 | | |
233 | | define_function(string_deviation) |
234 | 0 | { |
235 | 0 | SIZED_STRING* s = sized_string_argument(1); |
236 | |
|
237 | 0 | double mean = float_argument(2); |
238 | 0 | double sum = 0.0; |
239 | |
|
240 | 0 | size_t i; |
241 | |
|
242 | 0 | for (i = 0; i < s->length; i++) sum += fabs(((double) s->c_string[i]) - mean); |
243 | |
|
244 | 0 | return_float(sum / s->length); |
245 | 0 | } |
246 | | |
247 | | define_function(data_deviation) |
248 | 0 | { |
249 | 0 | int64_t offset = integer_argument(1); |
250 | 0 | int64_t length = integer_argument(2); |
251 | |
|
252 | 0 | double mean = float_argument(3); |
253 | 0 | double sum = 0.0; |
254 | |
|
255 | 0 | size_t total_len = 0; |
256 | 0 | size_t i; |
257 | |
|
258 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
259 | |
|
260 | 0 | uint32_t* data = get_distribution(offset, length, context); |
261 | 0 | if (data == NULL) |
262 | 0 | return_float(YR_UNDEFINED); |
263 | |
|
264 | 0 | for (i = 0; i < 256; i++) |
265 | 0 | { |
266 | 0 | total_len += data[i]; |
267 | 0 | sum += fabs(((double) i) - mean) * data[i]; |
268 | 0 | } |
269 | |
|
270 | 0 | yr_free(data); |
271 | 0 | return_float(sum / total_len); |
272 | 0 | } |
273 | | |
274 | | define_function(string_mean) |
275 | 0 | { |
276 | 0 | size_t i; |
277 | 0 | double sum = 0.0; |
278 | |
|
279 | 0 | SIZED_STRING* s = sized_string_argument(1); |
280 | |
|
281 | 0 | for (i = 0; i < s->length; i++) sum += (double) s->c_string[i]; |
282 | |
|
283 | 0 | return_float(sum / s->length); |
284 | 0 | } |
285 | | |
286 | | define_function(data_mean) |
287 | 0 | { |
288 | 0 | double sum = 0.0; |
289 | |
|
290 | 0 | int64_t offset = integer_argument(1); |
291 | 0 | int64_t length = integer_argument(2); |
292 | |
|
293 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
294 | |
|
295 | 0 | size_t total_len = 0; |
296 | 0 | size_t i; |
297 | |
|
298 | 0 | uint32_t* data = get_distribution(offset, length, context); |
299 | 0 | if (data == NULL) |
300 | 0 | return_float(YR_UNDEFINED); |
301 | |
|
302 | 0 | for (i = 0; i < 256; i++) |
303 | 0 | { |
304 | 0 | total_len += data[i]; |
305 | 0 | sum += ((double) i) * data[i]; |
306 | 0 | } |
307 | |
|
308 | 0 | yr_free(data); |
309 | 0 | return_float(sum / total_len); |
310 | 0 | } |
311 | | |
312 | | define_function(data_serial_correlation) |
313 | 0 | { |
314 | 0 | int past_first_block = false; |
315 | |
|
316 | 0 | size_t total_len = 0; |
317 | 0 | size_t i; |
318 | |
|
319 | 0 | int64_t offset = integer_argument(1); |
320 | 0 | int64_t length = integer_argument(2); |
321 | |
|
322 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
323 | 0 | YR_MEMORY_BLOCK* block = first_memory_block(context); |
324 | 0 | YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; |
325 | |
|
326 | 0 | double sccun = 0; |
327 | 0 | double sccfirst = 0; |
328 | 0 | double scclast = 0; |
329 | 0 | double scct1 = 0; |
330 | 0 | double scct2 = 0; |
331 | 0 | double scct3 = 0; |
332 | 0 | double scc = 0; |
333 | |
|
334 | 0 | if (offset < 0 || length < 0 || offset < block->base) |
335 | 0 | return_float(YR_UNDEFINED); |
336 | |
|
337 | 0 | foreach_memory_block(iterator, block) |
338 | 0 | { |
339 | 0 | if (offset >= block->base && offset < block->base + block->size) |
340 | 0 | { |
341 | 0 | size_t data_offset = (size_t)(offset - block->base); |
342 | 0 | size_t data_len = (size_t) yr_min( |
343 | 0 | length, (size_t)(block->size - data_offset)); |
344 | |
|
345 | 0 | const uint8_t* block_data = block->fetch_data(block); |
346 | |
|
347 | 0 | if (block_data == NULL) |
348 | 0 | return_float(YR_UNDEFINED); |
349 | |
|
350 | 0 | total_len += data_len; |
351 | 0 | offset += data_len; |
352 | 0 | length -= data_len; |
353 | |
|
354 | 0 | for (i = 0; i < data_len; i++) |
355 | 0 | { |
356 | 0 | sccun = (double) *(block_data + data_offset + i); |
357 | 0 | if (i == 0) { |
358 | 0 | sccfirst = sccun; |
359 | 0 | } |
360 | 0 | scct1 += scclast * sccun; |
361 | 0 | scct2 += sccun; |
362 | 0 | scct3 += sccun * sccun; |
363 | 0 | scclast = sccun; |
364 | 0 | } |
365 | |
|
366 | 0 | past_first_block = true; |
367 | 0 | } |
368 | 0 | else if (past_first_block) |
369 | 0 | { |
370 | | // If offset is not within current block and we already |
371 | | // past the first block then the we are trying to compute |
372 | | // the checksum over a range of non contiguous blocks. As |
373 | | // range contains gaps of undefined data the checksum is |
374 | | // undefined. |
375 | 0 | return_float(YR_UNDEFINED); |
376 | 0 | } |
377 | | |
378 | 0 | if (block->base + block->size > offset + length) |
379 | 0 | break; |
380 | 0 | } |
381 | | |
382 | 0 | if (!past_first_block) |
383 | 0 | return_float(YR_UNDEFINED); |
384 | |
|
385 | 0 | scct1 += scclast * sccfirst; |
386 | 0 | scct2 *= scct2; |
387 | |
|
388 | 0 | scc = total_len * scct3 - scct2; |
389 | |
|
390 | 0 | if (scc == 0) |
391 | 0 | scc = -100000; |
392 | 0 | else |
393 | 0 | scc = (total_len * scct1 - scct2) / scc; |
394 | |
|
395 | 0 | return_float(scc); |
396 | 0 | } |
397 | | |
398 | | define_function(string_serial_correlation) |
399 | 0 | { |
400 | 0 | SIZED_STRING* s = sized_string_argument(1); |
401 | |
|
402 | 0 | double sccun = 0; |
403 | 0 | double scclast = 0; |
404 | 0 | double scct1 = 0; |
405 | 0 | double scct2 = 0; |
406 | 0 | double scct3 = 0; |
407 | 0 | double scc = 0; |
408 | |
|
409 | 0 | size_t i; |
410 | |
|
411 | 0 | for (i = 0; i < s->length; i++) |
412 | 0 | { |
413 | 0 | sccun = (double) s->c_string[i]; |
414 | 0 | scct1 += scclast * sccun; |
415 | 0 | scct2 += sccun; |
416 | 0 | scct3 += sccun * sccun; |
417 | 0 | scclast = sccun; |
418 | 0 | } |
419 | |
|
420 | 0 | if (s->length > 0) { |
421 | 0 | scct1 += scclast * (double) s->c_string[0]; |
422 | 0 | } |
423 | 0 | scct2 *= scct2; |
424 | |
|
425 | 0 | scc = s->length * scct3 - scct2; |
426 | |
|
427 | 0 | if (scc == 0) |
428 | 0 | scc = -100000; |
429 | 0 | else |
430 | 0 | scc = (s->length * scct1 - scct2) / scc; |
431 | |
|
432 | 0 | return_float(scc); |
433 | 0 | } |
434 | | |
435 | | define_function(data_monte_carlo_pi) |
436 | 0 | { |
437 | 0 | int past_first_block = false; |
438 | 0 | int mcount = 0; |
439 | 0 | int inmont = 0; |
440 | |
|
441 | 0 | double INCIRC = pow(pow(256.0, 3.0) - 1, 2.0); |
442 | 0 | double mpi = 0; |
443 | |
|
444 | 0 | size_t i; |
445 | |
|
446 | 0 | int64_t offset = integer_argument(1); |
447 | 0 | int64_t length = integer_argument(2); |
448 | |
|
449 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
450 | 0 | YR_MEMORY_BLOCK* block = first_memory_block(context); |
451 | 0 | YR_MEMORY_BLOCK_ITERATOR* iterator = context->iterator; |
452 | |
|
453 | 0 | if (offset < 0 || length < 0 || offset < block->base) |
454 | 0 | return_float(YR_UNDEFINED); |
455 | |
|
456 | 0 | foreach_memory_block(iterator, block) |
457 | 0 | { |
458 | 0 | if (offset >= block->base && offset < block->base + block->size) |
459 | 0 | { |
460 | 0 | unsigned int monte[6]; |
461 | |
|
462 | 0 | size_t data_offset = (size_t)(offset - block->base); |
463 | 0 | size_t data_len = (size_t) yr_min( |
464 | 0 | length, (size_t)(block->size - data_offset)); |
465 | |
|
466 | 0 | const uint8_t* block_data = block->fetch_data(block); |
467 | |
|
468 | 0 | if (block_data == NULL) |
469 | 0 | return_float(YR_UNDEFINED); |
470 | |
|
471 | 0 | offset += data_len; |
472 | 0 | length -= data_len; |
473 | |
|
474 | 0 | for (i = 0; i < data_len; i++) |
475 | 0 | { |
476 | 0 | monte[i % 6] = (unsigned int) *(block_data + data_offset + i); |
477 | |
|
478 | 0 | if (i % 6 == 5) |
479 | 0 | { |
480 | 0 | double mx = 0; |
481 | 0 | double my = 0; |
482 | 0 | int j; |
483 | |
|
484 | 0 | mcount++; |
485 | |
|
486 | 0 | for (j = 0; j < 3; j++) |
487 | 0 | { |
488 | 0 | mx = (mx * 256.0) + monte[j]; |
489 | 0 | my = (my * 256.0) + monte[j + 3]; |
490 | 0 | } |
491 | |
|
492 | 0 | if ((mx * mx + my * my) <= INCIRC) |
493 | 0 | inmont++; |
494 | 0 | } |
495 | 0 | } |
496 | |
|
497 | 0 | past_first_block = true; |
498 | 0 | } |
499 | 0 | else if (past_first_block) |
500 | 0 | { |
501 | | // If offset is not within current block and we already |
502 | | // past the first block then the we are trying to compute |
503 | | // the checksum over a range of non contiguous blocks. As |
504 | | // range contains gaps of undefined data the checksum is |
505 | | // undefined. |
506 | 0 | return_float(YR_UNDEFINED); |
507 | 0 | } |
508 | | |
509 | 0 | if (block->base + block->size > offset + length) |
510 | 0 | break; |
511 | 0 | } |
512 | | |
513 | 0 | if (!past_first_block || mcount == 0) |
514 | 0 | return_float(YR_UNDEFINED); |
515 | |
|
516 | 0 | mpi = 4.0 * ((double) inmont / mcount); |
517 | |
|
518 | 0 | return_float(fabs((mpi - PI) / PI)); |
519 | 0 | } |
520 | | |
521 | | define_function(string_monte_carlo_pi) |
522 | 0 | { |
523 | 0 | SIZED_STRING* s = sized_string_argument(1); |
524 | |
|
525 | 0 | double INCIRC = pow(pow(256.0, 3.0) - 1, 2.0); |
526 | 0 | double mpi = 0; |
527 | |
|
528 | 0 | unsigned int monte[6]; |
529 | |
|
530 | 0 | int mcount = 0; |
531 | 0 | int inmont = 0; |
532 | |
|
533 | 0 | size_t i; |
534 | |
|
535 | 0 | for (i = 0; i < s->length; i++) |
536 | 0 | { |
537 | 0 | monte[i % 6] = (unsigned int) s->c_string[i]; |
538 | |
|
539 | 0 | if (i % 6 == 5) |
540 | 0 | { |
541 | 0 | double mx = 0; |
542 | 0 | double my = 0; |
543 | |
|
544 | 0 | int j; |
545 | |
|
546 | 0 | mcount++; |
547 | |
|
548 | 0 | for (j = 0; j < 3; j++) |
549 | 0 | { |
550 | 0 | mx = (mx * 256.0) + monte[j]; |
551 | 0 | my = (my * 256.0) + monte[j + 3]; |
552 | 0 | } |
553 | |
|
554 | 0 | if ((mx * mx + my * my) <= INCIRC) |
555 | 0 | inmont++; |
556 | 0 | } |
557 | 0 | } |
558 | |
|
559 | 0 | if (mcount == 0) |
560 | 0 | return_float(YR_UNDEFINED); |
561 | |
|
562 | 0 | mpi = 4.0 * ((double) inmont / mcount); |
563 | 0 | return_float(fabs((mpi - PI) / PI)); |
564 | 0 | } |
565 | | |
// in_range(test, lower, upper): 1 when lower <= test <= upper (both bounds
// inclusive), 0 otherwise.
define_function(in_range)
{
  const double value = float_argument(1);
  const double low = float_argument(2);
  const double high = float_argument(3);

  int inside = 0;

  if (low <= value)
  {
    if (value <= high)
      inside = 1;
  }

  return_integer(inside);
}
574 | | |
575 | | // Undefine existing "min" and "max" macros in order to avoid conflicts with |
576 | | // function names. |
577 | | #undef min |
578 | | #undef max |
579 | | |
// min(a, b): the smaller of the two integer arguments. Both are held in
// uint64_t, so the comparison is unsigned.
define_function(min)
{
  const uint64_t first = integer_argument(1);
  const uint64_t second = integer_argument(2);

  uint64_t smallest = second;

  if (first < second)
    smallest = first;

  return_integer(smallest);
}
587 | | |
// max(a, b): the larger of the two integer arguments. Both are held in
// uint64_t, so the comparison is unsigned.
define_function(max)
{
  const uint64_t first = integer_argument(1);
  const uint64_t second = integer_argument(2);

  uint64_t largest = second;

  if (first > second)
    largest = first;

  return_integer(largest);
}
595 | | |
// to_number(value): 1 when the argument is non-zero, 0 otherwise.
define_function(to_number)
{
  int as_number = 0;

  if (integer_argument(1))
    as_number = 1;

  return_integer(as_number);
}
600 | | |
601 | | define_function(yr_math_abs) |
602 | 0 | { |
603 | 0 | return_integer(llabs(integer_argument(1))); |
604 | 0 | } |
605 | | |
606 | | define_function(count_range) |
607 | 0 | { |
608 | 0 | uint8_t byte = (uint8_t) integer_argument(1); |
609 | 0 | int64_t offset = integer_argument(2); |
610 | 0 | int64_t length = integer_argument(3); |
611 | |
|
612 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
613 | |
|
614 | 0 | uint32_t* distribution = get_distribution(offset, length, context); |
615 | 0 | if (distribution == NULL) |
616 | 0 | { |
617 | 0 | return_integer(YR_UNDEFINED); |
618 | 0 | } |
619 | 0 | int64_t count = (int64_t) distribution[byte]; |
620 | 0 | yr_free(distribution); |
621 | 0 | return_integer(count); |
622 | 0 | } |
623 | | |
624 | | define_function(count_global) |
625 | 0 | { |
626 | 0 | uint8_t byte = (uint8_t) integer_argument(1); |
627 | |
|
628 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
629 | |
|
630 | 0 | uint32_t* distribution = get_distribution_global(context); |
631 | 0 | if (distribution == NULL) |
632 | 0 | { |
633 | 0 | return_integer(YR_UNDEFINED); |
634 | 0 | } |
635 | 0 | int64_t count = (int64_t) distribution[byte]; |
636 | 0 | yr_free(distribution); |
637 | 0 | return_integer(count); |
638 | 0 | } |
639 | | |
640 | | define_function(percentage_range) |
641 | 0 | { |
642 | 0 | uint8_t byte = (uint8_t) integer_argument(1); |
643 | 0 | int64_t offset = integer_argument(2); |
644 | 0 | int64_t length = integer_argument(3); |
645 | |
|
646 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
647 | |
|
648 | 0 | uint32_t* distribution = get_distribution(offset, length, context); |
649 | 0 | if (distribution == NULL) { |
650 | 0 | return_float(YR_UNDEFINED); |
651 | 0 | } |
652 | 0 | int64_t count = (int64_t) distribution[byte]; |
653 | 0 | int64_t total_count = 0; |
654 | 0 | int64_t i; |
655 | 0 | for (i = 0; i < 256; i++) { |
656 | 0 | total_count += distribution[i]; |
657 | 0 | } |
658 | 0 | yr_free(distribution); |
659 | 0 | return_float(((float) count) / ((float) total_count)); |
660 | 0 | } |
661 | | |
662 | | define_function(percentage_global) |
663 | 0 | { |
664 | 0 | uint8_t byte = (uint8_t) integer_argument(1); |
665 | |
|
666 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
667 | |
|
668 | 0 | uint32_t* distribution = get_distribution_global(context); |
669 | 0 | if (distribution == NULL) { |
670 | 0 | return_float(YR_UNDEFINED); |
671 | 0 | } |
672 | 0 | int64_t count = (int64_t) distribution[byte]; |
673 | 0 | int64_t total_count = 0; |
674 | 0 | int64_t i; |
675 | 0 | for (i = 0; i < 256; i++) { |
676 | 0 | total_count += distribution[i]; |
677 | 0 | } |
678 | 0 | yr_free(distribution); |
679 | 0 | return_float(((float) count) / ((float) total_count)); |
680 | 0 | } |
681 | | |
682 | | define_function(mode_range) |
683 | 0 | { |
684 | 0 | int64_t offset = integer_argument(1); |
685 | 0 | int64_t length = integer_argument(2); |
686 | |
|
687 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
688 | |
|
689 | 0 | uint32_t* distribution = get_distribution(offset, length, context); |
690 | 0 | if (distribution == NULL) { |
691 | 0 | return_integer(YR_UNDEFINED); |
692 | 0 | } |
693 | | |
694 | 0 | int64_t most_common = 0; |
695 | 0 | size_t i; |
696 | 0 | for (i = 0; i < 256; i++) |
697 | 0 | { |
698 | 0 | if (distribution[i] > distribution[most_common]) |
699 | 0 | { |
700 | 0 | most_common = (int64_t) i; |
701 | 0 | } |
702 | 0 | } |
703 | 0 | yr_free(distribution); |
704 | 0 | return_integer(most_common); |
705 | 0 | } |
706 | | |
707 | | define_function(mode_global) |
708 | 0 | { |
709 | 0 | YR_SCAN_CONTEXT* context = yr_scan_context(); |
710 | |
|
711 | 0 | uint32_t* distribution = get_distribution_global(context); |
712 | 0 | if (distribution == NULL) { |
713 | 0 | return_integer(YR_UNDEFINED); |
714 | 0 | } |
715 | | |
716 | 0 | int64_t most_common = 0; |
717 | 0 | size_t i; |
718 | 0 | for (i = 0; i < 256; i++) |
719 | 0 | { |
720 | 0 | if (distribution[i] > distribution[most_common]) |
721 | 0 | { |
722 | 0 | most_common = (int64_t) i; |
723 | 0 | } |
724 | 0 | } |
725 | 0 | yr_free(distribution); |
726 | 0 | return_integer(most_common); |
727 | 0 | } |
728 | | |
729 | | define_function(to_string) |
730 | 0 | { |
731 | 0 | int64_t i = integer_argument(1); |
732 | 0 | char str[INT64_MAX_STRING]; |
733 | 0 | snprintf(str, INT64_MAX_STRING, "%" PRId64, i); |
734 | 0 | return_string(&str); |
735 | 0 | } |
736 | | |
737 | | define_function(to_string_base) |
738 | 0 | { |
739 | 0 | int64_t i = integer_argument(1); |
740 | 0 | int64_t base = integer_argument(2); |
741 | 0 | char str[INT64_MAX_STRING]; |
742 | 0 | char *fmt; |
743 | 0 | switch (base) |
744 | 0 | { |
745 | 0 | case 10: |
746 | 0 | fmt = "%" PRId64; |
747 | 0 | break; |
748 | 0 | case 8: |
749 | 0 | fmt = "%" PRIo64; |
750 | 0 | break; |
751 | 0 | case 16: |
752 | 0 | fmt = "%" PRIx64; |
753 | 0 | break; |
754 | 0 | default: |
755 | 0 | return_string(YR_UNDEFINED); |
756 | 0 | } |
757 | 0 | snprintf(str, INT64_MAX_STRING, fmt, i); |
758 | 0 | return_string(&str); |
759 | 0 | } |
760 | | |
761 | 0 | begin_declarations |
762 | 0 | declare_float("MEAN_BYTES"); |
763 | 0 | declare_function("in_range", "fff", "i", in_range); |
764 | 0 | declare_function("deviation", "iif", "f", data_deviation); |
765 | 0 | declare_function("deviation", "sf", "f", string_deviation); |
766 | 0 | declare_function("mean", "ii", "f", data_mean); |
767 | 0 | declare_function("mean", "s", "f", string_mean); |
768 | 0 | declare_function("serial_correlation", "ii", "f", data_serial_correlation); |
769 | 0 | declare_function("serial_correlation", "s", "f", string_serial_correlation); |
770 | 0 | declare_function("monte_carlo_pi", "ii", "f", data_monte_carlo_pi); |
771 | 0 | declare_function("monte_carlo_pi", "s", "f", string_monte_carlo_pi); |
772 | 0 | declare_function("entropy", "ii", "f", data_entropy); |
773 | 0 | declare_function("entropy", "s", "f", string_entropy); |
774 | 0 | declare_function("min", "ii", "i", min); |
775 | 0 | declare_function("max", "ii", "i", max); |
776 | 0 | declare_function("to_number", "b", "i", to_number); |
777 | 0 | declare_function("abs", "i", "i", yr_math_abs); |
778 | 0 | declare_function("count", "iii", "i", count_range); |
779 | 0 | declare_function("count", "i", "i", count_global); |
780 | 0 | declare_function("percentage", "iii", "f", percentage_range); |
781 | 0 | declare_function("percentage", "i", "f", percentage_global); |
782 | 0 | declare_function("mode", "ii", "i", mode_range); |
783 | 0 | declare_function("mode", "", "i", mode_global); |
784 | 0 | declare_function("to_string", "i", "s", to_string); |
785 | 0 | declare_function("to_string", "ii", "s", to_string_base); |
786 | 0 | end_declarations |
787 | | |
788 | | int module_initialize(YR_MODULE* module) |
789 | 2 | { |
790 | 2 | return ERROR_SUCCESS; |
791 | 2 | } |
792 | | |
793 | | int module_finalize(YR_MODULE* module) |
794 | 0 | { |
795 | 0 | return ERROR_SUCCESS; |
796 | 0 | } |
797 | | |
798 | | int module_load( |
799 | | YR_SCAN_CONTEXT* context, |
800 | | YR_OBJECT* module_object, |
801 | | void* module_data, |
802 | | size_t module_data_size) |
803 | 0 | { |
804 | 0 | yr_set_float(127.5, module_object, "MEAN_BYTES"); |
805 | 0 | return ERROR_SUCCESS; |
806 | 0 | } |
807 | | |
808 | | int module_unload(YR_OBJECT* module_object) |
809 | 0 | { |
810 | 0 | return ERROR_SUCCESS; |
811 | 0 | } |