/src/yara/libyara/tlshc/tlsh_impl.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "tlsh_impl.h" |
2 | | #include <tlshc/tlsh.h> |
3 | | #include "tlsh_util.h" |
4 | | |
5 | | #include <stdio.h> |
6 | | #include <stdlib.h> |
7 | | #include <string.h> |
8 | | |
9 | 0 | #define RANGE_LVALUE 256 |
10 | 0 | #define RANGE_QRATIO 16 |
11 | | |
12 | | static void find_quartile( |
13 | | unsigned int *q1, |
14 | | unsigned int *q2, |
15 | | unsigned int *q3, |
16 | | const unsigned int *a_bucket); |
17 | | static unsigned int partition( |
18 | | unsigned int *buf, |
19 | | unsigned int left, |
20 | | unsigned int right); |
21 | | static void tlsh_impl_fast_update5( |
22 | | TlshImpl *impl, |
23 | | const unsigned char *data, |
24 | | unsigned int len, |
25 | | int tlsh_option); |
26 | | |
27 | | // Pearson's sample random table |
28 | | #if defined BUCKETS_48 |
29 | | static unsigned char v_table48[256] = { |
30 | | 1, 39, 1, 12, 32, 34, 6, 22, 25, 1, 6, 36, 48, 38, 44, 19, 14, 5, 21, |
31 | | 37, 17, 37, 26, 32, 16, 47, 24, 34, 44, 46, 38, 8, 14, 33, 8, 7, 45, 48, |
32 | | 48, 2, 29, 5, 33, 18, 45, 0, 31, 30, 25, 11, 46, 22, 38, 45, 48, 34, 24, |
33 | | 48, 20, 22, 48, 35, 5, 43, 1, 42, 9, 22, 12, 48, 34, 31, 16, 5, 31, 7, |
34 | | 15, 14, 39, 48, 30, 25, 19, 10, 18, 10, 10, 3, 48, 27, 17, 43, 38, 35, 0, |
35 | | 48, 36, 8, 4, 27, 32, 37, 14, 4, 34, 30, 43, 13, 9, 48, 24, 27, 23, 20, |
36 | | 31, 30, 35, 40, 9, 3, 26, 11, 44, 32, 40, 18, 4, 10, 42, 30, 0, 39, 12, |
37 | | 35, 13, 26, 47, 26, 48, 46, 33, 18, 15, 8, 26, 7, 19, 23, 48, 14, 3, 6, |
38 | | 7, 11, 7, 28, 42, 5, 23, 35, 29, 29, 15, 46, 31, 47, 41, 16, 9, 41, 33, |
39 | | 32, 25, 16, 37, 27, 22, 25, 2, 13, 46, 20, 9, 1, 38, 36, 15, 20, 10, 23, |
40 | | 21, 37, 27, 44, 19, 28, 24, 48, 42, 4, 29, 12, 21, 48, 19, 13, 39, 11, 41, |
41 | | 40, 42, 3, 6, 0, 11, 33, 20, 47, 2, 12, 21, 36, 21, 28, 44, 36, 18, 28, |
42 | | 41, 6, 15, 8, 41, 40, 17, 4, 39, 47, 2, 24, 3, 17, 28, 0, 48, 29, 45, |
43 | | 45, 2, 43, 16, 43, 23, 13, 40, 17, |
44 | | }; |
45 | | #else |
46 | | static unsigned char v_table[256] = { |
47 | | 1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, |
48 | | 163, 14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, |
49 | | 38, 200, 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, |
50 | | 96, 31, 222, 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, |
51 | | 244, 227, 149, 235, 97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, |
52 | | 199, 111, 62, 135, 248, 174, 169, 211, 58, 66, 154, 106, 195, 245, 171, |
53 | | 17, 187, 182, 179, 0, 243, 132, 56, 148, 75, 128, 133, 158, 100, 130, |
54 | | 126, 91, 13, 153, 246, 216, 219, 119, 68, 223, 78, 83, 88, 201, 99, |
55 | | 122, 11, 92, 32, 136, 114, 52, 10, 138, 30, 48, 183, 156, 35, 61, |
56 | | 26, 143, 74, 251, 94, 129, 162, 63, 152, 170, 7, 115, 167, 241, 206, |
57 | | 3, 150, 55, 59, 151, 220, 90, 53, 23, 131, 125, 173, 15, 238, 79, |
58 | | 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123, 118, 73, 2, 157, |
59 | | 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229, 27, 188, 67, |
60 | | 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203, 233, 40, |
61 | | 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76, 140, |
62 | | 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120, |
63 | | 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, |
64 | | 209}; |
65 | | #endif |
66 | | |
67 | | // Pearson's algorithm |
68 | | static unsigned char b_mapping( |
69 | | unsigned char salt, |
70 | | unsigned char i, |
71 | | unsigned char j, |
72 | | unsigned char k) |
73 | 0 | { |
74 | 0 | unsigned char h = 0; |
75 | 0 |
|
76 | 0 | h = v_table[h ^ salt]; |
77 | 0 | h = v_table[h ^ i]; |
78 | 0 | h = v_table[h ^ j]; |
79 | 0 | h = v_table[h ^ k]; |
80 | 0 | return h; |
81 | 0 | } |
82 | | |
83 | | #if defined BUCKETS_48 |
84 | | #define fast_b_mapping(ms, i, j, k) \ |
85 | | (v_table48[v_table[v_table[ms ^ i] ^ j] ^ k]) |
86 | | #else |
87 | 0 | #define fast_b_mapping(ms, i, j, k) (v_table[v_table[v_table[ms ^ i] ^ j] ^ k]) |
88 | | #endif |
89 | | |
90 | | //////////////////////////////////////////////////////////////////////////////////////////// |
91 | | |
92 | | #if SLIDING_WND_SIZE == 5 |
93 | 0 | #define SLIDING_WND_SIZE_M1 4 |
94 | | #elif SLIDING_WND_SIZE == 4 |
95 | | #define SLIDING_WND_SIZE_M1 3 |
96 | | #elif SLIDING_WND_SIZE == 6 |
97 | | #define SLIDING_WND_SIZE_M1 5 |
98 | | #elif SLIDING_WND_SIZE == 7 |
99 | | #define SLIDING_WND_SIZE_M1 6 |
100 | | #elif SLIDING_WND_SIZE == 8 |
101 | | #define SLIDING_WND_SIZE_M1 7 |
102 | | #endif |
103 | | |
104 | 0 | #define RNG_SIZE SLIDING_WND_SIZE |
105 | 0 | #define RNG_IDX(i) ((i + RNG_SIZE) % RNG_SIZE) |
106 | | |
107 | | TlshImpl *tlsh_impl_new() |
108 | 0 | { |
109 | 0 | TlshImpl *impl = calloc(1, sizeof(TlshImpl)); |
110 | 0 | if (!impl) |
111 | 0 | return NULL; |
112 | | |
113 | 0 | return impl; |
114 | 0 | } |
115 | | void tlsh_impl_free(TlshImpl *impl) |
116 | 0 | { |
117 | 0 | if (impl) |
118 | 0 | { |
119 | 0 | free(impl->a_bucket); |
120 | 0 | free(impl->lsh_code); |
121 | 0 | free(impl); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | void tlsh_impl_reset(TlshImpl *impl) |
126 | 0 | { |
127 | 0 | free(impl->a_bucket); |
128 | 0 | impl->a_bucket = NULL; |
129 | 0 | memset(impl->slide_window, 0, sizeof impl->slide_window); |
130 | 0 | free(impl->lsh_code); |
131 | 0 | impl->lsh_code = NULL; |
132 | 0 | memset(&impl->lsh_bin, 0, sizeof impl->lsh_bin); |
133 | 0 | impl->data_len = 0; |
134 | 0 | impl->lsh_code_valid = false; |
135 | 0 | } |
136 | | |
137 | | int tlsh_impl_update( |
138 | | TlshImpl *impl, |
139 | | const unsigned char *data, |
140 | | unsigned int len, |
141 | | int tlsh_option) |
142 | 0 | { |
143 | 0 | if (impl->lsh_code_valid) |
144 | 0 | { |
145 | 0 | fprintf(stderr, "call to update() on a tlsh that is already valid\n"); |
146 | 0 | return 1; |
147 | 0 | } |
148 | | |
149 | 0 | unsigned int fed_len = impl->data_len; |
150 | |
|
151 | 0 | if (impl->a_bucket == NULL) |
152 | 0 | { |
153 | 0 | impl->a_bucket = malloc( |
154 | 0 | BUCKETS * sizeof(unsigned int)); // TODO error handling |
155 | 0 | if (!impl->a_bucket) |
156 | 0 | { |
157 | 0 | return 1; |
158 | 0 | } |
159 | | |
160 | 0 | memset(impl->a_bucket, 0, sizeof(int) * BUCKETS); |
161 | 0 | } |
162 | | |
163 | 0 | #if SLIDING_WND_SIZE == 5 |
164 | 0 | if (TLSH_CHECKSUM_LEN == 1) |
165 | 0 | { |
166 | 0 | tlsh_impl_fast_update5(impl, data, len, tlsh_option); |
167 | 0 | #ifndef CHECKSUM_0B |
168 | 0 | if ((tlsh_option & TLSH_OPTION_THREADED) || |
169 | 0 | (tlsh_option & TLSH_OPTION_PRIVATE)) |
170 | 0 | { |
171 | 0 | impl->lsh_bin.checksum[0] = 0; |
172 | 0 | } |
173 | 0 | #endif |
174 | 0 | return 0; |
175 | 0 | } |
176 | 0 | #endif |
177 | 0 | int j = (int) (impl->data_len % RNG_SIZE); |
178 | |
|
179 | 0 | for (unsigned int i = 0; i < len; i++, fed_len++, j = RNG_IDX(j + 1)) |
180 | 0 | { |
181 | 0 | impl->slide_window[j] = data[i]; |
182 | |
|
183 | 0 | if (fed_len >= SLIDING_WND_SIZE_M1) |
184 | 0 | { |
185 | | // only calculate when input >= 5 bytes |
186 | 0 | int j_1 = RNG_IDX(j - 1); |
187 | 0 | int j_2 = RNG_IDX(j - 2); |
188 | 0 | int j_3 = RNG_IDX(j - 3); |
189 | 0 | #if SLIDING_WND_SIZE >= 5 |
190 | 0 | int j_4 = RNG_IDX(j - 4); |
191 | 0 | #endif |
192 | | #if SLIDING_WND_SIZE >= 6 |
193 | | int j_5 = RNG_IDX(j - 5); |
194 | | #endif |
195 | | #if SLIDING_WND_SIZE >= 7 |
196 | | int j_6 = RNG_IDX(j - 6); |
197 | | #endif |
198 | | #if SLIDING_WND_SIZE >= 8 |
199 | | int j_7 = RNG_IDX(j - 7); |
200 | | #endif |
201 | |
|
202 | 0 | #ifndef CHECKSUM_0B |
203 | 0 | for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) |
204 | 0 | { |
205 | 0 | if (k == 0) |
206 | 0 | { |
207 | | // b_mapping(0, ... ) |
208 | 0 | impl->lsh_bin.checksum[k] = fast_b_mapping( |
209 | 0 | 1, |
210 | 0 | impl->slide_window[j], |
211 | 0 | impl->slide_window[j_1], |
212 | 0 | impl->lsh_bin.checksum[k]); |
213 | 0 | } |
214 | 0 | else |
215 | 0 | { |
216 | | // use calculated 1 byte checksums to expand the total checksum to 3 |
217 | | // bytes |
218 | 0 | impl->lsh_bin.checksum[k] = b_mapping( |
219 | 0 | impl->lsh_bin.checksum[k - 1], |
220 | 0 | impl->slide_window[j], |
221 | 0 | impl->slide_window[j_1], |
222 | 0 | impl->lsh_bin.checksum[k]); |
223 | 0 | } |
224 | 0 | } |
225 | 0 | #endif |
226 | |
|
227 | 0 | unsigned char r; |
228 | | // b_mapping(2, ... ) |
229 | 0 | r = fast_b_mapping( |
230 | 0 | 49, |
231 | 0 | impl->slide_window[j], |
232 | 0 | impl->slide_window[j_1], |
233 | 0 | impl->slide_window[j_2]); |
234 | 0 | impl->a_bucket[r]++; |
235 | | // b_mapping(3, ... ) |
236 | 0 | r = fast_b_mapping( |
237 | 0 | 12, |
238 | 0 | impl->slide_window[j], |
239 | 0 | impl->slide_window[j_1], |
240 | 0 | impl->slide_window[j_3]); |
241 | 0 | impl->a_bucket[r]++; |
242 | | // b_mapping(5, ... ) |
243 | 0 | r = fast_b_mapping( |
244 | 0 | 178, |
245 | 0 | impl->slide_window[j], |
246 | 0 | impl->slide_window[j_2], |
247 | 0 | impl->slide_window[j_3]); |
248 | 0 | impl->a_bucket[r]++; |
249 | 0 | #if SLIDING_WND_SIZE >= 5 |
250 | | // b_mapping(7, ... ) |
251 | 0 | r = fast_b_mapping( |
252 | 0 | 166, |
253 | 0 | impl->slide_window[j], |
254 | 0 | impl->slide_window[j_2], |
255 | 0 | impl->slide_window[j_4]); |
256 | 0 | impl->a_bucket[r]++; |
257 | | // b_mapping(11, ... ) |
258 | 0 | r = fast_b_mapping( |
259 | 0 | 84, |
260 | 0 | impl->slide_window[j], |
261 | 0 | impl->slide_window[j_1], |
262 | 0 | impl->slide_window[j_4]); |
263 | 0 | impl->a_bucket[r]++; |
264 | | // b_mapping(13, ... ) |
265 | 0 | r = fast_b_mapping( |
266 | 0 | 230, |
267 | 0 | impl->slide_window[j], |
268 | 0 | impl->slide_window[j_3], |
269 | 0 | impl->slide_window[j_4]); |
270 | 0 | impl->a_bucket[r]++; |
271 | 0 | #endif |
272 | | #if SLIDING_WND_SIZE >= 6 |
273 | | // b_mapping(17, ... ) |
274 | | r = fast_b_mapping( |
275 | | 197, |
276 | | this->slide_window[j], |
277 | | this->slide_window[j_1], |
278 | | this->slide_window[j_5]); |
279 | | this->a_bucket[r]++; |
280 | | // b_mapping(19, ... ) |
281 | | r = fast_b_mapping( |
282 | | 181, |
283 | | this->slide_window[j], |
284 | | this->slide_window[j_2], |
285 | | this->slide_window[j_5]); |
286 | | this->a_bucket[r]++; |
287 | | // b_mapping(23, ... ) |
288 | | r = fast_b_mapping( |
289 | | 80, |
290 | | this->slide_window[j], |
291 | | this->slide_window[j_3], |
292 | | this->slide_window[j_5]); |
293 | | this->a_bucket[r]++; |
294 | | // b_mapping(29, ... ) |
295 | | r = fast_b_mapping( |
296 | | 142, |
297 | | this->slide_window[j], |
298 | | this->slide_window[j_4], |
299 | | this->slide_window[j_5]); |
300 | | this->a_bucket[r]++; |
301 | | #endif |
302 | | #if SLIDING_WND_SIZE >= 7 |
303 | | // b_mapping(31, ... ) |
304 | | r = fast_b_mapping( |
305 | | 200, |
306 | | this->slide_window[j], |
307 | | this->slide_window[j_1], |
308 | | this->slide_window[j_6]); |
309 | | this->a_bucket[r]++; |
310 | | // b_mapping(37, ... ) |
311 | | r = fast_b_mapping( |
312 | | 253, |
313 | | this->slide_window[j], |
314 | | this->slide_window[j_2], |
315 | | this->slide_window[j_6]); |
316 | | this->a_bucket[r]++; |
317 | | // b_mapping(41, ... ) |
318 | | r = fast_b_mapping( |
319 | | 101, |
320 | | this->slide_window[j], |
321 | | this->slide_window[j_3], |
322 | | this->slide_window[j_6]); |
323 | | this->a_bucket[r]++; |
324 | | // b_mapping(43, ... ) |
325 | | r = fast_b_mapping( |
326 | | 18, |
327 | | this->slide_window[j], |
328 | | this->slide_window[j_4], |
329 | | this->slide_window[j_6]); |
330 | | this->a_bucket[r]++; |
331 | | // b_mapping(47, ... ) |
332 | | r = fast_b_mapping( |
333 | | 222, |
334 | | this->slide_window[j], |
335 | | this->slide_window[j_5], |
336 | | this->slide_window[j_6]); |
337 | | this->a_bucket[r]++; |
338 | | #endif |
339 | | #if SLIDING_WND_SIZE >= 8 |
340 | | // b_mapping(53, ... ) |
341 | | r = fast_b_mapping( |
342 | | 237, |
343 | | this->slide_window[j], |
344 | | this->slide_window[j_1], |
345 | | this->slide_window[j_7]); |
346 | | this->a_bucket[r]++; |
347 | | // b_mapping(59, ... ) |
348 | | r = fast_b_mapping( |
349 | | 214, |
350 | | this->slide_window[j], |
351 | | this->slide_window[j_2], |
352 | | this->slide_window[j_7]); |
353 | | this->a_bucket[r]++; |
354 | | // b_mapping(61, ... ) |
355 | | r = fast_b_mapping( |
356 | | 227, |
357 | | this->slide_window[j], |
358 | | this->slide_window[j_3], |
359 | | this->slide_window[j_7]); |
360 | | this->a_bucket[r]++; |
361 | | // b_mapping(67, ... ) |
362 | | r = fast_b_mapping( |
363 | | 22, |
364 | | this->slide_window[j], |
365 | | this->slide_window[j_4], |
366 | | this->slide_window[j_7]); |
367 | | this->a_bucket[r]++; |
368 | | // b_mapping(71, ... ) |
369 | | r = fast_b_mapping( |
370 | | 175, |
371 | | this->slide_window[j], |
372 | | this->slide_window[j_5], |
373 | | this->slide_window[j_7]); |
374 | | this->a_bucket[r]++; |
375 | | // b_mapping(73, ... ) |
376 | | r = fast_b_mapping( |
377 | | 5, |
378 | | this->slide_window[j], |
379 | | this->slide_window[j_6], |
380 | | this->slide_window[j_7]); |
381 | | this->a_bucket[r]++; |
382 | | #endif |
383 | 0 | } |
384 | 0 | } |
385 | 0 | impl->data_len += len; |
386 | 0 | #ifndef CHECKSUM_0B |
387 | 0 | if ((tlsh_option & TLSH_OPTION_THREADED) || |
388 | 0 | (tlsh_option & TLSH_OPTION_PRIVATE)) |
389 | 0 | { |
390 | 0 | for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) |
391 | 0 | { |
392 | 0 | impl->lsh_bin.checksum[k] = 0; |
393 | 0 | } |
394 | 0 | } |
395 | 0 | #endif |
396 | |
|
397 | 0 | return 0; |
398 | 0 | } |
399 | | |
400 | | ///////////////////////////////////////////////////////////////////////////// |
401 | | // update for the case when SLIDING_WND_SIZE==5 |
402 | | // have different optimized functions for |
403 | | // default TLSH |
404 | | // threaded TLSH |
405 | | // private TLSH |
406 | | ///////////////////////////////////////////////////////////////////////////// |
407 | | static void raw_fast_update5( |
408 | | // inputs |
409 | | const unsigned char *data, |
410 | | unsigned int len, |
411 | | unsigned int fed_len, |
412 | | // outputs |
413 | | unsigned int *a_bucket, |
414 | | unsigned char *ret_checksum, |
415 | | unsigned char *slide_window); |
416 | | |
417 | | static void raw_fast_update5_private( |
418 | | // inputs |
419 | | const unsigned char *data, |
420 | | unsigned int len, |
421 | | unsigned int fed_len, |
422 | | // outputs |
423 | | unsigned int *a_bucket, |
424 | | unsigned char *slide_window); |
425 | | |
426 | | static void tlsh_impl_fast_update5( |
427 | | TlshImpl *impl, |
428 | | const unsigned char *data, |
429 | | unsigned int len, |
430 | | int tlsh_option) |
431 | 0 | { |
432 | 0 | if (tlsh_option & TLSH_OPTION_PRIVATE) |
433 | 0 | { |
434 | 0 | raw_fast_update5_private( |
435 | 0 | data, len, impl->data_len, impl->a_bucket, impl->slide_window); |
436 | 0 | impl->data_len += len; |
437 | 0 | impl->lsh_bin.checksum[0] = 0; |
438 | 0 | } |
439 | 0 | else |
440 | 0 | { |
441 | 0 | raw_fast_update5( |
442 | 0 | data, |
443 | 0 | len, |
444 | 0 | impl->data_len, |
445 | 0 | impl->a_bucket, |
446 | 0 | &(impl->lsh_bin.checksum[0]), |
447 | 0 | impl->slide_window); |
448 | 0 | impl->data_len += len; |
449 | 0 | } |
450 | 0 | } |
451 | | |
452 | | static void raw_fast_update5( |
453 | | // inputs |
454 | | const unsigned char *data, |
455 | | unsigned int len, |
456 | | unsigned int fed_len, |
457 | | // outputs |
458 | | unsigned int *a_bucket, |
459 | | unsigned char *ret_checksum, |
460 | | unsigned char *slide_window) |
461 | 0 | { |
462 | 0 | int j = (int) (fed_len % RNG_SIZE); |
463 | 0 | unsigned char checksum = *ret_checksum; |
464 | |
|
465 | 0 | unsigned int start_i = 0; |
466 | 0 | if (fed_len < SLIDING_WND_SIZE_M1) |
467 | 0 | { |
468 | 0 | int extra = SLIDING_WND_SIZE_M1 - fed_len; |
469 | 0 | start_i = extra; |
470 | 0 | j = (j + extra) % RNG_SIZE; |
471 | 0 | } |
472 | 0 | for (unsigned int i = start_i; i < len;) |
473 | 0 | { |
474 | | // only calculate when input >= 5 bytes |
475 | 0 | if ((i >= 4) && (i + 5 < len)) |
476 | 0 | { |
477 | 0 | unsigned char a0 = data[i - 4]; |
478 | 0 | unsigned char a1 = data[i - 3]; |
479 | 0 | unsigned char a2 = data[i - 2]; |
480 | 0 | unsigned char a3 = data[i - 1]; |
481 | 0 | unsigned char a4 = data[i]; |
482 | 0 | unsigned char a5 = data[i + 1]; |
483 | 0 | unsigned char a6 = data[i + 2]; |
484 | 0 | unsigned char a7 = data[i + 3]; |
485 | 0 | unsigned char a8 = data[i + 4]; |
486 | |
|
487 | 0 | checksum = fast_b_mapping(1, a4, a3, checksum); |
488 | 0 | a_bucket[fast_b_mapping(49, a4, a3, a2)]++; |
489 | 0 | a_bucket[fast_b_mapping(12, a4, a3, a1)]++; |
490 | 0 | a_bucket[fast_b_mapping(178, a4, a2, a1)]++; |
491 | 0 | a_bucket[fast_b_mapping(166, a4, a2, a0)]++; |
492 | 0 | a_bucket[fast_b_mapping(84, a4, a3, a0)]++; |
493 | 0 | a_bucket[fast_b_mapping(230, a4, a1, a0)]++; |
494 | |
|
495 | 0 | checksum = fast_b_mapping(1, a5, a4, checksum); |
496 | 0 | a_bucket[fast_b_mapping(49, a5, a4, a3)]++; |
497 | 0 | a_bucket[fast_b_mapping(12, a5, a4, a2)]++; |
498 | 0 | a_bucket[fast_b_mapping(178, a5, a3, a2)]++; |
499 | 0 | a_bucket[fast_b_mapping(166, a5, a3, a1)]++; |
500 | 0 | a_bucket[fast_b_mapping(84, a5, a4, a1)]++; |
501 | 0 | a_bucket[fast_b_mapping(230, a5, a2, a1)]++; |
502 | |
|
503 | 0 | checksum = fast_b_mapping(1, a6, a5, checksum); |
504 | 0 | a_bucket[fast_b_mapping(49, a6, a5, a4)]++; |
505 | 0 | a_bucket[fast_b_mapping(12, a6, a5, a3)]++; |
506 | 0 | a_bucket[fast_b_mapping(178, a6, a4, a3)]++; |
507 | 0 | a_bucket[fast_b_mapping(166, a6, a4, a2)]++; |
508 | 0 | a_bucket[fast_b_mapping(84, a6, a5, a2)]++; |
509 | 0 | a_bucket[fast_b_mapping(230, a6, a3, a2)]++; |
510 | |
|
511 | 0 | checksum = fast_b_mapping(1, a7, a6, checksum); |
512 | 0 | a_bucket[fast_b_mapping(49, a7, a6, a5)]++; |
513 | 0 | a_bucket[fast_b_mapping(12, a7, a6, a4)]++; |
514 | 0 | a_bucket[fast_b_mapping(178, a7, a5, a4)]++; |
515 | 0 | a_bucket[fast_b_mapping(166, a7, a5, a3)]++; |
516 | 0 | a_bucket[fast_b_mapping(84, a7, a6, a3)]++; |
517 | 0 | a_bucket[fast_b_mapping(230, a7, a4, a3)]++; |
518 | |
|
519 | 0 | checksum = fast_b_mapping(1, a8, a7, checksum); |
520 | 0 | a_bucket[fast_b_mapping(49, a8, a7, a6)]++; |
521 | 0 | a_bucket[fast_b_mapping(12, a8, a7, a5)]++; |
522 | 0 | a_bucket[fast_b_mapping(178, a8, a6, a5)]++; |
523 | 0 | a_bucket[fast_b_mapping(166, a8, a6, a4)]++; |
524 | 0 | a_bucket[fast_b_mapping(84, a8, a7, a4)]++; |
525 | 0 | a_bucket[fast_b_mapping(230, a8, a5, a4)]++; |
526 | |
|
527 | 0 | i = i + 5; |
528 | 0 | j = RNG_IDX(j + 5); |
529 | 0 | } |
530 | 0 | else |
531 | 0 | { |
532 | 0 | slide_window[j] = data[i]; |
533 | 0 | int j_1 = RNG_IDX(j - 1); |
534 | 0 | if (i >= 1) |
535 | 0 | { |
536 | 0 | slide_window[j_1] = data[i - 1]; |
537 | 0 | } |
538 | 0 | int j_2 = RNG_IDX(j - 2); |
539 | 0 | if (i >= 2) |
540 | 0 | { |
541 | 0 | slide_window[j_2] = data[i - 2]; |
542 | 0 | } |
543 | 0 | int j_3 = RNG_IDX(j - 3); |
544 | 0 | if (i >= 3) |
545 | 0 | { |
546 | 0 | slide_window[j_3] = data[i - 3]; |
547 | 0 | } |
548 | 0 | int j_4 = RNG_IDX(j - 4); |
549 | 0 | if (i >= 4) |
550 | 0 | { |
551 | 0 | slide_window[j_4] = data[i - 4]; |
552 | 0 | } |
553 | |
|
554 | 0 | checksum = fast_b_mapping( |
555 | 0 | 1, slide_window[j], slide_window[j_1], checksum); |
556 | 0 | a_bucket[fast_b_mapping( |
557 | 0 | 49, slide_window[j], slide_window[j_1], slide_window[j_2])]++; |
558 | 0 | a_bucket[fast_b_mapping( |
559 | 0 | 12, slide_window[j], slide_window[j_1], slide_window[j_3])]++; |
560 | 0 | a_bucket[fast_b_mapping( |
561 | 0 | 178, slide_window[j], slide_window[j_2], slide_window[j_3])]++; |
562 | 0 | a_bucket[fast_b_mapping( |
563 | 0 | 166, slide_window[j], slide_window[j_2], slide_window[j_4])]++; |
564 | 0 | a_bucket[fast_b_mapping( |
565 | 0 | 84, slide_window[j], slide_window[j_1], slide_window[j_4])]++; |
566 | 0 | a_bucket[fast_b_mapping( |
567 | 0 | 230, slide_window[j], slide_window[j_3], slide_window[j_4])]++; |
568 | 0 | i++; |
569 | 0 | j = RNG_IDX(j + 1); |
570 | 0 | } |
571 | 0 | } |
572 | 0 | *ret_checksum = checksum; |
573 | 0 | } |
574 | | |
575 | | static void raw_fast_update5_private( |
576 | | // inputs |
577 | | const unsigned char *data, |
578 | | unsigned int len, |
579 | | unsigned int fed_len, |
580 | | // outputs |
581 | | unsigned int *a_bucket, |
582 | | unsigned char *slide_window) |
583 | 0 | { |
584 | 0 | int j = (int) (fed_len % RNG_SIZE); |
585 | |
|
586 | 0 | unsigned int start_i = 0; |
587 | 0 | if (fed_len < SLIDING_WND_SIZE_M1) |
588 | 0 | { |
589 | 0 | int extra = SLIDING_WND_SIZE_M1 - fed_len; |
590 | 0 | start_i = extra; |
591 | 0 | j = (j + extra) % RNG_SIZE; |
592 | 0 | } |
593 | 0 | for (unsigned int i = start_i; i < len;) |
594 | 0 | { |
595 | | // only calculate when input >= 5 bytes |
596 | 0 | if ((i >= 4) && (i + 5 < len)) |
597 | 0 | { |
598 | 0 | unsigned char a0 = data[i - 4]; |
599 | 0 | unsigned char a1 = data[i - 3]; |
600 | 0 | unsigned char a2 = data[i - 2]; |
601 | 0 | unsigned char a3 = data[i - 1]; |
602 | 0 | unsigned char a4 = data[i]; |
603 | 0 | unsigned char a5 = data[i + 1]; |
604 | 0 | unsigned char a6 = data[i + 2]; |
605 | 0 | unsigned char a7 = data[i + 3]; |
606 | 0 | unsigned char a8 = data[i + 4]; |
607 | |
|
608 | 0 | a_bucket[fast_b_mapping(49, a4, a3, a2)]++; |
609 | 0 | a_bucket[fast_b_mapping(12, a4, a3, a1)]++; |
610 | 0 | a_bucket[fast_b_mapping(178, a4, a2, a1)]++; |
611 | 0 | a_bucket[fast_b_mapping(166, a4, a2, a0)]++; |
612 | 0 | a_bucket[fast_b_mapping(84, a4, a3, a0)]++; |
613 | 0 | a_bucket[fast_b_mapping(230, a4, a1, a0)]++; |
614 | |
|
615 | 0 | a_bucket[fast_b_mapping(49, a5, a4, a3)]++; |
616 | 0 | a_bucket[fast_b_mapping(12, a5, a4, a2)]++; |
617 | 0 | a_bucket[fast_b_mapping(178, a5, a3, a2)]++; |
618 | 0 | a_bucket[fast_b_mapping(166, a5, a3, a1)]++; |
619 | 0 | a_bucket[fast_b_mapping(84, a5, a4, a1)]++; |
620 | 0 | a_bucket[fast_b_mapping(230, a5, a2, a1)]++; |
621 | |
|
622 | 0 | a_bucket[fast_b_mapping(49, a6, a5, a4)]++; |
623 | 0 | a_bucket[fast_b_mapping(12, a6, a5, a3)]++; |
624 | 0 | a_bucket[fast_b_mapping(178, a6, a4, a3)]++; |
625 | 0 | a_bucket[fast_b_mapping(166, a6, a4, a2)]++; |
626 | 0 | a_bucket[fast_b_mapping(84, a6, a5, a2)]++; |
627 | 0 | a_bucket[fast_b_mapping(230, a6, a3, a2)]++; |
628 | |
|
629 | 0 | a_bucket[fast_b_mapping(49, a7, a6, a5)]++; |
630 | 0 | a_bucket[fast_b_mapping(12, a7, a6, a4)]++; |
631 | 0 | a_bucket[fast_b_mapping(178, a7, a5, a4)]++; |
632 | 0 | a_bucket[fast_b_mapping(166, a7, a5, a3)]++; |
633 | 0 | a_bucket[fast_b_mapping(84, a7, a6, a3)]++; |
634 | 0 | a_bucket[fast_b_mapping(230, a7, a4, a3)]++; |
635 | |
|
636 | 0 | a_bucket[fast_b_mapping(49, a8, a7, a6)]++; |
637 | 0 | a_bucket[fast_b_mapping(12, a8, a7, a5)]++; |
638 | 0 | a_bucket[fast_b_mapping(178, a8, a6, a5)]++; |
639 | 0 | a_bucket[fast_b_mapping(166, a8, a6, a4)]++; |
640 | 0 | a_bucket[fast_b_mapping(84, a8, a7, a4)]++; |
641 | 0 | a_bucket[fast_b_mapping(230, a8, a5, a4)]++; |
642 | |
|
643 | 0 | i = i + 5; |
644 | 0 | j = RNG_IDX(j + 5); |
645 | 0 | } |
646 | 0 | else |
647 | 0 | { |
648 | 0 | slide_window[j] = data[i]; |
649 | 0 | int j_1 = RNG_IDX(j - 1); |
650 | 0 | if (i >= 1) |
651 | 0 | { |
652 | 0 | slide_window[j_1] = data[i - 1]; |
653 | 0 | } |
654 | 0 | int j_2 = RNG_IDX(j - 2); |
655 | 0 | if (i >= 2) |
656 | 0 | { |
657 | 0 | slide_window[j_2] = data[i - 2]; |
658 | 0 | } |
659 | 0 | int j_3 = RNG_IDX(j - 3); |
660 | 0 | if (i >= 3) |
661 | 0 | { |
662 | 0 | slide_window[j_3] = data[i - 3]; |
663 | 0 | } |
664 | 0 | int j_4 = RNG_IDX(j - 4); |
665 | 0 | if (i >= 4) |
666 | 0 | { |
667 | 0 | slide_window[j_4] = data[i - 4]; |
668 | 0 | } |
669 | |
|
670 | 0 | a_bucket[fast_b_mapping( |
671 | 0 | 49, slide_window[j], slide_window[j_1], slide_window[j_2])]++; |
672 | 0 | a_bucket[fast_b_mapping( |
673 | 0 | 12, slide_window[j], slide_window[j_1], slide_window[j_3])]++; |
674 | 0 | a_bucket[fast_b_mapping( |
675 | 0 | 178, slide_window[j], slide_window[j_2], slide_window[j_3])]++; |
676 | 0 | a_bucket[fast_b_mapping( |
677 | 0 | 166, slide_window[j], slide_window[j_2], slide_window[j_4])]++; |
678 | 0 | a_bucket[fast_b_mapping( |
679 | 0 | 84, slide_window[j], slide_window[j_1], slide_window[j_4])]++; |
680 | 0 | a_bucket[fast_b_mapping( |
681 | 0 | 230, slide_window[j], slide_window[j_3], slide_window[j_4])]++; |
682 | 0 | i++; |
683 | 0 | j = RNG_IDX(j + 1); |
684 | 0 | } |
685 | 0 | } |
686 | 0 | } |
687 | | |
688 | | ///////////////////////////////////////////////////////////////////////////// |
689 | | // fc_cons_option - a bitfield |
690 | | // 0 default |
691 | | // 1 force (now the default) |
692 | | // 2 conservative |
693 | | // 4 do not delete a_bucket |
694 | | ///////////////////////////////////////////////////////////////////////////// |
695 | | |
696 | | /* to signal the class there is no more data to be added */ |
697 | | void tlsh_impl_final(TlshImpl *this, int fc_cons_option) |
698 | 0 | { |
699 | 0 | if (this->lsh_code_valid) |
700 | 0 | { |
701 | 0 | fprintf(stderr, "call to final() on a tlsh that is already valid\n"); |
702 | 0 | return; |
703 | 0 | } |
704 | | // incoming data must more than or equal to MIN_DATA_LENGTH bytes |
705 | 0 | if (((fc_cons_option & TLSH_OPTION_CONSERVATIVE) == 0) && |
706 | 0 | (this->data_len < MIN_DATA_LENGTH)) |
707 | 0 | { |
708 | | // this->lsh_code be empty |
709 | 0 | free(this->a_bucket); |
710 | 0 | this->a_bucket = NULL; |
711 | 0 | return; |
712 | 0 | } |
713 | 0 | if ((fc_cons_option & TLSH_OPTION_CONSERVATIVE) && |
714 | 0 | (this->data_len < MIN_CONSERVATIVE_DATA_LENGTH)) |
715 | 0 | { |
716 | | // this->lsh_code be empty |
717 | 0 | free(this->a_bucket); |
718 | 0 | this->a_bucket = NULL; |
719 | 0 | return; |
720 | 0 | } |
721 | | |
722 | 0 | unsigned int q1, q2, q3; |
723 | 0 | find_quartile(&q1, &q2, &q3, this->a_bucket); |
724 | | |
725 | | // issue #79 - divide by 0 if q3 == 0 |
726 | 0 | if (q3 == 0) |
727 | 0 | { |
728 | 0 | free(this->a_bucket); |
729 | 0 | this->a_bucket = NULL; |
730 | 0 | return; |
731 | 0 | } |
732 | | |
733 | | // buckets must be more than 50% non-zero |
734 | 0 | int nonzero = 0; |
735 | 0 | for (unsigned int i = 0; i < CODE_SIZE; i++) |
736 | 0 | { |
737 | 0 | for (unsigned int j = 0; j < 4; j++) |
738 | 0 | { |
739 | 0 | if (this->a_bucket[4 * i + j] > 0) |
740 | 0 | { |
741 | 0 | nonzero++; |
742 | 0 | } |
743 | 0 | } |
744 | 0 | } |
745 | | #if defined BUCKETS_48 |
746 | | if (nonzero < 18) |
747 | | { |
748 | | // printf("nonzero=%d\n", nonzero); |
749 | | delete[] this->a_bucket; |
750 | | this->a_bucket = NULL; |
751 | | return; |
752 | | } |
753 | | #else |
754 | 0 | if (nonzero <= 4 * CODE_SIZE / 2) |
755 | 0 | { |
756 | 0 | free(this->a_bucket); |
757 | 0 | this->a_bucket = NULL; |
758 | 0 | return; |
759 | 0 | } |
760 | 0 | #endif |
761 | | |
762 | 0 | for (unsigned int i = 0; i < CODE_SIZE; i++) |
763 | 0 | { |
764 | 0 | unsigned char h = 0; |
765 | 0 | for (unsigned int j = 0; j < 4; j++) |
766 | 0 | { |
767 | 0 | unsigned int k = this->a_bucket[4 * i + j]; |
768 | 0 | if (q3 < k) |
769 | 0 | { |
770 | 0 | h += 3 << (j * 2); // leave the optimization j*2 = j<<1 or j*2 = j+j |
771 | | // for compiler |
772 | 0 | } |
773 | 0 | else if (q2 < k) |
774 | 0 | { |
775 | 0 | h += 2 << (j * 2); |
776 | 0 | } |
777 | 0 | else if (q1 < k) |
778 | 0 | { |
779 | 0 | h += 1 << (j * 2); |
780 | 0 | } |
781 | 0 | } |
782 | 0 | this->lsh_bin.tmp_code[i] = h; |
783 | 0 | } |
784 | |
|
785 | 0 | if ((fc_cons_option & TLSH_OPTION_KEEP_BUCKET) == 0) |
786 | 0 | { |
787 | | // Done with a_bucket so deallocate |
788 | 0 | free(this->a_bucket); |
789 | 0 | this->a_bucket = NULL; |
790 | 0 | } |
791 | |
|
792 | 0 | this->lsh_bin.lvalue = l_capturing(this->data_len); |
793 | 0 | this->lsh_bin.Q.QR.q1ratio = |
794 | 0 | (unsigned int) ((float) (q1 * 100) / (float) q3) % 16; |
795 | 0 | this->lsh_bin.Q.QR.q2ratio = |
796 | 0 | (unsigned int) ((float) (q2 * 100) / (float) q3) % 16; |
797 | 0 | this->lsh_code_valid = true; |
798 | 0 | } |
799 | | |
800 | | int tlsh_impl_from_tlsh_str(TlshImpl *impl, const char *str) |
801 | 0 | { |
802 | | // Assume that we have 128 Buckets |
803 | 0 | int start = 0; |
804 | 0 | if (strncmp(str, "T1", 2) == 0) |
805 | 0 | { |
806 | 0 | start = 2; |
807 | 0 | } |
808 | 0 | else |
809 | 0 | { |
810 | 0 | start = 0; |
811 | 0 | } |
812 | | // Validate input string |
813 | 0 | for (int ii = 0; ii < INTERNAL_TLSH_STRING_LEN; ii++) |
814 | 0 | { |
815 | 0 | int i = ii + start; |
816 | 0 | if (!((str[i] >= '0' && str[i] <= '9') || |
817 | 0 | (str[i] >= 'A' && str[i] <= 'F') || (str[i] >= 'a' && str[i] <= 'f'))) |
818 | 0 | { |
819 | | // printf("warning ii=%d str[%d]='%c'\n", ii, i, str[i]); |
820 | 0 | return 1; |
821 | 0 | } |
822 | 0 | } |
823 | 0 | int xi = INTERNAL_TLSH_STRING_LEN + start; |
824 | 0 | if (((str[xi] >= '0' && str[xi] <= '9') || |
825 | 0 | (str[xi] >= 'A' && str[xi] <= 'F') || |
826 | 0 | (str[xi] >= 'a' && str[xi] <= 'f'))) |
827 | 0 | { |
828 | | // printf("warning xi=%d\n", xi); |
829 | 0 | return 1; |
830 | 0 | } |
831 | | |
832 | 0 | tlsh_impl_reset(impl); |
833 | |
|
834 | 0 | LshBinStruct tmp; |
835 | 0 | from_hex(&str[start], INTERNAL_TLSH_STRING_LEN, (unsigned char *) &tmp); |
836 | | |
837 | | // Reconstruct checksum, Qrations & lvalue |
838 | 0 | for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) |
839 | 0 | { |
840 | 0 | impl->lsh_bin.checksum[k] = swap_byte(tmp.checksum[k]); |
841 | 0 | } |
842 | 0 | impl->lsh_bin.lvalue = swap_byte(tmp.lvalue); |
843 | 0 | impl->lsh_bin.Q.qb = swap_byte(tmp.Q.qb); |
844 | |
|
845 | 0 | for (int i = 0; i < CODE_SIZE; i++) |
846 | 0 | { |
847 | 0 | impl->lsh_bin.tmp_code[i] = (tmp.tmp_code[CODE_SIZE - 1 - i]); |
848 | 0 | } |
849 | 0 | impl->lsh_code_valid = true; |
850 | |
|
851 | 0 | return 0; |
852 | 0 | } |
853 | | |
854 | | const char *hash2( |
855 | | TlshImpl *impl, |
856 | | char *buffer, |
857 | | unsigned int bufSize, |
858 | | bool showvers) |
859 | 0 | { |
860 | 0 | if (bufSize < TLSH_STRING_LEN_REQ + 1) |
861 | 0 | { |
862 | 0 | strncpy(buffer, "", bufSize); |
863 | 0 | return buffer; |
864 | 0 | } |
865 | 0 | if (impl->lsh_code_valid == false) |
866 | 0 | { |
867 | 0 | strncpy(buffer, "", bufSize); |
868 | 0 | return buffer; |
869 | 0 | } |
870 | | |
871 | 0 | LshBinStruct tmp; |
872 | 0 | for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) |
873 | 0 | { |
874 | 0 | tmp.checksum[k] = swap_byte(impl->lsh_bin.checksum[k]); |
875 | 0 | } |
876 | 0 | tmp.lvalue = swap_byte(impl->lsh_bin.lvalue); |
877 | 0 | tmp.Q.qb = swap_byte(impl->lsh_bin.Q.qb); |
878 | |
|
879 | 0 | for (int i = 0; i < CODE_SIZE; i++) |
880 | 0 | { |
881 | 0 | tmp.tmp_code[i] = (impl->lsh_bin.tmp_code[CODE_SIZE - 1 - i]); |
882 | 0 | } |
883 | |
|
884 | 0 | if (showvers) |
885 | 0 | { |
886 | 0 | buffer[0] = 'T'; |
887 | 0 | buffer[1] = '0' + showvers; |
888 | 0 | to_hex((unsigned char *) &tmp, sizeof(tmp), &buffer[2]); |
889 | 0 | } |
890 | 0 | else |
891 | 0 | { |
892 | 0 | to_hex((unsigned char *) &tmp, sizeof(tmp), buffer); |
893 | 0 | } |
894 | 0 | return buffer; |
895 | 0 | } |
896 | | |
897 | | /* to get the hex-encoded hash code */ |
898 | | const char *tlsh_impl_hash(TlshImpl *impl, bool showvers) |
899 | 0 | { |
900 | 0 | if (impl->lsh_code != NULL) |
901 | 0 | { |
902 | | // lsh_code has been previously calculated, so just return it |
903 | 0 | return impl->lsh_code; |
904 | 0 | } |
905 | | |
906 | 0 | impl->lsh_code = (char *) malloc(TLSH_STRING_LEN_REQ + 1); |
907 | 0 | if (!impl->lsh_code) |
908 | 0 | { |
909 | 0 | return NULL; |
910 | 0 | } |
911 | | |
912 | 0 | memset(impl->lsh_code, 0, TLSH_STRING_LEN_REQ + 1); |
913 | |
|
914 | 0 | return hash2(impl, impl->lsh_code, TLSH_STRING_LEN_REQ + 1, showvers); |
915 | 0 | } |
916 | | |
917 | | int tlsh_impl_compare(TlshImpl *this, TlshImpl *other) |
918 | 0 | { |
919 | 0 | return (memcmp(&(this->lsh_bin), &(other->lsh_bin), sizeof(this->lsh_bin))); |
920 | 0 | } |
921 | | |
922 | | //////////////////////////////////////////// |
923 | | // the default for these parameters is 12 |
924 | | //////////////////////////////////////////// |
925 | | |
926 | | static int length_mult = 12; |
927 | | static int qratio_mult = 12; |
928 | | |
929 | | #ifdef TLSH_DISTANCE_PARAMETERS |
930 | | |
931 | | int hist_diff1_add = 1; |
932 | | int hist_diff2_add = 2; |
933 | | int hist_diff3_add = 6; |
934 | | |
935 | | void set_tlsh_distance_parameters( |
936 | | int length_mult_value, |
937 | | int qratio_mult_value, |
938 | | int hist_diff1_add_value, |
939 | | int hist_diff2_add_value, |
940 | | int hist_diff3_add_value) |
941 | | { |
942 | | if (length_mult_value != -1) |
943 | | { |
944 | | length_mult = length_mult_value; |
945 | | } |
946 | | if (qratio_mult_value != -1) |
947 | | { |
948 | | qratio_mult = qratio_mult_value; |
949 | | } |
950 | | if (hist_diff1_add_value != -1) |
951 | | { |
952 | | hist_diff1_add = hist_diff1_add_value; |
953 | | } |
954 | | if (hist_diff2_add_value != -1) |
955 | | { |
956 | | hist_diff2_add = hist_diff2_add_value; |
957 | | } |
958 | | if (hist_diff3_add_value != -1) |
959 | | { |
960 | | hist_diff3_add = hist_diff3_add_value; |
961 | | } |
962 | | } |
963 | | #endif |
964 | | |
965 | | int tlsh_impl_lvalue(TlshImpl *impl) |
966 | 0 | { |
967 | 0 | return (impl->lsh_bin.lvalue); |
968 | 0 | } |
969 | | |
970 | | int tlsh_impl_q1ratio(TlshImpl *impl) |
971 | 0 | { |
972 | 0 | return (impl->lsh_bin.Q.QR.q1ratio); |
973 | 0 | } |
974 | | |
975 | | int tlsh_impl_q2ratio(TlshImpl *impl) |
976 | 0 | { |
977 | 0 | return (impl->lsh_bin.Q.QR.q2ratio); |
978 | 0 | } |
979 | | |
980 | | int tlsh_impl_is_valid(TlshImpl *impl) |
981 | 0 | { |
982 | 0 | return (impl->lsh_code_valid); |
983 | 0 | } |
984 | | |
985 | | int tlsh_impl_checksum(TlshImpl *impl, int k) |
986 | 0 | { |
987 | 0 | if ((k >= TLSH_CHECKSUM_LEN) || (k < 0)) |
988 | 0 | { |
989 | 0 | return 0; |
990 | 0 | } |
991 | 0 | return impl->lsh_bin.checksum[k]; |
992 | 0 | } |
993 | | |
994 | | int tlsh_impl_bucket_value(TlshImpl *impl, int bucket) |
995 | 0 | { |
996 | 0 | int idx; |
997 | 0 | int elem; |
998 | 0 | unsigned char bv; |
999 | |
|
1000 | 0 | idx = (CODE_SIZE - (bucket / 4)) - 1; |
1001 | 0 | elem = bucket % 4; |
1002 | 0 | bv = impl->lsh_bin.tmp_code[idx]; |
1003 | 0 | int h1 = bv / 16; |
1004 | 0 | int h2 = bv % 16; |
1005 | 0 | int p1 = h1 / 4; |
1006 | 0 | int p2 = h1 % 4; |
1007 | 0 | int p3 = h2 / 4; |
1008 | 0 | int p4 = h2 % 4; |
1009 | 0 | if (elem == 0) |
1010 | 0 | { |
1011 | 0 | return (p1); |
1012 | 0 | } |
1013 | 0 | if (elem == 1) |
1014 | 0 | { |
1015 | 0 | return (p2); |
1016 | 0 | } |
1017 | 0 | if (elem == 2) |
1018 | 0 | { |
1019 | 0 | return (p3); |
1020 | 0 | } |
1021 | 0 | return (p4); |
1022 | 0 | } |
1023 | | |
1024 | | int tlsh_impl_histogram_count(TlshImpl *impl, int bucket) |
1025 | 0 | { |
1026 | 0 | if (impl->a_bucket == NULL) |
1027 | 0 | return (-1); |
1028 | 0 | return (impl->a_bucket[EFF_BUCKETS - 1 - bucket]); |
1029 | 0 | } |
1030 | | |
1031 | | int tlsh_impl_total_diff(TlshImpl *impl, TlshImpl *other, bool len_diff) |
1032 | 0 | { |
1033 | 0 | int diff = 0; |
1034 | |
|
1035 | 0 | if (len_diff) |
1036 | 0 | { |
1037 | 0 | int ldiff = mod_diff( |
1038 | 0 | impl->lsh_bin.lvalue, other->lsh_bin.lvalue, RANGE_LVALUE); |
1039 | 0 | if (ldiff == 0) |
1040 | 0 | diff = 0; |
1041 | 0 | else if (ldiff == 1) |
1042 | 0 | diff = 1; |
1043 | 0 | else |
1044 | 0 | diff += ldiff * length_mult; |
1045 | 0 | } |
1046 | |
|
1047 | 0 | int q1diff = mod_diff( |
1048 | 0 | impl->lsh_bin.Q.QR.q1ratio, other->lsh_bin.Q.QR.q1ratio, RANGE_QRATIO); |
1049 | 0 | if (q1diff <= 1) |
1050 | 0 | diff += q1diff; |
1051 | 0 | else |
1052 | 0 | diff += (q1diff - 1) * qratio_mult; |
1053 | |
|
1054 | 0 | int q2diff = mod_diff( |
1055 | 0 | impl->lsh_bin.Q.QR.q2ratio, other->lsh_bin.Q.QR.q2ratio, RANGE_QRATIO); |
1056 | 0 | if (q2diff <= 1) |
1057 | 0 | diff += q2diff; |
1058 | 0 | else |
1059 | 0 | diff += (q2diff - 1) * qratio_mult; |
1060 | |
|
1061 | 0 | for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) |
1062 | 0 | { |
1063 | 0 | if (impl->lsh_bin.checksum[k] != other->lsh_bin.checksum[k]) |
1064 | 0 | { |
1065 | 0 | diff++; |
1066 | 0 | break; |
1067 | 0 | } |
1068 | 0 | } |
1069 | |
|
1070 | 0 | diff += h_distance( |
1071 | 0 | CODE_SIZE, impl->lsh_bin.tmp_code, other->lsh_bin.tmp_code); |
1072 | |
|
1073 | 0 | return (diff); |
1074 | 0 | } |
1075 | | |
1076 | | #define SWAP_UINT(x, y) \ |
1077 | 0 | do \ |
1078 | 0 | { \ |
1079 | 0 | unsigned int int_tmp = (x); \ |
1080 | 0 | (x) = (y); \ |
1081 | 0 | (y) = int_tmp; \ |
1082 | 0 | } while (0) |
1083 | | |
1084 | | void find_quartile( |
1085 | | unsigned int *q1, |
1086 | | unsigned int *q2, |
1087 | | unsigned int *q3, |
1088 | | const unsigned int *a_bucket) |
1089 | 0 | { |
1090 | 0 | unsigned int bucket_copy[EFF_BUCKETS], short_cut_left[EFF_BUCKETS], |
1091 | 0 | short_cut_right[EFF_BUCKETS], spl = 0, spr = 0; |
1092 | 0 | unsigned int p1 = EFF_BUCKETS / 4 - 1; |
1093 | 0 | unsigned int p2 = EFF_BUCKETS / 2 - 1; |
1094 | 0 | unsigned int p3 = EFF_BUCKETS - EFF_BUCKETS / 4 - 1; |
1095 | 0 | unsigned int end = EFF_BUCKETS - 1; |
1096 | |
|
1097 | 0 | for (unsigned int i = 0; i <= end; i++) |
1098 | 0 | { |
1099 | 0 | bucket_copy[i] = a_bucket[i]; |
1100 | 0 | } |
1101 | |
|
1102 | 0 | for (unsigned int l = 0, r = end;;) |
1103 | 0 | { |
1104 | 0 | unsigned int ret = partition(bucket_copy, l, r); |
1105 | 0 | if (ret > p2) |
1106 | 0 | { |
1107 | 0 | r = ret - 1; |
1108 | 0 | short_cut_right[spr] = ret; |
1109 | 0 | spr++; |
1110 | 0 | } |
1111 | 0 | else if (ret < p2) |
1112 | 0 | { |
1113 | 0 | l = ret + 1; |
1114 | 0 | short_cut_left[spl] = ret; |
1115 | 0 | spl++; |
1116 | 0 | } |
1117 | 0 | else |
1118 | 0 | { |
1119 | 0 | *q2 = bucket_copy[p2]; |
1120 | 0 | break; |
1121 | 0 | } |
1122 | 0 | } |
1123 | |
|
1124 | 0 | short_cut_left[spl] = p2 - 1; |
1125 | 0 | short_cut_right[spr] = p2 + 1; |
1126 | |
|
1127 | 0 | for (unsigned int i = 0, l = 0; i <= spl; i++) |
1128 | 0 | { |
1129 | 0 | unsigned int r = short_cut_left[i]; |
1130 | 0 | if (r > p1) |
1131 | 0 | { |
1132 | 0 | for (;;) |
1133 | 0 | { |
1134 | 0 | unsigned int ret = partition(bucket_copy, l, r); |
1135 | 0 | if (ret > p1) |
1136 | 0 | { |
1137 | 0 | r = ret - 1; |
1138 | 0 | } |
1139 | 0 | else if (ret < p1) |
1140 | 0 | { |
1141 | 0 | l = ret + 1; |
1142 | 0 | } |
1143 | 0 | else |
1144 | 0 | { |
1145 | 0 | *q1 = bucket_copy[p1]; |
1146 | 0 | break; |
1147 | 0 | } |
1148 | 0 | } |
1149 | 0 | break; |
1150 | 0 | } |
1151 | 0 | else if (r < p1) |
1152 | 0 | { |
1153 | 0 | l = r; |
1154 | 0 | } |
1155 | 0 | else |
1156 | 0 | { |
1157 | 0 | *q1 = bucket_copy[p1]; |
1158 | 0 | break; |
1159 | 0 | } |
1160 | 0 | } |
1161 | |
|
1162 | 0 | for (unsigned int i = 0, r = end; i <= spr; i++) |
1163 | 0 | { |
1164 | 0 | unsigned int l = short_cut_right[i]; |
1165 | 0 | if (l < p3) |
1166 | 0 | { |
1167 | 0 | for (;;) |
1168 | 0 | { |
1169 | 0 | unsigned int ret = partition(bucket_copy, l, r); |
1170 | 0 | if (ret > p3) |
1171 | 0 | { |
1172 | 0 | r = ret - 1; |
1173 | 0 | } |
1174 | 0 | else if (ret < p3) |
1175 | 0 | { |
1176 | 0 | l = ret + 1; |
1177 | 0 | } |
1178 | 0 | else |
1179 | 0 | { |
1180 | 0 | *q3 = bucket_copy[p3]; |
1181 | 0 | break; |
1182 | 0 | } |
1183 | 0 | } |
1184 | 0 | break; |
1185 | 0 | } |
1186 | 0 | else if (l > p3) |
1187 | 0 | { |
1188 | 0 | r = l; |
1189 | 0 | } |
1190 | 0 | else |
1191 | 0 | { |
1192 | 0 | *q3 = bucket_copy[p3]; |
1193 | 0 | break; |
1194 | 0 | } |
1195 | 0 | } |
1196 | 0 | } |
1197 | | |
1198 | | unsigned int partition(unsigned int *buf, unsigned int left, unsigned int right) |
1199 | 0 | { |
1200 | 0 | if (left == right) |
1201 | 0 | { |
1202 | 0 | return left; |
1203 | 0 | } |
1204 | 0 | if (left + 1 == right) |
1205 | 0 | { |
1206 | 0 | if (buf[left] > buf[right]) |
1207 | 0 | { |
1208 | 0 | SWAP_UINT(buf[left], buf[right]); |
1209 | 0 | } |
1210 | 0 | return left; |
1211 | 0 | } |
1212 | | |
1213 | 0 | unsigned int ret = left, pivot = (left + right) >> 1; |
1214 | |
|
1215 | 0 | unsigned int val = buf[pivot]; |
1216 | |
|
1217 | 0 | buf[pivot] = buf[right]; |
1218 | 0 | buf[right] = val; |
1219 | |
|
1220 | 0 | for (unsigned int i = left; i < right; i++) |
1221 | 0 | { |
1222 | 0 | if (buf[i] < val) |
1223 | 0 | { |
1224 | 0 | SWAP_UINT(buf[ret], buf[i]); |
1225 | 0 | ret++; |
1226 | 0 | } |
1227 | 0 | } |
1228 | 0 | buf[right] = buf[ret]; |
1229 | 0 | buf[ret] = val; |
1230 | |
|
1231 | 0 | return ret; |
1232 | 0 | } |