/src/tor/src/lib/compress/compress.c
Line | Count | Source |
1 | | /* Copyright (c) 2004, Roger Dingledine. |
2 | | * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. |
3 | | * Copyright (c) 2007-2021, The Tor Project, Inc. */ |
4 | | /* See LICENSE for licensing information */ |
5 | | |
6 | | /** |
7 | | * \file compress.c |
8 | | * \brief Common compression API implementation. |
9 | | * |
10 | | * This file provides a unified interface to all the compression libraries Tor |
11 | | * knows how to use. |
12 | | **/ |
13 | | |
14 | | #include "orconfig.h" |
15 | | |
16 | | #include <stdlib.h> |
17 | | #include <stdio.h> |
18 | | #include <string.h> |
19 | | #include "lib/cc/torint.h" |
20 | | |
21 | | #ifdef HAVE_NETINET_IN_H |
22 | | #include <netinet/in.h> |
23 | | #endif |
24 | | |
25 | | #include "lib/log/log.h" |
26 | | #include "lib/log/util_bug.h" |
27 | | #include "lib/arch/bytes.h" |
28 | | #include "lib/ctime/di_ops.h" |
29 | | #include "lib/compress/compress.h" |
30 | | #include "lib/compress/compress_lzma.h" |
31 | | #include "lib/compress/compress_none.h" |
32 | | #include "lib/compress/compress_sys.h" |
33 | | #include "lib/compress/compress_zlib.h" |
34 | | #include "lib/compress/compress_zstd.h" |
35 | | #include "lib/intmath/cmp.h" |
36 | | #include "lib/malloc/malloc.h" |
37 | | #include "lib/subsys/subsys.h" |
38 | | #include "lib/thread/threads.h" |
39 | | |
40 | | /** Total number of bytes allocated for compression state overhead. */ |
41 | | static atomic_counter_t total_compress_allocation; |
42 | | |
/** @{ */
/* Limits used to recognize compression bombs.  Once an output has reached
 * at least CHECK_FOR_COMPRESSION_BOMB_AFTER bytes, its uncompression factor
 * (uncompressed size : compressed size) must not be greater than
 * MAX_UNCOMPRESSION_FACTOR.
 *
 * Choosing MAX_UNCOMPRESSION_FACTOR is a trade-off.  It should be small, so
 * that an attacker gains only a limited multiplier.  It should also be large
 * enough that no legitimate document -- including ones we might invent in
 * the future -- ever compresses by a larger factor.  That leaves a fairly
 * wide range of acceptable values: IMO, anything over 8 is probably safe,
 * and anything under 50 is probably sufficient.
 */
#define MAX_UNCOMPRESSION_FACTOR 25
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (64*1024)
/** @} */
60 | | |
61 | | /** Return true if uncompressing an input of size <b>in_size</b> to an input of |
62 | | * size at least <b>size_out</b> looks like a compression bomb. */ |
63 | | MOCK_IMPL(int, |
64 | | tor_compress_is_compression_bomb,(size_t size_in, size_t size_out)) |
65 | 0 | { |
66 | 0 | if (size_in == 0 || size_out < CHECK_FOR_COMPRESSION_BOMB_AFTER) |
67 | 0 | return 0; |
68 | | |
69 | 0 | double compression_factor = (double)size_out / size_in; |
70 | 0 | if (compression_factor > MAX_UNCOMPRESSION_FACTOR) { |
71 | 0 | log_warn(LD_GENERAL, |
72 | 0 | "Detected possible compression bomb with " |
73 | 0 | "input size = %"TOR_PRIuSZ" and output size = %"TOR_PRIuSZ" " |
74 | 0 | "(compression factor = %.2f)", |
75 | 0 | size_in, size_out, compression_factor); |
76 | 0 | return 1; |
77 | 0 | } |
78 | | |
79 | 0 | return 0; |
80 | 0 | } |
81 | | |
82 | | /** Guess the size that <b>in_len</b> will be after compression or |
83 | | * decompression. */ |
84 | | static size_t |
85 | | guess_compress_size(int compress, compress_method_t method, |
86 | | compression_level_t compression_level, |
87 | | size_t in_len) |
88 | 0 | { |
89 | | // ignore these for now. |
90 | 0 | (void)compression_level; |
91 | 0 | if (method == NO_METHOD) { |
92 | | /* Guess that we'll need an extra byte, to avoid a needless realloc |
93 | | * for nul-termination */ |
94 | 0 | return (in_len < SIZE_MAX) ? in_len + 1 : in_len; |
95 | 0 | } |
96 | | |
97 | | /* Always guess a factor of 2. */ |
98 | 0 | if (compress) { |
99 | 0 | in_len /= 2; |
100 | 0 | } else { |
101 | 0 | if (in_len < SIZE_T_CEILING/2) |
102 | 0 | in_len *= 2; |
103 | 0 | } |
104 | 0 | return MAX(in_len, 1024); |
105 | 0 | } |
106 | | |
107 | | /** Internal function to implement tor_compress/tor_uncompress, depending on |
108 | | * whether <b>compress</b> is set. All arguments are as for tor_compress or |
109 | | * tor_uncompress. */ |
110 | | static int |
111 | | tor_compress_impl(int compress, |
112 | | char **out, size_t *out_len, |
113 | | const char *in, size_t in_len, |
114 | | compress_method_t method, |
115 | | compression_level_t compression_level, |
116 | | int complete_only, |
117 | | int protocol_warn_level) |
118 | 0 | { |
119 | 0 | tor_compress_state_t *stream; |
120 | 0 | int rv; |
121 | |
|
122 | 0 | stream = tor_compress_new(compress, method, compression_level); |
123 | |
|
124 | 0 | if (stream == NULL) { |
125 | 0 | log_warn(LD_GENERAL, "NULL stream while %scompressing", |
126 | 0 | compress?"":"de"); |
127 | 0 | log_debug(LD_GENERAL, "method: %d level: %d at len: %lu", |
128 | 0 | method, compression_level, (unsigned long)in_len); |
129 | 0 | return -1; |
130 | 0 | } |
131 | | |
132 | 0 | size_t in_len_orig = in_len; |
133 | 0 | size_t out_remaining, out_alloc; |
134 | 0 | char *outptr; |
135 | |
|
136 | 0 | out_remaining = out_alloc = |
137 | 0 | guess_compress_size(compress, method, compression_level, in_len); |
138 | 0 | *out = outptr = tor_malloc(out_remaining); |
139 | |
|
140 | 0 | const int finish = complete_only || compress; |
141 | |
|
142 | 0 | while (1) { |
143 | 0 | switch (tor_compress_process(stream, |
144 | 0 | &outptr, &out_remaining, |
145 | 0 | &in, &in_len, finish)) { |
146 | 0 | case TOR_COMPRESS_DONE: |
147 | 0 | if (in_len == 0 || compress) { |
148 | 0 | goto done; |
149 | 0 | } else { |
150 | | // More data is present, and we're decompressing. So we may need to |
151 | | // reinitialize the stream if we are handling multiple concatenated |
152 | | // inputs. |
153 | 0 | tor_compress_free(stream); |
154 | 0 | stream = tor_compress_new(compress, method, compression_level); |
155 | 0 | if (stream == NULL) { |
156 | 0 | log_warn(LD_GENERAL, "NULL stream while %scompressing", |
157 | 0 | compress?"":"de"); |
158 | 0 | goto err; |
159 | 0 | } |
160 | 0 | } |
161 | 0 | break; |
162 | 0 | case TOR_COMPRESS_OK: |
163 | 0 | if (compress || complete_only) { |
164 | 0 | log_fn(protocol_warn_level, LD_PROTOCOL, |
165 | 0 | "Unexpected %s while %scompressing", |
166 | 0 | complete_only?"end of input":"result", |
167 | 0 | compress?"":"de"); |
168 | 0 | log_debug(LD_GENERAL, "method: %d level: %d at len: %lu", |
169 | 0 | method, compression_level, (unsigned long)in_len); |
170 | 0 | goto err; |
171 | 0 | } else { |
172 | 0 | if (in_len == 0) { |
173 | 0 | goto done; |
174 | 0 | } |
175 | 0 | } |
176 | 0 | break; |
177 | 0 | case TOR_COMPRESS_BUFFER_FULL: { |
178 | 0 | if (!compress && outptr < *out+out_alloc) { |
179 | | // A buffer error in this case means that we have a problem |
180 | | // with our input. |
181 | 0 | log_fn(protocol_warn_level, LD_PROTOCOL, |
182 | 0 | "Possible truncated or corrupt compressed data"); |
183 | 0 | goto err; |
184 | 0 | } |
185 | 0 | if (out_alloc >= SIZE_T_CEILING / 2) { |
186 | 0 | log_warn(LD_GENERAL, "While %scompressing data: ran out of space.", |
187 | 0 | compress?"":"un"); |
188 | 0 | goto err; |
189 | 0 | } |
190 | 0 | if (!compress && |
191 | 0 | tor_compress_is_compression_bomb(in_len_orig, out_alloc)) { |
192 | | // This should already have been caught down in the backend logic. |
193 | | // LCOV_EXCL_START |
194 | 0 | tor_assert_nonfatal_unreached(); |
195 | 0 | goto err; |
196 | | // LCOV_EXCL_STOP |
197 | 0 | } |
198 | 0 | const size_t offset = outptr - *out; |
199 | 0 | out_alloc *= 2; |
200 | 0 | *out = tor_realloc(*out, out_alloc); |
201 | 0 | outptr = *out + offset; |
202 | 0 | out_remaining = out_alloc - offset; |
203 | 0 | break; |
204 | 0 | } |
205 | 0 | case TOR_COMPRESS_ERROR: |
206 | 0 | log_fn(protocol_warn_level, LD_GENERAL, |
207 | 0 | "Error while %scompressing data: bad input?", |
208 | 0 | compress?"":"un"); |
209 | 0 | goto err; // bad data. |
210 | | |
211 | | // LCOV_EXCL_START |
212 | 0 | default: |
213 | 0 | tor_assert_nonfatal_unreached(); |
214 | 0 | goto err; |
215 | | // LCOV_EXCL_STOP |
216 | 0 | } |
217 | 0 | } |
218 | 0 | done: |
219 | 0 | *out_len = outptr - *out; |
220 | 0 | if (compress && tor_compress_is_compression_bomb(*out_len, in_len_orig)) { |
221 | 0 | log_warn(LD_BUG, "We compressed something and got an insanely high " |
222 | 0 | "compression factor; other Tors would think this was a " |
223 | 0 | "compression bomb."); |
224 | 0 | goto err; |
225 | 0 | } |
226 | 0 | if (!compress) { |
227 | | // NUL-terminate our output. |
228 | 0 | if (out_alloc == *out_len) |
229 | 0 | *out = tor_realloc(*out, out_alloc + 1); |
230 | 0 | (*out)[*out_len] = '\0'; |
231 | 0 | } |
232 | 0 | rv = 0; |
233 | 0 | goto out; |
234 | | |
235 | 0 | err: |
236 | 0 | tor_free(*out); |
237 | 0 | *out_len = 0; |
238 | 0 | rv = -1; |
239 | 0 | goto out; |
240 | | |
241 | 0 | out: |
242 | 0 | tor_compress_free(stream); |
243 | 0 | return rv; |
244 | 0 | } |
245 | | |
246 | | /** Given <b>in_len</b> bytes at <b>in</b>, compress them into a newly |
247 | | * allocated buffer, using the method described in <b>method</b>. Store the |
248 | | * compressed string in *<b>out</b>, and its length in *<b>out_len</b>. |
249 | | * Return 0 on success, -1 on failure. |
250 | | */ |
251 | | int |
252 | | tor_compress(char **out, size_t *out_len, |
253 | | const char *in, size_t in_len, |
254 | | compress_method_t method) |
255 | 0 | { |
256 | 0 | return tor_compress_impl(1, out, out_len, in, in_len, method, |
257 | 0 | BEST_COMPRESSION, |
258 | 0 | 1, LOG_WARN); |
259 | 0 | } |
260 | | |
261 | | /** Given zero or more compressed strings of total length <b>in_len</b> bytes |
262 | | * at <b>in</b>, uncompress them into a newly allocated buffer, using the |
263 | | * method described in <b>method</b>. Store the uncompressed string in |
264 | | * *<b>out</b>, and its length in *<b>out_len</b>. Return 0 on success, -1 on |
265 | | * failure. |
266 | | * |
267 | | * If any bytes are written to <b>out</b>, an extra byte NUL is always |
268 | | * written at the end, but not counted in <b>out_len</b>. This is a |
269 | | * safety feature to ensure that the output can be treated as a |
270 | | * NUL-terminated string -- though of course, callers should check |
271 | | * out_len anyway. |
272 | | * |
273 | | * If <b>complete_only</b> is true, we consider a truncated input as a |
274 | | * failure; otherwise we decompress as much as we can. Warn about truncated |
275 | | * or corrupt inputs at <b>protocol_warn_level</b>. |
276 | | */ |
277 | | int |
278 | | tor_uncompress(char **out, size_t *out_len, |
279 | | const char *in, size_t in_len, |
280 | | compress_method_t method, |
281 | | int complete_only, |
282 | | int protocol_warn_level) |
283 | 0 | { |
284 | 0 | return tor_compress_impl(0, out, out_len, in, in_len, method, |
285 | 0 | BEST_COMPRESSION, |
286 | 0 | complete_only, protocol_warn_level); |
287 | 0 | } |
288 | | |
289 | | /** Try to tell whether the <b>in_len</b>-byte string in <b>in</b> is likely |
290 | | * to be compressed or not. If it is, return the likeliest compression method. |
291 | | * Otherwise, return UNKNOWN_METHOD. |
292 | | */ |
293 | | compress_method_t |
294 | | detect_compression_method(const char *in, size_t in_len) |
295 | 0 | { |
296 | 0 | if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) { |
297 | 0 | return GZIP_METHOD; |
298 | 0 | } else if (in_len > 2 && (in[0] & 0x0f) == 8 && |
299 | 0 | (tor_ntohs(get_uint16(in)) % 31) == 0) { |
300 | 0 | return ZLIB_METHOD; |
301 | 0 | } else if (in_len > 2 && |
302 | 0 | fast_memeq(in, "\x5d\x00\x00", 3)) { |
303 | 0 | return LZMA_METHOD; |
304 | 0 | } else if (in_len > 3 && |
305 | 0 | fast_memeq(in, "\x28\xb5\x2f\xfd", 4)) { |
306 | 0 | return ZSTD_METHOD; |
307 | 0 | } else { |
308 | 0 | return UNKNOWN_METHOD; |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | | /** Return 1 if a given <b>method</b> is supported; otherwise 0. */ |
313 | | int |
314 | | tor_compress_supports_method(compress_method_t method) |
315 | 0 | { |
316 | 0 | switch (method) { |
317 | 0 | case GZIP_METHOD: |
318 | 0 | case ZLIB_METHOD: |
319 | 0 | return tor_zlib_method_supported(); |
320 | 0 | case LZMA_METHOD: |
321 | 0 | return tor_lzma_method_supported(); |
322 | 0 | case ZSTD_METHOD: |
323 | 0 | return tor_zstd_method_supported(); |
324 | 0 | case NO_METHOD: |
325 | 0 | return 1; |
326 | 0 | case UNKNOWN_METHOD: |
327 | 0 | default: |
328 | 0 | return 0; |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | | /** |
333 | | * Return a bitmask of the supported compression types, where 1<<m is |
334 | | * set in the bitmask if and only if compression with method <b>m</b> is |
335 | | * supported. |
336 | | */ |
337 | | unsigned |
338 | | tor_compress_get_supported_method_bitmask(void) |
339 | 0 | { |
340 | 0 | static unsigned supported = 0; |
341 | 0 | if (supported == 0) { |
342 | 0 | compress_method_t m; |
343 | 0 | for (m = NO_METHOD; m <= UNKNOWN_METHOD; ++m) { |
344 | 0 | if (tor_compress_supports_method(m)) { |
345 | 0 | supported |= (1u << m); |
346 | 0 | } |
347 | 0 | } |
348 | 0 | } |
349 | 0 | return supported; |
350 | 0 | } |
351 | | |
352 | | /** Table of compression method names. These should have an "x-" prefix, |
353 | | * if they are not listed in the IANA content coding registry. */ |
354 | | static const struct { |
355 | | const char *name; |
356 | | compress_method_t method; |
357 | | } compression_method_names[] = { |
358 | | { "gzip", GZIP_METHOD }, |
359 | | { "deflate", ZLIB_METHOD }, |
360 | | // We call this "x-tor-lzma" rather than "x-lzma", because we impose a |
361 | | // lower maximum memory usage on the decoding side. |
362 | | { "x-tor-lzma", LZMA_METHOD }, |
363 | | { "x-zstd" , ZSTD_METHOD }, |
364 | | { "identity", NO_METHOD }, |
365 | | |
366 | | /* Later entries in this table are not canonical; these are recognized but |
367 | | * not emitted. */ |
368 | | { "x-gzip", GZIP_METHOD }, |
369 | | }; |
370 | | |
371 | | /** Return the canonical string representation of the compression method |
372 | | * <b>method</b>, or NULL if the method isn't recognized. */ |
373 | | const char * |
374 | | compression_method_get_name(compress_method_t method) |
375 | 0 | { |
376 | 0 | unsigned i; |
377 | 0 | for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) { |
378 | 0 | if (method == compression_method_names[i].method) |
379 | 0 | return compression_method_names[i].name; |
380 | 0 | } |
381 | 0 | return NULL; |
382 | 0 | } |
383 | | |
384 | | /** Table of compression human readable method names. */ |
385 | | static const struct { |
386 | | compress_method_t method; |
387 | | const char *name; |
388 | | } compression_method_human_names[] = { |
389 | | { NO_METHOD, "uncompressed" }, |
390 | | { GZIP_METHOD, "gzipped" }, |
391 | | { ZLIB_METHOD, "deflated" }, |
392 | | { LZMA_METHOD, "LZMA compressed" }, |
393 | | { ZSTD_METHOD, "Zstandard compressed" }, |
394 | | { UNKNOWN_METHOD, "unknown encoding" }, |
395 | | }; |
396 | | |
397 | | /** Return a human readable string representation of the compression method |
398 | | * <b>method</b>, or NULL if the method isn't recognized. */ |
399 | | const char * |
400 | | compression_method_get_human_name(compress_method_t method) |
401 | 0 | { |
402 | 0 | unsigned i; |
403 | 0 | for (i = 0; i < ARRAY_LENGTH(compression_method_human_names); ++i) { |
404 | 0 | if (method == compression_method_human_names[i].method) |
405 | 0 | return compression_method_human_names[i].name; |
406 | 0 | } |
407 | 0 | return NULL; |
408 | 0 | } |
409 | | |
410 | | /** Return the compression method represented by the string <b>name</b>, or |
411 | | * UNKNOWN_METHOD if the string isn't recognized. */ |
412 | | compress_method_t |
413 | | compression_method_get_by_name(const char *name) |
414 | 0 | { |
415 | 0 | unsigned i; |
416 | 0 | for (i = 0; i < ARRAY_LENGTH(compression_method_names); ++i) { |
417 | 0 | if (!strcmp(compression_method_names[i].name, name)) |
418 | 0 | return compression_method_names[i].method; |
419 | 0 | } |
420 | 0 | return UNKNOWN_METHOD; |
421 | 0 | } |
422 | | |
423 | | /** Return a string representation of the version of the library providing the |
424 | | * compression method given in <b>method</b>. Returns NULL if <b>method</b> is |
425 | | * unknown or unsupported. */ |
426 | | const char * |
427 | | tor_compress_version_str(compress_method_t method) |
428 | 0 | { |
429 | 0 | switch (method) { |
430 | 0 | case GZIP_METHOD: |
431 | 0 | case ZLIB_METHOD: |
432 | 0 | return tor_zlib_get_version_str(); |
433 | 0 | case LZMA_METHOD: |
434 | 0 | return tor_lzma_get_version_str(); |
435 | 0 | case ZSTD_METHOD: |
436 | 0 | return tor_zstd_get_version_str(); |
437 | 0 | case NO_METHOD: |
438 | 0 | case UNKNOWN_METHOD: |
439 | 0 | default: |
440 | 0 | return NULL; |
441 | 0 | } |
442 | 0 | } |
443 | | |
444 | | /** Return a string representation of the version of the library, found at |
445 | | * compile time, providing the compression method given in <b>method</b>. |
446 | | * Returns NULL if <b>method</b> is unknown or unsupported. */ |
447 | | const char * |
448 | | tor_compress_header_version_str(compress_method_t method) |
449 | 0 | { |
450 | 0 | switch (method) { |
451 | 0 | case GZIP_METHOD: |
452 | 0 | case ZLIB_METHOD: |
453 | 0 | return tor_zlib_get_header_version_str(); |
454 | 0 | case LZMA_METHOD: |
455 | 0 | return tor_lzma_get_header_version_str(); |
456 | 0 | case ZSTD_METHOD: |
457 | 0 | return tor_zstd_get_header_version_str(); |
458 | 0 | case NO_METHOD: |
459 | 0 | case UNKNOWN_METHOD: |
460 | 0 | default: |
461 | 0 | return NULL; |
462 | 0 | } |
463 | 0 | } |
464 | | |
465 | | /** Return the approximate number of bytes allocated for all |
466 | | * supported compression schemas. */ |
467 | | size_t |
468 | | tor_compress_get_total_allocation(void) |
469 | 0 | { |
470 | 0 | return atomic_counter_get(&total_compress_allocation) + |
471 | 0 | tor_zlib_get_total_allocation() + |
472 | 0 | tor_lzma_get_total_allocation() + |
473 | 0 | tor_zstd_get_total_allocation(); |
474 | 0 | } |
475 | | |
476 | | /** Internal state for an incremental compression/decompression. The body of |
477 | | * this struct is not exposed. */ |
478 | | struct tor_compress_state_t { |
479 | | compress_method_t method; /**< The compression method. */ |
480 | | |
481 | | union { |
482 | | tor_zlib_compress_state_t *zlib_state; |
483 | | tor_lzma_compress_state_t *lzma_state; |
484 | | tor_zstd_compress_state_t *zstd_state; |
485 | | } u; /**< Compression backend state. */ |
486 | | }; |
487 | | |
488 | | /** Construct and return a tor_compress_state_t object using <b>method</b>. If |
489 | | * <b>compress</b>, it's for compression; otherwise it's for decompression. */ |
490 | | tor_compress_state_t * |
491 | | tor_compress_new(int compress, compress_method_t method, |
492 | | compression_level_t compression_level) |
493 | 0 | { |
494 | 0 | tor_compress_state_t *state; |
495 | |
|
496 | 0 | state = tor_malloc_zero(sizeof(tor_compress_state_t)); |
497 | 0 | state->method = method; |
498 | |
|
499 | 0 | switch (method) { |
500 | 0 | case GZIP_METHOD: |
501 | 0 | case ZLIB_METHOD: { |
502 | 0 | tor_zlib_compress_state_t *zlib_state = |
503 | 0 | tor_zlib_compress_new(compress, method, compression_level); |
504 | |
|
505 | 0 | if (zlib_state == NULL) |
506 | 0 | goto err; |
507 | | |
508 | 0 | state->u.zlib_state = zlib_state; |
509 | 0 | break; |
510 | 0 | } |
511 | 0 | case LZMA_METHOD: { |
512 | 0 | tor_lzma_compress_state_t *lzma_state = |
513 | 0 | tor_lzma_compress_new(compress, method, compression_level); |
514 | |
|
515 | 0 | if (lzma_state == NULL) |
516 | 0 | goto err; |
517 | | |
518 | 0 | state->u.lzma_state = lzma_state; |
519 | 0 | break; |
520 | 0 | } |
521 | 0 | case ZSTD_METHOD: { |
522 | 0 | tor_zstd_compress_state_t *zstd_state = |
523 | 0 | tor_zstd_compress_new(compress, method, compression_level); |
524 | |
|
525 | 0 | if (zstd_state == NULL) |
526 | 0 | goto err; |
527 | | |
528 | 0 | state->u.zstd_state = zstd_state; |
529 | 0 | break; |
530 | 0 | } |
531 | 0 | case NO_METHOD: { |
532 | 0 | break; |
533 | 0 | } |
534 | 0 | case UNKNOWN_METHOD: |
535 | 0 | goto err; |
536 | 0 | } |
537 | | |
538 | 0 | atomic_counter_add(&total_compress_allocation, |
539 | 0 | sizeof(tor_compress_state_t)); |
540 | 0 | return state; |
541 | | |
542 | 0 | err: |
543 | 0 | tor_free(state); |
544 | 0 | return NULL; |
545 | 0 | } |
546 | | |
547 | | /** Compress/decompress some bytes using <b>state</b>. Read up to |
548 | | * *<b>in_len</b> bytes from *<b>in</b>, and write up to *<b>out_len</b> bytes |
549 | | * to *<b>out</b>, adjusting the values as we go. If <b>finish</b> is true, |
550 | | * we've reached the end of the input. |
551 | | * |
552 | | * Return TOR_COMPRESS_DONE if we've finished the entire |
553 | | * compression/decompression. |
554 | | * Return TOR_COMPRESS_OK if we're processed everything from the input. |
555 | | * Return TOR_COMPRESS_BUFFER_FULL if we're out of space on <b>out</b>. |
556 | | * Return TOR_COMPRESS_ERROR if the stream is corrupt. |
557 | | */ |
558 | | tor_compress_output_t |
559 | | tor_compress_process(tor_compress_state_t *state, |
560 | | char **out, size_t *out_len, |
561 | | const char **in, size_t *in_len, |
562 | | int finish) |
563 | 0 | { |
564 | 0 | tor_assert(state != NULL); |
565 | 0 | const size_t in_len_orig = *in_len; |
566 | 0 | const size_t out_len_orig = *out_len; |
567 | 0 | tor_compress_output_t rv; |
568 | |
|
569 | 0 | if (*out_len == 0 && (*in_len > 0 || finish)) { |
570 | | // If we still have input data, but no space for output data, we might as |
571 | | // well return early and let the caller do the reallocation of the out |
572 | | // variable. |
573 | 0 | return TOR_COMPRESS_BUFFER_FULL; |
574 | 0 | } |
575 | | |
576 | 0 | switch (state->method) { |
577 | 0 | case GZIP_METHOD: |
578 | 0 | case ZLIB_METHOD: |
579 | 0 | rv = tor_zlib_compress_process(state->u.zlib_state, |
580 | 0 | out, out_len, in, in_len, |
581 | 0 | finish); |
582 | 0 | break; |
583 | 0 | case LZMA_METHOD: |
584 | 0 | rv = tor_lzma_compress_process(state->u.lzma_state, |
585 | 0 | out, out_len, in, in_len, |
586 | 0 | finish); |
587 | 0 | break; |
588 | 0 | case ZSTD_METHOD: |
589 | 0 | rv = tor_zstd_compress_process(state->u.zstd_state, |
590 | 0 | out, out_len, in, in_len, |
591 | 0 | finish); |
592 | 0 | break; |
593 | 0 | case NO_METHOD: |
594 | 0 | rv = tor_cnone_compress_process(out, out_len, in, in_len, |
595 | 0 | finish); |
596 | 0 | break; |
597 | 0 | default: |
598 | 0 | case UNKNOWN_METHOD: |
599 | 0 | goto err; |
600 | 0 | } |
601 | 0 | if (BUG((rv == TOR_COMPRESS_OK) && |
602 | 0 | *in_len == in_len_orig && |
603 | 0 | *out_len == out_len_orig)) { |
604 | 0 | log_warn(LD_GENERAL, |
605 | 0 | "More info on the bug: method == %s, finish == %d, " |
606 | 0 | " *in_len == in_len_orig == %lu, " |
607 | 0 | "*out_len == out_len_orig == %lu", |
608 | 0 | compression_method_get_human_name(state->method), finish, |
609 | 0 | (unsigned long)in_len_orig, (unsigned long)out_len_orig); |
610 | 0 | return TOR_COMPRESS_ERROR; |
611 | 0 | } |
612 | | |
613 | 0 | return rv; |
614 | 0 | err: |
615 | 0 | return TOR_COMPRESS_ERROR; |
616 | 0 | } |
617 | | |
618 | | /** Deallocate <b>state</b>. */ |
619 | | void |
620 | | tor_compress_free_(tor_compress_state_t *state) |
621 | 0 | { |
622 | 0 | if (state == NULL) |
623 | 0 | return; |
624 | | |
625 | 0 | switch (state->method) { |
626 | 0 | case GZIP_METHOD: |
627 | 0 | case ZLIB_METHOD: |
628 | 0 | tor_zlib_compress_free(state->u.zlib_state); |
629 | 0 | break; |
630 | 0 | case LZMA_METHOD: |
631 | 0 | tor_lzma_compress_free(state->u.lzma_state); |
632 | 0 | break; |
633 | 0 | case ZSTD_METHOD: |
634 | 0 | tor_zstd_compress_free(state->u.zstd_state); |
635 | 0 | break; |
636 | 0 | case NO_METHOD: |
637 | 0 | break; |
638 | 0 | case UNKNOWN_METHOD: |
639 | 0 | break; |
640 | 0 | } |
641 | | |
642 | 0 | atomic_counter_sub(&total_compress_allocation, |
643 | 0 | sizeof(tor_compress_state_t)); |
644 | 0 | tor_free(state); |
645 | 0 | } |
646 | | |
647 | | /** Return the approximate number of bytes allocated for <b>state</b>. */ |
648 | | size_t |
649 | | tor_compress_state_size(const tor_compress_state_t *state) |
650 | 0 | { |
651 | 0 | tor_assert(state != NULL); |
652 | |
|
653 | 0 | size_t size = sizeof(tor_compress_state_t); |
654 | |
|
655 | 0 | switch (state->method) { |
656 | 0 | case GZIP_METHOD: |
657 | 0 | case ZLIB_METHOD: |
658 | 0 | size += tor_zlib_compress_state_size(state->u.zlib_state); |
659 | 0 | break; |
660 | 0 | case LZMA_METHOD: |
661 | 0 | size += tor_lzma_compress_state_size(state->u.lzma_state); |
662 | 0 | break; |
663 | 0 | case ZSTD_METHOD: |
664 | 0 | size += tor_zstd_compress_state_size(state->u.zstd_state); |
665 | 0 | break; |
666 | 0 | case NO_METHOD: |
667 | 0 | case UNKNOWN_METHOD: |
668 | 0 | break; |
669 | 0 | } |
670 | | |
671 | 0 | return size; |
672 | 0 | } |
673 | | |
674 | | /** Initialize all compression modules. */ |
675 | | int |
676 | | tor_compress_init(void) |
677 | 32 | { |
678 | 32 | atomic_counter_init(&total_compress_allocation); |
679 | | |
680 | 32 | tor_zlib_init(); |
681 | 32 | tor_lzma_init(); |
682 | 32 | tor_zstd_init(); |
683 | | |
684 | 32 | return 0; |
685 | 32 | } |
686 | | |
/** Log a warning about any problem found while setting up the compression
 * libraries.  At present this only asks the Zstandard module to check for a
 * version mismatch.
 *
 * (This is kept out of tor_compress_init(), since the logs aren't set up
 * yet when that function runs.)
 */
void
tor_compress_log_init_warnings(void)
{
  // XXXX Could this move into tor_compress_init() after all?  log.c queues
  // XXXX log messages at startup.
  tor_zstd_warn_if_version_mismatched();
}
698 | | |
/** Subsystem hook: initialize the compression modules by calling
 * tor_compress_init(), and hand back its result. */
static int
subsys_compress_initialize(void)
{
  const int result = tor_compress_init();
  return result;
}
704 | | |
705 | | const subsys_fns_t sys_compress = { |
706 | | .name = "compress", |
707 | | SUBSYS_DECLARE_LOCATION(), |
708 | | .supported = true, |
709 | | .level = -55, |
710 | | .initialize = subsys_compress_initialize, |
711 | | }; |