/src/fluent-bit/lib/snappy-fef67ac/snappy.c
Line | Count | Source |
1 | | /* |
2 | | * C port of the snappy compressor from Google. |
3 | | * This is a very fast compressor with comparable compression to lzo. |
4 | | * Works best on 64bit little-endian, but should be good on others too. |
5 | | * Ported by Andi Kleen. |
6 | | * Up to date with snappy 1.1.0 |
7 | | */ |
8 | | |
9 | | /* |
10 | | * Copyright 2005 Google Inc. All Rights Reserved. |
11 | | * |
12 | | * Redistribution and use in source and binary forms, with or without |
13 | | * modification, are permitted provided that the following conditions are |
14 | | * met: |
15 | | * |
16 | | * * Redistributions of source code must retain the above copyright |
17 | | * notice, this list of conditions and the following disclaimer. |
18 | | * * Redistributions in binary form must reproduce the above |
19 | | * copyright notice, this list of conditions and the following disclaimer |
20 | | * in the documentation and/or other materials provided with the |
21 | | * distribution. |
22 | | * * Neither the name of Google Inc. nor the names of its |
23 | | * contributors may be used to endorse or promote products derived from |
24 | | * this software without specific prior written permission. |
25 | | * |
26 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
27 | | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
28 | | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
29 | | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
30 | | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
31 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
32 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
33 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
34 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
35 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
36 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
37 | | */ |
38 | | |
39 | | #ifdef __KERNEL__ |
40 | | #include <linux/kernel.h> |
41 | | #ifdef SG |
42 | | #include <linux/uio.h> |
43 | | #endif |
44 | | #include <linux/module.h> |
45 | | #include <linux/slab.h> |
46 | | #include <linux/string.h> |
47 | | #include <linux/snappy.h> |
48 | | #include <linux/vmalloc.h> |
49 | | #include <asm/unaligned.h> |
50 | | #else |
51 | | #include "snappy.h" |
52 | | #include "compat.h" |
53 | | #endif |
54 | | |
55 | | static inline void put_unaligned16(u16 v, u16 *x) |
56 | 0 | { |
57 | 0 | memcpy(x, &v, sizeof(u16)); |
58 | 0 | } |
59 | | |
60 | | static inline u16 get_unaligned16(u16 *x) |
61 | 0 | { |
62 | 0 | u16 _ret; |
63 | 0 | memcpy(&_ret, x, sizeof(u16)); |
64 | 0 | return _ret; |
65 | 0 | } |
66 | | |
67 | | static inline void put_unaligned32(u32 v, u32 *x) |
68 | 0 | { |
69 | 0 | memcpy(x, &v, sizeof(u32)); |
70 | 0 | } |
71 | | |
72 | | static inline u32 get_unaligned32(u32 *x) |
73 | 0 | { |
74 | 0 | u32 _ret; |
75 | 0 | memcpy(&_ret, x, sizeof(u32)); |
76 | 0 | return _ret; |
77 | 0 | } |
78 | | |
79 | | static inline void put_unaligned64(u64 v, u64 *x) |
80 | 0 | { |
81 | 0 | memcpy(x, &v, sizeof(u64)); |
82 | 0 | } |
83 | | |
84 | | static inline u64 get_unaligned64(u64 *x) |
85 | 0 | { |
86 | 0 | u64 _ret; |
87 | 0 | memcpy(&_ret, x, sizeof(u64)); |
88 | 0 | return _ret; |
89 | 0 | } |
90 | | |
91 | 0 | #define get_unaligned_le32(x) (le32toh(get_unaligned32((u32 *)(x)))) |
92 | 0 | #define put_unaligned_le16(v,x) (put_unaligned16(htole16(v), (u16 *)(x))) |
93 | | |
94 | 0 | #define CRASH_UNLESS(x) BUG_ON(!(x)) |
95 | 0 | #define CHECK(cond) CRASH_UNLESS(cond) |
96 | 0 | #define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) |
97 | 0 | #define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) |
98 | 0 | #define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) |
99 | | #define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) |
100 | 0 | #define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) |
101 | 0 | #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) |
102 | | |
103 | | #define UNALIGNED_LOAD16(_p) get_unaligned16((u16 *)(_p)) |
104 | 0 | #define UNALIGNED_LOAD32(_p) get_unaligned32((u32 *)(_p)) |
105 | 0 | #define UNALIGNED_LOAD64(_p) get_unaligned64((u64 *)(_p)) |
106 | | |
107 | | #define UNALIGNED_STORE16(_p, _val) put_unaligned16(_val, (u16 *)(_p)) |
108 | 0 | #define UNALIGNED_STORE32(_p, _val) put_unaligned32(_val, (u32 *)(_p)) |
109 | 0 | #define UNALIGNED_STORE64(_p, _val) put_unaligned64(_val, (u64 *)(_p)) |
110 | | |
111 | | /* |
112 | | * This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64 |
113 | | * on some platforms, in particular ARM. |
114 | | */ |
115 | | static inline void unaligned_copy64(const void *src, void *dst) |
116 | 0 | { |
117 | 0 | if (sizeof(void *) == 8) { |
118 | 0 | UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src)); |
119 | 0 | } else { |
120 | 0 | const char *src_char = (const char *)(src); |
121 | 0 | char *dst_char = (char *)(dst); |
122 | |
|
123 | 0 | UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char)); |
124 | 0 | UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4)); |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | | #ifdef NDEBUG |
129 | | |
130 | | #define DCHECK(cond) do {} while(0) |
131 | | #define DCHECK_LE(a, b) do {} while(0) |
132 | | #define DCHECK_GE(a, b) do {} while(0) |
133 | | #define DCHECK_EQ(a, b) do {} while(0) |
134 | | #define DCHECK_NE(a, b) do {} while(0) |
135 | | #define DCHECK_LT(a, b) do {} while(0) |
136 | | #define DCHECK_GT(a, b) do {} while(0) |
137 | | |
138 | | #else |
139 | | |
140 | 0 | #define DCHECK(cond) CHECK(cond) |
141 | 0 | #define DCHECK_LE(a, b) CHECK_LE(a, b) |
142 | 0 | #define DCHECK_GE(a, b) CHECK_GE(a, b) |
143 | 0 | #define DCHECK_EQ(a, b) CHECK_EQ(a, b) |
144 | | #define DCHECK_NE(a, b) CHECK_NE(a, b) |
145 | 0 | #define DCHECK_LT(a, b) CHECK_LT(a, b) |
146 | 0 | #define DCHECK_GT(a, b) CHECK_GT(a, b) |
147 | | |
148 | | #endif |
149 | | |
150 | | /* This snippet is based on the following file |
151 | | * https://github.com/edenhill/librdkafka/blob/7230a1aa327b55ace54579d019e9484af96886c6/src/snappy.c |
152 | | */ |
153 | | #if !defined(__WIN32__) |
154 | 0 | # define snappy_clz(n) __builtin_clz(n) |
155 | | # define snappy_ctz(n) __builtin_ctz(n) |
156 | 0 | # define snappy_ctz64(n) __builtin_ctzll(n) |
157 | | #else |
158 | | #include <intrin.h> |
159 | | static int inline snappy_clz(u32 x) { |
160 | | int r = 0; |
161 | | if (_BitScanReverse(&r, x)) |
162 | | return 31 - r; |
163 | | else |
164 | | return 32; |
165 | | } |
166 | | |
167 | | static int inline snappy_ctz(u32 x) { |
168 | | int r = 0; |
169 | | if (_BitScanForward(&r, x)) |
170 | | return r; |
171 | | else |
172 | | return 32; |
173 | | } |
174 | | |
175 | | static int inline snappy_ctz64(u64 x) { |
176 | | #ifdef _M_X64 |
177 | | int r = 0; |
178 | | if (_BitScanForward64(&r, x)) |
179 | | return r; |
180 | | else |
181 | | return 64; |
182 | | #else |
183 | | int r; |
184 | | if ((r = snappy_ctz(x & 0xffffffff)) < 32) |
185 | | return r; |
186 | | return 32 + snappy_ctz(x >> 32); |
187 | | #endif |
188 | | } |
189 | | #endif |
190 | | |
191 | | static inline bool is_little_endian(void) |
192 | 0 | { |
193 | 0 | #ifdef __LITTLE_ENDIAN__ |
194 | 0 | return true; |
195 | 0 | #endif |
196 | 0 | return false; |
197 | 0 | } |
198 | | |
199 | | static inline int log2_floor(u32 n) |
200 | 0 | { |
201 | 0 | return n == 0 ? -1 : 31 ^ snappy_clz(n); |
202 | 0 | } |
203 | | |
204 | | static inline int find_lsb_set_non_zero(u32 n) |
205 | 0 | { |
206 | 0 | return snappy_ctz(n); |
207 | 0 | } |
208 | | |
209 | | static inline int find_lsb_set_non_zero64(u64 n) |
210 | 0 | { |
211 | 0 | return snappy_ctz64(n); |
212 | 0 | } |
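
A minimal standalone sketch (plain C, assuming the GCC/Clang builtins used by the non-Windows branch above) of why log2_floor() works: for a non-zero 32-bit value, clz(n) is at most 31, so 31 ^ clz(n) equals 31 - clz(n), the index of the highest set bit, i.e. floor(log2(n)). The helper name below is hypothetical.

#include <assert.h>
#include <stdint.h>

static int log2_floor_sketch(uint32_t n)
{
    return n == 0 ? -1 : 31 ^ __builtin_clz(n);   /* same expression as log2_floor() */
}

int main(void)
{
    assert(log2_floor_sketch(1) == 0);
    assert(log2_floor_sketch(16384) == 14);   /* 2^14: exact power of two */
    assert(log2_floor_sketch(16385) == 14);   /* rounds down */
    assert(log2_floor_sketch(0) == -1);       /* sentinel for zero */
    return 0;
}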
213 | | |
214 | | #define kmax32 5 |
215 | | |
216 | | /* |
217 | | * Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. |
218 | | * Never reads a character at or beyond limit. If a valid/terminated varint32 |
219 | | * was found in the range, stores it in *OUTPUT and returns a pointer just |
220 | | * past the last byte of the varint32. Else returns NULL. On success, |
221 | | * "result <= limit". |
222 | | */ |
223 | | static inline const char *varint_parse32_with_limit(const char *p, |
224 | | const char *l, |
225 | | u32 * OUTPUT) |
226 | 0 | { |
227 | 0 | const unsigned char *ptr = (const unsigned char *)(p); |
228 | 0 | const unsigned char *limit = (const unsigned char *)(l); |
229 | 0 | u32 b, result; |
230 | |
|
231 | 0 | if (ptr >= limit) |
232 | 0 | return NULL; |
233 | 0 | b = *(ptr++); |
234 | 0 | result = b & 127; |
235 | 0 | if (b < 128) |
236 | 0 | goto done; |
237 | 0 | if (ptr >= limit) |
238 | 0 | return NULL; |
239 | 0 | b = *(ptr++); |
240 | 0 | result |= (b & 127) << 7; |
241 | 0 | if (b < 128) |
242 | 0 | goto done; |
243 | 0 | if (ptr >= limit) |
244 | 0 | return NULL; |
245 | 0 | b = *(ptr++); |
246 | 0 | result |= (b & 127) << 14; |
247 | 0 | if (b < 128) |
248 | 0 | goto done; |
249 | 0 | if (ptr >= limit) |
250 | 0 | return NULL; |
251 | 0 | b = *(ptr++); |
252 | 0 | result |= (b & 127) << 21; |
253 | 0 | if (b < 128) |
254 | 0 | goto done; |
255 | 0 | if (ptr >= limit) |
256 | 0 | return NULL; |
257 | 0 | b = *(ptr++); |
258 | 0 | result |= (b & 127) << 28; |
259 | 0 | if (b < 16) |
260 | 0 | goto done; |
261 | 0 | return NULL; /* Value is too long to be a varint32 */ |
262 | 0 | done: |
263 | 0 | *OUTPUT = result; |
264 | 0 | return (const char *)(ptr); |
265 | 0 | } |
266 | | |
267 | | /* |
268 | | * REQUIRES "ptr" points to a buffer of length sufficient to hold "v". |
269 | | * EFFECTS Encodes "v" into "ptr" and returns a pointer to the |
270 | | * byte just past the last encoded byte. |
271 | | */ |
272 | | static inline char *varint_encode32(char *sptr, u32 v) |
273 | 0 | { |
274 | | /* Operate on characters as unsigneds */ |
275 | 0 | unsigned char *ptr = (unsigned char *)(sptr); |
276 | 0 | static const int B = 128; |
277 | |
|
278 | 0 | if (v < (1 << 7)) { |
279 | 0 | *(ptr++) = v; |
280 | 0 | } else if (v < (1 << 14)) { |
281 | 0 | *(ptr++) = v | B; |
282 | 0 | *(ptr++) = v >> 7; |
283 | 0 | } else if (v < (1 << 21)) { |
284 | 0 | *(ptr++) = v | B; |
285 | 0 | *(ptr++) = (v >> 7) | B; |
286 | 0 | *(ptr++) = v >> 14; |
287 | 0 | } else if (v < (1 << 28)) { |
288 | 0 | *(ptr++) = v | B; |
289 | 0 | *(ptr++) = (v >> 7) | B; |
290 | 0 | *(ptr++) = (v >> 14) | B; |
291 | 0 | *(ptr++) = v >> 21; |
292 | 0 | } else { |
293 | 0 | *(ptr++) = v | B; |
294 | 0 | *(ptr++) = (v >> 7) | B; |
295 | 0 | *(ptr++) = (v >> 14) | B; |
296 | 0 | *(ptr++) = (v >> 21) | B; |
297 | 0 | *(ptr++) = v >> 28; |
298 | 0 | } |
299 | 0 | return (char *)(ptr); |
300 | 0 | } |
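
A hedged standalone sketch of the wire format these two helpers share: a little-endian base-128 varint of one to five bytes, each byte carrying seven payload bits with the high bit as a continuation flag. varint32_encode below is a hypothetical illustration, not a function from this file.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static size_t varint32_encode(uint8_t *out, uint32_t v)
{
    size_t n = 0;
    while (v >= 0x80) {
        out[n++] = (uint8_t)(v | 0x80);   /* low 7 bits, continuation bit set */
        v >>= 7;
    }
    out[n++] = (uint8_t)v;                /* final byte, high bit clear */
    return n;
}

int main(void)
{
    uint8_t buf[5];
    /* 300 = 0b1_0010_1100 encodes as 0xac 0x02, matching what
     * varint_encode32() emits and varint_parse32_with_limit() accepts. */
    size_t n = varint32_encode(buf, 300);
    assert(n == 2 && buf[0] == 0xac && buf[1] == 0x02);
    return 0;
}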
301 | | |
302 | | #ifdef SG |
303 | | |
304 | | static inline void *n_bytes_after_addr(void *addr, size_t n_bytes) |
305 | | { |
306 | | return (void *) ((char *)addr + n_bytes); |
307 | | } |
308 | | |
309 | | struct source { |
310 | | struct iovec *iov; |
311 | | int iovlen; |
312 | | int curvec; |
313 | | int curoff; |
314 | | size_t total; |
315 | | }; |
316 | | |
317 | | /* Only valid at beginning when nothing is consumed */ |
318 | | static inline int available(struct source *s) |
319 | | { |
320 | | return s->total; |
321 | | } |
322 | | |
323 | | static inline const char *peek(struct source *s, size_t *len) |
324 | | { |
325 | | if (likely(s->curvec < s->iovlen)) { |
326 | | struct iovec *iv = &s->iov[s->curvec]; |
327 | | if (s->curoff < iv->iov_len) { |
328 | | *len = iv->iov_len - s->curoff; |
329 | | return n_bytes_after_addr(iv->iov_base, s->curoff); |
330 | | } |
331 | | } |
332 | | *len = 0; |
333 | | return NULL; |
334 | | } |
335 | | |
336 | | static inline void skip(struct source *s, size_t n) |
337 | | { |
338 | | struct iovec *iv = &s->iov[s->curvec]; |
339 | | s->curoff += n; |
340 | | DCHECK_LE(s->curoff, iv->iov_len); |
341 | | if (s->curoff >= iv->iov_len && s->curvec + 1 < s->iovlen) { |
342 | | s->curoff = 0; |
343 | | s->curvec++; |
344 | | } |
345 | | } |
346 | | |
347 | | struct sink { |
348 | | struct iovec *iov; |
349 | | int iovlen; |
350 | | unsigned curvec; |
351 | | unsigned curoff; |
352 | | unsigned written; |
353 | | }; |
354 | | |
355 | | static inline void append(struct sink *s, const char *data, size_t n) |
356 | | { |
357 | | struct iovec *iov = &s->iov[s->curvec]; |
358 | | char *dst = n_bytes_after_addr(iov->iov_base, s->curoff); |
359 | | size_t nlen = min_t(size_t, iov->iov_len - s->curoff, n); |
360 | | if (data != dst) |
361 | | memcpy(dst, data, nlen); |
362 | | s->written += n; |
363 | | s->curoff += nlen; |
364 | | while ((n -= nlen) > 0) { |
365 | | data += nlen; |
366 | | s->curvec++; |
367 | | DCHECK_LT(s->curvec, s->iovlen); |
368 | | iov++; |
369 | | nlen = min_t(size_t, iov->iov_len, n); |
370 | | memcpy(iov->iov_base, data, nlen); |
371 | | s->curoff = nlen; |
372 | | } |
373 | | } |
374 | | |
375 | | static inline void *sink_peek(struct sink *s, size_t n) |
376 | | { |
377 | | struct iovec *iov = &s->iov[s->curvec]; |
378 | | if (s->curvec < iov->iov_len && iov->iov_len - s->curoff >= n) |
379 | | return n_bytes_after_addr(iov->iov_base, s->curoff); |
380 | | return NULL; |
381 | | } |
382 | | |
383 | | #else |
384 | | |
385 | | struct source { |
386 | | const char *ptr; |
387 | | size_t left; |
388 | | }; |
389 | | |
390 | | static inline int available(struct source *s) |
391 | 0 | { |
392 | 0 | return s->left; |
393 | 0 | } |
394 | | |
395 | | static inline const char *peek(struct source *s, size_t * len) |
396 | 0 | { |
397 | 0 | *len = s->left; |
398 | 0 | return s->ptr; |
399 | 0 | } |
400 | | |
401 | | static inline void skip(struct source *s, size_t n) |
402 | 0 | { |
403 | 0 | s->left -= n; |
404 | 0 | s->ptr += n; |
405 | 0 | } |
406 | | |
407 | | struct sink { |
408 | | char *dest; |
409 | | }; |
410 | | |
411 | | static inline void append(struct sink *s, const char *data, size_t n) |
412 | 0 | { |
413 | 0 | if (data != s->dest) |
414 | 0 | memcpy(s->dest, data, n); |
415 | 0 | s->dest += n; |
416 | 0 | } |
417 | | |
418 | 0 | #define sink_peek(s, n) sink_peek_no_sg(s) |
419 | | |
420 | | static inline void *sink_peek_no_sg(const struct sink *s) |
421 | 0 | { |
422 | 0 | return s->dest; |
423 | 0 | } |
424 | | |
425 | | #endif |
426 | | |
427 | | struct writer { |
428 | | char *base; |
429 | | char *op; |
430 | | char *op_limit; |
431 | | }; |
432 | | |
433 | | /* Called before decompression */ |
434 | | static inline void writer_set_expected_length(struct writer *w, size_t len) |
435 | 0 | { |
436 | 0 | w->op_limit = w->op + len; |
437 | 0 | } |
438 | | |
439 | | /* Called after decompression */ |
440 | | static inline bool writer_check_length(struct writer *w) |
441 | 0 | { |
442 | 0 | return w->op == w->op_limit; |
443 | 0 | } |
444 | | |
445 | | /* |
446 | | * Copy "len" bytes from "src" to "op", one byte at a time. Used for |
447 | | * handling COPY operations where the input and output regions may |
448 | | * overlap. For example, suppose: |
449 | | * src == "ab" |
450 | | * op == src + 2 |
451 | | * len == 20 |
452 | | * After IncrementalCopy(src, op, len), the result will have |
453 | | * eleven copies of "ab" |
454 | | * ababababababababababab |
455 | | * Note that this does not match the semantics of either memcpy() |
456 | | * or memmove(). |
457 | | */ |
458 | | static inline void incremental_copy(const char *src, char *op, ssize_t len) |
459 | 0 | { |
460 | 0 | DCHECK_GT(len, 0); |
461 | 0 | do { |
462 | 0 | *op++ = *src++; |
463 | 0 | } while (--len > 0); |
464 | 0 | } |
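
The example from the comment above, checked as a standalone sketch (standard C only): copying byte by byte through overlapping regions replicates the two-byte pattern, which is exactly what neither memcpy() nor memmove() would do.

#include <assert.h>
#include <string.h>

int main(void)
{
    char buf[32] = "ab";                   /* pattern of length 2 */
    const char *src = buf;
    char *op = buf + 2;                    /* op == src + 2 */
    for (int len = 20; len > 0; len--)     /* same loop as incremental_copy() */
        *op++ = *src++;
    /* eleven copies of "ab", as the comment promises */
    assert(memcmp(buf, "ababababababababababab", 22) == 0);
    return 0;
}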
465 | | |
466 | | /* |
467 | | * Equivalent to IncrementalCopy except that it can write up to ten extra |
468 | | * bytes after the end of the copy, and that it is faster. |
469 | | * |
470 | | * The main part of this loop is a simple copy of eight bytes at a time until |
471 | | * we've copied (at least) the requested amount of bytes. However, if op and |
472 | | * src are less than eight bytes apart (indicating a repeating pattern of |
473 | | * length < 8), we first need to expand the pattern in order to get the correct |
474 | | * results. For instance, if the buffer looks like this, with the eight-byte |
475 | | * <src> and <op> patterns marked as intervals: |
476 | | * |
477 | | * abxxxxxxxxxxxx |
478 | | * [------] src |
479 | | * [------] op |
480 | | * |
481 | | * a single eight-byte copy from <src> to <op> will repeat the pattern once, |
482 | | * after which we can move <op> two bytes without moving <src>: |
483 | | * |
484 | | * ababxxxxxxxxxx |
485 | | * [------] src |
486 | | * [------] op |
487 | | * |
488 | | * and repeat the exercise until the two no longer overlap. |
489 | | * |
490 | | * This allows us to do very well in the special case of one single byte |
491 | | * repeated many times, without taking a big hit for more general cases. |
492 | | * |
493 | | * The worst case of extra writing past the end of the match occurs when |
494 | | * op - src == 1 and len == 1; the last copy will read from byte positions |
495 | | * [0..7] and write to [4..11], whereas it was only supposed to write to |
496 | | * position 1. Thus, ten excess bytes. |
497 | | */ |
498 | | |
499 | 0 | #define kmax_increment_copy_overflow 10 |
500 | | |
501 | | static inline void incremental_copy_fast_path(const char *src, char *op, |
502 | | ssize_t len) |
503 | 0 | { |
504 | 0 | while (op - src < 8) { |
505 | 0 | unaligned_copy64(src, op); |
506 | 0 | len -= op - src; |
507 | 0 | op += op - src; |
508 | 0 | } |
509 | 0 | while (len > 0) { |
510 | 0 | unaligned_copy64(src, op); |
511 | 0 | src += 8; |
512 | 0 | op += 8; |
513 | 0 | len -= 8; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | static inline bool writer_append_from_self(struct writer *w, u32 offset, |
518 | | u32 len) |
519 | 0 | { |
520 | 0 | char *const op = w->op; |
521 | 0 | CHECK_LE(op, w->op_limit); |
522 | 0 | const u32 space_left = w->op_limit - op; |
523 | |
|
524 | 0 | if (op - w->base <= offset - 1u) /* -1u catches offset==0 */ |
525 | 0 | return false; |
526 | 0 | if (len <= 16 && offset >= 8 && space_left >= 16) { |
527 | | /* Fast path, used for the majority (70-80%) of dynamic |
528 | | * invocations. */ |
529 | 0 | unaligned_copy64(op - offset, op); |
530 | 0 | unaligned_copy64(op - offset + 8, op + 8); |
531 | 0 | } else { |
532 | 0 | if (space_left >= len + kmax_increment_copy_overflow) { |
533 | 0 | incremental_copy_fast_path(op - offset, op, len); |
534 | 0 | } else { |
535 | 0 | if (space_left < len) { |
536 | 0 | return false; |
537 | 0 | } |
538 | 0 | incremental_copy(op - offset, op, len); |
539 | 0 | } |
540 | 0 | } |
541 | | |
542 | 0 | w->op = op + len; |
543 | 0 | return true; |
544 | 0 | } |
545 | | |
546 | | static inline bool writer_append(struct writer *w, const char *ip, u32 len) |
547 | 0 | { |
548 | 0 | char *const op = w->op; |
549 | 0 | CHECK_LE(op, w->op_limit); |
550 | 0 | const u32 space_left = w->op_limit - op; |
551 | 0 | if (space_left < len) |
552 | 0 | return false; |
553 | 0 | memcpy(op, ip, len); |
554 | 0 | w->op = op + len; |
555 | 0 | return true; |
556 | 0 | } |
557 | | |
558 | | static inline bool writer_try_fast_append(struct writer *w, const char *ip, |
559 | | u32 available_bytes, u32 len) |
560 | 0 | { |
561 | 0 | char *const op = w->op; |
562 | 0 | const int space_left = w->op_limit - op; |
563 | 0 | if (len <= 16 && available_bytes >= 16 && space_left >= 16) { |
564 | | /* Fast path, used for the majority (~95%) of invocations */ |
565 | 0 | unaligned_copy64(ip, op); |
566 | 0 | unaligned_copy64(ip + 8, op + 8); |
567 | 0 | w->op = op + len; |
568 | 0 | return true; |
569 | 0 | } |
570 | 0 | return false; |
571 | 0 | } |
572 | | |
573 | | /* |
574 | | * Any hash function will produce a valid compressed bitstream, but a good |
575 | | * hash function reduces the number of collisions and thus yields better |
576 | | * compression for compressible input, and more speed for incompressible |
577 | | * input. Of course, it doesn't hurt if the hash function is reasonably fast |
578 | | * either, as it gets called a lot. |
579 | | */ |
580 | | static inline u32 hash_bytes(u32 bytes, int shift) |
581 | 0 | { |
582 | 0 | u32 kmul = 0x1e35a7bd; |
583 | 0 | return (bytes * kmul) >> shift; |
584 | 0 | } |
585 | | |
586 | | static inline u32 hash(const char *p, int shift) |
587 | 0 | { |
588 | 0 | return hash_bytes(UNALIGNED_LOAD32(p), shift); |
589 | 0 | } |
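
A standalone sketch of the multiplicative hash above: with the largest table (2^14 entries) the caller passes shift = 32 - 14 = 18, so the top bits of the product select a table slot.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t bytes = 0x64636261u;                     /* "abcd" read little-endian */
    uint32_t h = (bytes * UINT32_C(0x1e35a7bd)) >> 18;
    assert(h < (1u << 14));                           /* always indexes a 16K-entry table */
    return 0;
}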
590 | | |
591 | | /* |
592 | | * Compressed data can be defined as: |
593 | | * compressed := item* literal* |
594 | | * item := literal* copy |
595 | | * |
596 | | * The trailing literal sequence has a space blowup of at most 62/60 |
597 | | * since a literal of length 60 needs one tag byte + one extra byte |
598 | | * for length information. |
599 | | * |
600 | | * Item blowup is trickier to measure. Suppose the "copy" op copies |
601 | | * 4 bytes of data. Because of a special check in the encoding code, |
602 | | * we produce a 4-byte copy only if the offset is < 65536. Therefore |
603 | | * the copy op takes 3 bytes to encode, and this type of item leads |
604 | | * to at most the 62/60 blowup for representing literals. |
605 | | * |
606 | | * Suppose the "copy" op copies 5 bytes of data. If the offset is big |
607 | | * enough, it will take 5 bytes to encode the copy op. Therefore the |
608 | | * worst case here is a one-byte literal followed by a five-byte copy. |
609 | | * I.e., 6 bytes of input turn into 7 bytes of "compressed" data. |
610 | | * |
611 | | * This last factor dominates the blowup, so the final estimate is: |
612 | | */ |
613 | | size_t snappy_max_compressed_length(size_t source_len) |
614 | 0 | { |
615 | 0 | return 32 + source_len + source_len / 6; |
616 | 0 | } |
617 | | EXPORT_SYMBOL(snappy_max_compressed_length); |
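
A quick check of the bound, as a standalone sketch that mirrors the formula above (the helper name is hypothetical): a full 64 KiB block can grow to at most 76,490 bytes, and even empty input keeps the 32-byte headroom.

#include <assert.h>
#include <stddef.h>

static size_t max_compressed_length_sketch(size_t n)
{
    return 32 + n + n / 6;   /* same expression as snappy_max_compressed_length() */
}

int main(void)
{
    assert(max_compressed_length_sketch(65536) == 76490);
    assert(max_compressed_length_sketch(0) == 32);
    return 0;
}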
618 | | |
619 | | enum { |
620 | | LITERAL = 0, |
621 | | COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ |
622 | | COPY_2_BYTE_OFFSET = 2, |
623 | | COPY_4_BYTE_OFFSET = 3 |
624 | | }; |
625 | | |
626 | | static inline char *emit_literal(char *op, |
627 | | const char *literal, |
628 | | int len, bool allow_fast_path) |
629 | 0 | { |
630 | 0 | int n = len - 1; /* Zero-length literals are disallowed */ |
631 | |
|
632 | 0 | if (n < 60) { |
633 | | /* Fits in tag byte */ |
634 | 0 | *op++ = LITERAL | (n << 2); |
635 | | |
636 | | /* |
637 | | * The vast majority of copies are below 16 bytes, for which a |
638 | | * call to memcpy is overkill. This fast path can sometimes |
639 | | * copy up to 15 bytes too much, but that is okay in the |
640 | | * main loop, since we have a bit to go on for both sides: |
641 | | * |
642 | | * - The input will always have kInputMarginBytes = 15 extra |
643 | | * available bytes, as long as we're in the main loop, and |
644 | | * if not, allow_fast_path = false. |
645 | | * - The output will always have 32 spare bytes (see |
646 | | * MaxCompressedLength). |
647 | | */ |
648 | 0 | if (allow_fast_path && len <= 16) { |
649 | 0 | unaligned_copy64(literal, op); |
650 | 0 | unaligned_copy64(literal + 8, op + 8); |
651 | 0 | return op + len; |
652 | 0 | } |
653 | 0 | } else { |
654 | | /* Encode in upcoming bytes */ |
655 | 0 | char *base = op; |
656 | 0 | int count = 0; |
657 | 0 | op++; |
658 | 0 | while (n > 0) { |
659 | 0 | *op++ = n & 0xff; |
660 | 0 | n >>= 8; |
661 | 0 | count++; |
662 | 0 | } |
663 | 0 | DCHECK(count >= 1); |
664 | 0 | DCHECK(count <= 4); |
665 | 0 | *base = LITERAL | ((59 + count) << 2); |
666 | 0 | } |
667 | 0 | memcpy(op, literal, len); |
668 | 0 | return op + len; |
669 | 0 | } |
670 | | |
671 | | static inline char *emit_copy_less_than64(char *op, int offset, int len) |
672 | 0 | { |
673 | 0 | DCHECK_LE(len, 64); |
674 | 0 | DCHECK_GE(len, 4); |
675 | 0 | DCHECK_LT(offset, 65536); |
676 | | |
677 | 0 | if ((len < 12) && (offset < 2048)) { |
678 | 0 | int len_minus_4 = len - 4; |
679 | 0 | DCHECK(len_minus_4 < 8); /* Must fit in 3 bits */ |
680 | 0 | *op++ = |
681 | 0 | COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) |
682 | 0 | << 5); |
683 | 0 | *op++ = offset & 0xff; |
684 | 0 | } else { |
685 | 0 | *op++ = COPY_2_BYTE_OFFSET + ((len - 1) << 2); |
686 | 0 | put_unaligned_le16(offset, op); |
687 | 0 | op += 2; |
688 | 0 | } |
689 | 0 | return op; |
690 | 0 | } |
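
A hedged sketch of the two encodings this helper chooses between, reusing the tag constants from the enum above: short nearby copies pack the offset's high bits into the tag byte followed by one offset byte, everything else gets a tag byte followed by a little-endian 16-bit offset.

#include <assert.h>
#include <stdint.h>

enum { COPY_1_BYTE_OFFSET = 1, COPY_2_BYTE_OFFSET = 2 };

int main(void)
{
    /* len 4..11 with offset < 2048: tag byte plus one offset byte */
    int len = 7, offset = 5;
    uint8_t tag = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 8) << 5);
    assert(tag == 0x0d && (offset & 0xff) == 0x05);

    /* otherwise: tag byte plus a little-endian 16-bit offset */
    len = 60; offset = 4096;
    tag = COPY_2_BYTE_OFFSET + ((len - 1) << 2);
    uint8_t off_le[2] = { offset & 0xff, (offset >> 8) & 0xff };
    assert(tag == 0xee && off_le[0] == 0x00 && off_le[1] == 0x10);
    return 0;
}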
691 | | |
692 | | static inline char *emit_copy(char *op, int offset, int len) |
693 | 0 | { |
694 | | /* |
695 | | * Emit 64 byte copies but make sure to keep at least four bytes |
696 | | * reserved |
697 | | */ |
698 | 0 | while (len >= 68) { |
699 | 0 | op = emit_copy_less_than64(op, offset, 64); |
700 | 0 | len -= 64; |
701 | 0 | } |
702 | | |
703 | | /* |
704 | | * Emit an extra 60 byte copy if we have too much data to fit in |
705 | | * one copy |
706 | | */ |
707 | 0 | if (len > 64) { |
708 | 0 | op = emit_copy_less_than64(op, offset, 60); |
709 | 0 | len -= 60; |
710 | 0 | } |
711 | | |
712 | | /* Emit remainder */ |
713 | 0 | op = emit_copy_less_than64(op, offset, len); |
714 | 0 | return op; |
715 | 0 | } |
716 | | |
717 | | /** |
718 | | * snappy_uncompressed_length - return length of uncompressed output. |
719 | | * @start: compressed buffer |
720 | | * @n: length of compressed buffer. |
721 | | * @result: Write the length of the uncompressed output here. |
722 | | * |
723 | | * Returns true when successful, otherwise false. |
724 | | */ |
725 | | bool snappy_uncompressed_length(const char *start, size_t n, size_t * result) |
726 | 0 | { |
727 | 0 | u32 v = 0; |
728 | 0 | const char *limit = start + n; |
729 | 0 | if (varint_parse32_with_limit(start, limit, &v) != NULL) { |
730 | 0 | *result = v; |
731 | 0 | return true; |
732 | 0 | } else { |
733 | 0 | return false; |
734 | 0 | } |
735 | 0 | } |
736 | | EXPORT_SYMBOL(snappy_uncompressed_length); |
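
A typical call sequence for the non-SG decompression path, as a sketch that assumes a userspace build and the declarations in "snappy.h"; decode_buffer is a hypothetical wrapper, not part of this file.

#include <stdlib.h>
#include "snappy.h"

int decode_buffer(const char *comp, size_t comp_len, char **out, size_t *out_len)
{
    size_t ulen;
    if (!snappy_uncompressed_length(comp, comp_len, &ulen))
        return -1;                         /* corrupt or truncated length prefix */
    char *buf = malloc(ulen ? ulen : 1);
    if (!buf)
        return -1;
    if (snappy_uncompress(comp, comp_len, buf) != 0) {
        free(buf);                         /* body did not decode cleanly */
        return -1;
    }
    *out = buf;
    *out_len = ulen;
    return 0;
}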
737 | | |
738 | | /* |
739 | | * The size of a compression block. Note that many parts of the compression |
740 | | * code assumes that kBlockSize <= 65536; in particular, the hash table |
741 | | * can only store 16-bit offsets, and EmitCopy() also assumes the offset |
742 | | * is 65535 bytes or less. Note also that if you change this, it will |
743 | | * affect the framing format. |
744 | | * Note that there might be older data around that is compressed with larger |
745 | | * block sizes, so the decompression code should not rely on the |
746 | | * non-existence of long backreferences. |
747 | | */ |
748 | | #define kblock_log 16 |
749 | | #define kblock_size (1 << kblock_log) |
750 | | |
751 | | /* |
752 | | * This value could be halved or quartered to save memory |
753 | | * at the cost of slightly worse compression. |
754 | | */ |
755 | 0 | #define kmax_hash_table_bits 14 |
756 | 0 | #define kmax_hash_table_size (1U << kmax_hash_table_bits) |
757 | | |
758 | | /* |
759 | | * Use smaller hash table when input.size() is smaller, since we |
760 | | * fill the table, incurring O(hash table size) overhead for |
761 | | * compression, and if the input is short, we won't need that |
762 | | * many hash table entries anyway. |
763 | | */ |
764 | | static u16 *get_hash_table(struct snappy_env *env, size_t input_size, |
765 | | int *table_size) |
766 | 0 | { |
767 | 0 | unsigned htsize = 256; |
768 | |
|
769 | 0 | DCHECK(kmax_hash_table_size >= 256); |
770 | 0 | while (htsize < kmax_hash_table_size && htsize < input_size) |
771 | 0 | htsize <<= 1; |
772 | 0 | CHECK_EQ(0, htsize & (htsize - 1)); |
773 | 0 | CHECK_LE(htsize, kmax_hash_table_size); |
774 | | |
775 | 0 | u16 *table; |
776 | 0 | table = env->hash_table; |
777 | |
|
778 | 0 | *table_size = htsize; |
779 | 0 | memset(table, 0, htsize * sizeof(*table)); |
780 | 0 | return table; |
781 | 0 | } |
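
The sizing loop above, restated as a standalone sketch: htsize is the smallest power of two that is at least 256 and at least input_size, capped at kmax_hash_table_size.

#include <assert.h>

int main(void)
{
    unsigned htsize = 256, input_size = 1000, cap = 1u << 14;
    while (htsize < cap && htsize < input_size)
        htsize <<= 1;
    assert(htsize == 1024);    /* 256 -> 512 -> 1024 covers a 1000-byte input */
    return 0;
}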
782 | | |
783 | | /* |
784 | | * Return the largest n such that |
785 | | * |
786 | | * s1[0,n-1] == s2[0,n-1] |
787 | | * and n <= (s2_limit - s2). |
788 | | * |
789 | | * Does not read *s2_limit or beyond. |
790 | | * Does not read *(s1 + (s2_limit - s2)) or beyond. |
791 | | * Requires that s2_limit >= s2. |
792 | | * |
793 | | * Separate implementation for x86_64, for speed. Uses the fact that |
794 | | * x86_64 is little endian. |
795 | | */ |
796 | | #if defined(__LITTLE_ENDIAN__) && BITS_PER_LONG == 64 |
797 | | static inline int find_match_length(const char *s1, |
798 | | const char *s2, const char *s2_limit) |
799 | 0 | { |
800 | 0 | int matched = 0; |
801 | |
|
802 | 0 | DCHECK_GE(s2_limit, s2); |
803 | | /* |
804 | | * Find out how long the match is. We loop over the data 64 bits at a |
805 | | * time until we find a 64-bit block that doesn't match; then we find |
806 | | * the first non-matching bit and use that to calculate the total |
807 | | * length of the match. |
808 | | */ |
809 | 0 | while (likely(s2 <= s2_limit - 8)) { |
810 | 0 | if (unlikely |
811 | 0 | (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { |
812 | 0 | s2 += 8; |
813 | 0 | matched += 8; |
814 | 0 | } else { |
815 | | /* |
816 | | * On current (mid-2008) Opteron models there |
817 | | * is a 3% more efficient code sequence to |
818 | | * find the first non-matching byte. However, |
819 | | * what follows is ~10% better on Intel Core 2 |
820 | | * and newer, and we expect AMD's bsf |
821 | | * instruction to improve. |
822 | | */ |
823 | 0 | u64 x = |
824 | 0 | UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + |
825 | 0 | matched); |
826 | 0 | int matching_bits = find_lsb_set_non_zero64(x); |
827 | 0 | matched += matching_bits >> 3; |
828 | 0 | return matched; |
829 | 0 | } |
830 | 0 | } |
831 | 0 | while (likely(s2 < s2_limit)) { |
832 | 0 | if (likely(s1[matched] == *s2)) { |
833 | 0 | ++s2; |
834 | 0 | ++matched; |
835 | 0 | } else { |
836 | 0 | return matched; |
837 | 0 | } |
838 | 0 | } |
839 | 0 | return matched; |
840 | 0 | } |
841 | | #else |
842 | | static inline int find_match_length(const char *s1, |
843 | | const char *s2, const char *s2_limit) |
844 | | { |
845 | | /* Implementation based on the x86-64 version, above. */ |
846 | | DCHECK_GE(s2_limit, s2); |
847 | | int matched = 0; |
848 | | |
849 | | while (s2 <= s2_limit - 4 && |
850 | | UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { |
851 | | s2 += 4; |
852 | | matched += 4; |
853 | | } |
854 | | if (is_little_endian() && s2 <= s2_limit - 4) { |
855 | | u32 x = |
856 | | UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); |
857 | | int matching_bits = find_lsb_set_non_zero(x); |
858 | | matched += matching_bits >> 3; |
859 | | } else { |
860 | | while ((s2 < s2_limit) && (s1[matched] == *s2)) { |
861 | | ++s2; |
862 | | ++matched; |
863 | | } |
864 | | } |
865 | | return matched; |
866 | | } |
867 | | #endif |
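
A standalone sketch of the XOR plus count-trailing-zeros trick used by the 64-bit version above: on a little-endian host, the index of the lowest set bit of the XOR, divided by eight, is the index of the first differing byte. Assumes the GCC/Clang __builtin_ctzll used by the non-Windows branch earlier.

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    const char a[] = "abcdXYZ!";    /* differs from b starting at byte 4 */
    const char b[] = "abcdxyz!";
    uint64_t wa, wb;
    memcpy(&wa, a, 8);
    memcpy(&wb, b, 8);
    assert((__builtin_ctzll(wa ^ wb) >> 3) == 4);   /* matched length is 4 */
    return 0;
}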
868 | | |
869 | | /* |
870 | | * For 0 <= offset <= 4, GetU32AtOffset(GetEightBytesAt(p), offset) will |
871 | | * equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have |
872 | | * empirically found that overlapping loads such as |
873 | | * UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) |
874 | | * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to u32. |
875 | | * |
876 | | * We have different versions for 64- and 32-bit; ideally we would avoid the |
877 | | * two functions and just inline the UNALIGNED_LOAD64 call into |
878 | | * GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever |
879 | | * enough to avoid loading the value multiple times then. For 64-bit, the load |
880 | | * is done when GetEightBytesAt() is called, whereas for 32-bit, the load is |
881 | | * done at GetUint32AtOffset() time. |
882 | | */ |
883 | | |
884 | | #if BITS_PER_LONG == 64 |
885 | | |
886 | | typedef u64 eight_bytes_reference; |
887 | | |
888 | | static inline eight_bytes_reference get_eight_bytes_at(const char* ptr) |
889 | 0 | { |
890 | 0 | return UNALIGNED_LOAD64(ptr); |
891 | 0 | } |
892 | | |
893 | | static inline u32 get_u32_at_offset(u64 v, int offset) |
894 | 0 | { |
895 | 0 | DCHECK_GE(offset, 0); |
896 | 0 | DCHECK_LE(offset, 4); |
897 | 0 | return v >> (is_little_endian()? 8 * offset : 32 - 8 * offset); |
898 | 0 | } |
899 | | |
900 | | #else |
901 | | |
902 | | typedef const char *eight_bytes_reference; |
903 | | |
904 | | static inline eight_bytes_reference get_eight_bytes_at(const char* ptr) |
905 | | { |
906 | | return ptr; |
907 | | } |
908 | | |
909 | | static inline u32 get_u32_at_offset(const char *v, int offset) |
910 | | { |
911 | | DCHECK_GE(offset, 0); |
912 | | DCHECK_LE(offset, 4); |
913 | | return UNALIGNED_LOAD32(v + offset); |
914 | | } |
915 | | #endif |
916 | | |
917 | | /* |
918 | | * Flat array compression that does not emit the "uncompressed length" |
919 | | * prefix. Compresses "input" string to the "*op" buffer. |
920 | | * |
921 | | * REQUIRES: "input" is at most "kBlockSize" bytes long. |
922 | | * REQUIRES: "op" points to an array of memory that is at least |
923 | | * "MaxCompressedLength(input.size())" in size. |
924 | | * REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. |
925 | | * REQUIRES: "table_size" is a power of two |
926 | | * |
927 | | * Returns an "end" pointer into "op" buffer. |
928 | | * "end - op" is the compressed size of "input". |
929 | | */ |
930 | | |
931 | | static char *compress_fragment(const char *const input, |
932 | | const size_t input_size, |
933 | | char *op, u16 * table, const unsigned table_size) |
934 | 0 | { |
935 | | /* "ip" is the input pointer, and "op" is the output pointer. */ |
936 | 0 | const char *ip = input; |
937 | 0 | CHECK_LE(input_size, kblock_size); |
938 | 0 | CHECK_EQ(table_size & (table_size - 1), 0); |
939 | 0 | const int shift = 32 - log2_floor(table_size); |
940 | 0 | DCHECK_EQ(UINT_MAX >> shift, table_size - 1); |
941 | 0 | const char *ip_end = input + input_size; |
942 | 0 | const char *baseip = ip; |
943 | | /* |
944 | | * Bytes in [next_emit, ip) will be emitted as literal bytes. Or |
945 | | * [next_emit, ip_end) after the main loop. |
946 | | */ |
947 | 0 | const char *next_emit = ip; |
948 | |
|
949 | 0 | const unsigned kinput_margin_bytes = 15; |
950 | |
|
951 | 0 | if (likely(input_size >= kinput_margin_bytes)) { |
952 | 0 | const char *const ip_limit = input + input_size - |
953 | 0 | kinput_margin_bytes; |
954 | |
|
955 | 0 | u32 next_hash; |
956 | 0 | for (next_hash = hash(++ip, shift);;) { |
957 | 0 | DCHECK_LT(next_emit, ip); |
958 | | /* |
959 | | * The body of this loop calls EmitLiteral once and then EmitCopy one or |
960 | | * more times. (The exception is that when we're close to exhausting |
961 | | * the input we goto emit_remainder.) |
962 | | * |
963 | | * In the first iteration of this loop we're just starting, so |
964 | | * there's nothing to copy, so calling EmitLiteral once is |
965 | | * necessary. And we only start a new iteration when the |
966 | | * current iteration has determined that a call to EmitLiteral will |
967 | | * precede the next call to EmitCopy (if any). |
968 | | * |
969 | | * Step 1: Scan forward in the input looking for a 4-byte-long match. |
970 | | * If we get close to exhausting the input then goto emit_remainder. |
971 | | * |
972 | | * Heuristic match skipping: If 32 bytes are scanned with no matches |
973 | | * found, start looking only at every other byte. If 32 more bytes are |
974 | | * scanned, look at every third byte, etc.. When a match is found, |
975 | | * immediately go back to looking at every byte. This is a small loss |
976 | | * (~5% performance, ~0.1% density) for compressible data due to more |
977 | | * bookkeeping, but for non-compressible data (such as JPEG) it's a huge |
978 | | * win since the compressor quickly "realizes" the data is incompressible |
979 | | * and doesn't bother looking for matches everywhere. |
980 | | * |
981 | | * The "skip" variable keeps track of how many bytes there are since the |
982 | | * last match; dividing it by 32 (ie. right-shifting by five) gives the |
983 | | * number of bytes to move ahead for each iteration. |
984 | | */ |
985 | 0 | u32 skip_bytes = 32; |
986 | |
|
987 | 0 | const char *next_ip = ip; |
988 | 0 | const char *candidate; |
989 | 0 | do { |
990 | 0 | ip = next_ip; |
991 | 0 | u32 hval = next_hash; |
992 | 0 | DCHECK_EQ(hval, hash(ip, shift)); |
993 | 0 | u32 bytes_between_hash_lookups = skip_bytes++ >> 5; |
994 | 0 | next_ip = ip + bytes_between_hash_lookups; |
995 | 0 | if (unlikely(next_ip > ip_limit)) { |
996 | 0 | goto emit_remainder; |
997 | 0 | } |
998 | 0 | next_hash = hash(next_ip, shift); |
999 | 0 | candidate = baseip + table[hval]; |
1000 | 0 | DCHECK_GE(candidate, baseip); |
1001 | 0 | DCHECK_LT(candidate, ip); |
1002 | | |
1003 | 0 | table[hval] = ip - baseip; |
1004 | 0 | } while (likely(UNALIGNED_LOAD32(ip) != |
1005 | 0 | UNALIGNED_LOAD32(candidate))); |
1006 | | |
1007 | | /* |
1008 | | * Step 2: A 4-byte match has been found. We'll later see if more |
1009 | | * than 4 bytes match. But, prior to the match, input |
1010 | | * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." |
1011 | | */ |
1012 | 0 | DCHECK_LE(next_emit + 16, ip_end); |
1013 | 0 | op = emit_literal(op, next_emit, ip - next_emit, true); |
1014 | | |
1015 | | /* |
1016 | | * Step 3: Call EmitCopy, and then see if another EmitCopy could |
1017 | | * be our next move. Repeat until we find no match for the |
1018 | | * input immediately after what was consumed by the last EmitCopy call. |
1019 | | * |
1020 | | * If we exit this loop normally then we need to call EmitLiteral next, |
1021 | | * though we don't yet know how big the literal will be. We handle that |
1022 | | * by proceeding to the next iteration of the main loop. We also can exit |
1023 | | * this loop via goto if we get close to exhausting the input. |
1024 | | */ |
1025 | 0 | eight_bytes_reference input_bytes; |
1026 | 0 | u32 candidate_bytes = 0; |
1027 | |
|
1028 | 0 | do { |
1029 | | /* |
1030 | | * We have a 4-byte match at ip, and no need to emit any |
1031 | | * "literal bytes" prior to ip. |
1032 | | */ |
1033 | 0 | const char *base = ip; |
1034 | 0 | int matched = 4 + |
1035 | 0 | find_match_length(candidate + 4, ip + 4, |
1036 | 0 | ip_end); |
1037 | 0 | ip += matched; |
1038 | 0 | int offset = base - candidate; |
1039 | 0 | DCHECK_EQ(0, memcmp(base, candidate, matched)); |
1040 | 0 | op = emit_copy(op, offset, matched); |
1041 | | /* |
1042 | | * We could immediately start working at ip now, but to improve |
1043 | | * compression we first update table[Hash(ip - 1, ...)]. |
1044 | | */ |
1045 | 0 | const char *insert_tail = ip - 1; |
1046 | 0 | next_emit = ip; |
1047 | 0 | if (unlikely(ip >= ip_limit)) { |
1048 | 0 | goto emit_remainder; |
1049 | 0 | } |
1050 | 0 | input_bytes = get_eight_bytes_at(insert_tail); |
1051 | 0 | u32 prev_hash = |
1052 | 0 | hash_bytes(get_u32_at_offset |
1053 | 0 | (input_bytes, 0), shift); |
1054 | 0 | table[prev_hash] = ip - baseip - 1; |
1055 | 0 | u32 cur_hash = |
1056 | 0 | hash_bytes(get_u32_at_offset |
1057 | 0 | (input_bytes, 1), shift); |
1058 | 0 | candidate = baseip + table[cur_hash]; |
1059 | 0 | candidate_bytes = UNALIGNED_LOAD32(candidate); |
1060 | 0 | table[cur_hash] = ip - baseip; |
1061 | 0 | } while (get_u32_at_offset(input_bytes, 1) == |
1062 | 0 | candidate_bytes); |
1063 | | |
1064 | 0 | next_hash = |
1065 | 0 | hash_bytes(get_u32_at_offset(input_bytes, 2), |
1066 | 0 | shift); |
1067 | 0 | ++ip; |
1068 | 0 | } |
1069 | 0 | } |
1070 | | |
1071 | 0 | emit_remainder: |
1072 | | /* Emit the remaining bytes as a literal */ |
1073 | 0 | if (next_emit < ip_end) |
1074 | 0 | op = emit_literal(op, next_emit, ip_end - next_emit, false); |
1075 | |
|
1076 | 0 | return op; |
1077 | 0 | } |
1078 | | |
1079 | | /* |
1080 | | * ----------------------------------------------------------------------- |
1081 | | * Lookup table for decompression code. Generated by ComputeTable() below. |
1082 | | * ----------------------------------------------------------------------- |
1083 | | */ |
1084 | | |
1085 | | /* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ |
1086 | | static const u32 wordmask[] = { |
1087 | | 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu |
1088 | | }; |
1089 | | |
1090 | | /* |
1091 | | * Data stored per entry in lookup table: |
1092 | | * Range Bits-used Description |
1093 | | * ------------------------------------ |
1094 | | * 1..64 0..7 Literal/copy length encoded in opcode byte |
1095 | | * 0..7 8..10 Copy offset encoded in opcode byte / 256 |
1096 | | * 0..4 11..13 Extra bytes after opcode |
1097 | | * |
1098 | | * We use eight bits for the length even though 7 would have sufficed |
1099 | | * because of efficiency reasons: |
1100 | | * (1) Extracting a byte is faster than a bit-field |
1101 | | * (2) It properly aligns copy offset so we do not need a <<8 |
1102 | | */ |
1103 | | static const u16 char_table[256] = { |
1104 | | 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, |
1105 | | 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, |
1106 | | 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, |
1107 | | 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, |
1108 | | 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, |
1109 | | 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, |
1110 | | 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, |
1111 | | 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, |
1112 | | 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, |
1113 | | 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, |
1114 | | 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, |
1115 | | 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, |
1116 | | 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, |
1117 | | 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, |
1118 | | 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, |
1119 | | 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, |
1120 | | 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, |
1121 | | 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, |
1122 | | 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, |
1123 | | 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, |
1124 | | 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, |
1125 | | 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, |
1126 | | 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, |
1127 | | 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, |
1128 | | 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, |
1129 | | 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, |
1130 | | 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, |
1131 | | 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, |
1132 | | 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, |
1133 | | 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, |
1134 | | 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, |
1135 | | 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 |
1136 | | }; |
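
How the decompressor unpacks one of these entries, as a standalone sketch: char_table[0x01] is 0x0804, the entry for a one-byte-offset copy of length 4 that needs one extra byte after the opcode.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint16_t entry = 0x0804;          /* char_table[0x01] */
    assert((entry & 0xff) == 4);      /* bits 0..7: literal/copy length */
    assert((entry & 0x700) == 0);     /* bits 8..10: copy offset, pre-scaled by 256 */
    assert((entry >> 11) == 1);       /* bits 11..13: extra bytes after the opcode */
    return 0;
}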
1137 | | |
1138 | | struct snappy_decompressor { |
1139 | | struct source *reader; /* Underlying source of bytes to decompress */ |
1140 | | const char *ip; /* Points to next buffered byte */ |
1141 | | const char *ip_limit; /* Points just past buffered bytes */ |
1142 | | u32 peeked; /* Bytes peeked from reader (need to skip) */ |
1143 | | bool eof; /* Hit end of input without an error? */ |
1144 | | char scratch[5]; /* Temporary buffer for peekfast boundaries */ |
1145 | | }; |
1146 | | |
1147 | | static void |
1148 | | init_snappy_decompressor(struct snappy_decompressor *d, struct source *reader) |
1149 | 0 | { |
1150 | 0 | d->reader = reader; |
1151 | 0 | d->ip = NULL; |
1152 | 0 | d->ip_limit = NULL; |
1153 | 0 | d->peeked = 0; |
1154 | 0 | d->eof = false; |
1155 | 0 | } |
1156 | | |
1157 | | static void exit_snappy_decompressor(struct snappy_decompressor *d) |
1158 | 0 | { |
1159 | 0 | skip(d->reader, d->peeked); |
1160 | 0 | } |
1161 | | |
1162 | | /* |
1163 | | * Read the uncompressed length stored at the start of the compressed data. |
1164 | | * On success, stores the length in *result and returns true. |
1165 | | * On failure, returns false. |
1166 | | */ |
1167 | | static bool read_uncompressed_length(struct snappy_decompressor *d, |
1168 | | u32 * result) |
1169 | 0 | { |
1170 | 0 | DCHECK(d->ip == NULL); /* |
1171 | | * Must not have read anything yet |
1172 | | * Length is encoded in 1..5 bytes |
1173 | | */ |
1174 | 0 | *result = 0; |
1175 | 0 | u32 shift = 0; |
1176 | 0 | while (true) { |
1177 | 0 | if (shift >= 32) |
1178 | 0 | return false; |
1179 | 0 | size_t n; |
1180 | 0 | const char *ip = peek(d->reader, &n); |
1181 | 0 | if (n == 0) |
1182 | 0 | return false; |
1183 | 0 | const unsigned char c = *(const unsigned char *)(ip); |
1184 | 0 | skip(d->reader, 1); |
1185 | 0 | *result |= (u32) (c & 0x7f) << shift; |
1186 | 0 | if (c < 128) { |
1187 | 0 | break; |
1188 | 0 | } |
1189 | 0 | shift += 7; |
1190 | 0 | } |
1191 | 0 | return true; |
1192 | 0 | } |
1193 | | |
1194 | | static bool refill_tag(struct snappy_decompressor *d); |
1195 | | |
1196 | | /* |
1197 | | * Process the items found in the input until an error or the end of input. |
1198 | | * The caller detects failures via d->eof and writer_check_length(). |
1199 | | */ |
1200 | | static void decompress_all_tags(struct snappy_decompressor *d, |
1201 | | struct writer *writer) |
1202 | 0 | { |
1203 | 0 | const char *ip = d->ip; |
1204 | | |
1205 | | /* |
1206 | | * We could have put this refill fragment only at the beginning of the loop. |
1207 | | * However, duplicating it at the end of each branch gives the compiler more |
1208 | | * scope to optimize the <ip_limit_ - ip> expression based on the local |
1209 | | * context, which overall increases speed. |
1210 | | */ |
1211 | 0 | #define MAYBE_REFILL() \ |
1212 | 0 | if (d->ip_limit - ip < 5) { \ |
1213 | 0 | d->ip = ip; \ |
1214 | 0 | if (!refill_tag(d)) return; \ |
1215 | 0 | ip = d->ip; \ |
1216 | 0 | } |
1217 | | |
1218 | |
|
1219 | 0 | MAYBE_REFILL(); |
1220 | 0 | for (;;) { |
1221 | 0 | if (d->ip_limit - ip < 5) { |
1222 | 0 | d->ip = ip; |
1223 | 0 | if (!refill_tag(d)) |
1224 | 0 | return; |
1225 | 0 | ip = d->ip; |
1226 | 0 | } |
1227 | | |
1228 | 0 | const unsigned char c = *(const unsigned char *)(ip++); |
1229 | |
|
1230 | 0 | if ((c & 0x3) == LITERAL) { |
1231 | 0 | u32 literal_length = (c >> 2) + 1; |
1232 | 0 | if (writer_try_fast_append(writer, ip, d->ip_limit - ip, |
1233 | 0 | literal_length)) { |
1234 | 0 | DCHECK_LT(literal_length, 61); |
1235 | 0 | ip += literal_length; |
1236 | 0 | MAYBE_REFILL(); |
1237 | 0 | continue; |
1238 | 0 | } |
1239 | 0 | if (unlikely(literal_length >= 61)) { |
1240 | | /* Long literal */ |
1241 | 0 | const u32 literal_ll = literal_length - 60; |
1242 | 0 | literal_length = (get_unaligned_le32(ip) & |
1243 | 0 | wordmask[literal_ll]) + 1; |
1244 | 0 | ip += literal_ll; |
1245 | 0 | } |
1246 | |
|
1247 | 0 | u32 avail = d->ip_limit - ip; |
1248 | 0 | while (avail < literal_length) { |
1249 | 0 | if (!writer_append(writer, ip, avail)) |
1250 | 0 | return; |
1251 | 0 | literal_length -= avail; |
1252 | 0 | skip(d->reader, d->peeked); |
1253 | 0 | size_t n; |
1254 | 0 | ip = peek(d->reader, &n); |
1255 | 0 | avail = n; |
1256 | 0 | d->peeked = avail; |
1257 | 0 | if (avail == 0) |
1258 | 0 | return; /* Premature end of input */ |
1259 | 0 | d->ip_limit = ip + avail; |
1260 | 0 | } |
1261 | 0 | if (!writer_append(writer, ip, literal_length)) |
1262 | 0 | return; |
1263 | 0 | ip += literal_length; |
1264 | 0 | MAYBE_REFILL(); |
1265 | 0 | } else { |
1266 | 0 | const u32 entry = char_table[c]; |
1267 | 0 | const u32 trailer = get_unaligned_le32(ip) & |
1268 | 0 | wordmask[entry >> 11]; |
1269 | 0 | const u32 length = entry & 0xff; |
1270 | 0 | ip += entry >> 11; |
1271 | | |
1272 | | /* |
1273 | | * copy_offset/256 is encoded in bits 8..10. |
1274 | | * By just fetching those bits, we get |
1275 | | * copy_offset (since the bit-field starts at |
1276 | | * bit 8). |
1277 | | */ |
1278 | 0 | const u32 copy_offset = entry & 0x700; |
1279 | 0 | if (!writer_append_from_self(writer, |
1280 | 0 | copy_offset + trailer, |
1281 | 0 | length)) |
1282 | 0 | return; |
1283 | 0 | MAYBE_REFILL(); |
1284 | 0 | } |
1285 | 0 | } |
1286 | 0 | } |
1287 | | |
1288 | | #undef MAYBE_REFILL |
1289 | | |
1290 | | static bool refill_tag(struct snappy_decompressor *d) |
1291 | 0 | { |
1292 | 0 | const char *ip = d->ip; |
1293 | |
|
1294 | 0 | if (ip == d->ip_limit) { |
1295 | 0 | size_t n; |
1296 | | /* Fetch a new fragment from the reader */ |
1297 | 0 | skip(d->reader, d->peeked); /* All peeked bytes are used up */ |
1298 | 0 | ip = peek(d->reader, &n); |
1299 | 0 | d->peeked = n; |
1300 | 0 | if (n == 0) { |
1301 | 0 | d->eof = true; |
1302 | 0 | return false; |
1303 | 0 | } |
1304 | 0 | d->ip_limit = ip + n; |
1305 | 0 | } |
1306 | | |
1307 | | /* Read the tag character */ |
1308 | 0 | DCHECK_LT(ip, d->ip_limit); |
1309 | 0 | const unsigned char c = *(const unsigned char *)(ip); |
1310 | 0 | const u32 entry = char_table[c]; |
1311 | 0 | const u32 needed = (entry >> 11) + 1; /* +1 byte for 'c' */ |
1312 | 0 | DCHECK_LE(needed, sizeof(d->scratch)); |
1313 | | |
1314 | | /* Read more bytes from reader if needed */ |
1315 | 0 | u32 nbuf = d->ip_limit - ip; |
1316 | |
|
1317 | 0 | if (nbuf < needed) { |
1318 | | /* |
1319 | | * Stitch together bytes from ip and reader to form the word |
1320 | | * contents. We store the needed bytes in "scratch". They |
1321 | | * will be consumed immediately by the caller since we do not |
1322 | | * read more than we need. |
1323 | | */ |
1324 | 0 | memmove(d->scratch, ip, nbuf); |
1325 | 0 | skip(d->reader, d->peeked); /* All peeked bytes are used up */ |
1326 | 0 | d->peeked = 0; |
1327 | 0 | while (nbuf < needed) { |
1328 | 0 | size_t length; |
1329 | 0 | const char *src = peek(d->reader, &length); |
1330 | 0 | if (length == 0) |
1331 | 0 | return false; |
1332 | 0 | u32 to_add = min_t(u32, needed - nbuf, length); |
1333 | 0 | memcpy(d->scratch + nbuf, src, to_add); |
1334 | 0 | nbuf += to_add; |
1335 | 0 | skip(d->reader, to_add); |
1336 | 0 | } |
1337 | 0 | DCHECK_EQ(nbuf, needed); |
1338 | 0 | d->ip = d->scratch; |
1339 | 0 | d->ip_limit = d->scratch + needed; |
1340 | 0 | } else if (nbuf < 5) { |
1341 | | /* |
1342 | | * Have enough bytes, but move into scratch so that we do not |
1343 | | * read past end of input |
1344 | | */ |
1345 | 0 | memmove(d->scratch, ip, nbuf); |
1346 | 0 | skip(d->reader, d->peeked); /* All peeked bytes are used up */ |
1347 | 0 | d->peeked = 0; |
1348 | 0 | d->ip = d->scratch; |
1349 | 0 | d->ip_limit = d->scratch + nbuf; |
1350 | 0 | } else { |
1351 | | /* Pass pointer to buffer returned by reader. */ |
1352 | 0 | d->ip = ip; |
1353 | 0 | } |
1354 | 0 | return true; |
1355 | 0 | } |
1356 | | |
1357 | | static int internal_uncompress(struct source *r, |
1358 | | struct writer *writer, u32 max_len) |
1359 | 0 | { |
1360 | 0 | struct snappy_decompressor decompressor; |
1361 | 0 | u32 uncompressed_len = 0; |
1362 | |
|
1363 | 0 | init_snappy_decompressor(&decompressor, r); |
1364 | |
|
1365 | 0 | if (!read_uncompressed_length(&decompressor, &uncompressed_len)) |
1366 | 0 | return -EIO; |
1367 | | /* Protect against possible DoS attack */ |
1368 | 0 | if ((u64) (uncompressed_len) > max_len) |
1369 | 0 | return -EIO; |
1370 | | |
1371 | 0 | writer_set_expected_length(writer, uncompressed_len); |
1372 | | |
1373 | | /* Process the entire input */ |
1374 | 0 | decompress_all_tags(&decompressor, writer); |
1375 | |
|
1376 | 0 | exit_snappy_decompressor(&decompressor); |
1377 | 0 | if (decompressor.eof && writer_check_length(writer)) |
1378 | 0 | return 0; |
1379 | 0 | return -EIO; |
1380 | 0 | } |
1381 | | |
1382 | | static inline int compress(struct snappy_env *env, struct source *reader, |
1383 | | struct sink *writer) |
1384 | 0 | { |
1385 | 0 | int err; |
1386 | 0 | size_t written = 0; |
1387 | 0 | int N = available(reader); |
1388 | 0 | char ulength[kmax32]; |
1389 | 0 | char *p = varint_encode32(ulength, N); |
1390 | |
|
1391 | 0 | append(writer, ulength, p - ulength); |
1392 | 0 | written += (p - ulength); |
1393 | |
|
1394 | 0 | while (N > 0) { |
1395 | | /* Get next block to compress (without copying if possible) */ |
1396 | 0 | size_t fragment_size; |
1397 | 0 | const char *fragment = peek(reader, &fragment_size); |
1398 | 0 | if (fragment_size == 0) { |
1399 | 0 | err = -EIO; |
1400 | 0 | goto out; |
1401 | 0 | } |
1402 | 0 | const unsigned num_to_read = min_t(int, N, kblock_size); |
1403 | 0 | size_t bytes_read = fragment_size; |
1404 | |
|
1405 | 0 | int pending_advance = 0; |
1406 | 0 | if (bytes_read >= num_to_read) { |
1407 | | /* Buffer returned by reader is large enough */ |
1408 | 0 | pending_advance = num_to_read; |
1409 | 0 | fragment_size = num_to_read; |
1410 | 0 | } |
1411 | 0 | else { |
1412 | 0 | memcpy(env->scratch, fragment, bytes_read); |
1413 | 0 | skip(reader, bytes_read); |
1414 | |
|
1415 | 0 | while (bytes_read < num_to_read) { |
1416 | 0 | fragment = peek(reader, &fragment_size); |
1417 | 0 | size_t n = |
1418 | 0 | min_t(size_t, fragment_size, |
1419 | 0 | num_to_read - bytes_read); |
1420 | 0 | memcpy((char *)(env->scratch) + bytes_read, fragment, n); |
1421 | 0 | bytes_read += n; |
1422 | 0 | skip(reader, n); |
1423 | 0 | } |
1424 | 0 | DCHECK_EQ(bytes_read, num_to_read); |
1425 | 0 | fragment = env->scratch; |
1426 | 0 | fragment_size = num_to_read; |
1427 | 0 | } |
1428 | 0 | if (fragment_size < num_to_read) |
1429 | 0 | return -EIO; |
1430 | | |
1431 | | /* Get encoding table for compression */ |
1432 | 0 | int table_size; |
1433 | 0 | u16 *table = get_hash_table(env, num_to_read, &table_size); |
1434 | | |
1435 | | /* Compress input_fragment and append to dest */ |
1436 | 0 | char *dest; |
1437 | 0 | dest = sink_peek(writer, snappy_max_compressed_length(num_to_read)); |
1438 | 0 | if (!dest) { |
1439 | | /* |
1440 | | * Need a scratch buffer for the output, |
1441 | | * because the byte sink doesn't have enough |
1442 | | * in one piece. |
1443 | | */ |
1444 | 0 | dest = env->scratch_output; |
1445 | 0 | } |
1446 | 0 | char *end = compress_fragment(fragment, fragment_size, |
1447 | 0 | dest, table, table_size); |
1448 | 0 | append(writer, dest, end - dest); |
1449 | 0 | written += (end - dest); |
1450 | |
|
1451 | 0 | N -= num_to_read; |
1452 | 0 | skip(reader, pending_advance); |
1453 | 0 | } |
1454 | | |
1455 | 0 | err = 0; |
1456 | 0 | out: |
1457 | 0 | return err; |
1458 | 0 | } |
1459 | | |
1460 | | #ifdef SG |
1461 | | |
1462 | | int snappy_compress_iov(struct snappy_env *env, |
1463 | | struct iovec *iov_in, |
1464 | | int iov_in_len, |
1465 | | size_t input_length, |
1466 | | struct iovec *iov_out, |
1467 | | int *iov_out_len, |
1468 | | size_t *compressed_length) |
1469 | | { |
1470 | | struct source reader = { |
1471 | | .iov = iov_in, |
1472 | | .iovlen = iov_in_len, |
1473 | | .total = input_length |
1474 | | }; |
1475 | | struct sink writer = { |
1476 | | .iov = iov_out, |
1477 | | .iovlen = *iov_out_len, |
1478 | | }; |
1479 | | int err = compress(env, &reader, &writer); |
1480 | | |
1481 | | *iov_out_len = writer.curvec + 1; |
1482 | | |
1483 | | /* Compute how many bytes were added */ |
1484 | | *compressed_length = writer.written; |
1485 | | return err; |
1486 | | } |
1487 | | EXPORT_SYMBOL(snappy_compress_iov); |
1488 | | |
1489 | | /** |
1490 | | * snappy_compress - Compress a buffer using the snappy compressor. |
1491 | | * @env: Preallocated environment |
1492 | | * @input: Input buffer |
1493 | | * @input_length: Length of input_buffer |
1494 | | * @compressed: Output buffer for compressed data |
1495 | | * @compressed_length: The real length of the output written here. |
1496 | | * |
1497 | | * Return 0 on success, otherwise a negative error code. |
1498 | | * |
1499 | | * The output buffer must be at least |
1500 | | * snappy_max_compressed_length(input_length) bytes long. |
1501 | | * |
1502 | | * Requires a preallocated environment from snappy_init_env. |
1503 | | * The environment does not keep state over individual calls |
1504 | | * of this function, just preallocates the memory. |
1505 | | */ |
1506 | | int snappy_compress(struct snappy_env *env, |
1507 | | const char *input, |
1508 | | size_t input_length, |
1509 | | char *compressed, size_t *compressed_length) |
1510 | | { |
1511 | | struct iovec iov_in = { |
1512 | | .iov_base = (char *)input, |
1513 | | .iov_len = input_length, |
1514 | | }; |
1515 | | struct iovec iov_out = { |
1516 | | .iov_base = compressed, |
1517 | | .iov_len = 0xffffffff, |
1518 | | }; |
1519 | | int out = 1; |
1520 | | return snappy_compress_iov(env, |
1521 | | &iov_in, 1, input_length, |
1522 | | &iov_out, &out, compressed_length); |
1523 | | } |
1524 | | EXPORT_SYMBOL(snappy_compress); |
1525 | | |
1526 | | int snappy_uncompress_iov(struct iovec *iov_in, int iov_in_len, |
1527 | | size_t input_len, char *uncompressed) |
1528 | | { |
1529 | | struct source reader = { |
1530 | | .iov = iov_in, |
1531 | | .iovlen = iov_in_len, |
1532 | | .total = input_len |
1533 | | }; |
1534 | | struct writer output = { |
1535 | | .base = uncompressed, |
1536 | | .op = uncompressed |
1537 | | }; |
1538 | | return internal_uncompress(&reader, &output, 0xffffffff); |
1539 | | } |
1540 | | EXPORT_SYMBOL(snappy_uncompress_iov); |
1541 | | |
1542 | | /** |
1543 | | * snappy_uncompress - Uncompress a snappy compressed buffer |
1544 | | * @compressed: Input buffer with compressed data |
1545 | | * @n: length of compressed buffer |
1546 | | * @uncompressed: buffer for uncompressed data |
1547 | | * |
1548 | | * The uncompressed data buffer must be at least |
1549 | | * snappy_uncompressed_length(compressed) bytes long. |
1550 | | * |
1551 | | * Return 0 on success, otherwise a negative error code. |
1552 | | */ |
1553 | | int snappy_uncompress(const char *compressed, size_t n, char *uncompressed) |
1554 | | { |
1555 | | struct iovec iov = { |
1556 | | .iov_base = (char *)compressed, |
1557 | | .iov_len = n |
1558 | | }; |
1559 | | return snappy_uncompress_iov(&iov, 1, n, uncompressed); |
1560 | | } |
1561 | | EXPORT_SYMBOL(snappy_uncompress); |
1562 | | |
1563 | | #else |
1564 | | /** |
1565 | | * snappy_compress - Compress a buffer using the snappy compressor. |
1566 | | * @env: Preallocated environment |
1567 | | * @input: Input buffer |
1568 | | * @input_length: Length of input_buffer |
1569 | | * @compressed: Output buffer for compressed data |
1570 | | * @compressed_length: The real length of the output written here. |
1571 | | * |
1572 | | * Return 0 on success, otherwise a negative error code. |
1573 | | * |
1574 | | * The output buffer must be at least |
1575 | | * snappy_max_compressed_length(input_length) bytes long. |
1576 | | * |
1577 | | * Requires a preallocated environment from snappy_init_env. |
1578 | | * The environment does not keep state over individual calls |
1579 | | * of this function, just preallocates the memory. |
1580 | | */ |
1581 | | int snappy_compress(struct snappy_env *env, |
1582 | | const char *input, |
1583 | | size_t input_length, |
1584 | | char *compressed, size_t *compressed_length) |
1585 | 0 | { |
1586 | 0 | struct source reader = { |
1587 | 0 | .ptr = input, |
1588 | 0 | .left = input_length |
1589 | 0 | }; |
1590 | 0 | struct sink writer = { |
1591 | 0 | .dest = compressed, |
1592 | 0 | }; |
1593 | 0 | int err = compress(env, &reader, &writer); |
1594 | | |
1595 | | /* Compute how many bytes were added */ |
1596 | 0 | *compressed_length = (writer.dest - compressed); |
1597 | 0 | return err; |
1598 | 0 | } |
1599 | | EXPORT_SYMBOL(snappy_compress); |
1600 | | |
1601 | | /** |
1602 | | * snappy_uncompress - Uncompress a snappy compressed buffer |
1603 | | * @compressed: Input buffer with compressed data |
1604 | | * @n: length of compressed buffer |
1605 | | * @uncompressed: buffer for uncompressed data |
1606 | | * |
1607 | | * The uncompressed data buffer must be at least |
1608 | | * snappy_uncompressed_length(compressed) bytes long. |
1609 | | * |
1610 | | * Return 0 on success, otherwise a negative error code. |
1611 | | */ |
1612 | | int snappy_uncompress(const char *compressed, size_t n, char *uncompressed) |
1613 | 0 | { |
1614 | 0 | struct source reader = { |
1615 | 0 | .ptr = compressed, |
1616 | 0 | .left = n |
1617 | 0 | }; |
1618 | 0 | struct writer output = { |
1619 | 0 | .base = uncompressed, |
1620 | 0 | .op = uncompressed |
1621 | 0 | }; |
1622 | 0 | return internal_uncompress(&reader, &output, 0xffffffff); |
1623 | 0 | } |
1624 | | EXPORT_SYMBOL(snappy_uncompress); |
1625 | | #endif |
1626 | | |
1627 | | static inline void clear_env(struct snappy_env *env) |
1628 | 0 | { |
1629 | 0 | memset(env, 0, sizeof(*env)); |
1630 | 0 | } |
1631 | | |
1632 | | #ifdef SG |
1633 | | /** |
1634 | | * snappy_init_env_sg - Allocate snappy compression environment |
1635 | | * @env: Environment to preallocate |
1636 | | * @sg: Whether the input environment ever does scatter-gather |
1637 | | * |
1638 | | * If false is passed to sg then multiple entries in an iovec |
1639 | | * are not legal. |
1640 | | * Returns 0 on success, otherwise negative errno. |
1641 | | * Must run in process context. |
1642 | | */ |
1643 | | int snappy_init_env_sg(struct snappy_env *env, bool sg) |
1644 | | { |
1645 | | if (snappy_init_env(env) < 0) |
1646 | | goto error; |
1647 | | |
1648 | | if (sg) { |
1649 | | env->scratch = vmalloc(kblock_size); |
1650 | | if (!env->scratch) |
1651 | | goto error; |
1652 | | env->scratch_output = |
1653 | | vmalloc(snappy_max_compressed_length(kblock_size)); |
1654 | | if (!env->scratch_output) |
1655 | | goto error; |
1656 | | } |
1657 | | return 0; |
1658 | | error: |
1659 | | snappy_free_env(env); |
1660 | | return -ENOMEM; |
1661 | | } |
1662 | | EXPORT_SYMBOL(snappy_init_env_sg); |
1663 | | #endif |
1664 | | |
1665 | | /** |
1666 | | * snappy_init_env - Allocate snappy compression environment |
1667 | | * @env: Environment to preallocate |
1668 | | * |
1669 | | * Passing multiple entries in an iovec is not allowed |
1670 | | * on the environment allocated here. |
1671 | | * Returns 0 on success, otherwise negative errno. |
1672 | | * Must run in process context. |
1673 | | */ |
1674 | | int snappy_init_env(struct snappy_env *env) |
1675 | 0 | { |
1676 | 0 | clear_env(env); |
1677 | 0 | env->hash_table = vmalloc(sizeof(u16) * kmax_hash_table_size); |
1678 | 0 | if (!env->hash_table) |
1679 | 0 | return -ENOMEM; |
1680 | 0 | return 0; |
1681 | 0 | } |
1682 | | EXPORT_SYMBOL(snappy_init_env); |
1683 | | |
1684 | | /** |
1685 | | * snappy_free_env - Free a snappy compression environment |
1686 | | * @env: Environment to free. |
1687 | | * |
1688 | | * Must run in process context. |
1689 | | */ |
1690 | | void snappy_free_env(struct snappy_env *env) |
1691 | 0 | { |
1692 | 0 | vfree(env->hash_table); |
1693 | | #ifdef SG |
1694 | | vfree(env->scratch); |
1695 | | vfree(env->scratch_output); |
1696 | | #endif |
1697 | 0 | clear_env(env); |
1698 | 0 | } |
1699 | | EXPORT_SYMBOL(snappy_free_env); |
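
An end-to-end usage sketch for the non-SG API defined in this file, assuming a userspace build and that "snappy.h" provides these prototypes and the struct snappy_env definition.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "snappy.h"

int main(void)
{
    const char msg[] = "hello hello hello hello hello";
    struct snappy_env env;
    if (snappy_init_env(&env) < 0)
        return 1;

    /* output buffer sized by the worst-case bound */
    char *comp = malloc(snappy_max_compressed_length(sizeof(msg)));
    size_t comp_len = 0;
    if (!comp || snappy_compress(&env, msg, sizeof(msg), comp, &comp_len) != 0)
        return 1;

    /* round-trip: decompress and compare with the original */
    char out[sizeof(msg)];
    if (snappy_uncompress(comp, comp_len, out) != 0 ||
        memcmp(out, msg, sizeof(msg)) != 0)
        return 1;

    printf("%zu -> %zu bytes\n", sizeof(msg), comp_len);
    free(comp);
    snappy_free_env(&env);
    return 0;
}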