Coverage Report

Created: 2023-03-26 06:05

/src/fluent-bit/lib/snappy-fef67ac/snappy.c
Line
Count
Source
1
/*
2
 * C port of the snappy compressor from Google.
3
 * This is a very fast compressor with comparable compression to lzo.
4
 * Works best on 64bit little-endian, but should be good on others too.
5
 * Ported by Andi Kleen.
6
 * Up to date with snappy 1.1.0
7
 */
8
9
/*
10
 * Copyright 2005 Google Inc. All Rights Reserved.
11
 *
12
 * Redistribution and use in source and binary forms, with or without
13
 * modification, are permitted provided that the following conditions are
14
 * met:
15
 *
16
 *     * Redistributions of source code must retain the above copyright
17
 * notice, this list of conditions and the following disclaimer.
18
 *     * Redistributions in binary form must reproduce the above
19
 * copyright notice, this list of conditions and the following disclaimer
20
 * in the documentation and/or other materials provided with the
21
 * distribution.
22
 *     * Neither the name of Google Inc. nor the names of its
23
 * contributors may be used to endorse or promote products derived from
24
 * this software without specific prior written permission.
25
 *
26
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
 */
38
39
#ifdef __KERNEL__
40
#include <linux/kernel.h>
41
#ifdef SG
42
#include <linux/uio.h>
43
#endif
44
#include <linux/module.h>
45
#include <linux/slab.h>
46
#include <linux/string.h>
47
#include <linux/snappy.h>
48
#include <linux/vmalloc.h>
49
#include <asm/unaligned.h>
50
#else
51
#include "snappy.h"
52
#include "compat.h"
53
#endif
54
55
static inline void put_unaligned16(u16 v, u16 *x)
56
0
{
57
0
  memcpy(x, &v, sizeof(u16));
58
0
}
59
60
static inline u16 get_unaligned16(u16 *x)
61
0
{
62
0
  u16 _ret;
63
0
  memcpy(&_ret, x, sizeof(u16));
64
0
  return _ret;
65
0
}
66
67
static inline void put_unaligned32(u32 v, u32 *x)
68
0
{
69
0
  memcpy(x, &v, sizeof(u32));
70
0
}
71
72
static inline u32 get_unaligned32(u32 *x)
73
0
{
74
0
  u32 _ret;
75
0
  memcpy(&_ret, x, sizeof(u32));
76
0
  return _ret;
77
0
}
78
79
static inline void put_unaligned64(u64 v, u64 *x)
80
0
{
81
0
  memcpy(x, &v, sizeof(u64));
82
0
}
83
84
static inline u64 get_unaligned64(u64 *x)
85
0
{
86
0
  u64 _ret;
87
0
  memcpy(&_ret, x, sizeof(u64));
88
0
  return _ret;
89
0
}
90
91
0
#define get_unaligned_le32(x) (le32toh(get_unaligned32((u32 *)(x))))
92
0
#define put_unaligned_le16(v,x) (put_unaligned16(htole16(v), (u16 *)(x)))
93
94
0
#define CRASH_UNLESS(x) BUG_ON(!(x))
95
0
#define CHECK(cond) CRASH_UNLESS(cond)
96
0
#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b))
97
0
#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b))
98
0
#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b))
99
#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
100
0
#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
101
0
#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
102
103
#define UNALIGNED_LOAD16(_p) get_unaligned16((u16 *)(_p))
104
0
#define UNALIGNED_LOAD32(_p) get_unaligned32((u32 *)(_p))
105
0
#define UNALIGNED_LOAD64(_p) get_unaligned64((u64 *)(_p))
106
107
#define UNALIGNED_STORE16(_p, _val) put_unaligned16(_val, (u16 *)(_p))
108
0
#define UNALIGNED_STORE32(_p, _val) put_unaligned32(_val, (u32 *)(_p))
109
0
#define UNALIGNED_STORE64(_p, _val) put_unaligned64(_val, (u64 *)(_p))
110
111
/*
112
 * This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
113
 * on some platforms, in particular ARM.
114
 */
115
static inline void unaligned_copy64(const void *src, void *dst)
116
0
{
117
0
  if (sizeof(void *) == 8) {
118
0
    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
119
0
  } else {
120
0
    const char *src_char = (const char *)(src);
121
0
    char *dst_char = (char *)(dst);
122
123
0
    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
124
0
    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
125
0
  }
126
0
}
127
128
#ifdef NDEBUG
129
130
#define DCHECK(cond) do {} while(0)
131
#define DCHECK_LE(a, b) do {} while(0)
132
#define DCHECK_GE(a, b) do {} while(0)
133
#define DCHECK_EQ(a, b) do {} while(0)
134
#define DCHECK_NE(a, b) do {} while(0)
135
#define DCHECK_LT(a, b) do {} while(0)
136
#define DCHECK_GT(a, b) do {} while(0)
137
138
#else
139
140
0
#define DCHECK(cond) CHECK(cond)
141
0
#define DCHECK_LE(a, b) CHECK_LE(a, b)
142
0
#define DCHECK_GE(a, b) CHECK_GE(a, b)
143
0
#define DCHECK_EQ(a, b) CHECK_EQ(a, b)
144
#define DCHECK_NE(a, b) CHECK_NE(a, b)
145
0
#define DCHECK_LT(a, b) CHECK_LT(a, b)
146
0
#define DCHECK_GT(a, b) CHECK_GT(a, b)
147
148
#endif
149
150
/* This snippet is based on the following file
151
 * https://github.com/edenhill/librdkafka/blob/7230a1aa327b55ace54579d019e9484af96886c6/src/snappy.c
152
 */
153
#if !defined(__WIN32__)
154
0
# define snappy_clz(n)   __builtin_clz(n)
155
# define snappy_ctz(n)   __builtin_ctz(n)
156
0
# define snappy_ctz64(n) __builtin_ctzll(n)
157
#else
158
#include <intrin.h>
159
static int inline snappy_clz(u32 x) {
160
  unsigned long r = 0;  /* _BitScan* intrinsics take an unsigned long index */
161
  if (_BitScanReverse(&r, x))
162
    return 31 - r;
163
  else
164
    return 32;
165
}
166
167
static int inline snappy_ctz(u32 x) {
168
  unsigned long r = 0;
169
  if (_BitScanForward(&r, x))
170
    return r;
171
  else
172
    return 32;
173
}
174
175
static int inline snappy_ctz64(u64 x) {
176
#ifdef _M_X64
177
  unsigned long r = 0;
178
  if (_BitScanForward64(&r, x))
179
    return r;
180
  else
181
    return 64;
182
#else
183
  int r;
184
  if ((r = snappy_ctz(x & 0xffffffff)) < 32)
185
    return r;
186
  return 32 + snappy_ctz(x >> 32);
187
#endif
188
}
189
#endif
190
191
static inline bool is_little_endian(void)
192
0
{
193
0
#ifdef __LITTLE_ENDIAN__
194
0
  return true;
195
0
#endif
196
0
  return false;
197
0
}
198
199
static inline int log2_floor(u32 n)
200
0
{
201
0
  return n == 0 ? -1 : 31 ^ snappy_clz(n);
202
0
}
203
204
static inline int find_lsb_set_non_zero(u32 n)
205
0
{
206
0
  return snappy_ctz(n);
207
0
}
208
209
static inline int find_lsb_set_non_zero64(u64 n)
210
0
{
211
0
  return snappy_ctz64(n);
212
0
}
213
214
#define kmax32 5
215
216
/*
217
 * Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
218
 *  Never reads a character at or beyond limit.  If a valid/terminated varint32
219
 * was found in the range, stores it in *OUTPUT and returns a pointer just
220
 * past the last byte of the varint32. Else returns NULL.  On success,
221
 * "result <= limit".
222
 */
223
static inline const char *varint_parse32_with_limit(const char *p,
224
                const char *l,
225
                u32 * OUTPUT)
226
0
{
227
0
  const unsigned char *ptr = (const unsigned char *)(p);
228
0
  const unsigned char *limit = (const unsigned char *)(l);
229
0
  u32 b, result;
230
231
0
  if (ptr >= limit)
232
0
    return NULL;
233
0
  b = *(ptr++);
234
0
  result = b & 127;
235
0
  if (b < 128)
236
0
    goto done;
237
0
  if (ptr >= limit)
238
0
    return NULL;
239
0
  b = *(ptr++);
240
0
  result |= (b & 127) << 7;
241
0
  if (b < 128)
242
0
    goto done;
243
0
  if (ptr >= limit)
244
0
    return NULL;
245
0
  b = *(ptr++);
246
0
  result |= (b & 127) << 14;
247
0
  if (b < 128)
248
0
    goto done;
249
0
  if (ptr >= limit)
250
0
    return NULL;
251
0
  b = *(ptr++);
252
0
  result |= (b & 127) << 21;
253
0
  if (b < 128)
254
0
    goto done;
255
0
  if (ptr >= limit)
256
0
    return NULL;
257
0
  b = *(ptr++);
258
0
  result |= (b & 127) << 28;
259
0
  if (b < 16)
260
0
    goto done;
261
0
  return NULL;   /* Value is too long to be a varint32 */
262
0
done:
263
0
  *OUTPUT = result;
264
0
  return (const char *)(ptr);
265
0
}
266
267
/*
268
 * REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
269
 *  EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
270
 *            byte just past the last encoded byte.
271
 */
272
static inline char *varint_encode32(char *sptr, u32 v)
273
0
{
274
  /* Operate on characters as unsigneds */
275
0
  unsigned char *ptr = (unsigned char *)(sptr);
276
0
  static const int B = 128;
277
278
0
  if (v < (1 << 7)) {
279
0
    *(ptr++) = v;
280
0
  } else if (v < (1 << 14)) {
281
0
    *(ptr++) = v | B;
282
0
    *(ptr++) = v >> 7;
283
0
  } else if (v < (1 << 21)) {
284
0
    *(ptr++) = v | B;
285
0
    *(ptr++) = (v >> 7) | B;
286
0
    *(ptr++) = v >> 14;
287
0
  } else if (v < (1 << 28)) {
288
0
    *(ptr++) = v | B;
289
0
    *(ptr++) = (v >> 7) | B;
290
0
    *(ptr++) = (v >> 14) | B;
291
0
    *(ptr++) = v >> 21;
292
0
  } else {
293
0
    *(ptr++) = v | B;
294
0
    *(ptr++) = (v >> 7) | B;
295
0
    *(ptr++) = (v >> 14) | B;
296
0
    *(ptr++) = (v >> 21) | B;
297
0
    *(ptr++) = v >> 28;
298
0
  }
299
0
  return (char *)(ptr);
300
0
}
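/*
 * Worked usage sketch for the two varint helpers above, assuming the
 * example_varint_roundtrip() name (illustrative only): varint_encode32()
 * emits 7 bits per byte, low bits first, with the top bit set on every
 * byte except the last.
 */
#if 0
static void example_varint_roundtrip(void)
{
  char buf[kmax32];
  u32 decoded = 0;
  char *end = varint_encode32(buf, 300);  /* 300 -> bytes 0xac 0x02 */
  const char *past = varint_parse32_with_limit(buf, end, &decoded);
  /* past == end (two bytes consumed) and decoded == 300 */
  (void)past;
}
#endif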
301
302
#ifdef SG
303
304
static inline void *n_bytes_after_addr(void *addr, size_t n_bytes)
305
{
306
    return (void *) ((char *)addr + n_bytes);
307
}
308
309
struct source {
310
  struct iovec *iov;
311
  int iovlen;
312
  int curvec;
313
  int curoff;
314
  size_t total;
315
};
316
317
/* Only valid at beginning when nothing is consumed */
318
static inline int available(struct source *s)
319
{
320
  return s->total;
321
}
322
323
static inline const char *peek(struct source *s, size_t *len)
324
{
325
  if (likely(s->curvec < s->iovlen)) {
326
    struct iovec *iv = &s->iov[s->curvec];
327
    if (s->curoff < iv->iov_len) { 
328
      *len = iv->iov_len - s->curoff;
329
      return n_bytes_after_addr(iv->iov_base, s->curoff);
330
    }
331
  }
332
  *len = 0;
333
  return NULL;
334
}
335
336
static inline void skip(struct source *s, size_t n)
337
{
338
  struct iovec *iv = &s->iov[s->curvec];
339
  s->curoff += n;
340
  DCHECK_LE(s->curoff, iv->iov_len);
341
  if (s->curoff >= iv->iov_len && s->curvec + 1 < s->iovlen) {
342
    s->curoff = 0;
343
    s->curvec++;
344
  }
345
}
346
347
struct sink {
348
  struct iovec *iov;
349
  int iovlen;
350
  unsigned curvec;
351
  unsigned curoff;
352
  unsigned written;
353
};
354
355
static inline void append(struct sink *s, const char *data, size_t n)
356
{
357
  struct iovec *iov = &s->iov[s->curvec];
358
  char *dst = n_bytes_after_addr(iov->iov_base, s->curoff);
359
  size_t nlen = min_t(size_t, iov->iov_len - s->curoff, n);
360
  if (data != dst)
361
    memcpy(dst, data, nlen);
362
  s->written += n;
363
  s->curoff += nlen;
364
  while ((n -= nlen) > 0) {
365
    data += nlen;
366
    s->curvec++;
367
    DCHECK_LT(s->curvec, s->iovlen);
368
    iov++;
369
    nlen = min_t(size_t, iov->iov_len, n);
370
    memcpy(iov->iov_base, data, nlen);
371
    s->curoff = nlen;
372
  }
373
}
374
375
static inline void *sink_peek(struct sink *s, size_t n)
376
{
377
  struct iovec *iov = &s->iov[s->curvec];
378
  if (s->curvec < iov->iov_len && iov->iov_len - s->curoff >= n)
379
    return n_bytes_after_addr(iov->iov_base, s->curoff);
380
  return NULL;
381
}
382
383
#else
384
385
struct source {
386
  const char *ptr;
387
  size_t left;
388
};
389
390
static inline int available(struct source *s)
391
0
{
392
0
  return s->left;
393
0
}
394
395
static inline const char *peek(struct source *s, size_t * len)
396
0
{
397
0
  *len = s->left;
398
0
  return s->ptr;
399
0
}
400
401
static inline void skip(struct source *s, size_t n)
402
0
{
403
0
  s->left -= n;
404
0
  s->ptr += n;
405
0
}
406
407
struct sink {
408
  char *dest;
409
};
410
411
static inline void append(struct sink *s, const char *data, size_t n)
412
0
{
413
0
  if (data != s->dest)
414
0
    memcpy(s->dest, data, n);
415
0
  s->dest += n;
416
0
}
417
418
0
#define sink_peek(s, n) sink_peek_no_sg(s)
419
420
static inline void *sink_peek_no_sg(const struct sink *s)
421
0
{
422
0
  return s->dest;
423
0
}
424
425
#endif
426
427
struct writer {
428
  char *base;
429
  char *op;
430
  char *op_limit;
431
};
432
433
/* Called before decompression */
434
static inline void writer_set_expected_length(struct writer *w, size_t len)
435
0
{
436
0
  w->op_limit = w->op + len;
437
0
}
438
439
/* Called after decompression */
440
static inline bool writer_check_length(struct writer *w)
441
0
{
442
0
  return w->op == w->op_limit;
443
0
}
444
445
/*
446
 * Copy "len" bytes from "src" to "op", one byte at a time.  Used for
447
 *  handling COPY operations where the input and output regions may
448
 * overlap.  For example, suppose:
449
 *    src    == "ab"
450
 *    op     == src + 2
451
 *    len    == 20
452
 * After IncrementalCopy(src, op, len), the result will have
453
 * eleven copies of "ab"
454
 *    ababababababababababab
455
 * Note that this does not match the semantics of either memcpy()
456
 * or memmove().
457
 */
458
static inline void incremental_copy(const char *src, char *op, ssize_t len)
459
0
{
460
0
  DCHECK_GT(len, 0);
461
0
  do {
462
0
    *op++ = *src++;
463
0
  } while (--len > 0);
464
0
}
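/*
 * Sketch of the overlapping-copy semantics described above; the
 * example_incremental_copy() name is illustrative only.
 */
#if 0
static void example_incremental_copy(void)
{
  char buf[32] = "ab";              /* src == buf, op == buf + 2 */
  incremental_copy(buf, buf + 2, 20);
  /* buf now starts with "ababababababababababab": the two seed bytes
   * plus twenty copied bytes, i.e. eleven repetitions of "ab". */
}
#endif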
465
466
/*
467
 * Equivalent to IncrementalCopy except that it can write up to ten extra
468
 *  bytes after the end of the copy, and that it is faster.
469
 *
470
 * The main part of this loop is a simple copy of eight bytes at a time until
471
 * we've copied (at least) the requested amount of bytes.  However, if op and
472
 * src are less than eight bytes apart (indicating a repeating pattern of
473
 * length < 8), we first need to expand the pattern in order to get the correct
474
 * results. For instance, if the buffer looks like this, with the eight-byte
475
 * <src> and <op> patterns marked as intervals:
476
 *
477
 *    abxxxxxxxxxxxx
478
 *    [------]           src
479
 *      [------]         op
480
 *
481
 * a single eight-byte copy from <src> to <op> will repeat the pattern once,
482
 * after which we can move <op> two bytes without moving <src>:
483
 *
484
 *    ababxxxxxxxxxx
485
 *    [------]           src
486
 *        [------]       op
487
 *
488
 * and repeat the exercise until the two no longer overlap.
489
 *
490
 * This allows us to do very well in the special case of one single byte
491
 * repeated many times, without taking a big hit for more general cases.
492
 *
493
 * The worst case of extra writing past the end of the match occurs when
494
 * op - src == 1 and len == 1; the last copy will read from byte positions
495
 * [0..7] and write to [4..11], whereas it was only supposed to write to
496
 * position 1. Thus, ten excess bytes.
497
 */
498
499
0
#define kmax_increment_copy_overflow  10
500
501
static inline void incremental_copy_fast_path(const char *src, char *op,
502
                ssize_t len)
503
0
{
504
0
  while (op - src < 8) {
505
0
    unaligned_copy64(src, op);
506
0
    len -= op - src;
507
0
    op += op - src;
508
0
  }
509
0
  while (len > 0) {
510
0
    unaligned_copy64(src, op);
511
0
    src += 8;
512
0
    op += 8;
513
0
    len -= 8;
514
0
  }
515
0
}
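/*
 * Worked trace of the pattern expansion above, for op - src == 2 and
 * len == 20: the first 8-byte copy doubles the two-byte pattern and the
 * gap grows 2 -> 4 -> 8; once op - src >= 8 the remaining bytes are
 * copied in plain 8-byte chunks, possibly writing up to
 * kmax_increment_copy_overflow bytes past the requested end.
 */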
516
517
static inline bool writer_append_from_self(struct writer *w, u32 offset,
518
             u32 len)
519
0
{
520
0
  char *const op = w->op;
521
0
  CHECK_LE(op, w->op_limit);
522
0
  const u32 space_left = w->op_limit - op;
523
524
0
  if (op - w->base <= offset - 1u) /* -1u catches offset==0 */
525
0
    return false;
526
0
  if (len <= 16 && offset >= 8 && space_left >= 16) {
527
    /* Fast path, used for the majority (70-80%) of dynamic
528
     * invocations. */
529
0
    unaligned_copy64(op - offset, op);
530
0
    unaligned_copy64(op - offset + 8, op + 8);
531
0
  } else {
532
0
    if (space_left >= len + kmax_increment_copy_overflow) {
533
0
      incremental_copy_fast_path(op - offset, op, len);
534
0
    } else {
535
0
      if (space_left < len) {
536
0
        return false;
537
0
      }
538
0
      incremental_copy(op - offset, op, len);
539
0
    }
540
0
  }
541
542
0
  w->op = op + len;
543
0
  return true;
544
0
}
545
546
static inline bool writer_append(struct writer *w, const char *ip, u32 len)
547
0
{
548
0
  char *const op = w->op;
549
0
  CHECK_LE(op, w->op_limit);
550
0
  const u32 space_left = w->op_limit - op;
551
0
  if (space_left < len)
552
0
    return false;
553
0
  memcpy(op, ip, len);
554
0
  w->op = op + len;
555
0
  return true;
556
0
}
557
558
static inline bool writer_try_fast_append(struct writer *w, const char *ip, 
559
            u32 available_bytes, u32 len)
560
0
{
561
0
  char *const op = w->op;
562
0
  const int space_left = w->op_limit - op;
563
0
  if (len <= 16 && available_bytes >= 16 && space_left >= 16) {
564
    /* Fast path, used for the majority (~95%) of invocations */
565
0
    unaligned_copy64(ip, op);
566
0
    unaligned_copy64(ip + 8, op + 8);
567
0
    w->op = op + len;
568
0
    return true;
569
0
  }
570
0
  return false;
571
0
}
572
573
/*
574
 * Any hash function will produce a valid compressed bitstream, but a good
575
 * hash function reduces the number of collisions and thus yields better
576
 * compression for compressible input, and more speed for incompressible
577
 * input. Of course, it doesn't hurt if the hash function is reasonably fast
578
 * either, as it gets called a lot.
579
 */
580
static inline u32 hash_bytes(u32 bytes, int shift)
581
0
{
582
0
  u32 kmul = 0x1e35a7bd;
583
0
  return (bytes * kmul) >> shift;
584
0
}
585
586
static inline u32 hash(const char *p, int shift)
587
0
{
588
0
  return hash_bytes(UNALIGNED_LOAD32(p), shift);
589
0
}
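/*
 * Worked example: the caller passes shift = 32 - log2_floor(table_size)
 * (see compress_fragment() below), so for the largest table,
 * table_size = kmax_hash_table_size = 16384 = 2^14, shift is 18 and
 * (bytes * kmul) >> 18 keeps the top 14 bits of the 32-bit product,
 * i.e. a value in [0, 16383] that indexes the hash table directly.
 */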
590
591
/*
592
 * Compressed data can be defined as:
593
 *    compressed := item* literal*
594
 *    item       := literal* copy
595
 *
596
 * The trailing literal sequence has a space blowup of at most 62/60
597
 * since a literal of length 60 needs one tag byte + one extra byte
598
 * for length information.
599
 *
600
 * Item blowup is trickier to measure.  Suppose the "copy" op copies
601
 * 4 bytes of data.  Because of a special check in the encoding code,
602
 * we produce a 4-byte copy only if the offset is < 65536.  Therefore
603
 * the copy op takes 3 bytes to encode, and this type of item leads
604
 * to at most the 62/60 blowup for representing literals.
605
 *
606
 * Suppose the "copy" op copies 5 bytes of data.  If the offset is big
607
 * enough, it will take 5 bytes to encode the copy op.  Therefore the
608
 * worst case here is a one-byte literal followed by a five-byte copy.
609
 * I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
610
 *
611
 * This last factor dominates the blowup, so the final estimate is:
612
 */
613
size_t snappy_max_compressed_length(size_t source_len)
614
0
{
615
0
  return 32 + source_len + source_len / 6;
616
0
}
617
EXPORT_SYMBOL(snappy_max_compressed_length);
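/*
 * Worked example: for a 64 KiB input (source_len = 65536) the bound is
 * 32 + 65536 + 65536 / 6 = 76490 bytes, i.e. roughly the 7/6 worst-case
 * expansion derived above plus the 32-byte slack that emit_literal()'s
 * fast path relies on.
 */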
618
619
enum {
620
  LITERAL = 0,
621
  COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */
622
  COPY_2_BYTE_OFFSET = 2,
623
  COPY_4_BYTE_OFFSET = 3
624
};
625
626
static inline char *emit_literal(char *op,
627
         const char *literal,
628
         int len, bool allow_fast_path)
629
0
{
630
0
  int n = len - 1;  /* Zero-length literals are disallowed */
631
632
0
  if (n < 60) {
633
    /* Fits in tag byte */
634
0
    *op++ = LITERAL | (n << 2);
635
636
/*
637
 * The vast majority of copies are below 16 bytes, for which a
638
 * call to memcpy is overkill. This fast path can sometimes
639
 * copy up to 15 bytes too much, but that is okay in the
640
 * main loop, since we have a bit to go on for both sides:
641
 *
642
 *   - The input will always have kInputMarginBytes = 15 extra
643
 *     available bytes, as long as we're in the main loop, and
644
 *     if not, allow_fast_path = false.
645
 *   - The output will always have 32 spare bytes (see
646
 *     MaxCompressedLength).
647
 */
648
0
    if (allow_fast_path && len <= 16) {
649
0
      unaligned_copy64(literal, op);
650
0
      unaligned_copy64(literal + 8, op + 8);
651
0
      return op + len;
652
0
    }
653
0
  } else {
654
    /* Encode in upcoming bytes */
655
0
    char *base = op;
656
0
    int count = 0;
657
0
    op++;
658
0
    while (n > 0) {
659
0
      *op++ = n & 0xff;
660
0
      n >>= 8;
661
0
      count++;
662
0
    }
663
0
    DCHECK(count >= 1);
664
0
    DCHECK(count <= 4);
665
0
    *base = LITERAL | ((59 + count) << 2);
666
0
  }
667
0
  memcpy(op, literal, len);
668
0
  return op + len;
669
0
}
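/*
 * Worked example of the literal encodings above: a 5-byte literal is
 * emitted as the tag byte LITERAL | (4 << 2) = 0x10 followed by the five
 * payload bytes, while a 100-byte literal takes the "encode in upcoming
 * bytes" path: count = 1, tag byte LITERAL | (60 << 2) = 0xf0, one length
 * byte 99 (0x63), then the 100 payload bytes.
 */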
670
671
static inline char *emit_copy_less_than64(char *op, int offset, int len)
672
0
{
673
0
  DCHECK_LE(len, 64);
674
0
  DCHECK_GE(len, 4);
675
0
  DCHECK_LT(offset, 65536);
676
677
0
  if ((len < 12) && (offset < 2048)) {
678
0
    int len_minus_4 = len - 4;
679
0
    DCHECK(len_minus_4 < 8);  /* Must fit in 3 bits */
680
0
    *op++ =
681
0
        COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8)
682
0
                 << 5);
683
0
    *op++ = offset & 0xff;
684
0
  } else {
685
0
    *op++ = COPY_2_BYTE_OFFSET + ((len - 1) << 2);
686
0
    put_unaligned_le16(offset, op);
687
0
    op += 2;
688
0
  }
689
0
  return op;
690
0
}
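/*
 * Worked example of the two encodings above: len = 8, offset = 700 takes
 * the one-byte-offset form (len < 12 and offset < 2048): the tag byte is
 * COPY_1_BYTE_OFFSET + (4 << 2) + (2 << 5) = 0x51, followed by the low
 * offset byte 0xbc (700 = 2 * 256 + 188).  len = 60, offset = 40000 takes
 * the two-byte-offset form: tag COPY_2_BYTE_OFFSET + (59 << 2) = 0xee,
 * then 40000 stored as a little-endian u16 (bytes 0x40 0x9c).
 */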
691
692
static inline char *emit_copy(char *op, int offset, int len)
693
0
{
694
  /*
695
   * Emit 64 byte copies but make sure to keep at least four bytes
696
   * reserved
697
   */
698
0
  while (len >= 68) {
699
0
    op = emit_copy_less_than64(op, offset, 64);
700
0
    len -= 64;
701
0
  }
702
703
  /*
704
   * Emit an extra 60 byte copy if have too much data to fit in
705
   * one copy
706
   */
707
0
  if (len > 64) {
708
0
    op = emit_copy_less_than64(op, offset, 60);
709
0
    len -= 60;
710
0
  }
711
712
  /* Emit remainder */
713
0
  op = emit_copy_less_than64(op, offset, len);
714
0
  return op;
715
0
}
716
717
/**
718
 * snappy_uncompressed_length - return length of uncompressed output.
719
 * @start: compressed buffer
720
 * @n: length of compressed buffer.
721
 * @result: Write the length of the uncompressed output here.
722
 *
723
 * Returns true when successful, otherwise false.
724
 */
725
bool snappy_uncompressed_length(const char *start, size_t n, size_t * result)
726
0
{
727
0
  u32 v = 0;
728
0
  const char *limit = start + n;
729
0
  if (varint_parse32_with_limit(start, limit, &v) != NULL) {
730
0
    *result = v;
731
0
    return true;
732
0
  } else {
733
0
    return false;
734
0
  }
735
0
}
736
EXPORT_SYMBOL(snappy_uncompressed_length);
737
738
/*
739
 * The size of a compression block. Note that many parts of the compression
740
 * code assume that kBlockSize <= 65536; in particular, the hash table
741
 * can only store 16-bit offsets, and EmitCopy() also assumes the offset
742
 * is 65535 bytes or less. Note also that if you change this, it will
743
 * affect the framing format.
744
 * Note that there might be older data around that is compressed with larger
745
 * block sizes, so the decompression code should not rely on the
746
 * non-existence of long backreferences.
747
 */
748
#define kblock_log 16
749
#define kblock_size (1 << kblock_log)
750
751
/* 
752
 * This value could be halved or quartered to save memory
753
 * at the cost of slightly worse compression.
754
 */
755
0
#define kmax_hash_table_bits 14
756
0
#define kmax_hash_table_size (1U << kmax_hash_table_bits)
757
758
/*
759
 * Use smaller hash table when input.size() is smaller, since we
760
 * fill the table, incurring O(hash table size) overhead for
761
 * compression, and if the input is short, we won't need that
762
 * many hash table entries anyway.
763
 */
764
static u16 *get_hash_table(struct snappy_env *env, size_t input_size,
765
            int *table_size)
766
0
{
767
0
  unsigned htsize = 256;
768
769
0
  DCHECK(kmax_hash_table_size >= 256);
770
0
  while (htsize < kmax_hash_table_size && htsize < input_size)
771
0
    htsize <<= 1;
772
0
  CHECK_EQ(0, htsize & (htsize - 1));
773
0
  CHECK_LE(htsize, kmax_hash_table_size);
774
775
0
  u16 *table;
776
0
  table = env->hash_table;
777
778
0
  *table_size = htsize;
779
0
  memset(table, 0, htsize * sizeof(*table));
780
0
  return table;
781
0
}
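/*
 * Worked example: for input_size = 1000 the loop doubles htsize
 * 256 -> 512 -> 1024 and stops there (1024 >= 1000), so a 1000-byte block
 * uses a 1024-entry (2 KiB) table; any input of kmax_hash_table_size
 * (16384) bytes or more uses the full 16384-entry table.
 */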
782
783
/*
784
 * Return the largest n such that
785
 *
786
 *   s1[0,n-1] == s2[0,n-1]
787
 *   and n <= (s2_limit - s2).
788
 *
789
 * Does not read *s2_limit or beyond.
790
 * Does not read *(s1 + (s2_limit - s2)) or beyond.
791
 * Requires that s2_limit >= s2.
792
 *
793
 * Separate implementation for x86_64, for speed.  Uses the fact that
794
 * x86_64 is little endian.
795
 */
796
#if defined(__LITTLE_ENDIAN__) && BITS_PER_LONG == 64
797
static inline int find_match_length(const char *s1,
798
            const char *s2, const char *s2_limit)
799
0
{
800
0
  int matched = 0;
801
802
0
  DCHECK_GE(s2_limit, s2);
803
  /*
804
   * Find out how long the match is. We loop over the data 64 bits at a
805
   * time until we find a 64-bit block that doesn't match; then we find
806
   * the first non-matching bit and use that to calculate the total
807
   * length of the match.
808
   */
809
0
  while (likely(s2 <= s2_limit - 8)) {
810
0
    if (unlikely
811
0
        (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
812
0
      s2 += 8;
813
0
      matched += 8;
814
0
    } else {
815
      /*
816
       * On current (mid-2008) Opteron models there
817
       * is a 3% more efficient code sequence to
818
       * find the first non-matching byte.  However,
819
       * what follows is ~10% better on Intel Core 2
820
       * and newer, and we expect AMD's bsf
821
       * instruction to improve.
822
       */
823
0
      u64 x =
824
0
          UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 +
825
0
                    matched);
826
0
      int matching_bits = find_lsb_set_non_zero64(x);
827
0
      matched += matching_bits >> 3;
828
0
      return matched;
829
0
    }
830
0
  }
831
0
  while (likely(s2 < s2_limit)) {
832
0
    if (likely(s1[matched] == *s2)) {
833
0
      ++s2;
834
0
      ++matched;
835
0
    } else {
836
0
      return matched;
837
0
    }
838
0
  }
839
0
  return matched;
840
0
}
841
#else
842
static inline int find_match_length(const char *s1,
843
            const char *s2, const char *s2_limit)
844
{
845
  /* Implementation based on the x86-64 version, above. */
846
  DCHECK_GE(s2_limit, s2);
847
  int matched = 0;
848
849
  while (s2 <= s2_limit - 4 &&
850
         UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
851
    s2 += 4;
852
    matched += 4;
853
  }
854
  if (is_little_endian() && s2 <= s2_limit - 4) {
855
    u32 x =
856
        UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
857
    int matching_bits = find_lsb_set_non_zero(x);
858
    matched += matching_bits >> 3;
859
  } else {
860
    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
861
      ++s2;
862
      ++matched;
863
    }
864
  }
865
  return matched;
866
}
867
#endif
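/*
 * Worked example for the 64-bit path above: if the two regions agree on
 * their first three bytes and differ in the fourth, the XOR of the two
 * 8-byte little-endian loads has its lowest set bit somewhere in bits
 * 24..31, so find_lsb_set_non_zero64(x) returns a value in [24, 31] and
 * matching_bits >> 3 adds exactly 3 to "matched".
 */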
868
869
/*
870
 * For 0 <= offset <= 4, GetU32AtOffset(GetEightBytesAt(p), offset) will
871
 *  equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
872
 * empirically found that overlapping loads such as
873
 *  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
874
 * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to u32.
875
 *
876
 * We have different versions for 64- and 32-bit; ideally we would avoid the
877
 * two functions and just inline the UNALIGNED_LOAD64 call into
878
 * GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
879
 * enough to avoid loading the value multiple times then. For 64-bit, the load
880
 * is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
881
 * done at GetUint32AtOffset() time.
882
 */
883
884
#if BITS_PER_LONG == 64
885
886
typedef u64 eight_bytes_reference;
887
888
static inline eight_bytes_reference get_eight_bytes_at(const char* ptr)
889
0
{
890
0
  return UNALIGNED_LOAD64(ptr);
891
0
}
892
893
static inline u32 get_u32_at_offset(u64 v, int offset)
894
0
{
895
0
  DCHECK_GE(offset, 0);
896
0
  DCHECK_LE(offset, 4);
897
0
  return v >> (is_little_endian()? 8 * offset : 32 - 8 * offset);
898
0
}
899
900
#else
901
902
typedef const char *eight_bytes_reference;
903
904
static inline eight_bytes_reference get_eight_bytes_at(const char* ptr) 
905
{
906
  return ptr;
907
}
908
909
static inline u32 get_u32_at_offset(const char *v, int offset) 
910
{
911
  DCHECK_GE(offset, 0);
912
  DCHECK_LE(offset, 4);
913
  return UNALIGNED_LOAD32(v + offset);
914
}
915
#endif
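/*
 * Worked example: on a little-endian machine the 64-bit load places *p in
 * the low byte, so get_u32_at_offset(get_eight_bytes_at(p), 2) shifts the
 * value right by 16 bits and truncates to u32, which is exactly
 * UNALIGNED_LOAD32(p + 2) without touching memory again.
 */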
916
917
/*
918
 * Flat array compression that does not emit the "uncompressed length"
919
 *  prefix. Compresses "input" string to the "*op" buffer.
920
 *
921
 * REQUIRES: "input" is at most "kBlockSize" bytes long.
922
 * REQUIRES: "op" points to an array of memory that is at least
923
 * "MaxCompressedLength(input.size())" in size.
924
 * REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
925
 * REQUIRES: "table_size" is a power of two
926
 *
927
 * Returns an "end" pointer into "op" buffer.
928
 * "end - op" is the compressed size of "input".
929
 */
930
931
static char *compress_fragment(const char *const input,
932
             const size_t input_size,
933
             char *op, u16 * table, const unsigned table_size)
934
0
{
935
  /* "ip" is the input pointer, and "op" is the output pointer. */
936
0
  const char *ip = input;
937
0
  CHECK_LE(input_size, kblock_size);
938
0
  CHECK_EQ(table_size & (table_size - 1), 0);
939
0
  const int shift = 32 - log2_floor(table_size);
940
0
  DCHECK_EQ(UINT_MAX >> shift, table_size - 1);
941
0
  const char *ip_end = input + input_size;
942
0
  const char *baseip = ip;
943
  /*
944
   * Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
945
   *  [next_emit, ip_end) after the main loop.
946
   */
947
0
  const char *next_emit = ip;
948
949
0
  const unsigned kinput_margin_bytes = 15;
950
951
0
  if (likely(input_size >= kinput_margin_bytes)) {
952
0
    const char *const ip_limit = input + input_size -
953
0
      kinput_margin_bytes;
954
955
0
    u32 next_hash;
956
0
    for (next_hash = hash(++ip, shift);;) {
957
0
      DCHECK_LT(next_emit, ip);
958
/*
959
 * The body of this loop calls EmitLiteral once and then EmitCopy one or
960
 * more times.  (The exception is that when we're close to exhausting
961
 * the input we goto emit_remainder.)
962
 *
963
 * In the first iteration of this loop we're just starting, so
964
 * there's nothing to copy, so calling EmitLiteral once is
965
 * necessary.  And we only start a new iteration when the
966
 * current iteration has determined that a call to EmitLiteral will
967
 * precede the next call to EmitCopy (if any).
968
 *
969
 * Step 1: Scan forward in the input looking for a 4-byte-long match.
970
 * If we get close to exhausting the input then goto emit_remainder.
971
 *
972
 * Heuristic match skipping: If 32 bytes are scanned with no matches
973
 * found, start looking only at every other byte. If 32 more bytes are
974
 * scanned, look at every third byte, etc.. When a match is found,
975
 * immediately go back to looking at every byte. This is a small loss
976
 * (~5% performance, ~0.1% density) for compressible data due to more
977
 * bookkeeping, but for non-compressible data (such as JPEG) it's a huge
978
 * win since the compressor quickly "realizes" the data is incompressible
979
 * and doesn't bother looking for matches everywhere.
980
 *
981
 * The "skip" variable keeps track of how many bytes there are since the
982
 * last match; dividing it by 32 (ie. right-shifting by five) gives the
983
 * number of bytes to move ahead for each iteration.
984
 */
985
0
      u32 skip_bytes = 32;
986
987
0
      const char *next_ip = ip;
988
0
      const char *candidate;
989
0
      do {
990
0
        ip = next_ip;
991
0
        u32 hval = next_hash;
992
0
        DCHECK_EQ(hval, hash(ip, shift));
993
0
        u32 bytes_between_hash_lookups = skip_bytes++ >> 5;
994
0
        next_ip = ip + bytes_between_hash_lookups;
995
0
        if (unlikely(next_ip > ip_limit)) {
996
0
          goto emit_remainder;
997
0
        }
998
0
        next_hash = hash(next_ip, shift);
999
0
        candidate = baseip + table[hval];
1000
0
        DCHECK_GE(candidate, baseip);
1001
0
        DCHECK_LT(candidate, ip);
1002
1003
0
        table[hval] = ip - baseip;
1004
0
      } while (likely(UNALIGNED_LOAD32(ip) !=
1005
0
          UNALIGNED_LOAD32(candidate)));
1006
1007
/*
1008
 * Step 2: A 4-byte match has been found.  We'll later see if more
1009
 * than 4 bytes match.  But, prior to the match, input
1010
 * bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
1011
 */
1012
0
      DCHECK_LE(next_emit + 16, ip_end);
1013
0
      op = emit_literal(op, next_emit, ip - next_emit, true);
1014
1015
/*
1016
 * Step 3: Call EmitCopy, and then see if another EmitCopy could
1017
 * be our next move.  Repeat until we find no match for the
1018
 * input immediately after what was consumed by the last EmitCopy call.
1019
 *
1020
 * If we exit this loop normally then we need to call EmitLiteral next,
1021
 * though we don't yet know how big the literal will be.  We handle that
1022
 * by proceeding to the next iteration of the main loop.  We also can exit
1023
 * this loop via goto if we get close to exhausting the input.
1024
 */
1025
0
      eight_bytes_reference input_bytes;
1026
0
      u32 candidate_bytes = 0;
1027
1028
0
      do {
1029
/*
1030
 * We have a 4-byte match at ip, and no need to emit any
1031
 *  "literal bytes" prior to ip.
1032
 */
1033
0
        const char *base = ip;
1034
0
        int matched = 4 +
1035
0
            find_match_length(candidate + 4, ip + 4,
1036
0
                  ip_end);
1037
0
        ip += matched;
1038
0
        int offset = base - candidate;
1039
0
        DCHECK_EQ(0, memcmp(base, candidate, matched));
1040
0
        op = emit_copy(op, offset, matched);
1041
/*
1042
 * We could immediately start working at ip now, but to improve
1043
 * compression we first update table[Hash(ip - 1, ...)].
1044
 */
1045
0
        const char *insert_tail = ip - 1;
1046
0
        next_emit = ip;
1047
0
        if (unlikely(ip >= ip_limit)) {
1048
0
          goto emit_remainder;
1049
0
        }
1050
0
        input_bytes = get_eight_bytes_at(insert_tail);
1051
0
        u32 prev_hash =
1052
0
            hash_bytes(get_u32_at_offset
1053
0
                 (input_bytes, 0), shift);
1054
0
        table[prev_hash] = ip - baseip - 1;
1055
0
        u32 cur_hash =
1056
0
            hash_bytes(get_u32_at_offset
1057
0
                 (input_bytes, 1), shift);
1058
0
        candidate = baseip + table[cur_hash];
1059
0
        candidate_bytes = UNALIGNED_LOAD32(candidate);
1060
0
        table[cur_hash] = ip - baseip;
1061
0
      } while (get_u32_at_offset(input_bytes, 1) ==
1062
0
         candidate_bytes);
1063
1064
0
      next_hash =
1065
0
          hash_bytes(get_u32_at_offset(input_bytes, 2),
1066
0
               shift);
1067
0
      ++ip;
1068
0
    }
1069
0
  }
1070
1071
0
emit_remainder:
1072
  /* Emit the remaining bytes as a literal */
1073
0
  if (next_emit < ip_end)
1074
0
    op = emit_literal(op, next_emit, ip_end - next_emit, false);
1075
1076
0
  return op;
1077
0
}
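/*
 * Worked example of the match-skipping heuristic above: skip_bytes starts
 * at 32, so bytes_between_hash_lookups = skip_bytes++ >> 5 stays 1 for the
 * first 32 failed lookups, becomes 2 for the next 32, then 3, and so on;
 * long runs of incompressible input are therefore scanned with an ever
 * larger stride instead of one hash probe per byte.
 */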
1078
1079
/*
1080
 * -----------------------------------------------------------------------
1081
 *  Lookup table for decompression code.  Generated by ComputeTable() below.
1082
 * -----------------------------------------------------------------------
1083
 */
1084
1085
/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */
1086
static const u32 wordmask[] = {
1087
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
1088
};
1089
1090
/*
1091
 * Data stored per entry in lookup table:
1092
 *       Range   Bits-used       Description
1093
 *      ------------------------------------
1094
 *      1..64   0..7            Literal/copy length encoded in opcode byte
1095
 *      0..7    8..10           Copy offset encoded in opcode byte / 256
1096
 *      0..4    11..13          Extra bytes after opcode
1097
 *
1098
 * We use eight bits for the length even though 7 would have sufficed
1099
 * because of efficiency reasons:
1100
 *      (1) Extracting a byte is faster than a bit-field
1101
 *      (2) It properly aligns copy offset so we do not need a <<8
1102
 */
1103
static const u16 char_table[256] = {
1104
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
1105
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
1106
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
1107
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
1108
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
1109
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
1110
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
1111
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
1112
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
1113
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
1114
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
1115
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
1116
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
1117
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
1118
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
1119
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
1120
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
1121
  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
1122
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
1123
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
1124
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
1125
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
1126
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
1127
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
1128
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
1129
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
1130
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
1131
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
1132
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
1133
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
1134
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
1135
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
1136
};
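/*
 * Worked example of the encoding above: char_table[0x01] == 0x0804, so a
 * tag byte of 0x01 decodes as length = entry & 0xff = 4, copy_offset =
 * entry & 0x700 = 0 and entry >> 11 = 1 extra trailer byte carrying the
 * low offset bits.  Similarly char_table[0x51] == 0x0a08: length 8, high
 * offset bits 2 * 256 = 512 and one trailer byte, i.e. a copy of length 8
 * with an offset in 512..767.
 */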
1137
1138
struct snappy_decompressor {
1139
  struct source *reader;  /* Underlying source of bytes to decompress */
1140
  const char *ip;   /* Points to next buffered byte */
1141
  const char *ip_limit; /* Points just past buffered bytes */
1142
  u32 peeked;   /* Bytes peeked from reader (need to skip) */
1143
  bool eof;   /* Hit end of input without an error? */
1144
  char scratch[5];  /* Temporary buffer for peekfast boundaries */
1145
};
1146
1147
static void
1148
init_snappy_decompressor(struct snappy_decompressor *d, struct source *reader)
1149
0
{
1150
0
  d->reader = reader;
1151
0
  d->ip = NULL;
1152
0
  d->ip_limit = NULL;
1153
0
  d->peeked = 0;
1154
0
  d->eof = false;
1155
0
}
1156
1157
static void exit_snappy_decompressor(struct snappy_decompressor *d)
1158
0
{
1159
0
  skip(d->reader, d->peeked);
1160
0
}
1161
1162
/*
1163
 * Read the uncompressed length stored at the start of the compressed data.
1164
 * On success, stores the length in *result and returns true.
1165
 * On failure, returns false.
1166
 */
1167
static bool read_uncompressed_length(struct snappy_decompressor *d,
1168
             u32 * result)
1169
0
{
1170
0
  DCHECK(d->ip == NULL);  /*
1171
         * Must not have read anything yet
1172
         * Length is encoded in 1..5 bytes
1173
         */
1174
0
  *result = 0;
1175
0
  u32 shift = 0;
1176
0
  while (true) {
1177
0
    if (shift >= 32)
1178
0
      return false;
1179
0
    size_t n;
1180
0
    const char *ip = peek(d->reader, &n);
1181
0
    if (n == 0)
1182
0
      return false;
1183
0
    const unsigned char c = *(const unsigned char *)(ip);
1184
0
    skip(d->reader, 1);
1185
0
    *result |= (u32) (c & 0x7f) << shift;
1186
0
    if (c < 128) {
1187
0
      break;
1188
0
    }
1189
0
    shift += 7;
1190
0
  }
1191
0
  return true;
1192
0
}
1193
1194
static bool refill_tag(struct snappy_decompressor *d);
1195
1196
/*
1197
 * Process all tags found in the input and write the decompressed data through the writer.
1198
 * Returns on error or when the end of the input is reached.
1199
 */
1200
static void decompress_all_tags(struct snappy_decompressor *d,
1201
        struct writer *writer)
1202
0
{
1203
0
  const char *ip = d->ip;
1204
1205
  /*
1206
   * We could have put this refill fragment only at the beginning of the loop.
1207
   * However, duplicating it at the end of each branch gives the compiler more
1208
   * scope to optimize the <ip_limit_ - ip> expression based on the local
1209
   * context, which overall increases speed.
1210
   */
1211
0
#define MAYBE_REFILL() \
1212
0
        if (d->ip_limit - ip < 5) {   \
1213
0
    d->ip = ip;     \
1214
0
    if (!refill_tag(d)) return; \
1215
0
    ip = d->ip;     \
1216
0
        }
1217
1218
1219
0
  MAYBE_REFILL();
1220
0
  for (;;) {
1221
0
    if (d->ip_limit - ip < 5) {
1222
0
      d->ip = ip;
1223
0
      if (!refill_tag(d))
1224
0
        return;
1225
0
      ip = d->ip;
1226
0
    }
1227
1228
0
    const unsigned char c = *(const unsigned char *)(ip++);
1229
1230
0
    if ((c & 0x3) == LITERAL) {
1231
0
      u32 literal_length = (c >> 2) + 1;
1232
0
      if (writer_try_fast_append(writer, ip, d->ip_limit - ip, 
1233
0
               literal_length)) {
1234
0
        DCHECK_LT(literal_length, 61);
1235
0
        ip += literal_length;
1236
0
        MAYBE_REFILL();
1237
0
        continue;
1238
0
      }
1239
0
      if (unlikely(literal_length >= 61)) {
1240
        /* Long literal */
1241
0
        const u32 literal_ll = literal_length - 60;
1242
0
        literal_length = (get_unaligned_le32(ip) &
1243
0
              wordmask[literal_ll]) + 1;
1244
0
        ip += literal_ll;
1245
0
      }
1246
1247
0
      u32 avail = d->ip_limit - ip;
1248
0
      while (avail < literal_length) {
1249
0
        if (!writer_append(writer, ip, avail))
1250
0
          return;
1251
0
        literal_length -= avail;
1252
0
        skip(d->reader, d->peeked);
1253
0
        size_t n;
1254
0
        ip = peek(d->reader, &n);
1255
0
        avail = n;
1256
0
        d->peeked = avail;
1257
0
        if (avail == 0)
1258
0
          return; /* Premature end of input */
1259
0
        d->ip_limit = ip + avail;
1260
0
      }
1261
0
      if (!writer_append(writer, ip, literal_length))
1262
0
        return;
1263
0
      ip += literal_length;
1264
0
      MAYBE_REFILL();
1265
0
    } else {
1266
0
      const u32 entry = char_table[c];
1267
0
      const u32 trailer = get_unaligned_le32(ip) &
1268
0
        wordmask[entry >> 11];
1269
0
      const u32 length = entry & 0xff;
1270
0
      ip += entry >> 11;
1271
1272
      /*
1273
       * copy_offset/256 is encoded in bits 8..10.
1274
       * By just fetching those bits, we get
1275
       * copy_offset (since the bit-field starts at
1276
       * bit 8).
1277
       */
1278
0
      const u32 copy_offset = entry & 0x700;
1279
0
      if (!writer_append_from_self(writer,
1280
0
                 copy_offset + trailer,
1281
0
                 length))
1282
0
        return;
1283
0
      MAYBE_REFILL();
1284
0
    }
1285
0
  }
1286
0
}
1287
1288
#undef MAYBE_REFILL
1289
1290
static bool refill_tag(struct snappy_decompressor *d)
1291
0
{
1292
0
  const char *ip = d->ip;
1293
1294
0
  if (ip == d->ip_limit) {
1295
0
    size_t n;
1296
    /* Fetch a new fragment from the reader */
1297
0
    skip(d->reader, d->peeked); /* All peeked bytes are used up */
1298
0
    ip = peek(d->reader, &n);
1299
0
    d->peeked = n;
1300
0
    if (n == 0) {
1301
0
      d->eof = true;
1302
0
      return false;
1303
0
    }
1304
0
    d->ip_limit = ip + n;
1305
0
  }
1306
1307
  /* Read the tag character */
1308
0
  DCHECK_LT(ip, d->ip_limit);
1309
0
  const unsigned char c = *(const unsigned char *)(ip);
1310
0
  const u32 entry = char_table[c];
1311
0
  const u32 needed = (entry >> 11) + 1; /* +1 byte for 'c' */
1312
0
  DCHECK_LE(needed, sizeof(d->scratch));
1313
1314
  /* Read more bytes from reader if needed */
1315
0
  u32 nbuf = d->ip_limit - ip;
1316
1317
0
  if (nbuf < needed) {
1318
    /*
1319
     * Stitch together bytes from ip and reader to form the word
1320
     * contents.  We store the needed bytes in "scratch".  They
1321
     * will be consumed immediately by the caller since we do not
1322
     * read more than we need.
1323
     */
1324
0
    memmove(d->scratch, ip, nbuf);
1325
0
    skip(d->reader, d->peeked); /* All peeked bytes are used up */
1326
0
    d->peeked = 0;
1327
0
    while (nbuf < needed) {
1328
0
      size_t length;
1329
0
      const char *src = peek(d->reader, &length);
1330
0
      if (length == 0)
1331
0
        return false;
1332
0
      u32 to_add = min_t(u32, needed - nbuf, length);
1333
0
      memcpy(d->scratch + nbuf, src, to_add);
1334
0
      nbuf += to_add;
1335
0
      skip(d->reader, to_add);
1336
0
    }
1337
0
    DCHECK_EQ(nbuf, needed);
1338
0
    d->ip = d->scratch;
1339
0
    d->ip_limit = d->scratch + needed;
1340
0
  } else if (nbuf < 5) {
1341
    /*
1342
     * Have enough bytes, but move into scratch so that we do not
1343
     * read past end of input
1344
     */
1345
0
    memmove(d->scratch, ip, nbuf);
1346
0
    skip(d->reader, d->peeked); /* All peeked bytes are used up */
1347
0
    d->peeked = 0;
1348
0
    d->ip = d->scratch;
1349
0
    d->ip_limit = d->scratch + nbuf;
1350
0
  } else {
1351
    /* Pass pointer to buffer returned by reader. */
1352
0
    d->ip = ip;
1353
0
  }
1354
0
  return true;
1355
0
}
1356
1357
static int internal_uncompress(struct source *r,
1358
             struct writer *writer, u32 max_len)
1359
0
{
1360
0
  struct snappy_decompressor decompressor;
1361
0
  u32 uncompressed_len = 0;
1362
1363
0
  init_snappy_decompressor(&decompressor, r);
1364
1365
0
  if (!read_uncompressed_length(&decompressor, &uncompressed_len))
1366
0
    return -EIO;
1367
  /* Protect against possible DoS attack */
1368
0
  if ((u64) (uncompressed_len) > max_len)
1369
0
    return -EIO;
1370
1371
0
  writer_set_expected_length(writer, uncompressed_len);
1372
1373
  /* Process the entire input */
1374
0
  decompress_all_tags(&decompressor, writer);
1375
1376
0
  exit_snappy_decompressor(&decompressor);
1377
0
  if (decompressor.eof && writer_check_length(writer))
1378
0
    return 0;
1379
0
  return -EIO;
1380
0
}
1381
1382
static inline int compress(struct snappy_env *env, struct source *reader,
1383
         struct sink *writer)
1384
0
{
1385
0
  int err;
1386
0
  size_t written = 0;
1387
0
  int N = available(reader);
1388
0
  char ulength[kmax32];
1389
0
  char *p = varint_encode32(ulength, N);
1390
1391
0
  append(writer, ulength, p - ulength);
1392
0
  written += (p - ulength);
1393
1394
0
  while (N > 0) {
1395
    /* Get next block to compress (without copying if possible) */
1396
0
    size_t fragment_size;
1397
0
    const char *fragment = peek(reader, &fragment_size);
1398
0
    if (fragment_size == 0) {
1399
0
      err = -EIO;
1400
0
      goto out;
1401
0
    }
1402
0
    const unsigned num_to_read = min_t(int, N, kblock_size);
1403
0
    size_t bytes_read = fragment_size;
1404
1405
0
    int pending_advance = 0;
1406
0
    if (bytes_read >= num_to_read) {
1407
      /* Buffer returned by reader is large enough */
1408
0
      pending_advance = num_to_read;
1409
0
      fragment_size = num_to_read;
1410
0
    }
1411
0
    else {
1412
0
      memcpy(env->scratch, fragment, bytes_read);
1413
0
      skip(reader, bytes_read);
1414
1415
0
      while (bytes_read < num_to_read) {
1416
0
        fragment = peek(reader, &fragment_size);
1417
0
        size_t n =
1418
0
            min_t(size_t, fragment_size,
1419
0
            num_to_read - bytes_read);
1420
0
        memcpy((char *)(env->scratch) + bytes_read, fragment, n);
1421
0
        bytes_read += n;
1422
0
        skip(reader, n);
1423
0
      }
1424
0
      DCHECK_EQ(bytes_read, num_to_read);
1425
0
      fragment = env->scratch;
1426
0
      fragment_size = num_to_read;
1427
0
    }
1428
0
    if (fragment_size < num_to_read)
1429
0
      return -EIO;
1430
1431
    /* Get encoding table for compression */
1432
0
    int table_size;
1433
0
    u16 *table = get_hash_table(env, num_to_read, &table_size);
1434
1435
    /* Compress input_fragment and append to dest */
1436
0
    char *dest;
1437
0
    dest = sink_peek(writer, snappy_max_compressed_length(num_to_read));
1438
0
    if (!dest) {
1439
      /*
1440
       * Need a scratch buffer for the output,
1441
       * because the byte sink doesn't have enough
1442
       * in one piece.
1443
       */
1444
0
      dest = env->scratch_output;
1445
0
    }
1446
0
    char *end = compress_fragment(fragment, fragment_size,
1447
0
                dest, table, table_size);
1448
0
    append(writer, dest, end - dest);
1449
0
    written += (end - dest);
1450
1451
0
    N -= num_to_read;
1452
0
    skip(reader, pending_advance);
1453
0
  }
1454
1455
0
  err = 0;
1456
0
out:
1457
0
  return err;
1458
0
}
1459
1460
#ifdef SG
1461
1462
int snappy_compress_iov(struct snappy_env *env,
1463
      struct iovec *iov_in,
1464
      int iov_in_len,
1465
      size_t input_length,
1466
      struct iovec *iov_out,
1467
      int *iov_out_len,
1468
      size_t *compressed_length)
1469
{
1470
  struct source reader = {
1471
    .iov = iov_in,
1472
    .iovlen = iov_in_len,
1473
    .total = input_length
1474
  };
1475
  struct sink writer = {
1476
    .iov = iov_out,
1477
    .iovlen = *iov_out_len,
1478
  };
1479
  int err = compress(env, &reader, &writer);
1480
1481
  *iov_out_len = writer.curvec + 1;
1482
1483
  /* Compute how many bytes were added */
1484
  *compressed_length = writer.written;
1485
  return err;
1486
}
1487
EXPORT_SYMBOL(snappy_compress_iov);
1488
1489
/**
1490
 * snappy_compress - Compress a buffer using the snappy compressor.
1491
 * @env: Preallocated environment
1492
 * @input: Input buffer
1493
 * @input_length: Length of the input buffer
1494
 * @compressed: Output buffer for compressed data
1495
 * @compressed_length: The real length of the output written here.
1496
 *
1497
 * Return 0 on success, otherwise a negative error code.
1498
 *
1499
 * The output buffer must be at least
1500
 * snappy_max_compressed_length(input_length) bytes long.
1501
 *
1502
 * Requires a preallocated environment from snappy_init_env.
1503
 * The environment does not keep state over individual calls
1504
 * of this function, just preallocates the memory.
1505
 */
1506
int snappy_compress(struct snappy_env *env,
1507
        const char *input,
1508
        size_t input_length,
1509
        char *compressed, size_t *compressed_length)
1510
{
1511
  struct iovec iov_in = {
1512
    .iov_base = (char *)input,
1513
    .iov_len = input_length,
1514
  };
1515
  struct iovec iov_out = {
1516
    .iov_base = compressed,
1517
    .iov_len = 0xffffffff,
1518
  };
1519
  int out = 1;
1520
  return snappy_compress_iov(env, 
1521
           &iov_in, 1, input_length, 
1522
           &iov_out, &out, compressed_length);
1523
}
1524
EXPORT_SYMBOL(snappy_compress);
1525
1526
int snappy_uncompress_iov(struct iovec *iov_in, int iov_in_len,
1527
         size_t input_len, char *uncompressed)
1528
{
1529
  struct source reader = {
1530
    .iov = iov_in,
1531
    .iovlen = iov_in_len,
1532
    .total = input_len
1533
  };
1534
  struct writer output = {
1535
    .base = uncompressed,
1536
    .op = uncompressed
1537
  };
1538
  return internal_uncompress(&reader, &output, 0xffffffff);
1539
}
1540
EXPORT_SYMBOL(snappy_uncompress_iov);
1541
1542
/**
1543
 * snappy_uncompress - Uncompress a snappy compressed buffer
1544
 * @compressed: Input buffer with compressed data
1545
 * @n: length of compressed buffer
1546
 * @uncompressed: buffer for uncompressed data
1547
 *
1548
 * The uncompressed data buffer must be at least
1549
 * snappy_uncompressed_length(compressed) bytes long.
1550
 *
1551
 * Return 0 on success, otherwise a negative error code.
1552
 */
1553
int snappy_uncompress(const char *compressed, size_t n, char *uncompressed)
1554
{
1555
  struct iovec iov = {
1556
    .iov_base = (char *)compressed,
1557
    .iov_len = n
1558
  };
1559
  return snappy_uncompress_iov(&iov, 1, n, uncompressed);
1560
}
1561
EXPORT_SYMBOL(snappy_uncompress);
1562
1563
#else
1564
/**
1565
 * snappy_compress - Compress a buffer using the snappy compressor.
1566
 * @env: Preallocated environment
1567
 * @input: Input buffer
1568
 * @input_length: Length of the input buffer
1569
 * @compressed: Output buffer for compressed data
1570
 * @compressed_length: The real length of the output written here.
1571
 *
1572
 * Return 0 on success, otherwise a negative error code.
1573
 *
1574
 * The output buffer must be at least
1575
 * snappy_max_compressed_length(input_length) bytes long.
1576
 *
1577
 * Requires a preallocated environment from snappy_init_env.
1578
 * The environment does not keep state over individual calls
1579
 * of this function, just preallocates the memory.
1580
 */
1581
int snappy_compress(struct snappy_env *env,
1582
        const char *input,
1583
        size_t input_length,
1584
        char *compressed, size_t *compressed_length)
1585
0
{
1586
0
  struct source reader = {
1587
0
    .ptr = input,
1588
0
    .left = input_length
1589
0
  };
1590
0
  struct sink writer = {
1591
0
    .dest = compressed,
1592
0
  };
1593
0
  int err = compress(env, &reader, &writer);
1594
1595
  /* Compute how many bytes were added */
1596
0
  *compressed_length = (writer.dest - compressed);
1597
0
  return err;
1598
0
}
1599
EXPORT_SYMBOL(snappy_compress);
1600
1601
/**
1602
 * snappy_uncompress - Uncompress a snappy compressed buffer
1603
 * @compressed: Input buffer with compressed data
1604
 * @n: length of compressed buffer
1605
 * @uncompressed: buffer for uncompressed data
1606
 *
1607
 * The uncompressed data buffer must be at least
1608
 * snappy_uncompressed_length(compressed) bytes long.
1609
 *
1610
 * Return 0 on success, otherwise a negative error code.
1611
 */
1612
int snappy_uncompress(const char *compressed, size_t n, char *uncompressed)
1613
0
{
1614
0
  struct source reader = {
1615
0
    .ptr = compressed,
1616
0
    .left = n
1617
0
  };
1618
0
  struct writer output = {
1619
0
    .base = uncompressed,
1620
0
    .op = uncompressed
1621
0
  };
1622
0
  return internal_uncompress(&reader, &output, 0xffffffff);
1623
0
}
1624
EXPORT_SYMBOL(snappy_uncompress);
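/*
 * Minimal round-trip usage sketch for the single-buffer API above.  The
 * example_roundtrip() name and the malloc-based buffer handling are
 * illustrative only; error handling is reduced to early returns.
 */
#if 0
#include <stdlib.h>

static int example_roundtrip(const char *data, size_t len)
{
  struct snappy_env env;
  size_t clen, ulen;
  int err = -1;
  char *cbuf = malloc(snappy_max_compressed_length(len));
  char *ubuf = NULL;

  if (!cbuf || snappy_init_env(&env) < 0)
    goto out;
  if (snappy_compress(&env, data, len, cbuf, &clen) < 0)
    goto free_env;
  if (!snappy_uncompressed_length(cbuf, clen, &ulen) || ulen != len)
    goto free_env;
  ubuf = malloc(ulen);
  if (ubuf)
    err = snappy_uncompress(cbuf, clen, ubuf);
free_env:
  snappy_free_env(&env);
out:
  free(cbuf);
  free(ubuf);
  return err;
}
#endif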
1625
#endif
1626
1627
static inline void clear_env(struct snappy_env *env)
1628
0
{
1629
0
    memset(env, 0, sizeof(*env));
1630
0
}
1631
1632
#ifdef SG
1633
/**
1634
 * snappy_init_env_sg - Allocate snappy compression environment
1635
 * @env: Environment to preallocate
1636
 * @sg: True if the environment will ever be used with scatter-gather (iovec) input
1637
 *
1638
 * If false is passed to sg then multiple entries in an iovec
1639
 * are not legal.
1640
 * Returns 0 on success, otherwise negative errno.
1641
 * Must run in process context.
1642
 */
1643
int snappy_init_env_sg(struct snappy_env *env, bool sg)
1644
{
1645
  if (snappy_init_env(env) < 0)
1646
    goto error;
1647
1648
  if (sg) {
1649
    env->scratch = vmalloc(kblock_size);
1650
    if (!env->scratch)
1651
      goto error;
1652
    env->scratch_output =
1653
      vmalloc(snappy_max_compressed_length(kblock_size));
1654
    if (!env->scratch_output)
1655
      goto error;
1656
  }
1657
  return 0;
1658
error:
1659
  snappy_free_env(env);
1660
  return -ENOMEM;
1661
}
1662
EXPORT_SYMBOL(snappy_init_env_sg);
1663
#endif
1664
1665
/**
1666
 * snappy_init_env - Allocate snappy compression environment
1667
 * @env: Environment to preallocate
1668
 *
1669
 * Passing multiple entries in an iovec is not allowed
1670
 * on the environment allocated here.
1671
 * Returns 0 on success, otherwise negative errno.
1672
 * Must run in process context.
1673
 */
1674
int snappy_init_env(struct snappy_env *env)
1675
0
{
1676
0
    clear_env(env);
1677
0
  env->hash_table = vmalloc(sizeof(u16) * kmax_hash_table_size);
1678
0
  if (!env->hash_table)
1679
0
    return -ENOMEM;
1680
0
  return 0;
1681
0
}
1682
EXPORT_SYMBOL(snappy_init_env);
1683
1684
/**
1685
 * snappy_free_env - Free a snappy compression environment
1686
 * @env: Environment to free.
1687
 *
1688
 * Must run in process context.
1689
 */
1690
void snappy_free_env(struct snappy_env *env)
1691
0
{
1692
0
  vfree(env->hash_table);
1693
#ifdef SG
1694
  vfree(env->scratch);
1695
  vfree(env->scratch_output);
1696
#endif
1697
0
  clear_env(env);
1698
0
}
1699
EXPORT_SYMBOL(snappy_free_env);