Coverage Report

Created: 2025-11-15 07:36

/src/duckdb/third_party/snappy/snappy-stubs-internal.h
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Various stubs for the open-source version of Snappy.

#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_

// DuckDB - LNK: define here instead of in CMake
#ifdef __GNUC__
#define HAVE_BUILTIN_EXPECT 1
#define HAVE_BUILTIN_CTZ 1
#define HAVE_BUILTIN_PREFETCH 1
#endif

// These should always be available on aarch64, but sadly not on iOS/Android
// #if defined(__aarch64__)
// #define SNAPPY_HAVE_NEON 1
// #define SNAPPY_HAVE_NEON_CRC32 1
// #endif

#include "snappy_version.hpp"

#if SNAPPY_NEW_VERSION

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdint.h>

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>

#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif

#if HAVE_UNISTD_H
#include <unistd.h>
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#endif  // defined(_MSC_VER)

#ifndef __has_feature
#define __has_feature(x) 0
#endif

#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
    __msan_unpoison((address), (size))
#else
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
#endif  // __has_feature(memory_sanitizer)

#include "snappy-stubs-public.h"

// Used to enable 64-bit optimized versions of some routines.
#if defined(__PPC64__) || defined(__powerpc64__)
#define ARCH_PPC 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define ARCH_ARM 1
#endif

// Needed by OS X, among others.
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

// The size of an array, if known at compile-time.
// Will give unexpected results if used on a pointer.
// We undefine it first, since some compilers already have a definition.
#ifdef ARRAYSIZE
#undef ARRAYSIZE
#endif
#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
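
// Editor's note (illustrative, not in the original header): the int{...}
// form yields a signed int, and because brace initialization rejects
// narrowing, it fails to compile for an array whose element count would not
// fit in an int.
//
//   char buf[16];
//   static_assert(ARRAYSIZE(buf) == 16, "element count, not byte count");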

// Static prediction hints.
#if HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define SNAPPY_PREDICT_FALSE(x) x
#define SNAPPY_PREDICT_TRUE(x) x
#endif  // HAVE_BUILTIN_EXPECT
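
// Illustrative usage sketch (editor's addition; `n` is hypothetical): the
// macros wrap a branch condition to tell the compiler which way it usually
// goes, keeping the rare path out of the hot code layout:
//
//   if (SNAPPY_PREDICT_FALSE(n == 0)) {
//     return false;  // rare error path
//   }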

// Inlining hints.
#if HAVE_ATTRIBUTE_ALWAYS_INLINE
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#else
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
#endif  // HAVE_ATTRIBUTE_ALWAYS_INLINE

#if HAVE_BUILTIN_PREFETCH
#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
#else
#define SNAPPY_PREFETCH(ptr) (void)(ptr)
#endif
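
// Editor's note (illustrative): the second and third __builtin_prefetch
// arguments request a read (0) with high temporal locality (3), i.e. keep
// the line in all cache levels. A typical call prefetches a little ahead of
// a streaming read (`ip` is hypothetical):
//
//   SNAPPY_PREFETCH(ip + 64);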

// Stubbed version of ABSL_FLAG.
//
// In the open source version, flags can only be changed at compile time.
#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \
  flag_type FLAGS_ ## flag_name = default_value
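
// Illustrative expansion (editor's sketch; the flag name is hypothetical).
// The help text is simply discarded:
//
//   SNAPPY_FLAG(bool, snappy_dummy_flag, false, "ignored help text");
//   // expands to:  bool FLAGS_snappy_dummy_flag = false;
//   // and is read back with:  GetFlag(FLAGS_snappy_dummy_flag)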

namespace duckdb_snappy {

// Stubbed version of absl::GetFlag().
template <typename T>
inline T GetFlag(T flag) { return flag; }

static const uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
static const int64_t kint64max = std::numeric_limits<int64_t>::max();

// Potentially unaligned loads and stores.

inline uint16_t UNALIGNED_LOAD16(const void *p) {
  // Compiles to a single movzx/ldrh on clang/gcc/msvc.
  uint16_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

inline uint32_t UNALIGNED_LOAD32(const void *p) {
  // Compiles to a single mov/ldr on clang/gcc/msvc.
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

inline uint64_t UNALIGNED_LOAD64(const void *p) {
  // Compiles to a single mov/ldr on clang/gcc/msvc.
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

inline void UNALIGNED_STORE16(void *p, uint16_t v) {
  // Compiles to a single mov/strh on clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}

inline void UNALIGNED_STORE32(void *p, uint32_t v) {
  // Compiles to a single mov/str on clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}

inline void UNALIGNED_STORE64(void *p, uint64_t v) {
  // Compiles to a single mov/str on clang/gcc/msvc.
  std::memcpy(p, &v, sizeof(v));
}
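
// Editor's note (illustrative): memcpy is used instead of a cast such as
// *reinterpret_cast<const uint32_t*>(p) because the cast has undefined
// behavior when p is misaligned or aliases another type; compilers turn the
// fixed-size memcpy into a single load/store anyway. A hypothetical caller:
//
//   char buf[8] = {1, 0, 0, 0, 0, 0, 0, 0};
//   uint32_t x = UNALIGNED_LOAD32(buf + 1);  // legal even though misaligned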

// Convert to little-endian storage, opposite of network format.
// Convert x from host to little endian: x = LittleEndian.FromHost(x);
// convert x from little endian to host: x = LittleEndian.ToHost(x);
//
//  Store values into unaligned memory converting to little endian order:
//    LittleEndian.Store16(p, x);
//
//  Load unaligned values stored in little endian converting to host order:
//    x = LittleEndian.Load16(p);
class LittleEndian {
 public:
  // Functions to do unaligned loads and stores in little-endian order.
  static inline uint16_t Load16(const void *ptr) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
    return (static_cast<uint16_t>(buffer[0])) |
            (static_cast<uint16_t>(buffer[1]) << 8);
#else
    // memcpy() turns into a single instruction early in the optimization
    // pipeline (relatively to a series of byte accesses). So, using memcpy
    // instead of byte accesses may lead to better decisions in more stages of
    // the optimization pipeline.
    uint16_t value;
    std::memcpy(&value, ptr, 2);
    return value;
#endif
  }

  static inline uint32_t Load32(const void *ptr) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
    return (static_cast<uint32_t>(buffer[0])) |
            (static_cast<uint32_t>(buffer[1]) << 8) |
            (static_cast<uint32_t>(buffer[2]) << 16) |
            (static_cast<uint32_t>(buffer[3]) << 24);
#else
    // See Load16() for the rationale of using memcpy().
    uint32_t value;
    std::memcpy(&value, ptr, 4);
    return value;
#endif
  }

  static inline uint64_t Load64(const void *ptr) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
    return (static_cast<uint64_t>(buffer[0])) |
            (static_cast<uint64_t>(buffer[1]) << 8) |
            (static_cast<uint64_t>(buffer[2]) << 16) |
            (static_cast<uint64_t>(buffer[3]) << 24) |
            (static_cast<uint64_t>(buffer[4]) << 32) |
            (static_cast<uint64_t>(buffer[5]) << 40) |
            (static_cast<uint64_t>(buffer[6]) << 48) |
            (static_cast<uint64_t>(buffer[7]) << 56);
#else
    // See Load16() for the rationale of using memcpy().
    uint64_t value;
    std::memcpy(&value, ptr, 8);
    return value;
#endif
  }

  static inline void Store16(void *dst, uint16_t value) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
    buffer[0] = static_cast<uint8_t>(value);
    buffer[1] = static_cast<uint8_t>(value >> 8);
#else
    // See Load16() for the rationale of using memcpy().
    std::memcpy(dst, &value, 2);
#endif
  }

  static void Store32(void *dst, uint32_t value) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
    buffer[0] = static_cast<uint8_t>(value);
    buffer[1] = static_cast<uint8_t>(value >> 8);
    buffer[2] = static_cast<uint8_t>(value >> 16);
    buffer[3] = static_cast<uint8_t>(value >> 24);
#else
    // See Load16() for the rationale of using memcpy().
    std::memcpy(dst, &value, 4);
#endif
  }

  static void Store64(void* dst, uint64_t value) {
    // Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
    buffer[0] = static_cast<uint8_t>(value);
    buffer[1] = static_cast<uint8_t>(value >> 8);
    buffer[2] = static_cast<uint8_t>(value >> 16);
    buffer[3] = static_cast<uint8_t>(value >> 24);
    buffer[4] = static_cast<uint8_t>(value >> 32);
    buffer[5] = static_cast<uint8_t>(value >> 40);
    buffer[6] = static_cast<uint8_t>(value >> 48);
    buffer[7] = static_cast<uint8_t>(value >> 56);
#else
    // See Load16() for the rationale of using memcpy().
    std::memcpy(dst, &value, 8);
#endif
  }

  static inline constexpr bool IsLittleEndian() {
#if SNAPPY_IS_BIG_ENDIAN
    return false;
#else
    return true;
#endif  // SNAPPY_IS_BIG_ENDIAN
  }
};
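
// Illustrative round trip (editor's sketch): on any host, storing with
// LittleEndian and loading with LittleEndian returns the original value,
// and the bytes in memory are in little-endian order regardless of the
// host's native byte order.
//
//   char buf[4];
//   LittleEndian::Store32(buf, 0x11223344);  // buf = {0x44, 0x33, 0x22, 0x11}
//   assert(LittleEndian::Load32(buf) == 0x11223344);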

// Some bit-manipulation functions.
class Bits {
 public:
  // Return floor(log2(n)) for positive integer n.
  static int Log2FloorNonZero(uint32_t n);

  // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
  static int Log2Floor(uint32_t n);

  // Return the first set least / most significant bit, 0-indexed.  Returns an
  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
  static int FindLSBSetNonZero(uint32_t n);

  static int FindLSBSetNonZero64(uint64_t n);

 private:
  // No copying
  Bits(const Bits&);
  void operator=(const Bits&);
};

#if HAVE_BUILTIN_CTZ

inline int Bits::Log2FloorNonZero(uint32_t n) {
  assert(n != 0);
  // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
  // represents subtraction in base 2 and observes that there's no carry.
  //
  // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x).
  // Using "31 ^" here instead of "31 -" allows the optimizer to strip the
  // function body down to _bit_scan_reverse(x).
  return 31 ^ __builtin_clz(n);
}
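
// Worked instance (editor's addition): for n = 0x10, __builtin_clz(n) is 27
// (27 leading zero bits in a 32-bit word), and 31 ^ 27 == 31 - 27 == 4,
// which is floor(log2(16)). The XOR form works because 27 <= 31, so the
// binary subtraction 31 - 27 borrows nothing.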

inline int Bits::Log2Floor(uint32_t n) {
  return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
}

inline int Bits::FindLSBSetNonZero(uint32_t n) {
  assert(n != 0);
  return __builtin_ctz(n);
}

#elif defined(_MSC_VER)

inline int Bits::Log2FloorNonZero(uint32_t n) {
  assert(n != 0);
  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
  unsigned long where;
  _BitScanReverse(&where, n);
  return static_cast<int>(where);
}

inline int Bits::Log2Floor(uint32_t n) {
  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
  unsigned long where;
  if (_BitScanReverse(&where, n))
    return static_cast<int>(where);
  return -1;
}

inline int Bits::FindLSBSetNonZero(uint32_t n) {
  assert(n != 0);
  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
  unsigned long where;
  if (_BitScanForward(&where, n))
    return static_cast<int>(where);
  return 32;
}

#else  // Portable versions.

inline int Bits::Log2FloorNonZero(uint32_t n) {
  assert(n != 0);

  int log = 0;
  uint32_t value = n;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    uint32_t x = value >> shift;
    if (x != 0) {
      value = x;
      log += shift;
    }
  }
  assert(value == 1);
  return log;
}

inline int Bits::Log2Floor(uint32_t n) {
  return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
}

inline int Bits::FindLSBSetNonZero(uint32_t n) {
  assert(n != 0);

  int rc = 31;
  for (int i = 4, shift = 1 << 4; i >= 0; --i) {
    const uint32_t x = n << shift;
    if (x != 0) {
      n = x;
      rc -= shift;
    }
    shift >>= 1;
  }
  return rc;
}

#endif  // End portable versions.
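
// Illustrative semantics (editor's addition): FindLSBSetNonZero returns the
// 0-based index of the lowest set bit, e.g. for n = 0b10100 (20) the answer
// is 2, whereas ffs() would return 3 because it is 1-indexed. All three
// implementations above (builtin, MSVC intrinsic, portable loop) agree on
// every nonzero input.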

#if HAVE_BUILTIN_CTZ

inline int Bits::FindLSBSetNonZero64(uint64_t n) {
  assert(n != 0);
  return __builtin_ctzll(n);
}

#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
// _BitScanForward64() is only available on x64 and ARM64.

inline int Bits::FindLSBSetNonZero64(uint64_t n) {
  assert(n != 0);
  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
  unsigned long where;
  if (_BitScanForward64(&where, n))
    return static_cast<int>(where);
  return 64;
}

#else  // Portable version.

// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
  assert(n != 0);

  const uint32_t bottombits = static_cast<uint32_t>(n);
  if (bottombits == 0) {
    // Bottom bits are zero, so scan the top bits.
    return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32));
  } else {
    return FindLSBSetNonZero(bottombits);
  }
}

#endif  // HAVE_BUILTIN_CTZ

// Variable-length integer encoding.
class Varint {
 public:
  // Maximum lengths of varint encoding of uint32_t.
  static const int kMax32 = 5;

  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
  // Never reads a character at or beyond limit.  If a valid/terminated varint32
  // was found in the range, stores it in *OUTPUT and returns a pointer just
  // past the last byte of the varint32. Else returns NULL.  On success,
  // "result <= limit".
  static const char* Parse32WithLimit(const char* ptr, const char* limit,
                                      uint32_t* OUTPUT);

  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
  //            byte just past the last encoded byte.
  static char* Encode32(char* ptr, uint32_t v);

  // EFFECTS    Appends the varint representation of "value" to "*s".
  static void Append32(std::string* s, uint32_t value);
};
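
// Illustrative encoding (editor's addition): a varint32 stores a value in
// base-128 groups, least-significant group first, with the high bit of each
// byte marking "more bytes follow". For example, 300 = 0b100101100 encodes
// as two bytes: 0xAC (low seven bits 0101100 with the continuation bit set),
// then 0x02 (the remaining bits 10). Values below 128 take a single byte;
// kMax32 = 5 bytes covers the full uint32_t range (5 * 7 >= 32 bits).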

inline const char* Varint::Parse32WithLimit(const char* p,
                                            const char* l,
                                            uint32_t* OUTPUT) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
  const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
  uint32_t b, result;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result = b & 127;          if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) <<  7; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
  return NULL;       // Value is too long to be a varint32
 done:
  *OUTPUT = result;
  return reinterpret_cast<const char*>(ptr);
}

inline char* Varint::Encode32(char* sptr, uint32_t v) {
  // Operate on characters as unsigneds
  uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr);
  static const uint8_t B = 128;
  if (v < (1 << 7)) {
    *(ptr++) = static_cast<uint8_t>(v);
  } else if (v < (1 << 14)) {
    *(ptr++) = static_cast<uint8_t>(v | B);
    *(ptr++) = static_cast<uint8_t>(v >> 7);
  } else if (v < (1 << 21)) {
    *(ptr++) = static_cast<uint8_t>(v | B);
    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
    *(ptr++) = static_cast<uint8_t>(v >> 14);
  } else if (v < (1 << 28)) {
    *(ptr++) = static_cast<uint8_t>(v | B);
    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
    *(ptr++) = static_cast<uint8_t>((v >> 14) | B);
    *(ptr++) = static_cast<uint8_t>(v >> 21);
  } else {
    *(ptr++) = static_cast<uint8_t>(v | B);
    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
    *(ptr++) = static_cast<uint8_t>((v >> 14) | B);
    *(ptr++) = static_cast<uint8_t>((v >> 21) | B);
    *(ptr++) = static_cast<uint8_t>(v >> 28);
  }
  return reinterpret_cast<char*>(ptr);
}
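
// Illustrative round trip (editor's sketch; `buf` is hypothetical):
//
//   char buf[Varint::kMax32];
//   char* end = Varint::Encode32(buf, 300);       // writes 0xAC 0x02
//   uint32_t out;
//   const char* next = Varint::Parse32WithLimit(buf, end, &out);
//   assert(next == end && out == 300);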

// If you know the internal layout of the std::string in use, you can
// replace this function with one that resizes the string without
// filling the new space with zeros (if applicable) --
// it will be non-portable but faster.
inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
  s->resize(new_size);
}

// Return a mutable char* pointing to a string's internal buffer,
// which may not be null-terminated. Writing through this pointer will
// modify the string.
//
// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
// next call to a string method that invalidates iterators.
//
// As of 2006-04, there is no standard-blessed way of getting a
// mutable reference to a string's internal buffer. However, issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposes this as the method. It will officially be part of the standard
// for C++0x. This should already work on all current implementations.
inline char* string_as_array(std::string* str) {
  return str->empty() ? NULL : &*str->begin();
}
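
// Illustrative pattern (editor's sketch; `dst` and `n` are hypothetical):
// the two helpers are typically combined to use a std::string as a raw
// output buffer.
//
//   std::string dst;
//   STLStringResizeUninitialized(&dst, n);   // make room for n bytes
//   char* out = string_as_array(&dst);       // write output bytes here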

}  // namespace duckdb_snappy

#else // #if SNAPPY_NEW_VERSION

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string>

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#endif  // defined(_MSC_VER)

#ifndef __has_feature
#define __has_feature(x) 0
#endif

#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
    __msan_unpoison((address), (size))
#else
#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */
#endif  // __has_feature(memory_sanitizer)

#include "snappy-stubs-public.h"

#if defined(__x86_64__)

// Enable 64-bit optimized versions of some routines.
#define ARCH_K8 1

#elif defined(__ppc64__)

#define ARCH_PPC 1

#elif defined(__aarch64__)

#define ARCH_ARM 1

#endif

// Needed by OS X, among others.
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

// The size of an array, if known at compile-time.
// Will give unexpected results if used on a pointer.
// We undefine it first, since some compilers already have a definition.
#ifdef ARRAYSIZE
#undef ARRAYSIZE
#endif
#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))

// Static prediction hints.
#ifdef HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define SNAPPY_PREDICT_FALSE(x) x
#define SNAPPY_PREDICT_TRUE(x) x
#endif

// This is only used for recomputing the tag byte table used during
// decompression; for simplicity we just remove it from the open-source
// version (anyone who wants to regenerate it can just do the call
// themselves within main()).
#define DEFINE_bool(flag_name, default_value, description) \
  bool FLAGS_ ## flag_name = default_value
#define DECLARE_bool(flag_name) \
  extern bool FLAGS_ ## flag_name

namespace duckdb_snappy {

//static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
//static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);


// HM: Always use aligned load to keep ourselves out of trouble. Sorry.

inline uint16 UNALIGNED_LOAD16(const void *p) {
  uint16 t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline uint32 UNALIGNED_LOAD32(const void *p) {
  uint32 t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline void UNALIGNED_STORE16(void *p, uint16 v) {
  memcpy(p, &v, sizeof v);
}

inline void UNALIGNED_STORE32(void *p, uint32 v) {
  memcpy(p, &v, sizeof v);
}

inline void UNALIGNED_STORE64(void *p, uint64 v) {
  memcpy(p, &v, sizeof v);
}


// The following guarantees declaration of the byte swap functions.
#if defined(SNAPPY_IS_BIG_ENDIAN)

#ifdef HAVE_SYS_BYTEORDER_H
#include <sys/byteorder.h>
#endif

#ifdef HAVE_SYS_ENDIAN_H
#include <sys/endian.h>
#endif

#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_16(x) _byteswap_ushort(x)
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)

#elif defined(__APPLE__)
// Mac OS X / Darwin features
#include <libkern/OSByteOrder.h>
#define bswap_16(x) OSSwapInt16(x)
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)

#elif defined(HAVE_BYTESWAP_H)
#include <byteswap.h>

#elif defined(bswap32)
// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
#define bswap_16(x) bswap16(x)
#define bswap_32(x) bswap32(x)
#define bswap_64(x) bswap64(x)

#elif defined(BSWAP_64)
// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
#define bswap_16(x) BSWAP_16(x)
#define bswap_32(x) BSWAP_32(x)
#define bswap_64(x) BSWAP_64(x)

#else

inline uint16 bswap_16(uint16 x) {
  return (x << 8) | (x >> 8);
}

inline uint32 bswap_32(uint32 x) {
  x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
  return (x >> 16) | (x << 16);
}

inline uint64 bswap_64(uint64 x) {
  x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
  x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
  return (x >> 32) | (x << 32);
}

#endif

#endif  // defined(SNAPPY_IS_BIG_ENDIAN)
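
// Illustrative check (editor's addition): whichever branch supplies
// bswap_32, it reverses byte order, e.g. bswap_32(0x01020304) == 0x04030201.
// The portable fallback does it in two mask-and-shift passes: first it swaps
// adjacent bytes within each 16-bit half, then it swaps the two halves.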

// Convert to little-endian storage, opposite of network format.
// Convert x from host to little endian: x = LittleEndian.FromHost(x);
// convert x from little endian to host: x = LittleEndian.ToHost(x);
//
//  Store values into unaligned memory converting to little endian order:
//    LittleEndian.Store16(p, x);
//
//  Load unaligned values stored in little endian converting to host order:
//    x = LittleEndian.Load16(p);
class LittleEndian {
 public:
  // Conversion functions.
#if defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
  static uint16 ToHost16(uint16 x) { return bswap_16(x); }

  static uint32 FromHost32(uint32 x) { return bswap_32(x); }
  static uint32 ToHost32(uint32 x) { return bswap_32(x); }

  static bool IsLittleEndian() { return false; }

#else  // !defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return x; }
  static uint16 ToHost16(uint16 x) { return x; }

  static uint32 FromHost32(uint32 x) { return x; }
  static uint32 ToHost32(uint32 x) { return x; }

  static bool IsLittleEndian() { return true; }

#endif  // !defined(SNAPPY_IS_BIG_ENDIAN)

  // Functions to do unaligned loads and stores in little-endian order.
  static uint16 Load16(const void *p) {
    return ToHost16(UNALIGNED_LOAD16(p));
  }

  static void Store16(void *p, uint16 v) {
    UNALIGNED_STORE16(p, FromHost16(v));
  }

  static uint32 Load32(const void *p) {
    return ToHost32(UNALIGNED_LOAD32(p));
  }

  static void Store32(void *p, uint32 v) {
    UNALIGNED_STORE32(p, FromHost32(v));
  }
};

// Some bit-manipulation functions.
class Bits {
 public:
  // Return floor(log2(n)) for positive integer n.
  static int Log2FloorNonZero(uint32 n);

  // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
  static int Log2Floor(uint32 n);

  // Return the first set least / most significant bit, 0-indexed.  Returns an
  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
  static int FindLSBSetNonZero(uint32 n);

#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  static int FindLSBSetNonZero64(uint64 n);
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

 private:
  // No copying
  Bits(const Bits&);
  void operator=(const Bits&);
};

#ifdef HAVE_BUILTIN_CTZ

inline int Bits::Log2FloorNonZero(uint32 n) {
  assert(n != 0);
  // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
  // represents subtraction in base 2 and observes that there's no carry.
  //
  // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x).
  // Using "31 ^" here instead of "31 -" allows the optimizer to strip the
  // function body down to _bit_scan_reverse(x).
  return 31 ^ __builtin_clz(n);
}

inline int Bits::Log2Floor(uint32 n) {
  return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
}

inline int Bits::FindLSBSetNonZero(uint32 n) {
  assert(n != 0);
  return __builtin_ctz(n);
}

#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);
  return __builtin_ctzll(n);
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

#elif defined(_MSC_VER)

inline int Bits::Log2FloorNonZero(uint32 n) {
  assert(n != 0);
  unsigned long where;
  _BitScanReverse(&where, n);
  return static_cast<int>(where);
}

inline int Bits::Log2Floor(uint32 n) {
  unsigned long where;
  if (_BitScanReverse(&where, n))
    return static_cast<int>(where);
  return -1;
}

inline int Bits::FindLSBSetNonZero(uint32 n) {
  assert(n != 0);
  unsigned long where;
  if (_BitScanForward(&where, n))
    return static_cast<int>(where);
  return 32;
}

#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);
  unsigned long where;
  if (_BitScanForward64(&where, n))
    return static_cast<int>(where);
  return 64;
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

#else  // Portable versions.

inline int Bits::Log2FloorNonZero(uint32 n) {
  assert(n != 0);

  int log = 0;
  uint32 value = n;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    uint32 x = value >> shift;
    if (x != 0) {
      value = x;
      log += shift;
    }
  }
  assert(value == 1);
  return log;
}

inline int Bits::Log2Floor(uint32 n) {
  return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
}

inline int Bits::FindLSBSetNonZero(uint32 n) {
  assert(n != 0);

  int rc = 31;
  for (int i = 4, shift = 1 << 4; i >= 0; --i) {
    const uint32 x = n << shift;
    if (x != 0) {
      n = x;
      rc -= shift;
    }
    shift >>= 1;
  }
  return rc;
}

#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
inline int Bits::FindLSBSetNonZero64(uint64 n) {
  assert(n != 0);

  const uint32 bottombits = static_cast<uint32>(n);
  if (bottombits == 0) {
    // Bottom bits are zero, so scan in top bits
    return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
  } else {
    return FindLSBSetNonZero(bottombits);
  }
}
#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

#endif  // End portable versions.

// Variable-length integer encoding.
class Varint {
 public:
  // Maximum lengths of varint encoding of uint32.
  static const int kMax32 = 5;

  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
  // Never reads a character at or beyond limit.  If a valid/terminated varint32
  // was found in the range, stores it in *OUTPUT and returns a pointer just
  // past the last byte of the varint32. Else returns NULL.  On success,
  // "result <= limit".
  static const char* Parse32WithLimit(const char* ptr, const char* limit,
                                      uint32* OUTPUT);

  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
  //            byte just past the last encoded byte.
  static char* Encode32(char* ptr, uint32 v);

  // EFFECTS    Appends the varint representation of "value" to "*s".
  static void Append32(string* s, uint32 value);
};

inline const char* Varint::Parse32WithLimit(const char* p,
                                            const char* l,
                                            uint32* OUTPUT) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
  const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
  uint32 b, result;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result = b & 127;          if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) <<  7; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
  if (ptr >= limit) return NULL;
  b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
  return NULL;       // Value is too long to be a varint32
 done:
  *OUTPUT = result;
  return reinterpret_cast<const char*>(ptr);
}

inline char* Varint::Encode32(char* sptr, uint32 v) {
  // Operate on characters as unsigneds
  unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
  static const int B = 128;
  if (v < (1<<7)) {
    *(ptr++) = v;
  } else if (v < (1<<14)) {
    *(ptr++) = v | B;
    *(ptr++) = v>>7;
  } else if (v < (1<<21)) {
    *(ptr++) = v | B;
    *(ptr++) = (v>>7) | B;
    *(ptr++) = v>>14;
  } else if (v < (1<<28)) {
    *(ptr++) = v | B;
    *(ptr++) = (v>>7) | B;
    *(ptr++) = (v>>14) | B;
    *(ptr++) = v>>21;
  } else {
    *(ptr++) = v | B;
    *(ptr++) = (v>>7) | B;
    *(ptr++) = (v>>14) | B;
    *(ptr++) = (v>>21) | B;
    *(ptr++) = v>>28;
  }
  return reinterpret_cast<char*>(ptr);
}

// If you know the internal layout of the std::string in use, you can
// replace this function with one that resizes the string without
// filling the new space with zeros (if applicable) --
// it will be non-portable but faster.
inline void STLStringResizeUninitialized(string* s, size_t new_size) {
  s->resize(new_size);
}

// Return a mutable char* pointing to a string's internal buffer,
// which may not be null-terminated. Writing through this pointer will
// modify the string.
//
// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
// next call to a string method that invalidates iterators.
//
// As of 2006-04, there is no standard-blessed way of getting a
// mutable reference to a string's internal buffer. However, issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposes this as the method. It will officially be part of the standard
// for C++0x. This should already work on all current implementations.
inline char* string_as_array(string* str) {
  return str->empty() ? NULL : &*str->begin();
}

}  // namespace duckdb_snappy

#endif  // #if SNAPPY_NEW_VERSION #else

#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_