Coverage Report

Created: 2025-06-22 06:56

/src/util-linux/include/xxhash.h
Line    Count    Source
1
/*
2
 * SPDX-License-Identifier: BSD-2-Clause
3
 *
4
 * xxHash - Extremely Fast Hash algorithm
5
 * Header File
6
 * Copyright (C) 2012-2020 Yann Collet
7
 *
8
 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions are
12
 * met:
13
 *
14
 *    * Redistributions of source code must retain the above copyright
15
 *      notice, this list of conditions and the following disclaimer.
16
 *    * Redistributions in binary form must reproduce the above
17
 *      copyright notice, this list of conditions and the following disclaimer
18
 *      in the documentation and/or other materials provided with the
19
 *      distribution.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
 *
33
 * You can contact the author at:
34
 *   - xxHash homepage: https://www.xxhash.com
35
 *   - xxHash source repository: https://github.com/Cyan4973/xxHash
36
 */
37
/*!
38
 * @mainpage xxHash
39
 *
40
 * @file xxhash.h
41
 * xxHash prototypes and implementation
42
 */
43
/* TODO: update */
44
/* Notice extracted from xxHash homepage:
45
46
xxHash is an extremely fast hash algorithm, running at RAM speed limits.
47
It also successfully passes all tests from the SMHasher suite.
48
49
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
50
51
Name            Speed       Q.Score   Author
52
xxHash          5.4 GB/s     10
53
CrapWow         3.2 GB/s      2       Andrew
54
MurmurHash 3a   2.7 GB/s     10       Austin Appleby
55
SpookyHash      2.0 GB/s     10       Bob Jenkins
56
SBox            1.4 GB/s      9       Bret Mulvey
57
Lookup3         1.2 GB/s      9       Bob Jenkins
58
SuperFastHash   1.2 GB/s      1       Paul Hsieh
59
CityHash64      1.05 GB/s    10       Pike & Alakuijala
60
FNV             0.55 GB/s     5       Fowler, Noll, Vo
61
CRC32           0.43 GB/s     9
62
MD5-32          0.33 GB/s    10       Ronald L. Rivest
63
SHA1-32         0.28 GB/s    10
64
65
Q.Score is a measure of quality of the hash function.
66
It depends on successfully passing SMHasher test set.
67
10 is a perfect score.
68
69
Note: SMHasher's CRC32 implementation is not the fastest one.
70
Other speed-oriented implementations can be faster,
71
especially in combination with PCLMUL instruction:
72
https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
73
74
A 64-bit version, named XXH64, is available since r35.
75
It offers much better speed, but for 64-bit applications only.
76
Name     Speed on 64 bits    Speed on 32 bits
77
XXH64       13.8 GB/s            1.9 GB/s
78
XXH32        6.8 GB/s            6.0 GB/s
79
*/
80
81
/* util-linux customizations */
82
#define XXH_NO_XXH3
83
#define XXH_NAMESPACE ul_
84
85
#if defined (__cplusplus)
86
extern "C" {
87
#endif
88
89
/* ****************************
90
 *  INLINE mode
91
 ******************************/
92
/*!
93
 * XXH_INLINE_ALL (and XXH_PRIVATE_API)
94
 * Use these build macros to inline xxhash into the target unit.
95
 * Inlining improves performance on small inputs, especially when the length is
96
 * expressed as a compile-time constant:
97
 *
98
 *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
99
 *
100
 * It also keeps xxHash symbols private to the unit, so they are not exported.
101
 *
102
 * Usage:
103
 *     #define XXH_INLINE_ALL
104
 *     #include "xxhash.h"
105
 *
106
 * Do not compile and link xxhash.o as a separate object, as it is not useful.
107
 */
108
#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
109
    && !defined(XXH_INLINE_ALL_31684351384)
110
   /* this section should be traversed only once */
111
#  define XXH_INLINE_ALL_31684351384
112
   /* give access to the advanced API, required to compile implementations */
113
#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
114
#  define XXH_STATIC_LINKING_ONLY
115
   /* make all functions private */
116
#  undef XXH_PUBLIC_API
117
#  if defined(__GNUC__)
118
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
119
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
120
#    define XXH_PUBLIC_API static inline
121
#  elif defined(_MSC_VER)
122
#    define XXH_PUBLIC_API static __inline
123
#  else
124
     /* note: this version may generate warnings for unused static functions */
125
#    define XXH_PUBLIC_API static
126
#  endif
127
128
   /*
129
    * This part deals with the special case where a unit wants to inline xxHash,
130
    * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
131
    * such as part of some previously included *.h header file.
132
    * Without further action, the new include would just be ignored,
133
    * and functions would effectively _not_ be inlined (silent failure).
134
    * The following macros solve this situation by prefixing all inlined names,
135
    * avoiding naming collision with previous inclusions.
136
    */
137
   /* Before that, we unconditionally #undef all symbols,
138
    * in case they were already defined with XXH_NAMESPACE.
139
    * They will then be redefined for XXH_INLINE_ALL
140
    */
141
#  undef XXH_versionNumber
142
    /* XXH32 */
143
#  undef XXH32
144
#  undef XXH32_createState
145
#  undef XXH32_freeState
146
#  undef XXH32_reset
147
#  undef XXH32_update
148
#  undef XXH32_digest
149
#  undef XXH32_copyState
150
#  undef XXH32_canonicalFromHash
151
#  undef XXH32_hashFromCanonical
152
    /* XXH64 */
153
#  undef XXH64
154
#  undef XXH64_createState
155
#  undef XXH64_freeState
156
#  undef XXH64_reset
157
#  undef XXH64_update
158
#  undef XXH64_digest
159
#  undef XXH64_copyState
160
#  undef XXH64_canonicalFromHash
161
#  undef XXH64_hashFromCanonical
162
    /* XXH3_64bits */
163
#  undef XXH3_64bits
164
#  undef XXH3_64bits_withSecret
165
#  undef XXH3_64bits_withSeed
166
#  undef XXH3_64bits_withSecretandSeed
167
#  undef XXH3_createState
168
#  undef XXH3_freeState
169
#  undef XXH3_copyState
170
#  undef XXH3_64bits_reset
171
#  undef XXH3_64bits_reset_withSeed
172
#  undef XXH3_64bits_reset_withSecret
173
#  undef XXH3_64bits_update
174
#  undef XXH3_64bits_digest
175
#  undef XXH3_generateSecret
176
    /* XXH3_128bits */
177
#  undef XXH128
178
#  undef XXH3_128bits
179
#  undef XXH3_128bits_withSeed
180
#  undef XXH3_128bits_withSecret
181
#  undef XXH3_128bits_reset
182
#  undef XXH3_128bits_reset_withSeed
183
#  undef XXH3_128bits_reset_withSecret
184
#  undef XXH3_128bits_reset_withSecretandSeed
185
#  undef XXH3_128bits_update
186
#  undef XXH3_128bits_digest
187
#  undef XXH128_isEqual
188
#  undef XXH128_cmp
189
#  undef XXH128_canonicalFromHash
190
#  undef XXH128_hashFromCanonical
191
    /* Finally, free the namespace itself */
192
#  undef XXH_NAMESPACE
193
194
    /* employ the namespace for XXH_INLINE_ALL */
195
#  define XXH_NAMESPACE XXH_INLINE_
196
   /*
197
    * Some identifiers (enums, type names) are not symbols,
198
    * but they must nonetheless be renamed to avoid redeclaration.
199
    * Alternative solution: do not redeclare them.
200
    * However, this requires some #ifdefs, and has a more dispersed impact.
201
    * Meanwhile, renaming can be achieved in a single place.
202
    */
203
#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
204
#  define XXH_OK XXH_IPREF(XXH_OK)
205
#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
206
#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
207
#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
208
#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
209
#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
210
#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
211
#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
212
#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
213
#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
214
#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
215
#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
216
#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
217
   /* Ensure the header is parsed again, even if it was previously included */
218
#  undef XXHASH_H_5627135585666179
219
#  undef XXHASH_H_STATIC_13879238742
220
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
221
222
223
224
/* ****************************************************************
225
 *  Stable API
226
 *****************************************************************/
227
#ifndef XXHASH_H_5627135585666179
228
#define XXHASH_H_5627135585666179 1
229
230
231
/*!
232
 * @defgroup public Public API
233
 * Contains details on the public xxHash functions.
234
 * @{
235
 */
236
/* specific declaration modes for Windows */
237
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
238
#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
239
#    ifdef XXH_EXPORT
240
#      define XXH_PUBLIC_API __declspec(dllexport)
241
#    elif XXH_IMPORT
242
#      define XXH_PUBLIC_API __declspec(dllimport)
243
#    endif
244
#  else
245
#    define XXH_PUBLIC_API   /* do nothing */
246
#  endif
247
#endif
248
249
#ifdef XXH_DOXYGEN
250
/*!
251
 * @brief Emulate a namespace by transparently prefixing all symbols.
252
 *
253
 * If you want to include _and expose_ xxHash functions from within your own
254
 * library, but also want to avoid symbol collisions with other libraries which
255
 * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
256
 * any public symbol from xxhash library with the value of XXH_NAMESPACE
257
 * (therefore, avoid empty or numeric values).
258
 *
259
 * Note that no change is required within the calling program as long as it
260
 * includes `xxhash.h`: Regular symbol names will be automatically translated
261
 * by this header.
262
 */
263
#  define XXH_NAMESPACE /* YOUR NAME HERE */
264
#  undef XXH_NAMESPACE
265
#endif
266
267
#ifdef XXH_NAMESPACE
268
24
#  define XXH_CAT(A,B) A##B
269
24
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
270
#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
271
/* XXH32 */
272
#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
273
#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
274
#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
275
#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
276
#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
277
#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
278
#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
279
#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
280
#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
281
/* XXH64 */
282
24
#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
283
#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
284
#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
285
#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
286
#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
287
#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
288
#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
289
#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
290
#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
291
/* XXH3_64bits */
292
#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
293
#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
294
#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
295
#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
296
#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
297
#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
298
#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
299
#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
300
#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
301
#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
302
#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
303
#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
304
#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
305
#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
306
#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
307
/* XXH3_128bits */
308
#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
309
#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
310
#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
311
#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
312
#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
313
#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
314
#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
315
#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
316
#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
317
#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
318
#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
319
#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
320
#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
321
#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
322
#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
323
#endif
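For illustration only, a minimal sketch (not part of this header) of how the name mapping above plays out in this util-linux copy, which sets XXH_NAMESPACE to ul_: callers keep writing the regular names, and the macros translate them into prefixed symbols at compile time.

#include <stdio.h>
#include "xxhash.h"                     /* here XXH_NAMESPACE is ul_ */

static void namespace_demo(void)
{
    const char msg[] = "hello";
    /* Written as XXH64(); after XXH_NAME2() expansion it links as ul_XXH64(). */
    XXH64_hash_t h = XXH64(msg, sizeof(msg) - 1, 0);
    printf("%016llx\n", (unsigned long long)h);
}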
324
325
326
/* *************************************
327
*  Version
328
***************************************/
329
0
#define XXH_VERSION_MAJOR    0
330
0
#define XXH_VERSION_MINOR    8
331
0
#define XXH_VERSION_RELEASE  1
332
0
#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
333
334
/*!
335
 * @brief Obtains the xxHash version.
336
 *
337
 * This is mostly useful when xxHash is compiled as a shared library,
338
 * since the returned value comes from the library, as opposed to the header file.
339
 *
340
 * @return `XXH_VERSION_NUMBER` of the invoked library.
341
 */
342
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
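A hedged usage sketch: comparing the value reported by the library at run time against the header this unit was compiled with, which is mainly interesting when xxHash is a shared library. The helper name is illustrative.

#include <stdio.h>
#include "xxhash.h"

static void check_xxhash_version(void)
{
    unsigned lib = XXH_versionNumber();
    if (lib != XXH_VERSION_NUMBER)
        fprintf(stderr, "xxhash: built with %u, running against %u\n",
                (unsigned)XXH_VERSION_NUMBER, lib);
}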
343
344
345
/* ****************************
346
*  Common basic types
347
******************************/
348
#include <stddef.h>   /* size_t */
349
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
350
351
352
/*-**********************************************************************
353
*  32-bit hash
354
************************************************************************/
355
#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
356
/*!
357
 * @brief An unsigned 32-bit integer.
358
 *
359
 * Not necessarily defined to `uint32_t` but functionally equivalent.
360
 */
361
typedef uint32_t XXH32_hash_t;
362
363
#elif !defined (__VMS) \
364
  && (defined (__cplusplus) \
365
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
366
#   include <stdint.h>
367
    typedef uint32_t XXH32_hash_t;
368
369
#else
370
#   include <limits.h>
371
#   if UINT_MAX == 0xFFFFFFFFUL
372
      typedef unsigned int XXH32_hash_t;
373
#   else
374
#     if ULONG_MAX == 0xFFFFFFFFUL
375
        typedef unsigned long XXH32_hash_t;
376
#     else
377
#       error "unsupported platform: need a 32-bit type"
378
#     endif
379
#   endif
380
#endif
381
382
/*!
383
 * @}
384
 *
385
 * @defgroup xxh32_family XXH32 family
386
 * @ingroup public
387
 * Contains functions used in the classic 32-bit xxHash algorithm.
388
 *
389
 * @note
390
 *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
391
 *   Note that @ref xxh3_family provides competitive speed
392
 *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
393
 *
394
 * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
395
 * @see @ref xxh32_impl for implementation details
396
 * @{
397
 */
398
399
/*!
400
 * @brief Calculates the 32-bit hash of @p input using xxHash32.
401
 *
402
 * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
403
 *
404
 * @param input The block of data to be hashed, at least @p length bytes in size.
405
 * @param length The length of @p input, in bytes.
406
 * @param seed The 32-bit seed to alter the hash's output predictably.
407
 *
408
 * @pre
409
 *   The memory between @p input and @p input + @p length must be valid,
410
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
411
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
412
 *
413
 * @return The calculated 32-bit hash value.
414
 *
415
 * @see
416
 *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
417
 *    Direct equivalents for the other variants of xxHash.
418
 * @see
419
 *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
420
 */
421
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
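A minimal one-shot sketch; the helper name and the seed value are illustrative, only the XXH32() call comes from this header.

#include <string.h>
#include "xxhash.h"

static XXH32_hash_t hash_cstring32(const char *s)
{
    /* Hash the string's bytes, excluding the terminating NUL; seed 0. */
    return XXH32(s, strlen(s), 0);
}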
422
423
/*!
424
 * Streaming functions generate the xxHash value from an incremental input.
425
 * This method is slower than single-call functions, due to state management.
426
 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
427
 *
428
 * An XXH state must first be allocated using `XXH*_createState()`.
429
 *
430
 * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
431
 *
432
 * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
433
 *
434
 * The function returns an error code, with 0 meaning OK, and any other value
435
 * meaning there is an error.
436
 *
437
 * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
438
 * This function returns the nn-bit hash value as an int or long long.
439
 *
440
 * It's still possible to continue inserting input into the hash state after a
441
 * digest, and generate new hash values later on by invoking `XXH*_digest()`.
442
 *
443
 * When done, release the state using `XXH*_freeState()`.
444
 *
445
 * Example code for incrementally hashing a file:
446
 * @code{.c}
447
 *    #include <stdio.h>
448
 *    #include <xxhash.h>
449
 *    #define BUFFER_SIZE 256
450
 *
451
 *    // Note: XXH64 and XXH3 use the same interface.
452
 *    XXH32_hash_t
453
 *    hashFile(FILE* stream)
454
 *    {
455
 *        XXH32_state_t* state;
456
 *        unsigned char buf[BUFFER_SIZE];
457
 *        size_t amt;
458
 *        XXH32_hash_t hash;
459
 *
460
 *        state = XXH32_createState();       // Create a state
461
 *        assert(state != NULL);             // Error check here
462
 *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
463
 *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
464
 *            XXH32_update(state, buf, amt); // Hash the file in chunks
465
 *        }
466
 *        hash = XXH32_digest(state);        // Finalize the hash
467
 *        XXH32_freeState(state);            // Clean up
468
 *        return hash;
469
 *    }
470
 * @endcode
471
 */
472
473
/*!
474
 * @typedef struct XXH32_state_s XXH32_state_t
475
 * @brief The opaque state struct for the XXH32 streaming API.
476
 *
477
 * @see XXH32_state_s for details.
478
 */
479
typedef struct XXH32_state_s XXH32_state_t;
480
481
/*!
482
 * @brief Allocates an @ref XXH32_state_t.
483
 *
484
 * Must be freed with XXH32_freeState().
485
 * @return An allocated XXH32_state_t on success, `NULL` on failure.
486
 */
487
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
488
/*!
489
 * @brief Frees an @ref XXH32_state_t.
490
 *
491
 * Must be allocated with XXH32_createState().
492
 * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
493
 * @return XXH_OK.
494
 */
495
XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
496
/*!
497
 * @brief Copies one @ref XXH32_state_t to another.
498
 *
499
 * @param dst_state The state to copy to.
500
 * @param src_state The state to copy from.
501
 * @pre
502
 *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
503
 */
504
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
505
506
/*!
507
 * @brief Resets an @ref XXH32_state_t to begin a new hash.
508
 *
509
 * This function resets and seeds a state. Call it before @ref XXH32_update().
510
 *
511
 * @param statePtr The state struct to reset.
512
 * @param seed The 32-bit seed to alter the hash result predictably.
513
 *
514
 * @pre
515
 *   @p statePtr must not be `NULL`.
516
 *
517
 * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
518
 */
519
XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
520
521
/*!
522
 * @brief Consumes a block of @p input to an @ref XXH32_state_t.
523
 *
524
 * Call this to incrementally consume blocks of data.
525
 *
526
 * @param statePtr The state struct to update.
527
 * @param input The block of data to be hashed, at least @p length bytes in size.
528
 * @param length The length of @p input, in bytes.
529
 *
530
 * @pre
531
 *   @p statePtr must not be `NULL`.
532
 * @pre
533
 *   The memory between @p input and @p input + @p length must be valid,
534
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
535
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
536
 *
537
 * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
538
 */
539
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
540
541
/*!
542
 * @brief Returns the calculated hash value from an @ref XXH32_state_t.
543
 *
544
 * @note
545
 *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
546
 *   digest, and update again.
547
 *
548
 * @param statePtr The state struct to calculate the hash from.
549
 *
550
 * @pre
551
 *  @p statePtr must not be `NULL`.
552
 *
553
 * @return The calculated xxHash32 value from that state.
554
 */
555
XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
556
557
/*******   Canonical representation   *******/
558
559
/*
560
 * The default return values from XXH functions are unsigned 32 and 64 bit
561
 * integers.
562
 * This is the simplest and fastest format for further post-processing.
563
 *
564
 * However, this leaves open the question of byte-level ordering,
565
 * since little and big endian conventions will store the same number differently.
566
 *
567
 * The canonical representation settles this issue by mandating big-endian
568
 * convention, the same convention as human-readable numbers (large digits first).
569
 *
570
 * When writing hash values to storage, sending them over a network, or printing
571
 * them, it's highly recommended to use the canonical representation to ensure
572
 * portability across a wider range of systems, present and future.
573
 *
574
 * The following functions allow transformation of hash values to and from
575
 * canonical format.
576
 */
577
578
/*!
579
 * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
580
 */
581
typedef struct {
582
    unsigned char digest[4]; /*!< Hash bytes, big endian */
583
} XXH32_canonical_t;
584
585
/*!
586
 * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
587
 *
588
 * @param dst The @ref XXH32_canonical_t pointer to be stored to.
589
 * @param hash The @ref XXH32_hash_t to be converted.
590
 *
591
 * @pre
592
 *   @p dst must not be `NULL`.
593
 */
594
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
595
596
/*!
597
 * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
598
 *
599
 * @param src The @ref XXH32_canonical_t to convert.
600
 *
601
 * @pre
602
 *   @p src must not be `NULL`.
603
 *
604
 * @return The converted hash.
605
 */
606
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
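A sketch of the round trip described above, assuming hashes are exchanged through a stdio stream; the helper names and the stream handling are illustrative.

#include <stdio.h>
#include "xxhash.h"

static int write_hash32(FILE *out, XXH32_hash_t h)
{
    XXH32_canonical_t c;
    XXH32_canonicalFromHash(&c, h);              /* fixed, big-endian layout */
    return fwrite(c.digest, sizeof(c.digest), 1, out) == 1 ? 0 : -1;
}

static int read_hash32(FILE *in, XXH32_hash_t *h)
{
    XXH32_canonical_t c;
    if (fread(c.digest, sizeof(c.digest), 1, in) != 1)
        return -1;
    *h = XXH32_hashFromCanonical(&c);            /* back to a native integer */
    return 0;
}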
607
608
/* Reuse FALLTHROUGH macro from c.h */
609
#include "c.h"
610
611
0
#define XXH_FALLTHROUGH FALLTHROUGH
612
613
/*!
614
 * @}
615
 * @ingroup public
616
 * @{
617
 */
618
619
#ifndef XXH_NO_LONG_LONG
620
/*-**********************************************************************
621
*  64-bit hash
622
************************************************************************/
623
#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
624
/*!
625
 * @brief An unsigned 64-bit integer.
626
 *
627
 * Not necessarily defined to `uint64_t` but functionally equivalent.
628
 */
629
typedef uint64_t XXH64_hash_t;
630
#elif !defined (__VMS) \
631
  && (defined (__cplusplus) \
632
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
633
#  include <stdint.h>
634
   typedef uint64_t XXH64_hash_t;
635
#else
636
#  include <limits.h>
637
#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
638
     /* LP64 ABI says uint64_t is unsigned long */
639
     typedef unsigned long XXH64_hash_t;
640
#  else
641
     /* the following type must be exactly 64 bits wide */
642
     typedef unsigned long long XXH64_hash_t;
643
#  endif
644
#endif
645
646
/*!
647
 * @}
648
 *
649
 * @defgroup xxh64_family XXH64 family
650
 * @ingroup public
651
 * @{
652
 * Contains functions used in the classic 64-bit xxHash algorithm.
653
 *
654
 * @note
655
 *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
656
 *   and offers true 64/128 bit hash results.
657
 *   It provides better speed for systems with vector processing capabilities.
658
 */
659
660
661
/*!
662
 * @brief Calculates the 64-bit hash of @p input using xxHash64.
663
 *
664
 * This function usually runs faster on 64-bit systems, but slower on 32-bit
665
 * systems (see benchmark).
666
 *
667
 * @param input The block of data to be hashed, at least @p length bytes in size.
668
 * @param length The length of @p input, in bytes.
669
 * @param seed The 64-bit seed to alter the hash's output predictably.
670
 *
671
 * @pre
672
 *   The memory between @p input and @p input + @p length must be valid,
673
 *   readable, contiguous memory. However, if @p length is `0`, @p input may be
674
 *   `NULL`. In C++, this also must be *TriviallyCopyable*.
675
 *
676
 * @return The calculated 64-bit hash.
677
 *
678
 * @see
679
 *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
680
 *    Direct equivalents for the other variants of xxHash.
681
 * @see
682
 *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
683
 */
684
XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
685
686
/*******   Streaming   *******/
687
/*!
688
 * @brief The opaque state struct for the XXH64 streaming API.
689
 *
690
 * @see XXH64_state_s for details.
691
 */
692
typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
693
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
694
XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
695
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
696
697
XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
698
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
699
XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
700
701
/*******   Canonical representation   *******/
702
typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
703
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
704
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
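A short seeded one-shot sketch for XXH64; the helper name and the seed constant are arbitrary and only illustrative.

#include "xxhash.h"

static XXH64_hash_t hash_block64(const void *buf, size_t len)
{
    const XXH64_hash_t seed = 0x27d4eb2f165667c5ULL;   /* arbitrary example seed */
    return XXH64(buf, len, seed);
}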
705
706
/*!
707
 * @}
708
 * ************************************************************************
709
 * @defgroup xxh3_family XXH3 family
710
 * @ingroup public
711
 * @{
712
 *
713
 * XXH3 is a more recent hash algorithm featuring:
714
 *  - Improved speed for both small and large inputs
715
 *  - True 64-bit and 128-bit outputs
716
 *  - SIMD acceleration
717
 *  - Improved 32-bit viability
718
 *
719
 * Speed analysis methodology is explained here:
720
 *
721
 *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
722
 *
723
 * Compared to XXH64, expect XXH3 to run approximately
724
 * ~2x faster on large inputs and >3x faster on small ones,
725
 * though exact differences vary depending on the platform.
726
 *
727
 * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
728
 * but does not require them.
729
 * Any 32-bit and 64-bit targets that can run XXH32 smoothly
730
 * can run XXH3 at competitive speeds, even without vector support.
731
 * Further details are explained in the implementation.
732
 *
733
 * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
734
 * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
735
 *
736
 * XXH3 implementation is portable:
737
 * it has a generic C90 formulation that can be compiled on any platform,
738
 * and all implementations generate exactly the same hash value on all platforms.
739
 * Starting from v0.8.0, it's also labelled "stable", meaning that
740
 * any future version will also generate the same hash value.
741
 *
742
 * XXH3 offers 2 variants, _64bits and _128bits.
743
 *
744
 * When only 64 bits are needed, prefer invoking the _64bits variant, as it
745
 * reduces the amount of mixing, resulting in faster speed on small inputs.
746
 * It's also generally simpler to manipulate a scalar return type than a struct.
747
 *
748
 * The API supports one-shot hashing, streaming mode, and custom secrets.
749
 */
750
751
/*-**********************************************************************
752
*  XXH3 64-bit variant
753
************************************************************************/
754
755
/* XXH3_64bits():
756
 * default 64-bit variant, using default secret and default seed of 0.
757
 * It's the fastest variant. */
758
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
759
760
/*
761
 * XXH3_64bits_withSeed():
762
 * This variant generates a custom secret on the fly
763
 * based on default secret altered using the `seed` value.
764
 * While this operation is decently fast, note that it's not completely free.
765
 * Note: seed==0 produces the same results as XXH3_64bits().
766
 */
767
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
768
769
/*!
770
 * The bare minimum size for a custom secret.
771
 *
772
 * @see
773
 *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
774
 *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
775
 */
776
#define XXH3_SECRET_SIZE_MIN 136
777
778
/*
779
 * XXH3_64bits_withSecret():
780
 * It's possible to provide any blob of bytes as a "secret" to generate the hash.
781
 * This makes it more difficult for an external actor to prepare an intentional collision.
782
 * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
783
 * However, the quality of the secret impacts the dispersion of the hash algorithm.
784
 * Therefore, the secret _must_ look like a bunch of random bytes.
785
 * Avoid "trivial" or structured data such as repeated sequences or a text document.
786
 * Whenever in doubt about the "randomness" of the blob of bytes,
787
 * consider employing "XXH3_generateSecret()" instead (see below).
788
 * It will generate a proper high entropy secret derived from the blob of bytes.
789
 * Another advantage of using XXH3_generateSecret() is that
790
 * it guarantees that all bits within the initial blob of bytes
791
 * will impact every bit of the output.
792
 * This is not necessarily the case when using the blob of bytes directly
793
 * because, when hashing _small_ inputs, only a portion of the secret is employed.
794
 */
795
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
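A sketch of the three one-shot 64-bit XXH3 entry points side by side. Note that this util-linux copy defines XXH_NO_XXH3, so the calls below only apply to builds where XXH3 is kept enabled; the helper name and its parameters are illustrative.

#include "xxhash.h"

static void xxh3_oneshot_demo(const void *data, size_t len,
                              const void *secret, size_t secretSize)
{
    XXH64_hash_t a = XXH3_64bits(data, len);              /* default secret, seed 0 */
    XXH64_hash_t b = XXH3_64bits_withSeed(data, len, 42); /* secret derived from the seed */
    /* secretSize must be >= XXH3_SECRET_SIZE_MIN and look like random bytes */
    XXH64_hash_t c = XXH3_64bits_withSecret(data, len, secret, secretSize);
    (void)a; (void)b; (void)c;
}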
796
797
798
/*******   Streaming   *******/
799
/*
800
 * Streaming requires state maintenance.
801
 * This operation costs memory and CPU.
802
 * As a consequence, streaming is slower than one-shot hashing.
803
 * For better performance, prefer one-shot functions whenever applicable.
804
 */
805
806
/*!
807
 * @brief The state struct for the XXH3 streaming API.
808
 *
809
 * @see XXH3_state_s for details.
810
 */
811
typedef struct XXH3_state_s XXH3_state_t;
812
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
813
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
814
XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
815
816
/*
817
 * XXH3_64bits_reset():
818
 * Initialize with default parameters.
819
 * digest will be equivalent to `XXH3_64bits()`.
820
 */
821
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
822
/*
823
 * XXH3_64bits_reset_withSeed():
824
 * Generate a custom secret from `seed`, and store it into `statePtr`.
825
 * digest will be equivalent to `XXH3_64bits_withSeed()`.
826
 */
827
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
828
/*
829
 * XXH3_64bits_reset_withSecret():
830
 * `secret` is referenced, not copied: it _must outlive_ the hash streaming session.
831
 * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
832
 * and the quality of produced hash values depends on secret's entropy
833
 * (secret's content should look like a bunch of random bytes).
834
 * When in doubt about the randomness of a candidate `secret`,
835
 * consider employing `XXH3_generateSecret()` instead (see below).
836
 */
837
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
838
839
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
840
XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
841
842
/* note : canonical representation of XXH3 is the same as XXH64
843
 * since they both produce XXH64_hash_t values */
844
845
846
/*-**********************************************************************
847
*  XXH3 128-bit variant
848
************************************************************************/
849
850
/*!
851
 * @brief The return value from 128-bit hashes.
852
 *
853
 * Stored in little endian order, although the fields themselves are in native
854
 * endianness.
855
 */
856
typedef struct {
857
    XXH64_hash_t low64;   /*!< `value & 0xFFFFFFFFFFFFFFFF` */
858
    XXH64_hash_t high64;  /*!< `value >> 64` */
859
} XXH128_hash_t;
860
861
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
862
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
863
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
864
865
/*******   Streaming   *******/
866
/*
867
 * Streaming requires state maintenance.
868
 * This operation costs memory and CPU.
869
 * As a consequence, streaming is slower than one-shot hashing.
870
 * For better performance, prefer one-shot functions whenever applicable.
871
 *
872
 * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
873
 * Use already declared XXH3_createState() and XXH3_freeState().
874
 *
875
 * All reset and streaming functions have same meaning as their 64-bit counterpart.
876
 */
877
878
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
879
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
880
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
881
882
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
883
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
884
885
/* The following helper functions make it possible to compare XXH128_hash_t values.
886
 * Since XXH128_hash_t is a structure, this capability is not offered by the language.
887
 * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
888
889
/*!
890
 * XXH128_isEqual():
891
 * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
892
 */
893
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
894
895
/*!
896
 * XXH128_cmp():
897
 *
898
 * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
899
 *
900
 * return: >0 if *h128_1  > *h128_2
901
 *         =0 if *h128_1 == *h128_2
902
 *         <0 if *h128_1  < *h128_2
903
 */
904
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
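Since XXH128_cmp() already follows the qsort()/bsearch() comparator contract, an array of XXH128_hash_t can be sorted directly; a minimal sketch (helper name illustrative):

#include <stdlib.h>
#include "xxhash.h"

static void sort_hashes(XXH128_hash_t *hashes, size_t count)
{
    qsort(hashes, count, sizeof(hashes[0]), XXH128_cmp);
}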
905
906
907
/*******   Canonical representation   *******/
908
typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
909
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
910
XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
911
912
913
#endif  /* XXH_NO_LONG_LONG */
914
915
/*!
916
 * @}
917
 */
918
#endif /* XXHASH_H_5627135585666179 */
919
920
921
922
#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
923
#define XXHASH_H_STATIC_13879238742
924
/* ****************************************************************************
925
 * This section contains declarations which are not guaranteed to remain stable.
926
 * They may change in future versions, becoming incompatible with a different
927
 * version of the library.
928
 * These declarations should only be used with static linking.
929
 * Never use them in association with dynamic linking!
930
 ***************************************************************************** */
931
932
/*
933
 * These definitions are only present to allow static allocation
934
 * of XXH states, on stack or in a struct, for example.
935
 * Never **ever** access their members directly.
936
 */
937
938
/*!
939
 * @internal
940
 * @brief Structure for XXH32 streaming API.
941
 *
942
 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
943
 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
944
 * an opaque type. This allows fields to safely be changed.
945
 *
946
 * Typedef'd to @ref XXH32_state_t.
947
 * Do not access the members of this struct directly.
948
 * @see XXH64_state_s, XXH3_state_s
949
 */
950
struct XXH32_state_s {
951
   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
952
   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
953
   XXH32_hash_t v[4];         /*!< Accumulator lanes */
954
   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
955
   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
956
   XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it, it may be removed. */
957
};   /* typedef'd to XXH32_state_t */
958
959
960
#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
961
962
/*!
963
 * @internal
964
 * @brief Structure for XXH64 streaming API.
965
 *
966
 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
967
 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
968
 * an opaque type. This allows fields to safely be changed.
969
 *
970
 * Typedef'd to @ref XXH64_state_t.
971
 * Do not access the members of this struct directly.
972
 * @see XXH32_state_s, XXH3_state_s
973
 */
974
struct XXH64_state_s {
975
   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
976
   XXH64_hash_t v[4];         /*!< Accumulator lanes */
977
   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
978
   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
979
   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway. */
980
   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it, it may be removed. */
981
};   /* typedef'd to XXH64_state_t */
982
983
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
984
#  include <stdalign.h>
985
#  define XXH_ALIGN(n)      alignas(n)
986
#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
987
/* In C++ alignas() is a keyword */
988
#  define XXH_ALIGN(n)      alignas(n)
989
#elif defined(__GNUC__)
990
#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
991
#elif defined(_MSC_VER)
992
#  define XXH_ALIGN(n)      __declspec(align(n))
993
#else
994
#  define XXH_ALIGN(n)   /* disabled */
995
#endif
996
997
/* Old GCC versions only accept the attribute after the type in structures. */
998
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
999
    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1000
    && defined(__GNUC__)
1001
#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1002
#else
1003
#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1004
#endif
1005
1006
/*!
1007
 * @brief The size of the internal XXH3 buffer.
1008
 *
1009
 * This is the optimal update size for incremental hashing.
1010
 *
1011
 * @see XXH3_64bits_update(), XXH3_128bits_update().
1012
 */
1013
#define XXH3_INTERNALBUFFER_SIZE 256
1014
1015
/*!
1016
 * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
1017
 *
1018
 * This is the size used in @ref XXH3_kSecret and the seeded functions.
1019
 *
1020
 * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
1021
 */
1022
#define XXH3_SECRET_DEFAULT_SIZE 192
1023
1024
/*!
1025
 * @internal
1026
 * @brief Structure for XXH3 streaming API.
1027
 *
1028
 * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
1029
 * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
1030
 * Otherwise it is an opaque type.
1031
 * Never use this definition in combination with dynamic library.
1032
 * This allows fields to safely be changed in the future.
1033
 *
1034
 * @note ** This structure has a strict alignment requirement of 64 bytes!! **
1035
 * Do not allocate this with `malloc()` or `new`,
1036
 * it will not be sufficiently aligned.
1037
 * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
1038
 *
1039
 * Typedef'd to @ref XXH3_state_t.
1040
 * Never access the members of this struct directly.
1041
 *
1042
 * @see XXH3_INITSTATE() for stack initialization.
1043
 * @see XXH3_createState(), XXH3_freeState().
1044
 * @see XXH32_state_s, XXH64_state_s
1045
 */
1046
struct XXH3_state_s {
1047
   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1048
       /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v and @ref XXH64_state_s */
1049
   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1050
       /*!< Used to store a custom secret generated from a seed. */
1051
   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1052
       /*!< The internal buffer. @see XXH32_state_s::mem32 */
1053
   XXH32_hash_t bufferedSize;
1054
       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
1055
   XXH32_hash_t useSeed;
1056
       /*!< Reserved field. Needed for padding on 64-bit. */
1057
   size_t nbStripesSoFar;
1058
       /*!< Number of stripes processed. */
1059
   XXH64_hash_t totalLen;
1060
       /*!< Total length hashed. 64-bit even on 32-bit targets. */
1061
   size_t nbStripesPerBlock;
1062
       /*!< Number of stripes per block. */
1063
   size_t secretLimit;
1064
       /*!< Size of @ref customSecret or @ref extSecret */
1065
   XXH64_hash_t seed;
1066
       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
1067
   XXH64_hash_t reserved64;
1068
       /*!< Reserved field. */
1069
   const unsigned char* extSecret;
1070
       /*!< Reference to an external secret for the _withSecret variants, NULL
1071
        *   for other variants. */
1072
   /* note: there may be some padding at the end due to alignment on 64 bytes */
1073
}; /* typedef'd to XXH3_state_t */
1074
1075
#undef XXH_ALIGN_MEMBER
1076
1077
/*!
1078
 * @brief Initializes a stack-allocated `XXH3_state_s`.
1079
 *
1080
 * When the @ref XXH3_state_t structure is merely emplaced on stack,
1081
 * it should be initialized with XXH3_INITSTATE() or a memset()
1082
 * in case its first reset uses XXH3_NNbits_reset_withSeed().
1083
 * This init can be omitted if the first reset uses default or _withSecret mode.
1084
 * This operation isn't necessary when the state is created with XXH3_createState().
1085
 * Note that this doesn't prepare the state for a streaming operation,
1086
 * as it's still necessary to use XXH3_NNbits_reset*() afterwards.
1087
 */
1088
#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
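A sketch of the stack-allocation pattern this macro supports, assuming XXH_STATIC_LINKING_ONLY (or XXH_INLINE_ALL) was defined before inclusion so that XXH3_state_t is a complete type; the helper name is illustrative.

#include "xxhash.h"

static XXH64_hash_t hash_two_parts(const void *a, size_t la,
                                   const void *b, size_t lb)
{
    XXH3_state_t state;            /* stack allocation honors the 64-byte alignment */
    XXH3_INITSTATE(&state);        /* clear the seed field, as described above */
    XXH3_64bits_reset(&state);     /* a reset is still mandatory before updating */
    XXH3_64bits_update(&state, a, la);
    XXH3_64bits_update(&state, b, lb);
    return XXH3_64bits_digest(&state);
}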
1089
1090
1091
/* XXH128() :
1092
 * simple alias to pre-selected XXH3_128bits variant
1093
 */
1094
XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
1095
1096
1097
/* ===   Experimental API   === */
1098
/* Symbols defined below must be considered tied to a specific library version. */
1099
1100
/*
1101
 * XXH3_generateSecret():
1102
 *
1103
 * Derive a high-entropy secret from any user-defined content, named customSeed.
1104
 * The generated secret can be used in combination with `*_withSecret()` functions.
1105
 * The `_withSecret()` variants are useful to provide a higher level of protection than a 64-bit seed,
1106
 * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
1107
 *
1108
 * The function accepts as input a custom seed of any length and any content,
1109
 * and derives from it a high-entropy secret of length @secretSize
1110
 * into an already allocated buffer @secretBuffer.
1111
 * @secretSize must be >= XXH3_SECRET_SIZE_MIN
1112
 *
1113
 * The generated secret can then be used with any `*_withSecret()` variant.
1114
 * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
1115
 * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
1116
 * are part of this list. They all accept a `secret` parameter
1117
 * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
1118
 * _and_ feature very high entropy (consist of random-looking bytes).
1119
 * These conditions can be a high bar to meet, so
1120
 * XXH3_generateSecret() can be employed to ensure proper quality.
1121
 *
1122
 * customSeed can be anything. It can have any size, even small ones,
1123
 * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
1124
 * The resulting `secret` will nonetheless provide all required qualities.
1125
 *
1126
 * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
1127
 */
1128
XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
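A sketch of deriving a well-conditioned secret from arbitrary application material and then hashing with it; the buffer size, the seed material, and the helper name are illustrative.

#include <assert.h>
#include "xxhash.h"

static XXH64_hash_t hash_with_custom_secret(const void *data, size_t len)
{
    static const char material[] = "any application-specific bytes, even low entropy";
    unsigned char secret[XXH3_SECRET_SIZE_MIN];   /* minimum allowed size */
    XXH_errorcode rc;

    rc = XXH3_generateSecret(secret, sizeof(secret), material, sizeof(material) - 1);
    assert(rc == XXH_OK);
    (void)rc;
    return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
}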
1129
1130
1131
/*
1132
 * XXH3_generateSecret_fromSeed():
1133
 *
1134
 * Generate the same secret as the _withSeed() variants.
1135
 *
1136
 * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
1137
 * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
1138
 *
1139
 * The generated secret can be used in combination with
1140
 * `*_withSecret()` and `_withSecretandSeed()` variants.
1141
 * This generator is notably useful in combination with `_withSecretandSeed()`,
1142
 * as a way to emulate a faster `_withSeed()` variant.
1143
 */
1144
XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
1145
1146
/*
1147
 * *_withSecretandSeed() :
1148
 * These variants generate hash values using either
1149
 * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
1150
 * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
1151
 *
1152
 * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
1153
 * `_withSeed()` has to generate the secret on the fly for "large" keys.
1154
 * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
1155
 * `_withSecret()` has to generate the masks on the fly for "small" keys,
1156
 * which requires more instructions than _withSeed() variants.
1157
 * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
1158
 *
1159
 * When @secret has been generated by XXH3_generateSecret_fromSeed(),
1160
 * this variant produces *exactly* the same results as `_withSeed()` variant,
1161
 * hence offering only a pure speed benefit on "large" input,
1162
 * by skipping the need to regenerate the secret for every large input.
1163
 *
1164
 * Another usage scenario is to hash the secret to a 64-bit hash value,
1165
 * for example with XXH3_64bits(), which then becomes the seed,
1166
 * and then employ both the seed and the secret in _withSecretandSeed().
1167
 * On top of speed, an added benefit is that each bit in the secret
1168
 * has a 50% chance to swap each bit in the output,
1169
 * via its impact on the seed.
1170
 * This is not guaranteed when using the secret directly in "small data" scenarios,
1171
 * because only portions of the secret are employed for small data.
1172
 */
1173
XXH_PUBLIC_API XXH64_hash_t
1174
XXH3_64bits_withSecretandSeed(const void* data, size_t len,
1175
                              const void* secret, size_t secretSize,
1176
                              XXH64_hash_t seed);
1177
1178
XXH_PUBLIC_API XXH128_hash_t
1179
XXH3_128bits_withSecretandSeed(const void* data, size_t len,
1180
                               const void* secret, size_t secretSize,
1181
                               XXH64_hash_t seed64);
1182
1183
XXH_PUBLIC_API XXH_errorcode
1184
XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
1185
                                    const void* secret, size_t secretSize,
1186
                                    XXH64_hash_t seed64);
1187
1188
XXH_PUBLIC_API XXH_errorcode
1189
XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
1190
                                     const void* secret, size_t secretSize,
1191
                                     XXH64_hash_t seed64);
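A sketch of the pattern described above: expand the seed's secret once with XXH3_generateSecret_fromSeed(), then pass both to the _withSecretandSeed() variant so that large inputs skip per-call secret generation. The caching scheme and helper name are illustrative and not thread-safe.

#include "xxhash.h"

static XXH64_hash_t fast_seeded_hash(const void *data, size_t len,
                                     XXH64_hash_t seed)
{
    static unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
    static int ready;

    if (!ready) {
        XXH3_generateSecret_fromSeed(secret, seed);
        ready = 1;
    }
    /* Per the note above, this matches XXH3_64bits_withSeed(data, len, seed). */
    return XXH3_64bits_withSecretandSeed(data, len, secret, sizeof(secret), seed);
}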
1192
1193
1194
#endif  /* XXH_NO_LONG_LONG */
1195
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1196
#  define XXH_IMPLEMENTATION
1197
#endif
1198
1199
#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1200
1201
1202
/* ======================================================================== */
1203
/* ======================================================================== */
1204
/* ======================================================================== */
1205
1206
1207
/*-**********************************************************************
1208
 * xxHash implementation
1209
 *-**********************************************************************
1210
 * xxHash's implementation used to be hosted inside xxhash.c.
1211
 *
1212
 * However, inlining requires implementation to be visible to the compiler,
1213
 * hence be included alongside the header.
1214
 * Previously, implementation was hosted inside xxhash.c,
1215
 * which was then #included when inlining was activated.
1216
 * This construction created issues with a few build and install systems,
1217
 * as it required xxhash.c to be stored in /include directory.
1218
 *
1219
 * xxHash implementation is now directly integrated within xxhash.h.
1220
 * As a consequence, xxhash.c is no longer needed in /include.
1221
 *
1222
 * xxhash.c is still available and is still useful.
1223
 * In a "normal" setup, when xxhash is not inlined,
1224
 * xxhash.h only exposes the prototypes and public symbols,
1225
 * while xxhash.c can be built into an object file xxhash.o
1226
 * which can then be linked into the final binary.
1227
 ************************************************************************/
1228
1229
#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1230
   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1231
#  define XXH_IMPLEM_13a8737387
1232
1233
/* *************************************
1234
*  Tuning parameters
1235
***************************************/
1236
1237
/*!
1238
 * @defgroup tuning Tuning parameters
1239
 * @{
1240
 *
1241
 * Various macros to control xxHash's behavior.
1242
 */
1243
#ifdef XXH_DOXYGEN
1244
/*!
1245
 * @brief Define this to disable 64-bit code.
1246
 *
1247
 * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
1248
 */
1249
#  define XXH_NO_LONG_LONG
1250
#  undef XXH_NO_LONG_LONG /* don't actually */
1251
/*!
1252
 * @brief Controls how unaligned memory is accessed.
1253
 *
1254
 * By default, access to unaligned memory is controlled by `memcpy()`, which is
1255
 * safe and portable.
1256
 *
1257
 * Unfortunately, on some target/compiler combinations, the generated assembly
1258
 * is sub-optimal.
1259
 *
1260
 * The switch below allows selection of a different access method
1261
 * in the search for improved performance.
1262
 *
1263
 * @par Possible options:
1264
 *
1265
 *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
1266
 *   @par
1267
 *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
1268
 *     eliminate the function call and treat it as an unaligned access.
1269
 *
1270
 *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
1271
 *   @par
1272
 *     Depends on compiler extensions and is therefore not portable.
1273
 *     This method is safe _if_ your compiler supports it,
1274
 *     and *generally* as fast or faster than `memcpy`.
1275
 *
1276
 *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
1277
 *  @par
1278
 *     Casts directly and dereferences. This method doesn't depend on the
1279
 *     compiler, but it violates the C standard as it directly dereferences an
1280
 *     unaligned pointer. It can generate buggy code on targets which do not
1281
 *     support unaligned memory accesses, but in some circumstances, it's the
1282
 *     only known way to get the most performance.
1283
 *
1284
 *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
1285
 *  @par
1286
 *     Also portable. This can generate the best code on old compilers which don't
1287
 *     inline small `memcpy()` calls, and it might also be faster on big-endian
1288
 *     systems which lack a native byteswap instruction. However, some compilers
1289
 *     will emit literal byteshifts even if the target supports unaligned access.
1290
 *  .
1291
 *
1292
 * @warning
1293
 *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
1294
 *   care, as what works on one compiler/platform/optimization level may cause
1295
 *   another to read garbage data or even crash.
1296
 *
1297
 * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
1298
 *
1299
 * Prefer these methods in priority order (0 > 3 > 1 > 2)
1300
 */
1301
#  define XXH_FORCE_MEMORY_ACCESS 0
1302
1303
/*!
1304
 * @def XXH_FORCE_ALIGN_CHECK
1305
 * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
1306
 * and XXH64() only).
1307
 *
1308
 * This is an important performance trick for architectures without decent
1309
 * unaligned memory access performance.
1310
 *
1311
 * It checks for input alignment, and when conditions are met, uses a "fast
1312
 * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
1313
 * faster_ read speed.
1314
 *
1315
 * The check costs one initial branch per hash, which is generally negligible,
1316
 * but not zero.
1317
 *
1318
 * Moreover, it's not useful to generate an additional code path if memory
1319
 * access uses the same instruction for both aligned and unaligned
1320
 * addresses (e.g. x86 and aarch64).
1321
 *
1322
 * In these cases, the alignment check can be removed by setting this macro to 0.
1323
 * Then the code will always use unaligned memory access.
1324
 * The alignment check is automatically disabled on x86, x64, and arm64,
1325
 * which are platforms known to offer good unaligned memory access performance.
1326
 *
1327
 * This option does not affect XXH3 (only XXH32 and XXH64).
1328
 */
1329
#  define XXH_FORCE_ALIGN_CHECK 0
1330
1331
/*!
1332
 * @def XXH_NO_INLINE_HINTS
1333
 * @brief When non-zero, sets all functions to `static`.
1334
 *
1335
 * By default, xxHash tries to force the compiler to inline almost all internal
1336
 * functions.
1337
 *
1338
 * This can usually improve performance due to reduced jumping and improved
1339
 * constant folding, but significantly increases the size of the binary which
1340
 * might not be favorable.
1341
 *
1342
 * Additionally, sometimes the forced inlining can be detrimental to performance,
1343
 * depending on the architecture.
1344
 *
1345
 * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
1346
 * compiler full control over whether to inline or not.
1347
 *
1348
 * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
1349
 * -fno-inline with GCC or Clang, this will automatically be defined.
1350
 */
1351
#  define XXH_NO_INLINE_HINTS 0
1352
1353
/*!
1354
 * @def XXH32_ENDJMP
1355
 * @brief Whether to use a jump for `XXH32_finalize`.
1356
 *
1357
 * By default, `XXH32_finalize` uses multiple branches in the finalizer.
1358
 * This is generally preferable for performance,
1359
 * but depending on the exact architecture, a single jump may be preferable.
1360
 *
1361
 * This setting can only make a difference for very small inputs.
1362
 */
1363
#  define XXH32_ENDJMP 0
1364
1365
/*!
1366
 * @internal
1367
 * @brief Redefines old internal names.
1368
 *
1369
 * For compatibility with code that uses xxHash's internals before the names
1370
 * were changed to improve namespacing. There is no other reason to use this.
1371
 */
1372
#  define XXH_OLD_NAMES
1373
#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1374
#endif /* XXH_DOXYGEN */
1375
/*!
1376
 * @}
1377
 */
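/*
 * Minimal configuration sketch (illustration only; the macro values shown are
 * examples, and every tuning macro keeps its documented default when left
 * undefined). The tuning macros above are meant to be defined before this
 * header is included, or passed on the compiler command line, e.g.
 * `-DXXH_FORCE_MEMORY_ACCESS=0`.
 */
#if 0
/* In one translation unit of the user's project: */
#define XXH_FORCE_MEMORY_ACCESS 0   /* portable memcpy()-based reads (default) */
#define XXH_FORCE_ALIGN_CHECK   0   /* skip the aligned-input fast path */
#define XXH_IMPLEMENTATION          /* emit the implementation in this unit */
#include "xxhash.h"
#endif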
1378
1379
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
1380
   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
1381
#  if !defined(__clang__) && \
1382
( \
1383
    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
1384
    ( \
1385
        defined(__GNUC__) && ( \
1386
            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
1387
            ( \
1388
                defined(__mips__) && \
1389
                (__mips <= 5 || __mips_isa_rev < 6) && \
1390
                (!defined(__mips16) || defined(__mips_mips16e2)) \
1391
            ) \
1392
        ) \
1393
    ) \
1394
)
1395
#    define XXH_FORCE_MEMORY_ACCESS 1
1396
#  endif
1397
#endif
1398
1399
#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
1400
#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
1401
   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
1402
24
#    define XXH_FORCE_ALIGN_CHECK 0
1403
#  else
1404
#    define XXH_FORCE_ALIGN_CHECK 1
1405
#  endif
1406
#endif
1407
1408
#ifndef XXH_NO_INLINE_HINTS
1409
#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
1410
   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
1411
#    define XXH_NO_INLINE_HINTS 1
1412
#  else
1413
#    define XXH_NO_INLINE_HINTS 0
1414
#  endif
1415
#endif
1416
1417
#ifndef XXH32_ENDJMP
1418
/* generally preferable for performance */
1419
0
#  define XXH32_ENDJMP 0
1420
#endif
1421
1422
/*!
1423
 * @defgroup impl Implementation
1424
 * @{
1425
 */
1426
1427
1428
/* *************************************
1429
*  Includes & Memory related functions
1430
***************************************/
1431
/*
1432
 * Modify the local functions below should you wish to use
1433
 * different memory routines for malloc() and free()
1434
 */
1435
#include <stdlib.h>
1436
1437
/*!
1438
 * @internal
1439
 * @brief Modify this function to use a different routine than malloc().
1440
 */
1441
0
static void* XXH_malloc(size_t s) { return malloc(s); }
1442
1443
/*!
1444
 * @internal
1445
 * @brief Modify this function to use a different routine than free().
1446
 */
1447
0
static void XXH_free(void* p) { free(p); }
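/*
 * Sketch of the customization described above: to route xxHash's allocations
 * through another allocator, these two wrappers can be rewritten. The names
 * my_arena_alloc()/my_arena_free() are hypothetical placeholders.
 */
#if 0
static void* XXH_malloc(size_t s) { return my_arena_alloc(s); }  /* hypothetical allocator */
static void  XXH_free(void* p)    { my_arena_free(p); }          /* matching release function */
#endif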
1448
1449
#include <string.h>
1450
1451
/*!
1452
 * @internal
1453
 * @brief Modify this function to use a different routine than memcpy().
1454
 */
1455
static void* XXH_memcpy(void* dest, const void* src, size_t size)
1456
1.71M
{
1457
1.71M
    return memcpy(dest,src,size);
1458
1.71M
}
1459
1460
#include <limits.h>   /* ULLONG_MAX */
1461
1462
1463
/* *************************************
1464
*  Compiler Specific Options
1465
***************************************/
1466
#ifdef _MSC_VER /* Visual Studio warning fix */
1467
#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1468
#endif
1469
1470
#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
1471
#  if defined(__GNUC__) || defined(__clang__)
1472
#    define XXH_FORCE_INLINE static __attribute__((unused))
1473
#  else
1474
#    define XXH_FORCE_INLINE static
1475
#  endif
1476
#  define XXH_NO_INLINE static
1477
/* enable inlining hints */
1478
#elif defined(__GNUC__) || defined(__clang__)
1479
#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1480
#  define XXH_NO_INLINE static __attribute__((noinline))
1481
#elif defined(_MSC_VER)  /* Visual Studio */
1482
#  define XXH_FORCE_INLINE static __forceinline
1483
#  define XXH_NO_INLINE static __declspec(noinline)
1484
#elif defined (__cplusplus) \
1485
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
1486
#  define XXH_FORCE_INLINE static inline
1487
#  define XXH_NO_INLINE static
1488
#else
1489
#  define XXH_FORCE_INLINE static
1490
#  define XXH_NO_INLINE static
1491
#endif
1492
1493
1494
1495
/* *************************************
1496
*  Debug
1497
***************************************/
1498
/*!
1499
 * @ingroup tuning
1500
 * @def XXH_DEBUGLEVEL
1501
 * @brief Sets the debugging level.
1502
 *
1503
 * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
1504
 * compiler's command line options. The value must be a number.
1505
 */
1506
#ifndef XXH_DEBUGLEVEL
1507
#  ifdef DEBUGLEVEL /* backwards compat */
1508
#    define XXH_DEBUGLEVEL DEBUGLEVEL
1509
#  else
1510
#    define XXH_DEBUGLEVEL 0
1511
#  endif
1512
#endif
1513
1514
#if (XXH_DEBUGLEVEL>=1)
1515
#  include <assert.h>   /* note: can still be disabled with NDEBUG */
1516
#  define XXH_ASSERT(c)   assert(c)
1517
#else
1518
0
#  define XXH_ASSERT(c)   ((void)0)
1519
#endif
1520
1521
/* note: use after variable declarations */
1522
#ifndef XXH_STATIC_ASSERT
1523
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
1524
#    include <assert.h>
1525
0
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1526
#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
1527
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1528
#  else
1529
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
1530
#  endif
1531
0
#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
1532
#endif
1533
1534
/*!
1535
 * @internal
1536
 * @def XXH_COMPILER_GUARD(var)
1537
 * @brief Used to prevent unwanted optimizations for @p var.
1538
 *
1539
 * It uses an empty GCC inline assembly statement with a register constraint
1540
 * which forces @p var into a general purpose register (e.g., eax, ebx, ecx
1541
 * on x86) and marks it as modified.
1542
 *
1543
 * This is used in a few places to avoid unwanted autovectorization (e.g.
1544
 * XXH32_round()). All vectorization we want is explicit via intrinsics,
1545
 * and _usually_ isn't wanted elsewhere.
1546
 *
1547
 * We also use it to prevent unwanted constant folding for AArch64 in
1548
 * XXH3_initCustomSecret_scalar().
1549
 */
1550
#if defined(__GNUC__) || defined(__clang__)
1551
#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
1552
#else
1553
#  define XXH_COMPILER_GUARD(var) ((void)0)
1554
#endif
1555
1556
/* *************************************
1557
*  Basic Types
1558
***************************************/
1559
#if !defined (__VMS) \
1560
 && (defined (__cplusplus) \
1561
 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
1562
# include <stdint.h>
1563
  typedef uint8_t xxh_u8;
1564
#else
1565
  typedef unsigned char xxh_u8;
1566
#endif
1567
typedef XXH32_hash_t xxh_u32;
1568
1569
#ifdef XXH_OLD_NAMES
1570
#  define BYTE xxh_u8
1571
#  define U8   xxh_u8
1572
#  define U32  xxh_u32
1573
#endif
1574
1575
/* ***   Memory access   *** */
1576
1577
/*!
1578
 * @internal
1579
 * @fn xxh_u32 XXH_read32(const void* ptr)
1580
 * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
1581
 *
1582
 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
1583
 *
1584
 * @param ptr The pointer to read from.
1585
 * @return The 32-bit native endian integer from the bytes at @p ptr.
1586
 */
1587
1588
/*!
1589
 * @internal
1590
 * @fn xxh_u32 XXH_readLE32(const void* ptr)
1591
 * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
1592
 *
1593
 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
1594
 *
1595
 * @param ptr The pointer to read from.
1596
 * @return The 32-bit little endian integer from the bytes at @p ptr.
1597
 */
1598
1599
/*!
1600
 * @internal
1601
 * @fn xxh_u32 XXH_readBE32(const void* ptr)
1602
 * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
1603
 *
1604
 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
1605
 *
1606
 * @param ptr The pointer to read from.
1607
 * @return The 32-bit big endian integer from the bytes at @p ptr.
1608
 */
1609
1610
/*!
1611
 * @internal
1612
 * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
1613
 * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
1614
 *
1615
 * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
1616
 * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
1617
 * always @ref XXH_alignment::XXH_unaligned.
1618
 *
1619
 * @param ptr The pointer to read from.
1620
 * @param align Whether @p ptr is aligned.
1621
 * @pre
1622
 *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
1623
 *   aligned.
1624
 * @return The 32-bit little endian integer from the bytes at @p ptr.
1625
 */
1626
1627
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1628
/*
1629
 * Manual byteshift. Best for old compilers which don't inline memcpy.
1630
 * We actually directly use XXH_readLE32 and XXH_readBE32.
1631
 */
1632
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1633
1634
/*
1635
 * Force direct memory access. Only works on CPUs which support unaligned memory
1636
 * access in hardware.
1637
 */
1638
static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
1639
1640
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1641
1642
/*
1643
 * __pack instructions are safer, but compiler specific, hence potentially
1644
 * problematic for some compilers.
1645
 *
1646
 * Currently only defined for GCC and ICC.
1647
 */
1648
#ifdef XXH_OLD_NAMES
1649
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
1650
#endif
1651
static xxh_u32 XXH_read32(const void* ptr)
1652
{
1653
    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
1654
    return ((const xxh_unalign*)ptr)->u32;
1655
}
1656
1657
#else
1658
1659
/*
1660
 * Portable and safe solution. Generally efficient.
1661
 * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
1662
 */
1663
static xxh_u32 XXH_read32(const void* memPtr)
1664
0
{
1665
0
    xxh_u32 val;
1666
0
    XXH_memcpy(&val, memPtr, sizeof(val));
1667
0
    return val;
1668
0
}
1669
1670
#endif   /* XXH_FORCE_MEMORY_ACCESS */
1671
1672
1673
/* ***   Endianness   *** */
1674
1675
/*!
1676
 * @ingroup tuning
1677
 * @def XXH_CPU_LITTLE_ENDIAN
1678
 * @brief Whether the target is little endian.
1679
 *
1680
 * Defined to 1 if the target is little endian, or 0 if it is big endian.
1681
 * It can be defined externally, for example on the compiler command line.
1682
 *
1683
 * If it is not defined,
1684
 * a runtime check (which is usually constant folded) is used instead.
1685
 *
1686
 * @note
1687
 *   This is not necessarily defined to an integer constant.
1688
 *
1689
 * @see XXH_isLittleEndian() for the runtime check.
1690
 */
1691
#ifndef XXH_CPU_LITTLE_ENDIAN
1692
/*
1693
 * Try to detect endianness automatically, to avoid the nonstandard behavior
1694
 * in `XXH_isLittleEndian()`
1695
 */
1696
#  if defined(_WIN32) /* Windows is always little endian */ \
1697
     || defined(__LITTLE_ENDIAN__) \
1698
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1699
1.71M
#    define XXH_CPU_LITTLE_ENDIAN 1
1700
#  elif defined(__BIG_ENDIAN__) \
1701
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
1702
#    define XXH_CPU_LITTLE_ENDIAN 0
1703
#  else
1704
/*!
1705
 * @internal
1706
 * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
1707
 *
1708
 * Most compilers will constant fold this.
1709
 */
1710
static int XXH_isLittleEndian(void)
1711
{
1712
    /*
1713
     * Portable and well-defined behavior.
1714
     * Don't use static: it is detrimental to performance.
1715
     */
1716
    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
1717
    return one.c[0];
1718
}
1719
#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
1720
#  endif
1721
#endif
1722
1723
1724
1725
1726
/* ****************************************
1727
*  Compiler-specific Functions and Macros
1728
******************************************/
1729
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
1730
1731
#ifdef __has_builtin
1732
#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
1733
#else
1734
#  define XXH_HAS_BUILTIN(x) 0
1735
#endif
1736
1737
/*!
1738
 * @internal
1739
 * @def XXH_rotl32(x,r)
1740
 * @brief 32-bit rotate left.
1741
 *
1742
 * @param x The 32-bit integer to be rotated.
1743
 * @param r The number of bits to rotate.
1744
 * @pre
1745
 *   @p r > 0 && @p r < 32
1746
 * @note
1747
 *   @p x and @p r may be evaluated multiple times.
1748
 * @return The rotated result.
1749
 */
1750
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
1751
                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
1752
0
#  define XXH_rotl32 __builtin_rotateleft32
1753
1.71M
#  define XXH_rotl64 __builtin_rotateleft64
1754
/* Note: although _rotl exists for MinGW (GCC under Windows), performance seems poor */
1755
#elif defined(_MSC_VER)
1756
#  define XXH_rotl32(x,r) _rotl(x,r)
1757
#  define XXH_rotl64(x,r) _rotl64(x,r)
1758
#else
1759
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
1760
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
1761
#endif
1762
1763
/*!
1764
 * @internal
1765
 * @fn xxh_u32 XXH_swap32(xxh_u32 x)
1766
 * @brief A 32-bit byteswap.
1767
 *
1768
 * @param x The 32-bit integer to byteswap.
1769
 * @return @p x, byteswapped.
1770
 */
1771
#if defined(_MSC_VER)     /* Visual Studio */
1772
#  define XXH_swap32 _byteswap_ulong
1773
#elif XXH_GCC_VERSION >= 403
1774
#  define XXH_swap32 __builtin_bswap32
1775
#else
1776
static xxh_u32 XXH_swap32 (xxh_u32 x)
1777
0
{
1778
0
    return  ((x << 24) & 0xff000000 ) |
1779
0
            ((x <<  8) & 0x00ff0000 ) |
1780
0
            ((x >>  8) & 0x0000ff00 ) |
1781
0
            ((x >> 24) & 0x000000ff );
1782
0
}
1783
#endif
1784
1785
1786
/* ***************************
1787
*  Memory reads
1788
*****************************/
1789
1790
/*!
1791
 * @internal
1792
 * @brief Enum to indicate whether a pointer is aligned.
1793
 */
1794
typedef enum {
1795
    XXH_aligned,  /*!< Aligned */
1796
    XXH_unaligned /*!< Possibly unaligned */
1797
} XXH_alignment;
1798
1799
/*
1800
 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
1801
 *
1802
 * This is ideal for older compilers which don't inline memcpy.
1803
 */
1804
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1805
1806
XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
1807
{
1808
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
1809
    return bytePtr[0]
1810
         | ((xxh_u32)bytePtr[1] << 8)
1811
         | ((xxh_u32)bytePtr[2] << 16)
1812
         | ((xxh_u32)bytePtr[3] << 24);
1813
}
1814
1815
XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
1816
{
1817
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
1818
    return bytePtr[3]
1819
         | ((xxh_u32)bytePtr[2] << 8)
1820
         | ((xxh_u32)bytePtr[1] << 16)
1821
         | ((xxh_u32)bytePtr[0] << 24);
1822
}
1823
1824
#else
1825
XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
1826
0
{
1827
0
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
1828
0
}
1829
1830
static xxh_u32 XXH_readBE32(const void* ptr)
1831
0
{
1832
0
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
1833
0
}
1834
#endif
1835
1836
XXH_FORCE_INLINE xxh_u32
1837
XXH_readLE32_align(const void* ptr, XXH_alignment align)
1838
0
{
1839
0
    if (align==XXH_unaligned) {
1840
0
        return XXH_readLE32(ptr);
1841
0
    } else {
1842
0
        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
1843
0
    }
1844
0
}
1845
1846
1847
/* *************************************
1848
*  Misc
1849
***************************************/
1850
/*! @ingroup public */
1851
0
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
1852
1853
1854
/* *******************************************************************
1855
*  32-bit hash functions
1856
*********************************************************************/
1857
/*!
1858
 * @}
1859
 * @defgroup xxh32_impl XXH32 implementation
1860
 * @ingroup impl
1861
 * @{
1862
 */
1863
 /* #define instead of static const, to be used as initializers */
1864
0
#define XXH_PRIME32_1  0x9E3779B1U  /*!< 0b10011110001101110111100110110001 */
1865
0
#define XXH_PRIME32_2  0x85EBCA77U  /*!< 0b10000101111010111100101001110111 */
1866
0
#define XXH_PRIME32_3  0xC2B2AE3DU  /*!< 0b11000010101100101010111000111101 */
1867
0
#define XXH_PRIME32_4  0x27D4EB2FU  /*!< 0b00100111110101001110101100101111 */
1868
0
#define XXH_PRIME32_5  0x165667B1U  /*!< 0b00010110010101100110011110110001 */
1869
1870
#ifdef XXH_OLD_NAMES
1871
#  define PRIME32_1 XXH_PRIME32_1
1872
#  define PRIME32_2 XXH_PRIME32_2
1873
#  define PRIME32_3 XXH_PRIME32_3
1874
#  define PRIME32_4 XXH_PRIME32_4
1875
#  define PRIME32_5 XXH_PRIME32_5
1876
#endif
1877
1878
/*!
1879
 * @internal
1880
 * @brief Normal stripe processing routine.
1881
 *
1882
 * This shuffles the bits so that any bit from @p input impacts several bits in
1883
 * @p acc.
1884
 *
1885
 * @param acc The accumulator lane.
1886
 * @param input The stripe of input to mix.
1887
 * @return The mixed accumulator lane.
1888
 */
1889
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
1890
0
{
1891
0
    acc += input * XXH_PRIME32_2;
1892
0
    acc  = XXH_rotl32(acc, 13);
1893
0
    acc *= XXH_PRIME32_1;
1894
#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
1895
    /*
1896
     * UGLY HACK:
1897
     * A compiler fence is the only thing that prevents GCC and Clang from
1898
     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
1899
     * reason) without globally disabling SSE4.1.
1900
     *
1901
     * The reason we want to avoid vectorization is because despite working on
1902
     * 4 integers at a time, there are multiple factors slowing XXH32 down on
1903
     * SSE4:
1904
     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
1905
     *   newer chips!) making it slightly slower to multiply four integers at
1906
 *   once compared to four integers independently. Even on Sandy/Ivy Bridge,
1907
 *   where pmulld was fastest, it is still not worth it to go into SSE
1908
 *   just to multiply unless doing a long operation.
1909
     *
1910
     * - Four instructions are required to rotate,
1911
 *      movdqa tmp,  v // not required with VEX encoding
1912
     *      pslld  tmp, 13 // tmp <<= 13
1913
     *      psrld  v,   19 // x >>= 19
1914
     *      por    v,  tmp // x |= tmp
1915
     *   compared to one for scalar:
1916
     *      roll   v, 13    // reliably fast across the board
1917
     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
1918
     *
1919
     * - Instruction level parallelism is actually more beneficial here because
1920
     *   the SIMD actually serializes this operation: While v1 is rotating, v2
1921
     *   can load data, while v3 can multiply. SSE forces them to operate
1922
     *   together.
1923
     *
1924
     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
1925
     * and it is pointless writing a NEON implementation that is basically the
1926
     * same speed as scalar for XXH32.
1927
     */
1928
    XXH_COMPILER_GUARD(acc);
1929
#endif
1930
0
    return acc;
1931
0
}
1932
1933
/*!
1934
 * @internal
1935
 * @brief Mixes all bits to finalize the hash.
1936
 *
1937
 * The final mix ensures that all input bits have a chance to impact any bit in
1938
 * the output digest, resulting in an unbiased distribution.
1939
 *
1940
 * @param h32 The hash to avalanche.
1941
 * @return The avalanched hash.
1942
 */
1943
static xxh_u32 XXH32_avalanche(xxh_u32 h32)
1944
0
{
1945
0
    h32 ^= h32 >> 15;
1946
0
    h32 *= XXH_PRIME32_2;
1947
0
    h32 ^= h32 >> 13;
1948
0
    h32 *= XXH_PRIME32_3;
1949
0
    h32 ^= h32 >> 16;
1950
0
    return(h32);
1951
0
}
1952
1953
0
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
1954
1955
/*!
1956
 * @internal
1957
 * @brief Processes the last 0-15 bytes of @p ptr.
1958
 *
1959
 * There may be up to 15 bytes remaining to consume from the input.
1960
 * This final stage will digest them to ensure that all input bytes are present
1961
 * in the final mix.
1962
 *
1963
 * @param h32 The hash to finalize.
1964
 * @param ptr The pointer to the remaining input.
1965
 * @param len The remaining length, modulo 16.
1966
 * @param align Whether @p ptr is aligned.
1967
 * @return The finalized hash.
1968
 */
1969
static xxh_u32
1970
XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
1971
0
{
1972
0
#define XXH_PROCESS1 do {                           \
1973
0
    h32 += (*ptr++) * XXH_PRIME32_5;                \
1974
0
    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
1975
0
} while (0)
1976
1977
0
#define XXH_PROCESS4 do {                           \
1978
0
    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
1979
0
    ptr += 4;                                   \
1980
0
    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
1981
0
} while (0)
1982
1983
0
    if (ptr==NULL) XXH_ASSERT(len == 0);
1984
1985
    /* Compact rerolled version; generally faster */
1986
0
    if (!XXH32_ENDJMP) {
1987
0
        len &= 15;
1988
0
        while (len >= 4) {
1989
0
            XXH_PROCESS4;
1990
0
            len -= 4;
1991
0
        }
1992
0
        while (len > 0) {
1993
0
            XXH_PROCESS1;
1994
0
            --len;
1995
0
        }
1996
0
        return XXH32_avalanche(h32);
1997
0
    } else {
1998
0
         switch(len&15) /* or switch(bEnd - p) */ {
1999
0
           case 12:      XXH_PROCESS4;
2000
0
                         XXH_FALLTHROUGH;
2001
0
           case 8:       XXH_PROCESS4;
2002
0
                         XXH_FALLTHROUGH;
2003
0
           case 4:       XXH_PROCESS4;
2004
0
                         return XXH32_avalanche(h32);
2005
2006
0
           case 13:      XXH_PROCESS4;
2007
0
                         XXH_FALLTHROUGH;
2008
0
           case 9:       XXH_PROCESS4;
2009
0
                         XXH_FALLTHROUGH;
2010
0
           case 5:       XXH_PROCESS4;
2011
0
                         XXH_PROCESS1;
2012
0
                         return XXH32_avalanche(h32);
2013
2014
0
           case 14:      XXH_PROCESS4;
2015
0
                         XXH_FALLTHROUGH;
2016
0
           case 10:      XXH_PROCESS4;
2017
0
                         XXH_FALLTHROUGH;
2018
0
           case 6:       XXH_PROCESS4;
2019
0
                         XXH_PROCESS1;
2020
0
                         XXH_PROCESS1;
2021
0
                         return XXH32_avalanche(h32);
2022
2023
0
           case 15:      XXH_PROCESS4;
2024
0
                         XXH_FALLTHROUGH;
2025
0
           case 11:      XXH_PROCESS4;
2026
0
                         XXH_FALLTHROUGH;
2027
0
           case 7:       XXH_PROCESS4;
2028
0
                         XXH_FALLTHROUGH;
2029
0
           case 3:       XXH_PROCESS1;
2030
0
                         XXH_FALLTHROUGH;
2031
0
           case 2:       XXH_PROCESS1;
2032
0
                         XXH_FALLTHROUGH;
2033
0
           case 1:       XXH_PROCESS1;
2034
0
                         XXH_FALLTHROUGH;
2035
0
           case 0:       return XXH32_avalanche(h32);
2036
0
        }
2037
0
        XXH_ASSERT(0);
2038
0
        return h32;   /* reaching this point is deemed impossible */
2039
0
    }
2040
0
}
2041
2042
#ifdef XXH_OLD_NAMES
2043
#  define PROCESS1 XXH_PROCESS1
2044
#  define PROCESS4 XXH_PROCESS4
2045
#else
2046
#  undef XXH_PROCESS1
2047
#  undef XXH_PROCESS4
2048
#endif
2049
2050
/*!
2051
 * @internal
2052
 * @brief The implementation for @ref XXH32().
2053
 *
2054
 * @param input , len , seed Directly passed from @ref XXH32().
2055
 * @param align Whether @p input is aligned.
2056
 * @return The calculated hash.
2057
 */
2058
XXH_FORCE_INLINE xxh_u32
2059
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2060
0
{
2061
0
    xxh_u32 h32;
2062
2063
0
    if (input==NULL) XXH_ASSERT(len == 0);
2064
2065
0
    if (len>=16) {
2066
0
        const xxh_u8* const bEnd = input + len;
2067
0
        const xxh_u8* const limit = bEnd - 15;
2068
0
        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2069
0
        xxh_u32 v2 = seed + XXH_PRIME32_2;
2070
0
        xxh_u32 v3 = seed + 0;
2071
0
        xxh_u32 v4 = seed - XXH_PRIME32_1;
2072
2073
0
        do {
2074
0
            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
2075
0
            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
2076
0
            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
2077
0
            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
2078
0
        } while (input < limit);
2079
2080
0
        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
2081
0
            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2082
0
    } else {
2083
0
        h32  = seed + XXH_PRIME32_5;
2084
0
    }
2085
2086
0
    h32 += (xxh_u32)len;
2087
2088
0
    return XXH32_finalize(h32, input, len&15, align);
2089
0
}
2090
2091
/*! @ingroup xxh32_family */
2092
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2093
0
{
2094
#if 0
2095
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2096
    XXH32_state_t state;
2097
    XXH32_reset(&state, seed);
2098
    XXH32_update(&state, (const xxh_u8*)input, len);
2099
    return XXH32_digest(&state);
2100
#else
2101
0
    if (XXH_FORCE_ALIGN_CHECK) {
2102
0
        if ((((size_t)input) & 3) == 0) {   /* Input is 4-byte aligned, leverage the speed benefit */
2103
0
            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2104
0
    }   }
2105
2106
0
    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2107
0
#endif
2108
0
}
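/*
 * One-shot usage sketch for XXH32(), in a separate translation unit
 * (illustration only; the buffer and the zero seed are arbitrary choices):
 */
#if 0
#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main(void)
{
    const char msg[] = "sample input";
    XXH32_hash_t const h = XXH32(msg, strlen(msg), 0 /* seed */);
    printf("XXH32 = 0x%08X\n", (unsigned)h);
    return 0;
}
#endif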
2109
2110
2111
2112
/*******   Hash streaming   *******/
2113
/*!
2114
 * @ingroup xxh32_family
2115
 */
2116
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
2117
0
{
2118
0
    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2119
0
}
2120
/*! @ingroup xxh32_family */
2121
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
2122
0
{
2123
0
    XXH_free(statePtr);
2124
0
    return XXH_OK;
2125
0
}
2126
2127
/*! @ingroup xxh32_family */
2128
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2129
0
{
2130
0
    XXH_memcpy(dstState, srcState, sizeof(*dstState));
2131
0
}
2132
2133
/*! @ingroup xxh32_family */
2134
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2135
0
{
2136
0
    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
2137
0
    memset(&state, 0, sizeof(state));
2138
0
    state.v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2139
0
    state.v[1] = seed + XXH_PRIME32_2;
2140
0
    state.v[2] = seed + 0;
2141
0
    state.v[3] = seed - XXH_PRIME32_1;
2142
    /* do not write into reserved, planned to be removed in a future version */
2143
0
    XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
2144
0
    return XXH_OK;
2145
0
}
2146
2147
2148
/*! @ingroup xxh32_family */
2149
XXH_PUBLIC_API XXH_errorcode
2150
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2151
0
{
2152
0
    if (input==NULL) {
2153
0
        XXH_ASSERT(len == 0);
2154
0
        return XXH_OK;
2155
0
    }
2156
2157
0
    {   const xxh_u8* p = (const xxh_u8*)input;
2158
0
        const xxh_u8* const bEnd = p + len;
2159
2160
0
        state->total_len_32 += (XXH32_hash_t)len;
2161
0
        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2162
2163
0
        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
2164
0
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
2165
0
            state->memsize += (XXH32_hash_t)len;
2166
0
            return XXH_OK;
2167
0
        }
2168
2169
0
        if (state->memsize) {   /* some data left from previous update */
2170
0
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
2171
0
            {   const xxh_u32* p32 = state->mem32;
2172
0
                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2173
0
                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2174
0
                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2175
0
                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2176
0
            }
2177
0
            p += 16-state->memsize;
2178
0
            state->memsize = 0;
2179
0
        }
2180
2181
0
        if (p <= bEnd-16) {
2182
0
            const xxh_u8* const limit = bEnd - 16;
2183
2184
0
            do {
2185
0
                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2186
0
                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2187
0
                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2188
0
                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2189
0
            } while (p<=limit);
2190
2191
0
        }
2192
2193
0
        if (p < bEnd) {
2194
0
            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
2195
0
            state->memsize = (unsigned)(bEnd-p);
2196
0
        }
2197
0
    }
2198
2199
0
    return XXH_OK;
2200
0
}
2201
2202
2203
/*! @ingroup xxh32_family */
2204
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2205
0
{
2206
0
    xxh_u32 h32;
2207
2208
0
    if (state->large_len) {
2209
0
        h32 = XXH_rotl32(state->v[0], 1)
2210
0
            + XXH_rotl32(state->v[1], 7)
2211
0
            + XXH_rotl32(state->v[2], 12)
2212
0
            + XXH_rotl32(state->v[3], 18);
2213
0
    } else {
2214
0
        h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2215
0
    }
2216
2217
0
    h32 += state->total_len_32;
2218
2219
0
    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2220
0
}
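/*
 * Streaming usage sketch: hashing data that arrives in chunks with the state
 * API above (illustration only; hash_chunks() and its parameters are made up
 * for the example).
 */
#if 0
#include "xxhash.h"

static XXH32_hash_t hash_chunks(const void* const* chunks, const size_t* sizes,
                                size_t nbChunks, XXH32_hash_t seed)
{
    XXH32_hash_t result = 0;
    XXH32_state_t* const state = XXH32_createState();
    if (state == NULL) return 0;               /* allocation failure */
    if (XXH32_reset(state, seed) == XXH_OK) {
        size_t i;
        for (i = 0; i < nbChunks; i++) {
            if (XXH32_update(state, chunks[i], sizes[i]) != XXH_OK) break;
        }
        result = XXH32_digest(state);          /* digest does not reset the state */
    }
    XXH32_freeState(state);
    return result;
}
#endif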
2221
2222
2223
/*******   Canonical representation   *******/
2224
2225
/*!
2226
 * @ingroup xxh32_family
2227
 * The default return values from XXH functions are unsigned 32 and 64 bit
2228
 * integers.
2229
 *
2230
 * The canonical representation uses big endian convention, the same convention
2231
 * as human-readable numbers (large digits first).
2232
 *
2233
 * This way, hash values can be written into a file or buffer, remaining
2234
 * comparable across different systems.
2235
 *
2236
 * The following functions allow transformation of hash values to and from their
2237
 * canonical format.
2238
 */
2239
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
2240
0
{
2241
0
    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2242
0
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2243
0
    XXH_memcpy(dst, &hash, sizeof(*dst));
2244
0
}
2245
/*! @ingroup xxh32_family */
2246
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
2247
0
{
2248
0
    return XXH_readBE32(src);
2249
0
}
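/*
 * Canonical representation sketch: storing a hash in a fixed (big endian) byte
 * order and reading it back with the two functions above (illustration only).
 */
#if 0
#include <stdio.h>
#include "xxhash.h"

static void save_hash(FILE* f, XXH32_hash_t h)
{
    XXH32_canonical_t canon;
    XXH32_canonicalFromHash(&canon, h);     /* canonical form is always big endian */
    fwrite(&canon, sizeof(canon), 1, f);
}

static XXH32_hash_t load_hash(FILE* f)
{
    XXH32_canonical_t canon;
    if (fread(&canon, sizeof(canon), 1, f) != 1) return 0;
    return XXH32_hashFromCanonical(&canon); /* same value on any host endianness */
}
#endif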
2250
2251
2252
#ifndef XXH_NO_LONG_LONG
2253
2254
/* *******************************************************************
2255
*  64-bit hash functions
2256
*********************************************************************/
2257
/*!
2258
 * @}
2259
 * @ingroup impl
2260
 * @{
2261
 */
2262
/*******   Memory access   *******/
2263
2264
typedef XXH64_hash_t xxh_u64;
2265
2266
#ifdef XXH_OLD_NAMES
2267
#  define U64 xxh_u64
2268
#endif
2269
2270
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2271
/*
2272
 * Manual byteshift. Best for old compilers which don't inline memcpy.
2273
 * We actually directly use XXH_readLE64 and XXH_readBE64.
2274
 */
2275
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2276
2277
/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
2278
static xxh_u64 XXH_read64(const void* memPtr)
2279
{
2280
    return *(const xxh_u64*) memPtr;
2281
}
2282
2283
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2284
2285
/*
2286
 * __pack instructions are safer, but compiler specific, hence potentially
2287
 * problematic for some compilers.
2288
 *
2289
 * Currently only defined for GCC and ICC.
2290
 */
2291
#ifdef XXH_OLD_NAMES
2292
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2293
#endif
2294
static xxh_u64 XXH_read64(const void* ptr)
2295
{
2296
    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
2297
    return ((const xxh_unalign64*)ptr)->u64;
2298
}
2299
2300
#else
2301
2302
/*
2303
 * Portable and safe solution. Generally efficient.
2304
 * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2305
 */
2306
static xxh_u64 XXH_read64(const void* memPtr)
2307
1.71M
{
2308
1.71M
    xxh_u64 val;
2309
1.71M
    XXH_memcpy(&val, memPtr, sizeof(val));
2310
1.71M
    return val;
2311
1.71M
}
2312
2313
#endif   /* XXH_FORCE_MEMORY_ACCESS */
2314
2315
#if defined(_MSC_VER)     /* Visual Studio */
2316
#  define XXH_swap64 _byteswap_uint64
2317
#elif XXH_GCC_VERSION >= 403
2318
#  define XXH_swap64 __builtin_bswap64
2319
#else
2320
static xxh_u64 XXH_swap64(xxh_u64 x)
2321
0
{
2322
0
    return  ((x << 56) & 0xff00000000000000ULL) |
2323
0
            ((x << 40) & 0x00ff000000000000ULL) |
2324
0
            ((x << 24) & 0x0000ff0000000000ULL) |
2325
0
            ((x << 8)  & 0x000000ff00000000ULL) |
2326
0
            ((x >> 8)  & 0x00000000ff000000ULL) |
2327
0
            ((x >> 24) & 0x0000000000ff0000ULL) |
2328
0
            ((x >> 40) & 0x000000000000ff00ULL) |
2329
0
            ((x >> 56) & 0x00000000000000ffULL);
2330
0
}
2331
#endif
2332
2333
2334
/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
2335
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2336
2337
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
2338
{
2339
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2340
    return bytePtr[0]
2341
         | ((xxh_u64)bytePtr[1] << 8)
2342
         | ((xxh_u64)bytePtr[2] << 16)
2343
         | ((xxh_u64)bytePtr[3] << 24)
2344
         | ((xxh_u64)bytePtr[4] << 32)
2345
         | ((xxh_u64)bytePtr[5] << 40)
2346
         | ((xxh_u64)bytePtr[6] << 48)
2347
         | ((xxh_u64)bytePtr[7] << 56);
2348
}
2349
2350
XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
2351
{
2352
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2353
    return bytePtr[7]
2354
         | ((xxh_u64)bytePtr[6] << 8)
2355
         | ((xxh_u64)bytePtr[5] << 16)
2356
         | ((xxh_u64)bytePtr[4] << 24)
2357
         | ((xxh_u64)bytePtr[3] << 32)
2358
         | ((xxh_u64)bytePtr[2] << 40)
2359
         | ((xxh_u64)bytePtr[1] << 48)
2360
         | ((xxh_u64)bytePtr[0] << 56);
2361
}
2362
2363
#else
2364
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
2365
1.71M
{
2366
1.71M
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
2367
1.71M
}
2368
2369
static xxh_u64 XXH_readBE64(const void* ptr)
2370
0
{
2371
0
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
2372
0
}
2373
#endif
2374
2375
XXH_FORCE_INLINE xxh_u64
2376
XXH_readLE64_align(const void* ptr, XXH_alignment align)
2377
1.71M
{
2378
1.71M
    if (align==XXH_unaligned)
2379
1.71M
        return XXH_readLE64(ptr);
2380
0
    else
2381
0
        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
2382
1.71M
}
2383
2384
2385
/*******   xxh64   *******/
2386
/*!
2387
 * @}
2388
 * @defgroup xxh64_impl XXH64 implementation
2389
 * @ingroup impl
2390
 * @{
2391
 */
2392
/* #define rather than static const, to be used as initializers */
2393
1.71M
#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
2394
1.71M
#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
2395
24
#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
2396
126
#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
2397
0
#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
2398
2399
#ifdef XXH_OLD_NAMES
2400
#  define PRIME64_1 XXH_PRIME64_1
2401
#  define PRIME64_2 XXH_PRIME64_2
2402
#  define PRIME64_3 XXH_PRIME64_3
2403
#  define PRIME64_4 XXH_PRIME64_4
2404
#  define PRIME64_5 XXH_PRIME64_5
2405
#endif
2406
2407
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
2408
1.71M
{
2409
1.71M
    acc += input * XXH_PRIME64_2;
2410
1.71M
    acc  = XXH_rotl64(acc, 31);
2411
1.71M
    acc *= XXH_PRIME64_1;
2412
1.71M
    return acc;
2413
1.71M
}
2414
2415
static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
2416
96
{
2417
96
    val  = XXH64_round(0, val);
2418
96
    acc ^= val;
2419
96
    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
2420
96
    return acc;
2421
96
}
2422
2423
static xxh_u64 XXH64_avalanche(xxh_u64 h64)
2424
24
{
2425
24
    h64 ^= h64 >> 33;
2426
24
    h64 *= XXH_PRIME64_2;
2427
24
    h64 ^= h64 >> 29;
2428
24
    h64 *= XXH_PRIME64_3;
2429
24
    h64 ^= h64 >> 32;
2430
24
    return h64;
2431
24
}
2432
2433
2434
1.71M
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
2435
2436
static xxh_u64
2437
XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
2438
24
{
2439
24
    if (ptr==NULL) XXH_ASSERT(len == 0);
2440
24
    len &= 31;
2441
54
    while (len >= 8) {
2442
30
        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
2443
30
        ptr += 8;
2444
30
        h64 ^= k1;
2445
30
        h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2446
30
        len -= 8;
2447
30
    }
2448
24
    if (len >= 4) {
2449
0
        h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2450
0
        ptr += 4;
2451
0
        h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2452
0
        len -= 4;
2453
0
    }
2454
24
    while (len > 0) {
2455
0
        h64 ^= (*ptr++) * XXH_PRIME64_5;
2456
0
        h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
2457
0
        --len;
2458
0
    }
2459
24
    return  XXH64_avalanche(h64);
2460
24
}
2461
2462
#ifdef XXH_OLD_NAMES
2463
#  define PROCESS1_64 XXH_PROCESS1_64
2464
#  define PROCESS4_64 XXH_PROCESS4_64
2465
#  define PROCESS8_64 XXH_PROCESS8_64
2466
#else
2467
#  undef XXH_PROCESS1_64
2468
#  undef XXH_PROCESS4_64
2469
#  undef XXH_PROCESS8_64
2470
#endif
2471
2472
XXH_FORCE_INLINE xxh_u64
2473
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
2474
24
{
2475
24
    xxh_u64 h64;
2476
24
    if (input==NULL) XXH_ASSERT(len == 0);
2477
2478
24
    if (len>=32) {
2479
24
        const xxh_u8* const bEnd = input + len;
2480
24
        const xxh_u8* const limit = bEnd - 31;
2481
24
        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2482
24
        xxh_u64 v2 = seed + XXH_PRIME64_2;
2483
24
        xxh_u64 v3 = seed + 0;
2484
24
        xxh_u64 v4 = seed - XXH_PRIME64_1;
2485
2486
428k
        do {
2487
428k
            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
2488
428k
            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
2489
428k
            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
2490
428k
            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
2491
428k
        } while (input<limit);
2492
2493
24
        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2494
24
        h64 = XXH64_mergeRound(h64, v1);
2495
24
        h64 = XXH64_mergeRound(h64, v2);
2496
24
        h64 = XXH64_mergeRound(h64, v3);
2497
24
        h64 = XXH64_mergeRound(h64, v4);
2498
2499
24
    } else {
2500
0
        h64  = seed + XXH_PRIME64_5;
2501
0
    }
2502
2503
24
    h64 += (xxh_u64) len;
2504
2505
24
    return XXH64_finalize(h64, input, len, align);
2506
24
}
2507
2508
2509
/*! @ingroup xxh64_family */
2510
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
2511
24
{
2512
#if 0
2513
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2514
    XXH64_state_t state;
2515
    XXH64_reset(&state, seed);
2516
    XXH64_update(&state, (const xxh_u8*)input, len);
2517
    return XXH64_digest(&state);
2518
#else
2519
24
    if (XXH_FORCE_ALIGN_CHECK) {
2520
0
        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
2521
0
            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2522
0
    }   }
2523
2524
24
    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2525
2526
24
#endif
2527
24
}
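/*
 * One-shot usage sketch for XXH64(), mirroring the XXH32 example
 * (illustration only; the input string and seed value are arbitrary):
 */
#if 0
#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main(void)
{
    const char msg[] = "sample input";
    XXH64_hash_t const h = XXH64(msg, strlen(msg), 0x27d4eb2fULL /* arbitrary seed */);
    printf("XXH64 = 0x%016llX\n", (unsigned long long)h);
    return 0;
}
#endif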
2528
2529
/*******   Hash Streaming   *******/
2530
2531
/*! @ingroup xxh64_family */
2532
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
2533
0
{
2534
0
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
2535
0
}
2536
/*! @ingroup xxh64_family */
2537
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
2538
0
{
2539
0
    XXH_free(statePtr);
2540
0
    return XXH_OK;
2541
0
}
2542
2543
/*! @ingroup xxh64_family */
2544
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
2545
0
{
2546
0
    XXH_memcpy(dstState, srcState, sizeof(*dstState));
2547
0
}
2548
2549
/*! @ingroup xxh64_family */
2550
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
2551
0
{
2552
0
    XXH64_state_t state;   /* use a local state to memcpy() in order to avoid strict-aliasing warnings */
2553
0
    memset(&state, 0, sizeof(state));
2554
0
    state.v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2555
0
    state.v[1] = seed + XXH_PRIME64_2;
2556
0
    state.v[2] = seed + 0;
2557
0
    state.v[3] = seed - XXH_PRIME64_1;
2558
     /* do not write into reserved64, might be removed in a future version */
2559
0
    XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
2560
0
    return XXH_OK;
2561
0
}
2562
2563
/*! @ingroup xxh64_family */
2564
XXH_PUBLIC_API XXH_errorcode
2565
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
2566
0
{
2567
0
    if (input==NULL) {
2568
0
        XXH_ASSERT(len == 0);
2569
0
        return XXH_OK;
2570
0
    }
2571
2572
0
    {   const xxh_u8* p = (const xxh_u8*)input;
2573
0
        const xxh_u8* const bEnd = p + len;
2574
2575
0
        state->total_len += len;
2576
2577
0
        if (state->memsize + len < 32) {  /* fill in tmp buffer */
2578
0
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
2579
0
            state->memsize += (xxh_u32)len;
2580
0
            return XXH_OK;
2581
0
        }
2582
2583
0
        if (state->memsize) {   /* tmp buffer is full */
2584
0
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
2585
0
            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
2586
0
            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
2587
0
            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
2588
0
            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
2589
0
            p += 32 - state->memsize;
2590
0
            state->memsize = 0;
2591
0
        }
2592
2593
0
        if (p+32 <= bEnd) {
2594
0
            const xxh_u8* const limit = bEnd - 32;
2595
2596
0
            do {
2597
0
                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
2598
0
                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
2599
0
                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
2600
0
                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
2601
0
            } while (p<=limit);
2602
2603
0
        }
2604
2605
0
        if (p < bEnd) {
2606
0
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
2607
0
            state->memsize = (unsigned)(bEnd-p);
2608
0
        }
2609
0
    }
2610
2611
0
    return XXH_OK;
2612
0
}
2613
2614
2615
/*! @ingroup xxh64_family */
2616
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
2617
0
{
2618
0
    xxh_u64 h64;
2619
2620
0
    if (state->total_len >= 32) {
2621
0
        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
2622
0
        h64 = XXH64_mergeRound(h64, state->v[0]);
2623
0
        h64 = XXH64_mergeRound(h64, state->v[1]);
2624
0
        h64 = XXH64_mergeRound(h64, state->v[2]);
2625
0
        h64 = XXH64_mergeRound(h64, state->v[3]);
2626
0
    } else {
2627
0
        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
2628
0
    }
2629
2630
0
    h64 += (xxh_u64) state->total_len;
2631
2632
0
    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
2633
0
}
2634
2635
2636
/******* Canonical representation   *******/
2637
2638
/*! @ingroup xxh64_family */
2639
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
2640
0
{
2641
0
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
2642
0
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
2643
0
    XXH_memcpy(dst, &hash, sizeof(*dst));
2644
0
}
2645
2646
/*! @ingroup xxh64_family */
2647
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
2648
0
{
2649
0
    return XXH_readBE64(src);
2650
0
}
2651
2652
#ifndef XXH_NO_XXH3
2653
2654
/* *********************************************************************
2655
*  XXH3
2656
*  New generation hash designed for speed on small keys and vectorization
2657
************************************************************************ */
2658
/*!
2659
 * @}
2660
 * @defgroup xxh3_impl XXH3 implementation
2661
 * @ingroup impl
2662
 * @{
2663
 */
2664
2665
/* ===   Compiler specifics   === */
2666
2667
#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
2668
#  define XXH_RESTRICT /* disable */
2669
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
2670
#  define XXH_RESTRICT   restrict
2671
#else
2672
/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
2673
#  define XXH_RESTRICT   /* disable */
2674
#endif
2675
2676
#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
2677
  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
2678
  || defined(__clang__)
2679
#    define XXH_likely(x) __builtin_expect(x, 1)
2680
#    define XXH_unlikely(x) __builtin_expect(x, 0)
2681
#else
2682
#    define XXH_likely(x) (x)
2683
#    define XXH_unlikely(x) (x)
2684
#endif
2685
2686
#if defined(__GNUC__)
2687
#  if defined(__AVX2__)
2688
#    include <immintrin.h>
2689
#  elif defined(__SSE2__)
2690
#    include <emmintrin.h>
2691
#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
2692
#    define inline __inline__  /* circumvent a clang bug */
2693
#    include <arm_neon.h>
2694
#    undef inline
2695
#  endif
2696
#elif defined(_MSC_VER)
2697
#  include <intrin.h>
2698
#endif
2699
2700
/*
2701
 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
2702
 * remaining a true 64-bit/128-bit hash function.
2703
 *
2704
 * This is done by prioritizing a subset of 64-bit operations that can be
2705
 * emulated without too many steps on the average 32-bit machine.
2706
 *
2707
 * For example, these two lines seem similar, and run equally fast on 64-bit:
2708
 *
2709
 *   xxh_u64 x;
2710
 *   x ^= (x >> 47); // good
2711
 *   x ^= (x >> 13); // bad
2712
 *
2713
 * However, to a 32-bit machine, there is a major difference.
2714
 *
2715
 * x ^= (x >> 47) looks like this:
2716
 *
2717
 *   x.lo ^= (x.hi >> (47 - 32));
2718
 *
2719
 * while x ^= (x >> 13) looks like this:
2720
 *
2721
 *   // note: funnel shifts are not usually cheap.
2722
 *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
2723
 *   x.hi ^= (x.hi >> 13);
2724
 *
2725
 * The first one is significantly faster than the second, simply because the
2726
 * shift is larger than 32. This means:
2727
 *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
2728
 *    32 bits in the shift.
2729
 *  - The shift result will always fit in the lower 32 bits, and therefore,
2730
 *    we can ignore the upper 32 bits in the xor.
2731
 *
2732
 * Thanks to this optimization, XXH3 only requires these features to be efficient:
2733
 *
2734
 *  - Usable unaligned access
2735
 *  - A 32-bit or 64-bit ALU
2736
 *      - If 32-bit, a decent ADC instruction
2737
 *  - A 32 or 64-bit multiply with a 64-bit result
2738
 *  - For the 128-bit variant, a decent byteswap helps short inputs.
2739
 *
2740
 * The first two are already required by XXH32, and almost all 32-bit and 64-bit
2741
 * platforms which can run XXH32 can run XXH3 efficiently.
2742
 *
2743
 * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
2744
 * notable exception.
2745
 *
2746
 * First of all, Thumb-1 lacks support for the UMULL instruction which
2747
 * performs the important long multiply. This means numerous __aeabi_lmul
2748
 * calls.
2749
 *
2750
 * Second of all, the 8 functional registers are just not enough.
2751
 * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
2752
 * Lo registers, and this shuffling results in thousands more MOVs than A32.
2753
 *
2754
 * A32 and T32 don't have this limitation. They can access all 14 registers,
2755
 * do a 32->64 multiply with UMULL, and the flexible operand allowing free
2756
 * shifts is helpful, too.
2757
 *
2758
 * Therefore, we do a quick sanity check.
2759
 *
2760
 * If compiling Thumb-1 for a target which supports ARM instructions, we will
2761
 * emit a warning, as it is not a "sane" platform to compile for.
2762
 *
2763
 * Usually, if this happens, it is because of an accident and you probably need
2764
 * to specify -march, as you likely meant to compile for a newer architecture.
2765
 *
2766
 * Credit: large sections of the vectorial and asm source code paths
2767
 *         have been contributed by @easyaspi314
2768
 */
2769
#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
2770
#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
2771
#endif
2772
2773
/* ==========================================
2774
 * Vectorization detection
2775
 * ========================================== */
2776
2777
#ifdef XXH_DOXYGEN
2778
/*!
2779
 * @ingroup tuning
2780
 * @brief Overrides the vectorization implementation chosen for XXH3.
2781
 *
2782
 * Can be defined to 0 to disable SIMD or any of the values mentioned in
2783
 * @ref XXH_VECTOR_TYPE.
2784
 *
2785
 * If this is not defined, it uses predefined macros to determine the best
2786
 * implementation.
2787
 */
2788
#  define XXH_VECTOR XXH_SCALAR
2789
/*!
2790
 * @ingroup tuning
2791
 * @brief Possible values for @ref XXH_VECTOR.
2792
 *
2793
 * Note that these are actually implemented as macros.
2794
 *
2795
 * If this is not defined, it is detected automatically.
2796
 * @ref XXH_X86DISPATCH overrides this.
2797
 */
2798
enum XXH_VECTOR_TYPE /* fake enum */ {
2799
    XXH_SCALAR = 0,  /*!< Portable scalar version */
2800
    XXH_SSE2   = 1,  /*!<
2801
                      * SSE2 for Pentium 4, Opteron, all x86_64.
2802
                      *
2803
                      * @note SSE2 is also guaranteed on Windows 10, macOS, and
2804
                      * Android x86.
2805
                      */
2806
    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
2807
    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
2808
    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
2809
    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
2810
};
2811
/*!
2812
 * @ingroup tuning
2813
 * @brief Selects the minimum alignment for XXH3's accumulators.
2814
 *
2815
 * When using SIMD, this should match the alignment required for said vector
2816
 * type, so, for example, 32 for AVX2.
2817
 *
2818
 * Default: Auto detected.
2819
 */
2820
#  define XXH_ACC_ALIGN 8
2821
#endif
2822
2823
/* Actual definition */
2824
#ifndef XXH_DOXYGEN
2825
#  define XXH_SCALAR 0
2826
#  define XXH_SSE2   1
2827
#  define XXH_AVX2   2
2828
#  define XXH_AVX512 3
2829
#  define XXH_NEON   4
2830
#  define XXH_VSX    5
2831
#endif
2832
2833
#ifndef XXH_VECTOR    /* can be defined on command line */
2834
#  if defined(__AVX512F__)
2835
#    define XXH_VECTOR XXH_AVX512
2836
#  elif defined(__AVX2__)
2837
#    define XXH_VECTOR XXH_AVX2
2838
#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
2839
#    define XXH_VECTOR XXH_SSE2
2840
#  elif ( \
2841
        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
2842
     || defined(_M_ARM64) || defined(_M_ARM_ARMV7VE) /* msvc */ \
2843
   ) && ( \
2844
        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
2845
    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
2846
   )
2847
#    define XXH_VECTOR XXH_NEON
2848
#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
2849
     || (defined(__s390x__) && defined(__VEC__)) \
2850
     && defined(__GNUC__) /* TODO: IBM XL */
2851
#    define XXH_VECTOR XXH_VSX
2852
#  else
2853
#    define XXH_VECTOR XXH_SCALAR
2854
#  endif
2855
#endif
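/*
 * Override sketch for XXH_VECTOR: as noted above, it can be defined on the
 * command line (or before inclusion) to bypass the automatic detection, e.g.
 * to force the portable scalar path while benchmarking (illustration only):
 */
#if 0
#define XXH_VECTOR 0        /* 0 == XXH_SCALAR: portable scalar implementation */
#define XXH_IMPLEMENTATION
#include "xxhash.h"
#endif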
2856
2857
/*
2858
 * Controls the alignment of the accumulator,
2859
 * for compatibility with aligned vector loads, which are usually faster.
2860
 */
2861
#ifndef XXH_ACC_ALIGN
2862
#  if defined(XXH_X86DISPATCH)
2863
#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
2864
#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
2865
#     define XXH_ACC_ALIGN 8
2866
#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
2867
#     define XXH_ACC_ALIGN 16
2868
#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
2869
#     define XXH_ACC_ALIGN 32
2870
#  elif XXH_VECTOR == XXH_NEON  /* neon */
2871
#     define XXH_ACC_ALIGN 16
2872
#  elif XXH_VECTOR == XXH_VSX   /* vsx */
2873
#     define XXH_ACC_ALIGN 16
2874
#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
2875
#     define XXH_ACC_ALIGN 64
2876
#  endif
2877
#endif
2878
2879
#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
2880
    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
2881
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
2882
#else
2883
#  define XXH_SEC_ALIGN 8
2884
#endif
2885
2886
/*
2887
 * UGLY HACK:
2888
 * GCC usually generates the best code with -O3 for xxHash.
2889
 *
2890
 * However, when targeting AVX2, it is overzealous in its unrolling, resulting
2891
 * in code roughly 3/4 the speed of Clang.
2892
 *
2893
 * There are other issues, such as GCC splitting _mm256_loadu_si256 into
2894
 * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
2895
 * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
2896
 *
2897
 * That is why when compiling the AVX2 version, it is recommended to use either
2898
 *   -O2 -mavx2 -march=haswell
2899
 * or
2900
 *   -O2 -mavx2 -mno-avx256-split-unaligned-load
2901
 * for decent performance, or to use Clang instead.
2902
 *
2903
 * Fortunately, we can control the first one with a pragma that forces GCC into
2904
 * -O2, but the other one we can't control without "failed to inline always
2905
 * inline function due to target mismatch" warnings.
2906
 */
2907
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
2908
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
2909
  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
2910
#  pragma GCC push_options
2911
#  pragma GCC optimize("-O2")
2912
#endif
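/*
 * As a concrete illustration of the recommendation above, an AVX2 build of the
 * including translation unit might be driven with either of:
 *
 *   gcc -O2 -mavx2 -march=haswell -c xxhash.c
 *   gcc -O2 -mavx2 -mno-avx256-split-unaligned-load -c xxhash.c
 *
 * whereas Clang generally does well with a plain -O3 -mavx2 build.
 */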
2913
2914
2915
#if XXH_VECTOR == XXH_NEON
2916
/*
2917
 * NEON's setup for vmlal_u32 is a little more complicated than it is on
2918
 * SSE2, AVX2, and VSX.
2919
 *
2920
 * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
2921
 *
2922
 * To do the same operation, the 128-bit 'Q' register needs to be split into
2923
 * two 64-bit 'D' registers, performing this operation::
2924
 *
2925
 *   [                a                 |                 b                ]
2926
 *            |              '---------. .--------'                |
2927
 *            |                         x                          |
2928
 *            |              .---------' '--------.                |
2929
 *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
2930
 *
2931
 * Due to significant changes in aarch64, the fastest method for aarch64 is
2932
 * completely different from the fastest method for ARMv7-A.
2933
 *
2934
 * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
2935
 * D11 will modify the high half of Q5. This is similar to how modifying AH
2936
 * will only affect bits 8-15 of AX on x86.
2937
 *
2938
 * VZIP takes two registers, and puts even lanes in one register and odd lanes
2939
 * in the other.
2940
 *
2941
 * On ARMv7-A, this strangely modifies both parameters in place instead of
2942
 * taking the usual 3-operand form.
2943
 *
2944
 * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
2945
 * lower and upper halves of the Q register to end up with the high and low
2946
 * halves where we want - all in one instruction.
2947
 *
2948
 *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
2949
 *
2950
 * Unfortunately we need inline assembly for this: an instruction that modifies
2951
 * two registers at once cannot be expressed in GCC or Clang's IR, so they
2952
 * have to create a copy.
2953
 *
2954
 * aarch64 requires a different approach.
2955
 *
2956
 * In order to make it easier to write a decent compiler for aarch64, many
2957
 * quirks were removed, such as conditional execution.
2958
 *
2959
 * NEON was also affected by this.
2960
 *
2961
 * aarch64 cannot access the high bits of a Q-form register, and writes to a
2962
 * D-form register zero the high bits, similar to how writes to W-form scalar
2963
 * registers (or DWORD registers on x86_64) work.
2964
 *
2965
 * The formerly free vget_high intrinsics now require a vext (with a few
2966
 * exceptions).
2967
 *
2968
 * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
2969
 * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
2970
 * operand.
2971
 *
2972
 * The equivalent of the VZIP.32 on the lower and upper halves would be this
2973
 * mess:
2974
 *
2975
 *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
2976
 *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
2977
 *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
2978
 *
2979
 * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
2980
 *
2981
 *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
2982
 *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
2983
 *
2984
 * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
2985
 */
2986
2987
/*!
2988
 * Function-like macro:
2989
 * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
2990
 * {
2991
 *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
2992
 *     outHi = (uint32x2_t)(in >> 32);
2993
 *     in = UNDEFINED;
2994
 * }
2995
 */
2996
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
2997
   && defined(__GNUC__) \
2998
   && !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
2999
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
3000
    do {                                                                                    \
3001
      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
3002
      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
3003
      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
3004
      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
3005
      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
3006
      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
3007
   } while (0)
3008
# else
3009
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
3010
    do {                                                                                  \
3011
      (outLo) = vmovn_u64    (in);                                                        \
3012
      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
3013
    } while (0)
3014
# endif
3015
#endif  /* XXH_VECTOR == XXH_NEON */
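/*
 * In scalar terms, both XXH_SPLIT_IN_PLACE variants above compute, for each
 * 64-bit lane of the input vector (a minimal sketch, one lane shown):
 *
 *   uint32_t lo = (uint32_t)(lane & 0xFFFFFFFF);   (low half,  like XTN  / the low VZIP output)
 *   uint32_t hi = (uint32_t)(lane >> 32);          (high half, like SHRN / the high VZIP output)
 *
 * so that the subsequent vmlal_u32() can widen-multiply lo * hi and accumulate,
 * mirroring what PMULUDQ / VMULEUW achieve on SSE2 / VSX.
 */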
3016
3017
/*
3018
 * VSX and Z Vector helpers.
3019
 *
3020
 * This is very messy, and any pull requests to clean this up are welcome.
3021
 *
3022
 * There are a lot of problems with supporting VSX and s390x, due to
3023
 * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3024
 */
3025
#if XXH_VECTOR == XXH_VSX
3026
#  if defined(__s390x__)
3027
#    include <s390intrin.h>
3028
#  else
3029
/* gcc's altivec.h can have the unwanted consequence of unconditionally
3030
 * #defining the bool, vector, and pixel keywords,
3031
 * which breaks programs already using these identifiers for other purposes.
3032
 * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
3033
 * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
3034
 * but it seems that, in some cases, it isn't.
3035
 * Force the build macro to be defined, so that keywords are not altered.
3036
 */
3037
#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
3038
#      define __APPLE_ALTIVEC__
3039
#    endif
3040
#    include <altivec.h>
3041
#  endif
3042
3043
typedef __vector unsigned long long xxh_u64x2;
3044
typedef __vector unsigned char xxh_u8x16;
3045
typedef __vector unsigned xxh_u32x4;
3046
3047
# ifndef XXH_VSX_BE
3048
#  if defined(__BIG_ENDIAN__) \
3049
  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3050
#    define XXH_VSX_BE 1
3051
#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3052
#    warning "-maltivec=be is not recommended. Please use native endianness."
3053
#    define XXH_VSX_BE 1
3054
#  else
3055
#    define XXH_VSX_BE 0
3056
#  endif
3057
# endif /* !defined(XXH_VSX_BE) */
3058
3059
# if XXH_VSX_BE
3060
#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3061
#    define XXH_vec_revb vec_revb
3062
#  else
3063
/*!
3064
 * A polyfill for POWER9's vec_revb().
3065
 */
3066
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3067
{
3068
    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
3069
                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
3070
    return vec_perm(val, val, vByteSwap);
3071
}
3072
#  endif
3073
# endif /* XXH_VSX_BE */
3074
3075
/*!
3076
 * Performs an unaligned vector load and byte swaps it on big endian.
3077
 */
3078
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3079
{
3080
    xxh_u64x2 ret;
3081
    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3082
# if XXH_VSX_BE
3083
    ret = XXH_vec_revb(ret);
3084
# endif
3085
    return ret;
3086
}
3087
3088
/*
3089
 * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
3090
 *
3091
 * These intrinsics weren't added until GCC 8, despite existing for a while,
3092
 * and they are endian dependent. Also, their meanings swap depending on the version.
3093
 */
3094
# if defined(__s390x__)
3095
 /* s390x is always big endian, no issue on this platform */
3096
#  define XXH_vec_mulo vec_mulo
3097
#  define XXH_vec_mule vec_mule
3098
# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
3099
/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
3100
#  define XXH_vec_mulo __builtin_altivec_vmulouw
3101
#  define XXH_vec_mule __builtin_altivec_vmuleuw
3102
# else
3103
/* gcc needs inline assembly */
3104
/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
3105
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
3106
{
3107
    xxh_u64x2 result;
3108
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3109
    return result;
3110
}
3111
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3112
{
3113
    xxh_u64x2 result;
3114
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3115
    return result;
3116
}
3117
# endif /* XXH_vec_mulo, XXH_vec_mule */
3118
#endif /* XXH_VECTOR == XXH_VSX */
3119
3120
3121
/* prefetch
3122
 * can be disabled, by declaring XXH_NO_PREFETCH build macro */
3123
#if defined(XXH_NO_PREFETCH)
3124
#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
3125
#else
3126
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
3127
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3128
#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3129
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3130
#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
3131
#  else
3132
#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
3133
#  endif
3134
#endif  /* XXH_NO_PREFETCH */
3135
3136
3137
/* ==========================================
3138
 * XXH3 default settings
3139
 * ========================================== */
3140
3141
#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
3142
3143
#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3144
#  error "default keyset is not large enough"
3145
#endif
3146
3147
/*! Pseudorandom secret taken directly from FARSH. */
3148
XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3149
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3150
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3151
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3152
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3153
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3154
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3155
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3156
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3157
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3158
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3159
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3160
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3161
};
3162
3163
3164
#ifdef XXH_OLD_NAMES
3165
#  define kSecret XXH3_kSecret
3166
#endif
3167
3168
#ifdef XXH_DOXYGEN
3169
/*!
3170
 * @brief Calculates a 32-bit to 64-bit long multiply.
3171
 *
3172
 * Implemented as a macro.
3173
 *
3174
 * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
3175
 * need to (but it shouldn't need to anyways, it is about 7 instructions to do
3176
 * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
3177
 * use that instead of the normal method.
3178
 *
3179
 * If you are compiling for platforms like Thumb-1 and don't have a better option,
3180
 * you may also want to write your own long multiply routine here.
3181
 *
3182
 * @param x, y Numbers to be multiplied
3183
 * @return 64-bit product of the low 32 bits of @p x and @p y.
3184
 */
3185
XXH_FORCE_INLINE xxh_u64
3186
XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3187
{
3188
   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
3189
}
3190
#elif defined(_MSC_VER) && defined(_M_IX86)
3191
#    include <intrin.h>
3192
#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3193
#else
3194
/*
3195
 * Downcast + upcast is usually better than masking on older compilers like
3196
 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
3197
 *
3198
 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
3199
 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
3200
 */
3201
#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
3202
#endif
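/*
 * Sketch of the difference described above, assuming a 32-bit target:
 *
 *   xxh_u64 p1 = (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);            (mask form:
 *       both operands stay 64-bit, so the compiler may emit a full 64x64
 *       multiply, e.g. a call to a helper like __allmul)
 *
 *   xxh_u64 p2 = (xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y);  (cast form,
 *       used here: operands are narrowed first, so one widening 32x32->64
 *       multiply suffices)
 *
 * Both forms produce the same value; only the generated code differs.
 */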
3203
3204
/*!
3205
 * @brief Calculates a 64->128-bit long multiply.
3206
 *
3207
 * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
3208
 * version.
3209
 *
3210
 * @param lhs , rhs The 64-bit integers to be multiplied
3211
 * @return The 128-bit result represented in an @ref XXH128_hash_t.
3212
 */
3213
static XXH128_hash_t
3214
XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3215
{
3216
    /*
3217
     * GCC/Clang __uint128_t method.
3218
     *
3219
     * On most 64-bit targets, GCC and Clang define a __uint128_t type.
3220
     * This is usually the best way as it usually uses a native long 64-bit
3221
     * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
3222
     *
3223
     * Usually.
3224
     *
3225
     * However, on some 32-bit platforms, Clang (and Emscripten) define this type
3226
     * even though they lack native 128-bit arithmetic. This results in a slow
3227
     * compiler builtin call which calculates a full 128-bit multiply.
3228
     * In that case it is best to use the portable one.
3229
     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
3230
     */
3231
#if defined(__GNUC__) && !defined(__wasm__) \
3232
    && defined(__SIZEOF_INT128__) \
3233
    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3234
3235
    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
3236
    XXH128_hash_t r128;
3237
    r128.low64  = (xxh_u64)(product);
3238
    r128.high64 = (xxh_u64)(product >> 64);
3239
    return r128;
3240
3241
    /*
3242
     * MSVC for x64's _umul128 method.
3243
     *
3244
     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
3245
     *
3246
     * This compiles to single operand MUL on x64.
3247
     */
3248
#elif defined(_M_X64) || defined(_M_IA64)
3249
3250
#ifndef _MSC_VER
3251
#   pragma intrinsic(_umul128)
3252
#endif
3253
    xxh_u64 product_high;
3254
    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
3255
    XXH128_hash_t r128;
3256
    r128.low64  = product_low;
3257
    r128.high64 = product_high;
3258
    return r128;
3259
3260
    /*
3261
     * MSVC for ARM64's __umulh method.
3262
     *
3263
     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
3264
     */
3265
#elif defined(_M_ARM64)
3266
3267
#ifndef _MSC_VER
3268
#   pragma intrinsic(__umulh)
3269
#endif
3270
    XXH128_hash_t r128;
3271
    r128.low64  = lhs * rhs;
3272
    r128.high64 = __umulh(lhs, rhs);
3273
    return r128;
3274
3275
#else
3276
    /*
3277
     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
3278
     *
3279
     * This is a fast and simple grade school multiply, which is shown below
3280
     * with base 10 arithmetic instead of base 0x100000000.
3281
     *
3282
     *           9 3 // D2 lhs = 93
3283
     *         x 7 5 // D2 rhs = 75
3284
     *     ----------
3285
     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
3286
     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
3287
     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
3288
     *     + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
3289
     *     ---------
3290
     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
3291
     *     + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
3292
     *     ---------
3293
     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
3294
     *
3295
     * The reasons for adding the products like this are:
3296
     *  1. It avoids manual carry tracking. Just like how
3297
     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
3298
     *     This avoids a lot of complexity.
3299
     *
3300
     *  2. It hints for, and on Clang, compiles to, the powerful UMAAL
3301
     *     instruction available in ARM's Digital Signal Processing extension
3302
     *     in 32-bit ARMv6 and later, which is shown below:
3303
     *
3304
     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
3305
     *         {
3306
     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
3307
     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
3308
     *             *RdHi = (xxh_u32)(product >> 32);
3309
     *         }
3310
     *
3311
     *     This instruction was designed for efficient long multiplication, and
3312
     *     allows this to be calculated in only 4 instructions at speeds
3313
     *     comparable to some 64-bit ALUs.
3314
     *
3315
     *  3. It isn't terrible on other platforms. Usually this will be a couple
3316
     *     of 32-bit ADD/ADCs.
3317
     */
3318
3319
    /* First calculate all of the cross products. */
3320
    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
3321
    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
3322
    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
3323
    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);
3324
3325
    /* Now add the products together. These will never overflow. */
3326
    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
3327
    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
3328
    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
3329
3330
    XXH128_hash_t r128;
3331
    r128.low64  = lower;
3332
    r128.high64 = upper;
3333
    return r128;
3334
#endif
3335
}
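/*
 * Where __uint128_t is available, the portable branch above can be cross-checked
 * against it; a minimal sketch of such a test (assuming <assert.h>):
 *
 *   XXH128_hash_t const r = XXH_mult64to128(0x9E3779B185EBCA87ULL, 0xC2B2AE3D27D4EB4FULL);
 *   __uint128_t   const p = (__uint128_t)0x9E3779B185EBCA87ULL * 0xC2B2AE3D27D4EB4FULL;
 *   assert(r.low64  == (xxh_u64)p);
 *   assert(r.high64 == (xxh_u64)(p >> 64));
 */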
3336
3337
/*!
3338
 * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
3339
 *
3340
 * The reason for the separate function is to prevent passing too many structs
3341
 * around by value. This will hopefully inline the multiply, but we don't force it.
3342
 *
3343
 * @param lhs , rhs The 64-bit integers to multiply
3344
 * @return The low 64 bits of the product XOR'd by the high 64 bits.
3345
 * @see XXH_mult64to128()
3346
 */
3347
static xxh_u64
3348
XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3349
{
3350
    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
3351
    return product.low64 ^ product.high64;
3352
}
3353
3354
/*! Seems to produce slightly better code on GCC for some reason. */
3355
XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3356
{
3357
    XXH_ASSERT(0 <= shift && shift < 64);
3358
    return v64 ^ (v64 >> shift);
3359
}
3360
3361
/*
3362
 * This is a fast avalanche stage,
3363
 * suitable when input bits are already partially mixed
3364
 */
3365
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
3366
{
3367
    h64 = XXH_xorshift64(h64, 37);
3368
    h64 *= 0x165667919E3779F9ULL;
3369
    h64 = XXH_xorshift64(h64, 32);
3370
    return h64;
3371
}
3372
3373
/*
3374
 * This is a stronger avalanche,
3375
 * inspired by Pelle Evensen's rrmxmx,
3376
 * preferable when the input has not been previously mixed.
3377
 */
3378
static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3379
{
3380
    /* this mix is inspired by Pelle Evensen's rrmxmx */
3381
    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
3382
    h64 *= 0x9FB21C651E98DF25ULL;
3383
    h64 ^= (h64 >> 35) + len ;
3384
    h64 *= 0x9FB21C651E98DF25ULL;
3385
    return XXH_xorshift64(h64, 28);
3386
}
3387
3388
3389
/* ==========================================
3390
 * Short keys
3391
 * ==========================================
3392
 * One of the shortcomings of XXH32 and XXH64 was that their performance was
3393
 * sub-optimal on short lengths. They used an iterative algorithm which strongly
3394
 * favored lengths that were a multiple of 4 or 8.
3395
 *
3396
 * Instead of iterating over individual inputs, we use a set of single shot
3397
 * functions which piece together a range of lengths and operate in constant time.
3398
 *
3399
 * Additionally, the number of multiplies has been significantly reduced. This
3400
 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
3401
 *
3402
 * Depending on the platform, this may or may not be faster than XXH32, but it
3403
 * is almost guaranteed to be faster than XXH64.
3404
 */
3405
3406
/*
3407
 * At very short lengths, there isn't enough input to fully hide secrets, or use
3408
 * the entire secret.
3409
 *
3410
 * There is also only a limited amount of mixing we can do before significantly
3411
 * impacting performance.
3412
 *
3413
 * Therefore, we use different sections of the secret and always mix two secret
3414
 * samples with an XOR. This should have no effect on performance on the
3415
 * seedless or withSeed variants because everything _should_ be constant folded
3416
 * by modern compilers.
3417
 *
3418
 * The XOR mixing hides individual parts of the secret and increases entropy.
3419
 *
3420
 * This adds an extra layer of strength for custom secrets.
3421
 */
3422
XXH_FORCE_INLINE XXH64_hash_t
3423
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3424
{
3425
    XXH_ASSERT(input != NULL);
3426
    XXH_ASSERT(1 <= len && len <= 3);
3427
    XXH_ASSERT(secret != NULL);
3428
    /*
3429
     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
3430
     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
3431
     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
3432
     */
3433
    {   xxh_u8  const c1 = input[0];
3434
        xxh_u8  const c2 = input[len >> 1];
3435
        xxh_u8  const c3 = input[len - 1];
3436
        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
3437
                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);
3438
        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
3439
        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
3440
        return XXH64_avalanche(keyed);
3441
    }
3442
}
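/*
 * Worked example of the "combined" layout above: for input = { 0xAB, 0xCD }
 * and len = 2:
 *
 *   c1 = input[0]      = 0xAB
 *   c2 = input[len>>1] = 0xCD
 *   c3 = input[len-1]  = 0xCD
 *   combined = (0xAB<<16) | (0xCD<<24) | (0xCD<<0) | (2<<8) = 0xCDAB02CD
 *
 * i.e. bytes { 0xCD, 0x02, 0xAB, 0xCD } from lowest to highest, matching the
 * { input[1], 0x02, input[0], input[1] } pattern documented above.
 */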
3443
3444
XXH_FORCE_INLINE XXH64_hash_t
3445
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3446
{
3447
    XXH_ASSERT(input != NULL);
3448
    XXH_ASSERT(secret != NULL);
3449
    XXH_ASSERT(4 <= len && len <= 8);
3450
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
3451
    {   xxh_u32 const input1 = XXH_readLE32(input);
3452
        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
3453
        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
3454
        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
3455
        xxh_u64 const keyed = input64 ^ bitflip;
3456
        return XXH3_rrmxmx(keyed, len);
3457
    }
3458
}
3459
3460
XXH_FORCE_INLINE XXH64_hash_t
3461
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3462
{
3463
    XXH_ASSERT(input != NULL);
3464
    XXH_ASSERT(secret != NULL);
3465
    XXH_ASSERT(9 <= len && len <= 16);
3466
    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
3467
        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
3468
        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;
3469
        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
3470
        xxh_u64 const acc = len
3471
                          + XXH_swap64(input_lo) + input_hi
3472
                          + XXH3_mul128_fold64(input_lo, input_hi);
3473
        return XXH3_avalanche(acc);
3474
    }
3475
}
3476
3477
XXH_FORCE_INLINE XXH64_hash_t
3478
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3479
{
3480
    XXH_ASSERT(len <= 16);
3481
    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
3482
        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
3483
        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
3484
        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
3485
    }
3486
}
3487
3488
/*
3489
 * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
3490
 * multiplication by zero, affecting hashes of lengths 17 to 240.
3491
 *
3492
 * However, they are very unlikely.
3493
 *
3494
 * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
3495
 * unseeded non-cryptographic hashes, it does not attempt to defend itself
3496
 * against specially crafted inputs, only random inputs.
3497
 *
3498
 * Compared to classic UMAC, where a 1 in 2^31 chance of 4 consecutive bytes
3499
 * cancelling out the secret is taken an arbitrary number of times (addressed
3500
 * in XXH3_accumulate_512), this collision is very unlikely with random inputs
3501
 * and/or proper seeding:
3502
 *
3503
 * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
3504
 * function that is only called up to 16 times per hash with up to 240 bytes of
3505
 * input.
3506
 *
3507
 * This is not too bad for a non-cryptographic hash function, especially with
3508
 * only 64 bit outputs.
3509
 *
3510
 * The 128-bit variant (which trades some speed for strength) is NOT affected
3511
 * by this, although it is always a good idea to use a proper seed if you care
3512
 * about strength.
3513
 */
3514
XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
3515
                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
3516
{
3517
#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
3518
  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
3519
  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
3520
    /*
3521
     * UGLY HACK:
3522
     * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
3523
     * slower code.
3524
     *
3525
     * By forcing seed64 into a register, we disrupt the cost model and
3526
     * cause it to scalarize. See `XXH32_round()`
3527
     *
3528
     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
3529
     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
3530
     * GCC 9.2, despite both emitting scalar code.
3531
     *
3532
     * GCC generates much better scalar code than Clang for the rest of XXH3,
3533
     * which is why finding a more optimal codepath is of interest.
3534
     */
3535
    XXH_COMPILER_GUARD(seed64);
3536
#endif
3537
    {   xxh_u64 const input_lo = XXH_readLE64(input);
3538
        xxh_u64 const input_hi = XXH_readLE64(input+8);
3539
        return XXH3_mul128_fold64(
3540
            input_lo ^ (XXH_readLE64(secret)   + seed64),
3541
            input_hi ^ (XXH_readLE64(secret+8) - seed64)
3542
        );
3543
    }
3544
}
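/*
 * For reference, "forcing seed64 into a register" in the hack above is the kind
 * of effect achieved by an empty inline-asm operand constraint, along the lines
 * of (a sketch, assuming a GCC-compatible compiler):
 *
 *   __asm__ __volatile__("" : "+r" (seed64));
 *
 * No instructions are emitted, but the compiler must assume seed64 may have
 * changed, which is enough to disturb the autovectorizer's cost model here.
 */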
3545
3546
/* For mid range keys, XXH3 uses a Mum-hash variant. */
3547
XXH_FORCE_INLINE XXH64_hash_t
3548
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3549
                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
3550
                     XXH64_hash_t seed)
3551
{
3552
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
3553
    XXH_ASSERT(16 < len && len <= 128);
3554
3555
    {   xxh_u64 acc = len * XXH_PRIME64_1;
3556
        if (len > 32) {
3557
            if (len > 64) {
3558
                if (len > 96) {
3559
                    acc += XXH3_mix16B(input+48, secret+96, seed);
3560
                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
3561
                }
3562
                acc += XXH3_mix16B(input+32, secret+64, seed);
3563
                acc += XXH3_mix16B(input+len-48, secret+80, seed);
3564
            }
3565
            acc += XXH3_mix16B(input+16, secret+32, seed);
3566
            acc += XXH3_mix16B(input+len-32, secret+48, seed);
3567
        }
3568
        acc += XXH3_mix16B(input+0, secret+0, seed);
3569
        acc += XXH3_mix16B(input+len-16, secret+16, seed);
3570
3571
        return XXH3_avalanche(acc);
3572
    }
3573
}
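/*
 * Worked example of the ladder above, for len = 100:
 *
 *   len > 96 :  mix16B(input+48, secret+96 )   mix16B(input+36, secret+112)
 *   len > 64 :  mix16B(input+32, secret+64 )   mix16B(input+52, secret+80 )
 *   len > 32 :  mix16B(input+16, secret+32 )   mix16B(input+68, secret+48 )
 *   always   :  mix16B(input+0,  secret+0  )   mix16B(input+84, secret+16 )
 *
 * i.e. the first 64 and the last 64 input bytes are each covered by four
 * 16-byte mixes (overlapping in the middle), and every mix consumes a distinct
 * 16-byte slice of the secret.
 */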
3574
3575
#define XXH3_MIDSIZE_MAX 240
3576
3577
XXH_NO_INLINE XXH64_hash_t
3578
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3579
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
3580
                      XXH64_hash_t seed)
3581
{
3582
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
3583
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
3584
3585
    #define XXH3_MIDSIZE_STARTOFFSET 3
3586
    #define XXH3_MIDSIZE_LASTOFFSET  17
3587
3588
    {   xxh_u64 acc = len * XXH_PRIME64_1;
3589
        int const nbRounds = (int)len / 16;
3590
        int i;
3591
        for (i=0; i<8; i++) {
3592
            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
3593
        }
3594
        acc = XXH3_avalanche(acc);
3595
        XXH_ASSERT(nbRounds >= 8);
3596
#if defined(__clang__)                                /* Clang */ \
3597
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
3598
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
3599
        /*
3600
         * UGLY HACK:
3601
         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
3602
          * Everywhere else, it uses scalar code.
3603
         *
3604
         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
3605
         * would still be slower than UMAAL (see XXH_mult64to128).
3606
         *
3607
         * Unfortunately, Clang doesn't handle the long multiplies properly and
3608
         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
3609
         * scalarized into an ugly mess of VMOV.32 instructions.
3610
         *
3611
         * This mess is difficult to avoid without turning autovectorization
3612
          * off completely, but these issues are usually relatively minor and/or
3613
          * not worth fixing.
3614
         *
3615
         * This loop is the easiest to fix, as unlike XXH32, this pragma
3616
         * _actually works_ because it is a loop vectorization instead of an
3617
         * SLP vectorization.
3618
         */
3619
        #pragma clang loop vectorize(disable)
3620
#endif
3621
        for (i=8 ; i < nbRounds; i++) {
3622
            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
3623
        }
3624
        /* last bytes */
3625
        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
3626
        return XXH3_avalanche(acc);
3627
    }
3628
}
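/*
 * Arithmetic check for the maximum length, len = XXH3_MIDSIZE_MAX = 240:
 * nbRounds = 240/16 = 15, so the first loop handles rounds 0..7 (secret
 * offsets 0, 16, ..., 112, as in the 17-128 path) and the second loop handles
 * rounds 8..14, whose secret offsets restart at XXH3_MIDSIZE_STARTOFFSET
 * (3, 19, ..., 99). The final mix reads the last 16 input bytes against
 * secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, which keeps every
 * access within a minimum-sized secret.
 */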
3629
3630
3631
/* =======     Long Keys     ======= */
3632
3633
#define XXH_STRIPE_LEN 64
3634
#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
3635
#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
3636
3637
#ifdef XXH_OLD_NAMES
3638
#  define STRIPE_LEN XXH_STRIPE_LEN
3639
#  define ACC_NB XXH_ACC_NB
3640
#endif
3641
3642
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
3643
{
3644
    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
3645
    XXH_memcpy(dst, &v64, sizeof(v64));
3646
}
3647
3648
/* Several intrinsic functions below are supposed to accept __int64 as argument,
3649
 * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
3650
 * However, several environments do not define __int64 type,
3651
 * requiring a workaround.
3652
 */
3653
#if !defined (__VMS) \
3654
  && (defined (__cplusplus) \
3655
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
3656
    typedef int64_t xxh_i64;
3657
#else
3658
    /* the following type must be 64 bits wide */
3659
    typedef long long xxh_i64;
3660
#endif
3661
3662
/*
3663
 * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
3664
 *
3665
 * It is a hardened version of UMAC, based off of FARSH's implementation.
3666
 *
3667
 * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
3668
 * implementations, and it is ridiculously fast.
3669
 *
3670
 * We harden it by mixing the original input to the accumulators as well as the product.
3671
 *
3672
 * This means that in the (relatively likely) case of a multiply by zero, the
3673
 * original input is preserved.
3674
 *
3675
 * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
3676
 * cross-pollination, as otherwise the upper and lower halves would be
3677
 * essentially independent.
3678
 *
3679
 * This doesn't matter on 64-bit hashes since they all get merged together in
3680
 * the end, so we skip the extra step.
3681
 *
3682
 * Both XXH3_64bits and XXH3_128bits use this subroutine.
3683
 */
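/*
 * Roughly, each 64-bit accumulator lane performs the following scalar work per
 * 64-byte stripe (a sketch mirroring the scalar variant further below):
 *
 *   xxh_u64 const data_val = XXH_readLE64(input  + 8*lane);
 *   xxh_u64 const data_key = data_val ^ XXH_readLE64(secret + 8*lane);
 *   acc[lane ^ 1] += data_val;                               (swapped input mix)
 *   acc[lane]     += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
 *
 * The SIMD variants below compute the same thing, XXH_ACC_NB (= 8) lanes at a time.
 */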
3684
3685
#if (XXH_VECTOR == XXH_AVX512) \
3686
     || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
3687
3688
#ifndef XXH_TARGET_AVX512
3689
# define XXH_TARGET_AVX512  /* disable attribute target */
3690
#endif
3691
3692
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
3693
XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
3694
                     const void* XXH_RESTRICT input,
3695
                     const void* XXH_RESTRICT secret)
3696
{
3697
    __m512i* const xacc = (__m512i *) acc;
3698
    XXH_ASSERT((((size_t)acc) & 63) == 0);
3699
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
3700
3701
    {
3702
        /* data_vec    = input[0]; */
3703
        __m512i const data_vec    = _mm512_loadu_si512   (input);
3704
        /* key_vec     = secret[0]; */
3705
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
3706
        /* data_key    = data_vec ^ key_vec; */
3707
        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
3708
        /* data_key_lo = data_key >> 32; */
3709
        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
3710
        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
3711
        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
3712
        /* xacc[0] += swap(data_vec); */
3713
        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
3714
        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
3715
        /* xacc[0] += product; */
3716
        *xacc = _mm512_add_epi64(product, sum);
3717
    }
3718
}
3719
3720
/*
3721
 * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
3722
 *
3723
 * Multiplication isn't perfect, as explained by Google in HighwayHash:
3724
 *
3725
 *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
3726
 *  // varying degrees. In descending order of goodness, bytes
3727
 *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
3728
 *  // As expected, the upper and lower bytes are much worse.
3729
 *
3730
 * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
3731
 *
3732
 * Since our algorithm uses a pseudorandom secret to add some variance into the
3733
 * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
3734
 *
3735
 * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
3736
 * extraction.
3737
 *
3738
 * Both XXH3_64bits and XXH3_128bits use this subroutine.
3739
 */
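/*
 * Per 64-bit accumulator lane, the scramble amounts to (scalar sketch):
 *
 *   acc[lane] ^= acc[lane] >> 47;                   (xorshift folds high bits down)
 *   acc[lane] ^= XXH_readLE64(secret + 8*lane);     (mask with the secret)
 *   acc[lane] *= XXH_PRIME32_1;                     (odd 32-bit prime multiply)
 *
 * The SIMD code splits that final multiply into two 32x32->64 products because
 * a full 64x64 vector multiply is unavailable or slow on the targeted ISAs.
 */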
3740
3741
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
3742
XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
3743
{
3744
    XXH_ASSERT((((size_t)acc) & 63) == 0);
3745
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
3746
    {   __m512i* const xacc = (__m512i*) acc;
3747
        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
3748
3749
        /* xacc[0] ^= (xacc[0] >> 47) */
3750
        __m512i const acc_vec     = *xacc;
3751
        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
3752
        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
3753
        /* xacc[0] ^= secret; */
3754
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
3755
        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
3756
3757
        /* xacc[0] *= XXH_PRIME32_1; */
3758
        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
3759
        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
3760
        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
3761
        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
3762
    }
3763
}
3764
3765
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
3766
XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
3767
{
3768
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
3769
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
3770
    XXH_ASSERT(((size_t)customSecret & 63) == 0);
3771
    (void)(&XXH_writeLE64);
3772
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
3773
        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
3774
3775
        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
3776
              __m512i* const dest = (      __m512i*) customSecret;
3777
        int i;
3778
        XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
3779
        XXH_ASSERT(((size_t)dest & 63) == 0);
3780
        for (i=0; i < nbRounds; ++i) {
3781
            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
3782
             * this will warn "discards 'const' qualifier". */
3783
            union {
3784
                const __m512i* cp;
3785
                void* p;
3786
            } remote_const_void;
3787
            remote_const_void.cp = src + i;
3788
            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
3789
    }   }
3790
}
3791
3792
#endif
3793
3794
#if (XXH_VECTOR == XXH_AVX2) \
3795
    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
3796
3797
#ifndef XXH_TARGET_AVX2
3798
# define XXH_TARGET_AVX2  /* disable attribute target */
3799
#endif
3800
3801
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
3802
XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
3803
                    const void* XXH_RESTRICT input,
3804
                    const void* XXH_RESTRICT secret)
3805
{
3806
    XXH_ASSERT((((size_t)acc) & 31) == 0);
3807
    {   __m256i* const xacc    =       (__m256i *) acc;
3808
        /* Unaligned. This is mainly for pointer arithmetic, and because
3809
         * _mm256_loadu_si256 requires  a const __m256i * pointer for some reason. */
3810
        const         __m256i* const xinput  = (const __m256i *) input;
3811
        /* Unaligned. This is mainly for pointer arithmetic, and because
3812
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
3813
        const         __m256i* const xsecret = (const __m256i *) secret;
3814
3815
        size_t i;
3816
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
3817
            /* data_vec    = xinput[i]; */
3818
            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
3819
            /* key_vec     = xsecret[i]; */
3820
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
3821
            /* data_key    = data_vec ^ key_vec; */
3822
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
3823
            /* data_key_lo = data_key >> 32; */
3824
            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
3825
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
3826
            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
3827
            /* xacc[i] += swap(data_vec); */
3828
            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
3829
            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
3830
            /* xacc[i] += product; */
3831
            xacc[i] = _mm256_add_epi64(product, sum);
3832
    }   }
3833
}
3834
3835
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
3836
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
3837
{
3838
    XXH_ASSERT((((size_t)acc) & 31) == 0);
3839
    {   __m256i* const xacc = (__m256i*) acc;
3840
        /* Unaligned. This is mainly for pointer arithmetic, and because
3841
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
3842
        const         __m256i* const xsecret = (const __m256i *) secret;
3843
        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
3844
3845
        size_t i;
3846
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
3847
            /* xacc[i] ^= (xacc[i] >> 47) */
3848
            __m256i const acc_vec     = xacc[i];
3849
            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
3850
            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
3851
            /* xacc[i] ^= xsecret; */
3852
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
3853
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
3854
3855
            /* xacc[i] *= XXH_PRIME32_1; */
3856
            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
3857
            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
3858
            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
3859
            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
3860
        }
3861
    }
3862
}
3863
3864
XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
3865
{
3866
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
3867
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
3868
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
3869
    (void)(&XXH_writeLE64);
3870
    XXH_PREFETCH(customSecret);
3871
    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
3872
3873
        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
3874
              __m256i*       dest = (      __m256i*) customSecret;
3875
3876
#       if defined(__GNUC__) || defined(__clang__)
3877
        /*
3878
         * On GCC & Clang, marking 'dest' as modified causes the compiler to:
3879
         *   - not extract the secret from SSE registers in the internal loop
3880
         *   - use fewer registers, avoiding pushes of those registers onto the stack
3881
         */
3882
        XXH_COMPILER_GUARD(dest);
3883
#       endif
3884
        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
3885
        XXH_ASSERT(((size_t)dest & 31) == 0);
3886
3887
        /* GCC -O2 needs the loop unrolled manually */
3888
        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
3889
        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
3890
        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
3891
        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
3892
        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
3893
        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
3894
    }
3895
}
3896
3897
#endif
3898
3899
/* x86dispatch always generates SSE2 */
3900
#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
3901
3902
#ifndef XXH_TARGET_SSE2
3903
# define XXH_TARGET_SSE2  /* disable attribute target */
3904
#endif
3905
3906
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
3907
XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
3908
                    const void* XXH_RESTRICT input,
3909
                    const void* XXH_RESTRICT secret)
3910
{
3911
    /* SSE2 is just a half-scale version of the AVX2 version. */
3912
    XXH_ASSERT((((size_t)acc) & 15) == 0);
3913
    {   __m128i* const xacc    =       (__m128i *) acc;
3914
        /* Unaligned. This is mainly for pointer arithmetic, and because
3915
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
3916
        const         __m128i* const xinput  = (const __m128i *) input;
3917
        /* Unaligned. This is mainly for pointer arithmetic, and because
3918
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
3919
        const         __m128i* const xsecret = (const __m128i *) secret;
3920
3921
        size_t i;
3922
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
3923
            /* data_vec    = xinput[i]; */
3924
            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
3925
            /* key_vec     = xsecret[i]; */
3926
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
3927
            /* data_key    = data_vec ^ key_vec; */
3928
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
3929
            /* data_key_lo = data_key >> 32; */
3930
            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
3931
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
3932
            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
3933
            /* xacc[i] += swap(data_vec); */
3934
            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
3935
            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
3936
            /* xacc[i] += product; */
3937
            xacc[i] = _mm_add_epi64(product, sum);
3938
    }   }
3939
}
3940
3941
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
3942
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
3943
{
3944
    XXH_ASSERT((((size_t)acc) & 15) == 0);
3945
    {   __m128i* const xacc = (__m128i*) acc;
3946
        /* Unaligned. This is mainly for pointer arithmetic, and because
3947
         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
3948
        const         __m128i* const xsecret = (const __m128i *) secret;
3949
        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
3950
3951
        size_t i;
3952
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
3953
            /* xacc[i] ^= (xacc[i] >> 47) */
3954
            __m128i const acc_vec     = xacc[i];
3955
            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
3956
            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
3957
            /* xacc[i] ^= xsecret[i]; */
3958
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
3959
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
3960
3961
            /* xacc[i] *= XXH_PRIME32_1; */
3962
            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
3963
            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
3964
            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
3965
            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
3966
        }
3967
    }
3968
}
3969
3970
XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
3971
{
3972
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
3973
    (void)(&XXH_writeLE64);
3974
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
3975
3976
#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
3977
        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
3978
        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
3979
        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
3980
#       else
3981
        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
3982
#       endif
3983
        int i;
3984
3985
        const void* const src16 = XXH3_kSecret;
3986
        __m128i* dst16 = (__m128i*) customSecret;
3987
#       if defined(__GNUC__) || defined(__clang__)
3988
        /*
3989
         * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
3990
         *   - not extract the secret from SSE registers in the internal loop
3991
         *   - use fewer registers, avoiding pushes of those registers onto the stack
3992
         */
3993
        XXH_COMPILER_GUARD(dst16);
3994
#       endif
3995
        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
3996
        XXH_ASSERT(((size_t)dst16 & 15) == 0);
3997
3998
        for (i=0; i < nbRounds; ++i) {
3999
            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
4000
    }   }
4001
}
4002
4003
#endif
4004
4005
#if (XXH_VECTOR == XXH_NEON)
4006
4007
XXH_FORCE_INLINE void
4008
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
4009
                    const void* XXH_RESTRICT input,
4010
                    const void* XXH_RESTRICT secret)
4011
{
4012
    XXH_ASSERT((((size_t)acc) & 15) == 0);
4013
    {
4014
        uint64x2_t* const xacc = (uint64x2_t *) acc;
4015
        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
4016
        uint8_t const* const xinput = (const uint8_t *) input;
4017
        uint8_t const* const xsecret  = (const uint8_t *) secret;
4018
4019
        size_t i;
4020
        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
4021
            /* data_vec = xinput[i]; */
4022
            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
4023
            /* key_vec  = xsecret[i];  */
4024
            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
4025
            uint64x2_t data_key;
4026
            uint32x2_t data_key_lo, data_key_hi;
4027
            /* xacc[i] += swap(data_vec); */
4028
            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
4029
            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
4030
            xacc[i] = vaddq_u64 (xacc[i], swapped);
4031
            /* data_key = data_vec ^ key_vec; */
4032
            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
4033
            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
4034
             * data_key_hi = (uint32x2_t) (data_key >> 32);
4035
             * data_key = UNDEFINED; */
4036
            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
4037
            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
4038
            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
4039
4040
        }
4041
    }
4042
}
4043
4044
XXH_FORCE_INLINE void
4045
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4046
{
4047
    XXH_ASSERT((((size_t)acc) & 15) == 0);
4048
4049
    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
4050
        uint8_t const* xsecret = (uint8_t const*) secret;
4051
        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
4052
4053
        size_t i;
4054
        for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
4055
            /* xacc[i] ^= (xacc[i] >> 47); */
4056
            uint64x2_t acc_vec  = xacc[i];
4057
            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
4058
            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
4059
4060
            /* xacc[i] ^= xsecret[i]; */
4061
            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
4062
            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
4063
4064
            /* xacc[i] *= XXH_PRIME32_1 */
4065
            uint32x2_t data_key_lo, data_key_hi;
4066
            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
4067
             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
4068
             * xacc[i] = UNDEFINED; */
4069
            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
4070
            {   /*
4071
                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
4072
                 *
4073
                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
4074
                 * incorrectly "optimize" this:
4075
                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
4076
                 *   shifted = vshll_n_u32(tmp, 32);
4077
                 * to this:
4078
                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
4079
                 *   shifted = vshlq_n_u64(tmp, 32);
4080
                 *
4081
                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
4082
                 * for NEON, and it scalarizes two 64-bit multiplies instead.
4083
                 *
4084
                 * vmull_u32 has the same timing as vmul_u32, and it avoids
4085
                 * this bug completely.
4086
                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
4087
                 */
4088
                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
4089
                /* xacc[i] = prod_hi << 32; */
4090
                xacc[i] = vshlq_n_u64(prod_hi, 32);
4091
                /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
4092
                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
4093
            }
4094
    }   }
4095
}
4096
4097
#endif
4098
4099
#if (XXH_VECTOR == XXH_VSX)
4100
4101
XXH_FORCE_INLINE void
4102
XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
4103
                    const void* XXH_RESTRICT input,
4104
                    const void* XXH_RESTRICT secret)
4105
{
4106
    /* presumed aligned */
4107
    unsigned long long* const xacc = (unsigned long long*) acc;
4108
    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
4109
    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
4110
    xxh_u64x2 const v32 = { 32, 32 };
4111
    size_t i;
4112
    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4113
        /* data_vec = xinput[i]; */
4114
        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
4115
        /* key_vec = xsecret[i]; */
4116
        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
4117
        xxh_u64x2 const data_key = data_vec ^ key_vec;
4118
        /* shuffled = (data_key << 32) | (data_key >> 32); */
4119
        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
4120
        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
4121
        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
4122
        /* acc_vec = xacc[i]; */
4123
        xxh_u64x2 acc_vec        = vec_xl(0, xacc + 2 * i);
4124
        acc_vec += product;
4125
4126
        /* swap high and low halves */
4127
#ifdef __s390x__
4128
        acc_vec += vec_permi(data_vec, data_vec, 2);
4129
#else
4130
        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
4131
#endif
4132
        /* xacc[i] = acc_vec; */
4133
        vec_xst(acc_vec, 0, xacc + 2 * i);
4134
    }
4135
}
4136
4137
XXH_FORCE_INLINE void
4138
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4139
{
4140
    XXH_ASSERT((((size_t)acc) & 15) == 0);
4141
4142
    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
4143
        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
4144
        /* constants */
4145
        xxh_u64x2 const v32  = { 32, 32 };
4146
        xxh_u64x2 const v47 = { 47, 47 };
4147
        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
4148
        size_t i;
4149
        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4150
            /* xacc[i] ^= (xacc[i] >> 47); */
4151
            xxh_u64x2 const acc_vec  = xacc[i];
4152
            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
4153
4154
            /* xacc[i] ^= xsecret[i]; */
4155
            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
4156
            xxh_u64x2 const data_key = data_vec ^ key_vec;
4157
4158
            /* xacc[i] *= XXH_PRIME32_1 */
4159
            /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF);  */
4160
            xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);
4161
            /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32);  */
4162
            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
4163
            xacc[i] = prod_odd + (prod_even << v32);
4164
    }   }
4165
}
4166
4167
#endif
4168
4169
/* scalar variants - universal */
4170
4171
XXH_FORCE_INLINE void
4172
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
4173
                     const void* XXH_RESTRICT input,
4174
                     const void* XXH_RESTRICT secret)
4175
{
4176
    xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
4177
    const xxh_u8* const xinput  = (const xxh_u8*) input;  /* no alignment restriction */
4178
    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
4179
    size_t i;
4180
    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
4181
    for (i=0; i < XXH_ACC_NB; i++) {
4182
        xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);
4183
        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
4184
        xacc[i ^ 1] += data_val; /* swap adjacent lanes */
4185
        xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
4186
    }
4187
}
4188
4189
XXH_FORCE_INLINE void
4190
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4191
{
4192
    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
4193
    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
4194
    size_t i;
4195
    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
4196
    for (i=0; i < XXH_ACC_NB; i++) {
4197
        xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);
4198
        xxh_u64 acc64 = xacc[i];
4199
        acc64 = XXH_xorshift64(acc64, 47);
4200
        acc64 ^= key64;
4201
        acc64 *= XXH_PRIME32_1;
4202
        xacc[i] = acc64;
4203
    }
4204
}
4205
4206
XXH_FORCE_INLINE void
4207
XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4208
{
4209
    /*
4210
     * We need a separate pointer for the hack below,
4211
     * which requires a non-const pointer.
4212
     * Any decent compiler will optimize this out otherwise.
4213
     */
4214
    const xxh_u8* kSecretPtr = XXH3_kSecret;
4215
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4216
4217
#if defined(__clang__) && defined(__aarch64__)
4218
    /*
4219
     * UGLY HACK:
4220
     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
4221
     * placed sequentially, in order, at the top of the unrolled loop.
4222
     *
4223
     * While MOVK is great for generating constants (2 cycles for a 64-bit
4224
     * constant compared to 4 cycles for LDR), long MOVK chains stall the
4225
     * integer pipelines:
4226
     *   I   L   S
4227
     * MOVK
4228
     * MOVK
4229
     * MOVK
4230
     * MOVK
4231
     * ADD
4232
     * SUB      STR
4233
     *          STR
4234
     * By forcing loads from memory (as the asm line causes Clang to assume
4235
     * that XXH3_kSecretPtr has been changed), the pipelines are used more
4236
     * efficiently:
4237
     *   I   L   S
4238
     *      LDR
4239
     *  ADD LDR
4240
     *  SUB     STR
4241
     *          STR
4242
     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
4243
     *   without hack: 2654.4 MB/s
4244
     *   with hack:    3202.9 MB/s
4245
     */
4246
    XXH_COMPILER_GUARD(kSecretPtr);
4247
#endif
4248
    /*
4249
     * Note: in debug mode, this overrides the asm optimization
4250
     * and Clang will emit MOVK chains again.
4251
     */
4252
    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
4253
4254
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
4255
        int i;
4256
        for (i=0; i < nbRounds; i++) {
4257
            /*
4258
             * The asm hack causes Clang to assume that kSecretPtr aliases with
4259
             * customSecret, and on aarch64, this prevented LDP from merging two
4260
             * loads together for free. Putting the loads together before the stores
4261
             * properly generates LDP.
4262
             */
4263
            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
4264
            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
4265
            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
4266
            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
4267
    }   }
4268
}
4269
4270
4271
typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
4272
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
4273
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
4274
4275
4276
#if (XXH_VECTOR == XXH_AVX512)
4277
4278
#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
4279
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
4280
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
4281
4282
#elif (XXH_VECTOR == XXH_AVX2)
4283
4284
#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
4285
#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
4286
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
4287
4288
#elif (XXH_VECTOR == XXH_SSE2)
4289
4290
#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
4291
#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
4292
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
4293
4294
#elif (XXH_VECTOR == XXH_NEON)
4295
4296
#define XXH3_accumulate_512 XXH3_accumulate_512_neon
4297
#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
4298
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
4299
4300
#elif (XXH_VECTOR == XXH_VSX)
4301
4302
#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
4303
#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
4304
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
4305
4306
#else /* scalar */
4307
4308
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
4309
#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
4310
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
4311
4312
#endif
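The dispatch above binds XXH3_accumulate_512, XXH3_scrambleAcc and XXH3_initCustomSecret to one concrete implementation per translation unit, driven by XXH_VECTOR. As a hedged sketch (it assumes the XXH_VECTOR override described earlier in this header is available in this vendored copy), a build can pin the portable scalar path like this:

/* Illustrative only, not part of this file: pin the dispatch before the header is included. */
#define XXH_VECTOR XXH_SCALAR   /* selects XXH3_accumulate_512_scalar and friends */
#include "xxhash.h"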
4313
4314
4315
4316
#ifndef XXH_PREFETCH_DIST
4317
#  ifdef __clang__
4318
#    define XXH_PREFETCH_DIST 320
4319
#  else
4320
#    if (XXH_VECTOR == XXH_AVX512)
4321
#      define XXH_PREFETCH_DIST 512
4322
#    else
4323
#      define XXH_PREFETCH_DIST 384
4324
#    endif
4325
#  endif  /* __clang__ */
4326
#endif  /* XXH_PREFETCH_DIST */
4327
4328
/*
4329
 * XXH3_accumulate()
4330
 * Loops over XXH3_accumulate_512().
4331
 * Assumption: nbStripes will not overflow the secret size
4332
 */
4333
XXH_FORCE_INLINE void
4334
XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
4335
                const xxh_u8* XXH_RESTRICT input,
4336
                const xxh_u8* XXH_RESTRICT secret,
4337
                      size_t nbStripes,
4338
                      XXH3_f_accumulate_512 f_acc512)
4339
{
4340
    size_t n;
4341
    for (n = 0; n < nbStripes; n++ ) {
4342
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
4343
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
4344
        f_acc512(acc,
4345
                 in,
4346
                 secret + n*XXH_SECRET_CONSUME_RATE);
4347
    }
4348
}
4349
4350
XXH_FORCE_INLINE void
4351
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
4352
                      const xxh_u8* XXH_RESTRICT input, size_t len,
4353
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4354
                            XXH3_f_accumulate_512 f_acc512,
4355
                            XXH3_f_scrambleAcc f_scramble)
4356
{
4357
    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
4358
    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
4359
    size_t const nb_blocks = (len - 1) / block_len;
4360
4361
    size_t n;
4362
4363
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
4364
4365
    for (n = 0; n < nb_blocks; n++) {
4366
        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
4367
        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
4368
    }
4369
4370
    /* last partial block */
4371
    XXH_ASSERT(len > XXH_STRIPE_LEN);
4372
    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
4373
        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
4374
        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
4375
4376
        /* last stripe */
4377
        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
4378
#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
4379
            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
4380
    }   }
4381
}
4382
4383
XXH_FORCE_INLINE xxh_u64
4384
XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
4385
{
4386
    return XXH3_mul128_fold64(
4387
               acc[0] ^ XXH_readLE64(secret),
4388
               acc[1] ^ XXH_readLE64(secret+8) );
4389
}
4390
4391
static XXH64_hash_t
4392
XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
4393
{
4394
    xxh_u64 result64 = start;
4395
    size_t i = 0;
4396
4397
    for (i = 0; i < 4; i++) {
4398
        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
4399
#if defined(__clang__)                                /* Clang */ \
4400
    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
4401
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
4402
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
4403
        /*
4404
         * UGLY HACK:
4405
         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
4406
         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
4407
         * XXH3_64bits, len == 256, Snapdragon 835:
4408
         *   without hack: 2063.7 MB/s
4409
         *   with hack:    2560.7 MB/s
4410
         */
4411
        XXH_COMPILER_GUARD(result64);
4412
#endif
4413
    }
4414
4415
    return XXH3_avalanche(result64);
4416
}
4417
4418
#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
4419
                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
4420
4421
XXH_FORCE_INLINE XXH64_hash_t
4422
XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
4423
                           const void* XXH_RESTRICT secret, size_t secretSize,
4424
                           XXH3_f_accumulate_512 f_acc512,
4425
                           XXH3_f_scrambleAcc f_scramble)
4426
{
4427
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
4428
4429
    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
4430
4431
    /* converge into final hash */
4432
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
4433
    /* do not align on 8, so that the secret is different from the accumulator */
4434
#define XXH_SECRET_MERGEACCS_START 11
4435
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
4436
    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
4437
}
4438
4439
/*
4440
 * It's important for performance to transmit the secret's size (when it's static)
4441
 * so that the compiler can properly optimize the vectorized loop.
4442
 * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
4443
 */
4444
XXH_FORCE_INLINE XXH64_hash_t
4445
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
4446
                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
4447
{
4448
    (void)seed64;
4449
    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
4450
}
4451
4452
/*
4453
 * It's preferable for performance that XXH3_hashLong is not inlined,
4454
 * as it results in a smaller function for small data, which is easier on the instruction cache.
4455
 * Note that inside this no_inline function, we do inline the internal loop,
4456
 * and provide a statically defined secret size to allow optimization of the vector loop.
4457
 */
4458
XXH_NO_INLINE XXH64_hash_t
4459
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
4460
                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
4461
{
4462
    (void)seed64; (void)secret; (void)secretLen;
4463
    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
4464
}
4465
4466
/*
4467
 * XXH3_hashLong_64b_withSeed():
4468
 * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
4469
 * and then use this key for long mode hashing.
4470
 *
4471
 * This operation is decently fast but nonetheless costs a little bit of time.
4472
 * Try to avoid it whenever possible (typically when seed==0).
4473
 *
4474
 * It's important for performance that XXH3_hashLong is not inlined. Not sure
4475
 * why (uop cache maybe?), but the difference is large and easily measurable.
4476
 */
4477
XXH_FORCE_INLINE XXH64_hash_t
4478
XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
4479
                                    XXH64_hash_t seed,
4480
                                    XXH3_f_accumulate_512 f_acc512,
4481
                                    XXH3_f_scrambleAcc f_scramble,
4482
                                    XXH3_f_initCustomSecret f_initSec)
4483
{
4484
    if (seed == 0)
4485
        return XXH3_hashLong_64b_internal(input, len,
4486
                                          XXH3_kSecret, sizeof(XXH3_kSecret),
4487
                                          f_acc512, f_scramble);
4488
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
4489
        f_initSec(secret, seed);
4490
        return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
4491
                                          f_acc512, f_scramble);
4492
    }
4493
}
4494
4495
/*
4496
 * It's important for performance that XXH3_hashLong is not inlined.
4497
 */
4498
XXH_NO_INLINE XXH64_hash_t
4499
XXH3_hashLong_64b_withSeed(const void* input, size_t len,
4500
                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
4501
{
4502
    (void)secret; (void)secretLen;
4503
    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
4504
                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
4505
}
4506
4507
4508
typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
4509
                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
4510
4511
XXH_FORCE_INLINE XXH64_hash_t
4512
XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
4513
                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
4514
                     XXH3_hashLong64_f f_hashLong)
4515
{
4516
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
4517
    /*
4518
     * If an action is to be taken if `secretLen` condition is not respected,
4519
     * it should be done here.
4520
     * For now, it's a contract pre-condition.
4521
     * Adding a check and a branch here would cost performance at every hash.
4522
     * Also, note that function signature doesn't offer room to return an error.
4523
     */
4524
    if (len <= 16)
4525
        return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
4526
    if (len <= 128)
4527
        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
4528
    if (len <= XXH3_MIDSIZE_MAX)
4529
        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
4530
    return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
4531
}
4532
4533
4534
/* ===   Public entry point   === */
4535
4536
/*! @ingroup xxh3_family */
4537
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
4538
{
4539
    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
4540
}
4541
4542
/*! @ingroup xxh3_family */
4543
XXH_PUBLIC_API XXH64_hash_t
4544
XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
4545
{
4546
    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
4547
}
4548
4549
/*! @ingroup xxh3_family */
4550
XXH_PUBLIC_API XXH64_hash_t
4551
XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
4552
{
4553
    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
4554
}
4555
4556
XXH_PUBLIC_API XXH64_hash_t
4557
XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
4558
{
4559
    if (len <= XXH3_MIDSIZE_MAX)
4560
        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
4561
    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
4562
}
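The four entry points above differ only in how the secret and seed are supplied; a minimal one-shot usage sketch (buffer contents and seed value are illustrative, and it assumes this header is on the include path) is:

#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main(void)
{
    const char msg[] = "hello, xxh3";
    XXH64_hash_t const h1 = XXH3_64bits(msg, strlen(msg));              /* default secret, seed 0 */
    XXH64_hash_t const h2 = XXH3_64bits_withSeed(msg, strlen(msg), 42); /* default secret altered by the seed */
    printf("%016llx %016llx\n", (unsigned long long)h1, (unsigned long long)h2);
    return 0;
}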
4563
4564
4565
/* ===   XXH3 streaming   === */
4566
4567
/*
4568
 * Allocates a pointer that is always aligned to align.
4569
 *
4570
 * This must be freed with `XXH_alignedFree()`.
4571
 *
4572
 * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
4573
 * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
4574
 * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
4575
 *
4576
 * This underalignment previously caused a rather obvious crash which went
4577
 * completely unnoticed due to XXH3_createState() not actually being tested.
4578
 * Credit to RedSpah for noticing this bug.
4579
 *
4580
 * The alignment is done manually: functions like posix_memalign or _mm_malloc
4581
 * are avoided because, to maintain portability, we would have to write a
4582
 * fallback like this anyway, and besides, testing for the existence of library
4583
 * functions without relying on external build tools is impossible.
4584
 *
4585
 * The method is simple: Overallocate, manually align, and store the offset
4586
 * to the original behind the returned pointer.
4587
 *
4588
 * Align must be a power of 2 and 8 <= align <= 128.
4589
 */
4590
static void* XXH_alignedMalloc(size_t s, size_t align)
4591
{
4592
    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
4593
    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
4594
    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
4595
    {   /* Overallocate to make room for manual realignment and an offset byte */
4596
        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
4597
        if (base != NULL) {
4598
            /*
4599
             * Get the offset needed to align this pointer.
4600
             *
4601
             * Even if the returned pointer is aligned, there will always be
4602
             * at least one byte to store the offset to the original pointer.
4603
             */
4604
            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
4605
            /* Add the offset for the now-aligned pointer */
4606
            xxh_u8* ptr = base + offset;
4607
4608
            XXH_ASSERT((size_t)ptr % align == 0);
4609
4610
            /* Store the offset immediately before the returned pointer. */
4611
            ptr[-1] = (xxh_u8)offset;
4612
            return ptr;
4613
        }
4614
        return NULL;
4615
    }
4616
}
4617
/*
4618
 * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
4619
 * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
4620
 */
4621
static void XXH_alignedFree(void* p)
4622
{
4623
    if (p != NULL) {
4624
        xxh_u8* ptr = (xxh_u8*)p;
4625
        /* Get the offset byte we added in XXH_alignedMalloc(). */
4626
        xxh_u8 offset = ptr[-1];
4627
        /* Free the original malloc'd pointer */
4628
        xxh_u8* base = ptr - offset;
4629
        XXH_free(base);
4630
    }
4631
}
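As a concrete illustration of the offset scheme (the addresses are hypothetical): with align = 64 and XXH_malloc() returning base = 0x1008, the offset is 64 - (0x1008 & 63) = 56, so the returned pointer is 0x1040 and ptr[-1] stores 56; XXH_alignedFree() reads that byte back and frees 0x1040 - 56 = 0x1008. If base happened to be already 64-byte aligned, the offset would be 64 rather than 0, which is why the over-allocation of s + align always leaves at least one byte in front of the returned pointer for the bookkeeping.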
4632
/*! @ingroup xxh3_family */
4633
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
4634
{
4635
    XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
4636
    if (state==NULL) return NULL;
4637
    XXH3_INITSTATE(state);
4638
    return state;
4639
}
4640
4641
/*! @ingroup xxh3_family */
4642
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
4643
{
4644
    XXH_alignedFree(statePtr);
4645
    return XXH_OK;
4646
}
4647
4648
/*! @ingroup xxh3_family */
4649
XXH_PUBLIC_API void
4650
XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
4651
{
4652
    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
4653
}
4654
4655
static void
4656
XXH3_reset_internal(XXH3_state_t* statePtr,
4657
                    XXH64_hash_t seed,
4658
                    const void* secret, size_t secretSize)
4659
{
4660
    size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
4661
    size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
4662
    XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
4663
    XXH_ASSERT(statePtr != NULL);
4664
    /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
4665
    memset((char*)statePtr + initStart, 0, initLength);
4666
    statePtr->acc[0] = XXH_PRIME32_3;
4667
    statePtr->acc[1] = XXH_PRIME64_1;
4668
    statePtr->acc[2] = XXH_PRIME64_2;
4669
    statePtr->acc[3] = XXH_PRIME64_3;
4670
    statePtr->acc[4] = XXH_PRIME64_4;
4671
    statePtr->acc[5] = XXH_PRIME32_2;
4672
    statePtr->acc[6] = XXH_PRIME64_5;
4673
    statePtr->acc[7] = XXH_PRIME32_1;
4674
    statePtr->seed = seed;
4675
    statePtr->useSeed = (seed != 0);
4676
    statePtr->extSecret = (const unsigned char*)secret;
4677
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
4678
    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
4679
    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
4680
}
4681
4682
/*! @ingroup xxh3_family */
4683
XXH_PUBLIC_API XXH_errorcode
4684
XXH3_64bits_reset(XXH3_state_t* statePtr)
4685
{
4686
    if (statePtr == NULL) return XXH_ERROR;
4687
    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
4688
    return XXH_OK;
4689
}
4690
4691
/*! @ingroup xxh3_family */
4692
XXH_PUBLIC_API XXH_errorcode
4693
XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
4694
{
4695
    if (statePtr == NULL) return XXH_ERROR;
4696
    XXH3_reset_internal(statePtr, 0, secret, secretSize);
4697
    if (secret == NULL) return XXH_ERROR;
4698
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
4699
    return XXH_OK;
4700
}
4701
4702
/*! @ingroup xxh3_family */
4703
XXH_PUBLIC_API XXH_errorcode
4704
XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
4705
{
4706
    if (statePtr == NULL) return XXH_ERROR;
4707
    if (seed==0) return XXH3_64bits_reset(statePtr);
4708
    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
4709
        XXH3_initCustomSecret(statePtr->customSecret, seed);
4710
    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
4711
    return XXH_OK;
4712
}
4713
4714
/*! @ingroup xxh3_family */
4715
XXH_PUBLIC_API XXH_errorcode
4716
XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
4717
{
4718
    if (statePtr == NULL) return XXH_ERROR;
4719
    if (secret == NULL) return XXH_ERROR;
4720
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
4721
    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
4722
    statePtr->useSeed = 1; /* always, even if seed64==0 */
4723
    return XXH_OK;
4724
}
4725
4726
/* Note : when XXH3_consumeStripes() is invoked,
4727
 * there must be a guarantee that at least one more byte will be consumed from input
4728
 * so that the function can blindly consume all stripes using the "normal" secret segment */
4729
XXH_FORCE_INLINE void
4730
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
4731
                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
4732
                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
4733
                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
4734
                    XXH3_f_accumulate_512 f_acc512,
4735
                    XXH3_f_scrambleAcc f_scramble)
4736
{
4737
    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
4738
    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
4739
    if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
4740
        /* need a scrambling operation */
4741
        size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
4742
        size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
4743
        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
4744
        f_scramble(acc, secret + secretLimit);
4745
        XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
4746
        *nbStripesSoFarPtr = nbStripesAfterBlock;
4747
    } else {
4748
        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
4749
        *nbStripesSoFarPtr += nbStripes;
4750
    }
4751
}
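A numeric walk-through of the scrambling branch, with illustrative values: if nbStripesPerBlock = 16, *nbStripesSoFarPtr = 14 and nbStripes = 5, then 16 - 14 <= 5 holds, so 2 stripes are accumulated with the secret at offset 14 * XXH_SECRET_CONSUME_RATE, the accumulator is scrambled once, the remaining 3 stripes restart from the beginning of the secret, and *nbStripesSoFarPtr ends up at 3.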
4752
4753
#ifndef XXH3_STREAM_USE_STACK
4754
# ifndef __clang__ /* clang doesn't need additional stack space */
4755
#   define XXH3_STREAM_USE_STACK 1
4756
# endif
4757
#endif
4758
/*
4759
 * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
4760
 */
4761
XXH_FORCE_INLINE XXH_errorcode
4762
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
4763
            const xxh_u8* XXH_RESTRICT input, size_t len,
4764
            XXH3_f_accumulate_512 f_acc512,
4765
            XXH3_f_scrambleAcc f_scramble)
4766
{
4767
    if (input==NULL) {
4768
        XXH_ASSERT(len == 0);
4769
        return XXH_OK;
4770
    }
4771
4772
    XXH_ASSERT(state != NULL);
4773
    {   const xxh_u8* const bEnd = input + len;
4774
        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
4775
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
4776
        /* For some reason, gcc and MSVC seem to suffer greatly
4777
         * when operating accumulators directly into state.
4778
         * Operating into stack space seems to enable proper optimization.
4779
         * clang, on the other hand, doesn't seem to need this trick */
4780
        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
4781
#else
4782
        xxh_u64* XXH_RESTRICT const acc = state->acc;
4783
#endif
4784
        state->totalLen += len;
4785
        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
4786
4787
        /* small input : just fill in tmp buffer */
4788
        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
4789
            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
4790
            state->bufferedSize += (XXH32_hash_t)len;
4791
            return XXH_OK;
4792
        }
4793
4794
        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
4795
        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
4796
        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
4797
4798
        /*
4799
         * Internal buffer is partially filled (always, except at beginning)
4800
         * Complete it, then consume it.
4801
         */
4802
        if (state->bufferedSize) {
4803
            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
4804
            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
4805
            input += loadSize;
4806
            XXH3_consumeStripes(acc,
4807
                               &state->nbStripesSoFar, state->nbStripesPerBlock,
4808
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
4809
                                secret, state->secretLimit,
4810
                                f_acc512, f_scramble);
4811
            state->bufferedSize = 0;
4812
        }
4813
        XXH_ASSERT(input < bEnd);
4814
4815
        /* large input to consume : ingest per full block */
4816
        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
4817
            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
4818
            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
4819
            /* join to current block's end */
4820
            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
4821
                XXH_ASSERT(nbStripesToEnd <= nbStripes);
4822
                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
4823
                f_scramble(acc, secret + state->secretLimit);
4824
                state->nbStripesSoFar = 0;
4825
                input += nbStripesToEnd * XXH_STRIPE_LEN;
4826
                nbStripes -= nbStripesToEnd;
4827
            }
4828
            /* consume per entire blocks */
4829
            while(nbStripes >= state->nbStripesPerBlock) {
4830
                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
4831
                f_scramble(acc, secret + state->secretLimit);
4832
                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
4833
                nbStripes -= state->nbStripesPerBlock;
4834
            }
4835
            /* consume last partial block */
4836
            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
4837
            input += nbStripes * XXH_STRIPE_LEN;
4838
            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
4839
            state->nbStripesSoFar = nbStripes;
4840
            /* buffer predecessor of last partial stripe */
4841
            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
4842
            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
4843
        } else {
4844
            /* content to consume <= block size */
4845
            /* Consume input by a multiple of internal buffer size */
4846
            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
4847
                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
4848
                do {
4849
                    XXH3_consumeStripes(acc,
4850
                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
4851
                                        input, XXH3_INTERNALBUFFER_STRIPES,
4852
                                        secret, state->secretLimit,
4853
                                        f_acc512, f_scramble);
4854
                    input += XXH3_INTERNALBUFFER_SIZE;
4855
                } while (input<limit);
4856
                /* buffer predecessor of last partial stripe */
4857
                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
4858
            }
4859
        }
4860
4861
        /* Some remaining input (always) : buffer it */
4862
        XXH_ASSERT(input < bEnd);
4863
        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
4864
        XXH_ASSERT(state->bufferedSize == 0);
4865
        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
4866
        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
4867
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
4868
        /* save stack accumulators into state */
4869
        memcpy(state->acc, acc, sizeof(acc));
4870
#endif
4871
    }
4872
4873
    return XXH_OK;
4874
}
4875
4876
/*! @ingroup xxh3_family */
4877
XXH_PUBLIC_API XXH_errorcode
4878
XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
4879
{
4880
    return XXH3_update(state, (const xxh_u8*)input, len,
4881
                       XXH3_accumulate_512, XXH3_scrambleAcc);
4882
}
4883
4884
4885
XXH_FORCE_INLINE void
4886
XXH3_digest_long (XXH64_hash_t* acc,
4887
                  const XXH3_state_t* state,
4888
                  const unsigned char* secret)
4889
{
4890
    /*
4891
     * Digest on a local copy. This way, the state remains unaltered, and it can
4892
     * continue ingesting more input afterwards.
4893
     */
4894
    XXH_memcpy(acc, state->acc, sizeof(state->acc));
4895
    if (state->bufferedSize >= XXH_STRIPE_LEN) {
4896
        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
4897
        size_t nbStripesSoFar = state->nbStripesSoFar;
4898
        XXH3_consumeStripes(acc,
4899
                           &nbStripesSoFar, state->nbStripesPerBlock,
4900
                            state->buffer, nbStripes,
4901
                            secret, state->secretLimit,
4902
                            XXH3_accumulate_512, XXH3_scrambleAcc);
4903
        /* last stripe */
4904
        XXH3_accumulate_512(acc,
4905
                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
4906
                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
4907
    } else {  /* bufferedSize < XXH_STRIPE_LEN */
4908
        xxh_u8 lastStripe[XXH_STRIPE_LEN];
4909
        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
4910
        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
4911
        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
4912
        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
4913
        XXH3_accumulate_512(acc,
4914
                            lastStripe,
4915
                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
4916
    }
4917
}
4918
4919
/*! @ingroup xxh3_family */
4920
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
4921
{
4922
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
4923
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
4924
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
4925
        XXH3_digest_long(acc, state, secret);
4926
        return XXH3_mergeAccs(acc,
4927
                              secret + XXH_SECRET_MERGEACCS_START,
4928
                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
4929
    }
4930
    /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
4931
    if (state->useSeed)
4932
        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
4933
    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
4934
                                  secret, state->secretLimit + XXH_STRIPE_LEN);
4935
}
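Putting the streaming pieces above together, a minimal sketch of incremental hashing (the chunk size is arbitrary, and it assumes this header is on the include path):

#include <stdio.h>
#include "xxhash.h"

/* Hash a stream chunk by chunk with the 64-bit streaming API defined above. */
static XXH64_hash_t hash_stream(FILE* f)
{
    XXH3_state_t* const state = XXH3_createState();
    char buf[4096];
    size_t n;
    XXH64_hash_t h = 0;

    if (state == NULL) return 0;              /* allocation failure */
    XXH3_64bits_reset(state);                 /* default secret, seed 0 */
    while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
        XXH3_64bits_update(state, buf, n);    /* ingest one chunk */
    h = XXH3_64bits_digest(state);            /* non-destructive: further updates remain possible */
    XXH3_freeState(state);
    return h;
}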
4936
4937
4938
4939
/* ==========================================
4940
 * XXH3 128 bits (a.k.a XXH128)
4941
 * ==========================================
4942
 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
4943
 * even without counting the significantly larger output size.
4944
 *
4945
 * For example, extra steps are taken to avoid the seed-dependent collisions
4946
 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
4947
 *
4948
 * This strength naturally comes at the cost of some speed, especially on short
4949
 * lengths. Note that longer hashes are about as fast as the 64-bit version
4950
 * due to it using only a slight modification of the 64-bit loop.
4951
 *
4952
 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
4953
 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
4954
 */
4955
4956
XXH_FORCE_INLINE XXH128_hash_t
4957
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4958
{
4959
    /* A doubled version of 1to3_64b with different constants. */
4960
    XXH_ASSERT(input != NULL);
4961
    XXH_ASSERT(1 <= len && len <= 3);
4962
    XXH_ASSERT(secret != NULL);
4963
    /*
4964
     * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
4965
     * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
4966
     * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
4967
     */
4968
    {   xxh_u8 const c1 = input[0];
4969
        xxh_u8 const c2 = input[len >> 1];
4970
        xxh_u8 const c3 = input[len - 1];
4971
        xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
4972
                                | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
4973
        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
4974
        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
4975
        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
4976
        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
4977
        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
4978
        XXH128_hash_t h128;
4979
        h128.low64  = XXH64_avalanche(keyed_lo);
4980
        h128.high64 = XXH64_avalanche(keyed_hi);
4981
        return h128;
4982
    }
4983
}
4984
4985
XXH_FORCE_INLINE XXH128_hash_t
4986
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4987
{
4988
    XXH_ASSERT(input != NULL);
4989
    XXH_ASSERT(secret != NULL);
4990
    XXH_ASSERT(4 <= len && len <= 8);
4991
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
4992
    {   xxh_u32 const input_lo = XXH_readLE32(input);
4993
        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
4994
        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
4995
        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
4996
        xxh_u64 const keyed = input_64 ^ bitflip;
4997
4998
        /* Shift len to the left to ensure it is even, this avoids even multiplies. */
4999
        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
5000
5001
        m128.high64 += (m128.low64 << 1);
5002
        m128.low64  ^= (m128.high64 >> 3);
5003
5004
        m128.low64   = XXH_xorshift64(m128.low64, 35);
5005
        m128.low64  *= 0x9FB21C651E98DF25ULL;
5006
        m128.low64   = XXH_xorshift64(m128.low64, 28);
5007
        m128.high64  = XXH3_avalanche(m128.high64);
5008
        return m128;
5009
    }
5010
}
5011
5012
XXH_FORCE_INLINE XXH128_hash_t
5013
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5014
{
5015
    XXH_ASSERT(input != NULL);
5016
    XXH_ASSERT(secret != NULL);
5017
    XXH_ASSERT(9 <= len && len <= 16);
5018
    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
5019
        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
5020
        xxh_u64 const input_lo = XXH_readLE64(input);
5021
        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
5022
        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
5023
        /*
5024
         * Put len in the middle of m128 to ensure that the length gets mixed to
5025
         * both the low and high bits in the 128x64 multiply below.
5026
         */
5027
        m128.low64 += (xxh_u64)(len - 1) << 54;
5028
        input_hi   ^= bitfliph;
5029
        /*
5030
         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
5031
         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
5032
         * the high 64 bits of m128.
5033
         *
5034
         * The best approach to this operation is different on 32-bit and 64-bit.
5035
         */
5036
        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
5037
            /*
5038
             * 32-bit optimized version, which is more readable.
5039
             *
5040
             * On 32-bit, it removes an ADC and delays a dependency between the two
5041
             * halves of m128.high64, but it generates an extra mask on 64-bit.
5042
             */
5043
            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
5044
        } else {
5045
            /*
5046
             * 64-bit optimized (albeit more confusing) version.
5047
             *
5048
             * Uses some properties of addition and multiplication to remove the mask:
5049
             *
5050
             * Let:
5051
             *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
5052
             *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
5053
             *    c = XXH_PRIME32_2
5054
             *
5055
             *    a + (b * c)
5056
             * Inverse Property: x + y - x == y
5057
             *    a + (b * (1 + c - 1))
5058
             * Distributive Property: x * (y + z) == (x * y) + (x * z)
5059
             *    a + (b * 1) + (b * (c - 1))
5060
             * Identity Property: x * 1 == x
5061
             *    a + b + (b * (c - 1))
5062
             *
5063
             * Substitute a, b, and c:
5064
             *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5065
             *
5066
             * Since input_hi.hi + input_hi.lo == input_hi, we get this:
5067
             *    input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5068
             */
5069
            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
5070
        }
5071
        /* m128 ^= XXH_swap64(m128 >> 64); */
5072
        m128.low64  ^= XXH_swap64(m128.high64);
5073
5074
        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
5075
            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
5076
            h128.high64 += m128.high64 * XXH_PRIME64_2;
5077
5078
            h128.low64   = XXH3_avalanche(h128.low64);
5079
            h128.high64  = XXH3_avalanche(h128.high64);
5080
            return h128;
5081
    }   }
5082
}
5083
5084
/*
5085
 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
5086
 */
5087
XXH_FORCE_INLINE XXH128_hash_t
5088
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5089
{
5090
    XXH_ASSERT(len <= 16);
5091
    {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
5092
        if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
5093
        if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
5094
        {   XXH128_hash_t h128;
5095
            xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
5096
            xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
5097
            h128.low64 = XXH64_avalanche(seed ^ bitflipl);
5098
            h128.high64 = XXH64_avalanche( seed ^ bitfliph);
5099
            return h128;
5100
    }   }
5101
}
5102
5103
/*
5104
 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
5105
 */
5106
XXH_FORCE_INLINE XXH128_hash_t
5107
XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
5108
              const xxh_u8* secret, XXH64_hash_t seed)
5109
{
5110
    acc.low64  += XXH3_mix16B (input_1, secret+0, seed);
5111
    acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
5112
    acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
5113
    acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
5114
    return acc;
5115
}
5116
5117
5118
XXH_FORCE_INLINE XXH128_hash_t
5119
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5120
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5121
                      XXH64_hash_t seed)
5122
{
5123
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
5124
    XXH_ASSERT(16 < len && len <= 128);
5125
5126
    {   XXH128_hash_t acc;
5127
        acc.low64 = len * XXH_PRIME64_1;
5128
        acc.high64 = 0;
5129
        if (len > 32) {
5130
            if (len > 64) {
5131
                if (len > 96) {
5132
                    acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
5133
                }
5134
                acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
5135
            }
5136
            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
5137
        }
5138
        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
5139
        {   XXH128_hash_t h128;
5140
            h128.low64  = acc.low64 + acc.high64;
5141
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
5142
                        + (acc.high64   * XXH_PRIME64_4)
5143
                        + ((len - seed) * XXH_PRIME64_2);
5144
            h128.low64  = XXH3_avalanche(h128.low64);
5145
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
5146
            return h128;
5147
        }
5148
    }
5149
}
5150
5151
XXH_NO_INLINE XXH128_hash_t
5152
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5153
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5154
                       XXH64_hash_t seed)
5155
{
5156
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
5157
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
5158
5159
    {   XXH128_hash_t acc;
5160
        int const nbRounds = (int)len / 32;
5161
        int i;
5162
        acc.low64 = len * XXH_PRIME64_1;
5163
        acc.high64 = 0;
5164
        for (i=0; i<4; i++) {
5165
            acc = XXH128_mix32B(acc,
5166
                                input  + (32 * i),
5167
                                input  + (32 * i) + 16,
5168
                                secret + (32 * i),
5169
                                seed);
5170
        }
5171
        acc.low64 = XXH3_avalanche(acc.low64);
5172
        acc.high64 = XXH3_avalanche(acc.high64);
5173
        XXH_ASSERT(nbRounds >= 4);
5174
        for (i=4 ; i < nbRounds; i++) {
5175
            acc = XXH128_mix32B(acc,
5176
                                input + (32 * i),
5177
                                input + (32 * i) + 16,
5178
                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
5179
                                seed);
5180
        }
5181
        /* last bytes */
5182
        acc = XXH128_mix32B(acc,
5183
                            input + len - 16,
5184
                            input + len - 32,
5185
                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
5186
                            0ULL - seed);
5187
5188
        {   XXH128_hash_t h128;
5189
            h128.low64  = acc.low64 + acc.high64;
5190
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
5191
                        + (acc.high64   * XXH_PRIME64_4)
5192
                        + ((len - seed) * XXH_PRIME64_2);
5193
            h128.low64  = XXH3_avalanche(h128.low64);
5194
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
5195
            return h128;
5196
        }
5197
    }
5198
}
5199
5200
XXH_FORCE_INLINE XXH128_hash_t
5201
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
5202
                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5203
                            XXH3_f_accumulate_512 f_acc512,
5204
                            XXH3_f_scrambleAcc f_scramble)
5205
{
5206
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5207
5208
    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
5209
5210
    /* converge into final hash */
5211
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
5212
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5213
    {   XXH128_hash_t h128;
5214
        h128.low64  = XXH3_mergeAccs(acc,
5215
                                     secret + XXH_SECRET_MERGEACCS_START,
5216
                                     (xxh_u64)len * XXH_PRIME64_1);
5217
        h128.high64 = XXH3_mergeAccs(acc,
5218
                                     secret + secretSize
5219
                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
5220
                                     ~((xxh_u64)len * XXH_PRIME64_2));
5221
        return h128;
5222
    }
5223
}
5224
5225
/*
5226
 * It's important for performance that XXH3_hashLong is not inlined.
5227
 */
5228
XXH_NO_INLINE XXH128_hash_t
5229
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
5230
                           XXH64_hash_t seed64,
5231
                           const void* XXH_RESTRICT secret, size_t secretLen)
5232
{
5233
    (void)seed64; (void)secret; (void)secretLen;
5234
    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
5235
                                       XXH3_accumulate_512, XXH3_scrambleAcc);
5236
}
5237
5238
/*
5239
 * It's important for performance to pass @secretLen (when it's static)
5240
 * to the compiler, so that it can properly optimize the vectorized loop.
5241
 */
5242
XXH_FORCE_INLINE XXH128_hash_t
5243
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
5244
                              XXH64_hash_t seed64,
5245
                              const void* XXH_RESTRICT secret, size_t secretLen)
5246
{
5247
    (void)seed64;
5248
    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
5249
                                       XXH3_accumulate_512, XXH3_scrambleAcc);
5250
}
5251
5252
XXH_FORCE_INLINE XXH128_hash_t
5253
XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
5254
                                XXH64_hash_t seed64,
5255
                                XXH3_f_accumulate_512 f_acc512,
5256
                                XXH3_f_scrambleAcc f_scramble,
5257
                                XXH3_f_initCustomSecret f_initSec)
5258
{
5259
    if (seed64 == 0)
5260
        return XXH3_hashLong_128b_internal(input, len,
5261
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
5262
                                           f_acc512, f_scramble);
5263
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5264
        f_initSec(secret, seed64);
5265
        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
5266
                                           f_acc512, f_scramble);
5267
    }
5268
}
5269
5270
/*
5271
 * It's important for performance that XXH3_hashLong is not inlined.
5272
 */
5273
XXH_NO_INLINE XXH128_hash_t
5274
XXH3_hashLong_128b_withSeed(const void* input, size_t len,
5275
                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
5276
{
5277
    (void)secret; (void)secretLen;
5278
    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
5279
                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
5280
}
5281
5282
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
5283
                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);
5284
5285
XXH_FORCE_INLINE XXH128_hash_t
5286
XXH3_128bits_internal(const void* input, size_t len,
5287
                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
5288
                      XXH3_hashLong128_f f_hl128)
5289
{
5290
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
5291
    /*
5292
     * If an action is to be taken if `secret` conditions are not respected,
5293
     * it should be done here.
5294
     * For now, it's a contract pre-condition.
5295
     * Adding a check and a branch here would cost performance at every hash.
5296
     */
5297
    if (len <= 16)
5298
        return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
5299
    if (len <= 128)
5300
        return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5301
    if (len <= XXH3_MIDSIZE_MAX)
5302
        return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5303
    return f_hl128(input, len, seed64, secret, secretLen);
5304
}
5305
5306
5307
/* ===   Public XXH128 API   === */
5308
5309
/*! @ingroup xxh3_family */
5310
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
5311
{
5312
    return XXH3_128bits_internal(input, len, 0,
5313
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
5314
                                 XXH3_hashLong_128b_default);
5315
}
5316
5317
/*! @ingroup xxh3_family */
5318
XXH_PUBLIC_API XXH128_hash_t
5319
XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
5320
{
5321
    return XXH3_128bits_internal(input, len, 0,
5322
                                 (const xxh_u8*)secret, secretSize,
5323
                                 XXH3_hashLong_128b_withSecret);
5324
}
5325
5326
/*! @ingroup xxh3_family */
5327
XXH_PUBLIC_API XXH128_hash_t
5328
XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
5329
{
5330
    return XXH3_128bits_internal(input, len, seed,
5331
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
5332
                                 XXH3_hashLong_128b_withSeed);
5333
}
5334
5335
/*! @ingroup xxh3_family */
5336
XXH_PUBLIC_API XXH128_hash_t
5337
XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
5338
{
5339
    if (len <= XXH3_MIDSIZE_MAX)
5340
        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
5341
    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
5342
}
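/*!
 * Illustrative usage sketch for XXH3_128bits_withSecretandSeed() above.
 * The 192-byte buffer mirrors XXH_SECRET_DEFAULT_SIZE and is hard-coded here
 * only for the example; deriving the secret from the same seed (via
 * XXH3_generateSecret_fromSeed(), defined further below) is assumed to keep
 * results consistent across the XXH3_MIDSIZE_MAX length threshold.
 * @code
 *   #include "xxhash.h"
 *
 *   XXH128_hash_t hash_with_secret_and_seed(const void* data, size_t len, XXH64_hash_t seed)
 *   {
 *       unsigned char secret[192];                   /* XXH_SECRET_DEFAULT_SIZE bytes */
 *       XXH3_generateSecret_fromSeed(secret, seed);  /* secret derived from the seed */
 *       return XXH3_128bits_withSecretandSeed(data, len, secret, sizeof(secret), seed);
 *   }
 * @endcode
 */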
5343
5344
/*! @ingroup xxh3_family */
5345
XXH_PUBLIC_API XXH128_hash_t
5346
XXH128(const void* input, size_t len, XXH64_hash_t seed)
5347
{
5348
    return XXH3_128bits_withSeed(input, len, seed);
5349
}
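/*!
 * Illustrative usage sketch for the one-shot XXH128 API above; the input
 * string and zero seed are arbitrary example values.
 * @code
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   int main(void)
 *   {
 *       const char* const msg = "example input";
 *       XXH128_hash_t const h = XXH128(msg, strlen(msg), 0);  /* seed = 0 */
 *       printf("%016llx%016llx\n",
 *              (unsigned long long)h.high64,
 *              (unsigned long long)h.low64);
 *       return 0;
 *   }
 * @endcode
 */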
5350
5351
5352
/* ===   XXH3 128-bit streaming   === */
5353
5354
/*
5355
 * All initialization and update functions are identical to the 64-bit streaming variant.
5356
 * The only difference is the finalization routine.
5357
 */
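/*!
 * Illustrative sketch of the 128-bit streaming flow described above:
 * create a state, reset it, feed data in chunks, then digest. Error handling
 * is reduced to the bare minimum for brevity.
 * @code
 *   #include "xxhash.h"
 *
 *   XXH128_hash_t hash_two_chunks(const void* a, size_t lenA,
 *                                 const void* b, size_t lenB,
 *                                 XXH64_hash_t seed)
 *   {
 *       XXH128_hash_t h = { 0, 0 };
 *       XXH3_state_t* const state = XXH3_createState();
 *       if (state == NULL) return h;
 *       if (XXH3_128bits_reset_withSeed(state, seed) == XXH_OK
 *        && XXH3_128bits_update(state, a, lenA) == XXH_OK
 *        && XXH3_128bits_update(state, b, lenB) == XXH_OK) {
 *           h = XXH3_128bits_digest(state);
 *       }
 *       XXH3_freeState(state);
 *       return h;
 *   }
 * @endcode
 */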
5358
5359
/*! @ingroup xxh3_family */
5360
XXH_PUBLIC_API XXH_errorcode
5361
XXH3_128bits_reset(XXH3_state_t* statePtr)
5362
{
5363
    return XXH3_64bits_reset(statePtr);
5364
}
5365
5366
/*! @ingroup xxh3_family */
5367
XXH_PUBLIC_API XXH_errorcode
5368
XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
5369
{
5370
    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
5371
}
5372
5373
/*! @ingroup xxh3_family */
5374
XXH_PUBLIC_API XXH_errorcode
5375
XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
5376
{
5377
    return XXH3_64bits_reset_withSeed(statePtr, seed);
5378
}
5379
5380
/*! @ingroup xxh3_family */
5381
XXH_PUBLIC_API XXH_errorcode
5382
XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
5383
{
5384
    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
5385
}
5386
5387
/*! @ingroup xxh3_family */
5388
XXH_PUBLIC_API XXH_errorcode
5389
XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
5390
{
5391
    return XXH3_update(state, (const xxh_u8*)input, len,
5392
                       XXH3_accumulate_512, XXH3_scrambleAcc);
5393
}
5394
5395
/*! @ingroup xxh3_family */
5396
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
5397
{
5398
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5399
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
5400
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
5401
        XXH3_digest_long(acc, state, secret);
5402
        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5403
        {   XXH128_hash_t h128;
5404
            h128.low64  = XXH3_mergeAccs(acc,
5405
                                         secret + XXH_SECRET_MERGEACCS_START,
5406
                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
5407
            h128.high64 = XXH3_mergeAccs(acc,
5408
                                         secret + state->secretLimit + XXH_STRIPE_LEN
5409
                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
5410
                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
5411
            return h128;
5412
        }
5413
    }
5414
    /* len <= XXH3_MIDSIZE_MAX : short code */
5415
    if (state->seed)
5416
        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
5417
    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
5418
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
5419
}
5420
5421
/* 128-bit utility functions */
5422
5423
#include <string.h>   /* memcmp, memcpy */
5424
5425
/* return : 1 if equal, 0 if different */
5426
/*! @ingroup xxh3_family */
5427
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
5428
{
5429
    /* note : XXH128_hash_t is compact; it has no padding bytes */
5430
    return !(memcmp(&h1, &h2, sizeof(h1)));
5431
}
5432
5433
/* This prototype is compatible with stdlib's qsort().
5434
 * return : >0 if *h128_1  > *h128_2
5435
 *          <0 if *h128_1  < *h128_2
5436
 *          =0 if *h128_1 == *h128_2  */
5437
/*! @ingroup xxh3_family */
5438
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
5439
{
5440
    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
5441
    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
5442
    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
5443
    /* note : this bets that, in most cases, hash values are different */
5444
    if (hcmp) return hcmp;
5445
    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
5446
}
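/*!
 * Illustrative usage sketch for the comparison helpers above: XXH128_cmp()
 * already matches qsort()'s comparator prototype, and XXH128_isEqual() gives
 * a direct equality test. Sample inputs are arbitrary.
 * @code
 *   #include <stdlib.h>
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   /* sort an array of 128-bit hashes in place */
 *   void sort_hashes(XXH128_hash_t* table, size_t count)
 *   {
 *       qsort(table, count, sizeof(*table), XXH128_cmp);
 *   }
 *
 *   /* 1 if both strings hash to the same 128-bit value, 0 otherwise */
 *   int same_hash(const char* a, const char* b)
 *   {
 *       return XXH128_isEqual(XXH128(a, strlen(a), 0),
 *                             XXH128(b, strlen(b), 0));
 *   }
 * @endcode
 */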
5447
5448
5449
/*======   Canonical representation   ======*/
5450
/*! @ingroup xxh3_family */
5451
XXH_PUBLIC_API void
5452
XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
5453
{
5454
    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
5455
    if (XXH_CPU_LITTLE_ENDIAN) {
5456
        hash.high64 = XXH_swap64(hash.high64);
5457
        hash.low64  = XXH_swap64(hash.low64);
5458
    }
5459
    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
5460
    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
5461
}
5462
5463
/*! @ingroup xxh3_family */
5464
XXH_PUBLIC_API XXH128_hash_t
5465
XXH128_hashFromCanonical(const XXH128_canonical_t* src)
5466
{
5467
    XXH128_hash_t h;
5468
    h.high64 = XXH_readBE64(src);
5469
    h.low64  = XXH_readBE64(src->digest + 8);
5470
    return h;
5471
}
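/*!
 * Illustrative sketch of round-tripping a hash through its canonical
 * (big-endian) form, e.g. before writing it to a file or the network.
 * Buffer handling is simplified for the example.
 * @code
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   /* store: hash -> 16 canonical bytes, independent of host endianness */
 *   void store_hash(unsigned char out[16], XXH128_hash_t hash)
 *   {
 *       XXH128_canonical_t canon;
 *       XXH128_canonicalFromHash(&canon, hash);
 *       memcpy(out, canon.digest, sizeof(canon.digest));
 *   }
 *
 *   /* load: 16 canonical bytes -> hash value on the current host */
 *   XXH128_hash_t load_hash(const unsigned char in[16])
 *   {
 *       XXH128_canonical_t canon;
 *       memcpy(canon.digest, in, sizeof(canon.digest));
 *       return XXH128_hashFromCanonical(&canon);
 *   }
 * @endcode
 */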
5472
5473
5474
5475
/* ==========================================
5476
 * Secret generators
5477
 * ==========================================
5478
 */
5479
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
5480
5481
static void XXH3_combine16(void* dst, XXH128_hash_t h128)
5482
{
5483
    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
5484
    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
5485
}
5486
5487
/*! @ingroup xxh3_family */
5488
XXH_PUBLIC_API XXH_errorcode
5489
XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
5490
{
5491
    XXH_ASSERT(secretBuffer != NULL);
5492
    if (secretBuffer == NULL) return XXH_ERROR;
5493
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5494
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5495
    if (customSeedSize == 0) {
5496
        customSeed = XXH3_kSecret;
5497
        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
5498
    }
5499
    XXH_ASSERT(customSeed != NULL);
5500
    if (customSeed == NULL) return XXH_ERROR;
5501
5502
    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
5503
    {   size_t pos = 0;
5504
        while (pos < secretSize) {
5505
            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
5506
            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
5507
            pos += toCopy;
5508
    }   }
5509
5510
    {   size_t const nbSeg16 = secretSize / 16;
5511
        size_t n;
5512
        XXH128_canonical_t scrambler;
5513
        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
5514
        for (n=0; n<nbSeg16; n++) {
5515
            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
5516
            XXH3_combine16((char*)secretBuffer + n*16, h128);
5517
        }
5518
        /* last segment */
5519
        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
5520
    }
5521
    return XXH_OK;
5522
}
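/*!
 * Illustrative usage sketch for XXH3_generateSecret() above: derive a custom
 * secret from a low-entropy passphrase, then hash with it. The 192-byte size
 * is one valid choice; any size >= XXH3_SECRET_SIZE_MIN is accepted.
 * @code
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   XXH128_hash_t keyed_hash(const void* data, size_t len, const char* passphrase)
 *   {
 *       unsigned char secret[192];                 /* >= XXH3_SECRET_SIZE_MIN bytes */
 *       XXH128_hash_t const zero = { 0, 0 };
 *       if (XXH3_generateSecret(secret, sizeof(secret),
 *                               passphrase, strlen(passphrase)) != XXH_OK)
 *           return zero;                           /* generation failed */
 *       return XXH3_128bits_withSecret(data, len, secret, sizeof(secret));
 *   }
 * @endcode
 */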
5523
5524
/*! @ingroup xxh3_family */
5525
XXH_PUBLIC_API void
5526
XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
5527
{
5528
    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5529
    XXH3_initCustomSecret(secret, seed);
5530
    XXH_ASSERT(secretBuffer != NULL);
5531
    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
5532
}
5533
5534
5535
5536
/* Pop our optimization override from above */
5537
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
5538
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
5539
  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
5540
#  pragma GCC pop_options
5541
#endif
5542
5543
#endif  /* XXH_NO_LONG_LONG */
5544
5545
#endif  /* XXH_NO_XXH3 */
5546
5547
/*!
5548
 * @}
5549
 */
5550
#endif  /* XXH_IMPLEMENTATION */
5551
5552
5553
#if defined (__cplusplus)
5554
}
5555
#endif