/src/util-linux/include/xxhash.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * SPDX-License-Identifier: BSD-2-Clause  | 
3  |  |  *  | 
4  |  |  * xxHash - Extremely Fast Hash algorithm  | 
5  |  |  * Header File  | 
6  |  |  * Copyright (C) 2012-2020 Yann Collet  | 
7  |  |  *  | 
8  |  |  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)  | 
9  |  |  *  | 
10  |  |  * Redistribution and use in source and binary forms, with or without  | 
11  |  |  * modification, are permitted provided that the following conditions are  | 
12  |  |  * met:  | 
13  |  |  *  | 
14  |  |  *    * Redistributions of source code must retain the above copyright  | 
15  |  |  *      notice, this list of conditions and the following disclaimer.  | 
16  |  |  *    * Redistributions in binary form must reproduce the above  | 
17  |  |  *      copyright notice, this list of conditions and the following disclaimer  | 
18  |  |  *      in the documentation and/or other materials provided with the  | 
19  |  |  *      distribution.  | 
20  |  |  *  | 
21  |  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS  | 
22  |  |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  | 
23  |  |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  | 
24  |  |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT  | 
25  |  |  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  | 
26  |  |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  | 
27  |  |  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  | 
28  |  |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  | 
29  |  |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  | 
30  |  |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  | 
31  |  |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  | 
32  |  |  *  | 
33  |  |  * You can contact the author at:  | 
34  |  |  *   - xxHash homepage: https://www.xxhash.com  | 
35  |  |  *   - xxHash source repository: https://github.com/Cyan4973/xxHash  | 
36  |  |  */  | 
37  |  | /*!  | 
38  |  |  * @mainpage xxHash  | 
39  |  |  *  | 
40  |  |  * @file xxhash.h  | 
41  |  |  * xxHash prototypes and implementation  | 
42  |  |  */  | 
43  |  | /* TODO: update */  | 
44  |  | /* Notice extracted from xxHash homepage:  | 
45  |  |  | 
46  |  | xxHash is an extremely fast hash algorithm, running at RAM speed limits.  | 
47  |  | It also successfully passes all tests from the SMHasher suite.  | 
48  |  |  | 
49  |  | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)  | 
50  |  |  | 
51  |  | Name            Speed       Q.Score   Author  | 
52  |  | xxHash          5.4 GB/s     10  | 
53  |  | CrapWow         3.2 GB/s      2       Andrew  | 
54  |  | MurmurHash 3a   2.7 GB/s     10       Austin Appleby  | 
55  |  | SpookyHash      2.0 GB/s     10       Bob Jenkins  | 
56  |  | SBox            1.4 GB/s      9       Bret Mulvey  | 
57  |  | Lookup3         1.2 GB/s      9       Bob Jenkins  | 
58  |  | SuperFastHash   1.2 GB/s      1       Paul Hsieh  | 
59  |  | CityHash64      1.05 GB/s    10       Pike & Alakuijala  | 
60  |  | FNV             0.55 GB/s     5       Fowler, Noll, Vo  | 
61  |  | CRC32           0.43 GB/s     9  | 
62  |  | MD5-32          0.33 GB/s    10       Ronald L. Rivest  | 
63  |  | SHA1-32         0.28 GB/s    10  | 
64  |  |  | 
65  |  | Q.Score is a measure of quality of the hash function.  | 
66  |  | It depends on successfully passing SMHasher test set.  | 
67  |  | 10 is a perfect score.  | 
68  |  |  | 
69  |  | Note: SMHasher's CRC32 implementation is not the fastest one.  | 
70  |  | Other speed-oriented implementations can be faster,  | 
71  |  | especially in combination with PCLMUL instruction:  | 
72  |  | https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735  | 
73  |  |  | 
74  |  | A 64-bit version, named XXH64, is available since r35.  | 
75  |  | It offers much better speed, but for 64-bit applications only.  | 
76  |  | Name     Speed on 64 bits    Speed on 32 bits  | 
77  |  | XXH64       13.8 GB/s            1.9 GB/s  | 
78  |  | XXH32        6.8 GB/s            6.0 GB/s  | 
79  |  | */  | 
80  |  |  | 
81  |  | /* util-linux customizations: XXH_NAMESPACE makes the renaming table below prefix every public xxHash symbol with "ul_"; XXH_NO_XXH3 presumably leaves out the XXH3 family (TODO confirm, its use is not visible in this part of the file). NOTE(review): XXH_NAMESPACE is redefined on every inclusion, which conflicts with the XXH_INLINE_ value set by the XXH_INLINE_ALL section if the header is included again afterwards. */  | 
82  |  | #define XXH_NO_XXH3  | 
83  |  | #define XXH_NAMESPACE ul_  | 
84  |  |  | 
85  |  | #if defined (__cplusplus)  | 
86  |  | extern "C" { | 
87  |  | #endif  | 
88  |  |  | 
89  |  | /* ****************************  | 
90  |  |  *  INLINE mode  | 
91  |  |  ******************************/  | 
92  |  | /*!  | 
93  |  |  * XXH_INLINE_ALL (and XXH_PRIVATE_API)  | 
94  |  |  * Use these build macros to inline xxhash into the target unit.  | 
95  |  |  * Inlining improves performance on small inputs, especially when the length is  | 
96  |  |  * expressed as a compile-time constant:  | 
97  |  |  *  | 
98  |  |  *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html  | 
99  |  |  *  | 
100  |  |  * It also keeps xxHash symbols private to the unit, so they are not exported.  | 
101  |  |  *  | 
102  |  |  * Usage:  | 
103  |  |  *     #define XXH_INLINE_ALL  | 
104  |  |  *     #include "xxhash.h"  | 
105  |  |  *  | 
106  |  |  * Do not compile and link xxhash.o as a separate object, as it is not useful.  | 
107  |  |  */  | 
108  |  | #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \  | 
109  |  |     && !defined(XXH_INLINE_ALL_31684351384)  | 
110  |  |    /* XXH_INLINE_ALL_31684351384 records that this section has run, so it is traversed only once per unit */  | 
111  |  | #  define XXH_INLINE_ALL_31684351384  | 
112  |  |    /* give access to the advanced API, required to compile implementations */  | 
113  |  | #  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */  | 
114  |  | #  define XXH_STATIC_LINKING_ONLY  | 
115  |  |    /* make all functions private */  | 
116  |  | #  undef XXH_PUBLIC_API  | 
117  |  | #  if defined(__GNUC__)  | 
118  |  | #    define XXH_PUBLIC_API static __inline __attribute__((unused))  | 
119  |  | #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)  | 
120  |  | #    define XXH_PUBLIC_API static inline  | 
121  |  | #  elif defined(_MSC_VER)  | 
122  |  | #    define XXH_PUBLIC_API static __inline  | 
123  |  | #  else  | 
124  |  |      /* note: this version may generate warnings for unused static functions */  | 
125  |  | #    define XXH_PUBLIC_API static  | 
126  |  | #  endif  | 
127  |  |  | 
128  |  |    /*  | 
129  |  |     * This part deals with the special case where a unit wants to inline xxHash,  | 
130  |  |     * but "xxhash.h" has previously been included without XXH_INLINE_ALL,  | 
131  |  |     * such as part of some previously included *.h header file.  | 
132  |  |     * Without further action, the new include would just be ignored,  | 
133  |  |     * and functions would effectively _not_ be inlined (silent failure).  | 
134  |  |     * The following macros solve this situation by prefixing all inlined names,  | 
135  |  |     * avoiding naming collision with previous inclusions.  | 
136  |  |     */  | 
137  |  |    /* Before that, we unconditionally #undef all symbols,  | 
138  |  |     * in case they were already defined with XXH_NAMESPACE.  | 
139  |  |     * They will then be redefined for XXH_INLINE_ALL  | 
140  |  |     */  | 
141  |  | #  undef XXH_versionNumber  | 
142  |  |     /* XXH32 */  | 
143  |  | #  undef XXH32  | 
144  |  | #  undef XXH32_createState  | 
145  |  | #  undef XXH32_freeState  | 
146  |  | #  undef XXH32_reset  | 
147  |  | #  undef XXH32_update  | 
148  |  | #  undef XXH32_digest  | 
149  |  | #  undef XXH32_copyState  | 
150  |  | #  undef XXH32_canonicalFromHash  | 
151  |  | #  undef XXH32_hashFromCanonical  | 
152  |  |     /* XXH64 */  | 
153  |  | #  undef XXH64  | 
154  |  | #  undef XXH64_createState  | 
155  |  | #  undef XXH64_freeState  | 
156  |  | #  undef XXH64_reset  | 
157  |  | #  undef XXH64_update  | 
158  |  | #  undef XXH64_digest  | 
159  |  | #  undef XXH64_copyState  | 
160  |  | #  undef XXH64_canonicalFromHash  | 
161  |  | #  undef XXH64_hashFromCanonical  | 
162  |  |     /* XXH3_64bits -- NOTE(review): XXH3_64bits_reset_withSecretandSeed and XXH3_generateSecret_fromSeed are in the XXH_NAMESPACE define list further down but have no #undef here */  | 
163  |  | #  undef XXH3_64bits  | 
164  |  | #  undef XXH3_64bits_withSecret  | 
165  |  | #  undef XXH3_64bits_withSeed  | 
166  |  | #  undef XXH3_64bits_withSecretandSeed  | 
167  |  | #  undef XXH3_createState  | 
168  |  | #  undef XXH3_freeState  | 
169  |  | #  undef XXH3_copyState  | 
170  |  | #  undef XXH3_64bits_reset  | 
171  |  | #  undef XXH3_64bits_reset_withSeed  | 
172  |  | #  undef XXH3_64bits_reset_withSecret  | 
173  |  | #  undef XXH3_64bits_update  | 
174  |  | #  undef XXH3_64bits_digest  | 
175  |  | #  undef XXH3_generateSecret  | 
176  |  |     /* XXH3_128bits -- NOTE(review): XXH3_128bits_withSecretandSeed is in the XXH_NAMESPACE define list further down but has no #undef here */  | 
177  |  | #  undef XXH128  | 
178  |  | #  undef XXH3_128bits  | 
179  |  | #  undef XXH3_128bits_withSeed  | 
180  |  | #  undef XXH3_128bits_withSecret  | 
181  |  | #  undef XXH3_128bits_reset  | 
182  |  | #  undef XXH3_128bits_reset_withSeed  | 
183  |  | #  undef XXH3_128bits_reset_withSecret  | 
184  |  | #  undef XXH3_128bits_reset_withSecretandSeed  | 
185  |  | #  undef XXH3_128bits_update  | 
186  |  | #  undef XXH3_128bits_digest  | 
187  |  | #  undef XXH128_isEqual  | 
188  |  | #  undef XXH128_cmp  | 
189  |  | #  undef XXH128_canonicalFromHash  | 
190  |  | #  undef XXH128_hashFromCanonical  | 
191  |  |     /* Finally, free the namespace itself */  | 
192  |  | #  undef XXH_NAMESPACE  | 
193  |  |  | 
194  |  |     /* employ the namespace for XXH_INLINE_ALL */  | 
195  |  | #  define XXH_NAMESPACE XXH_INLINE_  | 
196  |  |    /*  | 
197  |  |     * Some identifiers (enums, type names) are not symbols,  | 
198  |  |     * but they must nonetheless be renamed to avoid redeclaration.  | 
199  |  |     * Alternative solution: do not redeclare them.  | 
200  |  |     * However, this requires some #ifdefs, and has a more dispersed impact.  | 
201  |  |     * Meanwhile, renaming can be achieved in a single place.  | 
202  |  |     */  | 
203  |  | #  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id   /* NOTE(review): operands of ## are not macro-expanded, so this yields the literal prefix XXH_NAMESPACE<Id> rather than XXH_INLINE_<Id> */  | 
204  |  | #  define XXH_OK XXH_IPREF(XXH_OK)  | 
205  |  | #  define XXH_ERROR XXH_IPREF(XXH_ERROR)  | 
206  |  | #  define XXH_errorcode XXH_IPREF(XXH_errorcode)  | 
207  |  | #  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)  | 
208  |  | #  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)  | 
209  |  | #  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)  | 
210  |  | #  define XXH32_state_s XXH_IPREF(XXH32_state_s)  | 
211  |  | #  define XXH32_state_t XXH_IPREF(XXH32_state_t)  | 
212  |  | #  define XXH64_state_s XXH_IPREF(XXH64_state_s)  | 
213  |  | #  define XXH64_state_t XXH_IPREF(XXH64_state_t)  | 
214  |  | #  define XXH3_state_s  XXH_IPREF(XXH3_state_s)  | 
215  |  | #  define XXH3_state_t  XXH_IPREF(XXH3_state_t)  | 
216  |  | #  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)  | 
217  |  |    /* Ensure the header is parsed again, even if it was previously included */  | 
218  |  | #  undef XXHASH_H_5627135585666179  | 
219  |  | #  undef XXHASH_H_STATIC_13879238742  | 
220  |  | #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */  | 
221  |  |  | 
222  |  |  | 
223  |  |  | 
224  |  | /* ****************************************************************  | 
225  |  |  *  Stable API  | 
226  |  |  *****************************************************************/  | 
227  |  | #ifndef XXHASH_H_5627135585666179  | 
228  |  | #define XXHASH_H_5627135585666179 1  | 
229  |  |  | 
230  |  |  | 
231  |  | /*!  | 
232  |  |  * @defgroup public Public API  | 
233  |  |  * Contains details on the public xxHash functions.  | 
234  |  |  * @{ | 
235  |  |  */  | 
236  |  | /* Linkage of the public API when not inlining: __declspec(dllexport/dllimport) for MSVC builds that define XXH_EXPORT or XXH_IMPORT, empty otherwise */  | 
237  |  | #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)  | 
238  |  | #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))  | 
239  |  | #    ifdef XXH_EXPORT  | 
240  |  | #      define XXH_PUBLIC_API __declspec(dllexport)  | 
241  |  | #    elif defined(XXH_IMPORT)   /* test definedness, not value: an empty or zero-valued XXH_IMPORT must still select dllimport */  | 
242  |  | #      define XXH_PUBLIC_API __declspec(dllimport)  | 
243  |  | #    endif  | 
244  |  | #  else  | 
245  |  | #    define XXH_PUBLIC_API   /* do nothing */  | 
246  |  | #  endif  | 
247  |  | #endif  | 
248  |  |  | 
249  |  | #ifdef XXH_DOXYGEN  | 
250  |  | /*!  | 
251  |  |  * @brief Emulate a namespace by transparently prefixing all symbols.  | 
252  |  |  *  | 
253  |  |  * If you want to include _and expose_ xxHash functions from within your own  | 
254  |  |  * library, but also want to avoid symbol collisions with other libraries which  | 
255  |  |  * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix  | 
256  |  |  * any public symbol from xxhash library with the value of XXH_NAMESPACE  | 
257  |  |  * (therefore, avoid empty or numeric values).  | 
258  |  |  *  | 
259  |  |  * Note that no change is required within the calling program as long as it  | 
260  |  |  * includes `xxhash.h`: Regular symbol names will be automatically translated  | 
261  |  |  * by this header.  | 
262  |  |  */  | 
263  |  | #  define XXH_NAMESPACE /* YOUR NAME HERE */  | 
264  |  | #  undef XXH_NAMESPACE  | 
265  |  | #endif  | 
266  |  |  | 
267  |  | #ifdef XXH_NAMESPACE   /* rename each public identifier to <value of XXH_NAMESPACE><identifier> */  | 
268  | 25  | #  define XXH_CAT(A,B) A##B   /* raw token paste */  | 
269  | 25  | #  define XXH_NAME2(A,B) XXH_CAT(A,B)   /* extra expansion level so XXH_NAMESPACE is replaced by its value before ## is applied */  | 
270  |  | #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)  | 
271  |  | /* XXH32 */  | 
272  |  | #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)  | 
273  |  | #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)  | 
274  |  | #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)  | 
275  |  | #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)  | 
276  |  | #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)  | 
277  |  | #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)  | 
278  |  | #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)  | 
279  |  | #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)  | 
280  |  | #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)  | 
281  |  | /* XXH64 */  | 
282  | 25  | #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)  | 
283  |  | #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)  | 
284  |  | #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)  | 
285  |  | #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)  | 
286  |  | #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)  | 
287  |  | #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)  | 
288  |  | #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)  | 
289  |  | #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)  | 
290  |  | #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)  | 
291  |  | /* XXH3_64bits */  | 
292  |  | #  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)  | 
293  |  | #  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)  | 
294  |  | #  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)  | 
295  |  | #  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)  | 
296  |  | #  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)  | 
297  |  | #  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)  | 
298  |  | #  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)  | 
299  |  | #  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)  | 
300  |  | #  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)  | 
301  |  | #  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)  | 
302  |  | #  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)  | 
303  |  | #  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)  | 
304  |  | #  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)  | 
305  |  | #  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)  | 
306  |  | #  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)  | 
307  |  | /* XXH3_128bits */  | 
308  |  | #  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)  | 
309  |  | #  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)  | 
310  |  | #  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)  | 
311  |  | #  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)  | 
312  |  | #  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)  | 
313  |  | #  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)  | 
314  |  | #  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)  | 
315  |  | #  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)  | 
316  |  | #  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)  | 
317  |  | #  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)  | 
318  |  | #  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)  | 
319  |  | #  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)  | 
320  |  | #  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)  | 
321  |  | #  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)  | 
322  |  | #  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)  | 
323  |  | #endif  | 
324  |  |  | 
325  |  |  | 
326  |  | /* *************************************  | 
327  |  | *  Version  | 
328  |  | ***************************************/  | 
329  | 0  | #define XXH_VERSION_MAJOR    0  | 
330  | 0  | #define XXH_VERSION_MINOR    8  | 
331  | 0  | #define XXH_VERSION_RELEASE  1  | 
332  | 0  | #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)   /* major*10000 + minor*100 + release, i.e. 801 for 0.8.1 */  | 
333  |  |  | 
334  |  | /*!  | 
335  |  |  * @brief Obtains the xxHash version.  | 
336  |  |  *  | 
337  |  |  * This is mostly useful when xxHash is compiled as a shared library,  | 
338  |  |  * since the returned value comes from the library, as opposed to the header file.  | 
339  |  |  *  | 
340  |  |  * @return `XXH_VERSION_NUMBER` of the invoked library.  | 
341  |  |  */  | 
342  |  | XXH_PUBLIC_API unsigned XXH_versionNumber (void);  | 
343  |  |  | 
344  |  |  | 
345  |  | /* ****************************  | 
346  |  | *  Common basic types  | 
347  |  | ******************************/  | 
348  |  | #include <stddef.h>   /* size_t */  | 
349  |  | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;   /* status code returned by the freeState/reset/update functions declared below */  | 
350  |  |  | 
351  |  |  | 
352  |  | /*-**********************************************************************  | 
353  |  | *  32-bit hash  | 
354  |  | ************************************************************************/  | 
355  |  | #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */  | 
356  |  | /*!  | 
357  |  |  * @brief An unsigned 32-bit integer.  | 
358  |  |  *  | 
359  |  |  * Not necessarily defined to `uint32_t` but functionally equivalent.  | 
360  |  |  */  | 
361  |  | typedef uint32_t XXH32_hash_t;  | 
362  |  |  | 
363  |  | #elif !defined (__VMS) \  | 
364  |  |   && (defined (__cplusplus) \  | 
365  |  |   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )  | 
366  |  | #   include <stdint.h>  | 
367  |  |     typedef uint32_t XXH32_hash_t;  | 
368  |  |  | 
369  |  | #else   /* VMS, or C without C99 support: choose a 32-bit type from the <limits.h> maxima instead of using <stdint.h> */  | 
370  |  | #   include <limits.h>  | 
371  |  | #   if UINT_MAX == 0xFFFFFFFFUL  | 
372  |  |       typedef unsigned int XXH32_hash_t;  | 
373  |  | #   else  | 
374  |  | #     if ULONG_MAX == 0xFFFFFFFFUL  | 
375  |  |         typedef unsigned long XXH32_hash_t;  | 
376  |  | #     else  | 
377  |  | #       error "unsupported platform: need a 32-bit type"  | 
378  |  | #     endif  | 
379  |  | #   endif  | 
380  |  | #endif  | 
381  |  |  | 
382  |  | /*!  | 
383  |  |  * @}  | 
384  |  |  *  | 
385  |  |  * @defgroup xxh32_family XXH32 family  | 
386  |  |  * @ingroup public  | 
387  |  |  * Contains functions used in the classic 32-bit xxHash algorithm.  | 
388  |  |  *  | 
389  |  |  * @note  | 
390  |  |  *   XXH32 is useful for older platforms, with no or poor 64-bit performance.  | 
391  |  |  *   Note that @ref xxh3_family provides competitive speed  | 
392  |  |  *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.  | 
393  |  |  *  | 
394  |  |  * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families  | 
395  |  |  * @see @ref xxh32_impl for implementation details  | 
396  |  |  * @{ | 
397  |  |  */  | 
398  |  |  | 
399  |  | /*!  | 
400  |  |  * @brief Calculates the 32-bit hash of @p input using xxHash32.  | 
401  |  |  *  | 
402  |  |  * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s  | 
403  |  |  *  | 
404  |  |  * @param input The block of data to be hashed, at least @p length bytes in size.  | 
405  |  |  * @param length The length of @p input, in bytes.  | 
406  |  |  * @param seed The 32-bit seed to alter the hash's output predictably.  | 
407  |  |  *  | 
408  |  |  * @pre  | 
409  |  |  *   The memory between @p input and @p input + @p length must be valid,  | 
410  |  |  *   readable, contiguous memory. However, if @p length is `0`, @p input may be  | 
411  |  |  *   `NULL`. In C++, this also must be *TriviallyCopyable*.  | 
412  |  |  *  | 
413  |  |  * @return The calculated 32-bit hash value.  | 
414  |  |  *  | 
415  |  |  * @see  | 
416  |  |  *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():  | 
417  |  |  *    Direct equivalents for the other variants of xxHash.  | 
418  |  |  * @see  | 
419  |  |  *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.  | 
420  |  |  */  | 
421  |  | XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);  | 
422  |  |  | 
423  |  | /*!  | 
424  |  |  * Streaming functions generate the xxHash value from an incremental input.  | 
425  |  |  * This method is slower than single-call functions, due to state management.  | 
426  |  |  * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.  | 
427  |  |  *  | 
428  |  |  * An XXH state must first be allocated using `XXH*_createState()`.  | 
429  |  |  *  | 
430  |  |  * Start a new hash by initializing the state with a seed using `XXH*_reset()`.  | 
431  |  |  *  | 
432  |  |  * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.  | 
433  |  |  *  | 
434  |  |  * Both `XXH*_reset()` and `XXH*_update()` return an error code, with 0 meaning OK, and any other value  | 
435  |  |  * meaning there is an error.  | 
436  |  |  *  | 
437  |  |  * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.  | 
438  |  |  * This function returns the nn-bit hash as an unsigned integer (e.g. XXH32_hash_t).  | 
439  |  |  *  | 
440  |  |  * It's still possible to continue inserting input into the hash state after a  | 
441  |  |  * digest, and generate new hash values later on by invoking `XXH*_digest()`.  | 
442  |  |  *  | 
443  |  |  * When done, release the state using `XXH*_freeState()`.  | 
444  |  |  *  | 
445  |  |  * Example code for incrementally hashing a file:  | 
446  |  |  * @code{.c} | 
447  |  |  *    #include <stdio.h>  | 
448  |  |  *    #include <xxhash.h>  | 
449  |  |  *    #define BUFFER_SIZE 256  | 
450  |  |  *  | 
451  |  |  *    // Note: XXH64 and XXH3 use the same interface.  | 
452  |  |  *    XXH32_hash_t  | 
453  |  |  *    hashFile(FILE* stream)  | 
454  |  |  *    { | 
455  |  |  *        XXH32_state_t* state;  | 
456  |  |  *        unsigned char buf[BUFFER_SIZE];  | 
457  |  |  *        size_t amt;  | 
458  |  |  *        XXH32_hash_t hash;  | 
459  |  |  *  | 
460  |  |  *        state = XXH32_createState();       // Create a state  | 
461  |  |  *        assert(state != NULL);             // Error check here  | 
462  |  |  *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed  | 
463  |  |  *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) { | 
464  |  |  *            XXH32_update(state, buf, amt); // Hash the file in chunks  | 
465  |  |  *        }  | 
466  |  |  *        hash = XXH32_digest(state);        // Finalize the hash  | 
467  |  |  *        XXH32_freeState(state);            // Clean up  | 
468  |  |  *        return hash;  | 
469  |  |  *    }  | 
470  |  |  * @endcode  | 
471  |  |  */  | 
472  |  |  | 
473  |  | /*!  | 
474  |  |  * @typedef struct XXH32_state_s XXH32_state_t  | 
475  |  |  * @brief The opaque state struct for the XXH32 streaming API.  | 
476  |  |  *  | 
477  |  |  * @see XXH32_state_s for details.  | 
478  |  |  */  | 
479  |  | typedef struct XXH32_state_s XXH32_state_t;  | 
480  |  |  | 
481  |  | /*!  | 
482  |  |  * @brief Allocates an @ref XXH32_state_t.  | 
483  |  |  *  | 
484  |  |  * Must be freed with XXH32_freeState().  | 
485  |  |  * @return An allocated XXH32_state_t on success, `NULL` on failure.  | 
486  |  |  */  | 
487  |  | XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);  | 
488  |  | /*!  | 
489  |  |  * @brief Frees an @ref XXH32_state_t.  | 
490  |  |  *  | 
491  |  |  * Must be allocated with XXH32_createState().  | 
492  |  |  * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().  | 
493  |  |  * @return XXH_OK.  | 
494  |  |  */  | 
495  |  | XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);  | 
496  |  | /*!  | 
497  |  |  * @brief Copies one @ref XXH32_state_t to another.  | 
498  |  |  *  | 
499  |  |  * @param dst_state The state to copy to.  | 
500  |  |  * @param src_state The state to copy from.  | 
501  |  |  * @pre  | 
502  |  |  *   @p dst_state and @p src_state must not be `NULL` and must not overlap.  | 
503  |  |  */  | 
504  |  | XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);  | 
505  |  |  | 
506  |  | /*!  | 
507  |  |  * @brief Resets an @ref XXH32_state_t to begin a new hash.  | 
508  |  |  *  | 
509  |  |  * This function resets and seeds a state. Call it before @ref XXH32_update().  | 
510  |  |  *  | 
511  |  |  * @param statePtr The state struct to reset.  | 
512  |  |  * @param seed The 32-bit seed to alter the hash result predictably.  | 
513  |  |  *  | 
514  |  |  * @pre  | 
515  |  |  *   @p statePtr must not be `NULL`.  | 
516  |  |  *  | 
517  |  |  * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.  | 
518  |  |  */  | 
519  |  | XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);  | 
520  |  |  | 
521  |  | /*!  | 
522  |  |  * @brief Consumes a block of @p input to an @ref XXH32_state_t.  | 
523  |  |  *  | 
524  |  |  * Call this to incrementally consume blocks of data.  | 
525  |  |  *  | 
526  |  |  * @param statePtr The state struct to update.  | 
527  |  |  * @param input The block of data to be hashed, at least @p length bytes in size.  | 
528  |  |  * @param length The length of @p input, in bytes.  | 
529  |  |  *  | 
530  |  |  * @pre  | 
531  |  |  *   @p statePtr must not be `NULL`.  | 
532  |  |  * @pre  | 
533  |  |  *   The memory between @p input and @p input + @p length must be valid,  | 
534  |  |  *   readable, contiguous memory. However, if @p length is `0`, @p input may be  | 
535  |  |  *   `NULL`. In C++, this also must be *TriviallyCopyable*.  | 
536  |  |  *  | 
537  |  |  * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.  | 
538  |  |  */  | 
539  |  | XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);  | 
540  |  |  | 
541  |  | /*!  | 
542  |  |  * @brief Returns the calculated hash value from an @ref XXH32_state_t.  | 
543  |  |  *  | 
544  |  |  * @note  | 
545  |  |  *   Calling XXH32_digest() will not affect @p statePtr, so you can update,  | 
546  |  |  *   digest, and update again.  | 
547  |  |  *  | 
548  |  |  * @param statePtr The state struct to calculate the hash from.  | 
549  |  |  *  | 
550  |  |  * @pre  | 
551  |  |  *  @p statePtr must not be `NULL`.  | 
552  |  |  *  | 
553  |  |  * @return The calculated xxHash32 value from that state.  | 
554  |  |  */  | 
555  |  | XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);  | 
556  |  |  | 
557  |  | /*******   Canonical representation   *******/  | 
558  |  |  | 
559  |  | /*  | 
560  |  |  * The default return values from XXH functions are unsigned 32 and 64 bit  | 
561  |  |  * integers.  | 
562  |  |  * This is the simplest and fastest format for further post-processing.  | 
563  |  |  *  | 
564  |  |  * However, this leaves open the question of what is the order on the byte level,  | 
565  |  |  * since little and big endian conventions will store the same number differently.  | 
566  |  |  *  | 
567  |  |  * The canonical representation settles this issue by mandating big-endian  | 
568  |  |  * convention, the same convention as human-readable numbers (large digits first).  | 
569  |  |  *  | 
570  |  |  * When writing hash values to storage, sending them over a network, or printing  | 
571  |  |  * them, it's highly recommended to use the canonical representation to ensure  | 
572  |  |  * portability across a wider range of systems, present and future.  | 
573  |  |  *  | 
574  |  |  * The following functions allow transformation of hash values to and from  | 
575  |  |  * canonical format.  | 
576  |  |  */  | 
577  |  |  | 
578  |  | /*!  | 
579  |  |  * @brief Canonical (big endian) representation of @ref XXH32_hash_t.  | 
580  |  |  */  | 
581  |  | typedef struct { | 
582  |  |     unsigned char digest[4]; /*!< Hash bytes, big endian */  | 
583  |  | } XXH32_canonical_t;  | 
584  |  |  | 
585  |  | /*!  | 
586  |  |  * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.  | 
587  |  |  *  | 
588  |  |  * @param dst The @ref XXH32_canonical_t pointer to be stored to.  | 
589  |  |  * @param hash The @ref XXH32_hash_t to be converted.  | 
590  |  |  *  | 
591  |  |  * @pre  | 
592  |  |  *   @p dst must not be `NULL`.  | 
593  |  |  */  | 
594  |  | XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);  | 
595  |  |  | 
596  |  | /*!  | 
597  |  |  * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.  | 
598  |  |  *  | 
599  |  |  * @param src The @ref XXH32_canonical_t to convert.  | 
600  |  |  *  | 
601  |  |  * @pre  | 
602  |  |  *   @p src must not be `NULL`.  | 
603  |  |  *  | 
604  |  |  * @return The converted hash.  | 
605  |  |  */  | 
606  |  | XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);  | 
607  |  |  | 
608  |  | /* Reuse FALLTHROUGH macro from c.h */  | 
609  |  | #include "c.h"  | 
610  |  |  | 
611  | 0  | #define XXH_FALLTHROUGH FALLTHROUGH  | 
612  |  |  | 
613  |  | /*!  | 
614  |  |  * @}  | 
615  |  |  * @ingroup public  | 
616  |  |  * @{ | 
617  |  |  */  | 
618  |  |  | 
619  |  | #ifndef XXH_NO_LONG_LONG  | 
620  |  | /*-**********************************************************************  | 
621  |  | *  64-bit hash  | 
622  |  | ************************************************************************/  | 
623  |  | #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */  | 
624  |  | /*!  | 
625  |  |  * @brief An unsigned 64-bit integer.  | 
626  |  |  *  | 
627  |  |  * Not necessarily defined to `uint64_t` but functionally equivalent.  | 
628  |  |  */  | 
629  |  | typedef uint64_t XXH64_hash_t;  | 
630  |  | #elif !defined (__VMS) \  | 
631  |  |   && (defined (__cplusplus) \  | 
632  |  |   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )  | 
633  |  | #  include <stdint.h>  | 
634  |  |    typedef uint64_t XXH64_hash_t;  | 
635  |  | #else  | 
636  |  | #  include <limits.h>  | 
637  |  | #  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL  | 
638  |  |      /* LP64 ABI says uint64_t is unsigned long */  | 
639  |  |      typedef unsigned long XXH64_hash_t;  | 
640  |  | #  else  | 
641  |  |      /* the following type must have a width of 64-bit */  | 
642  |  |      typedef unsigned long long XXH64_hash_t;  | 
643  |  | #  endif  | 
644  |  | #endif  | 
645  |  |  | 
646  |  | /*!  | 
647  |  |  * @}  | 
648  |  |  *  | 
649  |  |  * @defgroup xxh64_family XXH64 family  | 
650  |  |  * @ingroup public  | 
651  |  |  * @{ | 
652  |  |  * Contains functions used in the classic 64-bit xxHash algorithm.  | 
653  |  |  *  | 
654  |  |  * @note  | 
655  |  |  *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,  | 
656  |  |  *   and offers true 64/128 bit hash results.  | 
657  |  |  *   It provides better speed for systems with vector processing capabilities.  | 
658  |  |  */  | 
659  |  |  | 
660  |  |  | 
661  |  | /*!  | 
662  |  |  * @brief Calculates the 64-bit hash of @p input using xxHash64.  | 
663  |  |  *  | 
664  |  |  * This function usually runs faster on 64-bit systems, but slower on 32-bit  | 
665  |  |  * systems (see benchmark).  | 
666  |  |  *  | 
667  |  |  * @param input The block of data to be hashed, at least @p length bytes in size.  | 
668  |  |  * @param length The length of @p input, in bytes.  | 
669  |  |  * @param seed The 64-bit seed to alter the hash's output predictably.  | 
670  |  |  *  | 
671  |  |  * @pre  | 
672  |  |  *   The memory between @p input and @p input + @p length must be valid,  | 
673  |  |  *   readable, contiguous memory. However, if @p length is `0`, @p input may be  | 
674  |  |  *   `NULL`. In C++, this also must be *TriviallyCopyable*.  | 
675  |  |  *  | 
676  |  |  * @return The calculated 64-bit hash.  | 
677  |  |  *  | 
678  |  |  * @see  | 
679  |  |  *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():  | 
680  |  |  *    Direct equivalents for the other variants of xxHash.  | 
681  |  |  * @see  | 
682  |  |  *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.  | 
683  |  |  */  | 
684  |  | XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);  | 
685  |  |  | 
686  |  | /*******   Streaming   *******/  | 
687  |  | /*!  | 
688  |  |  * @brief The opaque state struct for the XXH64 streaming API.  | 
689  |  |  *  | 
690  |  |  * @see XXH64_state_s for details.  | 
691  |  |  */  | 
692  |  | typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */  | 
693  |  | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);  | 
694  |  | XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);  | 
695  |  | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);  | 
696  |  |  | 
697  |  | XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);  | 
698  |  | XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);  | 
699  |  | XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);  | 
700  |  |  | 
701  |  | /*******   Canonical representation   *******/  | 
702  |  | typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; | 
703  |  | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);  | 
704  |  | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);  | 
705  |  |  | 
706  |  | /*!  | 
707  |  |  * @}  | 
708  |  |  * ************************************************************************  | 
709  |  |  * @defgroup xxh3_family XXH3 family  | 
710  |  |  * @ingroup public  | 
711  |  |  * @{ | 
712  |  |  *  | 
713  |  |  * XXH3 is a more recent hash algorithm featuring:  | 
714  |  |  *  - Improved speed for both small and large inputs  | 
715  |  |  *  - True 64-bit and 128-bit outputs  | 
716  |  |  *  - SIMD acceleration  | 
717  |  |  *  - Improved 32-bit viability  | 
718  |  |  *  | 
719  |  |  * Speed analysis methodology is explained here:  | 
720  |  |  *  | 
721  |  |  *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html  | 
722  |  |  *  | 
723  |  |  * Compared to XXH64, expect XXH3 to run approximately  | 
724  |  |  * 2x faster on large inputs and >3x faster on small ones;  | 
725  |  |  * exact differences vary depending on platform.  | 
726  |  |  *  | 
727  |  |  * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,  | 
728  |  |  * but does not require it.  | 
729  |  |  * Any 32-bit and 64-bit targets that can run XXH32 smoothly  | 
730  |  |  * can run XXH3 at competitive speeds, even without vector support.  | 
731  |  |  * Further details are explained in the implementation.  | 
732  |  |  *  | 
733  |  |  * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,  | 
734  |  |  * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.  | 
735  |  |  *  | 
736  |  |  * XXH3 implementation is portable:  | 
737  |  |  * it has a generic C90 formulation that can be compiled on any platform,  | 
738  |  |  * all implementations generate exactly the same hash value on all platforms.  | 
739  |  |  * Starting from v0.8.0, it's also labelled "stable", meaning that  | 
740  |  |  * any future version will also generate the same hash value.  | 
741  |  |  *  | 
742  |  |  * XXH3 offers 2 variants, _64bits and _128bits.  | 
743  |  |  *  | 
744  |  |  * When only 64 bits are needed, prefer invoking the _64bits variant, as it  | 
745  |  |  * reduces the amount of mixing, resulting in faster speed on small inputs.  | 
746  |  |  * It's also generally simpler to manipulate a scalar return type than a struct.  | 
747  |  |  *  | 
748  |  |  * The API supports one-shot hashing, streaming mode, and custom secrets.  | 
749  |  |  */  | 
750  |  |  | 
751  |  | /*-**********************************************************************  | 
752  |  | *  XXH3 64-bit variant  | 
753  |  | ************************************************************************/  | 
754  |  |  | 
755  |  | /* XXH3_64bits():  | 
756  |  |  * default 64-bit variant, using default secret and default seed of 0.  | 
757  |  |  * It's the fastest variant. */  | 
758  |  | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);  | 
759  |  |  | 
760  |  | /*  | 
761  |  |  * XXH3_64bits_withSeed():  | 
762  |  |  * This variant generates a custom secret on the fly  | 
763  |  |  * based on default secret altered using the `seed` value.  | 
764  |  |  * While this operation is decently fast, note that it's not completely free.  | 
765  |  |  * Note: seed==0 produces the same results as XXH3_64bits().  | 
766  |  |  */  | 
767  |  | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);  | 
768  |  |  | 
769  |  | /*!  | 
770  |  |  * The bare minimum size for a custom secret.  | 
771  |  |  *  | 
772  |  |  * @see  | 
773  |  |  *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),  | 
774  |  |  *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().  | 
775  |  |  */  | 
776  |  | #define XXH3_SECRET_SIZE_MIN 136  | 
777  |  |  | 
778  |  | /*  | 
779  |  |  * XXH3_64bits_withSecret():  | 
780  |  |  * It's possible to provide any blob of bytes as a "secret" to generate the hash.  | 
781  |  |  * This makes it more difficult for an external actor to prepare an intentional collision.  | 
782  |  |  * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).  | 
783  |  |  * However, the quality of the secret impacts the dispersion of the hash algorithm.  | 
784  |  |  * Therefore, the secret _must_ look like a bunch of random bytes.  | 
785  |  |  * Avoid "trivial" or structured data such as repeated sequences or a text document.  | 
786  |  |  * Whenever in doubt about the "randomness" of the blob of bytes,  | 
787  |  |  * consider employing "XXH3_generateSecret()" instead (see below).  | 
788  |  |  * It will generate a proper high entropy secret derived from the blob of bytes.  | 
789  |  |  * Another advantage of using XXH3_generateSecret() is that  | 
790  |  |  * it guarantees that all bits within the initial blob of bytes  | 
791  |  |  * will impact every bit of the output.  | 
792  |  |  * This is not necessarily the case when using the blob of bytes directly  | 
793  |  |  * because, when hashing _small_ inputs, only a portion of the secret is employed.  | 
794  |  |  */  | 
795  |  | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);  | 
796  |  |  | 
797  |  |  | 
798  |  | /*******   Streaming   *******/  | 
799  |  | /*  | 
800  |  |  * Streaming requires state maintenance.  | 
801  |  |  * This operation costs memory and CPU.  | 
802  |  |  * As a consequence, streaming is slower than one-shot hashing.  | 
803  |  |  * For better performance, prefer one-shot functions whenever applicable.  | 
804  |  |  */  | 
805  |  |  | 
806  |  | /*!  | 
807  |  |  * @brief The state struct for the XXH3 streaming API.  | 
808  |  |  *  | 
809  |  |  * @see XXH3_state_s for details.  | 
810  |  |  */  | 
811  |  | typedef struct XXH3_state_s XXH3_state_t;  | 
812  |  | XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);  | 
813  |  | XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);  | 
814  |  | XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);  | 
815  |  |  | 
816  |  | /*  | 
817  |  |  * XXH3_64bits_reset():  | 
818  |  |  * Initialize with default parameters.  | 
819  |  |  * digest will be equivalent to `XXH3_64bits()`.  | 
820  |  |  */  | 
821  |  | XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);  | 
822  |  | /*  | 
823  |  |  * XXH3_64bits_reset_withSeed():  | 
824  |  |  * Generate a custom secret from `seed`, and store it into `statePtr`.  | 
825  |  |  * digest will be equivalent to `XXH3_64bits_withSeed()`.  | 
826  |  |  */  | 
827  |  | XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);  | 
828  |  | /*  | 
829  |  |  * XXH3_64bits_reset_withSecret():  | 
830  |  |  * `secret` is referenced, it _must outlive_ the hash streaming session.  | 
831  |  |  * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,  | 
832  |  |  * and the quality of produced hash values depends on secret's entropy  | 
833  |  |  * (secret's content should look like a bunch of random bytes).  | 
834  |  |  * When in doubt about the randomness of a candidate `secret`,  | 
835  |  |  * consider employing `XXH3_generateSecret()` instead (see below).  | 
836  |  |  */  | 
837  |  | XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);  | 
838  |  |  | 
839  |  | XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);  | 
840  |  | XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);  | 
841  |  |  | 
842  |  | /* note : canonical representation of XXH3 is the same as XXH64  | 
843  |  |  * since they both produce XXH64_hash_t values */  | 
844  |  |  | 
845  |  |  | 
846  |  | /*-**********************************************************************  | 
847  |  | *  XXH3 128-bit variant  | 
848  |  | ************************************************************************/  | 
849  |  |  | 
850  |  | /*!  | 
851  |  |  * @brief The return value from 128-bit hashes.  | 
852  |  |  *  | 
853  |  |  * Stored in little endian order, although the fields themselves are in native  | 
854  |  |  * endianness.  | 
855  |  |  */  | 
856  |  | typedef struct { | 
857  |  |     XXH64_hash_t low64;   /*!< `value & 0xFFFFFFFFFFFFFFFF` */  | 
858  |  |     XXH64_hash_t high64;  /*!< `value >> 64` */  | 
859  |  | } XXH128_hash_t;  | 
860  |  |  | 
861  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);  | 
862  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);  | 
863  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);  | 
864  |  |  | 
865  |  | /*******   Streaming   *******/  | 
866  |  | /*  | 
867  |  |  * Streaming requires state maintenance.  | 
868  |  |  * This operation costs memory and CPU.  | 
869  |  |  * As a consequence, streaming is slower than one-shot hashing.  | 
870  |  |  * For better performance, prefer one-shot functions whenever applicable.  | 
871  |  |  *  | 
872  |  |  * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().  | 
873  |  |  * Use already declared XXH3_createState() and XXH3_freeState().  | 
874  |  |  *  | 
875  |  |  * All reset and streaming functions have same meaning as their 64-bit counterpart.  | 
876  |  |  */  | 
877  |  |  | 
878  |  | XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);  | 
879  |  | XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);  | 
880  |  | XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);  | 
881  |  |  | 
882  |  | XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);  | 
883  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);  | 
884  |  |  | 
885  |  | /* Following helper functions make it possible to compare XXH128_hash_t values.  | 
886  |  |  * Since XXH128_hash_t is a structure, this capability is not offered by the language.  | 
887  |  |  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */  | 
888  |  |  | 
889  |  | /*!  | 
890  |  |  * XXH128_isEqual():  | 
891  |  |  * Return: 1 if `h1` and `h2` are equal, 0 if they are not.  | 
892  |  |  */  | 
893  |  | XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);  | 
894  |  |  | 
895  |  | /*!  | 
896  |  |  * XXH128_cmp():  | 
897  |  |  *  | 
898  |  |  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.  | 
899  |  |  *  | 
900  |  |  * return: >0 if *h128_1  > *h128_2  | 
901  |  |  *         =0 if *h128_1 == *h128_2  | 
902  |  |  *         <0 if *h128_1  < *h128_2  | 
903  |  |  */  | 
904  |  | XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);  | 
905  |  |  | 
906  |  |  | 
907  |  | /*******   Canonical representation   *******/  | 
908  |  | typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; | 
909  |  | XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);  | 
910  |  | XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);  | 
911  |  |  | 
912  |  |  | 
913  |  | #endif  /* XXH_NO_LONG_LONG */  | 
914  |  |  | 
915  |  | /*!  | 
916  |  |  * @}  | 
917  |  |  */  | 
918  |  | #endif /* XXHASH_H_5627135585666179 */  | 
919  |  |  | 
920  |  |  | 
921  |  |  | 
922  |  | #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)  | 
923  |  | #define XXHASH_H_STATIC_13879238742  | 
924  |  | /* ****************************************************************************  | 
925  |  |  * This section contains declarations which are not guaranteed to remain stable.  | 
926  |  |  * They may change in future versions, becoming incompatible with a different  | 
927  |  |  * version of the library.  | 
928  |  |  * These declarations should only be used with static linking.  | 
929  |  |  * Never use them in association with dynamic linking!  | 
930  |  |  ***************************************************************************** */  | 
931  |  |  | 
932  |  | /*  | 
933  |  |  * These definitions are only present to allow static allocation  | 
934  |  |  * of XXH states, on stack or in a struct, for example.  | 
935  |  |  * Never **ever** access their members directly.  | 
936  |  |  */  | 
937  |  |  | 
938  |  | /*!  | 
939  |  |  * @internal  | 
940  |  |  * @brief Structure for XXH32 streaming API.  | 
941  |  |  *  | 
942  |  |  * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,  | 
943  |  |  * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is  | 
944  |  |  * an opaque type. This allows fields to safely be changed.  | 
945  |  |  *  | 
946  |  |  * Typedef'd to @ref XXH32_state_t.  | 
947  |  |  * Do not access the members of this struct directly.  | 
948  |  |  * @see XXH64_state_s, XXH3_state_s  | 
949  |  |  */  | 
950  |  | struct XXH32_state_s { | 
951  |  |    XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */  | 
952  |  |    XXH32_hash_t large_len;    /*!< Whether the total length hashed is >= 16 (handles @ref total_len_32 overflow) */  | 
953  |  |    XXH32_hash_t v[4];         /*!< Accumulator lanes */  | 
954  |  |    XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */  | 
955  |  |    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */  | 
956  |  |    XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it, it may be removed. */  | 
957  |  | };   /* typedef'd to XXH32_state_t */  | 
958  |  |  | 
959  |  |  | 
960  |  | #ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */  | 
961  |  |  | 
962  |  | /*!  | 
963  |  |  * @internal  | 
964  |  |  * @brief Structure for XXH64 streaming API.  | 
965  |  |  *  | 
966  |  |  * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,  | 
967  |  |  * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is  | 
968  |  |  * an opaque type. This allows fields to safely be changed.  | 
969  |  |  *  | 
970  |  |  * Typedef'd to @ref XXH64_state_t.  | 
971  |  |  * Do not access the members of this struct directly.  | 
972  |  |  * @see XXH32_state_s, XXH3_state_s  | 
973  |  |  */  | 
974  |  | struct XXH64_state_s { | 
975  |  |    XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */  | 
976  |  |    XXH64_hash_t v[4];         /*!< Accumulator lanes */  | 
977  |  |    XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */  | 
978  |  |    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */  | 
979  |  |    XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway */  | 
980  |  |    XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it, it may be removed. */  | 
981  |  | };   /* typedef'd to XXH64_state_t */  | 
982  |  |  | 
983  |  | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */  | 
984  |  | #  include <stdalign.h>  | 
985  |  | #  define XXH_ALIGN(n)      alignas(n)  | 
986  |  | #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */  | 
987  |  | /* In C++ alignas() is a keyword */  | 
988  |  | #  define XXH_ALIGN(n)      alignas(n)  | 
989  |  | #elif defined(__GNUC__)  | 
990  |  | #  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))  | 
991  |  | #elif defined(_MSC_VER)  | 
992  |  | #  define XXH_ALIGN(n)      __declspec(align(n))  | 
993  |  | #else  | 
994  |  | #  define XXH_ALIGN(n)   /* disabled */  | 
995  |  | #endif  | 
996  |  |  | 
997  |  | /* Old GCC versions only accept the attribute after the type in structures. */  | 
998  |  | #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \  | 
999  |  |     && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \  | 
1000  |  |     && defined(__GNUC__)  | 
1001  |  | #   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)  | 
1002  |  | #else  | 
1003  |  | #   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type  | 
1004  |  | #endif  | 
1005  |  |  | 
1006  |  | /*!  | 
1007  |  |  * @brief The size of the internal XXH3 buffer.  | 
1008  |  |  *  | 
1009  |  |  * This is the optimal update size for incremental hashing.  | 
1010  |  |  *  | 
1011  |  |  * @see XXH3_64bits_update(), XXH3_128bits_update().  | 
1012  |  |  */  | 
1013  |  | #define XXH3_INTERNALBUFFER_SIZE 256  | 
1014  |  |  | 
1015  |  | /*!  | 
1016  |  |  * @brief Default size of the secret buffer (and @ref XXH3_kSecret).  | 
1017  |  |  *  | 
1018  |  |  * This is the size used in @ref XXH3_kSecret and the seeded functions.  | 
1019  |  |  *  | 
1020  |  |  * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.  | 
1021  |  |  */  | 
1022  |  | #define XXH3_SECRET_DEFAULT_SIZE 192  | 
1023  |  |  | 
1024  |  | /*!  | 
1025  |  |  * @internal  | 
1026  |  |  * @brief Structure for XXH3 streaming API.  | 
1027  |  |  *  | 
1028  |  |  * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,  | 
1029  |  |  * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.  | 
1030  |  |  * Otherwise it is an opaque type.  | 
1031  |  |  * Never use this definition in combination with dynamic library.  | 
1032  |  |  * This allows fields to safely be changed in the future.  | 
1033  |  |  *  | 
1034  |  |  * @note ** This structure has a strict alignment requirement of 64 bytes!! **  | 
1035  |  |  * Do not allocate this with `malloc()` or `new`,  | 
1036  |  |  * it will not be sufficiently aligned.  | 
1037  |  |  * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.  | 
1038  |  |  *  | 
1039  |  |  * Typedef'd to @ref XXH3_state_t.  | 
1040  |  |  * Never access the members of this struct directly.  | 
1041  |  |  *  | 
1042  |  |  * @see XXH3_INITSTATE() for stack initialization.  | 
1043  |  |  * @see XXH3_createState(), XXH3_freeState().  | 
1044  |  |  * @see XXH32_state_s, XXH64_state_s  | 
1045  |  |  */  | 
1046  |  | struct XXH3_state_s { | 
1047  |  |    XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);  | 
1048  |  |        /*!< The 8 accumulators. Similar to `v` in @ref XXH32_state_s::v and @ref XXH64_state_s::v */  | 
1049  |  |    XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);  | 
1050  |  |        /*!< Used to store a custom secret generated from a seed. */  | 
1051  |  |    XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);  | 
1052  |  |        /*!< The internal buffer. @see XXH32_state_s::mem32 */  | 
1053  |  |    XXH32_hash_t bufferedSize;  | 
1054  |  |        /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */  | 
1055  |  |    XXH32_hash_t useSeed;  | 
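1056  |  |        /*!< Historically a reserved field; still provides padding on 64-bit. NOTE(review): the name suggests it now flags whether @ref seed is in use -- confirm against the reset functions. */  | 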
1057  |  |    size_t nbStripesSoFar;  | 
1058  |  |        /*!< Number of stripes processed. */  | 
1059  |  |    XXH64_hash_t totalLen;  | 
1060  |  |        /*!< Total length hashed. 64-bit even on 32-bit targets. */  | 
1061  |  |    size_t nbStripesPerBlock;  | 
1062  |  |        /*!< Number of stripes per block. */  | 
1063  |  |    size_t secretLimit;  | 
1064  |  |        /*!< Size of @ref customSecret or @ref extSecret */  | 
1065  |  |    XXH64_hash_t seed;  | 
1066  |  |        /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */  | 
1067  |  |    XXH64_hash_t reserved64;  | 
1068  |  |        /*!< Reserved field. */  | 
1069  |  |    const unsigned char* extSecret;  | 
1070  |  |        /*!< Reference to an external secret for the _withSecret variants, NULL  | 
1071  |  |         *   for other variants. */  | 
1072  |  |    /* note: there may be some padding at the end due to alignment on 64 bytes */  | 
1073  |  | }; /* typedef'd to XXH3_state_t */  | 
1074  |  |  | 
1075  |  | #undef XXH_ALIGN_MEMBER  | 
1076  |  |  | 
1077  |  | /*!  | 
1078  |  |  * @brief Initializes a stack-allocated `XXH3_state_s`.  | 
1079  |  |  *  | 
1080  |  |  * When the @ref XXH3_state_t structure is merely emplaced on stack,  | 
1081  |  |  * it should be initialized with XXH3_INITSTATE() or a memset()  | 
1082  |  |  * in case its first reset uses XXH3_NNbits_reset_withSeed().  | 
1083  |  |  * This init can be omitted if the first reset uses default or _withSecret mode.  | 
1084  |  |  * This operation isn't necessary when the state is created with XXH3_createState().  | 
1085  |  |  * Note that this doesn't prepare the state for a streaming operation,  | 
1086  |  |  * it's still necessary to use XXH3_NNbits_reset*() afterwards.  | 
1087  |  |  */  | 
1088  |  | #define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; } | 
1089  |  |  | 
1090  |  |  | 
1091  |  | /* XXH128() :  | 
1092  |  |  * simple alias to pre-selected XXH3_128bits variant  | 
1093  |  |  */  | 
1094  |  | XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);  | 
1095  |  |  | 
1096  |  |  | 
1097  |  | /* ===   Experimental API   === */  | 
1098  |  | /* Symbols defined below must be considered tied to a specific library version. */  | 
1099  |  |  | 
1100  |  | /*  | 
1101  |  |  * XXH3_generateSecret():  | 
1102  |  |  *  | 
1103  |  |  * Derive a high-entropy secret from any user-defined content, named customSeed.  | 
1104  |  |  * The generated secret can be used in combination with `*_withSecret()` functions.  | 
1105  |  |  * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,  | 
1106  |  |  * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.  | 
1107  |  |  *  | 
1108  |  |  * The function accepts as input a custom seed of any length and any content,  | 
1109  |  |  * and derives from it a high-entropy secret of length @secretSize  | 
1110  |  |  * into an already allocated buffer @secretBuffer.  | 
1111  |  |  * @secretSize must be >= XXH3_SECRET_SIZE_MIN  | 
1112  |  |  *  | 
1113  |  |  * The generated secret can then be used with any `*_withSecret()` variant.  | 
1114  |  |  * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,  | 
1115  |  |  * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`  | 
1116  |  |  * are part of this list. They all accept a `secret` parameter  | 
1117  |  |  * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)  | 
1118  |  |  * _and_ feature very high entropy (consist of random-looking bytes).  | 
1119  |  |  * These conditions can be a high bar to meet, so  | 
1120  |  |  * XXH3_generateSecret() can be employed to ensure proper quality.  | 
1121  |  |  *  | 
1122  |  |  * customSeed can be anything. It can have any size, even small ones,  | 
1123  |  |  * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.  | 
1124  |  |  * The resulting `secret` will nonetheless provide all required qualities.  | 
1125  |  |  *  | 
1126  |  |  * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.  | 
1127  |  |  */  | 
1128  |  | XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);  | 
1129  |  |  | 
1130  |  |  | 
1131  |  | /*  | 
1132  |  |  * XXH3_generateSecret_fromSeed():  | 
1133  |  |  *  | 
1134  |  |  * Generate the same secret as the _withSeed() variants.  | 
1135  |  |  *  | 
1136  |  |  * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).  | 
1137  |  |  * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.  | 
1138  |  |  *  | 
1139  |  |  * The generated secret can be used in combination with  | 
1140  |  |  * `*_withSecret()` and `_withSecretandSeed()` variants.  | 
1141  |  |  * This generator is notably useful in combination with `_withSecretandSeed()`,  | 
1142  |  |  * as a way to emulate a faster `_withSeed()` variant.  | 
1143  |  |  */  | 
1144  |  | XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);  | 
1145  |  |  | 
1146  |  | /*  | 
1147  |  |  * *_withSecretandSeed() :  | 
1148  |  |  * These variants generate hash values using either  | 
1149  |  |  * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)  | 
1150  |  |  * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).  | 
1151  |  |  *  | 
1152  |  |  * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.  | 
1153  |  |  * `_withSeed()` has to generate the secret on the fly for "large" keys.  | 
1154  |  |  * It's fast, but can be perceptible for "not so large" keys (< 1 KB).  | 
1155  |  |  * `_withSecret()` has to generate the masks on the fly for "small" keys,  | 
1156  |  |  * which requires more instructions than _withSeed() variants.  | 
1157  |  |  * Therefore, _withSecretandSeed variant combines the best of both worlds.  | 
1158  |  |  *  | 
1159  |  |  * When @secret has been generated by XXH3_generateSecret_fromSeed(),  | 
1160  |  |  * this variant produces *exactly* the same results as `_withSeed()` variant,  | 
1161  |  |  * hence offering only a pure speed benefit on "large" input,  | 
1162  |  |  * by skipping the need to regenerate the secret for every large input.  | 
1163  |  |  *  | 
1164  |  |  * Another usage scenario is to hash the secret to a 64-bit hash value,  | 
1165  |  |  * for example with XXH3_64bits(), which then becomes the seed,  | 
1166  |  |  * and then employ both the seed and the secret in _withSecretandSeed().  | 
1167  |  |  * On top of speed, an added benefit is that each bit in the secret  | 
1168  |  |  * has a 50% chance to swap each bit in the output,  | 
1169  |  |  * via its impact to the seed.  | 
1170  |  |  * This is not guaranteed when using the secret directly in "small data" scenarios,  | 
1171  |  |  * because only portions of the secret are employed for small data.  | 
1172  |  |  */  | 
1173  |  | XXH_PUBLIC_API XXH64_hash_t  | 
1174  |  | XXH3_64bits_withSecretandSeed(const void* data, size_t len,  | 
1175  |  |                               const void* secret, size_t secretSize,  | 
1176  |  |                               XXH64_hash_t seed);  | 
1177  |  |  | 
1178  |  | XXH_PUBLIC_API XXH128_hash_t  | 
1179  |  | XXH3_128bits_withSecretandSeed(const void* data, size_t len,  | 
1180  |  |                                const void* secret, size_t secretSize,  | 
1181  |  |                                XXH64_hash_t seed64);  | 
1182  |  |  | 
1183  |  | XXH_PUBLIC_API XXH_errorcode  | 
1184  |  | XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,  | 
1185  |  |                                     const void* secret, size_t secretSize,  | 
1186  |  |                                     XXH64_hash_t seed64);  | 
1187  |  |  | 
1188  |  | XXH_PUBLIC_API XXH_errorcode  | 
1189  |  | XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,  | 
1190  |  |                                      const void* secret, size_t secretSize,  | 
1191  |  |                                      XXH64_hash_t seed64);  | 
1192  |  |  | 
1193  |  |  | 
1194  |  | #endif  /* XXH_NO_LONG_LONG */  | 
1195  |  | #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)  | 
1196  |  | #  define XXH_IMPLEMENTATION  | 
1197  |  | #endif  | 
1198  |  |  | 
1199  |  | #endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */  | 
1200  |  |  | 
1201  |  |  | 
1202  |  | /* ======================================================================== */  | 
1203  |  | /* ======================================================================== */  | 
1204  |  | /* ======================================================================== */  | 
1205  |  |  | 
1206  |  |  | 
1207  |  | /*-**********************************************************************  | 
1208  |  |  * xxHash implementation  | 
1209  |  |  *-**********************************************************************  | 
1210  |  |  * xxHash's implementation used to be hosted inside xxhash.c.  | 
1211  |  |  *  | 
1212  |  |  * However, inlining requires implementation to be visible to the compiler,  | 
1213  |  |  * hence be included alongside the header.  | 
1214  |  |  * Previously, implementation was hosted inside xxhash.c,  | 
1215  |  |  * which was then #included when inlining was activated.  | 
1216  |  |  * This construction created issues with a few build and install systems,  | 
1217  |  |  * as it required xxhash.c to be stored in /include directory.  | 
1218  |  |  *  | 
1219  |  |  * xxHash implementation is now directly integrated within xxhash.h.  | 
1220  |  |  * As a consequence, xxhash.c is no longer needed in /include.  | 
1221  |  |  *  | 
1222  |  |  * xxhash.c is still available and is still useful.  | 
1223  |  |  * In a "normal" setup, when xxhash is not inlined,  | 
1224  |  |  * xxhash.h only exposes the prototypes and public symbols,  | 
1225  |  |  * while xxhash.c can be built into an object file xxhash.o  | 
1226  |  |  * which can then be linked into the final binary.  | 
1227  |  |  ************************************************************************/  | 
1228  |  |  | 
1229  |  | #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \  | 
1230  |  |    || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)  | 
1231  |  | #  define XXH_IMPLEM_13a8737387  | 
1232  |  |  | 
1233  |  | /* *************************************  | 
1234  |  | *  Tuning parameters  | 
1235  |  | ***************************************/  | 
1236  |  |  | 
1237  |  | /*!  | 
1238  |  |  * @defgroup tuning Tuning parameters  | 
1239  |  |  * @{ | 
1240  |  |  *  | 
1241  |  |  * Various macros to control xxHash's behavior.  | 
1242  |  |  */  | 
1243  |  | #ifdef XXH_DOXYGEN  | 
1244  |  | /*!  | 
1245  |  |  * @brief Define this to disable 64-bit code.  | 
1246  |  |  *  | 
1247  |  |  * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.  | 
1248  |  |  */  | 
1249  |  | #  define XXH_NO_LONG_LONG  | 
1250  |  | #  undef XXH_NO_LONG_LONG /* don't actually */  | 
1251  |  | /*!  | 
1252  |  |  * @brief Controls how unaligned memory is accessed.  | 
1253  |  |  *  | 
1254  |  |  * By default, access to unaligned memory is controlled by `memcpy()`, which is  | 
1255  |  |  * safe and portable.  | 
1256  |  |  *  | 
1257  |  |  * Unfortunately, on some target/compiler combinations, the generated assembly  | 
1258  |  |  * is sub-optimal.  | 
1259  |  |  *  | 
1260  |  |  * The below switch allows selection of a different access method  | 
1261  |  |  * in the search for improved performance.  | 
1262  |  |  *  | 
1263  |  |  * @par Possible options:  | 
1264  |  |  *  | 
1265  |  |  *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`  | 
1266  |  |  *   @par  | 
1267  |  |  *     Use `memcpy()`. Safe and portable. Note that most modern compilers will  | 
1268  |  |  *     eliminate the function call and treat it as an unaligned access.  | 
1269  |  |  *  | 
1270  |  |  *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`  | 
1271  |  |  *   @par  | 
1272  |  |  *     Depends on compiler extensions and is therefore not portable.  | 
1273  |  |  *     This method is safe _if_ your compiler supports it,  | 
1274  |  |  *     and *generally* as fast or faster than `memcpy`.  | 
1275  |  |  *  | 
1276  |  |  *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast  | 
1277  |  |  *  @par  | 
1278  |  |  *     Casts directly and dereferences. This method doesn't depend on the  | 
1279  |  |  *     compiler, but it violates the C standard as it directly dereferences an  | 
1280  |  |  *     unaligned pointer. It can generate buggy code on targets which do not  | 
1281  |  |  *     support unaligned memory accesses, but in some circumstances, it's the  | 
1282  |  |  *     only known way to get the most performance.  | 
1283  |  |  *  | 
1284  |  |  *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift  | 
1285  |  |  *  @par  | 
1286  |  |  *     Also portable. This can generate the best code on old compilers which don't  | 
1287  |  |  *     inline small `memcpy()` calls, and it might also be faster on big-endian  | 
1288  |  |  *     systems which lack a native byteswap instruction. However, some compilers  | 
1289  |  |  *     will emit literal byteshifts even if the target supports unaligned access.  | 
1290  |  |  *  .  | 
1291  |  |  *  | 
1292  |  |  * @warning  | 
1293  |  |  *   Methods 1 and 2 rely on implementation-defined behavior. Use these with  | 
1294  |  |  *   care, as what works on one compiler/platform/optimization level may cause  | 
1295  |  |  *   another to read garbage data or even crash.  | 
1296  |  |  *  | 
1297  |  |  * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.  | 
1298  |  |  *  | 
1299  |  |  * Prefer these methods in priority order (0 > 3 > 1 > 2)  | 
1300  |  |  */  | 
1301  |  | #  define XXH_FORCE_MEMORY_ACCESS 0  | 
1302  |  |  | 
1303  |  | /*!  | 
1304  |  |  * @def XXH_FORCE_ALIGN_CHECK  | 
1305  |  |  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()  | 
1306  |  |  * and XXH64() only).  | 
1307  |  |  *  | 
1308  |  |  * This is an important performance trick for architectures without decent  | 
1309  |  |  * unaligned memory access performance.  | 
1310  |  |  *  | 
1311  |  |  * It checks for input alignment, and when conditions are met, uses a "fast  | 
1312  |  |  * path" employing direct 32-bit/64-bit reads, resulting in _dramatically  | 
1313  |  |  * faster_ read speed.  | 
1314  |  |  *  | 
1315  |  |  * The check costs one initial branch per hash, which is generally negligible,  | 
1316  |  |  * but not zero.  | 
1317  |  |  *  | 
1318  |  |  * Moreover, it's not useful to generate an additional code path if memory  | 
1319  |  |  * access uses the same instruction for both aligned and unaligned  | 
1320  |  |  * addresses (e.g. x86 and aarch64).  | 
1321  |  |  *  | 
1322  |  |  * In these cases, the alignment check can be removed by setting this macro to 0.  | 
1323  |  |  * Then the code will always use unaligned memory access.  | 
1324  |  |  * Align check is automatically disabled on x86, x64 & arm64,  | 
1325  |  |  * which are platforms known to offer good unaligned memory access performance.  | 
1326  |  |  *  | 
1327  |  |  * This option does not affect XXH3 (only XXH32 and XXH64).  | 
1328  |  |  */  | 
1329  |  | #  define XXH_FORCE_ALIGN_CHECK 0  | 
1330  |  |  | 
1331  |  | /*!  | 
1332  |  |  * @def XXH_NO_INLINE_HINTS  | 
1333  |  |  * @brief When non-zero, drops the forced-inline hints and leaves internal functions plain `static`.  | 
1334  |  |  *  | 
1335  |  |  * By default, xxHash tries to force the compiler to inline almost all internal  | 
1336  |  |  * functions.  | 
1337  |  |  *  | 
1338  |  |  * This can usually improve performance due to reduced jumping and improved  | 
1339  |  |  * constant folding, but significantly increases the size of the binary which  | 
1340  |  |  * might not be favorable.  | 
1341  |  |  *  | 
1342  |  |  * Additionally, sometimes the forced inlining can be detrimental to performance,  | 
1343  |  |  * depending on the architecture.  | 
1344  |  |  *  | 
1345  |  |  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the  | 
1346  |  |  * compiler full control on whether to inline or not.  | 
1347  |  |  *  | 
1348  |  |  * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using  | 
1349  |  |  * -fno-inline with GCC or Clang, this will automatically be defined.  | 
1350  |  |  */  | 
1351  |  | #  define XXH_NO_INLINE_HINTS 0  | 
1352  |  |  | 
1353  |  | /*!  | 
1354  |  |  * @def XXH32_ENDJMP  | 
1355  |  |  * @brief Whether to use a jump for `XXH32_finalize`.  | 
1356  |  |  *  | 
1357  |  |  * For performance, `XXH32_finalize` uses multiple branches in the finalizer.  | 
1358  |  |  * This is generally preferable for performance,  | 
1359  |  |  * but depending on exact architecture, a jmp may be preferable.  | 
1360  |  |  *  | 
1361  |  |  * This setting is only likely to make a difference for very small inputs.  | 
1362  |  |  */  | 
1363  |  | #  define XXH32_ENDJMP 0  | 
1364  |  |  | 
1365  |  | /*!  | 
1366  |  |  * @internal  | 
1367  |  |  * @brief Redefines old internal names.  | 
1368  |  |  *  | 
1369  |  |  * For compatibility with code that uses xxHash's internals before the names  | 
1370  |  |  * were changed to improve namespacing. There is no other reason to use this.  | 
1371  |  |  */  | 
1372  |  | #  define XXH_OLD_NAMES  | 
1373  |  | #  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */  | 
1374  |  | #endif /* XXH_DOXYGEN */  | 
1375  |  | /*!  | 
1376  |  |  * @}  | 
1377  |  |  */  | 
1378  |  |  | 
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
   /*
    * No explicit choice was made, so pick a default.
    * Method 1 (packed-struct reads) is selected only when the compiler is
    * not Clang AND one of the following holds:
    *   - the Intel compiler is used outside Windows, or
    *   - GCC is used and the target is either ARM with __ARM_ARCH >= 7, or
    *     MIPS passing the __mips / __mips_isa_rev and mips16 checks below.
    * In every other configuration the macro stays undefined, and the
    * memory-access code falls through to its memcpy()-based reader.
    */
#  if !defined(__clang__) && \
( \
    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
    ( \
        defined(__GNUC__) && ( \
            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
            ( \
                defined(__mips__) && \
                (__mips <= 5 || __mips_isa_rev < 6) && \
                (!defined(__mips16) || defined(__mips_mips16e2)) \
            ) \
        ) \
    ) \
)
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
#endif
1398  |  |  | 
/*
 * Default for XXH_FORCE_ALIGN_CHECK when the build did not provide one:
 * 0 on x86, x64 and arm64 targets (GCC/Clang and Visual Studio spellings),
 * 1 on everything else.
 */
#if !defined(XXH_FORCE_ALIGN_CHECK)  /* can be defined externally */
#  if !( defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
      || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */ )
#    define XXH_FORCE_ALIGN_CHECK 1
#  else
#    define XXH_FORCE_ALIGN_CHECK 0
#  endif
#endif
1407  |  |  | 
/*
 * Default for XXH_NO_INLINE_HINTS when the build did not provide one:
 * 1 if the compiler advertises size optimization or disabled inlining,
 * 0 otherwise.
 */
#if !defined(XXH_NO_INLINE_HINTS)
#  if !defined(__OPTIMIZE_SIZE__) && !defined(__NO_INLINE__)
#    define XXH_NO_INLINE_HINTS 0
#  else  /* -Os, -Oz (__OPTIMIZE_SIZE__) or -O0, -fno-inline (__NO_INLINE__) */
#    define XXH_NO_INLINE_HINTS 1
#  endif
#endif
1416  |  |  | 
/* Unless overridden by the build, leave XXH32_ENDJMP off:
 * that is generally preferable for performance. */
#if !defined(XXH32_ENDJMP)
#  define XXH32_ENDJMP 0
#endif
1421  |  |  | 
1422  |  | /*!  | 
1423  |  |  * @defgroup impl Implementation  | 
1424  |  |  * @{ | 
1425  |  |  */  | 
1426  |  |  | 
1427  |  |  | 
1428  |  | /* *************************************  | 
1429  |  | *  Includes & Memory related functions  | 
1430  |  | ***************************************/  | 
1431  |  | /*  | 
1432  |  |  * Modify the local functions below should you wish to use  | 
1433  |  |  * different memory routines for malloc() and free()  | 
1434  |  |  */  | 
1435  |  | #include <stdlib.h>  | 
1436  |  |  | 
/*!
 * @internal
 * @brief Allocation hook: edit the body to route through an allocator other
 *        than malloc().
 *
 * @param s Number of bytes requested.
 * @return Whatever malloc() returns for @p s.
 */
static void* XXH_malloc(size_t s)
{
    void* const block = malloc(s);
    return block;
}
1442  |  |  | 
/*!
 * @internal
 * @brief Release hook: edit the body to route through a routine other than
 *        free().
 *
 * @param p Pointer handed straight to free().
 */
static void XXH_free(void* p)
{
    free(p);
}
1448  |  |  | 
1449  |  | #include <string.h>  | 
1450  |  |  | 
/*!
 * @internal
 * @brief Copy hook: edit the body to route through a routine other than
 *        memcpy().
 *
 * @param dest Destination buffer.
 * @param src  Source buffer.
 * @param size Number of bytes to copy.
 * @return The value returned by memcpy().
 */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    void* const written = memcpy(dest, src, size);
    return written;
}
1459  |  |  | 
1460  |  | #include <limits.h>   /* ULLONG_MAX */  | 
1461  |  |  | 
1462  |  |  | 
1463  |  | /* *************************************  | 
1464  |  | *  Compiler Specific Options  | 
1465  |  | ***************************************/  | 
/* Visual Studio only: silence C4127, which would otherwise be reported for
 * conditions that are compile-time constants. */
#ifdef _MSC_VER /* Visual Studio warning fix */
#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
1469  |  |  | 
/*
 * Defines the two linkage/inlining prefixes used on internal functions.
 * Both always expand to something starting with `static`.
 *
 *  - XXH_NO_INLINE_HINTS non-zero: no inline or noinline request is attached;
 *    GCC/Clang only get `unused` on XXH_FORCE_INLINE.
 *  - otherwise the first matching toolchain wins:
 *    GCC/Clang attributes, then Visual Studio keywords, then plain `inline`
 *    for C++ or C99 and later, and finally bare `static`.
 */
#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
#  if defined(__GNUC__) || defined(__clang__)
#    define XXH_FORCE_INLINE static __attribute__((unused))
#  else
#    define XXH_FORCE_INLINE static
#  endif
#  define XXH_NO_INLINE static
/* enable inlining hints */
#elif defined(__GNUC__) || defined(__clang__)
#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
#  define XXH_NO_INLINE static __attribute__((noinline))
#elif defined(_MSC_VER)  /* Visual Studio */
#  define XXH_FORCE_INLINE static __forceinline
#  define XXH_NO_INLINE static __declspec(noinline)
#elif defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
#  define XXH_FORCE_INLINE static inline
#  define XXH_NO_INLINE static
#else
#  define XXH_FORCE_INLINE static
#  define XXH_NO_INLINE static
#endif
1492  |  |  | 
1493  |  |  | 
1494  |  |  | 
1495  |  | /* *************************************  | 
1496  |  | *  Debug  | 
1497  |  | ***************************************/  | 
1498  |  | /*!  | 
1499  |  |  * @ingroup tuning  | 
1500  |  |  * @def XXH_DEBUGLEVEL  | 
1501  |  |  * @brief Sets the debugging level.  | 
1502  |  |  *  | 
1503  |  |  * XXH_DEBUGLEVEL is expected to be defined externally, typically via the  | 
1504  |  |  * compiler's command line options. The value must be a number.  | 
1505  |  |  */  | 
/* Default XXH_DEBUGLEVEL: inherit the legacy DEBUGLEVEL macro when the build
 * provides it, otherwise 0. */
#if !defined(XXH_DEBUGLEVEL)
#  if !defined(DEBUGLEVEL)
#    define XXH_DEBUGLEVEL 0
#  else  /* backwards compat */
#    define XXH_DEBUGLEVEL DEBUGLEVEL
#  endif
#endif
1513  |  |  | 
/* XXH_ASSERT(c): a no-op that does not evaluate `c` below debug level 1,
 * a regular assert() from level 1 upwards. */
#if (XXH_DEBUGLEVEL < 1)
#  define XXH_ASSERT(c)   ((void)0)
#else
#  include <assert.h>   /* note: can still be disabled with NDEBUG */
#  define XXH_ASSERT(c)   assert(c)
#endif
1520  |  |  | 
/*
 * Compile-time assertion helpers.
 *
 * XXH_STATIC_ASSERT(c) forwards to XXH_STATIC_ASSERT_WITH_MESSAGE with the
 * stringified condition as the message. Every expansion is a
 * `do { ... } while(0)` statement, hence:
 * note: use after variable declarations
 *
 * Skipped entirely if XXH_STATIC_ASSERT is already defined.
 */
#ifndef XXH_STATIC_ASSERT
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
#    include <assert.h>
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
#  else
     /* No static_assert available: a false condition yields an array of
      * size -1, which fails to compile. The message `m` is not used here. */
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
#  endif
#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
#endif
1533  |  |  | 
1534  |  | /*!  | 
1535  |  |  * @internal  | 
1536  |  |  * @def XXH_COMPILER_GUARD(var)  | 
1537  |  |  * @brief Used to prevent unwanted optimizations for @p var.  | 
1538  |  |  *  | 
1539  |  |  * It uses an empty GCC inline assembly statement with a register constraint  | 
1540  |  |  * which forces @p var into a general purpose register (e.g., eax, ebx, ecx  | 
1541  |  |  * on x86) and marks it as modified.  | 
1542  |  |  *  | 
1543  |  |  * This is used in a few places to avoid unwanted autovectorization (e.g.  | 
1544  |  |  * XXH32_round()). All vectorization we want is explicit via intrinsics,  | 
1545  |  |  * and _usually_ isn't wanted elsewhere.  | 
1546  |  |  *  | 
1547  |  |  * We also use it to prevent unwanted constant folding for AArch64 in  | 
1548  |  |  * XXH3_initCustomSecret_scalar().  | 
1549  |  |  */  | 
#if !defined(__GNUC__) && !defined(__clang__)
/* Neither GCC nor Clang: the guard expands to nothing useful. */
#  define XXH_COMPILER_GUARD(var) ((void)0)
#else
/* Empty asm statement taking var as a read-write register operand. */
#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
#endif
1555  |  |  | 
1556  |  | /* *************************************  | 
1557  |  | *  Basic Types  | 
1558  |  | ***************************************/  | 
/*
 * Internal shorthand integer types.
 * xxh_u8 is uint8_t when <stdint.h> can be relied on (C++ or C99 and later,
 * excluding VMS) and unsigned char otherwise.
 * xxh_u32 simply aliases the public XXH32_hash_t.
 */
#if !defined (__VMS) \
 && (defined (__cplusplus) \
 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
  typedef uint8_t xxh_u8;
#else
  typedef unsigned char xxh_u8;
#endif
typedef XXH32_hash_t xxh_u32;
1568  |  |  | 
/* Opt-in legacy spellings of the internal integer types; only provided when
 * XXH_OLD_NAMES is defined. */
#ifdef XXH_OLD_NAMES
#  define BYTE xxh_u8
#  define U8   xxh_u8
#  define U32  xxh_u32
#endif
1574  |  |  | 
1575  |  | /* ***   Memory access   *** */  | 
1576  |  |  | 
1577  |  | /*!  | 
1578  |  |  * @internal  | 
1579  |  |  * @fn xxh_u32 XXH_read32(const void* ptr)  | 
1580  |  |  * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.  | 
1581  |  |  *  | 
1582  |  |  * Affected by @ref XXH_FORCE_MEMORY_ACCESS.  | 
1583  |  |  *  | 
1584  |  |  * @param ptr The pointer to read from.  | 
1585  |  |  * @return The 32-bit native endian integer from the bytes at @p ptr.  | 
1586  |  |  */  | 
1587  |  |  | 
1588  |  | /*!  | 
1589  |  |  * @internal  | 
1590  |  |  * @fn xxh_u32 XXH_readLE32(const void* ptr)  | 
1591  |  |  * @brief Reads an unaligned 32-bit little endian integer from @p ptr.  | 
1592  |  |  *  | 
1593  |  |  * Affected by @ref XXH_FORCE_MEMORY_ACCESS.  | 
1594  |  |  *  | 
1595  |  |  * @param ptr The pointer to read from.  | 
1596  |  |  * @return The 32-bit little endian integer from the bytes at @p ptr.  | 
1597  |  |  */  | 
1598  |  |  | 
1599  |  | /*!  | 
1600  |  |  * @internal  | 
1601  |  |  * @fn xxh_u32 XXH_readBE32(const void* ptr)  | 
1602  |  |  * @brief Reads an unaligned 32-bit big endian integer from @p ptr.  | 
1603  |  |  *  | 
1604  |  |  * Affected by @ref XXH_FORCE_MEMORY_ACCESS.  | 
1605  |  |  *  | 
1606  |  |  * @param ptr The pointer to read from.  | 
1607  |  |  * @return The 32-bit big endian integer from the bytes at @p ptr.  | 
1608  |  |  */  | 
1609  |  |  | 
1610  |  | /*!  | 
1611  |  |  * @internal  | 
1612  |  |  * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)  | 
1613  |  |  * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.  | 
1614  |  |  *  | 
1615  |  |  * Affected by @ref XXH_FORCE_MEMORY_ACCESS.  | 
1616  |  |  * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is  | 
1617  |  |  * always @ref XXH_alignment::XXH_unaligned.  | 
1618  |  |  *  | 
1619  |  |  * @param ptr The pointer to read from.  | 
1620  |  |  * @param align Whether @p ptr is aligned.  | 
1621  |  |  * @pre  | 
1622  |  |  *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte  | 
1623  |  |  *   aligned.  | 
1624  |  |  * @return The 32-bit little endian integer from the bytes at @p ptr.  | 
1625  |  |  */  | 
1626  |  |  | 
/*
 * XXH_read32(): native-endian 32-bit load, implemented according to
 * XXH_FORCE_MEMORY_ACCESS (3: none defined here, 2: direct dereference,
 * 1: packed union, anything else or undefined: memcpy).
 */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
/*
 * Manual byteshift. Best for old compilers which don't inline memcpy.
 * We actually directly use XXH_readLE32 and XXH_readBE32.
 */
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/*
 * Force direct memory access. Only works on CPU which support unaligned memory
 * access in hardware.
 */
static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/*
 * __pack instructions are safer but compiler specific, hence potentially
 * problematic for some compilers.
 *
 * Currently only defined for GCC and ICC.
 */
#ifdef XXH_OLD_NAMES
/* Legacy file-scope name for the packed union type. */
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
#endif
static xxh_u32 XXH_read32(const void* ptr)
{
    /* The packed attribute is what makes reading through this union
     * acceptable for a pointer with no particular alignment. */
    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
    return ((const xxh_unalign*)ptr)->u32;
}

#else

/*
 * Portable and safe solution. Generally efficient.
 * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
static xxh_u32 XXH_read32(const void* memPtr)
{
    xxh_u32 val;
    /* Copy the bytes into a properly typed local instead of dereferencing. */
    XXH_memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */
1671  |  |  | 
1672  |  |  | 
1673  |  | /* ***   Endianness   *** */  | 
1674  |  |  | 
1675  |  | /*!  | 
1676  |  |  * @ingroup tuning  | 
1677  |  |  * @def XXH_CPU_LITTLE_ENDIAN  | 
1678  |  |  * @brief Whether the target is little endian.  | 
1679  |  |  *  | 
1680  |  |  * Defined to 1 if the target is little endian, or 0 if it is big endian.  | 
1681  |  |  * It can be defined externally, for example on the compiler command line.  | 
1682  |  |  *  | 
1683  |  |  * If it is not defined,  | 
1684  |  |  * a runtime check (which is usually constant folded) is used instead.  | 
1685  |  |  *  | 
1686  |  |  * @note  | 
1687  |  |  *   This is not necessarily defined to an integer constant.  | 
1688  |  |  *  | 
1689  |  |  * @see XXH_isLittleEndian() for the runtime check.  | 
1690  |  |  */  | 
#ifndef XXH_CPU_LITTLE_ENDIAN
/*
 * Try to detect endianness automatically, to avoid the nonstandard behavior
 * in `XXH_isLittleEndian()`
 *
 * Order of the checks: compile-time little-endian indicators first, then
 * compile-time big-endian indicators, and only if neither matches, the
 * function-based check.
 */
#  if defined(_WIN32) /* Windows is always little endian */ \
     || defined(__LITTLE_ENDIAN__) \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define XXH_CPU_LITTLE_ENDIAN 1
#  elif defined(__BIG_ENDIAN__) \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define XXH_CPU_LITTLE_ENDIAN 0
#  else
/*!
 * @internal
 * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
 *
 * Most compilers will constant fold this.
 *
 * @return The first byte of a 32-bit value holding 1: non-zero on a
 *         little endian target, 0 otherwise.
 */
static int XXH_isLittleEndian(void)
{
    /*
     * Portable and well-defined behavior.
     * Don't use static: it is detrimental to performance.
     */
    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
    return one.c[0];
}
     /* In this branch the macro is a function call, not an integer constant. */
#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
#  endif
#endif
1722  |  |  | 
1723  |  |  | 
1724  |  |  | 
1725  |  |  | 
1726  |  | /* ****************************************  | 
1727  |  | *  Compiler-specific Functions and Macros  | 
1728  |  | ******************************************/  | 
/* GCC version packed as major*100 + minor (so 4.3 becomes 403). */
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
1730  |  |  | 
/* Wrapper around __has_builtin that evaluates to 0 on compilers which do not
 * provide it, so it can be used unconditionally in #if expressions. */
#ifdef __has_builtin
#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define XXH_HAS_BUILTIN(x) 0
#endif
1736  |  |  | 
1737  |  | /*!  | 
1738  |  |  * @internal  | 
1739  |  |  * @def XXH_rotl32(x,r)  | 
1740  |  |  * @brief 32-bit rotate left.  | 
1741  |  |  *  | 
1742  |  |  * @param x The 32-bit integer to be rotated.  | 
1743  |  |  * @param r The number of bits to rotate.  | 
1744  |  |  * @pre  | 
1745  |  |  *   @p r > 0 && @p r < 32  | 
1746  |  |  * @note  | 
1747  |  |  *   @p x and @p r may be evaluated multiple times.  | 
1748  |  |  * @return The rotated result.  | 
1749  |  |  */  | 
/*
 * Rotate-left selection. The builtins are used only when BOTH the 32-bit and
 * the 64-bit one are reported available and NO_CLANG_BUILTIN is not defined;
 * next comes the Visual Studio intrinsic pair, and last a shift/or expression.
 */
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
#  define XXH_rotl32 __builtin_rotateleft32
#  define XXH_rotl64 __builtin_rotateleft64
/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
#elif defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
/* Generic form: x and r each appear twice in the expansion, and r must stay
 * strictly between 0 and the bit width to keep both shift counts in range. */
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
1762  |  |  | 
1763  |  | /*!  | 
1764  |  |  * @internal  | 
1765  |  |  * @fn xxh_u32 XXH_swap32(xxh_u32 x)  | 
1766  |  |  * @brief A 32-bit byteswap.  | 
1767  |  |  *  | 
1768  |  |  * @param x The 32-bit integer to byteswap.  | 
1769  |  |  * @return @p x, byteswapped.  | 
1770  |  |  */  | 
1771  |  | #if defined(_MSC_VER)     /* Visual Studio */  | 
1772  |  | #  define XXH_swap32 _byteswap_ulong  | 
1773  |  | #elif XXH_GCC_VERSION >= 403  | 
1774  |  | #  define XXH_swap32 __builtin_bswap32  | 
1775  |  | #else  | 
1776  |  | static xxh_u32 XXH_swap32 (xxh_u32 x)  | 
1777  | 0  | { | 
1778  | 0  |     return  ((x << 24) & 0xff000000 ) |  | 
1779  | 0  |             ((x <<  8) & 0x00ff0000 ) |  | 
1780  | 0  |             ((x >>  8) & 0x0000ff00 ) |  | 
1781  | 0  |             ((x >> 24) & 0x000000ff );  | 
1782  | 0  | }  | 
1783  |  | #endif  | 
1784  |  |  | 
1785  |  |  | 
1786  |  | /* ***************************  | 
1787  |  | *  Memory reads  | 
1788  |  | *****************************/  | 
1789  |  |  | 
/*!
 * @internal
 * @brief Tells a reader whether the pointer it receives is known to be aligned.
 */
typedef enum {
    XXH_aligned   = 0,  /*!< Aligned */
    XXH_unaligned = 1   /*!< Possibly unaligned */
} XXH_alignment;
1798  |  |  | 
1799  |  | /*  | 
1800  |  |  * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.  | 
1801  |  |  *  | 
1802  |  |  * This is ideal for older compilers which don't inline memcpy.  | 
1803  |  |  */  | 
1804  |  | #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))  | 
1805  |  |  | 
1806  |  | XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)  | 
1807  |  | { | 
1808  |  |     const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;  | 
1809  |  |     return bytePtr[0]  | 
1810  |  |          | ((xxh_u32)bytePtr[1] << 8)  | 
1811  |  |          | ((xxh_u32)bytePtr[2] << 16)  | 
1812  |  |          | ((xxh_u32)bytePtr[3] << 24);  | 
1813  |  | }  | 
1814  |  |  | 
1815  |  | XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)  | 
1816  |  | { | 
1817  |  |     const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;  | 
1818  |  |     return bytePtr[3]  | 
1819  |  |          | ((xxh_u32)bytePtr[2] << 8)  | 
1820  |  |          | ((xxh_u32)bytePtr[1] << 16)  | 
1821  |  |          | ((xxh_u32)bytePtr[0] << 24);  | 
1822  |  | }  | 
1823  |  |  | 
1824  |  | #else  | 
1825  |  | XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)  | 
1826  | 0  | { | 
1827  | 0  |     return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));  | 
1828  | 0  | }  | 
1829  |  |  | 
1830  |  | static xxh_u32 XXH_readBE32(const void* ptr)  | 
1831  | 0  | { | 
1832  | 0  |     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);  | 
1833  | 0  | }  | 
1834  |  | #endif  | 
1835  |  |  | 
1836  |  | XXH_FORCE_INLINE xxh_u32  | 
1837  |  | XXH_readLE32_align(const void* ptr, XXH_alignment align)  | 
1838  | 0  | { | 
1839  | 0  |     if (align==XXH_unaligned) { | 
1840  | 0  |         return XXH_readLE32(ptr);  | 
1841  | 0  |     } else { | 
1842  | 0  |         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);  | 
1843  | 0  |     }  | 
1844  | 0  | }  | 
1845  |  |  | 
1846  |  |  | 
1847  |  | /* *************************************  | 
1848  |  | *  Misc  | 
1849  |  | ***************************************/  | 
1850  |  | /*! @ingroup public */  | 
1851  | 0  | XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } | 
1852  |  |  | 
1853  |  |  | 
1854  |  | /* *******************************************************************  | 
1855  |  | *  32-bit hash functions  | 
1856  |  | *********************************************************************/  | 
1857  |  | /*!  | 
1858  |  |  * @}  | 
1859  |  |  * @defgroup xxh32_impl XXH32 implementation  | 
1860  |  |  * @ingroup impl  | 
1861  |  |  * @{ | 
1862  |  |  */  | 
 /* #define instead of static const, to be used as initializers */
 /* The five 32-bit constants of the XXH32 family; each trailing comment
  * spells out the same value in binary. */
#define XXH_PRIME32_1  0x9E3779B1U  /*!< 0b10011110001101110111100110110001 */
#define XXH_PRIME32_2  0x85EBCA77U  /*!< 0b10000101111010111100101001110111 */
#define XXH_PRIME32_3  0xC2B2AE3DU  /*!< 0b11000010101100101010111000111101 */
#define XXH_PRIME32_4  0x27D4EB2FU  /*!< 0b00100111110101001110101100101111 */
#define XXH_PRIME32_5  0x165667B1U  /*!< 0b00010110010101100110011110110001 */
1869  |  |  | 
/* Opt-in legacy, un-prefixed spellings of the 32-bit prime constants; only
 * provided when XXH_OLD_NAMES is defined. */
#ifdef XXH_OLD_NAMES
#  define PRIME32_1 XXH_PRIME32_1
#  define PRIME32_2 XXH_PRIME32_2
#  define PRIME32_3 XXH_PRIME32_3
#  define PRIME32_4 XXH_PRIME32_4
#  define PRIME32_5 XXH_PRIME32_5
#endif
1877  |  |  | 
1878  |  | /*!  | 
1879  |  |  * @internal  | 
1880  |  |  * @brief Normal stripe processing routine.  | 
1881  |  |  *  | 
1882  |  |  * This shuffles the bits so that any bit from @p input impacts several bits in  | 
1883  |  |  * @p acc.  | 
1884  |  |  *  | 
1885  |  |  * @param acc The accumulator lane.  | 
1886  |  |  * @param input The stripe of input to mix.  | 
1887  |  |  * @return The mixed accumulator lane.  | 
1888  |  |  */  | 
1889  |  | static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)  | 
1890  | 0  | { | 
1891  | 0  |     acc += input * XXH_PRIME32_2;  | 
1892  | 0  |     acc  = XXH_rotl32(acc, 13);  | 
1893  | 0  |     acc *= XXH_PRIME32_1;  | 
1894  |  | #if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)  | 
1895  |  |     /*  | 
1896  |  |      * UGLY HACK:  | 
1897  |  |      * A compiler fence is the only thing that prevents GCC and Clang from  | 
1898  |  |      * autovectorizing the XXH32 loop (pragmas and attributes don't work for some  | 
1899  |  |      * reason) without globally disabling SSE4.1.  | 
1900  |  |      *  | 
1901  |  |      * The reason we want to avoid vectorization is because despite working on  | 
1902  |  |      * 4 integers at a time, there are multiple factors slowing XXH32 down on  | 
1903  |  |      * SSE4:  | 
1904  |  |      * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on  | 
1905  |  |      *   newer chips!) making it slightly slower to multiply four integers at  | 
1906  |  |      *   once compared to four integers independently. Even when pmulld was  | 
1907  |  |      *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE  | 
1908  |  |      *   just to multiply unless doing a long operation.  | 
1909  |  |      *  | 
1910  |  |      * - Four instructions are required to rotate,  | 
1911  |  |      *      movqda tmp,  v // not required with VEX encoding  | 
1912  |  |      *      pslld  tmp, 13 // tmp <<= 13  | 
1913  |  |      *      psrld  v,   19 // x >>= 19  | 
1914  |  |      *      por    v,  tmp // x |= tmp  | 
1915  |  |      *   compared to one for scalar:  | 
1916  |  |      *      roll   v, 13    // reliably fast across the board  | 
1917  |  |      *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason  | 
1918  |  |      *  | 
1919  |  |      * - Instruction level parallelism is actually more beneficial here because  | 
1920  |  |      *   the SIMD actually serializes this operation: While v1 is rotating, v2  | 
1921  |  |      *   can load data, while v3 can multiply. SSE forces them to operate  | 
1922  |  |      *   together.  | 
1923  |  |      *  | 
1924  |  |      * This is also enabled on AArch64, as Clang autovectorizes it incorrectly  | 
1925  |  |      * and it is pointless writing a NEON implementation that is basically the  | 
1926  |  |      * same speed as scalar for XXH32.  | 
1927  |  |      */  | 
1928  |  |     XXH_COMPILER_GUARD(acc);  | 
1929  |  | #endif  | 
1930  | 0  |     return acc;  | 
1931  | 0  | }  | 
1932  |  |  | 
1933  |  | /*!  | 
1934  |  |  * @internal  | 
1935  |  |  * @brief Mixes all bits to finalize the hash.  | 
1936  |  |  *  | 
1937  |  |  * The final mix ensures that all input bits have a chance to impact any bit in  | 
1938  |  |  * the output digest, resulting in an unbiased distribution.  | 
1939  |  |  *  | 
1940  |  |  * @param h32 The hash to avalanche.  | 
1941  |  |  * @return The avalanched hash.  | 
1942  |  |  */  | 
1943  |  | static xxh_u32 XXH32_avalanche(xxh_u32 h32)  | 
1944  | 0  | { | 
1945  | 0  |     h32 ^= h32 >> 15;  | 
1946  | 0  |     h32 *= XXH_PRIME32_2;  | 
1947  | 0  |     h32 ^= h32 >> 13;  | 
1948  | 0  |     h32 *= XXH_PRIME32_3;  | 
1949  | 0  |     h32 ^= h32 >> 16;  | 
1950  | 0  |     return(h32);  | 
1951  | 0  | }  | 
1952  |  |  | 
/*
 * Little-endian 32-bit load used by the hashing routines. The expansion
 * refers to a variable named `align`, which must be in scope at the call site.
 */
#define XXH_get32bits(p) XXH_readLE32_align((p), align)
1954  |  |  | 
1955  |  | /*!  | 
1956  |  |  * @internal  | 
1957  |  |  * @brief Processes the last 0-15 bytes of @p ptr.  | 
1958  |  |  *  | 
1959  |  |  * There may be up to 15 bytes remaining to consume from the input.  | 
1960  |  |  * This final stage will digest them to ensure that all input bytes are present  | 
1961  |  |  * in the final mix.  | 
1962  |  |  *  | 
1963  |  |  * @param h32 The hash to finalize.  | 
1964  |  |  * @param ptr The pointer to the remaining input.  | 
1965  |  |  * @param len The remaining length, modulo 16.  | 
1966  |  |  * @param align Whether @p ptr is aligned.  | 
1967  |  |  * @return The finalized hash.  | 
1968  |  |  */  | 
1969  |  | static xxh_u32  | 
1970  |  | XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)  | 
1971  | 0  | { | 
1972  | 0  | #define XXH_PROCESS1 do {                           \ | 
1973  | 0  |     h32 += (*ptr++) * XXH_PRIME32_5;                \  | 
1974  | 0  |     h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \  | 
1975  | 0  | } while (0)  | 
1976  |  | 
  | 
1977  | 0  | #define XXH_PROCESS4 do {                           \ | 
1978  | 0  |     h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \  | 
1979  | 0  |     ptr += 4;                                   \  | 
1980  | 0  |     h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \  | 
1981  | 0  | } while (0)  | 
1982  |  | 
  | 
1983  | 0  |     if (ptr==NULL) XXH_ASSERT(len == 0);  | 
1984  |  |  | 
1985  |  |     /* Compact rerolled version; generally faster */  | 
1986  | 0  |     if (!XXH32_ENDJMP) { | 
1987  | 0  |         len &= 15;  | 
1988  | 0  |         while (len >= 4) { | 
1989  | 0  |             XXH_PROCESS4;  | 
1990  | 0  |             len -= 4;  | 
1991  | 0  |         }  | 
1992  | 0  |         while (len > 0) { | 
1993  | 0  |             XXH_PROCESS1;  | 
1994  | 0  |             --len;  | 
1995  | 0  |         }  | 
1996  | 0  |         return XXH32_avalanche(h32);  | 
1997  | 0  |     } else { | 
1998  | 0  |          switch(len&15) /* or switch(bEnd - p) */ { | 
1999  | 0  |            case 12:      XXH_PROCESS4;  | 
2000  | 0  |                          XXH_FALLTHROUGH;  | 
2001  | 0  |            case 8:       XXH_PROCESS4;  | 
2002  | 0  |                          XXH_FALLTHROUGH;  | 
2003  | 0  |            case 4:       XXH_PROCESS4;  | 
2004  | 0  |                          return XXH32_avalanche(h32);  | 
2005  |  |  | 
2006  | 0  |            case 13:      XXH_PROCESS4;  | 
2007  | 0  |                          XXH_FALLTHROUGH;  | 
2008  | 0  |            case 9:       XXH_PROCESS4;  | 
2009  | 0  |                          XXH_FALLTHROUGH;  | 
2010  | 0  |            case 5:       XXH_PROCESS4;  | 
2011  | 0  |                          XXH_PROCESS1;  | 
2012  | 0  |                          return XXH32_avalanche(h32);  | 
2013  |  |  | 
2014  | 0  |            case 14:      XXH_PROCESS4;  | 
2015  | 0  |                          XXH_FALLTHROUGH;  | 
2016  | 0  |            case 10:      XXH_PROCESS4;  | 
2017  | 0  |                          XXH_FALLTHROUGH;  | 
2018  | 0  |            case 6:       XXH_PROCESS4;  | 
2019  | 0  |                          XXH_PROCESS1;  | 
2020  | 0  |                          XXH_PROCESS1;  | 
2021  | 0  |                          return XXH32_avalanche(h32);  | 
2022  |  |  | 
2023  | 0  |            case 15:      XXH_PROCESS4;  | 
2024  | 0  |                          XXH_FALLTHROUGH;  | 
2025  | 0  |            case 11:      XXH_PROCESS4;  | 
2026  | 0  |                          XXH_FALLTHROUGH;  | 
2027  | 0  |            case 7:       XXH_PROCESS4;  | 
2028  | 0  |                          XXH_FALLTHROUGH;  | 
2029  | 0  |            case 3:       XXH_PROCESS1;  | 
2030  | 0  |                          XXH_FALLTHROUGH;  | 
2031  | 0  |            case 2:       XXH_PROCESS1;  | 
2032  | 0  |                          XXH_FALLTHROUGH;  | 
2033  | 0  |            case 1:       XXH_PROCESS1;  | 
2034  | 0  |                          XXH_FALLTHROUGH;  | 
2035  | 0  |            case 0:       return XXH32_avalanche(h32);  | 
2036  | 0  |         }  | 
2037  | 0  |         XXH_ASSERT(0);  | 
2038  | 0  |         return h32;   /* reaching this point is deemed impossible */  | 
2039  | 0  |     }  | 
2040  | 0  | }  | 
2041  |  |  | 
/*
 * XXH_PROCESS1 and XXH_PROCESS4 are defined inside XXH32_finalize() and are
 * still visible here. With XXH_OLD_NAMES the legacy unprefixed aliases are
 * mapped onto them; otherwise both helpers are undefined so they do not leak
 * past this point.
 */
#ifdef XXH_OLD_NAMES
#  define PROCESS1 XXH_PROCESS1
#  define PROCESS4 XXH_PROCESS4
#else
#  undef XXH_PROCESS1
#  undef XXH_PROCESS4
#endif
2049  |  |  | 
2050  |  | /*!  | 
2051  |  |  * @internal  | 
2052  |  |  * @brief The implementation for @ref XXH32().  | 
2053  |  |  *  | 
2054  |  |  * @param input , len , seed Directly passed from @ref XXH32().  | 
2055  |  |  * @param align Whether @p input is aligned.  | 
2056  |  |  * @return The calculated hash.  | 
2057  |  |  */  | 
2058  |  | XXH_FORCE_INLINE xxh_u32  | 
2059  |  | XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)  | 
2060  | 0  | { | 
2061  | 0  |     xxh_u32 h32;  | 
2062  |  | 
  | 
2063  | 0  |     if (input==NULL) XXH_ASSERT(len == 0);  | 
2064  |  | 
  | 
2065  | 0  |     if (len>=16) { | 
2066  | 0  |         const xxh_u8* const bEnd = input + len;  | 
2067  | 0  |         const xxh_u8* const limit = bEnd - 15;  | 
2068  | 0  |         xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;  | 
2069  | 0  |         xxh_u32 v2 = seed + XXH_PRIME32_2;  | 
2070  | 0  |         xxh_u32 v3 = seed + 0;  | 
2071  | 0  |         xxh_u32 v4 = seed - XXH_PRIME32_1;  | 
2072  |  | 
  | 
2073  | 0  |         do { | 
2074  | 0  |             v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;  | 
2075  | 0  |             v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;  | 
2076  | 0  |             v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;  | 
2077  | 0  |             v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;  | 
2078  | 0  |         } while (input < limit);  | 
2079  |  | 
  | 
2080  | 0  |         h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)  | 
2081  | 0  |             + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);  | 
2082  | 0  |     } else { | 
2083  | 0  |         h32  = seed + XXH_PRIME32_5;  | 
2084  | 0  |     }  | 
2085  |  | 
  | 
2086  | 0  |     h32 += (xxh_u32)len;  | 
2087  |  | 
  | 
2088  | 0  |     return XXH32_finalize(h32, input, len&15, align);  | 
2089  | 0  | }  | 
2090  |  |  | 
2091  |  | /*! @ingroup xxh32_family */  | 
2092  |  | XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)  | 
2093  | 0  | { | 
2094  |  | #if 0  | 
2095  |  |     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */  | 
2096  |  |     XXH32_state_t state;  | 
2097  |  |     XXH32_reset(&state, seed);  | 
2098  |  |     XXH32_update(&state, (const xxh_u8*)input, len);  | 
2099  |  |     return XXH32_digest(&state);  | 
2100  |  | #else  | 
2101  | 0  |     if (XXH_FORCE_ALIGN_CHECK) { | 
2102  | 0  |         if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */ | 
2103  | 0  |             return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);  | 
2104  | 0  |     }   }  | 
2105  |  |  | 
2106  | 0  |     return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);  | 
2107  | 0  | #endif  | 
2108  | 0  | }  | 
2109  |  |  | 
2110  |  |  | 
2111  |  |  | 
2112  |  | /*******   Hash streaming   *******/  | 
2113  |  | /*!  | 
2114  |  |  * @ingroup xxh32_family  | 
2115  |  |  */  | 
2116  |  | XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)  | 
2117  | 0  | { | 
2118  | 0  |     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));  | 
2119  | 0  | }  | 
2120  |  | /*! @ingroup xxh32_family */  | 
2121  |  | XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)  | 
2122  | 0  | { | 
2123  | 0  |     XXH_free(statePtr);  | 
2124  | 0  |     return XXH_OK;  | 
2125  | 0  | }  | 
2126  |  |  | 
2127  |  | /*! @ingroup xxh32_family */  | 
2128  |  | XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)  | 
2129  | 0  | { | 
2130  | 0  |     XXH_memcpy(dstState, srcState, sizeof(*dstState));  | 
2131  | 0  | }  | 
2132  |  |  | 
2133  |  | /*! @ingroup xxh32_family */  | 
2134  |  | XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)  | 
2135  | 0  | { | 
2136  | 0  |     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */  | 
2137  | 0  |     memset(&state, 0, sizeof(state));  | 
2138  | 0  |     state.v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;  | 
2139  | 0  |     state.v[1] = seed + XXH_PRIME32_2;  | 
2140  | 0  |     state.v[2] = seed + 0;  | 
2141  | 0  |     state.v[3] = seed - XXH_PRIME32_1;  | 
2142  |  |     /* do not write into reserved, planned to be removed in a future version */  | 
2143  | 0  |     XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));  | 
2144  | 0  |     return XXH_OK;  | 
2145  | 0  | }  | 
2146  |  |  | 
2147  |  |  | 
2148  |  | /*! @ingroup xxh32_family */  | 
2149  |  | XXH_PUBLIC_API XXH_errorcode  | 
2150  |  | XXH32_update(XXH32_state_t* state, const void* input, size_t len)  | 
2151  | 0  | { | 
2152  | 0  |     if (input==NULL) { | 
2153  | 0  |         XXH_ASSERT(len == 0);  | 
2154  | 0  |         return XXH_OK;  | 
2155  | 0  |     }  | 
2156  |  |  | 
2157  | 0  |     {   const xxh_u8* p = (const xxh_u8*)input; | 
2158  | 0  |         const xxh_u8* const bEnd = p + len;  | 
2159  |  | 
  | 
2160  | 0  |         state->total_len_32 += (XXH32_hash_t)len;  | 
2161  | 0  |         state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));  | 
2162  |  | 
  | 
2163  | 0  |         if (state->memsize + len < 16)  {   /* fill in tmp buffer */ | 
2164  | 0  |             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);  | 
2165  | 0  |             state->memsize += (XXH32_hash_t)len;  | 
2166  | 0  |             return XXH_OK;  | 
2167  | 0  |         }  | 
2168  |  |  | 
2169  | 0  |         if (state->memsize) {   /* some data left from previous update */ | 
2170  | 0  |             XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);  | 
2171  | 0  |             {   const xxh_u32* p32 = state->mem32; | 
2172  | 0  |                 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;  | 
2173  | 0  |                 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;  | 
2174  | 0  |                 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;  | 
2175  | 0  |                 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));  | 
2176  | 0  |             }  | 
2177  | 0  |             p += 16-state->memsize;  | 
2178  | 0  |             state->memsize = 0;  | 
2179  | 0  |         }  | 
2180  |  | 
  | 
2181  | 0  |         if (p <= bEnd-16) { | 
2182  | 0  |             const xxh_u8* const limit = bEnd - 16;  | 
2183  |  | 
  | 
2184  | 0  |             do { | 
2185  | 0  |                 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;  | 
2186  | 0  |                 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;  | 
2187  | 0  |                 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;  | 
2188  | 0  |                 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;  | 
2189  | 0  |             } while (p<=limit);  | 
2190  |  | 
  | 
2191  | 0  |         }  | 
2192  |  | 
  | 
2193  | 0  |         if (p < bEnd) { | 
2194  | 0  |             XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));  | 
2195  | 0  |             state->memsize = (unsigned)(bEnd-p);  | 
2196  | 0  |         }  | 
2197  | 0  |     }  | 
2198  |  |  | 
2199  | 0  |     return XXH_OK;  | 
2200  | 0  | }  | 
2201  |  |  | 
2202  |  |  | 
2203  |  | /*! @ingroup xxh32_family */  | 
2204  |  | XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)  | 
2205  | 0  | { | 
2206  | 0  |     xxh_u32 h32;  | 
2207  |  | 
  | 
2208  | 0  |     if (state->large_len) { | 
2209  | 0  |         h32 = XXH_rotl32(state->v[0], 1)  | 
2210  | 0  |             + XXH_rotl32(state->v[1], 7)  | 
2211  | 0  |             + XXH_rotl32(state->v[2], 12)  | 
2212  | 0  |             + XXH_rotl32(state->v[3], 18);  | 
2213  | 0  |     } else { | 
2214  | 0  |         h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;  | 
2215  | 0  |     }  | 
2216  |  | 
  | 
2217  | 0  |     h32 += state->total_len_32;  | 
2218  |  | 
  | 
2219  | 0  |     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);  | 
2220  | 0  | }  | 
2221  |  |  | 
2222  |  |  | 
2223  |  | /*******   Canonical representation   *******/  | 
2224  |  |  | 
2225  |  | /*!  | 
2226  |  |  * @ingroup xxh32_family  | 
2227  |  |  * The default return values from XXH functions are unsigned 32 and 64 bit  | 
2228  |  |  * integers.  | 
2229  |  |  *  | 
2230  |  |  * The canonical representation uses big endian convention, the same convention  | 
2231  |  |  * as human-readable numbers (large digits first).  | 
2232  |  |  *  | 
2233  |  |  * This way, hash values can be written into a file or buffer, remaining  | 
2234  |  |  * comparable across different systems.  | 
2235  |  |  *  | 
2236  |  |  * The following functions allow transformation of hash values to and from their  | 
2237  |  |  * canonical format.  | 
2238  |  |  */  | 
2239  |  | XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)  | 
2240  | 0  | { | 
2241  | 0  |     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));  | 
2242  | 0  |     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);  | 
2243  | 0  |     XXH_memcpy(dst, &hash, sizeof(*dst));  | 
2244  | 0  | }  | 
2245  |  | /*! @ingroup xxh32_family */  | 
2246  |  | XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)  | 
2247  | 0  | { | 
2248  | 0  |     return XXH_readBE32(src);  | 
2249  | 0  | }  | 
2250  |  |  | 
2251  |  |  | 
2252  |  | #ifndef XXH_NO_LONG_LONG  | 
2253  |  |  | 
2254  |  | /* *******************************************************************  | 
2255  |  | *  64-bit hash functions  | 
2256  |  | *********************************************************************/  | 
2257  |  | /*!  | 
2258  |  |  * @}  | 
2259  |  |  * @ingroup impl  | 
2260  |  |  * @{ | 
2261  |  |  */  | 
2262  |  | /*******   Memory access   *******/  | 
2263  |  |  | 
2264  |  | typedef XXH64_hash_t xxh_u64;  | 
2265  |  |  | 
2266  |  | #ifdef XXH_OLD_NAMES  | 
2267  |  | #  define U64 xxh_u64  | 
2268  |  | #endif  | 
2269  |  |  | 
2270  |  | #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))  | 
2271  |  | /*  | 
2272  |  |  * Manual byteshift. Best for old compilers which don't inline memcpy.  | 
2273  |  |  * We actually directly use XXH_readLE64 and XXH_readBE64.  | 
2274  |  |  */  | 
2275  |  | #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))  | 
2276  |  |  | 
2277  |  | /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */  | 
2278  |  | static xxh_u64 XXH_read64(const void* memPtr)  | 
2279  |  | { | 
2280  |  |     return *(const xxh_u64*) memPtr;  | 
2281  |  | }  | 
2282  |  |  | 
2283  |  | #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))  | 
2284  |  |  | 
2285  |  | /*  | 
2286  |  |  * __pack instructions are safer, but compiler specific, hence potentially  | 
2287  |  |  * problematic for some compilers.  | 
2288  |  |  *  | 
2289  |  |  * Currently only defined for GCC and ICC.  | 
2290  |  |  */  | 
2291  |  | #ifdef XXH_OLD_NAMES  | 
2292  |  | typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; | 
2293  |  | #endif  | 
2294  |  | static xxh_u64 XXH_read64(const void* ptr)  | 
2295  |  | { | 
2296  |  |     typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64; | 
2297  |  |     return ((const xxh_unalign64*)ptr)->u64;  | 
2298  |  | }  | 
2299  |  |  | 
2300  |  | #else  | 
2301  |  |  | 
2302  |  | /*  | 
2303  |  |  * Portable and safe solution. Generally efficient.  | 
2304  |  |  * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html  | 
2305  |  |  */  | 
2306  |  | static xxh_u64 XXH_read64(const void* memPtr)  | 
2307  | 981k  | { | 
2308  | 981k  |     xxh_u64 val;  | 
2309  | 981k  |     XXH_memcpy(&val, memPtr, sizeof(val));  | 
2310  | 981k  |     return val;  | 
2311  | 981k  | }  | 
2312  |  |  | 
2313  |  | #endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */  | 
2314  |  |  | 
2315  |  | #if defined(_MSC_VER)     /* Visual Studio */  | 
2316  |  | #  define XXH_swap64 _byteswap_uint64  | 
2317  |  | #elif XXH_GCC_VERSION >= 403  | 
2318  |  | #  define XXH_swap64 __builtin_bswap64  | 
2319  |  | #else  | 
2320  |  | static xxh_u64 XXH_swap64(xxh_u64 x)  | 
2321  | 0  | { | 
2322  | 0  |     return  ((x << 56) & 0xff00000000000000ULL) |  | 
2323  | 0  |             ((x << 40) & 0x00ff000000000000ULL) |  | 
2324  | 0  |             ((x << 24) & 0x0000ff0000000000ULL) |  | 
2325  | 0  |             ((x << 8)  & 0x000000ff00000000ULL) |  | 
2326  | 0  |             ((x >> 8)  & 0x00000000ff000000ULL) |  | 
2327  | 0  |             ((x >> 24) & 0x0000000000ff0000ULL) |  | 
2328  | 0  |             ((x >> 40) & 0x000000000000ff00ULL) |  | 
2329  | 0  |             ((x >> 56) & 0x00000000000000ffULL);  | 
2330  | 0  | }  | 
2331  |  | #endif  | 
2332  |  |  | 
2333  |  |  | 
2334  |  | /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */  | 
2335  |  | #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))  | 
2336  |  |  | 
2337  |  | XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)  | 
2338  |  | { | 
2339  |  |     const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;  | 
2340  |  |     return bytePtr[0]  | 
2341  |  |          | ((xxh_u64)bytePtr[1] << 8)  | 
2342  |  |          | ((xxh_u64)bytePtr[2] << 16)  | 
2343  |  |          | ((xxh_u64)bytePtr[3] << 24)  | 
2344  |  |          | ((xxh_u64)bytePtr[4] << 32)  | 
2345  |  |          | ((xxh_u64)bytePtr[5] << 40)  | 
2346  |  |          | ((xxh_u64)bytePtr[6] << 48)  | 
2347  |  |          | ((xxh_u64)bytePtr[7] << 56);  | 
2348  |  | }  | 
2349  |  |  | 
2350  |  | XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)  | 
2351  |  | { | 
2352  |  |     const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;  | 
2353  |  |     return bytePtr[7]  | 
2354  |  |          | ((xxh_u64)bytePtr[6] << 8)  | 
2355  |  |          | ((xxh_u64)bytePtr[5] << 16)  | 
2356  |  |          | ((xxh_u64)bytePtr[4] << 24)  | 
2357  |  |          | ((xxh_u64)bytePtr[3] << 32)  | 
2358  |  |          | ((xxh_u64)bytePtr[2] << 40)  | 
2359  |  |          | ((xxh_u64)bytePtr[1] << 48)  | 
2360  |  |          | ((xxh_u64)bytePtr[0] << 56);  | 
2361  |  | }  | 
2362  |  |  | 
2363  |  | #else  | 
2364  |  | XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)  | 
2365  | 981k  | { | 
2366  | 981k  |     return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));  | 
2367  | 981k  | }  | 
2368  |  |  | 
2369  |  | static xxh_u64 XXH_readBE64(const void* ptr)  | 
2370  | 0  | { | 
2371  | 0  |     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);  | 
2372  | 0  | }  | 
2373  |  | #endif  | 
2374  |  |  | 
2375  |  | XXH_FORCE_INLINE xxh_u64  | 
2376  |  | XXH_readLE64_align(const void* ptr, XXH_alignment align)  | 
2377  | 981k  | { | 
2378  | 981k  |     if (align==XXH_unaligned)  | 
2379  | 981k  |         return XXH_readLE64(ptr);  | 
2380  | 0  |     else  | 
2381  | 0  |         return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);  | 
2382  | 981k  | }  | 
2383  |  |  | 
2384  |  |  | 
2385  |  | /*******   xxh64   *******/  | 
2386  |  | /*!  | 
2387  |  |  * @}  | 
2388  |  |  * @defgroup xxh64_impl XXH64 implementation  | 
2389  |  |  * @ingroup impl  | 
2390  |  |  * @{ | 
2391  |  |  */  | 
/*
 * The five 64-bit primes are preprocessor constants rather than
 * `static const` objects so that they stay usable inside initializers.
 */
#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */

/* Legacy, unprefixed spellings; only provided when XXH_OLD_NAMES is defined. */
#ifdef XXH_OLD_NAMES
#  define PRIME64_1 XXH_PRIME64_1
#  define PRIME64_2 XXH_PRIME64_2
#  define PRIME64_3 XXH_PRIME64_3
#  define PRIME64_4 XXH_PRIME64_4
#  define PRIME64_5 XXH_PRIME64_5
#endif
2406  |  |  | 
2407  |  | static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)  | 
2408  | 981k  | { | 
2409  | 981k  |     acc += input * XXH_PRIME64_2;  | 
2410  | 981k  |     acc  = XXH_rotl64(acc, 31);  | 
2411  | 981k  |     acc *= XXH_PRIME64_1;  | 
2412  | 981k  |     return acc;  | 
2413  | 981k  | }  | 
2414  |  |  | 
2415  |  | static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)  | 
2416  | 100  | { | 
2417  | 100  |     val  = XXH64_round(0, val);  | 
2418  | 100  |     acc ^= val;  | 
2419  | 100  |     acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;  | 
2420  | 100  |     return acc;  | 
2421  | 100  | }  | 
2422  |  |  | 
2423  |  | static xxh_u64 XXH64_avalanche(xxh_u64 h64)  | 
2424  | 25  | { | 
2425  | 25  |     h64 ^= h64 >> 33;  | 
2426  | 25  |     h64 *= XXH_PRIME64_2;  | 
2427  | 25  |     h64 ^= h64 >> 29;  | 
2428  | 25  |     h64 *= XXH_PRIME64_3;  | 
2429  | 25  |     h64 ^= h64 >> 32;  | 
2430  | 25  |     return h64;  | 
2431  | 25  | }  | 
2432  |  |  | 
2433  |  |  | 
/*
 * Little-endian 64-bit load used by the hashing routines. The expansion
 * refers to a variable named `align`, which must be in scope at the call site.
 */
#define XXH_get64bits(p) XXH_readLE64_align((p), align)
2435  |  |  | 
2436  |  | static xxh_u64  | 
2437  |  | XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)  | 
2438  | 25  | { | 
2439  | 25  |     if (ptr==NULL) XXH_ASSERT(len == 0);  | 
2440  | 25  |     len &= 31;  | 
2441  | 66  |     while (len >= 8) { | 
2442  | 41  |         xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));  | 
2443  | 41  |         ptr += 8;  | 
2444  | 41  |         h64 ^= k1;  | 
2445  | 41  |         h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;  | 
2446  | 41  |         len -= 8;  | 
2447  | 41  |     }  | 
2448  | 25  |     if (len >= 4) { | 
2449  | 0  |         h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;  | 
2450  | 0  |         ptr += 4;  | 
2451  | 0  |         h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;  | 
2452  | 0  |         len -= 4;  | 
2453  | 0  |     }  | 
2454  | 25  |     while (len > 0) { | 
2455  | 0  |         h64 ^= (*ptr++) * XXH_PRIME64_5;  | 
2456  | 0  |         h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;  | 
2457  | 0  |         --len;  | 
2458  | 0  |     }  | 
2459  | 25  |     return  XXH64_avalanche(h64);  | 
2460  | 25  | }  | 
2461  |  |  | 
/*
 * Legacy alias handling for the 64-bit finalization helper macros.
 * NOTE(review): the XXH64_finalize() shown just above does not define
 * XXH_PROCESS1_64 / XXH_PROCESS4_64 / XXH_PROCESS8_64, so the aliases appear
 * to map onto undefined names and the #undef lines to be no-ops; presumably
 * a leftover from an earlier macro-based implementation -- confirm before
 * removing.
 */
#ifdef XXH_OLD_NAMES
#  define PROCESS1_64 XXH_PROCESS1_64
#  define PROCESS4_64 XXH_PROCESS4_64
#  define PROCESS8_64 XXH_PROCESS8_64
#else
#  undef XXH_PROCESS1_64
#  undef XXH_PROCESS4_64
#  undef XXH_PROCESS8_64
#endif
2471  |  |  | 
2472  |  | XXH_FORCE_INLINE xxh_u64  | 
2473  |  | XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)  | 
2474  | 25  | { | 
2475  | 25  |     xxh_u64 h64;  | 
2476  | 25  |     if (input==NULL) XXH_ASSERT(len == 0);  | 
2477  |  |  | 
2478  | 25  |     if (len>=32) { | 
2479  | 25  |         const xxh_u8* const bEnd = input + len;  | 
2480  | 25  |         const xxh_u8* const limit = bEnd - 31;  | 
2481  | 25  |         xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;  | 
2482  | 25  |         xxh_u64 v2 = seed + XXH_PRIME64_2;  | 
2483  | 25  |         xxh_u64 v3 = seed + 0;  | 
2484  | 25  |         xxh_u64 v4 = seed - XXH_PRIME64_1;  | 
2485  |  |  | 
2486  | 245k  |         do { | 
2487  | 245k  |             v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;  | 
2488  | 245k  |             v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;  | 
2489  | 245k  |             v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;  | 
2490  | 245k  |             v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;  | 
2491  | 245k  |         } while (input<limit);  | 
2492  |  |  | 
2493  | 25  |         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);  | 
2494  | 25  |         h64 = XXH64_mergeRound(h64, v1);  | 
2495  | 25  |         h64 = XXH64_mergeRound(h64, v2);  | 
2496  | 25  |         h64 = XXH64_mergeRound(h64, v3);  | 
2497  | 25  |         h64 = XXH64_mergeRound(h64, v4);  | 
2498  |  |  | 
2499  | 25  |     } else { | 
2500  | 0  |         h64  = seed + XXH_PRIME64_5;  | 
2501  | 0  |     }  | 
2502  |  |  | 
2503  | 25  |     h64 += (xxh_u64) len;  | 
2504  |  |  | 
2505  | 25  |     return XXH64_finalize(h64, input, len, align);  | 
2506  | 25  | }  | 
2507  |  |  | 
2508  |  |  | 
2509  |  | /*! @ingroup xxh64_family */  | 
2510  |  | XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)  | 
2511  | 25  | { | 
2512  |  | #if 0  | 
2513  |  |     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */  | 
2514  |  |     XXH64_state_t state;  | 
2515  |  |     XXH64_reset(&state, seed);  | 
2516  |  |     XXH64_update(&state, (const xxh_u8*)input, len);  | 
2517  |  |     return XXH64_digest(&state);  | 
2518  |  | #else  | 
2519  | 25  |     if (XXH_FORCE_ALIGN_CHECK) { | 
2520  | 0  |         if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */ | 
2521  | 0  |             return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);  | 
2522  | 0  |     }   }  | 
2523  |  |  | 
2524  | 25  |     return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);  | 
2525  |  |  | 
2526  | 25  | #endif  | 
2527  | 25  | }  | 
2528  |  |  | 
2529  |  | /*******   Hash Streaming   *******/  | 
2530  |  |  | 
2531  |  | /*! @ingroup xxh64_family*/  | 
2532  |  | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)  | 
2533  | 0  | { | 
2534  | 0  |     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));  | 
2535  | 0  | }  | 
2536  |  | /*! @ingroup xxh64_family */  | 
2537  |  | XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)  | 
2538  | 0  | { | 
2539  | 0  |     XXH_free(statePtr);  | 
2540  | 0  |     return XXH_OK;  | 
2541  | 0  | }  | 
2542  |  |  | 
2543  |  | /*! @ingroup xxh64_family */  | 
2544  |  | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)  | 
2545  | 0  | { | 
2546  | 0  |     XXH_memcpy(dstState, srcState, sizeof(*dstState));  | 
2547  | 0  | }  | 
2548  |  |  | 
2549  |  | /*! @ingroup xxh64_family */  | 
2550  |  | XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)  | 
2551  | 0  | { | 
2552  | 0  |     XXH64_state_t state;   /* use a local state to memcpy() in order to avoid strict-aliasing warnings */  | 
2553  | 0  |     memset(&state, 0, sizeof(state));  | 
2554  | 0  |     state.v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;  | 
2555  | 0  |     state.v[1] = seed + XXH_PRIME64_2;  | 
2556  | 0  |     state.v[2] = seed + 0;  | 
2557  | 0  |     state.v[3] = seed - XXH_PRIME64_1;  | 
2558  |  |      /* do not write into reserved64, might be removed in a future version */  | 
2559  | 0  |     XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));  | 
2560  | 0  |     return XXH_OK;  | 
2561  | 0  | }  | 
2562  |  |  | 
2563  |  | /*! @ingroup xxh64_family */  | 
2564  |  | XXH_PUBLIC_API XXH_errorcode  | 
2565  |  | XXH64_update (XXH64_state_t* state, const void* input, size_t len)  | 
2566  | 0  | { | 
2567  | 0  |     if (input==NULL) { | 
2568  | 0  |         XXH_ASSERT(len == 0);  | 
2569  | 0  |         return XXH_OK;  | 
2570  | 0  |     }  | 
2571  |  |  | 
2572  | 0  |     {   const xxh_u8* p = (const xxh_u8*)input; | 
2573  | 0  |         const xxh_u8* const bEnd = p + len;  | 
2574  |  | 
  | 
2575  | 0  |         state->total_len += len;  | 
2576  |  | 
  | 
2577  | 0  |         if (state->memsize + len < 32) {  /* fill in tmp buffer */ | 
2578  | 0  |             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);  | 
2579  | 0  |             state->memsize += (xxh_u32)len;  | 
2580  | 0  |             return XXH_OK;  | 
2581  | 0  |         }  | 
2582  |  |  | 
2583  | 0  |         if (state->memsize) {   /* tmp buffer is full */ | 
2584  | 0  |             XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);  | 
2585  | 0  |             state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));  | 
2586  | 0  |             state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));  | 
2587  | 0  |             state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));  | 
2588  | 0  |             state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));  | 
2589  | 0  |             p += 32 - state->memsize;  | 
2590  | 0  |             state->memsize = 0;  | 
2591  | 0  |         }  | 
2592  |  | 
  | 
2593  | 0  |         if (p+32 <= bEnd) { | 
2594  | 0  |             const xxh_u8* const limit = bEnd - 32;  | 
2595  |  | 
  | 
2596  | 0  |             do { | 
2597  | 0  |                 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;  | 
2598  | 0  |                 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;  | 
2599  | 0  |                 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;  | 
2600  | 0  |                 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;  | 
2601  | 0  |             } while (p<=limit);  | 
2602  |  | 
  | 
2603  | 0  |         }  | 
2604  |  | 
  | 
2605  | 0  |         if (p < bEnd) { | 
2606  | 0  |             XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));  | 
2607  | 0  |             state->memsize = (unsigned)(bEnd-p);  | 
2608  | 0  |         }  | 
2609  | 0  |     }  | 
2610  |  |  | 
2611  | 0  |     return XXH_OK;  | 
2612  | 0  | }  | 
2613  |  |  | 
2614  |  |  | 
2615  |  | /*! @ingroup xxh64_family */  | 
2616  |  | XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)  | 
2617  | 0  | { | 
2618  | 0  |     xxh_u64 h64;  | 
2619  |  | 
  | 
2620  | 0  |     if (state->total_len >= 32) { | 
2621  | 0  |         h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);  | 
2622  | 0  |         h64 = XXH64_mergeRound(h64, state->v[0]);  | 
2623  | 0  |         h64 = XXH64_mergeRound(h64, state->v[1]);  | 
2624  | 0  |         h64 = XXH64_mergeRound(h64, state->v[2]);  | 
2625  | 0  |         h64 = XXH64_mergeRound(h64, state->v[3]);  | 
2626  | 0  |     } else { | 
2627  | 0  |         h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;  | 
2628  | 0  |     }  | 
2629  |  | 
  | 
2630  | 0  |     h64 += (xxh_u64) state->total_len;  | 
2631  |  | 
  | 
2632  | 0  |     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);  | 
2633  | 0  | }  | 
2634  |  |  | 
2635  |  |  | 
2636  |  | /******* Canonical representation   *******/  | 
2637  |  |  | 
2638  |  | /*! @ingroup xxh64_family */  | 
2639  |  | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)  | 
2640  | 0  | { | 
2641  | 0  |     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));  | 
2642  | 0  |     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);  | 
2643  | 0  |     XXH_memcpy(dst, &hash, sizeof(*dst));  | 
2644  | 0  | }  | 
2645  |  |  | 
2646  |  | /*! @ingroup xxh64_family */  | 
2647  |  | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)  | 
2648  | 0  | { | 
2649  | 0  |     return XXH_readBE64(src);  | 
2650  | 0  | }  | 
2651  |  |  | 
2652  |  | #ifndef XXH_NO_XXH3  | 
2653  |  |  | 
2654  |  | /* *********************************************************************  | 
2655  |  | *  XXH3  | 
2656  |  | *  New generation hash designed for speed on small keys and vectorization  | 
2657  |  | ************************************************************************ */  | 
2658  |  | /*!  | 
2659  |  |  * @}  | 
2660  |  |  * @defgroup xxh3_impl XXH3 implementation  | 
2661  |  |  * @ingroup impl  | 
2662  |  |  * @{ | 
2663  |  |  */  | 
2664  |  |  | 
2665  |  | /* ===   Compiler specifics   === */  | 
2666  |  |  | 
/*
 * XXH_RESTRICT: expands to the `restrict` qualifier when the compiler reports
 * C99 or later, and to nothing otherwise (including C++ on Solaris, which is
 * checked first because it also defines __STDC_VERSION__).
 */
#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
#  define XXH_RESTRICT /* disable */
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
#  define XXH_RESTRICT   restrict
#else
/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
#  define XXH_RESTRICT   /* disable */
#endif
2675  |  |  | 
/*
 * Branch-prediction hints.
 * XXH_likely(x) / XXH_unlikely(x) wrap __builtin_expect() with an expected
 * value of 1 / 0 on GCC >= 3, Intel compiler >= 800 and clang; on any other
 * compiler they simply evaluate to (x).
 */
#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
  || defined(__clang__)
#    define XXH_likely(x) __builtin_expect(x, 1)
#    define XXH_unlikely(x) __builtin_expect(x, 0)
#else
#    define XXH_likely(x) (x)
#    define XXH_unlikely(x) (x)
#endif
2685  |  |  | 
/*
 * Pull in the intrinsics header matching the instruction set advertised by the
 * compiler: AVX2 is tested first, then SSE2, then NEON for __GNUC__ compilers;
 * MSVC always gets <intrin.h>. Only one header is included per build.
 */
#if defined(__GNUC__)
#  if defined(__AVX2__)
#    include <immintrin.h>
#  elif defined(__SSE2__)
#    include <emmintrin.h>
#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
/* `inline` is temporarily remapped only for the duration of the include. */
#    define inline __inline__  /* circumvent a clang bug */
#    include <arm_neon.h>
#    undef inline
#  endif
#elif defined(_MSC_VER)
#  include <intrin.h>
#endif
2699  |  |  | 
2700  |  | /*  | 
2701  |  |  * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while  | 
2702  |  |  * remaining a true 64-bit/128-bit hash function.  | 
2703  |  |  *  | 
2704  |  |  * This is done by prioritizing a subset of 64-bit operations that can be  | 
2705  |  |  * emulated without too many steps on the average 32-bit machine.  | 
2706  |  |  *  | 
2707  |  |  * For example, these two lines seem similar, and run equally fast on 64-bit:  | 
2708  |  |  *  | 
2709  |  |  *   xxh_u64 x;  | 
2710  |  |  *   x ^= (x >> 47); // good  | 
2711  |  |  *   x ^= (x >> 13); // bad  | 
2712  |  |  *  | 
2713  |  |  * However, to a 32-bit machine, there is a major difference.  | 
2714  |  |  *  | 
2715  |  |  * x ^= (x >> 47) looks like this:  | 
2716  |  |  *  | 
2717  |  |  *   x.lo ^= (x.hi >> (47 - 32));  | 
2718  |  |  *  | 
2719  |  |  * while x ^= (x >> 13) looks like this:  | 
2720  |  |  *  | 
2721  |  |  *   // note: funnel shifts are not usually cheap.  | 
2722  |  |  *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));  | 
2723  |  |  *   x.hi ^= (x.hi >> 13);  | 
2724  |  |  *  | 
2725  |  |  * The first one is significantly faster than the second, simply because the  | 
2726  |  |  * shift is larger than 32. This means:  | 
2727  |  |  *  - All the bits we need are in the upper 32 bits, so we can ignore the lower  | 
2728  |  |  *    32 bits in the shift.  | 
2729  |  |  *  - The shift result will always fit in the lower 32 bits, and therefore,  | 
2730  |  |  *    we can ignore the upper 32 bits in the xor.  | 
2731  |  |  *  | 
2732  |  |  * Thanks to this optimization, XXH3 only requires these features to be efficient:  | 
2733  |  |  *  | 
2734  |  |  *  - Usable unaligned access  | 
2735  |  |  *  - A 32-bit or 64-bit ALU  | 
2736  |  |  *      - If 32-bit, a decent ADC instruction  | 
2737  |  |  *  - A 32 or 64-bit multiply with a 64-bit result  | 
2738  |  |  *  - For the 128-bit variant, a decent byteswap helps short inputs.  | 
2739  |  |  *  | 
2740  |  |  * The first two are already required by XXH32, and almost all 32-bit and 64-bit  | 
2741  |  |  * platforms which can run XXH32 can run XXH3 efficiently.  | 
2742  |  |  *  | 
2743  |  |  * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one  | 
2744  |  |  * notable exception.  | 
2745  |  |  *  | 
2746  |  |  * First of all, Thumb-1 lacks support for the UMULL instruction which  | 
2747  |  |  * performs the important long multiply. This means numerous __aeabi_lmul  | 
2748  |  |  * calls.  | 
2749  |  |  *  | 
2750  |  |  * Second of all, the 8 functional registers are just not enough.  | 
2751  |  |  * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need  | 
2752  |  |  * Lo registers, and this shuffling results in thousands more MOVs than A32.  | 
2753  |  |  *  | 
2754  |  |  * A32 and T32 don't have this limitation. They can access all 14 registers,  | 
2755  |  |  * do a 32->64 multiply with UMULL, and the flexible operand allowing free  | 
2756  |  |  * shifts is helpful, too.  | 
2757  |  |  *  | 
2758  |  |  * Therefore, we do a quick sanity check.  | 
2759  |  |  *  | 
2760  |  |  * If compiling Thumb-1 for a target which supports ARM instructions, we will  | 
2761  |  |  * emit a warning, as it is not a "sane" platform to compile for.  | 
2762  |  |  *  | 
2763  |  |  * Usually, if this happens, it is because of an accident and you probably need  | 
2764  |  |  * to specify -march, as you likely meant to compile for a newer architecture.  | 
2765  |  |  *  | 
2766  |  |  * Credit: large sections of the vectorial and asm source code paths  | 
2767  |  |  *         have been contributed by @easyaspi314  | 
2768  |  |  */  | 
/* Thumb (but not Thumb-2) code generation on a target that also provides the
 * ARM instruction set: emit a build-time warning rather than failing. */
#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
#endif
2772  |  |  | 
2773  |  | /* ==========================================  | 
2774  |  |  * Vectorization detection  | 
2775  |  |  * ========================================== */  | 
2776  |  |  | 
/*
 * Documentation-only section: everything below is seen solely when
 * XXH_DOXYGEN is defined. The enum is a stand-in used to document values that
 * are really implemented as macros.
 */
#ifdef XXH_DOXYGEN
/*!
 * @ingroup tuning
 * @brief Overrides the vectorization implementation chosen for XXH3.
 *
 * Can be defined to 0 to disable SIMD or any of the values mentioned in
 * @ref XXH_VECTOR_TYPE.
 *
 * If this is not defined, it uses predefined macros to determine the best
 * implementation.
 */
#  define XXH_VECTOR XXH_SCALAR
/*!
 * @ingroup tuning
 * @brief Possible values for @ref XXH_VECTOR.
 *
 * Note that these are actually implemented as macros.
 *
 * If this is not defined, it is detected automatically.
 * @ref XXH_X86DISPATCH overrides this.
 */
enum XXH_VECTOR_TYPE /* fake enum */ {
    XXH_SCALAR = 0,  /*!< Portable scalar version */
    XXH_SSE2   = 1,  /*!<
                      * SSE2 for Pentium 4, Opteron, all x86_64.
                      *
                      * @note SSE2 is also guaranteed on Windows 10, macOS, and
                      * Android x86.
                      */
    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
};
/*!
 * @ingroup tuning
 * @brief Selects the minimum alignment for XXH3's accumulators.
 *
 * When using SIMD, this should match the alignment required for said vector
 * type, so, for example, 32 for AVX2.
 *
 * Default: Auto detected.
 */
#  define XXH_ACC_ALIGN 8
#endif
2822  |  |  | 
/*
 * Actual definition: numeric identifiers of the supported vector back-ends.
 * These are the values XXH_VECTOR may take.
 */
#ifndef XXH_DOXYGEN
#  define XXH_SCALAR 0
#  define XXH_SSE2   1
#  define XXH_AVX2   2
#  define XXH_AVX512 3
#  define XXH_NEON   4
#  define XXH_VSX    5
#endif

/*
 * XXH_VECTOR auto-detection.
 *
 * Unless the build already defined XXH_VECTOR, pick the first matching
 * back-end, tested in this order: AVX512, AVX2, SSE2, NEON (little endian
 * only), VSX/ZVector, and finally the portable scalar code.
 *
 * The VSX test is explicitly parenthesized: `&&` binds tighter than `||` in
 * #if expressions, so without the outer parentheses the __GNUC__ requirement
 * would guard only the s390x alternative and not the POWER8 one.
 */
#ifndef XXH_VECTOR    /* can be defined on command line */
#  if defined(__AVX512F__)
#    define XXH_VECTOR XXH_AVX512
#  elif defined(__AVX2__)
#    define XXH_VECTOR XXH_AVX2
#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
#    define XXH_VECTOR XXH_SSE2
#  elif ( \
        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
     || defined(_M_ARM64) || defined(_M_ARM_ARMV7VE) /* msvc */ \
   ) && ( \
        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
   )
#    define XXH_VECTOR XXH_NEON
#  elif ( \
        (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
     || (defined(__s390x__) && defined(__VEC__)) \
   ) && defined(__GNUC__) /* TODO: IBM XL */
#    define XXH_VECTOR XXH_VSX
#  else
#    define XXH_VECTOR XXH_SCALAR
#  endif
#endif
2856  |  |  | 
/*
 * Controls the alignment of the accumulator,
 * for compatibility with aligned vector loads, which are usually faster.
 *
 * A value supplied by the build is kept as is. Otherwise XXH_X86DISPATCH
 * forces 64, and the remaining cases map the selected XXH_VECTOR back-end to
 * 8 (scalar), 16 (SSE2, NEON, VSX), 32 (AVX2) or 64 (AVX512).
 * NOTE(review): an XXH_VECTOR value outside this list leaves XXH_ACC_ALIGN
 * undefined, since the chain has no final #else.
 */
#ifndef XXH_ACC_ALIGN
#  if defined(XXH_X86DISPATCH)
#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
#     define XXH_ACC_ALIGN 8
#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
#     define XXH_ACC_ALIGN 32
#  elif XXH_VECTOR == XXH_NEON  /* neon */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_VSX   /* vsx */
#     define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
#     define XXH_ACC_ALIGN 64
#  endif
#endif
2878  |  |  | 
/*
 * XXH_SEC_ALIGN: equals XXH_ACC_ALIGN for the x86 back-ends (SSE2, AVX2,
 * AVX512, or runtime dispatch) and is fixed at 8 for every other back-end.
 * Presumably the alignment applied to secret buffers, judging by the name;
 * its uses are outside this section.
 */
#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
#  define XXH_SEC_ALIGN 8
#endif
2885  |  |  | 
2886  |  | /*  | 
2887  |  |  * UGLY HACK:  | 
2888  |  |  * GCC usually generates the best code with -O3 for xxHash.  | 
2889  |  |  *  | 
2890  |  |  * However, when targeting AVX2, it is overzealous in its unrolling resulting  | 
2891  |  |  * in code roughly 3/4 the speed of Clang.  | 
2892  |  |  *  | 
2893  |  |  * There are other issues, such as GCC splitting _mm256_loadu_si256 into  | 
2894  |  |  * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which  | 
2895  |  |  * only applies to Sandy and Ivy Bridge... which don't even support AVX2.  | 
2896  |  |  *  | 
2897  |  |  * That is why when compiling the AVX2 version, it is recommended to use either  | 
2898  |  |  *   -O2 -mavx2 -march=haswell  | 
2899  |  |  * or  | 
2900  |  |  *   -O2 -mavx2 -mno-avx256-split-unaligned-load  | 
2901  |  |  * for decent performance, or to use Clang instead.  | 
2902  |  |  *  | 
2903  |  |  * Fortunately, we can control the first one with a pragma that forces GCC into  | 
2904  |  |  * -O2, but the other one we can't control without "failed to inline always  | 
2905  |  |  * inline function due to target mismatch" warnings.  | 
2906  |  |  */  | 
/*
 * Applies only to real GCC (not clang) building the AVX2 back-end with
 * optimization enabled and not optimizing for size: save the current options
 * and force -O2 for the code that follows.
 * NOTE(review): the matching `pop_options` is not in this section; presumably
 * it appears later in the file.
 */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
#  pragma GCC push_options
#  pragma GCC optimize("-O2")
#endif
2913  |  |  | 
2914  |  |  | 
2915  |  | #if XXH_VECTOR == XXH_NEON  | 
2916  |  | /*  | 
2917  |  |  * NEON's setup for vmlal_u32 is a little more complicated than it is on  | 
2918  |  |  * SSE2, AVX2, and VSX.  | 
2919  |  |  *  | 
2920  |  |  * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.  | 
2921  |  |  *  | 
2922  |  |  * To do the same operation, the 128-bit 'Q' register needs to be split into  | 
2923  |  |  * two 64-bit 'D' registers, performing this operation::  | 
2924  |  |  *  | 
2925  |  |  *   [                a                 |                 b                ]  | 
2926  |  |  *            |              '---------. .--------'                |  | 
2927  |  |  *            |                         x                          |  | 
2928  |  |  *            |              .---------' '--------.                |  | 
2929  |  |  *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]  | 
2930  |  |  *  | 
2931  |  |  * Due to significant changes in aarch64, the fastest method for aarch64 is  | 
2932  |  |  * completely different than the fastest method for ARMv7-A.  | 
2933  |  |  *  | 
2934  |  |  * ARMv7-A treats D registers as unions overlaying Q registers, so modifying  | 
2935  |  |  * D11 will modify the high half of Q5. This is similar to how modifying AH  | 
2936  |  |  * will only affect bits 8-15 of AX on x86.  | 
2937  |  |  *  | 
2938  |  |  * VZIP takes two registers, and puts even lanes in one register and odd lanes  | 
2939  |  |  * in the other.  | 
2940  |  |  *  | 
2941  |  |  * On ARMv7-A, this strangely modifies both parameters in place instead of  | 
2942  |  |  * taking the usual 3-operand form.  | 
2943  |  |  *  | 
2944  |  |  * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the  | 
2945  |  |  * lower and upper halves of the Q register to end up with the high and low  | 
2946  |  |  * halves where we want - all in one instruction.  | 
2947  |  |  *  | 
2948  |  |  *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] } | 
2949  |  |  *  | 
2950  |  |  * Unfortunately we need inline assembly for this: instructions modifying two  | 
2951  |  |  * registers at once are not possible in GCC or Clang's IR, and they have to  | 
2952  |  |  * create a copy.  | 
2953  |  |  *  | 
2954  |  |  * aarch64 requires a different approach.  | 
2955  |  |  *  | 
2956  |  |  * In order to make it easier to write a decent compiler for aarch64, many  | 
2957  |  |  * quirks were removed, such as conditional execution.  | 
2958  |  |  *  | 
2959  |  |  * NEON was also affected by this.  | 
2960  |  |  *  | 
2961  |  |  * aarch64 cannot access the high bits of a Q-form register, and writes to a  | 
2962  |  |  * D-form register zero the high bits, similar to how writes to W-form scalar  | 
2963  |  |  * registers (or DWORD registers on x86_64) work.  | 
2964  |  |  *  | 
2965  |  |  * The formerly free vget_high intrinsics now require a vext (with a few  | 
2966  |  |  * exceptions)  | 
2967  |  |  *  | 
2968  |  |  * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent  | 
2969  |  |  * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one  | 
2970  |  |  * operand.  | 
2971  |  |  *  | 
2972  |  |  * The equivalent of the VZIP.32 on the lower and upper halves would be this  | 
2973  |  |  * mess:  | 
2974  |  |  *  | 
2975  |  |  *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] } | 
2976  |  |  *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] } | 
2977  |  |  *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] } | 
2978  |  |  *  | 
2979  |  |  * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):  | 
2980  |  |  *  | 
2981  |  |  *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);  | 
2982  |  |  *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);  | 
2983  |  |  *  | 
2984  |  |  * This is available on ARMv7-A, but is less efficient than a single VZIP.32.  | 
2985  |  |  */  | 
2986  |  |  | 
2987  |  | /*!  | 
2988  |  |  * Function-like macro:  | 
2989  |  |  * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)  | 
2990  |  |  * { | 
2991  |  |  *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);  | 
2992  |  |  *     outHi = (uint32x2_t)(in >> 32);  | 
2993  |  |  *     in = UNDEFINED;  | 
2994  |  |  * }  | 
2995  |  |  */  | 
/*
 * Two implementations of XXH_SPLIT_IN_PLACE(in, outLo, outHi):
 *  - 32-bit ARM with a __GNUC__ compiler, unless XXH_NO_VZIP_HACK is defined:
 *    a single inline-asm VZIP.32 that overwrites `in` (declared "+w",
 *    read-write), after which the two halves of `in` are extracted.
 *  - everything else (including aarch64): vmovn_u64 / vshrn_n_u64, which
 *    leave `in` unmodified.
 * Both forms evaluate `in` more than once, so pass a plain variable.
 */
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
   && defined(__GNUC__) \
   && !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
    do {                                                                                    \
      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
   } while (0)
# else
#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
    do {                                                                                  \
      (outLo) = vmovn_u64    (in);                                                        \
      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
    } while (0)
# endif
3015  |  | #endif  /* XXH_VECTOR == XXH_NEON */  | 
3016  |  |  | 
3017  |  | /*  | 
3018  |  |  * VSX and Z Vector helpers.  | 
3019  |  |  *  | 
3020  |  |  * This is very messy, and any pull requests to clean this up are welcome.  | 
3021  |  |  *  | 
3022  |  |  * There are a lot of problems with supporting VSX and s390x, due to  | 
3023  |  |  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.  | 
3024  |  |  */  | 
3025  |  | #if XXH_VECTOR == XXH_VSX  | 
/* Vector intrinsics header: <s390intrin.h> on s390x, <altivec.h> otherwise. */
#  if defined(__s390x__)
#    include <s390intrin.h>
#  else
/* gcc's altivec.h can have the unwanted consequence to unconditionally
 * #define bool, vector, and pixel keywords,
 * with bad consequences for programs already using these keywords for other purposes.
 * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
 * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
 * but it seems that, in some cases, it isn't.
 * Force the build macro to be defined, so that keywords are not altered.
 */
#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
#      define __APPLE_ALTIVEC__
#    endif
#    include <altivec.h>
#  endif
3042  |  |  | 
/* Short names for the `__vector` types used by the VSX / Z Vector code below;
 * each name gives the lane type and, presumably, the lane count. */
typedef __vector unsigned long long xxh_u64x2;
typedef __vector unsigned char xxh_u8x16;
typedef __vector unsigned xxh_u32x4;
3046  |  |  | 
/*
 * XXH_VSX_BE: 1 when vector data must be treated as big endian, 0 otherwise.
 * May be predefined by the build. It is set to 1 for big-endian targets, and
 * also (with a warning) when only the vector element order is big endian.
 */
# ifndef XXH_VSX_BE
#  if defined(__BIG_ENDIAN__) \
  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define XXH_VSX_BE 1
#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
#    warning "-maltivec=be is not recommended. Please use native endianness."
#    define XXH_VSX_BE 1
#  else
#    define XXH_VSX_BE 0
#  endif
# endif /* !defined(XXH_VSX_BE) */
3058  |  |  | 
3059  |  | # if XXH_VSX_BE  | 
3060  |  | #  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))  | 
3061  |  | #    define XXH_vec_revb vec_revb  | 
3062  |  | #  else  | 
3063  |  | /*!  | 
3064  |  |  * A polyfill for POWER9's vec_revb().  | 
3065  |  |  */  | 
3066  |  | XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)  | 
3067  |  | { | 
3068  |  |     xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, | 
3069  |  |                                   0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };  | 
3070  |  |     return vec_perm(val, val, vByteSwap);  | 
3071  |  | }  | 
3072  |  | #  endif  | 
3073  |  | # endif /* XXH_VSX_BE */  | 
3074  |  |  | 
3075  |  | /*!  | 
3076  |  |  * Performs an unaligned vector load and byte swaps it on big endian.  | 
3077  |  |  */  | 
3078  |  | XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)  | 
3079  |  | { | 
3080  |  |     xxh_u64x2 ret;  | 
3081  |  |     XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));  | 
3082  |  | # if XXH_VSX_BE  | 
3083  |  |     ret = XXH_vec_revb(ret);  | 
3084  |  | # endif  | 
3085  |  |     return ret;  | 
3086  |  | }  | 
3087  |  |  | 
3088  |  | /*  | 
3089  |  |  * vec_mulo and vec_mule are very problematic intrinsics on PowerPC  | 
3090  |  |  *  | 
3091  |  |  * These intrinsics weren't added until GCC 8, despite existing for a while,  | 
3092  |  |  * and they are endian dependent. Also, their meaning swaps depending on version.  | 
3093  |  |  * */  | 
/*
 * XXH_vec_mulo / XXH_vec_mule: three ways to reach the underlying multiply
 * instructions, selected per compiler:
 *  - s390x: the vec_mulo / vec_mule intrinsics are used directly;
 *  - clang exposing __builtin_altivec_vmuleuw: the builtins are used;
 *  - otherwise: inline assembly emitting vmulouw / vmuleuw.
 */
# if defined(__s390x__)
 /* s390x is always big endian, no issue on this platform */
#  define XXH_vec_mulo vec_mulo
#  define XXH_vec_mule vec_mule
# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
#  define XXH_vec_mulo __builtin_altivec_vmulouw
#  define XXH_vec_mule __builtin_altivec_vmuleuw
# else
/* gcc needs inline assembly */
/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
/* Emits a single `vmulouw` with a and b as inputs; returns its output register. */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
/* Emits a single `vmuleuw` with a and b as inputs; returns its output register. */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
# endif /* XXH_vec_mulo, XXH_vec_mule */
3118  |  | #endif /* XXH_VECTOR == XXH_VSX */  | 
3119  |  |  | 
3120  |  |  | 
/* prefetch
 * can be disabled, by declaring XXH_NO_PREFETCH build macro
 *
 * XXH_PREFETCH(ptr) maps to _mm_prefetch() with _MM_HINT_T0 on MSVC x86/x64
 * and to __builtin_prefetch() (read access, locality 3) on GCC >= 3.1.
 * On any other compiler, or when disabled, it only evaluates (void)(ptr). */
#if defined(XXH_NO_PREFETCH)
#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
#else
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#  else
#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* XXH_NO_PREFETCH */
3135  |  |  | 
3136  |  |  | 
3137  |  | /* ==========================================  | 
3138  |  |  * XXH3 default settings  | 
3139  |  |  * ========================================== */  | 
3140  |  |  | 
/* Size in bytes of the built-in default secret (the XXH3_kSecret array). */
#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */

/* Compile-time guard: the default secret must be at least XXH3_SECRET_SIZE_MIN bytes. */
#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
#  error "default keyset is not large enough"
#endif
3146  |  |  | 
/*!
 * Pseudorandom secret taken directly from FARSH.
 *
 * XXH_SECRET_DEFAULT_SIZE (192) bytes, laid out below as 12 rows of 16 bytes,
 * declared with 64-byte alignment. The values must not be altered.
 */
XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};
3162  |  |  | 
3163  |  |  | 
/* Legacy spelling: with XXH_OLD_NAMES defined, `kSecret` aliases XXH3_kSecret. */
#ifdef XXH_OLD_NAMES
#  define kSecret XXH3_kSecret
#endif
3167  |  |  | 
/*
 * XXH_mult32to64 has three forms: a function shown only to Doxygen, an
 * `__emulu` wrapper for MSVC targeting 32-bit x86, and a cast-based macro for
 * everything else. All three multiply the low 32 bits of each operand.
 */
#ifdef XXH_DOXYGEN
/*!
 * @brief Calculates a 32-bit to 64-bit long multiply.
 *
 * Implemented as a macro.
 *
 * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
 * need to (but it shouldn't need to anyways, it is about 7 instructions to do
 * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
 * use that instead of the normal method.
 *
 * If you are compiling for platforms like Thumb-1 and don't have a better option,
 * you may also want to write your own long multiply routine here.
 *
 * @param x, y Numbers to be multiplied
 * @return 64-bit product of the low 32 bits of @p x and @p y.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64(xxh_u64 x, xxh_u64 y)
{
   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
}
#elif defined(_MSC_VER) && defined(_M_IX86)
#    include <intrin.h>
/* Both operands are truncated to `unsigned` before being handed to __emulu. */
#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
 * Downcast + upcast is usually better than masking on older compilers like
 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
 *
 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
 */
#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
#endif
3203  |  |  | 
3204  |  | /*!  | 
3205  |  |  * @brief Calculates a 64->128-bit long multiply.  | 
3206  |  |  *  | 
3207  |  |  * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar  | 
3208  |  |  * version.  | 
3209  |  |  *  | 
3210  |  |  * @param lhs , rhs The 64-bit integers to be multiplied  | 
3211  |  |  * @return The 128-bit result represented in an @ref XXH128_hash_t.  | 
3212  |  |  */  | 
3213  |  | static XXH128_hash_t  | 
3214  |  | XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)  | 
3215  |  | { | 
3216  |  |     /*  | 
3217  |  |      * GCC/Clang __uint128_t method.  | 
3218  |  |      *  | 
3219  |  |      * On most 64-bit targets, GCC and Clang define a __uint128_t type.  | 
3220  |  |      * This is usually the best way as it usually uses a native long 64-bit  | 
3221  |  |      * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.  | 
3222  |  |      *  | 
3223  |  |      * Usually.  | 
3224  |  |      *  | 
3225  |  |      * Despite being a 32-bit platform, Clang (and emscripten) define this type  | 
3226  |  |      * despite not having the arithmetic for it. This results in a laggy  | 
3227  |  |      * compiler builtin call which calculates a full 128-bit multiply.  | 
3228  |  |      * In that case it is best to use the portable one.  | 
3229  |  |      * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677  | 
3230  |  |      */  | 
3231  |  | #if defined(__GNUC__) && !defined(__wasm__) \  | 
3232  |  |     && defined(__SIZEOF_INT128__) \  | 
3233  |  |     || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)  | 
3234  |  |  | 
3235  |  |     __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;  | 
3236  |  |     XXH128_hash_t r128;  | 
3237  |  |     r128.low64  = (xxh_u64)(product);  | 
3238  |  |     r128.high64 = (xxh_u64)(product >> 64);  | 
3239  |  |     return r128;  | 
3240  |  |  | 
3241  |  |     /*  | 
3242  |  |      * MSVC for x64's _umul128 method.  | 
3243  |  |      *  | 
3244  |  |      * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);  | 
3245  |  |      *  | 
3246  |  |      * This compiles to single operand MUL on x64.  | 
3247  |  |      */  | 
3248  |  | #elif defined(_M_X64) || defined(_M_IA64)  | 
3249  |  |  | 
3250  |  | #ifndef _MSC_VER  | 
3251  |  | #   pragma intrinsic(_umul128)  | 
3252  |  | #endif  | 
3253  |  |     xxh_u64 product_high;  | 
3254  |  |     xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);  | 
3255  |  |     XXH128_hash_t r128;  | 
3256  |  |     r128.low64  = product_low;  | 
3257  |  |     r128.high64 = product_high;  | 
3258  |  |     return r128;  | 
3259  |  |  | 
3260  |  |     /*  | 
3261  |  |      * MSVC for ARM64's __umulh method.  | 
3262  |  |      *  | 
3263  |  |      * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.  | 
3264  |  |      */  | 
3265  |  | #elif defined(_M_ARM64)  | 
3266  |  |  | 
3267  |  | #ifndef _MSC_VER  | 
3268  |  | #   pragma intrinsic(__umulh)  | 
3269  |  | #endif  | 
3270  |  |     XXH128_hash_t r128;  | 
3271  |  |     r128.low64  = lhs * rhs;  | 
3272  |  |     r128.high64 = __umulh(lhs, rhs);  | 
3273  |  |     return r128;  | 
3274  |  |  | 
3275  |  | #else  | 
3276  |  |     /*  | 
3277  |  |      * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.  | 
3278  |  |      *  | 
3279  |  |      * This is a fast and simple grade school multiply, which is shown below  | 
3280  |  |      * with base 10 arithmetic instead of base 0x100000000.  | 
3281  |  |      *  | 
3282  |  |      *           9 3 // D2 lhs = 93  | 
3283  |  |      *         x 7 5 // D2 rhs = 75  | 
3284  |  |      *     ----------  | 
3285  |  |      *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15  | 
3286  |  |      *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45  | 
3287  |  |      *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21  | 
3288  |  |      *     + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63  | 
3289  |  |      *     ---------  | 
3290  |  |      *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27  | 
3291  |  |      *     + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67  | 
3292  |  |      *     ---------  | 
3293  |  |      *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975  | 
3294  |  |      *  | 
3295  |  |      * The reasons for adding the products like this are:  | 
3296  |  |      *  1. It avoids manual carry tracking. Just like how  | 
3297  |  |      *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.  | 
3298  |  |      *     This avoids a lot of complexity.  | 
3299  |  |      *  | 
3300  |  |      *  2. It hints for, and on Clang, compiles to, the powerful UMAAL  | 
3301  |  |      *     instruction available in ARM's Digital Signal Processing extension  | 
3302  |  |      *     in 32-bit ARMv6 and later, which is shown below:  | 
3303  |  |      *  | 
3304  |  |      *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)  | 
3305  |  |      *         { | 
3306  |  |      *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;  | 
3307  |  |      *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);  | 
3308  |  |      *             *RdHi = (xxh_u32)(product >> 32);  | 
3309  |  |      *         }  | 
3310  |  |      *  | 
3311  |  |      *     This instruction was designed for efficient long multiplication, and  | 
3312  |  |      *     allows this to be calculated in only 4 instructions at speeds  | 
3313  |  |      *     comparable to some 64-bit ALUs.  | 
3314  |  |      *  | 
3315  |  |      *  3. It isn't terrible on other platforms. Usually this will be a couple  | 
3316  |  |      *     of 32-bit ADD/ADCs.  | 
3317  |  |      */  | 
3318  |  |  | 
3319  |  |     /* First calculate all of the cross products. */  | 
3320  |  |     xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);  | 
3321  |  |     xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);  | 
3322  |  |     xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);  | 
3323  |  |     xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);  | 
3324  |  |  | 
3325  |  |     /* Now add the products together. These will never overflow. */  | 
3326  |  |     xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;  | 
3327  |  |     xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;  | 
3328  |  |     xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);  | 
3329  |  |  | 
3330  |  |     XXH128_hash_t r128;  | 
3331  |  |     r128.low64  = lower;  | 
3332  |  |     r128.high64 = upper;  | 
3333  |  |     return r128;  | 
3334  |  | #endif  | 
3335  |  | }  | 
3336  |  |  | 
3337  |  | /*!  | 
3338  |  |  * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.  | 
3339  |  |  *  | 
3340  |  |  * The reason for the separate function is to prevent passing too many structs  | 
3341  |  |  * around by value. This will hopefully inline the multiply, but we don't force it.  | 
3342  |  |  *  | 
3343  |  |  * @param lhs , rhs The 64-bit integers to multiply  | 
3344  |  |  * @return The low 64 bits of the product XOR'd by the high 64 bits.  | 
3345  |  |  * @see XXH_mult64to128()  | 
3346  |  |  */  | 
3347  |  | static xxh_u64  | 
3348  |  | XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)  | 
3349  |  | { | 
3350  |  |     XXH128_hash_t product = XXH_mult64to128(lhs, rhs);  | 
3351  |  |     return product.low64 ^ product.high64;  | 
3352  |  | }  | 
3353  |  |  | 
3354  |  | /*! Seems to produce slightly better code on GCC for some reason. */  | 
3355  |  | XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)  | 
3356  |  | { | 
3357  |  |     XXH_ASSERT(0 <= shift && shift < 64);  | 
3358  |  |     return v64 ^ (v64 >> shift);  | 
3359  |  | }  | 
3360  |  |  | 
3361  |  | /*  | 
3362  |  |  * This is a fast avalanche stage,  | 
3363  |  |  * suitable when input bits are already partially mixed  | 
3364  |  |  */  | 
3365  |  | static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)  | 
3366  |  | { | 
3367  |  |     h64 = XXH_xorshift64(h64, 37);  | 
3368  |  |     h64 *= 0x165667919E3779F9ULL;  | 
3369  |  |     h64 = XXH_xorshift64(h64, 32);  | 
3370  |  |     return h64;  | 
3371  |  | }  | 
3372  |  |  | 
3373  |  | /*  | 
3374  |  |  * This is a stronger avalanche,  | 
3375  |  |  * inspired by Pelle Evensen's rrmxmx  | 
3376  |  |  * preferable when input has not been previously mixed  | 
3377  |  |  */  | 
3378  |  | static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)  | 
3379  |  | { | 
3380  |  |     /* this mix is inspired by Pelle Evensen's rrmxmx */  | 
3381  |  |     h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);  | 
3382  |  |     h64 *= 0x9FB21C651E98DF25ULL;  | 
3383  |  |     h64 ^= (h64 >> 35) + len ;  | 
3384  |  |     h64 *= 0x9FB21C651E98DF25ULL;  | 
3385  |  |     return XXH_xorshift64(h64, 28);  | 
3386  |  | }  | 
3387  |  |  | 
3388  |  |  | 
/* ==========================================
 * Short keys
 * ==========================================
 * One of the shortcomings of XXH32 and XXH64 was that their performance was
 * sub-optimal on short lengths. They used an iterative algorithm which strongly
 * favored lengths that were a multiple of 4 or 8.
 *
 * Instead of iterating over individual inputs, we use a set of single-shot
 * functions which piece together a range of lengths and operate in constant time.
 *
 * Additionally, the number of multiplies has been significantly reduced. This
 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
 *
 * Depending on the platform, this may or may not be faster than XXH32, but it
 * is almost guaranteed to be faster than XXH64.
 */
3405  |  |  | 
3406  |  | /*  | 
3407  |  |  * At very short lengths, there isn't enough input to fully hide secrets, or use  | 
3408  |  |  * the entire secret.  | 
3409  |  |  *  | 
3410  |  |  * There is also only a limited amount of mixing we can do before significantly  | 
3411  |  |  * impacting performance.  | 
3412  |  |  *  | 
3413  |  |  * Therefore, we use different sections of the secret and always mix two secret  | 
3414  |  |  * samples with an XOR. This should have no effect on performance on the  | 
3415  |  |  * seedless or withSeed variants because everything _should_ be constant folded  | 
3416  |  |  * by modern compilers.  | 
3417  |  |  *  | 
3418  |  |  * The XOR mixing hides individual parts of the secret and increases entropy.  | 
3419  |  |  *  | 
3420  |  |  * This adds an extra layer of strength for custom secrets.  | 
3421  |  |  */  | 
3422  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
3423  |  | XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
3424  |  | { | 
3425  |  |     XXH_ASSERT(input != NULL);  | 
3426  |  |     XXH_ASSERT(1 <= len && len <= 3);  | 
3427  |  |     XXH_ASSERT(secret != NULL);  | 
3428  |  |     /*  | 
3429  |  |      * len = 1: combined = { input[0], 0x01, input[0], input[0] } | 
3430  |  |      * len = 2: combined = { input[1], 0x02, input[0], input[1] } | 
3431  |  |      * len = 3: combined = { input[2], 0x03, input[0], input[1] } | 
3432  |  |      */  | 
3433  |  |     {   xxh_u8  const c1 = input[0]; | 
3434  |  |         xxh_u8  const c2 = input[len >> 1];  | 
3435  |  |         xxh_u8  const c3 = input[len - 1];  | 
3436  |  |         xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)  | 
3437  |  |                                | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);  | 
3438  |  |         xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;  | 
3439  |  |         xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;  | 
3440  |  |         return XXH64_avalanche(keyed);  | 
3441  |  |     }  | 
3442  |  | }  | 
3443  |  |  | 
3444  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
3445  |  | XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
3446  |  | { | 
3447  |  |     XXH_ASSERT(input != NULL);  | 
3448  |  |     XXH_ASSERT(secret != NULL);  | 
3449  |  |     XXH_ASSERT(4 <= len && len <= 8);  | 
3450  |  |     seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;  | 
3451  |  |     {   xxh_u32 const input1 = XXH_readLE32(input); | 
3452  |  |         xxh_u32 const input2 = XXH_readLE32(input + len - 4);  | 
3453  |  |         xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;  | 
3454  |  |         xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);  | 
3455  |  |         xxh_u64 const keyed = input64 ^ bitflip;  | 
3456  |  |         return XXH3_rrmxmx(keyed, len);  | 
3457  |  |     }  | 
3458  |  | }  | 
3459  |  |  | 
3460  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
3461  |  | XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
3462  |  | { | 
3463  |  |     XXH_ASSERT(input != NULL);  | 
3464  |  |     XXH_ASSERT(secret != NULL);  | 
3465  |  |     XXH_ASSERT(9 <= len && len <= 16);  | 
3466  |  |     {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; | 
3467  |  |         xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;  | 
3468  |  |         xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;  | 
3469  |  |         xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;  | 
3470  |  |         xxh_u64 const acc = len  | 
3471  |  |                           + XXH_swap64(input_lo) + input_hi  | 
3472  |  |                           + XXH3_mul128_fold64(input_lo, input_hi);  | 
3473  |  |         return XXH3_avalanche(acc);  | 
3474  |  |     }  | 
3475  |  | }  | 
3476  |  |  | 
3477  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
3478  |  | XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
3479  |  | { | 
3480  |  |     XXH_ASSERT(len <= 16);  | 
3481  |  |     {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed); | 
3482  |  |         if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);  | 
3483  |  |         if (len) return XXH3_len_1to3_64b(input, len, secret, seed);  | 
3484  |  |         return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));  | 
3485  |  |     }  | 
3486  |  | }  | 
3487  |  |  | 
3488  |  | /*  | 
3489  |  |  * DISCLAIMER: There are known *seed-dependent* multicollisions here due to  | 
3490  |  |  * multiplication by zero, affecting hashes of lengths 17 to 240.  | 
3491  |  |  *  | 
3492  |  |  * However, they are very unlikely.  | 
3493  |  |  *  | 
3494  |  |  * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all  | 
3495  |  |  * unseeded non-cryptographic hashes, it does not attempt to defend itself  | 
3496  |  |  * against specially crafted inputs, only random inputs.  | 
3497  |  |  *  | 
3498  |  |  * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes  | 
3499  |  |  * cancelling out the secret is taken an arbitrary number of times (addressed  | 
3500  |  |  * in XXH3_accumulate_512), this collision is very unlikely with random inputs  | 
3501  |  |  * and/or proper seeding:  | 
3502  |  |  *  | 
3503  |  |  * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a  | 
3504  |  |  * function that is only called up to 16 times per hash with up to 240 bytes of  | 
3505  |  |  * input.  | 
3506  |  |  *  | 
3507  |  |  * This is not too bad for a non-cryptographic hash function, especially with  | 
3508  |  |  * only 64 bit outputs.  | 
3509  |  |  *  | 
3510  |  |  * The 128-bit variant (which trades some speed for strength) is NOT affected  | 
3511  |  |  * by this, although it is always a good idea to use a proper seed if you care  | 
3512  |  |  * about strength.  | 
3513  |  |  */  | 
3514  |  | XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,  | 
3515  |  |                                      const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)  | 
3516  |  | { | 
3517  |  | #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \  | 
3518  |  |   && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \  | 
3519  |  |   && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */  | 
3520  |  |     /*  | 
3521  |  |      * UGLY HACK:  | 
3522  |  |      * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in  | 
3523  |  |      * slower code.  | 
3524  |  |      *  | 
3525  |  |      * By forcing seed64 into a register, we disrupt the cost model and  | 
3526  |  |      * cause it to scalarize. See `XXH32_round()`  | 
3527  |  |      *  | 
3528  |  |      * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,  | 
3529  |  |      * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on  | 
3530  |  |      * GCC 9.2, despite both emitting scalar code.  | 
3531  |  |      *  | 
3532  |  |      * GCC generates much better scalar code than Clang for the rest of XXH3,  | 
3533  |  |      * which is why finding a more optimal codepath is an interest.  | 
3534  |  |      */  | 
3535  |  |     XXH_COMPILER_GUARD(seed64);  | 
3536  |  | #endif  | 
3537  |  |     {   xxh_u64 const input_lo = XXH_readLE64(input); | 
3538  |  |         xxh_u64 const input_hi = XXH_readLE64(input+8);  | 
3539  |  |         return XXH3_mul128_fold64(  | 
3540  |  |             input_lo ^ (XXH_readLE64(secret)   + seed64),  | 
3541  |  |             input_hi ^ (XXH_readLE64(secret+8) - seed64)  | 
3542  |  |         );  | 
3543  |  |     }  | 
3544  |  | }  | 
3545  |  |  | 
3546  |  | /* For mid range keys, XXH3 uses a Mum-hash variant. */  | 
3547  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
3548  |  | XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,  | 
3549  |  |                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
3550  |  |                      XXH64_hash_t seed)  | 
3551  |  | { | 
3552  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;  | 
3553  |  |     XXH_ASSERT(16 < len && len <= 128);  | 
3554  |  |  | 
3555  |  |     {   xxh_u64 acc = len * XXH_PRIME64_1; | 
3556  |  |         if (len > 32) { | 
3557  |  |             if (len > 64) { | 
3558  |  |                 if (len > 96) { | 
3559  |  |                     acc += XXH3_mix16B(input+48, secret+96, seed);  | 
3560  |  |                     acc += XXH3_mix16B(input+len-64, secret+112, seed);  | 
3561  |  |                 }  | 
3562  |  |                 acc += XXH3_mix16B(input+32, secret+64, seed);  | 
3563  |  |                 acc += XXH3_mix16B(input+len-48, secret+80, seed);  | 
3564  |  |             }  | 
3565  |  |             acc += XXH3_mix16B(input+16, secret+32, seed);  | 
3566  |  |             acc += XXH3_mix16B(input+len-32, secret+48, seed);  | 
3567  |  |         }  | 
3568  |  |         acc += XXH3_mix16B(input+0, secret+0, seed);  | 
3569  |  |         acc += XXH3_mix16B(input+len-16, secret+16, seed);  | 
3570  |  |  | 
3571  |  |         return XXH3_avalanche(acc);  | 
3572  |  |     }  | 
3573  |  | }  | 
3574  |  |  | 
3575  |  | #define XXH3_MIDSIZE_MAX 240  | 
3576  |  |  | 
3577  |  | XXH_NO_INLINE XXH64_hash_t  | 
3578  |  | XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,  | 
3579  |  |                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
3580  |  |                       XXH64_hash_t seed)  | 
3581  |  | { | 
3582  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;  | 
3583  |  |     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);  | 
3584  |  |  | 
3585  |  |     #define XXH3_MIDSIZE_STARTOFFSET 3  | 
3586  |  |     #define XXH3_MIDSIZE_LASTOFFSET  17  | 
3587  |  |  | 
3588  |  |     {   xxh_u64 acc = len * XXH_PRIME64_1; | 
3589  |  |         int const nbRounds = (int)len / 16;  | 
3590  |  |         int i;  | 
3591  |  |         for (i=0; i<8; i++) { | 
3592  |  |             acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);  | 
3593  |  |         }  | 
3594  |  |         acc = XXH3_avalanche(acc);  | 
3595  |  |         XXH_ASSERT(nbRounds >= 8);  | 
3596  |  | #if defined(__clang__)                                /* Clang */ \  | 
3597  |  |     && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \  | 
3598  |  |     && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */  | 
3599  |  |         /*  | 
3600  |  |          * UGLY HACK:  | 
3601  |  |          * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.  | 
3602  |  |          * In everywhere else, it uses scalar code.  | 
3603  |  |          *  | 
3604  |  |          * For 64->128-bit multiplies, even if the NEON was 100% optimal, it  | 
3605  |  |          * would still be slower than UMAAL (see XXH_mult64to128).  | 
3606  |  |          *  | 
3607  |  |          * Unfortunately, Clang doesn't handle the long multiplies properly and  | 
3608  |  |          * converts them to the nonexistent "vmulq_u64" intrinsic, which is then  | 
3609  |  |          * scalarized into an ugly mess of VMOV.32 instructions.  | 
3610  |  |          *  | 
3611  |  |          * This mess is difficult to avoid without turning autovectorization  | 
3612  |  |          * off completely, but they are usually relatively minor and/or not  | 
3613  |  |          * worth it to fix.  | 
3614  |  |          *  | 
3615  |  |          * This loop is the easiest to fix, as unlike XXH32, this pragma  | 
3616  |  |          * _actually works_ because it is a loop vectorization instead of an  | 
3617  |  |          * SLP vectorization.  | 
3618  |  |          */  | 
3619  |  |         #pragma clang loop vectorize(disable)  | 
3620  |  | #endif  | 
3621  |  |         for (i=8 ; i < nbRounds; i++) { | 
3622  |  |             acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);  | 
3623  |  |         }  | 
3624  |  |         /* last bytes */  | 
3625  |  |         acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);  | 
3626  |  |         return XXH3_avalanche(acc);  | 
3627  |  |     }  | 
3628  |  | }  | 
3629  |  |  | 
3630  |  |  | 
3631  |  | /* =======     Long Keys     ======= */  | 
3632  |  |  | 
3633  |  | #define XXH_STRIPE_LEN 64  | 
3634  |  | #define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */  | 
3635  |  | #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))  | 
3636  |  |  | 
3637  |  | #ifdef XXH_OLD_NAMES  | 
3638  |  | #  define STRIPE_LEN XXH_STRIPE_LEN  | 
3639  |  | #  define ACC_NB XXH_ACC_NB  | 
3640  |  | #endif  | 
3641  |  |  | 
3642  |  | XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)  | 
3643  |  | { | 
3644  |  |     if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);  | 
3645  |  |     XXH_memcpy(dst, &v64, sizeof(v64));  | 
3646  |  | }  | 
3647  |  |  | 
/*
 * Several intrinsic functions used below are documented as taking __int64
 * arguments (see https://software.intel.com/sites/landingpage/IntrinsicsGuide/ ).
 * Not every environment defines __int64, so a local signed 64-bit alias,
 * xxh_i64, is used for those casts instead.
 */
#if defined (__VMS) \
  || !(defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
    /* the following type must have a width of 64-bit */
    typedef long long xxh_i64;
#else
    typedef int64_t xxh_i64;
#endif
3661  |  |  | 
3662  |  | /*  | 
3663  |  |  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.  | 
3664  |  |  *  | 
3665  |  |  * It is a hardened version of UMAC, based off of FARSH's implementation.  | 
3666  |  |  *  | 
3667  |  |  * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD  | 
3668  |  |  * implementations, and it is ridiculously fast.  | 
3669  |  |  *  | 
3670  |  |  * We harden it by mixing the original input to the accumulators as well as the product.  | 
3671  |  |  *  | 
3672  |  |  * This means that in the (relatively likely) case of a multiply by zero, the  | 
3673  |  |  * original input is preserved.  | 
3674  |  |  *  | 
3675  |  |  * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve  | 
3676  |  |  * cross-pollination, as otherwise the upper and lower halves would be  | 
3677  |  |  * essentially independent.  | 
3678  |  |  *  | 
3679  |  |  * This doesn't matter on 64-bit hashes since they all get merged together in  | 
3680  |  |  * the end, so we skip the extra step.  | 
3681  |  |  *  | 
3682  |  |  * Both XXH3_64bits and XXH3_128bits use this subroutine.  | 
3683  |  |  */  | 
3684  |  |  | 
3685  |  | #if (XXH_VECTOR == XXH_AVX512) \  | 
3686  |  |      || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)  | 
3687  |  |  | 
3688  |  | #ifndef XXH_TARGET_AVX512  | 
3689  |  | # define XXH_TARGET_AVX512  /* disable attribute target */  | 
3690  |  | #endif  | 
3691  |  |  | 
3692  |  | XXH_FORCE_INLINE XXH_TARGET_AVX512 void  | 
3693  |  | XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,  | 
3694  |  |                      const void* XXH_RESTRICT input,  | 
3695  |  |                      const void* XXH_RESTRICT secret)  | 
3696  |  | { | 
3697  |  |     __m512i* const xacc = (__m512i *) acc;  | 
3698  |  |     XXH_ASSERT((((size_t)acc) & 63) == 0);  | 
3699  |  |     XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));  | 
3700  |  |  | 
3701  |  |     { | 
3702  |  |         /* data_vec    = input[0]; */  | 
3703  |  |         __m512i const data_vec    = _mm512_loadu_si512   (input);  | 
3704  |  |         /* key_vec     = secret[0]; */  | 
3705  |  |         __m512i const key_vec     = _mm512_loadu_si512   (secret);  | 
3706  |  |         /* data_key    = data_vec ^ key_vec; */  | 
3707  |  |         __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);  | 
3708  |  |         /* data_key_lo = data_key >> 32; */  | 
3709  |  |         __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));  | 
3710  |  |         /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */  | 
3711  |  |         __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);  | 
3712  |  |         /* xacc[0] += swap(data_vec); */  | 
3713  |  |         __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));  | 
3714  |  |         __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);  | 
3715  |  |         /* xacc[0] += product; */  | 
3716  |  |         *xacc = _mm512_add_epi64(product, sum);  | 
3717  |  |     }  | 
3718  |  | }  | 
3719  |  |  | 
3720  |  | /*  | 
3721  |  |  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.  | 
3722  |  |  *  | 
3723  |  |  * Multiplication isn't perfect, as explained by Google in HighwayHash:  | 
3724  |  |  *  | 
3725  |  |  *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to  | 
3726  |  |  *  // varying degrees. In descending order of goodness, bytes  | 
3727  |  |  *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.  | 
3728  |  |  *  // As expected, the upper and lower bytes are much worse.  | 
3729  |  |  *  | 
3730  |  |  * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291  | 
3731  |  |  *  | 
3732  |  |  * Since our algorithm uses a pseudorandom secret to add some variance into the  | 
3733  |  |  * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.  | 
3734  |  |  *  | 
3735  |  |  * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid  | 
3736  |  |  * extraction.  | 
3737  |  |  *  | 
3738  |  |  * Both XXH3_64bits and XXH3_128bits use this subroutine.  | 
3739  |  |  */  | 
3740  |  |  | 
3741  |  | XXH_FORCE_INLINE XXH_TARGET_AVX512 void  | 
3742  |  | XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
3743  |  | { | 
3744  |  |     XXH_ASSERT((((size_t)acc) & 63) == 0);  | 
3745  |  |     XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));  | 
3746  |  |     {   __m512i* const xacc = (__m512i*) acc; | 
3747  |  |         const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);  | 
3748  |  |  | 
3749  |  |         /* xacc[0] ^= (xacc[0] >> 47) */  | 
3750  |  |         __m512i const acc_vec     = *xacc;  | 
3751  |  |         __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);  | 
3752  |  |         __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);  | 
3753  |  |         /* xacc[0] ^= secret; */  | 
3754  |  |         __m512i const key_vec     = _mm512_loadu_si512   (secret);  | 
3755  |  |         __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);  | 
3756  |  |  | 
3757  |  |         /* xacc[0] *= XXH_PRIME32_1; */  | 
3758  |  |         __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));  | 
3759  |  |         __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);  | 
3760  |  |         __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);  | 
3761  |  |         *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));  | 
3762  |  |     }  | 
3763  |  | }  | 
3764  |  |  | 
3765  |  | XXH_FORCE_INLINE XXH_TARGET_AVX512 void  | 
3766  |  | XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)  | 
3767  |  | { | 
3768  |  |     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);  | 
3769  |  |     XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);  | 
3770  |  |     XXH_ASSERT(((size_t)customSecret & 63) == 0);  | 
3771  |  |     (void)(&XXH_writeLE64);  | 
3772  |  |     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); | 
3773  |  |         __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));  | 
3774  |  |  | 
3775  |  |         const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);  | 
3776  |  |               __m512i* const dest = (      __m512i*) customSecret;  | 
3777  |  |         int i;  | 
3778  |  |         XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */  | 
3779  |  |         XXH_ASSERT(((size_t)dest & 63) == 0);  | 
3780  |  |         for (i=0; i < nbRounds; ++i) { | 
3781  |  |             /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',  | 
3782  |  |              * this will warn "discards 'const' qualifier". */  | 
3783  |  |             union { | 
3784  |  |                 const __m512i* cp;  | 
3785  |  |                 void* p;  | 
3786  |  |             } remote_const_void;  | 
3787  |  |             remote_const_void.cp = src + i;  | 
3788  |  |             dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);  | 
3789  |  |     }   }  | 
3790  |  | }  | 
3791  |  |  | 
3792  |  | #endif  | 
3793  |  |  | 
3794  |  | #if (XXH_VECTOR == XXH_AVX2) \  | 
3795  |  |     || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)  | 
3796  |  |  | 
#if !defined(XXH_TARGET_AVX2)
   /* No target attribute was provided: make the marker expand to nothing. */
#  define XXH_TARGET_AVX2
#endif
3800  |  |  | 
3801  |  | XXH_FORCE_INLINE XXH_TARGET_AVX2 void  | 
3802  |  | XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,  | 
3803  |  |                     const void* XXH_RESTRICT input,  | 
3804  |  |                     const void* XXH_RESTRICT secret)  | 
3805  |  | { | 
3806  |  |     XXH_ASSERT((((size_t)acc) & 31) == 0);  | 
3807  |  |     {   __m256i* const xacc    =       (__m256i *) acc; | 
3808  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3809  |  |          * _mm256_loadu_si256 requires  a const __m256i * pointer for some reason. */  | 
3810  |  |         const         __m256i* const xinput  = (const __m256i *) input;  | 
3811  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3812  |  |          * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */  | 
3813  |  |         const         __m256i* const xsecret = (const __m256i *) secret;  | 
3814  |  |  | 
3815  |  |         size_t i;  | 
3816  |  |         for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { | 
3817  |  |             /* data_vec    = xinput[i]; */  | 
3818  |  |             __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);  | 
3819  |  |             /* key_vec     = xsecret[i]; */  | 
3820  |  |             __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);  | 
3821  |  |             /* data_key    = data_vec ^ key_vec; */  | 
3822  |  |             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);  | 
3823  |  |             /* data_key_lo = data_key >> 32; */  | 
3824  |  |             __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));  | 
3825  |  |             /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */  | 
3826  |  |             __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);  | 
3827  |  |             /* xacc[i] += swap(data_vec); */  | 
3828  |  |             __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));  | 
3829  |  |             __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);  | 
3830  |  |             /* xacc[i] += product; */  | 
3831  |  |             xacc[i] = _mm256_add_epi64(product, sum);  | 
3832  |  |     }   }  | 
3833  |  | }  | 
3834  |  |  | 
3835  |  | XXH_FORCE_INLINE XXH_TARGET_AVX2 void  | 
3836  |  | XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
3837  |  | { | 
3838  |  |     XXH_ASSERT((((size_t)acc) & 31) == 0);  | 
3839  |  |     {   __m256i* const xacc = (__m256i*) acc; | 
3840  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3841  |  |          * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */  | 
3842  |  |         const         __m256i* const xsecret = (const __m256i *) secret;  | 
3843  |  |         const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);  | 
3844  |  |  | 
3845  |  |         size_t i;  | 
3846  |  |         for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { | 
3847  |  |             /* xacc[i] ^= (xacc[i] >> 47) */  | 
3848  |  |             __m256i const acc_vec     = xacc[i];  | 
3849  |  |             __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);  | 
3850  |  |             __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);  | 
3851  |  |             /* xacc[i] ^= xsecret; */  | 
3852  |  |             __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);  | 
3853  |  |             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);  | 
3854  |  |  | 
3855  |  |             /* xacc[i] *= XXH_PRIME32_1; */  | 
3856  |  |             __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));  | 
3857  |  |             __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);  | 
3858  |  |             __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);  | 
3859  |  |             xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));  | 
3860  |  |         }  | 
3861  |  |     }  | 
3862  |  | }  | 
3863  |  |  | 
3864  |  | XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)  | 
3865  |  | { | 
3866  |  |     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);  | 
3867  |  |     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);  | 
3868  |  |     XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);  | 
3869  |  |     (void)(&XXH_writeLE64);  | 
3870  |  |     XXH_PREFETCH(customSecret);  | 
3871  |  |     {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); | 
3872  |  |  | 
3873  |  |         const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);  | 
3874  |  |               __m256i*       dest = (      __m256i*) customSecret;  | 
3875  |  |  | 
3876  |  | #       if defined(__GNUC__) || defined(__clang__)  | 
3877  |  |         /*  | 
3878  |  |          * On GCC & Clang, marking 'dest' as modified will cause the compiler:  | 
3879  |  |          *   - do not extract the secret from sse registers in the internal loop  | 
3880  |  |          *   - use less common registers, and avoid pushing these reg into stack  | 
3881  |  |          */  | 
3882  |  |         XXH_COMPILER_GUARD(dest);  | 
3883  |  | #       endif  | 
3884  |  |         XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */  | 
3885  |  |         XXH_ASSERT(((size_t)dest & 31) == 0);  | 
3886  |  |  | 
3887  |  |         /* GCC -O2 need unroll loop manually */  | 
3888  |  |         dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);  | 
3889  |  |         dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);  | 
3890  |  |         dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);  | 
3891  |  |         dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);  | 
3892  |  |         dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);  | 
3893  |  |         dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);  | 
3894  |  |     }  | 
3895  |  | }  | 
3896  |  |  | 
3897  |  | #endif  | 
3898  |  |  | 
3899  |  | /* x86dispatch always generates SSE2 */  | 
3900  |  | #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)  | 
3901  |  |  | 
#if !defined(XXH_TARGET_SSE2)
   /* No target attribute was provided: make the marker expand to nothing. */
#  define XXH_TARGET_SSE2
#endif
3905  |  |  | 
3906  |  | XXH_FORCE_INLINE XXH_TARGET_SSE2 void  | 
3907  |  | XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,  | 
3908  |  |                     const void* XXH_RESTRICT input,  | 
3909  |  |                     const void* XXH_RESTRICT secret)  | 
3910  |  | { | 
3911  |  |     /* SSE2 is just a half-scale version of the AVX2 version. */  | 
3912  |  |     XXH_ASSERT((((size_t)acc) & 15) == 0);  | 
3913  |  |     {   __m128i* const xacc    =       (__m128i *) acc; | 
3914  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3915  |  |          * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */  | 
3916  |  |         const         __m128i* const xinput  = (const __m128i *) input;  | 
3917  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3918  |  |          * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */  | 
3919  |  |         const         __m128i* const xsecret = (const __m128i *) secret;  | 
3920  |  |  | 
3921  |  |         size_t i;  | 
3922  |  |         for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { | 
3923  |  |             /* data_vec    = xinput[i]; */  | 
3924  |  |             __m128i const data_vec    = _mm_loadu_si128   (xinput+i);  | 
3925  |  |             /* key_vec     = xsecret[i]; */  | 
3926  |  |             __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);  | 
3927  |  |             /* data_key    = data_vec ^ key_vec; */  | 
3928  |  |             __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);  | 
3929  |  |             /* data_key_lo = data_key >> 32; */  | 
3930  |  |             __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));  | 
3931  |  |             /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */  | 
3932  |  |             __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);  | 
3933  |  |             /* xacc[i] += swap(data_vec); */  | 
3934  |  |             __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));  | 
3935  |  |             __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);  | 
3936  |  |             /* xacc[i] += product; */  | 
3937  |  |             xacc[i] = _mm_add_epi64(product, sum);  | 
3938  |  |     }   }  | 
3939  |  | }  | 
3940  |  |  | 
3941  |  | XXH_FORCE_INLINE XXH_TARGET_SSE2 void  | 
3942  |  | XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
3943  |  | { | 
3944  |  |     XXH_ASSERT((((size_t)acc) & 15) == 0);  | 
3945  |  |     {   __m128i* const xacc = (__m128i*) acc; | 
3946  |  |         /* Unaligned. This is mainly for pointer arithmetic, and because  | 
3947  |  |          * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */  | 
3948  |  |         const         __m128i* const xsecret = (const __m128i *) secret;  | 
3949  |  |         const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);  | 
3950  |  |  | 
3951  |  |         size_t i;  | 
3952  |  |         for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { | 
3953  |  |             /* xacc[i] ^= (xacc[i] >> 47) */  | 
3954  |  |             __m128i const acc_vec     = xacc[i];  | 
3955  |  |             __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);  | 
3956  |  |             __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);  | 
3957  |  |             /* xacc[i] ^= xsecret[i]; */  | 
3958  |  |             __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);  | 
3959  |  |             __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);  | 
3960  |  |  | 
3961  |  |             /* xacc[i] *= XXH_PRIME32_1; */  | 
3962  |  |             __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));  | 
3963  |  |             __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);  | 
3964  |  |             __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);  | 
3965  |  |             xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));  | 
3966  |  |         }  | 
3967  |  |     }  | 
3968  |  | }  | 
3969  |  |  | 
3970  |  | XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)  | 
3971  |  | { | 
3972  |  |     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);  | 
3973  |  |     (void)(&XXH_writeLE64);  | 
3974  |  |     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); | 
3975  |  |  | 
3976  |  | #       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900  | 
3977  |  |         /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */  | 
3978  |  |         XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; | 
3979  |  |         __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);  | 
3980  |  | #       else  | 
3981  |  |         __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);  | 
3982  |  | #       endif  | 
3983  |  |         int i;  | 
3984  |  |  | 
3985  |  |         const void* const src16 = XXH3_kSecret;  | 
3986  |  |         __m128i* dst16 = (__m128i*) customSecret;  | 
3987  |  | #       if defined(__GNUC__) || defined(__clang__)  | 
3988  |  |         /*  | 
3989  |  |          * On GCC & Clang, marking 'dest' as modified will cause the compiler:  | 
3990  |  |          *   - do not extract the secret from sse registers in the internal loop  | 
3991  |  |          *   - use less common registers, and avoid pushing these reg into stack  | 
3992  |  |          */  | 
3993  |  |         XXH_COMPILER_GUARD(dst16);  | 
3994  |  | #       endif  | 
3995  |  |         XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */  | 
3996  |  |         XXH_ASSERT(((size_t)dst16 & 15) == 0);  | 
3997  |  |  | 
3998  |  |         for (i=0; i < nbRounds; ++i) { | 
3999  |  |             dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);  | 
4000  |  |     }   }  | 
4001  |  | }  | 
4002  |  |  | 
4003  |  | #endif  | 
4004  |  |  | 
4005  |  | #if (XXH_VECTOR == XXH_NEON)  | 
4006  |  |  | 
4007  |  | XXH_FORCE_INLINE void  | 
4008  |  | XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,  | 
4009  |  |                     const void* XXH_RESTRICT input,  | 
4010  |  |                     const void* XXH_RESTRICT secret)  | 
4011  |  | { | 
4012  |  |     XXH_ASSERT((((size_t)acc) & 15) == 0);  | 
4013  |  |     { | 
4014  |  |         uint64x2_t* const xacc = (uint64x2_t *) acc;  | 
4015  |  |         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */  | 
4016  |  |         uint8_t const* const xinput = (const uint8_t *) input;  | 
4017  |  |         uint8_t const* const xsecret  = (const uint8_t *) secret;  | 
4018  |  |  | 
4019  |  |         size_t i;  | 
4020  |  |         for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) { | 
4021  |  |             /* data_vec = xinput[i]; */  | 
4022  |  |             uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));  | 
4023  |  |             /* key_vec  = xsecret[i];  */  | 
4024  |  |             uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));  | 
4025  |  |             uint64x2_t data_key;  | 
4026  |  |             uint32x2_t data_key_lo, data_key_hi;  | 
4027  |  |             /* xacc[i] += swap(data_vec); */  | 
4028  |  |             uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);  | 
4029  |  |             uint64x2_t const swapped = vextq_u64(data64, data64, 1);  | 
4030  |  |             xacc[i] = vaddq_u64 (xacc[i], swapped);  | 
4031  |  |             /* data_key = data_vec ^ key_vec; */  | 
4032  |  |             data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));  | 
4033  |  |             /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);  | 
4034  |  |              * data_key_hi = (uint32x2_t) (data_key >> 32);  | 
4035  |  |              * data_key = UNDEFINED; */  | 
4036  |  |             XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);  | 
4037  |  |             /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */  | 
4038  |  |             xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);  | 
4039  |  |  | 
4040  |  |         }  | 
4041  |  |     }  | 
4042  |  | }  | 
4043  |  |  | 
4044  |  | XXH_FORCE_INLINE void  | 
4045  |  | XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
4046  |  | { | 
4047  |  |     XXH_ASSERT((((size_t)acc) & 15) == 0);  | 
4048  |  |  | 
4049  |  |     {   uint64x2_t* xacc       = (uint64x2_t*) acc; | 
4050  |  |         uint8_t const* xsecret = (uint8_t const*) secret;  | 
4051  |  |         uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);  | 
4052  |  |  | 
4053  |  |         size_t i;  | 
4054  |  |         for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) { | 
4055  |  |             /* xacc[i] ^= (xacc[i] >> 47); */  | 
4056  |  |             uint64x2_t acc_vec  = xacc[i];  | 
4057  |  |             uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);  | 
4058  |  |             uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);  | 
4059  |  |  | 
4060  |  |             /* xacc[i] ^= xsecret[i]; */  | 
4061  |  |             uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));  | 
4062  |  |             uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));  | 
4063  |  |  | 
4064  |  |             /* xacc[i] *= XXH_PRIME32_1 */  | 
4065  |  |             uint32x2_t data_key_lo, data_key_hi;  | 
4066  |  |             /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);  | 
4067  |  |              * data_key_hi = (uint32x2_t) (xacc[i] >> 32);  | 
4068  |  |              * xacc[i] = UNDEFINED; */  | 
4069  |  |             XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);  | 
4070  |  |             {   /* | 
4071  |  |                  * prod_hi = (data_key >> 32) * XXH_PRIME32_1;  | 
4072  |  |                  *  | 
4073  |  |                  * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will  | 
4074  |  |                  * incorrectly "optimize" this:  | 
4075  |  |                  *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));  | 
4076  |  |                  *   shifted = vshll_n_u32(tmp, 32);  | 
4077  |  |                  * to this:  | 
4078  |  |                  *   tmp     = "vmulq_u64"(a, b); // no such thing!  | 
4079  |  |                  *   shifted = vshlq_n_u64(tmp, 32);  | 
4080  |  |                  *  | 
4081  |  |                  * However, unlike SSE, Clang lacks a 64-bit multiply routine  | 
4082  |  |                  * for NEON, and it scalarizes two 64-bit multiplies instead.  | 
4083  |  |                  *  | 
4084  |  |                  * vmull_u32 has the same timing as vmul_u32, and it avoids  | 
4085  |  |                  * this bug completely.  | 
4086  |  |                  * See https://bugs.llvm.org/show_bug.cgi?id=39967  | 
4087  |  |                  */  | 
4088  |  |                 uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);  | 
4089  |  |                 /* xacc[i] = prod_hi << 32; */  | 
4090  |  |                 xacc[i] = vshlq_n_u64(prod_hi, 32);  | 
4091  |  |                 /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */  | 
4092  |  |                 xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);  | 
4093  |  |             }  | 
4094  |  |     }   }  | 
4095  |  | }  | 
4096  |  |  | 
4097  |  | #endif  | 
4098  |  |  | 
4099  |  | #if (XXH_VECTOR == XXH_VSX)  | 
4100  |  |  | 
4101  |  | XXH_FORCE_INLINE void  | 
4102  |  | XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,  | 
4103  |  |                     const void* XXH_RESTRICT input,  | 
4104  |  |                     const void* XXH_RESTRICT secret)  | 
4105  |  | { | 
4106  |  |     /* presumed aligned */  | 
4107  |  |     unsigned long long* const xacc = (unsigned long long*) acc;  | 
4108  |  |     xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */  | 
4109  |  |     xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */  | 
4110  |  |     xxh_u64x2 const v32 = { 32, 32 }; | 
4111  |  |     size_t i;  | 
4112  |  |     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { | 
4113  |  |         /* data_vec = xinput[i]; */  | 
4114  |  |         xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);  | 
4115  |  |         /* key_vec = xsecret[i]; */  | 
4116  |  |         xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);  | 
4117  |  |         xxh_u64x2 const data_key = data_vec ^ key_vec;  | 
4118  |  |         /* shuffled = (data_key << 32) | (data_key >> 32); */  | 
4119  |  |         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);  | 
4120  |  |         /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */  | 
4121  |  |         xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);  | 
4122  |  |         /* acc_vec = xacc[i]; */  | 
4123  |  |         xxh_u64x2 acc_vec        = vec_xl(0, xacc + 2 * i);  | 
4124  |  |         acc_vec += product;  | 
4125  |  |  | 
4126  |  |         /* swap high and low halves */  | 
4127  |  | #ifdef __s390x__  | 
4128  |  |         acc_vec += vec_permi(data_vec, data_vec, 2);  | 
4129  |  | #else  | 
4130  |  |         acc_vec += vec_xxpermdi(data_vec, data_vec, 2);  | 
4131  |  | #endif  | 
4132  |  |         /* xacc[i] = acc_vec; */  | 
4133  |  |         vec_xst(acc_vec, 0, xacc + 2 * i);  | 
4134  |  |     }  | 
4135  |  | }  | 
4136  |  |  | 
4137  |  | XXH_FORCE_INLINE void  | 
4138  |  | XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
4139  |  | { | 
4140  |  |     XXH_ASSERT((((size_t)acc) & 15) == 0);  | 
4141  |  |  | 
4142  |  |     {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc; | 
4143  |  |         const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;  | 
4144  |  |         /* constants */  | 
4145  |  |         xxh_u64x2 const v32  = { 32, 32 }; | 
4146  |  |         xxh_u64x2 const v47 = { 47, 47 }; | 
4147  |  |         xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; | 
4148  |  |         size_t i;  | 
4149  |  |         for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { | 
4150  |  |             /* xacc[i] ^= (xacc[i] >> 47); */  | 
4151  |  |             xxh_u64x2 const acc_vec  = xacc[i];  | 
4152  |  |             xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);  | 
4153  |  |  | 
4154  |  |             /* xacc[i] ^= xsecret[i]; */  | 
4155  |  |             xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);  | 
4156  |  |             xxh_u64x2 const data_key = data_vec ^ key_vec;  | 
4157  |  |  | 
4158  |  |             /* xacc[i] *= XXH_PRIME32_1 */  | 
4159  |  |             /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF);  */  | 
4160  |  |             xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);  | 
4161  |  |             /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32);  */  | 
4162  |  |             xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);  | 
4163  |  |             xacc[i] = prod_odd + (prod_even << v32);  | 
4164  |  |     }   }  | 
4165  |  | }  | 
4166  |  |  | 
4167  |  | #endif  | 
4168  |  |  | 
4169  |  | /* scalar variants - universal */  | 
4170  |  |  | 
4171  |  | XXH_FORCE_INLINE void  | 
4172  |  | XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,  | 
4173  |  |                      const void* XXH_RESTRICT input,  | 
4174  |  |                      const void* XXH_RESTRICT secret)  | 
4175  |  | { | 
4176  |  |     xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */  | 
4177  |  |     const xxh_u8* const xinput  = (const xxh_u8*) input;  /* no alignment restriction */  | 
4178  |  |     const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */  | 
4179  |  |     size_t i;  | 
4180  |  |     XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);  | 
4181  |  |     for (i=0; i < XXH_ACC_NB; i++) { | 
4182  |  |         xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);  | 
4183  |  |         xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);  | 
4184  |  |         xacc[i ^ 1] += data_val; /* swap adjacent lanes */  | 
4185  |  |         xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);  | 
4186  |  |     }  | 
4187  |  | }  | 
4188  |  |  | 
4189  |  | XXH_FORCE_INLINE void  | 
4190  |  | XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)  | 
4191  |  | { | 
4192  |  |     xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */  | 
4193  |  |     const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */  | 
4194  |  |     size_t i;  | 
4195  |  |     XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);  | 
4196  |  |     for (i=0; i < XXH_ACC_NB; i++) { | 
4197  |  |         xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);  | 
4198  |  |         xxh_u64 acc64 = xacc[i];  | 
4199  |  |         acc64 = XXH_xorshift64(acc64, 47);  | 
4200  |  |         acc64 ^= key64;  | 
4201  |  |         acc64 *= XXH_PRIME32_1;  | 
4202  |  |         xacc[i] = acc64;  | 
4203  |  |     }  | 
4204  |  | }  | 
4205  |  |  | 
4206  |  | XXH_FORCE_INLINE void  | 
4207  |  | XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)  | 
4208  |  | { | 
4209  |  |     /*  | 
4210  |  |      * We need a separate pointer for the hack below,  | 
4211  |  |      * which requires a non-const pointer.  | 
4212  |  |      * Any decent compiler will optimize this out otherwise.  | 
4213  |  |      */  | 
4214  |  |     const xxh_u8* kSecretPtr = XXH3_kSecret;  | 
4215  |  |     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);  | 
4216  |  |  | 
4217  |  | #if defined(__clang__) && defined(__aarch64__)  | 
4218  |  |     /*  | 
4219  |  |      * UGLY HACK:  | 
4220  |  |      * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are  | 
4221  |  |      * placed sequentially, in order, at the top of the unrolled loop.  | 
4222  |  |      *  | 
4223  |  |      * While MOVK is great for generating constants (2 cycles for a 64-bit  | 
4224  |  |      * constant compared to 4 cycles for LDR), long MOVK chains stall the  | 
4225  |  |      * integer pipelines:  | 
4226  |  |      *   I   L   S  | 
4227  |  |      * MOVK  | 
4228  |  |      * MOVK  | 
4229  |  |      * MOVK  | 
4230  |  |      * MOVK  | 
4231  |  |      * ADD  | 
4232  |  |      * SUB      STR  | 
4233  |  |      *          STR  | 
4234  |  |      * By forcing loads from memory (as the asm line causes Clang to assume  | 
4235  |  |      * that XXH3_kSecretPtr has been changed), the pipelines are used more  | 
4236  |  |      * efficiently:  | 
4237  |  |      *   I   L   S  | 
4238  |  |      *      LDR  | 
4239  |  |      *  ADD LDR  | 
4240  |  |      *  SUB     STR  | 
4241  |  |      *          STR  | 
4242  |  |      * XXH3_64bits_withSeed, len == 256, Snapdragon 835  | 
4243  |  |      *   without hack: 2654.4 MB/s  | 
4244  |  |      *   with hack:    3202.9 MB/s  | 
4245  |  |      */  | 
4246  |  |     XXH_COMPILER_GUARD(kSecretPtr);  | 
4247  |  | #endif  | 
4248  |  |     /*  | 
4249  |  |      * Note: in debug mode, this overrides the asm optimization  | 
4250  |  |      * and Clang will emit MOVK chains again.  | 
4251  |  |      */  | 
4252  |  |     XXH_ASSERT(kSecretPtr == XXH3_kSecret);  | 
4253  |  |  | 
4254  |  |     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; | 
4255  |  |         int i;  | 
4256  |  |         for (i=0; i < nbRounds; i++) { | 
4257  |  |             /*  | 
4258  |  |              * The asm hack causes Clang to assume that kSecretPtr aliases with  | 
4259  |  |              * customSecret, and on aarch64, this prevented LDP from merging two  | 
4260  |  |              * loads together for free. Putting the loads together before the stores  | 
4261  |  |              * properly generates LDP.  | 
4262  |  |              */  | 
4263  |  |             xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;  | 
4264  |  |             xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;  | 
4265  |  |             XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);  | 
4266  |  |             XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);  | 
4267  |  |     }   }  | 
4268  |  | }  | 
4269  |  |  | 
4270  |  |  | 
4271  |  | typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);  | 
4272  |  | typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);  | 
4273  |  | typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);  | 
4274  |  |  | 
4275  |  |  | 
/*
 * Bind the generic kernel names to the implementation selected by
 * XXH_VECTOR. NEON and VSX have no vectorized initCustomSecret in this file
 * and share the scalar one.
 */
#if (XXH_VECTOR == XXH_AVX512)
#  define XXH3_accumulate_512 XXH3_accumulate_512_avx512
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
#  define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
#elif (XXH_VECTOR == XXH_AVX2)
#  define XXH3_accumulate_512 XXH3_accumulate_512_avx2
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
#  define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
#elif (XXH_VECTOR == XXH_SSE2)
#  define XXH3_accumulate_512 XXH3_accumulate_512_sse2
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
#  define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
#elif (XXH_VECTOR == XXH_NEON)
#  define XXH3_accumulate_512 XXH3_accumulate_512_neon
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#elif (XXH_VECTOR == XXH_VSX)
#  define XXH3_accumulate_512 XXH3_accumulate_512_vsx
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#else /* scalar */
#  define XXH3_accumulate_512 XXH3_accumulate_512_scalar
#  define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif
4313  |  |  | 
4314  |  |  | 
4315  |  |  | 
/*
 * Offset added to the current stripe pointer when prefetching in
 * XXH3_accumulate(). May be predefined by the build; otherwise it defaults
 * to 320 on Clang, 512 for AVX512 builds on other compilers, and 384
 * everywhere else.
 */
#if !defined(XXH_PREFETCH_DIST)
#  if defined(__clang__)
#    define XXH_PREFETCH_DIST 320
#  elif (XXH_VECTOR == XXH_AVX512)
#    define XXH_PREFETCH_DIST 512
#  else
#    define XXH_PREFETCH_DIST 384
#  endif
#endif  /* XXH_PREFETCH_DIST */
4327  |  |  | 
4328  |  | /*  | 
4329  |  |  * XXH3_accumulate()  | 
4330  |  |  * Loops over XXH3_accumulate_512().  | 
4331  |  |  * Assumption: nbStripes will not overflow the secret size  | 
4332  |  |  */  | 
4333  |  | XXH_FORCE_INLINE void  | 
4334  |  | XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,  | 
4335  |  |                 const xxh_u8* XXH_RESTRICT input,  | 
4336  |  |                 const xxh_u8* XXH_RESTRICT secret,  | 
4337  |  |                       size_t nbStripes,  | 
4338  |  |                       XXH3_f_accumulate_512 f_acc512)  | 
4339  |  | { | 
4340  |  |     size_t n;  | 
4341  |  |     for (n = 0; n < nbStripes; n++ ) { | 
4342  |  |         const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  | 
4343  |  |         XXH_PREFETCH(in + XXH_PREFETCH_DIST);  | 
4344  |  |         f_acc512(acc,  | 
4345  |  |                  in,  | 
4346  |  |                  secret + n*XXH_SECRET_CONSUME_RATE);  | 
4347  |  |     }  | 
4348  |  | }  | 
4349  |  |  | 
4350  |  | XXH_FORCE_INLINE void  | 
4351  |  | XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,  | 
4352  |  |                       const xxh_u8* XXH_RESTRICT input, size_t len,  | 
4353  |  |                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
4354  |  |                             XXH3_f_accumulate_512 f_acc512,  | 
4355  |  |                             XXH3_f_scrambleAcc f_scramble)  | 
4356  |  | { | 
4357  |  |     size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;  | 
4358  |  |     size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;  | 
4359  |  |     size_t const nb_blocks = (len - 1) / block_len;  | 
4360  |  |  | 
4361  |  |     size_t n;  | 
4362  |  |  | 
4363  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);  | 
4364  |  |  | 
4365  |  |     for (n = 0; n < nb_blocks; n++) { | 
4366  |  |         XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);  | 
4367  |  |         f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);  | 
4368  |  |     }  | 
4369  |  |  | 
4370  |  |     /* last partial block */  | 
4371  |  |     XXH_ASSERT(len > XXH_STRIPE_LEN);  | 
4372  |  |     {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; | 
4373  |  |         XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));  | 
4374  |  |         XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);  | 
4375  |  |  | 
4376  |  |         /* last stripe */  | 
4377  |  |         {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN; | 
4378  |  | #define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */  | 
4379  |  |             f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);  | 
4380  |  |     }   }  | 
4381  |  | }  | 
4382  |  |  | 
4383  |  | XXH_FORCE_INLINE xxh_u64  | 
4384  |  | XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)  | 
4385  |  | { | 
4386  |  |     return XXH3_mul128_fold64(  | 
4387  |  |                acc[0] ^ XXH_readLE64(secret),  | 
4388  |  |                acc[1] ^ XXH_readLE64(secret+8) );  | 
4389  |  | }  | 
4390  |  |  | 
4391  |  | static XXH64_hash_t  | 
4392  |  | XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)  | 
4393  |  | { | 
4394  |  |     xxh_u64 result64 = start;  | 
4395  |  |     size_t i = 0;  | 
4396  |  |  | 
4397  |  |     for (i = 0; i < 4; i++) { | 
4398  |  |         result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);  | 
4399  |  | #if defined(__clang__)                                /* Clang */ \  | 
4400  |  |     && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \  | 
4401  |  |     && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \  | 
4402  |  |     && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */  | 
4403  |  |         /*  | 
4404  |  |          * UGLY HACK:  | 
4405  |  |          * Prevent autovectorization on Clang ARMv7-a. Exact same problem as  | 
4406  |  |          * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.  | 
4407  |  |          * XXH3_64bits, len == 256, Snapdragon 835:  | 
4408  |  |          *   without hack: 2063.7 MB/s  | 
4409  |  |          *   with hack:    2560.7 MB/s  | 
4410  |  |          */  | 
4411  |  |         XXH_COMPILER_GUARD(result64);  | 
4412  |  | #endif  | 
4413  |  |     }  | 
4414  |  |  | 
4415  |  |     return XXH3_avalanche(result64);  | 
4416  |  | }  | 
4417  |  |  | 
/* Brace-enclosed initializer holding the starting values of the 8 accumulator lanes. */
#define XXH3_INIT_ACC {              \
    XXH_PRIME32_3, XXH_PRIME64_1,    \
    XXH_PRIME64_2, XXH_PRIME64_3,    \
    XXH_PRIME64_4, XXH_PRIME32_2,    \
    XXH_PRIME64_5, XXH_PRIME32_1 }
4420  |  |  | 
4421  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
4422  |  | XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,  | 
4423  |  |                            const void* XXH_RESTRICT secret, size_t secretSize,  | 
4424  |  |                            XXH3_f_accumulate_512 f_acc512,  | 
4425  |  |                            XXH3_f_scrambleAcc f_scramble)  | 
4426  |  | { | 
4427  |  |     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;  | 
4428  |  |  | 
4429  |  |     XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);  | 
4430  |  |  | 
4431  |  |     /* converge into final hash */  | 
4432  |  |     XXH_STATIC_ASSERT(sizeof(acc) == 64);  | 
4433  |  |     /* do not align on 8, so that the secret is different from the accumulator */  | 
4434  |  | #define XXH_SECRET_MERGEACCS_START 11  | 
4435  |  |     XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);  | 
4436  |  |     return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);  | 
4437  |  | }  | 
4438  |  |  | 
4439  |  | /*  | 
4440  |  |  * It's important for performance to transmit secret's size (when it's static)  | 
4441  |  |  * so that the compiler can properly optimize the vectorized loop.  | 
4442  |  |  * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.  | 
4443  |  |  */  | 
4444  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
4445  |  | XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,  | 
4446  |  |                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)  | 
4447  |  | { | 
4448  |  |     (void)seed64;  | 
4449  |  |     return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);  | 
4450  |  | }  | 
4451  |  |  | 
4452  |  | /*  | 
4453  |  |  * It's preferable for performance that XXH3_hashLong is not inlined,  | 
4454  |  |  * as it results in a smaller function for small data, easier to the instruction cache.  | 
4455  |  |  * Note that inside this no_inline function, we do inline the internal loop,  | 
4456  |  |  * and provide a statically defined secret size to allow optimization of vector loop.  | 
4457  |  |  */  | 
4458  |  | XXH_NO_INLINE XXH64_hash_t  | 
4459  |  | XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,  | 
4460  |  |                           XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)  | 
4461  |  | { | 
4462  |  |     (void)seed64; (void)secret; (void)secretLen;  | 
4463  |  |     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);  | 
4464  |  | }  | 
4465  |  |  | 
4466  |  | /*  | 
4467  |  |  * XXH3_hashLong_64b_withSeed():  | 
4468  |  |  * Generate a custom key based on alteration of default XXH3_kSecret with the seed,  | 
4469  |  |  * and then use this key for long mode hashing.  | 
4470  |  |  *  | 
4471  |  |  * This operation is decently fast but nonetheless costs a little bit of time.  | 
4472  |  |  * Try to avoid it whenever possible (typically when seed==0).  | 
4473  |  |  *  | 
4474  |  |  * It's important for performance that XXH3_hashLong is not inlined. Not sure  | 
4475  |  |  * why (uop cache maybe?), but the difference is large and easily measurable.  | 
4476  |  |  */  | 
4477  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
4478  |  | XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,  | 
4479  |  |                                     XXH64_hash_t seed,  | 
4480  |  |                                     XXH3_f_accumulate_512 f_acc512,  | 
4481  |  |                                     XXH3_f_scrambleAcc f_scramble,  | 
4482  |  |                                     XXH3_f_initCustomSecret f_initSec)  | 
4483  |  | { | 
4484  |  |     if (seed == 0)  | 
4485  |  |         return XXH3_hashLong_64b_internal(input, len,  | 
4486  |  |                                           XXH3_kSecret, sizeof(XXH3_kSecret),  | 
4487  |  |                                           f_acc512, f_scramble);  | 
4488  |  |     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; | 
4489  |  |         f_initSec(secret, seed);  | 
4490  |  |         return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),  | 
4491  |  |                                           f_acc512, f_scramble);  | 
4492  |  |     }  | 
4493  |  | }  | 
4494  |  |  | 
4495  |  | /*  | 
4496  |  |  * It's important for performance that XXH3_hashLong is not inlined.  | 
4497  |  |  */  | 
4498  |  | XXH_NO_INLINE XXH64_hash_t  | 
4499  |  | XXH3_hashLong_64b_withSeed(const void* input, size_t len,  | 
4500  |  |                            XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)  | 
4501  |  | { | 
4502  |  |     (void)secret; (void)secretLen;  | 
4503  |  |     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,  | 
4504  |  |                 XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);  | 
4505  |  | }  | 
4506  |  |  | 
4507  |  |  | 
/*
 * Signature shared by the long-input 64-bit hashing routines that
 * XXH3_64bits_internal() dispatches to (XXH3_hashLong_64b_default,
 * XXH3_hashLong_64b_withSecret, XXH3_hashLong_64b_withSeed):
 * (input, len, seed, secret, secretLen) -> 64-bit hash.
 */
typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
4510  |  |  | 
4511  |  | XXH_FORCE_INLINE XXH64_hash_t  | 
4512  |  | XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,  | 
4513  |  |                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,  | 
4514  |  |                      XXH3_hashLong64_f f_hashLong)  | 
4515  |  | { | 
4516  |  |     XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);  | 
4517  |  |     /*  | 
4518  |  |      * If an action is to be taken if `secretLen` condition is not respected,  | 
4519  |  |      * it should be done here.  | 
4520  |  |      * For now, it's a contract pre-condition.  | 
4521  |  |      * Adding a check and a branch here would cost performance at every hash.  | 
4522  |  |      * Also, note that function signature doesn't offer room to return an error.  | 
4523  |  |      */  | 
4524  |  |     if (len <= 16)  | 
4525  |  |         return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);  | 
4526  |  |     if (len <= 128)  | 
4527  |  |         return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);  | 
4528  |  |     if (len <= XXH3_MIDSIZE_MAX)  | 
4529  |  |         return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);  | 
4530  |  |     return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);  | 
4531  |  | }  | 
4532  |  |  | 
4533  |  |  | 
4534  |  | /* ===   Public entry point   === */  | 
4535  |  |  | 
4536  |  | /*! @ingroup xxh3_family */  | 
4537  |  | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)  | 
4538  |  | { | 
4539  |  |     return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);  | 
4540  |  | }  | 
4541  |  |  | 
4542  |  | /*! @ingroup xxh3_family */  | 
4543  |  | XXH_PUBLIC_API XXH64_hash_t  | 
4544  |  | XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)  | 
4545  |  | { | 
4546  |  |     return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);  | 
4547  |  | }  | 
4548  |  |  | 
4549  |  | /*! @ingroup xxh3_family */  | 
4550  |  | XXH_PUBLIC_API XXH64_hash_t  | 
4551  |  | XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)  | 
4552  |  | { | 
4553  |  |     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);  | 
4554  |  | }  | 
4555  |  |  | 
4556  |  | XXH_PUBLIC_API XXH64_hash_t  | 
4557  |  | XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)  | 
4558  |  | { | 
4559  |  |     if (len <= XXH3_MIDSIZE_MAX)  | 
4560  |  |         return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);  | 
4561  |  |     return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);  | 
4562  |  | }  | 
4563  |  |  | 
4564  |  |  | 
4565  |  | /* ===   XXH3 streaming   === */  | 
4566  |  |  | 
4567  |  | /*  | 
4568  |  |  * Malloc's a pointer that is always aligned to align.  | 
4569  |  |  *  | 
4570  |  |  * This must be freed with `XXH_alignedFree()`.  | 
4571  |  |  *  | 
4572  |  |  * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte  | 
4573  |  |  * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2  | 
4574  |  |  * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.  | 
4575  |  |  *  | 
4576  |  |  * This underalignment previously caused a rather obvious crash which went  | 
4577  |  |  * completely unnoticed due to XXH3_createState() not actually being tested.  | 
4578  |  |  * Credit to RedSpah for noticing this bug.  | 
4579  |  |  *  | 
4580  |  |  * The alignment is done manually: Functions like posix_memalign or _mm_malloc  | 
4581  |  |  * are avoided: To maintain portability, we would have to write a fallback  | 
4582  |  |  * like this anyways, and besides, testing for the existence of library  | 
4583  |  |  * functions without relying on external build tools is impossible.  | 
4584  |  |  *  | 
4585  |  |  * The method is simple: Overallocate, manually align, and store the offset  | 
4586  |  |  * to the original behind the returned pointer.  | 
4587  |  |  *  | 
4588  |  |  * Align must be a power of 2 and 8 <= align <= 128.  | 
4589  |  |  */  | 
4590  |  | static void* XXH_alignedMalloc(size_t s, size_t align)  | 
4591  |  | { | 
4592  |  |     XXH_ASSERT(align <= 128 && align >= 8); /* range check */  | 
4593  |  |     XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */  | 
4594  |  |     XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */  | 
4595  |  |     {   /* Overallocate to make room for manual realignment and an offset byte */ | 
4596  |  |         xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);  | 
4597  |  |         if (base != NULL) { | 
4598  |  |             /*  | 
4599  |  |              * Get the offset needed to align this pointer.  | 
4600  |  |              *  | 
4601  |  |              * Even if the returned pointer is aligned, there will always be  | 
4602  |  |              * at least one byte to store the offset to the original pointer.  | 
4603  |  |              */  | 
4604  |  |             size_t offset = align - ((size_t)base & (align - 1)); /* base % align */  | 
4605  |  |             /* Add the offset for the now-aligned pointer */  | 
4606  |  |             xxh_u8* ptr = base + offset;  | 
4607  |  |  | 
4608  |  |             XXH_ASSERT((size_t)ptr % align == 0);  | 
4609  |  |  | 
4610  |  |             /* Store the offset immediately before the returned pointer. */  | 
4611  |  |             ptr[-1] = (xxh_u8)offset;  | 
4612  |  |             return ptr;  | 
4613  |  |         }  | 
4614  |  |         return NULL;  | 
4615  |  |     }  | 
4616  |  | }  | 
4617  |  | /*  | 
4618  |  |  * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass  | 
4619  |  |  * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.  | 
4620  |  |  */  | 
4621  |  | static void XXH_alignedFree(void* p)  | 
4622  |  | { | 
4623  |  |     if (p != NULL) { | 
4624  |  |         xxh_u8* ptr = (xxh_u8*)p;  | 
4625  |  |         /* Get the offset byte we added in XXH_malloc. */  | 
4626  |  |         xxh_u8 offset = ptr[-1];  | 
4627  |  |         /* Free the original malloc'd pointer */  | 
4628  |  |         xxh_u8* base = ptr - offset;  | 
4629  |  |         XXH_free(base);  | 
4630  |  |     }  | 
4631  |  | }  | 
4632  |  | /*! @ingroup xxh3_family */  | 
4633  |  | XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)  | 
4634  |  | { | 
4635  |  |     XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);  | 
4636  |  |     if (state==NULL) return NULL;  | 
4637  |  |     XXH3_INITSTATE(state);  | 
4638  |  |     return state;  | 
4639  |  | }  | 
4640  |  |  | 
4641  |  | /*! @ingroup xxh3_family */  | 
4642  |  | XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)  | 
4643  |  | { | 
4644  |  |     XXH_alignedFree(statePtr);  | 
4645  |  |     return XXH_OK;  | 
4646  |  | }  | 
4647  |  |  | 
4648  |  | /*! @ingroup xxh3_family */  | 
4649  |  | XXH_PUBLIC_API void  | 
4650  |  | XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)  | 
4651  |  | { | 
4652  |  |     XXH_memcpy(dst_state, src_state, sizeof(*dst_state));  | 
4653  |  | }  | 
4654  |  |  | 
4655  |  | static void  | 
4656  |  | XXH3_reset_internal(XXH3_state_t* statePtr,  | 
4657  |  |                     XXH64_hash_t seed,  | 
4658  |  |                     const void* secret, size_t secretSize)  | 
4659  |  | { | 
4660  |  |     size_t const initStart = offsetof(XXH3_state_t, bufferedSize);  | 
4661  |  |     size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;  | 
4662  |  |     XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);  | 
4663  |  |     XXH_ASSERT(statePtr != NULL);  | 
4664  |  |     /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */  | 
4665  |  |     memset((char*)statePtr + initStart, 0, initLength);  | 
4666  |  |     statePtr->acc[0] = XXH_PRIME32_3;  | 
4667  |  |     statePtr->acc[1] = XXH_PRIME64_1;  | 
4668  |  |     statePtr->acc[2] = XXH_PRIME64_2;  | 
4669  |  |     statePtr->acc[3] = XXH_PRIME64_3;  | 
4670  |  |     statePtr->acc[4] = XXH_PRIME64_4;  | 
4671  |  |     statePtr->acc[5] = XXH_PRIME32_2;  | 
4672  |  |     statePtr->acc[6] = XXH_PRIME64_5;  | 
4673  |  |     statePtr->acc[7] = XXH_PRIME32_1;  | 
4674  |  |     statePtr->seed = seed;  | 
4675  |  |     statePtr->useSeed = (seed != 0);  | 
4676  |  |     statePtr->extSecret = (const unsigned char*)secret;  | 
4677  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);  | 
4678  |  |     statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;  | 
4679  |  |     statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;  | 
4680  |  | }  | 
4681  |  |  | 
4682  |  | /*! @ingroup xxh3_family */  | 
4683  |  | XXH_PUBLIC_API XXH_errorcode  | 
4684  |  | XXH3_64bits_reset(XXH3_state_t* statePtr)  | 
4685  |  | { | 
4686  |  |     if (statePtr == NULL) return XXH_ERROR;  | 
4687  |  |     XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);  | 
4688  |  |     return XXH_OK;  | 
4689  |  | }  | 
4690  |  |  | 
4691  |  | /*! @ingroup xxh3_family */  | 
4692  |  | XXH_PUBLIC_API XXH_errorcode  | 
4693  |  | XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)  | 
4694  |  | { | 
4695  |  |     if (statePtr == NULL) return XXH_ERROR;  | 
4696  |  |     XXH3_reset_internal(statePtr, 0, secret, secretSize);  | 
4697  |  |     if (secret == NULL) return XXH_ERROR;  | 
4698  |  |     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;  | 
4699  |  |     return XXH_OK;  | 
4700  |  | }  | 
4701  |  |  | 
4702  |  | /*! @ingroup xxh3_family */  | 
4703  |  | XXH_PUBLIC_API XXH_errorcode  | 
4704  |  | XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)  | 
4705  |  | { | 
4706  |  |     if (statePtr == NULL) return XXH_ERROR;  | 
4707  |  |     if (seed==0) return XXH3_64bits_reset(statePtr);  | 
4708  |  |     if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))  | 
4709  |  |         XXH3_initCustomSecret(statePtr->customSecret, seed);  | 
4710  |  |     XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);  | 
4711  |  |     return XXH_OK;  | 
4712  |  | }  | 
4713  |  |  | 
4714  |  | /*! @ingroup xxh3_family */  | 
4715  |  | XXH_PUBLIC_API XXH_errorcode  | 
4716  |  | XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)  | 
4717  |  | { | 
4718  |  |     if (statePtr == NULL) return XXH_ERROR;  | 
4719  |  |     if (secret == NULL) return XXH_ERROR;  | 
4720  |  |     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;  | 
4721  |  |     XXH3_reset_internal(statePtr, seed64, secret, secretSize);  | 
4722  |  |     statePtr->useSeed = 1; /* always, even if seed64==0 */  | 
4723  |  |     return XXH_OK;  | 
4724  |  | }  | 
4725  |  |  | 
4726  |  | /* Note : when XXH3_consumeStripes() is invoked,  | 
4727  |  |  * there must be a guarantee that at least one more byte must be consumed from input  | 
4728  |  |  * so that the function can blindly consume all stripes using the "normal" secret segment */  | 
4729  |  | XXH_FORCE_INLINE void  | 
4730  |  | XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,  | 
4731  |  |                     size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,  | 
4732  |  |                     const xxh_u8* XXH_RESTRICT input, size_t nbStripes,  | 
4733  |  |                     const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,  | 
4734  |  |                     XXH3_f_accumulate_512 f_acc512,  | 
4735  |  |                     XXH3_f_scrambleAcc f_scramble)  | 
4736  |  | { | 
4737  |  |     XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */  | 
4738  |  |     XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);  | 
4739  |  |     if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { | 
4740  |  |         /* need a scrambling operation */  | 
4741  |  |         size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;  | 
4742  |  |         size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;  | 
4743  |  |         XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);  | 
4744  |  |         f_scramble(acc, secret + secretLimit);  | 
4745  |  |         XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);  | 
4746  |  |         *nbStripesSoFarPtr = nbStripesAfterBlock;  | 
4747  |  |     } else { | 
4748  |  |         XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);  | 
4749  |  |         *nbStripesSoFarPtr += nbStripes;  | 
4750  |  |     }  | 
4751  |  | }  | 
4752  |  |  | 
/*
 * XXH3_STREAM_USE_STACK:
 * Defaults to 1 on every compiler except clang, which doesn't need the
 * additional stack space. A value supplied by the build is left untouched.
 */
#if !defined(XXH3_STREAM_USE_STACK) && !defined(__clang__)
#  define XXH3_STREAM_USE_STACK 1
#endif
4758  |  | /*  | 
4759  |  |  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.  | 
4760  |  |  */  | 
4761  |  | XXH_FORCE_INLINE XXH_errorcode  | 
4762  |  | XXH3_update(XXH3_state_t* XXH_RESTRICT const state,  | 
4763  |  |             const xxh_u8* XXH_RESTRICT input, size_t len,  | 
4764  |  |             XXH3_f_accumulate_512 f_acc512,  | 
4765  |  |             XXH3_f_scrambleAcc f_scramble)  | 
4766  |  | { | 
4767  |  |     if (input==NULL) { | 
4768  |  |         XXH_ASSERT(len == 0);  | 
4769  |  |         return XXH_OK;  | 
4770  |  |     }  | 
4771  |  |  | 
4772  |  |     XXH_ASSERT(state != NULL);  | 
4773  |  |     {   const xxh_u8* const bEnd = input + len; | 
4774  |  |         const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;  | 
4775  |  | #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1  | 
4776  |  |         /* For some reason, gcc and MSVC seem to suffer greatly  | 
4777  |  |          * when operating accumulators directly into state.  | 
4778  |  |          * Operating into stack space seems to enable proper optimization.  | 
4779  |  |          * clang, on the other hand, doesn't seem to need this trick */  | 
4780  |  |         XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));  | 
4781  |  | #else  | 
4782  |  |         xxh_u64* XXH_RESTRICT const acc = state->acc;  | 
4783  |  | #endif  | 
4784  |  |         state->totalLen += len;  | 
4785  |  |         XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);  | 
4786  |  |  | 
4787  |  |         /* small input : just fill in tmp buffer */  | 
4788  |  |         if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { | 
4789  |  |             XXH_memcpy(state->buffer + state->bufferedSize, input, len);  | 
4790  |  |             state->bufferedSize += (XXH32_hash_t)len;  | 
4791  |  |             return XXH_OK;  | 
4792  |  |         }  | 
4793  |  |  | 
4794  |  |         /* total input is now > XXH3_INTERNALBUFFER_SIZE */  | 
4795  |  |         #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)  | 
4796  |  |         XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */  | 
4797  |  |  | 
4798  |  |         /*  | 
4799  |  |          * Internal buffer is partially filled (always, except at beginning)  | 
4800  |  |          * Complete it, then consume it.  | 
4801  |  |          */  | 
4802  |  |         if (state->bufferedSize) { | 
4803  |  |             size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;  | 
4804  |  |             XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);  | 
4805  |  |             input += loadSize;  | 
4806  |  |             XXH3_consumeStripes(acc,  | 
4807  |  |                                &state->nbStripesSoFar, state->nbStripesPerBlock,  | 
4808  |  |                                 state->buffer, XXH3_INTERNALBUFFER_STRIPES,  | 
4809  |  |                                 secret, state->secretLimit,  | 
4810  |  |                                 f_acc512, f_scramble);  | 
4811  |  |             state->bufferedSize = 0;  | 
4812  |  |         }  | 
4813  |  |         XXH_ASSERT(input < bEnd);  | 
4814  |  |  | 
4815  |  |         /* large input to consume : ingest per full block */  | 
4816  |  |         if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) { | 
4817  |  |             size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;  | 
4818  |  |             XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);  | 
4819  |  |             /* join to current block's end */  | 
4820  |  |             {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar; | 
4821  |  |                 XXH_ASSERT(nbStripes <= nbStripes);  | 
4822  |  |                 XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);  | 
4823  |  |                 f_scramble(acc, secret + state->secretLimit);  | 
4824  |  |                 state->nbStripesSoFar = 0;  | 
4825  |  |                 input += nbStripesToEnd * XXH_STRIPE_LEN;  | 
4826  |  |                 nbStripes -= nbStripesToEnd;  | 
4827  |  |             }  | 
4828  |  |             /* consume per entire blocks */  | 
4829  |  |             while(nbStripes >= state->nbStripesPerBlock) { | 
4830  |  |                 XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);  | 
4831  |  |                 f_scramble(acc, secret + state->secretLimit);  | 
4832  |  |                 input += state->nbStripesPerBlock * XXH_STRIPE_LEN;  | 
4833  |  |                 nbStripes -= state->nbStripesPerBlock;  | 
4834  |  |             }  | 
4835  |  |             /* consume last partial block */  | 
4836  |  |             XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);  | 
4837  |  |             input += nbStripes * XXH_STRIPE_LEN;  | 
4838  |  |             XXH_ASSERT(input < bEnd);  /* at least some bytes left */  | 
4839  |  |             state->nbStripesSoFar = nbStripes;  | 
4840  |  |             /* buffer predecessor of last partial stripe */  | 
4841  |  |             XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);  | 
4842  |  |             XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);  | 
4843  |  |         } else { | 
4844  |  |             /* content to consume <= block size */  | 
4845  |  |             /* Consume input by a multiple of internal buffer size */  | 
4846  |  |             if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { | 
4847  |  |                 const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;  | 
4848  |  |                 do { | 
4849  |  |                     XXH3_consumeStripes(acc,  | 
4850  |  |                                        &state->nbStripesSoFar, state->nbStripesPerBlock,  | 
4851  |  |                                         input, XXH3_INTERNALBUFFER_STRIPES,  | 
4852  |  |                                         secret, state->secretLimit,  | 
4853  |  |                                         f_acc512, f_scramble);  | 
4854  |  |                     input += XXH3_INTERNALBUFFER_SIZE;  | 
4855  |  |                 } while (input<limit);  | 
4856  |  |                 /* buffer predecessor of last partial stripe */  | 
4857  |  |                 XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);  | 
4858  |  |             }  | 
4859  |  |         }  | 
4860  |  |  | 
4861  |  |         /* Some remaining input (always) : buffer it */  | 
4862  |  |         XXH_ASSERT(input < bEnd);  | 
4863  |  |         XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);  | 
4864  |  |         XXH_ASSERT(state->bufferedSize == 0);  | 
4865  |  |         XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));  | 
4866  |  |         state->bufferedSize = (XXH32_hash_t)(bEnd-input);  | 
4867  |  | #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1  | 
4868  |  |         /* save stack accumulators into state */  | 
4869  |  |         memcpy(state->acc, acc, sizeof(acc));  | 
4870  |  | #endif  | 
4871  |  |     }  | 
4872  |  |  | 
4873  |  |     return XXH_OK;  | 
4874  |  | }  | 
4875  |  |  | 
4876  |  | /*! @ingroup xxh3_family */  | 
4877  |  | XXH_PUBLIC_API XXH_errorcode  | 
4878  |  | XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)  | 
4879  |  | { | 
4880  |  |     return XXH3_update(state, (const xxh_u8*)input, len,  | 
4881  |  |                        XXH3_accumulate_512, XXH3_scrambleAcc);  | 
4882  |  | }  | 
4883  |  |  | 
4884  |  |  | 
4885  |  | XXH_FORCE_INLINE void  | 
4886  |  | XXH3_digest_long (XXH64_hash_t* acc,  | 
4887  |  |                   const XXH3_state_t* state,  | 
4888  |  |                   const unsigned char* secret)  | 
4889  |  | { | 
4890  |  |     /*  | 
4891  |  |      * Digest on a local copy. This way, the state remains unaltered, and it can  | 
4892  |  |      * continue ingesting more input afterwards.  | 
4893  |  |      */  | 
4894  |  |     XXH_memcpy(acc, state->acc, sizeof(state->acc));  | 
4895  |  |     if (state->bufferedSize >= XXH_STRIPE_LEN) { | 
4896  |  |         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;  | 
4897  |  |         size_t nbStripesSoFar = state->nbStripesSoFar;  | 
4898  |  |         XXH3_consumeStripes(acc,  | 
4899  |  |                            &nbStripesSoFar, state->nbStripesPerBlock,  | 
4900  |  |                             state->buffer, nbStripes,  | 
4901  |  |                             secret, state->secretLimit,  | 
4902  |  |                             XXH3_accumulate_512, XXH3_scrambleAcc);  | 
4903  |  |         /* last stripe */  | 
4904  |  |         XXH3_accumulate_512(acc,  | 
4905  |  |                             state->buffer + state->bufferedSize - XXH_STRIPE_LEN,  | 
4906  |  |                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);  | 
4907  |  |     } else {  /* bufferedSize < XXH_STRIPE_LEN */ | 
4908  |  |         xxh_u8 lastStripe[XXH_STRIPE_LEN];  | 
4909  |  |         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;  | 
4910  |  |         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */  | 
4911  |  |         XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);  | 
4912  |  |         XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);  | 
4913  |  |         XXH3_accumulate_512(acc,  | 
4914  |  |                             lastStripe,  | 
4915  |  |                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);  | 
4916  |  |     }  | 
4917  |  | }  | 
4918  |  |  | 
4919  |  | /*! @ingroup xxh3_family */  | 
4920  |  | XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)  | 
4921  |  | { | 
4922  |  |     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;  | 
4923  |  |     if (state->totalLen > XXH3_MIDSIZE_MAX) { | 
4924  |  |         XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];  | 
4925  |  |         XXH3_digest_long(acc, state, secret);  | 
4926  |  |         return XXH3_mergeAccs(acc,  | 
4927  |  |                               secret + XXH_SECRET_MERGEACCS_START,  | 
4928  |  |                               (xxh_u64)state->totalLen * XXH_PRIME64_1);  | 
4929  |  |     }  | 
4930  |  |     /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */  | 
4931  |  |     if (state->useSeed)  | 
4932  |  |         return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);  | 
4933  |  |     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),  | 
4934  |  |                                   secret, state->secretLimit + XXH_STRIPE_LEN);  | 
4935  |  | }  | 
4936  |  |  | 
4937  |  |  | 
4938  |  |  | 
4939  |  | /* ==========================================  | 
4940  |  |  * XXH3 128 bits (a.k.a XXH128)  | 
4941  |  |  * ==========================================  | 
4942  |  |  * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,  | 
4943  |  |  * even without counting the significantly larger output size.  | 
4944  |  |  *  | 
4945  |  |  * For example, extra steps are taken to avoid the seed-dependent collisions  | 
4946  |  |  * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).  | 
4947  |  |  *  | 
4948  |  |  * This strength naturally comes at the cost of some speed, especially on short  | 
4949  |  |  * lengths. Note that longer hashes are about as fast as the 64-bit version  | 
4950  |  |  * due to it using only a slight modification of the 64-bit loop.  | 
4951  |  |  *  | 
4952  |  |  * XXH128 is also more oriented towards 64-bit machines. It is still extremely  | 
4953  |  |  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).  | 
4954  |  |  */  | 
4955  |  |  | 
4956  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
4957  |  | XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
4958  |  | { | 
4959  |  |     /* A doubled version of 1to3_64b with different constants. */  | 
4960  |  |     XXH_ASSERT(input != NULL);  | 
4961  |  |     XXH_ASSERT(1 <= len && len <= 3);  | 
4962  |  |     XXH_ASSERT(secret != NULL);  | 
4963  |  |     /*  | 
4964  |  |      * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } | 
4965  |  |      * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } | 
4966  |  |      * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } | 
4967  |  |      */  | 
4968  |  |     {   xxh_u8 const c1 = input[0]; | 
4969  |  |         xxh_u8 const c2 = input[len >> 1];  | 
4970  |  |         xxh_u8 const c3 = input[len - 1];  | 
4971  |  |         xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)  | 
4972  |  |                                 | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);  | 
4973  |  |         xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);  | 
4974  |  |         xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;  | 
4975  |  |         xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;  | 
4976  |  |         xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;  | 
4977  |  |         xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;  | 
4978  |  |         XXH128_hash_t h128;  | 
4979  |  |         h128.low64  = XXH64_avalanche(keyed_lo);  | 
4980  |  |         h128.high64 = XXH64_avalanche(keyed_hi);  | 
4981  |  |         return h128;  | 
4982  |  |     }  | 
4983  |  | }  | 
4984  |  |  | 
4985  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
4986  |  | XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
4987  |  | { | 
4988  |  |     XXH_ASSERT(input != NULL);  | 
4989  |  |     XXH_ASSERT(secret != NULL);  | 
4990  |  |     XXH_ASSERT(4 <= len && len <= 8);  | 
4991  |  |     seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;  | 
4992  |  |     {   xxh_u32 const input_lo = XXH_readLE32(input); | 
4993  |  |         xxh_u32 const input_hi = XXH_readLE32(input + len - 4);  | 
4994  |  |         xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);  | 
4995  |  |         xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;  | 
4996  |  |         xxh_u64 const keyed = input_64 ^ bitflip;  | 
4997  |  |  | 
4998  |  |         /* Shift len to the left to ensure it is even, this avoids even multiplies. */  | 
4999  |  |         XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));  | 
5000  |  |  | 
5001  |  |         m128.high64 += (m128.low64 << 1);  | 
5002  |  |         m128.low64  ^= (m128.high64 >> 3);  | 
5003  |  |  | 
5004  |  |         m128.low64   = XXH_xorshift64(m128.low64, 35);  | 
5005  |  |         m128.low64  *= 0x9FB21C651E98DF25ULL;  | 
5006  |  |         m128.low64   = XXH_xorshift64(m128.low64, 28);  | 
5007  |  |         m128.high64  = XXH3_avalanche(m128.high64);  | 
5008  |  |         return m128;  | 
5009  |  |     }  | 
5010  |  | }  | 
5011  |  |  | 
/*
 * 128-bit hash of a 9..16 byte input.
 *
 * Reads the first and last eight input bytes (the two reads may overlap),
 * keys them with secret bytes 32..63 adjusted by the seed, and mixes them
 * through two widening multiplies before a final XXH3_avalanche() on each
 * half.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
        xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
        /*
         * Put len in the middle of m128 to ensure that the length gets mixed to
         * both the low and high bits in the 128x64 multiply below.
         */
        m128.low64 += (xxh_u64)(len - 1) << 54;
        input_hi   ^= bitfliph;
        /*
         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
         * the high 64 bits of m128.
         *
         * The best approach to this operation is different on 32-bit and 64-bit.
         * Both branches below compute the same value.
         */
        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
            /*
             * 32-bit optimized version, which is more readable.
             *
             * On 32-bit, it removes an ADC and delays a dependency between the two
             * halves of m128.high64, but it generates an extra mask on 64-bit.
             */
            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
        } else {
            /*
             * 64-bit optimized (albeit more confusing) version.
             *
             * Uses some properties of addition and multiplication to remove the mask:
             *
             * Let:
             *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
             *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
             *    c = XXH_PRIME32_2
             *
             * The 32-bit branch above computes:
             *    b + (a * c)
             * Inverse Property: x + y - x == y
             *    b + (a * (1 + c - 1))
             * Distributive Property: x * (y + z) == (x * y) + (x * z)
             *    b + (a * 1) + (a * (c - 1))
             * Identity Property: x * 1 == x
             *    b + a + (a * (c - 1))
             *
             * Substitute a, b, and c:
             *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
             *
             * Since input_hi.hi + input_hi.lo == input_hi, we get this:
             *    input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
             */
            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
        }
        /* m128 ^= XXH_swap64(m128 >> 64); */
        m128.low64  ^= XXH_swap64(m128.high64);

        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
            /* Only the low 64 bits of this product matter: it lands in the high half. */
            h128.high64 += m128.high64 * XXH_PRIME64_2;

            h128.low64   = XXH3_avalanche(h128.low64);
            h128.high64  = XXH3_avalanche(h128.high64);
            return h128;
    }   }
}
5083  |  |  | 
5084  |  | /*  | 
5085  |  |  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN  | 
5086  |  |  */  | 
5087  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5088  |  | XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)  | 
5089  |  | { | 
5090  |  |     XXH_ASSERT(len <= 16);  | 
5091  |  |     {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); | 
5092  |  |         if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);  | 
5093  |  |         if (len) return XXH3_len_1to3_128b(input, len, secret, seed);  | 
5094  |  |         {   XXH128_hash_t h128; | 
5095  |  |             xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);  | 
5096  |  |             xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);  | 
5097  |  |             h128.low64 = XXH64_avalanche(seed ^ bitflipl);  | 
5098  |  |             h128.high64 = XXH64_avalanche( seed ^ bitfliph);  | 
5099  |  |             return h128;  | 
5100  |  |     }   }  | 
5101  |  | }  | 
5102  |  |  | 
5103  |  | /*  | 
5104  |  |  * A bit slower than XXH3_mix16B, but handles multiply by zero better.  | 
5105  |  |  */  | 
5106  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5107  |  | XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,  | 
5108  |  |               const xxh_u8* secret, XXH64_hash_t seed)  | 
5109  |  | { | 
5110  |  |     acc.low64  += XXH3_mix16B (input_1, secret+0, seed);  | 
5111  |  |     acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);  | 
5112  |  |     acc.high64 += XXH3_mix16B (input_2, secret+16, seed);  | 
5113  |  |     acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);  | 
5114  |  |     return acc;  | 
5115  |  | }  | 
5116  |  |  | 
5117  |  |  | 
5118  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5119  |  | XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,  | 
5120  |  |                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
5121  |  |                       XXH64_hash_t seed)  | 
5122  |  | { | 
5123  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;  | 
5124  |  |     XXH_ASSERT(16 < len && len <= 128);  | 
5125  |  |  | 
5126  |  |     {   XXH128_hash_t acc; | 
5127  |  |         acc.low64 = len * XXH_PRIME64_1;  | 
5128  |  |         acc.high64 = 0;  | 
5129  |  |         if (len > 32) { | 
5130  |  |             if (len > 64) { | 
5131  |  |                 if (len > 96) { | 
5132  |  |                     acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);  | 
5133  |  |                 }  | 
5134  |  |                 acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);  | 
5135  |  |             }  | 
5136  |  |             acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);  | 
5137  |  |         }  | 
5138  |  |         acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);  | 
5139  |  |         {   XXH128_hash_t h128; | 
5140  |  |             h128.low64  = acc.low64 + acc.high64;  | 
5141  |  |             h128.high64 = (acc.low64    * XXH_PRIME64_1)  | 
5142  |  |                         + (acc.high64   * XXH_PRIME64_4)  | 
5143  |  |                         + ((len - seed) * XXH_PRIME64_2);  | 
5144  |  |             h128.low64  = XXH3_avalanche(h128.low64);  | 
5145  |  |             h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);  | 
5146  |  |             return h128;  | 
5147  |  |         }  | 
5148  |  |     }  | 
5149  |  | }  | 
5150  |  |  | 
5151  |  | XXH_NO_INLINE XXH128_hash_t  | 
5152  |  | XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,  | 
5153  |  |                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
5154  |  |                        XXH64_hash_t seed)  | 
5155  |  | { | 
5156  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;  | 
5157  |  |     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);  | 
5158  |  |  | 
5159  |  |     {   XXH128_hash_t acc; | 
5160  |  |         int const nbRounds = (int)len / 32;  | 
5161  |  |         int i;  | 
5162  |  |         acc.low64 = len * XXH_PRIME64_1;  | 
5163  |  |         acc.high64 = 0;  | 
5164  |  |         for (i=0; i<4; i++) { | 
5165  |  |             acc = XXH128_mix32B(acc,  | 
5166  |  |                                 input  + (32 * i),  | 
5167  |  |                                 input  + (32 * i) + 16,  | 
5168  |  |                                 secret + (32 * i),  | 
5169  |  |                                 seed);  | 
5170  |  |         }  | 
5171  |  |         acc.low64 = XXH3_avalanche(acc.low64);  | 
5172  |  |         acc.high64 = XXH3_avalanche(acc.high64);  | 
5173  |  |         XXH_ASSERT(nbRounds >= 4);  | 
5174  |  |         for (i=4 ; i < nbRounds; i++) { | 
5175  |  |             acc = XXH128_mix32B(acc,  | 
5176  |  |                                 input + (32 * i),  | 
5177  |  |                                 input + (32 * i) + 16,  | 
5178  |  |                                 secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),  | 
5179  |  |                                 seed);  | 
5180  |  |         }  | 
5181  |  |         /* last bytes */  | 
5182  |  |         acc = XXH128_mix32B(acc,  | 
5183  |  |                             input + len - 16,  | 
5184  |  |                             input + len - 32,  | 
5185  |  |                             secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,  | 
5186  |  |                             0ULL - seed);  | 
5187  |  |  | 
5188  |  |         {   XXH128_hash_t h128; | 
5189  |  |             h128.low64  = acc.low64 + acc.high64;  | 
5190  |  |             h128.high64 = (acc.low64    * XXH_PRIME64_1)  | 
5191  |  |                         + (acc.high64   * XXH_PRIME64_4)  | 
5192  |  |                         + ((len - seed) * XXH_PRIME64_2);  | 
5193  |  |             h128.low64  = XXH3_avalanche(h128.low64);  | 
5194  |  |             h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);  | 
5195  |  |             return h128;  | 
5196  |  |         }  | 
5197  |  |     }  | 
5198  |  | }  | 
5199  |  |  | 
5200  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5201  |  | XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,  | 
5202  |  |                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,  | 
5203  |  |                             XXH3_f_accumulate_512 f_acc512,  | 
5204  |  |                             XXH3_f_scrambleAcc f_scramble)  | 
5205  |  | { | 
5206  |  |     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;  | 
5207  |  |  | 
5208  |  |     XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);  | 
5209  |  |  | 
5210  |  |     /* converge into final hash */  | 
5211  |  |     XXH_STATIC_ASSERT(sizeof(acc) == 64);  | 
5212  |  |     XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);  | 
5213  |  |     {   XXH128_hash_t h128; | 
5214  |  |         h128.low64  = XXH3_mergeAccs(acc,  | 
5215  |  |                                      secret + XXH_SECRET_MERGEACCS_START,  | 
5216  |  |                                      (xxh_u64)len * XXH_PRIME64_1);  | 
5217  |  |         h128.high64 = XXH3_mergeAccs(acc,  | 
5218  |  |                                      secret + secretSize  | 
5219  |  |                                             - sizeof(acc) - XXH_SECRET_MERGEACCS_START,  | 
5220  |  |                                      ~((xxh_u64)len * XXH_PRIME64_2));  | 
5221  |  |         return h128;  | 
5222  |  |     }  | 
5223  |  | }  | 
5224  |  |  | 
5225  |  | /*  | 
5226  |  |  * It's important for performance that XXH3_hashLong is not inlined.  | 
5227  |  |  */  | 
5228  |  | XXH_NO_INLINE XXH128_hash_t  | 
5229  |  | XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,  | 
5230  |  |                            XXH64_hash_t seed64,  | 
5231  |  |                            const void* XXH_RESTRICT secret, size_t secretLen)  | 
5232  |  | { | 
5233  |  |     (void)seed64; (void)secret; (void)secretLen;  | 
5234  |  |     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),  | 
5235  |  |                                        XXH3_accumulate_512, XXH3_scrambleAcc);  | 
5236  |  | }  | 
5237  |  |  | 
5238  |  | /*  | 
5239  |  |  * It's important for performance to pass @secretLen (when it's static)  | 
5240  |  |  * to the compiler, so that it can properly optimize the vectorized loop.  | 
5241  |  |  */  | 
5242  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5243  |  | XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,  | 
5244  |  |                               XXH64_hash_t seed64,  | 
5245  |  |                               const void* XXH_RESTRICT secret, size_t secretLen)  | 
5246  |  | { | 
5247  |  |     (void)seed64;  | 
5248  |  |     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,  | 
5249  |  |                                        XXH3_accumulate_512, XXH3_scrambleAcc);  | 
5250  |  | }  | 
5251  |  |  | 
5252  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5253  |  | XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,  | 
5254  |  |                                 XXH64_hash_t seed64,  | 
5255  |  |                                 XXH3_f_accumulate_512 f_acc512,  | 
5256  |  |                                 XXH3_f_scrambleAcc f_scramble,  | 
5257  |  |                                 XXH3_f_initCustomSecret f_initSec)  | 
5258  |  | { | 
5259  |  |     if (seed64 == 0)  | 
5260  |  |         return XXH3_hashLong_128b_internal(input, len,  | 
5261  |  |                                            XXH3_kSecret, sizeof(XXH3_kSecret),  | 
5262  |  |                                            f_acc512, f_scramble);  | 
5263  |  |     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; | 
5264  |  |         f_initSec(secret, seed64);  | 
5265  |  |         return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),  | 
5266  |  |                                            f_acc512, f_scramble);  | 
5267  |  |     }  | 
5268  |  | }  | 
5269  |  |  | 
5270  |  | /*  | 
5271  |  |  * It's important for performance that XXH3_hashLong is not inlined.  | 
5272  |  |  */  | 
5273  |  | XXH_NO_INLINE XXH128_hash_t  | 
5274  |  | XXH3_hashLong_128b_withSeed(const void* input, size_t len,  | 
5275  |  |                             XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)  | 
5276  |  | { | 
5277  |  |     (void)secret; (void)secretLen;  | 
5278  |  |     return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,  | 
5279  |  |                 XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);  | 
5280  |  | }  | 
5281  |  |  | 
/*
 * Signature shared by the long-input 128-bit back-ends
 * (XXH3_hashLong_128b_default, _withSecret and _withSeed).
 * Arguments, in order: input, input length, 64-bit seed, secret, secret length.
 * XXH3_128bits_internal() calls through it only when len > XXH3_MIDSIZE_MAX.
 */
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);
5284  |  |  | 
5285  |  | XXH_FORCE_INLINE XXH128_hash_t  | 
5286  |  | XXH3_128bits_internal(const void* input, size_t len,  | 
5287  |  |                       XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,  | 
5288  |  |                       XXH3_hashLong128_f f_hl128)  | 
5289  |  | { | 
5290  |  |     XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);  | 
5291  |  |     /*  | 
5292  |  |      * If an action is to be taken if `secret` conditions are not respected,  | 
5293  |  |      * it should be done here.  | 
5294  |  |      * For now, it's a contract pre-condition.  | 
5295  |  |      * Adding a check and a branch here would cost performance at every hash.  | 
5296  |  |      */  | 
5297  |  |     if (len <= 16)  | 
5298  |  |         return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);  | 
5299  |  |     if (len <= 128)  | 
5300  |  |         return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);  | 
5301  |  |     if (len <= XXH3_MIDSIZE_MAX)  | 
5302  |  |         return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);  | 
5303  |  |     return f_hl128(input, len, seed64, secret, secretLen);  | 
5304  |  | }  | 
5305  |  |  | 
5306  |  |  | 
5307  |  | /* ===   Public XXH128 API   === */  | 
5308  |  |  | 
5309  |  | /*! @ingroup xxh3_family */  | 
5310  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)  | 
5311  |  | { | 
5312  |  |     return XXH3_128bits_internal(input, len, 0,  | 
5313  |  |                                  XXH3_kSecret, sizeof(XXH3_kSecret),  | 
5314  |  |                                  XXH3_hashLong_128b_default);  | 
5315  |  | }  | 
5316  |  |  | 
5317  |  | /*! @ingroup xxh3_family */  | 
5318  |  | XXH_PUBLIC_API XXH128_hash_t  | 
5319  |  | XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)  | 
5320  |  | { | 
5321  |  |     return XXH3_128bits_internal(input, len, 0,  | 
5322  |  |                                  (const xxh_u8*)secret, secretSize,  | 
5323  |  |                                  XXH3_hashLong_128b_withSecret);  | 
5324  |  | }  | 
5325  |  |  | 
5326  |  | /*! @ingroup xxh3_family */  | 
5327  |  | XXH_PUBLIC_API XXH128_hash_t  | 
5328  |  | XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)  | 
5329  |  | { | 
5330  |  |     return XXH3_128bits_internal(input, len, seed,  | 
5331  |  |                                  XXH3_kSecret, sizeof(XXH3_kSecret),  | 
5332  |  |                                  XXH3_hashLong_128b_withSeed);  | 
5333  |  | }  | 
5334  |  |  | 
5335  |  | /*! @ingroup xxh3_family */  | 
5336  |  | XXH_PUBLIC_API XXH128_hash_t  | 
5337  |  | XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)  | 
5338  |  | { | 
5339  |  |     if (len <= XXH3_MIDSIZE_MAX)  | 
5340  |  |         return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);  | 
5341  |  |     return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);  | 
5342  |  | }  | 
5343  |  |  | 
5344  |  | /*! @ingroup xxh3_family */  | 
5345  |  | XXH_PUBLIC_API XXH128_hash_t  | 
5346  |  | XXH128(const void* input, size_t len, XXH64_hash_t seed)  | 
5347  |  | { | 
5348  |  |     return XXH3_128bits_withSeed(input, len, seed);  | 
5349  |  | }  | 
5350  |  |  | 
5351  |  |  | 
5352  |  | /* ===   XXH3 128-bit streaming   === */  | 
5353  |  |  | 
5354  |  | /*  | 
5355  |  |  * All initialization and update functions are identical to 64-bit streaming variant.  | 
5356  |  |  * The only difference is the finalization routine.  | 
5357  |  |  */  | 
5358  |  |  | 
5359  |  | /*! @ingroup xxh3_family */  | 
5360  |  | XXH_PUBLIC_API XXH_errorcode  | 
5361  |  | XXH3_128bits_reset(XXH3_state_t* statePtr)  | 
5362  |  | { | 
5363  |  |     return XXH3_64bits_reset(statePtr);  | 
5364  |  | }  | 
5365  |  |  | 
5366  |  | /*! @ingroup xxh3_family */  | 
5367  |  | XXH_PUBLIC_API XXH_errorcode  | 
5368  |  | XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)  | 
5369  |  | { | 
5370  |  |     return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);  | 
5371  |  | }  | 
5372  |  |  | 
5373  |  | /*! @ingroup xxh3_family */  | 
5374  |  | XXH_PUBLIC_API XXH_errorcode  | 
5375  |  | XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)  | 
5376  |  | { | 
5377  |  |     return XXH3_64bits_reset_withSeed(statePtr, seed);  | 
5378  |  | }  | 
5379  |  |  | 
5380  |  | /*! @ingroup xxh3_family */  | 
5381  |  | XXH_PUBLIC_API XXH_errorcode  | 
5382  |  | XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)  | 
5383  |  | { | 
5384  |  |     return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);  | 
5385  |  | }  | 
5386  |  |  | 
5387  |  | /*! @ingroup xxh3_family */  | 
5388  |  | XXH_PUBLIC_API XXH_errorcode  | 
5389  |  | XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)  | 
5390  |  | { | 
5391  |  |     return XXH3_update(state, (const xxh_u8*)input, len,  | 
5392  |  |                        XXH3_accumulate_512, XXH3_scrambleAcc);  | 
5393  |  | }  | 
5394  |  |  | 
5395  |  | /*! @ingroup xxh3_family */  | 
5396  |  | XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)  | 
5397  |  | { | 
5398  |  |     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;  | 
5399  |  |     if (state->totalLen > XXH3_MIDSIZE_MAX) { | 
5400  |  |         XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];  | 
5401  |  |         XXH3_digest_long(acc, state, secret);  | 
5402  |  |         XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);  | 
5403  |  |         {   XXH128_hash_t h128; | 
5404  |  |             h128.low64  = XXH3_mergeAccs(acc,  | 
5405  |  |                                          secret + XXH_SECRET_MERGEACCS_START,  | 
5406  |  |                                          (xxh_u64)state->totalLen * XXH_PRIME64_1);  | 
5407  |  |             h128.high64 = XXH3_mergeAccs(acc,  | 
5408  |  |                                          secret + state->secretLimit + XXH_STRIPE_LEN  | 
5409  |  |                                                 - sizeof(acc) - XXH_SECRET_MERGEACCS_START,  | 
5410  |  |                                          ~((xxh_u64)state->totalLen * XXH_PRIME64_2));  | 
5411  |  |             return h128;  | 
5412  |  |         }  | 
5413  |  |     }  | 
5414  |  |     /* len <= XXH3_MIDSIZE_MAX : short code */  | 
5415  |  |     if (state->seed)  | 
5416  |  |         return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);  | 
5417  |  |     return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),  | 
5418  |  |                                    secret, state->secretLimit + XXH_STRIPE_LEN);  | 
5419  |  | }  | 
5420  |  |  | 
5421  |  | /* 128-bit utility functions */  | 
5422  |  |  | 
5423  |  | #include <string.h>   /* memcmp, memcpy */  | 
5424  |  |  | 
5425  |  | /* return : 1 is equal, 0 if different */  | 
5426  |  | /*! @ingroup xxh3_family */  | 
5427  |  | XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)  | 
5428  |  | { | 
5429  |  |     /* note : XXH128_hash_t is compact, it has no padding byte */  | 
5430  |  |     return !(memcmp(&h1, &h2, sizeof(h1)));  | 
5431  |  | }  | 
5432  |  |  | 
5433  |  | /* This prototype is compatible with stdlib's qsort().  | 
5434  |  |  * return : >0 if *h128_1  > *h128_2  | 
5435  |  |  *          <0 if *h128_1  < *h128_2  | 
5436  |  |  *          =0 if *h128_1 == *h128_2  */  | 
5437  |  | /*! @ingroup xxh3_family */  | 
5438  |  | XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)  | 
5439  |  | { | 
5440  |  |     XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;  | 
5441  |  |     XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;  | 
5442  |  |     int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);  | 
5443  |  |     /* note : bets that, in most cases, hash values are different */  | 
5444  |  |     if (hcmp) return hcmp;  | 
5445  |  |     return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);  | 
5446  |  | }  | 
5447  |  |  | 
5448  |  |  | 
5449  |  | /*======   Canonical representation   ======*/  | 
5450  |  | /*! @ingroup xxh3_family */  | 
5451  |  | XXH_PUBLIC_API void  | 
5452  |  | XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)  | 
5453  |  | { | 
5454  |  |     XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));  | 
5455  |  |     if (XXH_CPU_LITTLE_ENDIAN) { | 
5456  |  |         hash.high64 = XXH_swap64(hash.high64);  | 
5457  |  |         hash.low64  = XXH_swap64(hash.low64);  | 
5458  |  |     }  | 
5459  |  |     XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));  | 
5460  |  |     XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));  | 
5461  |  | }  | 
5462  |  |  | 
5463  |  | /*! @ingroup xxh3_family */  | 
5464  |  | XXH_PUBLIC_API XXH128_hash_t  | 
5465  |  | XXH128_hashFromCanonical(const XXH128_canonical_t* src)  | 
5466  |  | { | 
5467  |  |     XXH128_hash_t h;  | 
5468  |  |     h.high64 = XXH_readBE64(src);  | 
5469  |  |     h.low64  = XXH_readBE64(src->digest + 8);  | 
5470  |  |     return h;  | 
5471  |  | }  | 
5472  |  |  | 
5473  |  |  | 
5474  |  |  | 
5475  |  | /* ==========================================  | 
5476  |  |  * Secret generators  | 
5477  |  |  * ==========================================  | 
5478  |  |  */  | 
/* Smaller of two values. Function-like macro: each argument may be evaluated twice. */
#define XXH_MIN(x, y) (((y) < (x)) ? (y) : (x))
5480  |  |  | 
5481  |  | static void XXH3_combine16(void* dst, XXH128_hash_t h128)  | 
5482  |  | { | 
5483  |  |     XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );  | 
5484  |  |     XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );  | 
5485  |  | }  | 
5486  |  |  | 
5487  |  | /*! @ingroup xxh3_family */  | 
5488  |  | XXH_PUBLIC_API XXH_errorcode  | 
5489  |  | XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)  | 
5490  |  | { | 
5491  |  |     XXH_ASSERT(secretBuffer != NULL);  | 
5492  |  |     if (secretBuffer == NULL) return XXH_ERROR;  | 
5493  |  |     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);  | 
5494  |  |     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;  | 
5495  |  |     if (customSeedSize == 0) { | 
5496  |  |         customSeed = XXH3_kSecret;  | 
5497  |  |         customSeedSize = XXH_SECRET_DEFAULT_SIZE;  | 
5498  |  |     }  | 
5499  |  |     XXH_ASSERT(customSeed != NULL);  | 
5500  |  |     if (customSeed == NULL) return XXH_ERROR;  | 
5501  |  |  | 
5502  |  |     /* Fill secretBuffer with a copy of customSeed - repeat as needed */  | 
5503  |  |     {   size_t pos = 0; | 
5504  |  |         while (pos < secretSize) { | 
5505  |  |             size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);  | 
5506  |  |             memcpy((char*)secretBuffer + pos, customSeed, toCopy);  | 
5507  |  |             pos += toCopy;  | 
5508  |  |     }   }  | 
5509  |  |  | 
5510  |  |     {   size_t const nbSeg16 = secretSize / 16; | 
5511  |  |         size_t n;  | 
5512  |  |         XXH128_canonical_t scrambler;  | 
5513  |  |         XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));  | 
5514  |  |         for (n=0; n<nbSeg16; n++) { | 
5515  |  |             XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);  | 
5516  |  |             XXH3_combine16((char*)secretBuffer + n*16, h128);  | 
5517  |  |         }  | 
5518  |  |         /* last segment */  | 
5519  |  |         XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));  | 
5520  |  |     }  | 
5521  |  |     return XXH_OK;  | 
5522  |  | }  | 
5523  |  |  | 
5524  |  | /*! @ingroup xxh3_family */  | 
5525  |  | XXH_PUBLIC_API void  | 
5526  |  | XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)  | 
5527  |  | { | 
5528  |  |     XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];  | 
5529  |  |     XXH3_initCustomSecret(secret, seed);  | 
5530  |  |     XXH_ASSERT(secretBuffer != NULL);  | 
5531  |  |     memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);  | 
5532  |  | }  | 
5533  |  |  | 
5534  |  |  | 
5535  |  |  | 
5536  |  | /* Pop our optimization override from above */  | 
5537  |  | #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \  | 
5538  |  |   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \  | 
5539  |  |   && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */  | 
5540  |  | #  pragma GCC pop_options  | 
5541  |  | #endif  | 
5542  |  |  | 
5543  |  | #endif  /* XXH_NO_LONG_LONG */  | 
5544  |  |  | 
5545  |  | #endif  /* XXH_NO_XXH3 */  | 
5546  |  |  | 
5547  |  | /*!  | 
5548  |  |  * @}  | 
5549  |  |  */  | 
5550  |  | #endif  /* XXH_IMPLEMENTATION */  | 
5551  |  |  | 
5552  |  |  | 
5553  |  | #if defined (__cplusplus)  | 
5554  |  | }  | 
5555  |  | #endif  |