/src/zlib-ng/arch/x86/chunkset_ssse3.c
Line  | Count  | Source  | 
1  |  | /* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks.  | 
2  |  |  * For conditions of distribution and use, see copyright notice in zlib.h  | 
3  |  |  */  | 
4  |  |  | 
5  |  | #include "zbuild.h"  | 
6  |  | #include "zmemory.h"  | 
7  |  |  | 
8  |  | #if defined(X86_SSSE3)  | 
9  |  | #include <immintrin.h>  | 
10  |  | #include "arch/generic/chunk_128bit_perm_idx_lut.h"  | 
11  |  |  | 
/* A chunk is one 128-bit SSE integer vector (16 bytes). */
typedef __m128i chunk_t;

/* Announce which optional helpers this file defines (chunkmemset_2/4/8 and
 * GET_CHUNK_MAG); presumably consulted by chunkset_tpl.h -- TODO confirm. */
#define HAVE_CHUNK_MAG
#define HAVE_CHUNKMEMSET_8
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_2
18  |  |  | 
19  |  |  | 
20  | 0  | static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { | 
21  | 0  |     *chunk = _mm_set1_epi16(zng_memread_2(from));  | 
22  | 0  | }  | 
23  |  |  | 
24  | 0  | static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { | 
25  | 0  |     *chunk = _mm_set1_epi32(zng_memread_4(from));  | 
26  | 0  | }  | 
27  |  |  | 
28  | 0  | static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { | 
29  | 0  |     *chunk = _mm_set1_epi64x(zng_memread_8(from));  | 
30  | 0  | }  | 
31  |  |  | 
32  | 0  | static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { | 
33  | 0  |     *chunk = _mm_loadu_si128((__m128i *)s);  | 
34  | 0  | }  | 
35  |  |  | 
36  | 0  | static inline void storechunk(uint8_t *out, chunk_t *chunk) { | 
37  | 0  |     _mm_storeu_si128((__m128i *)out, *chunk);  | 
38  | 0  | }  | 
39  |  |  | 
40  | 0  | static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) { | 
41  | 0  |     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];  | 
42  | 0  |     __m128i perm_vec, ret_vec;  | 
43  |  |     /* Important to note:  | 
44  |  |      * This is _not_ to subvert the memory sanitizer but to instead unpoison some  | 
45  |  |      * bytes we willingly and purposefully load uninitialized that we swizzle over  | 
46  |  |      * in a vector register, anyway.  If what we assume is wrong about what is used,  | 
47  |  |      * the memory sanitizer will still usefully flag it */  | 
48  | 0  |     __msan_unpoison(buf + dist, 16 - dist);  | 
49  | 0  |     ret_vec = _mm_loadu_si128((__m128i*)buf);  | 
50  | 0  |     *chunk_rem = lut_rem.remval;  | 
51  |  | 
  | 
52  | 0  |     perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx));  | 
53  | 0  |     ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec);  | 
54  |  | 
  | 
55  | 0  |     return ret_vec;  | 
56  | 0  | }  | 
57  |  |  | 
58  | 0  | #define CHUNKSIZE        chunksize_ssse3  | 
59  | 0  | #define CHUNKMEMSET      chunkmemset_ssse3  | 
60  |  | #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3  | 
61  | 0  | #define CHUNKCOPY        chunkcopy_ssse3  | 
62  | 0  | #define CHUNKUNROLL      chunkunroll_ssse3  | 
63  |  |  | 
64  |  | #include "chunkset_tpl.h"  | 
65  |  |  | 
66  |  | #define INFLATE_FAST     inflate_fast_ssse3  | 
67  |  |  | 
68  |  | #include "inffast_tpl.h"  | 
69  |  |  | 
70  |  | #endif  |