/src/zlib-ng/arch/x86/chunkset_ssse3.c
Line | Count | Source |
1 | | /* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks. |
2 | | * For conditions of distribution and use, see copyright notice in zlib.h |
3 | | */ |
4 | | |
5 | | #ifdef X86_SSSE3 |
6 | | |
7 | | #include "zbuild.h" |
8 | | #include "zmemory.h" |
9 | | |
10 | | #include <immintrin.h> |
11 | | #include "arch/generic/chunk_128bit_perm_idx_lut.h" |
12 | | |
13 | | typedef __m128i chunk_t; /* A chunk is one 128-bit (16-byte) integer vector. */ |
14 | | |
15 | | #define HAVE_CHUNKMEMSET_2 /* chunkmemset_2 is defined in this file. */ |
16 | | #define HAVE_CHUNKMEMSET_4 /* chunkmemset_4 is defined in this file. */ |
17 | | #define HAVE_CHUNKMEMSET_8 /* chunkmemset_8 is defined in this file. */ |
18 | | #define HAVE_CHUNK_MAG /* GET_CHUNK_MAG is defined in this file. NOTE(review): these flags are presumably tested by chunkset_tpl.h - confirm there. */ |
19 | | |
20 | | |
21 | 0 | static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { /* Fill *chunk by repeating one 16-bit value, taken from `from`, in all eight 16-bit lanes. */ |
22 | 0 | *chunk = _mm_set1_epi16(zng_memread_2(from)); /* zng_memread_2 is defined elsewhere; presumably an alignment-safe 2-byte read - confirm in zmemory.h. */ |
23 | 0 | } |
24 | | |
25 | 0 | static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { /* Fill *chunk by repeating one 32-bit value, taken from `from`, in all four 32-bit lanes. */ |
26 | 0 | *chunk = _mm_set1_epi32(zng_memread_4(from)); /* zng_memread_4 is defined elsewhere; presumably an alignment-safe 4-byte read - confirm in zmemory.h. */ |
27 | 0 | } |
28 | | |
29 | 0 | static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { /* Fill *chunk by repeating one 64-bit value, taken from `from`, in both 64-bit lanes. */ |
30 | 0 | *chunk = _mm_set1_epi64x(zng_memread_8(from)); /* zng_memread_8 is defined elsewhere; presumably an alignment-safe 8-byte read - confirm in zmemory.h. */ |
31 | 0 | } |
32 | | |
33 | 0 | static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { /* Read 16 bytes starting at s into *chunk. */ |
34 | 0 | *chunk = _mm_loadu_si128((__m128i *)s); /* Unaligned load: s needs no particular alignment, but all 16 bytes must be readable. */ |
35 | 0 | } |
36 | | |
37 | 0 | static inline void storechunk(uint8_t *out, chunk_t *chunk) { /* Write the 16 bytes held in *chunk to memory starting at out. */ |
38 | 0 | _mm_storeu_si128((__m128i *)out, *chunk); /* Unaligned store: out needs no particular alignment, but all 16 bytes must be writable. */ |
39 | 0 | } |
40 | | |
41 | 0 | static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) { /* Load 16 bytes from buf, rearrange them with a byte-shuffle mask selected by dist, return the result, and store the table's remval for dist in *chunk_rem. */ |
42 | 0 | lut_rem_pair lut_rem = perm_idx_lut[dist - 3]; /* Index is dist - 3, so dist must be at least 3; the upper bound depends on the size of perm_idx_lut, which is declared elsewhere. */ |
43 | 0 | __m128i perm_vec, ret_vec; |
44 | | /* Important to note: |
45 | | * This is _not_ to subvert the memory sanitizer but to instead unpoison some |
46 | | * bytes we willingly and purposefully load uninitialized that we swizzle over |
47 | | * in a vector register, anyway. If what we assume is wrong about what is used, |
48 | | * the memory sanitizer will still usefully flag it */ |
49 | 0 | __msan_unpoison(buf + dist, 16 - dist); /* Covers bytes buf[dist] through buf[15], i.e. everything the 16-byte load below reads beyond the first dist bytes. */ |
50 | 0 | ret_vec = _mm_loadu_si128((__m128i*)buf); /* Unaligned 16-byte load; always reads a full vector regardless of dist. */ |
51 | 0 | *chunk_rem = lut_rem.remval; /* Hand the table's remval for this dist back to the caller. */ |
52 | 
|
53 | 0 | perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx)); /* Aligned load: permute_table + lut_rem.idx has to be 16-byte aligned. */ |
54 | 0 | ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec); /* Each output byte is picked from ret_vec by the matching index byte in perm_vec. */ |
55 | 
|
56 | 0 | return ret_vec; |
57 | 0 | } |
58 | | |
59 | | #define CHUNKSIZE chunksize_ssse3 /* The five macros below map generic names to SSSE3-suffixed identifiers. NOTE(review): presumably chunkset_tpl.h uses them as the names of the functions it defines - confirm there. */ |
60 | 0 | #define CHUNKMEMSET chunkmemset_ssse3 |
61 | | #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3 |
62 | 0 | #define CHUNKCOPY chunkcopy_ssse3 |
63 | 0 | #define CHUNKUNROLL chunkunroll_ssse3 |
64 | | |
65 | | #include "chunkset_tpl.h" /* Included after the helpers and name macros above so the header can see them. */ |
66 | | |
67 | | #define INFLATE_FAST inflate_fast_ssse3 /* NOTE(review): presumably the name inffast_tpl.h gives to the function it defines - confirm there. */ |
68 | | |
69 | | #include "inffast_tpl.h" |
70 | | |
71 | | #endif |