/src/zlib-ng/arch/x86/chunkset_ssse3.c
Line | Count | Source |
1 | | /* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks. |
2 | | * For conditions of distribution and use, see copyright notice in zlib.h |
3 | | */ |
4 | | |
5 | | #ifdef X86_SSSE3 |
6 | | |
7 | | #include "zbuild.h" |
8 | | #include "zsanitizer.h" |
9 | | #include "zmemory.h" |
10 | | |
11 | | #include <immintrin.h> |
12 | | #include "arch/shared/chunk_128bit_perm_idx_lut.h" |
13 | | |
14 | | typedef __m128i chunk_t; |
15 | | |
16 | | #define HAVE_CHUNKMEMSET_2 |
17 | | #define HAVE_CHUNKMEMSET_4 |
18 | | #define HAVE_CHUNKMEMSET_8 |
19 | | #define HAVE_CHUNK_MAG |
20 | | |
21 | | |
22 | 0 | static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { |
23 | 0 | *chunk = _mm_set1_epi16(zng_memread_2(from)); |
24 | 0 | } |
25 | | |
26 | 0 | static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { |
27 | 0 | *chunk = _mm_set1_epi32(zng_memread_4(from)); |
28 | 0 | } |
29 | | |
30 | 0 | static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { |
31 | 0 | *chunk = _mm_set1_epi64x(zng_memread_8(from)); |
32 | 0 | } |
33 | | |
34 | 0 | static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { |
35 | 0 | *chunk = _mm_loadu_si128((__m128i *)s); |
36 | 0 | } |
37 | | |
38 | 0 | static inline void storechunk(uint8_t *out, chunk_t *chunk) { |
39 | 0 | _mm_storeu_si128((__m128i *)out, *chunk); |
40 | 0 | } |
41 | | |
42 | 0 | static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) { |
43 | 0 | lut_rem_pair lut_rem = perm_idx_lut[dist - 3]; |
44 | 0 | __m128i perm_vec, ret_vec; |
45 | | /* Important to note: |
46 | | * This is _not_ to subvert the memory sanitizer but to instead unpoison some |
47 | | * bytes we willingly and purposefully load uninitialized that we swizzle over |
48 | | * in a vector register, anyway. If what we assume is wrong about what is used, |
49 | | * the memory sanitizer will still usefully flag it */ |
50 | 0 | __msan_unpoison(buf + dist, 16 - dist); |
51 | 0 | ret_vec = _mm_loadu_si128((__m128i*)buf); |
52 | 0 | *chunk_rem = lut_rem.remval; |
53 | |
|
54 | 0 | perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx)); |
55 | 0 | ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec); |
56 | |
|
57 | 0 | return ret_vec; |
58 | 0 | } |
59 | | |
60 | | #define CHUNKSIZE chunksize_ssse3 |
61 | 0 | #define CHUNKMEMSET chunkmemset_ssse3 |
62 | | #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3 |
63 | 0 | #define CHUNKCOPY chunkcopy_ssse3 |
64 | 0 | #define CHUNKUNROLL chunkunroll_ssse3 |
65 | | |
66 | | #include "chunkset_tpl.h" |
67 | | |
68 | | #define INFLATE_FAST inflate_fast_ssse3 |
69 | | |
70 | | #include "inffast_tpl.h" |
71 | | |
72 | | #endif |