/src/zlib-ng/arch/x86/chunkset_ssse3.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* chunkset_ssse3.c -- SSSE3 inline functions to copy small data chunks. |
2 | | * For conditions of distribution and use, see copyright notice in zlib.h |
3 | | */ |
4 | | |
5 | | #include "zbuild.h" |
6 | | #include "zmemory.h" |
7 | | |
8 | | #if defined(X86_SSSE3) |
9 | | #include <immintrin.h> |
10 | | #include "arch/generic/chunk_128bit_perm_idx_lut.h" |
11 | | |
12 | | typedef __m128i chunk_t; |
13 | | |
14 | | #define HAVE_CHUNKMEMSET_2 |
15 | | #define HAVE_CHUNKMEMSET_4 |
16 | | #define HAVE_CHUNKMEMSET_8 |
17 | | #define HAVE_CHUNK_MAG |
18 | | |
19 | | |
20 | 0 | static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { |
21 | 0 | *chunk = _mm_set1_epi16(zng_memread_2(from)); |
22 | 0 | } |
23 | | |
24 | 0 | static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { |
25 | 0 | *chunk = _mm_set1_epi32(zng_memread_4(from)); |
26 | 0 | } |
27 | | |
28 | 0 | static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { |
29 | 0 | *chunk = _mm_set1_epi64x(zng_memread_8(from)); |
30 | 0 | } |
31 | | |
32 | 0 | static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { |
33 | 0 | *chunk = _mm_loadu_si128((__m128i *)s); |
34 | 0 | } |
35 | | |
36 | 0 | static inline void storechunk(uint8_t *out, chunk_t *chunk) { |
37 | 0 | _mm_storeu_si128((__m128i *)out, *chunk); |
38 | 0 | } |
39 | | |
40 | 0 | static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) { |
41 | 0 | lut_rem_pair lut_rem = perm_idx_lut[dist - 3]; |
42 | 0 | __m128i perm_vec, ret_vec; |
43 | | /* Important to note: |
44 | | * This is _not_ to subvert the memory sanitizer but to instead unpoison some |
45 | | * bytes we willingly and purposefully load uninitialized that we swizzle over |
46 | | * in a vector register, anyway. If what we assume is wrong about what is used, |
47 | | * the memory sanitizer will still usefully flag it */ |
48 | 0 | __msan_unpoison(buf + dist, 16 - dist); |
49 | 0 | ret_vec = _mm_loadu_si128((__m128i*)buf); |
50 | 0 | *chunk_rem = lut_rem.remval; |
51 | |
|
52 | 0 | perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx)); |
53 | 0 | ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec); |
54 | |
|
55 | 0 | return ret_vec; |
56 | 0 | } |
57 | | |
58 | | #define CHUNKSIZE chunksize_ssse3 |
59 | 0 | #define CHUNKMEMSET chunkmemset_ssse3 |
60 | | #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3 |
61 | 0 | #define CHUNKCOPY chunkcopy_ssse3 |
62 | 0 | #define CHUNKUNROLL chunkunroll_ssse3 |
63 | | |
64 | | #include "chunkset_tpl.h" |
65 | | |
66 | | #define INFLATE_FAST inflate_fast_ssse3 |
67 | | |
68 | | #include "inffast_tpl.h" |
69 | | |
70 | | #endif |