/src/libdeflate/lib/x86/matchfinder_impl.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * x86/matchfinder_impl.h - x86 implementations of matchfinder functions |
3 | | * |
4 | | * Copyright 2016 Eric Biggers |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person |
7 | | * obtaining a copy of this software and associated documentation |
8 | | * files (the "Software"), to deal in the Software without |
9 | | * restriction, including without limitation the rights to use, |
10 | | * copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | * copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following |
13 | | * conditions: |
14 | | * |
15 | | * The above copyright notice and this permission notice shall be |
16 | | * included in all copies or substantial portions of the Software. |
17 | | * |
18 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
22 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
23 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
24 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
25 | | * OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | |
28 | | #ifndef LIB_X86_MATCHFINDER_IMPL_H |
29 | | #define LIB_X86_MATCHFINDER_IMPL_H |
30 | | |
31 | | #include "cpu_features.h" |
32 | | |
33 | | #ifdef __AVX2__ |
34 | | static forceinline void |
35 | | matchfinder_init_avx2(mf_pos_t *data, size_t size) |
36 | | { |
37 | | __m256i *p = (__m256i *)data; |
38 | | __m256i v = _mm256_set1_epi16(MATCHFINDER_INITVAL); |
39 | | |
40 | | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); |
41 | | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); |
42 | | STATIC_ASSERT(sizeof(mf_pos_t) == 2); |
43 | | |
44 | | do { |
45 | | p[0] = v; |
46 | | p[1] = v; |
47 | | p[2] = v; |
48 | | p[3] = v; |
49 | | p += 4; |
50 | | size -= 4 * sizeof(*p); |
51 | | } while (size != 0); |
52 | | } |
53 | | #define matchfinder_init matchfinder_init_avx2 |
54 | | |
55 | | static forceinline void |
56 | | matchfinder_rebase_avx2(mf_pos_t *data, size_t size) |
57 | | { |
58 | | __m256i *p = (__m256i *)data; |
59 | | __m256i v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); |
60 | | |
61 | | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); |
62 | | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); |
63 | | STATIC_ASSERT(sizeof(mf_pos_t) == 2); |
64 | | |
65 | | do { |
66 | | /* PADDSW: Add Packed Signed Integers With Signed Saturation */ |
67 | | p[0] = _mm256_adds_epi16(p[0], v); |
68 | | p[1] = _mm256_adds_epi16(p[1], v); |
69 | | p[2] = _mm256_adds_epi16(p[2], v); |
70 | | p[3] = _mm256_adds_epi16(p[3], v); |
71 | | p += 4; |
72 | | size -= 4 * sizeof(*p); |
73 | | } while (size != 0); |
74 | | } |
75 | | #define matchfinder_rebase matchfinder_rebase_avx2 |
76 | | |
77 | | #elif HAVE_SSE2_NATIVE |
78 | | static forceinline void |
79 | | matchfinder_init_sse2(mf_pos_t *data, size_t size) |
80 | 0 | { |
81 | 0 | __m128i *p = (__m128i *)data; |
82 | 0 | __m128i v = _mm_set1_epi16(MATCHFINDER_INITVAL); |
83 | |
|
84 | 0 | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); |
85 | 0 | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); |
86 | 0 | STATIC_ASSERT(sizeof(mf_pos_t) == 2); |
87 | |
|
88 | 0 | do { |
89 | 0 | p[0] = v; |
90 | 0 | p[1] = v; |
91 | 0 | p[2] = v; |
92 | 0 | p[3] = v; |
93 | 0 | p += 4; |
94 | 0 | size -= 4 * sizeof(*p); |
95 | 0 | } while (size != 0); |
96 | 0 | } |
97 | 0 | #define matchfinder_init matchfinder_init_sse2 |
98 | | |
99 | | static forceinline void |
100 | | matchfinder_rebase_sse2(mf_pos_t *data, size_t size) |
101 | 0 | { |
102 | 0 | __m128i *p = (__m128i *)data; |
103 | 0 | __m128i v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); |
104 | |
|
105 | 0 | STATIC_ASSERT(MATCHFINDER_MEM_ALIGNMENT % sizeof(*p) == 0); |
106 | 0 | STATIC_ASSERT(MATCHFINDER_SIZE_ALIGNMENT % (4 * sizeof(*p)) == 0); |
107 | 0 | STATIC_ASSERT(sizeof(mf_pos_t) == 2); |
108 | |
|
109 | 0 | do { |
110 | | /* PADDSW: Add Packed Signed Integers With Signed Saturation */ |
111 | 0 | p[0] = _mm_adds_epi16(p[0], v); |
112 | 0 | p[1] = _mm_adds_epi16(p[1], v); |
113 | 0 | p[2] = _mm_adds_epi16(p[2], v); |
114 | 0 | p[3] = _mm_adds_epi16(p[3], v); |
115 | 0 | p += 4; |
116 | 0 | size -= 4 * sizeof(*p); |
117 | 0 | } while (size != 0); |
118 | 0 | } |
119 | 0 | #define matchfinder_rebase matchfinder_rebase_sse2 |
120 | | #endif /* HAVE_SSE2_NATIVE */ |
121 | | |
122 | | #endif /* LIB_X86_MATCHFINDER_IMPL_H */ |