/src/aom/aom_dsp/simd/v256_intrinsics_v128.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_ |
13 | | #define AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_ |
14 | | |
15 | | #include "config/aom_config.h" |
16 | | |
17 | | #if HAVE_NEON |
18 | | #include "aom_dsp/simd/v128_intrinsics_arm.h" |
19 | | #elif HAVE_SSE2 |
20 | | #include "aom_dsp/simd/v128_intrinsics_x86.h" |
21 | | #else |
22 | | #include "aom_dsp/simd/v128_intrinsics.h" |
23 | | #endif |
24 | | |
25 | | #if HAVE_NEON |
26 | | typedef int64x2x2_t v256; |
27 | | #else |
28 | | typedef struct { |
29 | | v128 val[2]; |
30 | | } v256; |
31 | | #endif |
32 | | |
33 | 0 | SIMD_INLINE uint32_t v256_low_u32(v256 a) { return v128_low_u32(a.val[0]); } Unexecuted instantiation: cdef_block_sse2.c:v256_low_u32 Unexecuted instantiation: cdef_block_ssse3.c:v256_low_u32 Unexecuted instantiation: cdef_block_sse4.c:v256_low_u32 |
34 | | |
35 | 0 | SIMD_INLINE v64 v256_low_v64(v256 a) { return v128_low_v64(a.val[0]); } Unexecuted instantiation: cdef_block_sse2.c:v256_low_v64 Unexecuted instantiation: cdef_block_ssse3.c:v256_low_v64 Unexecuted instantiation: cdef_block_sse4.c:v256_low_v64 |
36 | | |
37 | 0 | SIMD_INLINE uint64_t v256_low_u64(v256 a) { return v64_u64(v256_low_v64(a)); } Unexecuted instantiation: cdef_block_sse2.c:v256_low_u64 Unexecuted instantiation: cdef_block_ssse3.c:v256_low_u64 Unexecuted instantiation: cdef_block_sse4.c:v256_low_u64 |
38 | | |
39 | 0 | SIMD_INLINE v128 v256_low_v128(v256 a) { return a.val[0]; } Unexecuted instantiation: cdef_block_sse2.c:v256_low_v128 Unexecuted instantiation: cdef_block_ssse3.c:v256_low_v128 Unexecuted instantiation: cdef_block_sse4.c:v256_low_v128 |
40 | | |
41 | 0 | SIMD_INLINE v128 v256_high_v128(v256 a) { return a.val[1]; } Unexecuted instantiation: cdef_block_sse2.c:v256_high_v128 Unexecuted instantiation: cdef_block_ssse3.c:v256_high_v128 Unexecuted instantiation: cdef_block_sse4.c:v256_high_v128 |
42 | | |
43 | 0 | SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) { |
44 | 0 | v256 t; |
45 | 0 | t.val[1] = hi; |
46 | 0 | t.val[0] = lo; |
47 | 0 | return t; |
48 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_from_v128 Unexecuted instantiation: cdef_block_ssse3.c:v256_from_v128 Unexecuted instantiation: cdef_block_sse4.c:v256_from_v128 |
49 | | |
50 | 0 | SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) { |
51 | 0 | return v256_from_v128(v128_from_64(a, b), v128_from_64(c, d)); |
52 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_from_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_from_64 Unexecuted instantiation: cdef_block_sse4.c:v256_from_64 |
53 | | |
54 | 0 | SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) { |
55 | 0 | return v256_from_v128(v128_from_v64(a, b), v128_from_v64(c, d)); |
56 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_from_v64 Unexecuted instantiation: cdef_block_ssse3.c:v256_from_v64 Unexecuted instantiation: cdef_block_sse4.c:v256_from_v64 |
57 | | |
58 | 0 | SIMD_INLINE v256 v256_load_unaligned(const void *p) { |
59 | 0 | return v256_from_v128(v128_load_unaligned((uint8_t *)p + 16), |
60 | 0 | v128_load_unaligned(p)); |
61 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_load_unaligned Unexecuted instantiation: cdef_block_ssse3.c:v256_load_unaligned Unexecuted instantiation: cdef_block_sse4.c:v256_load_unaligned |
62 | | |
63 | 0 | SIMD_INLINE v256 v256_load_aligned(const void *p) { |
64 | 0 | return v256_from_v128(v128_load_aligned((uint8_t *)p + 16), |
65 | 0 | v128_load_aligned(p)); |
66 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_load_aligned Unexecuted instantiation: cdef_block_ssse3.c:v256_load_aligned Unexecuted instantiation: cdef_block_sse4.c:v256_load_aligned |
67 | | |
68 | 0 | SIMD_INLINE void v256_store_unaligned(void *p, v256 a) { |
69 | 0 | v128_store_unaligned(p, a.val[0]); |
70 | 0 | v128_store_unaligned((uint8_t *)p + 16, a.val[1]); |
71 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_store_unaligned Unexecuted instantiation: cdef_block_ssse3.c:v256_store_unaligned Unexecuted instantiation: cdef_block_sse4.c:v256_store_unaligned |
72 | | |
73 | 0 | SIMD_INLINE void v256_store_aligned(void *p, v256 a) { |
74 | 0 | v128_store_aligned(p, a.val[0]); |
75 | 0 | v128_store_aligned((uint8_t *)p + 16, a.val[1]); |
76 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_store_aligned Unexecuted instantiation: cdef_block_ssse3.c:v256_store_aligned Unexecuted instantiation: cdef_block_sse4.c:v256_store_aligned |
77 | | |
78 | 0 | SIMD_INLINE v256 v256_zero(void) { |
79 | 0 | return v256_from_v128(v128_zero(), v128_zero()); |
80 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_zero Unexecuted instantiation: cdef_block_ssse3.c:v256_zero Unexecuted instantiation: cdef_block_sse4.c:v256_zero |
81 | | |
82 | 0 | SIMD_INLINE v256 v256_dup_8(uint8_t x) { |
83 | 0 | v128 t = v128_dup_8(x); |
84 | 0 | return v256_from_v128(t, t); |
85 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dup_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_8 Unexecuted instantiation: cdef_block_sse4.c:v256_dup_8 |
86 | | |
87 | 0 | SIMD_INLINE v256 v256_dup_16(uint16_t x) { |
88 | 0 | v128 t = v128_dup_16(x); |
89 | 0 | return v256_from_v128(t, t); |
90 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dup_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_16 Unexecuted instantiation: cdef_block_sse4.c:v256_dup_16 |
91 | | |
92 | 0 | SIMD_INLINE v256 v256_dup_32(uint32_t x) { |
93 | 0 | v128 t = v128_dup_32(x); |
94 | 0 | return v256_from_v128(t, t); |
95 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dup_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_32 Unexecuted instantiation: cdef_block_sse4.c:v256_dup_32 |
96 | | |
97 | 0 | SIMD_INLINE v256 v256_dup_64(uint64_t x) { |
98 | 0 | v128 t = v128_dup_64(x); |
99 | 0 | return v256_from_v128(t, t); |
100 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dup_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_64 Unexecuted instantiation: cdef_block_sse4.c:v256_dup_64 |
101 | | |
102 | 0 | SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) { |
103 | 0 | return v128_dotp_su8(a.val[1], b.val[1]) + v128_dotp_su8(a.val[0], b.val[0]); |
104 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_su8 Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_su8 Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_su8 |
105 | | |
106 | 0 | SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) { |
107 | 0 | return v128_dotp_s16(a.val[1], b.val[1]) + v128_dotp_s16(a.val[0], b.val[0]); |
108 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_s16 |
109 | | |
110 | 0 | SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) { |
111 | 0 | return v128_dotp_s32(a.val[1], b.val[1]) + v128_dotp_s32(a.val[0], b.val[0]); |
112 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_s32 |
113 | | |
114 | 0 | SIMD_INLINE uint64_t v256_hadd_u8(v256 a) { |
115 | 0 | return v128_hadd_u8(a.val[1]) + v128_hadd_u8(a.val[0]); |
116 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_hadd_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_hadd_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_hadd_u8 |
117 | | |
118 | | typedef struct { |
119 | | sad128_internal val[2]; |
120 | | } sad256_internal; |
121 | | |
122 | 0 | SIMD_INLINE sad256_internal v256_sad_u8_init(void) { |
123 | 0 | sad256_internal t; |
124 | 0 | t.val[1] = v128_sad_u8_init(); |
125 | 0 | t.val[0] = v128_sad_u8_init(); |
126 | 0 | return t; |
127 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8_init Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8_init Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8_init |
128 | | |
129 | | /* Implementation dependent return value. Result must be finalised with |
130 | | v256_sad_u8_sum(). |
131 | | The result for more than 16 v256_sad_u8() calls is undefined. */ |
132 | 0 | SIMD_INLINE sad256_internal v256_sad_u8(sad256_internal s, v256 a, v256 b) { |
133 | 0 | sad256_internal t; |
134 | 0 | t.val[1] = v128_sad_u8(s.val[1], a.val[1], b.val[1]); |
135 | 0 | t.val[0] = v128_sad_u8(s.val[0], a.val[0], b.val[0]); |
136 | 0 | return t; |
137 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8 |
138 | | |
139 | 0 | SIMD_INLINE uint32_t v256_sad_u8_sum(sad256_internal s) { |
140 | 0 | return v128_sad_u8_sum(s.val[1]) + v128_sad_u8_sum(s.val[0]); |
141 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8_sum Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8_sum Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8_sum |
142 | | |
143 | | typedef struct { |
144 | | ssd128_internal val[2]; |
145 | | } ssd256_internal; |
146 | | |
147 | 0 | SIMD_INLINE ssd256_internal v256_ssd_u8_init(void) { |
148 | 0 | ssd256_internal t; |
149 | 0 | t.val[1] = v128_ssd_u8_init(); |
150 | 0 | t.val[0] = v128_ssd_u8_init(); |
151 | 0 | return t; |
152 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8_init Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8_init Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8_init |
153 | | |
154 | | /* Implementation dependent return value. Result must be finalised with |
155 | | * v256_ssd_u8_sum(). */ |
156 | 0 | SIMD_INLINE ssd256_internal v256_ssd_u8(ssd256_internal s, v256 a, v256 b) { |
157 | 0 | ssd256_internal t; |
158 | 0 | t.val[1] = v128_ssd_u8(s.val[1], a.val[1], b.val[1]); |
159 | 0 | t.val[0] = v128_ssd_u8(s.val[0], a.val[0], b.val[0]); |
160 | 0 | return t; |
161 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8 |
162 | | |
163 | 0 | SIMD_INLINE uint32_t v256_ssd_u8_sum(ssd256_internal s) { |
164 | 0 | return v128_ssd_u8_sum(s.val[1]) + v128_ssd_u8_sum(s.val[0]); |
165 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8_sum Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8_sum Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8_sum |
166 | | |
167 | 0 | SIMD_INLINE v256 v256_or(v256 a, v256 b) { |
168 | 0 | return v256_from_v128(v128_or(a.val[1], b.val[1]), |
169 | 0 | v128_or(a.val[0], b.val[0])); |
170 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_or Unexecuted instantiation: cdef_block_ssse3.c:v256_or Unexecuted instantiation: cdef_block_sse4.c:v256_or |
171 | | |
172 | 0 | SIMD_INLINE v256 v256_xor(v256 a, v256 b) { |
173 | 0 | return v256_from_v128(v128_xor(a.val[1], b.val[1]), |
174 | 0 | v128_xor(a.val[0], b.val[0])); |
175 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_xor Unexecuted instantiation: cdef_block_ssse3.c:v256_xor Unexecuted instantiation: cdef_block_sse4.c:v256_xor |
176 | | |
177 | 0 | SIMD_INLINE v256 v256_and(v256 a, v256 b) { |
178 | 0 | return v256_from_v128(v128_and(a.val[1], b.val[1]), |
179 | 0 | v128_and(a.val[0], b.val[0])); |
180 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_and Unexecuted instantiation: cdef_block_ssse3.c:v256_and Unexecuted instantiation: cdef_block_sse4.c:v256_and |
181 | | |
182 | 0 | SIMD_INLINE v256 v256_andn(v256 a, v256 b) { |
183 | 0 | return v256_from_v128(v128_andn(a.val[1], b.val[1]), |
184 | 0 | v128_andn(a.val[0], b.val[0])); |
185 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_andn Unexecuted instantiation: cdef_block_ssse3.c:v256_andn Unexecuted instantiation: cdef_block_sse4.c:v256_andn |
186 | | |
187 | 0 | SIMD_INLINE v256 v256_add_8(v256 a, v256 b) { |
188 | 0 | return v256_from_v128(v128_add_8(a.val[1], b.val[1]), |
189 | 0 | v128_add_8(a.val[0], b.val[0])); |
190 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_add_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_add_8 Unexecuted instantiation: cdef_block_sse4.c:v256_add_8 |
191 | | |
192 | 0 | SIMD_INLINE v256 v256_add_16(v256 a, v256 b) { |
193 | 0 | return v256_from_v128(v128_add_16(a.val[1], b.val[1]), |
194 | 0 | v128_add_16(a.val[0], b.val[0])); |
195 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_add_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_add_16 Unexecuted instantiation: cdef_block_sse4.c:v256_add_16 |
196 | | |
197 | 0 | SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) { |
198 | 0 | return v256_from_v128(v128_sadd_s8(a.val[1], b.val[1]), |
199 | 0 | v128_sadd_s8(a.val[0], b.val[0])); |
200 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_s8 |
201 | | |
202 | 0 | SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) { |
203 | 0 | return v256_from_v128(v128_sadd_u8(a.val[1], b.val[1]), |
204 | 0 | v128_sadd_u8(a.val[0], b.val[0])); |
205 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_u8 |
206 | | |
207 | 0 | SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) { |
208 | 0 | return v256_from_v128(v128_sadd_s16(a.val[1], b.val[1]), |
209 | 0 | v128_sadd_s16(a.val[0], b.val[0])); |
210 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_s16 |
211 | | |
212 | 0 | SIMD_INLINE v256 v256_add_32(v256 a, v256 b) { |
213 | 0 | return v256_from_v128(v128_add_32(a.val[1], b.val[1]), |
214 | 0 | v128_add_32(a.val[0], b.val[0])); |
215 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_add_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_add_32 Unexecuted instantiation: cdef_block_sse4.c:v256_add_32 |
216 | | |
217 | 0 | SIMD_INLINE v256 v256_add_64(v256 a, v256 b) { |
218 | 0 | return v256_from_v128(v128_add_64(a.val[1], b.val[1]), |
219 | 0 | v128_add_64(a.val[0], b.val[0])); |
220 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_add_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_add_64 Unexecuted instantiation: cdef_block_sse4.c:v256_add_64 |
221 | | |
222 | 0 | SIMD_INLINE v256 v256_padd_u8(v256 a) { |
223 | 0 | return v256_from_v128(v128_padd_u8(a.val[1]), v128_padd_u8(a.val[0])); |
224 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_padd_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_padd_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_padd_u8 |
225 | | |
226 | 0 | SIMD_INLINE v256 v256_padd_s16(v256 a) { |
227 | 0 | return v256_from_v128(v128_padd_s16(a.val[1]), v128_padd_s16(a.val[0])); |
228 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_padd_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_padd_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_padd_s16 |
229 | | |
230 | 0 | SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) { |
231 | 0 | return v256_from_v128(v128_sub_8(a.val[1], b.val[1]), |
232 | 0 | v128_sub_8(a.val[0], b.val[0])); |
233 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sub_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_8 Unexecuted instantiation: cdef_block_sse4.c:v256_sub_8 |
234 | | |
235 | 0 | SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) { |
236 | 0 | return v256_from_v128(v128_ssub_u8(a.val[1], b.val[1]), |
237 | 0 | v128_ssub_u8(a.val[0], b.val[0])); |
238 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_u8 |
239 | | |
240 | 0 | SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) { |
241 | 0 | return v256_from_v128(v128_ssub_s8(a.val[1], b.val[1]), |
242 | 0 | v128_ssub_s8(a.val[0], b.val[0])); |
243 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_s8 |
244 | | |
245 | 0 | SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) { |
246 | 0 | return v256_from_v128(v128_sub_16(a.val[1], b.val[1]), |
247 | 0 | v128_sub_16(a.val[0], b.val[0])); |
248 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sub_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_16 Unexecuted instantiation: cdef_block_sse4.c:v256_sub_16 |
249 | | |
250 | 0 | SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) { |
251 | 0 | return v256_from_v128(v128_ssub_s16(a.val[1], b.val[1]), |
252 | 0 | v128_ssub_s16(a.val[0], b.val[0])); |
253 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_s16 |
254 | | |
255 | 0 | SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) { |
256 | 0 | return v256_from_v128(v128_ssub_u16(a.val[1], b.val[1]), |
257 | 0 | v128_ssub_u16(a.val[0], b.val[0])); |
258 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_u16 |
259 | | |
260 | 0 | SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { |
261 | 0 | return v256_from_v128(v128_sub_32(a.val[1], b.val[1]), |
262 | 0 | v128_sub_32(a.val[0], b.val[0])); |
263 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sub_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_32 Unexecuted instantiation: cdef_block_sse4.c:v256_sub_32 |
264 | | |
265 | 0 | SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) { |
266 | 0 | return v256_from_v128(v128_sub_64(a.val[1], b.val[1]), |
267 | 0 | v128_sub_64(a.val[0], b.val[0])); |
268 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sub_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_64 Unexecuted instantiation: cdef_block_sse4.c:v256_sub_64 |
269 | | |
270 | 0 | SIMD_INLINE v256 v256_abs_s16(v256 a) { |
271 | 0 | return v256_from_v128(v128_abs_s16(a.val[1]), v128_abs_s16(a.val[0])); |
272 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_abs_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_abs_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_abs_s16 |
273 | | |
274 | 0 | SIMD_INLINE v256 v256_abs_s8(v256 a) { |
275 | 0 | return v256_from_v128(v128_abs_s8(a.val[1]), v128_abs_s8(a.val[0])); |
276 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_abs_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_abs_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_abs_s8 |
277 | | |
278 | 0 | SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) { |
279 | 0 | v128 lo_bits = v128_mullo_s16(a, b); |
280 | 0 | v128 hi_bits = v128_mulhi_s16(a, b); |
281 | 0 | return v256_from_v128(v128_ziphi_16(hi_bits, lo_bits), |
282 | 0 | v128_ziplo_16(hi_bits, lo_bits)); |
283 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_mul_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_mul_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_mul_s16 |
284 | | |
285 | 0 | SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) { |
286 | 0 | return v256_from_v128(v128_mullo_s16(a.val[1], b.val[1]), |
287 | 0 | v128_mullo_s16(a.val[0], b.val[0])); |
288 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_mullo_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_mullo_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_mullo_s16 |
289 | | |
290 | 0 | SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) { |
291 | 0 | return v256_from_v128(v128_mulhi_s16(a.val[1], b.val[1]), |
292 | 0 | v128_mulhi_s16(a.val[0], b.val[0])); |
293 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_mulhi_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_mulhi_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_mulhi_s16 |
294 | | |
295 | 0 | SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) { |
296 | 0 | return v256_from_v128(v128_mullo_s32(a.val[1], b.val[1]), |
297 | 0 | v128_mullo_s32(a.val[0], b.val[0])); |
298 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_mullo_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_mullo_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_mullo_s32 |
299 | | |
300 | 0 | SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) { |
301 | 0 | return v256_from_v128(v128_madd_s16(a.val[1], b.val[1]), |
302 | 0 | v128_madd_s16(a.val[0], b.val[0])); |
303 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_madd_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_madd_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_madd_s16 |
304 | | |
305 | 0 | SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) { |
306 | 0 | return v256_from_v128(v128_madd_us8(a.val[1], b.val[1]), |
307 | 0 | v128_madd_us8(a.val[0], b.val[0])); |
308 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_madd_us8 Unexecuted instantiation: cdef_block_ssse3.c:v256_madd_us8 Unexecuted instantiation: cdef_block_sse4.c:v256_madd_us8 |
309 | | |
310 | 0 | SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) { |
311 | 0 | return v256_from_v128(v128_avg_u8(a.val[1], b.val[1]), |
312 | 0 | v128_avg_u8(a.val[0], b.val[0])); |
313 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_avg_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_avg_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_avg_u8 |
314 | | |
315 | 0 | SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) { |
316 | 0 | return v256_from_v128(v128_rdavg_u8(a.val[1], b.val[1]), |
317 | 0 | v128_rdavg_u8(a.val[0], b.val[0])); |
318 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_rdavg_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_rdavg_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_rdavg_u8 |
319 | | |
320 | 0 | SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) { |
321 | 0 | return v256_from_v128(v128_rdavg_u16(a.val[1], b.val[1]), |
322 | 0 | v128_rdavg_u16(a.val[0], b.val[0])); |
323 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_rdavg_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_rdavg_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_rdavg_u16 |
324 | | |
325 | 0 | SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) { |
326 | 0 | return v256_from_v128(v128_avg_u16(a.val[1], b.val[1]), |
327 | 0 | v128_avg_u16(a.val[0], b.val[0])); |
328 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_avg_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_avg_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_avg_u16 |
329 | | |
330 | 0 | SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) { |
331 | 0 | return v256_from_v128(v128_min_u8(a.val[1], b.val[1]), |
332 | 0 | v128_min_u8(a.val[0], b.val[0])); |
333 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_min_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_min_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_min_u8 |
334 | | |
335 | 0 | SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) { |
336 | 0 | return v256_from_v128(v128_max_u8(a.val[1], b.val[1]), |
337 | 0 | v128_max_u8(a.val[0], b.val[0])); |
338 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_max_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_max_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_max_u8 |
339 | | |
340 | 0 | SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) { |
341 | 0 | return v256_from_v128(v128_min_s8(a.val[1], b.val[1]), |
342 | 0 | v128_min_s8(a.val[0], b.val[0])); |
343 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_min_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_min_s8 |
344 | | |
345 | 0 | SIMD_INLINE uint32_t v256_movemask_8(v256 a) { |
346 | 0 | return (v128_movemask_8(v256_high_v128(a)) << 16) | |
347 | 0 | v128_movemask_8(v256_low_v128(a)); |
348 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_movemask_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_movemask_8 Unexecuted instantiation: cdef_block_sse4.c:v256_movemask_8 |
349 | | |
350 | 0 | SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) { |
351 | 0 | return v256_from_v128(v128_blend_8(a.val[1], b.val[1], c.val[1]), |
352 | 0 | v128_blend_8(a.val[0], b.val[0], c.val[0])); |
353 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_blend_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_blend_8 Unexecuted instantiation: cdef_block_sse4.c:v256_blend_8 |
354 | | |
355 | 0 | SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) { |
356 | 0 | return v256_from_v128(v128_max_s8(a.val[1], b.val[1]), |
357 | 0 | v128_max_s8(a.val[0], b.val[0])); |
358 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_max_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_max_s8 |
359 | | |
360 | 0 | SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) { |
361 | 0 | return v256_from_v128(v128_min_s16(a.val[1], b.val[1]), |
362 | 0 | v128_min_s16(a.val[0], b.val[0])); |
363 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_min_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_min_s16 |
364 | | |
365 | 0 | SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) { |
366 | 0 | return v256_from_v128(v128_max_s16(a.val[1], b.val[1]), |
367 | 0 | v128_max_s16(a.val[0], b.val[0])); |
368 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_max_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_max_s16 |
369 | | |
370 | 0 | SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) { |
371 | 0 | return v256_from_v128(v128_min_s32(a.val[1], b.val[1]), |
372 | 0 | v128_min_s32(a.val[0], b.val[0])); |
373 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_min_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_min_s32 |
374 | | |
375 | 0 | SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) { |
376 | 0 | return v256_from_v128(v128_max_s32(a.val[1], b.val[1]), |
377 | 0 | v128_max_s32(a.val[0], b.val[0])); |
378 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_max_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_max_s32 |
379 | | |
380 | 0 | SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) { |
381 | 0 | return v256_from_v128(v128_ziphi_8(a.val[0], b.val[0]), |
382 | 0 | v128_ziplo_8(a.val[0], b.val[0])); |
383 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_8 Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_8 |
384 | | |
385 | 0 | SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) { |
386 | 0 | return v256_from_v128(v128_ziphi_8(a.val[1], b.val[1]), |
387 | 0 | v128_ziplo_8(a.val[1], b.val[1])); |
388 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_8 Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_8 |
389 | | |
390 | 0 | SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) { |
391 | 0 | return v256_from_v128(v128_ziphi_16(a.val[0], b.val[0]), |
392 | 0 | v128_ziplo_16(a.val[0], b.val[0])); |
393 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_16 Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_16 |
394 | | |
395 | 0 | SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) { |
396 | 0 | return v256_from_v128(v128_ziphi_16(a.val[1], b.val[1]), |
397 | 0 | v128_ziplo_16(a.val[1], b.val[1])); |
398 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_16 Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_16 |
399 | | |
400 | 0 | SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) { |
401 | 0 | return v256_from_v128(v128_ziphi_32(a.val[0], b.val[0]), |
402 | 0 | v128_ziplo_32(a.val[0], b.val[0])); |
403 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_32 Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_32 |
404 | | |
405 | 0 | SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) { |
406 | 0 | return v256_from_v128(v128_ziphi_32(a.val[1], b.val[1]), |
407 | 0 | v128_ziplo_32(a.val[1], b.val[1])); |
408 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_32 Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_32 |
409 | | |
410 | 0 | SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) { |
411 | 0 | return v256_from_v128(v128_ziphi_64(a.val[0], b.val[0]), |
412 | 0 | v128_ziplo_64(a.val[0], b.val[0])); |
413 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_64 Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_64 |
414 | | |
415 | 0 | SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) { |
416 | 0 | return v256_from_v128(v128_ziphi_64(a.val[1], b.val[1]), |
417 | 0 | v128_ziplo_64(a.val[1], b.val[1])); |
418 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_64 Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_64 |
419 | | |
420 | 0 | SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) { |
421 | 0 | return v256_from_v128(a.val[0], b.val[0]); |
422 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_128 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_128 Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_128 |
423 | | |
424 | 0 | SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) { |
425 | 0 | return v256_from_v128(a.val[1], b.val[1]); |
426 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_128 Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_128 Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_128 |
427 | | |
428 | 0 | SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) { |
429 | 0 | return v256_from_v128(v128_ziphi_8(a, b), v128_ziplo_8(a, b)); |
430 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_zip_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_8 Unexecuted instantiation: cdef_block_sse4.c:v256_zip_8 |
431 | | |
432 | 0 | SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) { |
433 | 0 | return v256_from_v128(v128_ziphi_16(a, b), v128_ziplo_16(a, b)); |
434 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_zip_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_16 Unexecuted instantiation: cdef_block_sse4.c:v256_zip_16 |
435 | | |
436 | 0 | SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) { |
437 | 0 | return v256_from_v128(v128_ziphi_32(a, b), v128_ziplo_32(a, b)); |
438 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_zip_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_32 Unexecuted instantiation: cdef_block_sse4.c:v256_zip_32 |
439 | | |
440 | 0 | SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) { |
441 | 0 | return v256_from_v128(v128_unziplo_8(a.val[1], a.val[0]), |
442 | 0 | v128_unziplo_8(b.val[1], b.val[0])); |
443 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_8 Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_8 |
444 | | |
445 | 0 | SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) { |
446 | 0 | return v256_from_v128(v128_unziphi_8(a.val[1], a.val[0]), |
447 | 0 | v128_unziphi_8(b.val[1], b.val[0])); |
448 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_8 Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_8 |
449 | | |
450 | 0 | SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) { |
451 | 0 | return v256_from_v128(v128_unziplo_16(a.val[1], a.val[0]), |
452 | 0 | v128_unziplo_16(b.val[1], b.val[0])); |
453 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_16 Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_16 |
454 | | |
455 | 0 | SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) { |
456 | 0 | return v256_from_v128(v128_unziphi_16(a.val[1], a.val[0]), |
457 | 0 | v128_unziphi_16(b.val[1], b.val[0])); |
458 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_16 Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_16 |
459 | | |
460 | 0 | SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) { |
461 | 0 | return v256_from_v128(v128_unziplo_32(a.val[1], a.val[0]), |
462 | 0 | v128_unziplo_32(b.val[1], b.val[0])); |
463 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_32 Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_32 |
464 | | |
465 | 0 | SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) { |
466 | 0 | return v256_from_v128(v128_unziphi_32(a.val[1], a.val[0]), |
467 | 0 | v128_unziphi_32(b.val[1], b.val[0])); |
468 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_32 Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_32 |
469 | | |
470 | 0 | SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) { |
471 | 0 | #if HAVE_SSE2 |
472 | 0 | return v256_from_v128( |
473 | 0 | _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a.val[0]), |
474 | 0 | _mm_castsi128_pd(a.val[1]), 0)), |
475 | 0 | _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(b.val[0]), |
476 | 0 | _mm_castsi128_pd(b.val[1]), 0))); |
477 | 0 | #else |
478 | 0 | return v256_from_v64(v128_low_v64(a.val[1]), v128_low_v64(a.val[0]), |
479 | 0 | v128_low_v64(b.val[1]), v128_low_v64(b.val[0])); |
480 | 0 | #endif |
481 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_64 Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_64 |
482 | | |
483 | 0 | SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) { |
484 | 0 | #if HAVE_SSE2 |
485 | 0 | return v256_from_v128( |
486 | 0 | _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a.val[0]), |
487 | 0 | _mm_castsi128_pd(a.val[1]), 3)), |
488 | 0 | _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(b.val[0]), |
489 | 0 | _mm_castsi128_pd(b.val[1]), 3))); |
490 | 0 | #else |
491 | 0 | return v256_from_v64(v128_high_v64(a.val[1]), v128_high_v64(a.val[0]), |
492 | 0 | v128_high_v64(b.val[1]), v128_high_v64(b.val[0])); |
493 | 0 | #endif |
494 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_64 Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_64 |
495 | | |
496 | 0 | SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) { |
497 | 0 | return v256_from_v128(v128_unpackhi_u8_s16(a), v128_unpacklo_u8_s16(a)); |
498 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_u8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_u8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_u8_s16 |
499 | | |
500 | 0 | SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) { |
501 | 0 | return v256_from_v128(v128_unpackhi_u8_s16(a.val[0]), |
502 | 0 | v128_unpacklo_u8_s16(a.val[0])); |
503 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_u8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_u8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_u8_s16 |
504 | | |
505 | 0 | SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) { |
506 | 0 | return v256_from_v128(v128_unpackhi_u8_s16(a.val[1]), |
507 | 0 | v128_unpacklo_u8_s16(a.val[1])); |
508 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_u8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_u8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_u8_s16 |
509 | | |
510 | 0 | SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) { |
511 | 0 | return v256_from_v128(v128_unpackhi_s8_s16(a), v128_unpacklo_s8_s16(a)); |
512 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_s8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_s8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_s8_s16 |
513 | | |
514 | 0 | SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) { |
515 | 0 | return v256_from_v128(v128_unpackhi_s8_s16(a.val[0]), |
516 | 0 | v128_unpacklo_s8_s16(a.val[0])); |
517 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_s8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_s8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_s8_s16 |
518 | | |
519 | 0 | SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) { |
520 | 0 | return v256_from_v128(v128_unpackhi_s8_s16(a.val[1]), |
521 | 0 | v128_unpacklo_s8_s16(a.val[1])); |
522 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_s8_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_s8_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_s8_s16 |
523 | | |
524 | 0 | SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) { |
525 | 0 | return v256_from_v128(v128_pack_s32_s16(a.val[1], a.val[0]), |
526 | 0 | v128_pack_s32_s16(b.val[1], b.val[0])); |
527 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s32_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s32_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s32_s16 |
528 | | |
529 | 0 | SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) { |
530 | 0 | return v256_from_v128(v128_pack_s32_u16(a.val[1], a.val[0]), |
531 | 0 | v128_pack_s32_u16(b.val[1], b.val[0])); |
532 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s32_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s32_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s32_u16 |
533 | | |
534 | 0 | SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) { |
535 | 0 | return v256_from_v128(v128_pack_s16_u8(a.val[1], a.val[0]), |
536 | 0 | v128_pack_s16_u8(b.val[1], b.val[0])); |
537 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s16_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s16_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s16_u8 |
538 | | |
539 | 0 | SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) { |
540 | 0 | return v256_from_v128(v128_pack_s16_s8(a.val[1], a.val[0]), |
541 | 0 | v128_pack_s16_s8(b.val[1], b.val[0])); |
542 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s16_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s16_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s16_s8 |
543 | | |
544 | 0 | SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) { |
545 | 0 | return v256_from_v128(v128_unpackhi_u16_s32(a), v128_unpacklo_u16_s32(a)); |
546 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_u16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_u16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_u16_s32 |
547 | | |
548 | 0 | SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) { |
549 | 0 | return v256_from_v128(v128_unpackhi_s16_s32(a), v128_unpacklo_s16_s32(a)); |
550 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_s16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_s16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_s16_s32 |
551 | | |
552 | 0 | SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) { |
553 | 0 | return v256_from_v128(v128_unpackhi_u16_s32(a.val[0]), |
554 | 0 | v128_unpacklo_u16_s32(a.val[0])); |
555 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_u16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_u16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_u16_s32 |
556 | | |
557 | 0 | SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) { |
558 | 0 | return v256_from_v128(v128_unpackhi_s16_s32(a.val[0]), |
559 | 0 | v128_unpacklo_s16_s32(a.val[0])); |
560 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_s16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_s16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_s16_s32 |
561 | | |
562 | 0 | SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) { |
563 | 0 | return v256_from_v128(v128_unpackhi_u16_s32(a.val[1]), |
564 | 0 | v128_unpacklo_u16_s32(a.val[1])); |
565 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_u16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_u16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_u16_s32 |
566 | | |
567 | 0 | SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) { |
568 | 0 | return v256_from_v128(v128_unpackhi_s16_s32(a.val[1]), |
569 | 0 | v128_unpacklo_s16_s32(a.val[1])); |
570 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_s16_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_s16_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_s16_s32 |
571 | | |
572 | 0 | SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) { |
573 | 0 | return v256_from_v128(v128_cmpgt_s8(a.val[1], b.val[1]), |
574 | 0 | v128_cmpgt_s8(a.val[0], b.val[0])); |
575 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s8 |
576 | | |
577 | 0 | SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) { |
578 | 0 | return v256_from_v128(v128_cmplt_s8(a.val[1], b.val[1]), |
579 | 0 | v128_cmplt_s8(a.val[0], b.val[0])); |
580 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s8 |
581 | | |
582 | 0 | SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) { |
583 | 0 | return v256_from_v128(v128_cmpeq_8(a.val[1], b.val[1]), |
584 | 0 | v128_cmpeq_8(a.val[0], b.val[0])); |
585 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_8 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_8 |
586 | | |
587 | 0 | SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) { |
588 | 0 | return v256_from_v128(v128_cmpgt_s16(a.val[1], b.val[1]), |
589 | 0 | v128_cmpgt_s16(a.val[0], b.val[0])); |
590 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s16 |
591 | | |
592 | 0 | SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) { |
593 | 0 | return v256_from_v128(v128_cmplt_s16(a.val[1], b.val[1]), |
594 | 0 | v128_cmplt_s16(a.val[0], b.val[0])); |
595 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s16 |
596 | | |
597 | 0 | SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) { |
598 | 0 | return v256_from_v128(v128_cmpeq_16(a.val[1], b.val[1]), |
599 | 0 | v128_cmpeq_16(a.val[0], b.val[0])); |
600 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_16 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_16 |
601 | | |
602 | 0 | SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) { |
603 | 0 | return v256_from_v128(v128_cmpgt_s32(a.val[1], b.val[1]), |
604 | 0 | v128_cmpgt_s32(a.val[0], b.val[0])); |
605 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s32 |
606 | | |
607 | 0 | SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) { |
608 | 0 | return v256_from_v128(v128_cmplt_s32(a.val[1], b.val[1]), |
609 | 0 | v128_cmplt_s32(a.val[0], b.val[0])); |
610 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s32 |
611 | | |
612 | 0 | SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) { |
613 | 0 | return v256_from_v128(v128_cmpeq_32(a.val[1], b.val[1]), |
614 | 0 | v128_cmpeq_32(a.val[0], b.val[0])); |
615 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_32 Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_32 |
616 | | |
/* Byte shuffle across a whole v256: the bytes of pattern are used as lookup
 * indices into the 32 bytes of x.  One of three implementations is compiled
 * in: AArch64 NEON (vqtbl2q_u8), 32-bit NEON (vtbl4_u8), or a generic version
 * built from the 128-bit shuffle/blend helpers. */
SIMD_INLINE v256 v256_shuffle_8(v256 x, v256 pattern) {
#if HAVE_NEON
#if AOM_ARCH_AARCH64
  /* Two-register lookup table: x.val[0] first, then x.val[1]. */
  uint8x16x2_t p = { { vreinterpretq_u8_s64(x.val[0]),
                       vreinterpretq_u8_s64(x.val[1]) } };
  /* One table lookup per 128-bit half of the pattern. */
  return v256_from_v128(
      vreinterpretq_s64_u8(vqtbl2q_u8(p, vreinterpretq_u8_s64(pattern.val[1]))),
      vreinterpretq_s64_u8(
          vqtbl2q_u8(p, vreinterpretq_u8_s64(pattern.val[0]))));
#else
  /* Same table as four 64-bit registers: low/high of x.val[0], then
   * low/high of x.val[1]. */
  uint8x8x4_t p = { { vget_low_u8(vreinterpretq_u8_s64(x.val[0])),
                      vget_high_u8(vreinterpretq_u8_s64(x.val[0])),
                      vget_low_u8(vreinterpretq_u8_s64(x.val[1])),
                      vget_high_u8(vreinterpretq_u8_s64(x.val[1])) } };
  /* vtbl4_u8 yields 8 bytes per call, so the pattern is processed in four
   * 64-bit pieces. */
  uint8x8_t shuffle1_hi =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])));
  uint8x8_t shuffle1_lo =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])));
  uint8x8_t shuffle0_hi =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])));
  uint8x8_t shuffle0_lo =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])));
  return v256_from_64(vget_lane_u64(vreinterpret_u64_u8(shuffle1_hi), 0),
                      vget_lane_u64(vreinterpret_u64_u8(shuffle1_lo), 0),
                      vget_lane_u64(vreinterpret_u64_u8(shuffle0_hi), 0),
                      vget_lane_u64(vreinterpret_u64_u8(shuffle0_lo), 0));
#endif
#else
  v128 c16 = v128_dup_8(16);
  /* Masks from a signed "pattern < 16" comparison, one per pattern half. */
  v128 maskhi = v128_cmplt_s8(pattern.val[1], c16);
  v128 masklo = v128_cmplt_s8(pattern.val[0], c16);
  /* For each pattern half, shuffle x.val[1] with (pattern - 16) and x.val[0]
   * with the raw pattern, then blend the two candidates under the mask. */
  return v256_from_v128(
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[1], c16)),
                   v128_shuffle_8(x.val[0], pattern.val[1]), maskhi),
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[0], c16)),
                   v128_shuffle_8(x.val[0], pattern.val[0]), masklo));
#endif
}
655 | | |
/* Byte shuffle over the 64 bytes formed by y and x: the bytes of pattern are
 * used as lookup indices.  In the AArch64 table y occupies the first 32
 * entries and x the next 32; the other two implementations select y for
 * pattern bytes below 32 (signed compare) and x otherwise. */
SIMD_INLINE v256 v256_wideshuffle_8(v256 x, v256 y, v256 pattern) {
#if HAVE_NEON
#if AOM_ARCH_AARCH64
  /* Four-register lookup table: y.val[0], y.val[1], x.val[0], x.val[1]. */
  uint8x16x4_t p = { {
      vreinterpretq_u8_s64(y.val[0]),
      vreinterpretq_u8_s64(y.val[1]),
      vreinterpretq_u8_s64(x.val[0]),
      vreinterpretq_u8_s64(x.val[1]),
  } };
  /* One table lookup per 128-bit half of the pattern. */
  return v256_from_v128(
      vreinterpretq_s64_u8(vqtbl4q_u8(p, vreinterpretq_u8_s64(pattern.val[1]))),
      vreinterpretq_s64_u8(
          vqtbl4q_u8(p, vreinterpretq_u8_s64(pattern.val[0])))));
#else
  v256 c32 = v256_dup_8(32);
  /* Pattern rebased by 32, used for the lookups into x. */
  v256 p32 = v256_sub_8(pattern, c32);
  /* p is the 32-byte table built from x, q the one built from y. */
  uint8x8x4_t p = { { vget_low_u8(vreinterpretq_u8_s64(x.val[0])),
                      vget_high_u8(vreinterpretq_u8_s64(x.val[0])),
                      vget_low_u8(vreinterpretq_u8_s64(x.val[1])),
                      vget_high_u8(vreinterpretq_u8_s64(x.val[1])) } };
  uint8x8x4_t q = { { vget_low_u8(vreinterpretq_u8_s64(y.val[0])),
                      vget_high_u8(vreinterpretq_u8_s64(y.val[0])),
                      vget_low_u8(vreinterpretq_u8_s64(y.val[1])),
                      vget_high_u8(vreinterpretq_u8_s64(y.val[1])) } };
  /* r1: lookups into x with the rebased pattern, 8 bytes per call. */
  uint8x8_t shuffle1_hi =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(p32.val[1])));
  uint8x8_t shuffle1_lo =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(p32.val[1])));
  uint8x8_t shuffle0_hi =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_high_s64(p32.val[0])));
  uint8x8_t shuffle0_lo =
      vtbl4_u8(p, vreinterpret_u8_s64(vget_low_s64(p32.val[0])));
  v256 r1 = v256_from_64(vget_lane_u64(vreinterpret_u64_u8(shuffle1_hi), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle1_lo), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle0_hi), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle0_lo), 0));
  /* r2: lookups into y with the original pattern. */
  shuffle1_hi = vtbl4_u8(q, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])));
  shuffle1_lo = vtbl4_u8(q, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])));
  shuffle0_hi = vtbl4_u8(q, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])));
  shuffle0_lo = vtbl4_u8(q, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])));
  v256 r2 = v256_from_64(vget_lane_u64(vreinterpret_u64_u8(shuffle1_hi), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle1_lo), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle0_hi), 0),
                         vget_lane_u64(vreinterpret_u64_u8(shuffle0_lo), 0));
  /* Blend r1 and r2 under the signed "pattern < 32" mask. */
  return v256_blend_8(r1, r2, v256_cmplt_s8(pattern, c32));
#endif
#else
  v128 c16 = v128_dup_8(16);
  v128 c32 = v128_dup_8(32);
  v128 c48 = v128_dup_8(48);
  /* Signed "16 > pattern" and "48 > pattern" masks for each pattern half. */
  v128 maskhi16 = v128_cmpgt_s8(c16, pattern.val[1]);
  v128 masklo16 = v128_cmpgt_s8(c16, pattern.val[0]);
  v128 maskhi48 = v128_cmpgt_s8(c48, pattern.val[1]);
  v128 masklo48 = v128_cmpgt_s8(c48, pattern.val[0]);
  /* r1: shuffle of x; x.val[0] is indexed with (pattern - 32) and x.val[1]
   * with (pattern - 48), blended under the "< 48" mask. */
  v256 r1 = v256_from_v128(
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[1], c48)),
                   v128_shuffle_8(x.val[0], v128_sub_8(pattern.val[1], c32)),
                   maskhi48),
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(pattern.val[0], c48)),
                   v128_shuffle_8(x.val[0], v128_sub_8(pattern.val[0], c32)),
                   masklo48));
  /* r2: shuffle of y; y.val[0] is indexed with the raw pattern and y.val[1]
   * with (pattern - 16), blended under the "< 16" mask. */
  v256 r2 = v256_from_v128(
      v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(pattern.val[1], c16)),
                   v128_shuffle_8(y.val[0], pattern.val[1]), maskhi16),
      v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(pattern.val[0], c16)),
                   v128_shuffle_8(y.val[0], pattern.val[0]), masklo16));
  /* Final blend of r1 and r2 under the signed "32 > pattern" mask. */
  return v256_blend_8(r1, r2, v256_cmpgt_s8(v256_from_v128(c32, c32), pattern));
#endif
}
725 | | |
726 | 0 | SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) { |
727 | 0 | return v256_from_v128( |
728 | 0 | v128_shuffle_8(v256_high_v128(a), v256_high_v128(pattern)), |
729 | 0 | v128_shuffle_8(v256_low_v128(a), v256_low_v128(pattern))); |
730 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_pshuffle_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_pshuffle_8 Unexecuted instantiation: cdef_block_sse4.c:v256_pshuffle_8 |
731 | | |
732 | 0 | SIMD_INLINE v256 v256_shl_8(v256 a, const unsigned int c) { |
733 | 0 | return v256_from_v128(v128_shl_8(a.val[1], c), v128_shl_8(a.val[0], c)); |
734 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shl_8 Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_8 Unexecuted instantiation: cdef_block_sse4.c:v256_shl_8 |
735 | | |
736 | 0 | SIMD_INLINE v256 v256_shr_u8(v256 a, const unsigned int c) { |
737 | 0 | return v256_from_v128(v128_shr_u8(a.val[1], c), v128_shr_u8(a.val[0], c)); |
738 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u8 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u8 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u8 |
739 | | |
740 | 0 | SIMD_INLINE v256 v256_shr_s8(v256 a, const unsigned int c) { |
741 | 0 | return v256_from_v128(v128_shr_s8(a.val[1], c), v128_shr_s8(a.val[0], c)); |
742 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s8 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s8 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s8 |
743 | | |
744 | 0 | SIMD_INLINE v256 v256_shl_16(v256 a, const unsigned int c) { |
745 | 0 | return v256_from_v128(v128_shl_16(a.val[1], c), v128_shl_16(a.val[0], c)); |
746 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shl_16 Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_16 Unexecuted instantiation: cdef_block_sse4.c:v256_shl_16 |
747 | | |
748 | 0 | SIMD_INLINE v256 v256_shr_u16(v256 a, const unsigned int c) { |
749 | 0 | return v256_from_v128(v128_shr_u16(a.val[1], c), v128_shr_u16(a.val[0], c)); |
750 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u16 |
751 | | |
752 | 0 | SIMD_INLINE v256 v256_shr_s16(v256 a, const unsigned int c) { |
753 | 0 | return v256_from_v128(v128_shr_s16(a.val[1], c), v128_shr_s16(a.val[0], c)); |
754 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s16 |
755 | | |
756 | 0 | SIMD_INLINE v256 v256_shl_32(v256 a, const unsigned int c) { |
757 | 0 | return v256_from_v128(v128_shl_32(a.val[1], c), v128_shl_32(a.val[0], c)); |
758 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shl_32 Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_32 Unexecuted instantiation: cdef_block_sse4.c:v256_shl_32 |
759 | | |
760 | 0 | SIMD_INLINE v256 v256_shr_u32(v256 a, const unsigned int c) { |
761 | 0 | return v256_from_v128(v128_shr_u32(a.val[1], c), v128_shr_u32(a.val[0], c)); |
762 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u32 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u32 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u32 |
763 | | |
764 | 0 | SIMD_INLINE v256 v256_shr_s32(v256 a, const unsigned int c) { |
765 | 0 | return v256_from_v128(v128_shr_s32(a.val[1], c), v128_shr_s32(a.val[0], c)); |
766 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s32 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s32 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s32 |
767 | | |
768 | 0 | SIMD_INLINE v256 v256_shl_64(v256 a, const unsigned int c) { |
769 | 0 | return v256_from_v128(v128_shl_64(a.val[1], c), v128_shl_64(a.val[0], c)); |
770 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shl_64 Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_64 Unexecuted instantiation: cdef_block_sse4.c:v256_shl_64 |
771 | | |
772 | 0 | SIMD_INLINE v256 v256_shr_u64(v256 a, const unsigned int c) { |
773 | 0 | return v256_from_v128(v128_shr_u64(a.val[1], c), v128_shr_u64(a.val[0], c)); |
774 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u64 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u64 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u64 |
775 | | |
776 | 0 | SIMD_INLINE v256 v256_shr_s64(v256 a, const unsigned int c) { |
777 | 0 | return v256_from_v128(v128_shr_s64(a.val[1], c), v128_shr_s64(a.val[0], c)); |
778 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s64 Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s64 Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s64 |
779 | | |
/* These intrinsics require immediate values, so we must use #defines
   to enforce that. */

/* Shifts the whole 256-bit value a left by n bytes, n being a compile-time
 * constant.  For n < 16 the bytes shifted out of a.val[0] are OR-ed into the
 * shifted a.val[1]; for n >= 16 a.val[0] (shifted by n - 16 when n > 16)
 * becomes the first v256_from_v128() argument and the second is zero.
 * Both macro parameters are parenthesized so that expression arguments expand
 * safely; a and n are each expanded more than once. */
#define v256_shl_n_byte(a, n)                                                 \
  ((n) < 16 ? v256_from_v128(v128_or(v128_shl_n_byte((a).val[1], (n)),        \
                                     v128_shr_n_byte((a).val[0], 16 - (n))),  \
                             v128_shl_n_byte((a).val[0], (n)))                \
            : v256_from_v128(                                                 \
                  (n) > 16 ? v128_shl_n_byte((a).val[0], (n) - 16)            \
                           : (a).val[0],                                      \
                  v128_zero()))
789 | | |
/* Shifts the whole 256-bit value a right by n bytes, n being a compile-time
 * constant.  n == 0 returns a unchanged.  For 0 < n < 16 the bytes shifted out
 * of a.val[1] are OR-ed into the shifted a.val[0]; for n >= 16 the first
 * v256_from_v128() argument is zero and a.val[1] (shifted by n - 16 when
 * n > 16) becomes the second.
 * Both macro parameters are parenthesized: the previous bare `n == 0` test
 * mis-parsed arguments such as `x & 1`.  a and n are each expanded more than
 * once. */
#define v256_shr_n_byte(a, n)                                                 \
  ((n) == 0                                                                   \
       ? (a)                                                                  \
       : ((n) < 16                                                            \
              ? v256_from_v128(                                               \
                    v128_shr_n_byte((a).val[1], (n)),                         \
                    v128_or(v128_shr_n_byte((a).val[0], (n)),                 \
                            v128_shl_n_byte((a).val[1], 16 - (n))))           \
              : v256_from_v128(                                               \
                    v128_zero(),                                              \
                    (n) > 16 ? v128_shr_n_byte((a).val[1], (n) - 16)          \
                             : (a).val[1])))
800 | | |
/* For a non-zero constant c, ORs b shifted right by c bytes with a shifted
 * left by (32 - c) bytes; for c == 0 the result is b.
 * Every parameter is parenthesized at its point of use so that expression
 * arguments expand safely. */
#define v256_align(a, b, c)                                       \
  ((c) ? v256_or(v256_shr_n_byte((b), (c)),                       \
                 v256_shl_n_byte((a), 32 - (c)))                  \
       : (b))
803 | | |
/* Immediate-count shifts.  Each macro applies the v128 macro of the same
 * suffix to a.val[1] and a.val[0] and recombines the two halves; n must be a
 * compile-time constant.  The parameters are parenthesized so that expression
 * arguments expand safely; a is expanded twice. */
#define v256_shl_n_8(a, n) \
  v256_from_v128(v128_shl_n_8((a).val[1], (n)), v128_shl_n_8((a).val[0], (n)))
#define v256_shl_n_16(a, n)                       \
  v256_from_v128(v128_shl_n_16((a).val[1], (n)), \
                 v128_shl_n_16((a).val[0], (n)))
#define v256_shl_n_32(a, n)                       \
  v256_from_v128(v128_shl_n_32((a).val[1], (n)), \
                 v128_shl_n_32((a).val[0], (n)))
#define v256_shl_n_64(a, n)                       \
  v256_from_v128(v128_shl_n_64((a).val[1], (n)), \
                 v128_shl_n_64((a).val[0], (n)))
#define v256_shr_n_u8(a, n)                       \
  v256_from_v128(v128_shr_n_u8((a).val[1], (n)), \
                 v128_shr_n_u8((a).val[0], (n)))
#define v256_shr_n_u16(a, n)                       \
  v256_from_v128(v128_shr_n_u16((a).val[1], (n)), \
                 v128_shr_n_u16((a).val[0], (n)))
#define v256_shr_n_u32(a, n)                       \
  v256_from_v128(v128_shr_n_u32((a).val[1], (n)), \
                 v128_shr_n_u32((a).val[0], (n)))
#define v256_shr_n_u64(a, n)                       \
  v256_from_v128(v128_shr_n_u64((a).val[1], (n)), \
                 v128_shr_n_u64((a).val[0], (n)))
#define v256_shr_n_s8(a, n)                       \
  v256_from_v128(v128_shr_n_s8((a).val[1], (n)), \
                 v128_shr_n_s8((a).val[0], (n)))
#define v256_shr_n_s16(a, n)                       \
  v256_from_v128(v128_shr_n_s16((a).val[1], (n)), \
                 v128_shr_n_s16((a).val[0], (n)))
#define v256_shr_n_s32(a, n)                       \
  v256_from_v128(v128_shr_n_s32((a).val[1], (n)), \
                 v128_shr_n_s32((a).val[0], (n)))
#define v256_shr_n_s64(a, n)                       \
  v256_from_v128(v128_shr_n_s64((a).val[1], (n)), \
                 v128_shr_n_s64((a).val[0], (n)))
828 | | |
/* Shifts by n 16-bit words, expressed as byte shifts of twice that count. */
#define v256_shr_n_word(a, n) v256_shr_n_byte(a, (n) * 2)
#define v256_shl_n_word(a, n) v256_shl_n_byte(a, (n) * 2)
831 | | |
832 | | typedef struct { |
833 | | sad128_internal_u16 val[2]; |
834 | | } sad256_internal_u16; |
835 | | |
836 | 0 | SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) { |
837 | 0 | sad256_internal_u16 t; |
838 | 0 | t.val[1] = v128_sad_u16_init(); |
839 | 0 | t.val[0] = v128_sad_u16_init(); |
840 | 0 | return t; |
841 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16_init Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16_init Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16_init |
842 | | |
843 | | /* Implementation dependent return value. Result must be finalised with |
844 | | v256_sad_u16_sum(). |
845 | | The result for more than 16 v256_sad_u16() calls is undefined. */ |
846 | | SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a, |
847 | 0 | v256 b) { |
848 | 0 | sad256_internal_u16 t; |
849 | 0 | t.val[1] = v128_sad_u16(s.val[1], a.val[1], b.val[1]); |
850 | 0 | t.val[0] = v128_sad_u16(s.val[0], a.val[0], b.val[0]); |
851 | 0 | return t; |
852 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16 Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16 Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16 |
853 | | |
854 | 0 | SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) { |
855 | 0 | return v128_sad_u16_sum(s.val[1]) + v128_sad_u16_sum(s.val[0]); |
856 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16_sum Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16_sum Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16_sum |
857 | | |
858 | | typedef struct { |
859 | | ssd128_internal_s16 val[2]; |
860 | | } ssd256_internal_s16; |
861 | | |
862 | 0 | SIMD_INLINE ssd256_internal_s16 v256_ssd_s16_init(void) { |
863 | 0 | ssd256_internal_s16 t; |
864 | 0 | t.val[1] = v128_ssd_s16_init(); |
865 | 0 | t.val[0] = v128_ssd_s16_init(); |
866 | 0 | return t; |
867 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16_init Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16_init Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16_init |
868 | | |
869 | | /* Implementation dependent return value. Result must be finalised with |
870 | | * v256_ssd_s16_sum(). */ |
871 | | SIMD_INLINE ssd256_internal_s16 v256_ssd_s16(ssd256_internal_s16 s, v256 a, |
872 | 0 | v256 b) { |
873 | 0 | ssd256_internal_s16 t; |
874 | 0 | t.val[1] = v128_ssd_s16(s.val[1], a.val[1], b.val[1]); |
875 | 0 | t.val[0] = v128_ssd_s16(s.val[0], a.val[0], b.val[0]); |
876 | 0 | return t; |
877 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16 Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16 Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16 |
878 | | |
879 | 0 | SIMD_INLINE uint64_t v256_ssd_s16_sum(ssd256_internal_s16 s) { |
880 | 0 | return v128_ssd_s16_sum(s.val[1]) + v128_ssd_s16_sum(s.val[0]); |
881 | 0 | } Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16_sum Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16_sum Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16_sum |
882 | | |
883 | | #endif // AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_ |