/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_
#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_

#include <emmintrin.h>
#if defined(__SSSE3__)
#include <tmmintrin.h>
#endif
#if defined(__SSE4_1__)
#include <smmintrin.h>
#endif

typedef __m128i v64;
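
/* Note: only the low 64 bits of the underlying __m128i carry the value of a
   v64. The constructors below zero the upper half, but not every operation
   preserves that, so the upper half should be treated as unspecified. */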

SIMD_INLINE uint32_t v64_low_u32(v64 a) {
  return (uint32_t)_mm_cvtsi128_si32(a);
}

SIMD_INLINE uint32_t v64_high_u32(v64 a) {
  return (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
}

SIMD_INLINE int32_t v64_low_s32(v64 a) { return (int32_t)_mm_cvtsi128_si32(a); }

SIMD_INLINE int32_t v64_high_s32(v64 a) {
  return (int32_t)_mm_cvtsi128_si32(_mm_srli_si128(a, 4));
}

SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
  return _mm_packs_epi32(
      _mm_set_epi32((int16_t)a, (int16_t)b, (int16_t)c, (int16_t)d),
      _mm_setzero_si128());
}
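
/* Lane order note: v64_from_16(a, b, c, d) yields the 64-bit value
   ((uint64_t)a << 48) | ((uint64_t)b << 32) | ((uint64_t)c << 16) | d,
   i.e. `a` lands in the most significant 16-bit lane. For example,
   v64_u64(v64_from_16(1, 2, 3, 4)) == 0x0001000200030004. */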

SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
  return _mm_set_epi32(0, 0, (int32_t)x, (int32_t)y);
}

SIMD_INLINE v64 v64_from_64(uint64_t x) {
#ifdef __x86_64__
  return _mm_cvtsi64_si128((int64_t)x);
#else
  return _mm_set_epi32(0, 0, (int32_t)(x >> 32), (int32_t)x);
#endif
}

SIMD_INLINE uint64_t v64_u64(v64 x) {
  return (uint64_t)v64_low_u32(x) | ((uint64_t)v64_high_u32(x) << 32);
}
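
/* Illustrative round trip (a sketch, not part of the API): the scalar
   constructors and extractors compose as expected.

     v64 v = v64_from_32(0xdeadbeef, 0x01234567);
     // v64_high_u32(v) == 0xdeadbeef, v64_low_u32(v) == 0x01234567
     // v64_u64(v) == 0xdeadbeef01234567
*/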

SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
  return *((uint32_t *)p);
}

SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
  return *((uint32_t *)p);
}

SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
  *((uint32_t *)p) = a;
}

SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
  *((uint32_t *)p) = a;
}

SIMD_INLINE v64 v64_load_aligned(const void *p) {
  return _mm_loadl_epi64((__m128i *)p);
}

SIMD_INLINE v64 v64_load_unaligned(const void *p) {
  return _mm_loadl_epi64((__m128i *)p);
}

SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
  _mm_storel_epi64((__m128i *)p, a);
}

SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
  _mm_storel_epi64((__m128i *)p, a);
}
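
/* On x86 the aligned and unaligned variants are identical: MOVQ (via
   _mm_loadl_epi64/_mm_storel_epi64) has no alignment requirement. The
   distinction exists so portable SIMD code can declare its alignment
   guarantees for targets where alignment does matter. */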

#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
#define v64_align(a, b, c) \
  ((c) ? _mm_srli_si128(_mm_unpacklo_epi64(b, a), (c)) : b)
#else
#define v64_align(a, b, c)                                                  \
  ((c) ? v64_from_64((v64_u64(b) >> (c)*8) | (v64_u64(a) << (8 - (c)) * 8)) \
       : (b))
#endif
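
/* v64_align(a, b, c) extracts eight bytes from the 16-byte concatenation a:b
   starting at byte offset c; for c == 3 the result is, low byte first,
   b[3] b[4] b[5] b[6] b[7] a[0] a[1] a[2]. The two expansions are
   equivalent; the scalar form appears to sidestep the requirement that
   _mm_srli_si128 take a compile-time constant when the byte count would not
   be folded to an immediate. */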

SIMD_INLINE v64 v64_zero(void) { return _mm_setzero_si128(); }

SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8((char)x); }

SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }

SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }

SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }

SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return _mm_add_epi16(a, b); }

SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) { return _mm_adds_epu8(a, b); }

SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) { return _mm_adds_epi8(a, b); }

SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return _mm_adds_epi16(a, b); }

SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return _mm_add_epi32(a, b); }

SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) { return _mm_sub_epi8(a, b); }

SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) { return _mm_subs_epu8(a, b); }

SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) { return _mm_subs_epi8(a, b); }

SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) { return _mm_sub_epi16(a, b); }

SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return _mm_subs_epi16(a, b); }

SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return _mm_subs_epu16(a, b); }

SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return _mm_sub_epi32(a, b); }

SIMD_INLINE v64 v64_abs_s16(v64 a) {
#if defined(__SSSE3__)
  return _mm_abs_epi16(a);
#else
  return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a));
#endif
}

SIMD_INLINE v64 v64_abs_s8(v64 a) {
#if defined(__SSSE3__)
  return _mm_abs_epi8(a);
#else
  v64 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
  return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
#endif
}

SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }

SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) {
  return _mm_srli_si128(_mm_unpacklo_epi8(b, a), 8);
}

SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) { return _mm_unpacklo_epi16(b, a); }

SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) {
  return _mm_srli_si128(_mm_unpacklo_epi16(b, a), 8);
}

SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) { return _mm_unpacklo_epi32(b, a); }

SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) {
  return _mm_srli_si128(_mm_unpacklo_epi32(b, a), 8);
}

SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
  __m128i t = _mm_unpacklo_epi64(b, a);
  return _mm_packs_epi32(t, t);
}

SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
#if defined(__SSE4_1__)
  __m128i t = _mm_unpacklo_epi64(b, a);
  return _mm_packus_epi32(t, t);
#else
  const int32_t ah = SIMD_CLAMP(v64_high_s32(a), 0, 65535);
  const int32_t al = SIMD_CLAMP(v64_low_s32(a), 0, 65535);
  const int32_t bh = SIMD_CLAMP(v64_high_s32(b), 0, 65535);
  const int32_t bl = SIMD_CLAMP(v64_low_s32(b), 0, 65535);
  return v64_from_16(ah, al, bh, bl);
#endif
}

SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
  __m128i t = _mm_unpacklo_epi64(b, a);
  return _mm_packus_epi16(t, t);
}

SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
  __m128i t = _mm_unpacklo_epi64(b, a);
  return _mm_packs_epi16(t, t);
}

SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
#if defined(__SSSE3__)
  return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
                          v64_from_64(0x0f0d0b0907050301LL));
#else
  return _mm_packus_epi16(
      _mm_unpacklo_epi64(_mm_srli_epi16(b, 8), _mm_srli_epi16(a, 8)),
      _mm_setzero_si128());
#endif
}

SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
#if defined(__SSSE3__)
  return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
                          v64_from_64(0x0e0c0a0806040200LL));
#else
  return v64_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
#endif
}

SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
#if defined(__SSSE3__)
  return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
                          v64_from_64(0x0f0e0b0a07060302LL));
#else
  return _mm_packs_epi32(
      _mm_unpacklo_epi64(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16)),
      _mm_setzero_si128());
#endif
}

SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
#if defined(__SSSE3__)
  return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
                          v64_from_64(0x0d0c090805040100LL));
#else
  return v64_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
#endif
}

SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) {
  return _mm_unpacklo_epi8(a, _mm_setzero_si128());
}

SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) {
  return _mm_srli_si128(_mm_unpacklo_epi8(a, _mm_setzero_si128()), 8);
}

SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) {
  return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8);
}

SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) {
  return _mm_srli_si128(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8), 8);
}

SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
  return _mm_unpacklo_epi16(a, _mm_setzero_si128());
}

SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
  return _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16);
}

SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
  return _mm_srli_si128(_mm_unpacklo_epi16(a, _mm_setzero_si128()), 8);
}

SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
  return _mm_srli_si128(
      _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), a), 16), 8);
}

SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
#if defined(__SSSE3__)
  return _mm_shuffle_epi8(x, pattern);
#else
  v64 output;
  unsigned char *input = (unsigned char *)&x;
  unsigned char *index = (unsigned char *)&pattern;
  unsigned char *selected = (unsigned char *)&output;
  int counter;

  for (counter = 0; counter < 8; counter++) {
    selected[counter] = input[index[counter]];
  }

  return output;
#endif
}
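
/* Example (a sketch): each output byte i takes input byte index[i], so a
   pattern whose bytes are 7,6,...,0 reverses the vector:

     v64 rev = v64_shuffle_8(x, v64_from_64(0x0001020304050607LL));
*/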

SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) {
  __m128i t = _mm_madd_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8),
                             _mm_unpacklo_epi8(b, _mm_setzero_si128()));
  t = _mm_add_epi32(t, _mm_srli_si128(t, 8));
  t = _mm_add_epi32(t, _mm_srli_si128(t, 4));
  return (int32_t)v64_low_u32(t);
}

SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
  __m128i r = _mm_madd_epi16(a, b);
#if defined(__SSE4_1__) && defined(__x86_64__)
  __m128i x = _mm_cvtepi32_epi64(r);
  return _mm_cvtsi128_si64(_mm_add_epi64(x, _mm_srli_si128(x, 8)));
#else
  return (int64_t)_mm_cvtsi128_si32(_mm_srli_si128(r, 4)) +
         (int64_t)_mm_cvtsi128_si32(r);
#endif
}
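
/* In both dot products, _mm_madd_epi16 multiplies adjacent 16-bit lanes and
   sums each pair into a 32-bit lane, so only a short horizontal reduction
   remains. v64_dotp_s16 performs the final addition in 64 bits (on both
   paths) because the two 32-bit partial sums can together exceed INT32_MAX. */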

SIMD_INLINE uint64_t v64_hadd_u8(v64 a) {
  return v64_low_u32(_mm_sad_epu8(a, _mm_setzero_si128()));
}

SIMD_INLINE int64_t v64_hadd_s16(v64 a) {
  return v64_dotp_s16(a, v64_dup_16(1));
}

typedef v64 sad64_internal;

SIMD_INLINE sad64_internal v64_sad_u8_init(void) { return _mm_setzero_si128(); }

/* Implementation dependent return value.  Result must be finalised with
   v64_sad_u8_sum().
   The result for more than 32 v64_sad_u8() calls is undefined. */
SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
  return _mm_add_epi64(s, _mm_sad_epu8(a, b));
}

SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) { return v64_low_u32(s); }
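
/* Illustrative SAD accumulation over an 8x8 block (a sketch; `src`, `ref`
   and the strides are hypothetical caller state, not part of this header):

     sad64_internal acc = v64_sad_u8_init();
     for (int r = 0; r < 8; r++)
       acc = v64_sad_u8(acc, v64_load_unaligned(src + r * src_stride),
                        v64_load_unaligned(ref + r * ref_stride));
     uint32_t sad = v64_sad_u8_sum(acc);
*/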

typedef v64 ssd64_internal;

SIMD_INLINE ssd64_internal v64_ssd_u8_init(void) { return _mm_setzero_si128(); }

/* Implementation dependent return value.  Result must be finalised with
 * v64_ssd_u8_sum(). */
SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
  v64 l = v64_sub_16(v64_ziplo_8(v64_zero(), a), v64_ziplo_8(v64_zero(), b));
  v64 h = v64_sub_16(v64_ziphi_8(v64_zero(), a), v64_ziphi_8(v64_zero(), b));
  v64 r = v64_add_32(_mm_madd_epi16(l, l), _mm_madd_epi16(h, h));
  return _mm_add_epi64(
      s, v64_ziplo_32(v64_zero(), _mm_add_epi32(r, _mm_srli_si128(r, 4))));
}

SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) { return v64_low_u32(s); }
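
/* Unlike the SAD accumulator, each per-call squared sum here is zero-extended
   and added into a 64-bit lane, so the accumulator itself cannot realistically
   overflow; the caller only needs the final total to fit in the uint32_t
   returned by v64_ssd_u8_sum(). */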

SIMD_INLINE v64 v64_or(v64 a, v64 b) { return _mm_or_si128(a, b); }

SIMD_INLINE v64 v64_xor(v64 a, v64 b) { return _mm_xor_si128(a, b); }

SIMD_INLINE v64 v64_and(v64 a, v64 b) { return _mm_and_si128(a, b); }

SIMD_INLINE v64 v64_andn(v64 a, v64 b) { return _mm_andnot_si128(b, a); }

SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) { return _mm_mullo_epi16(a, b); }

SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) { return _mm_mulhi_epi16(a, b); }

SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) {
#if defined(__SSE4_1__)
  return _mm_mullo_epi32(a, b);
#else
  return _mm_unpacklo_epi32(
      _mm_mul_epu32(a, b),
      _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)));
#endif
}

SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) { return _mm_madd_epi16(a, b); }

SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) {
#if defined(__SSSE3__)
  return _mm_maddubs_epi16(a, b);
#else
  __m128i t = _mm_madd_epi16(_mm_unpacklo_epi8(a, _mm_setzero_si128()),
                             _mm_srai_epi16(_mm_unpacklo_epi8(b, b), 8));
  return _mm_packs_epi32(t, t);
#endif
}

SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) { return _mm_avg_epu8(a, b); }

SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) {
  return _mm_sub_epi8(_mm_avg_epu8(a, b),
                      _mm_and_si128(_mm_xor_si128(a, b), v64_dup_8(1)));
}

SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) {
  return _mm_sub_epi16(_mm_avg_epu16(a, b),
                       _mm_and_si128(_mm_xor_si128(a, b), v64_dup_16(1)));
}
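
/* The rounded-down averages rely on the identity
   (a + b) >> 1 == ((a + b + 1) >> 1) - ((a ^ b) & 1):
   _mm_avg_epu8/_mm_avg_epu16 round up, and (a ^ b) & 1 is exactly 1 when
   a + b is odd, i.e. when the rounding bit was added. */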

SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return _mm_avg_epu16(a, b); }

SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return _mm_min_epu8(a, b); }

SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) { return _mm_max_epu8(a, b); }

SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) {
#if defined(__SSE4_1__)
  return _mm_min_epi8(a, b);
#else
  v64 mask = _mm_cmplt_epi8(a, b);
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
#endif
}

SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) {
#if defined(__SSE4_1__)
  return _mm_max_epi8(a, b);
#else
  v64 mask = _mm_cmplt_epi8(b, a);
  return _mm_or_si128(_mm_andnot_si128(mask, b), _mm_and_si128(mask, a));
#endif
}

SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) { return _mm_min_epi16(a, b); }

SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) { return _mm_max_epi16(a, b); }

SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) { return _mm_cmpgt_epi8(a, b); }

SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) { return _mm_cmplt_epi8(a, b); }

SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) { return _mm_cmpeq_epi8(a, b); }

SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) { return _mm_cmpgt_epi16(a, b); }

SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return _mm_cmplt_epi16(a, b); }

SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return _mm_cmpeq_epi16(a, b); }

SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
  return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
                       _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}

SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
  return _mm_and_si128(_mm_set1_epi8((char)(0xff >> c)),
                       _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c)));
}

SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
  return _mm_packs_epi16(
      _mm_sra_epi16(_mm_unpacklo_epi8(a, a), _mm_cvtsi32_si128((int)(c + 8))),
      a);
}

SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
  return _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c));
}

SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
  return _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c));
}

SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
  return _mm_sra_epi16(a, _mm_cvtsi32_si128((int)c));
}

SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
  return _mm_sll_epi32(a, _mm_cvtsi32_si128((int)c));
}

SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
  return _mm_srl_epi32(a, _mm_cvtsi32_si128((int)c));
}

SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
  return _mm_sra_epi32(a, _mm_cvtsi32_si128((int)c));
}

/* These intrinsics require immediate values, so we must use #defines
   to enforce that. */
#define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
#define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), (c) + 8)
#define v64_shl_n_8(a, c) \
  _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
#define v64_shr_n_u8(a, c) \
  _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
#define v64_shr_n_s8(a, c) \
  _mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
#define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
#define v64_shr_n_u16(a, c) _mm_srli_epi16(a, c)
#define v64_shr_n_s16(a, c) _mm_srai_epi16(a, c)
#define v64_shl_n_32(a, c) _mm_slli_epi32(a, c)
#define v64_shr_n_u32(a, c) _mm_srli_epi32(a, c)
#define v64_shr_n_s32(a, c) _mm_srai_epi32(a, c)
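
/* Because _mm_slli_epi16 and related intrinsics encode the shift count in
   the instruction, `c` must be a compile-time constant at every use site,
   e.g. v64_shr_n_u16(a, 2). For a count only known at run time, use the
   corresponding non-immediate functions above (v64_shr_u16() etc.), which
   pass the count through a register. */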

#endif  // AOM_AOM_DSP_SIMD_V64_INTRINSICS_X86_H_