Coverage Report

Created: 2023-06-07 06:31

/src/aom/aom_dsp/simd/v256_intrinsics_v128.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#ifndef AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_
13
#define AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_
14
15
#include "config/aom_config.h"
16
17
#if HAVE_NEON
18
#include "aom_dsp/simd/v128_intrinsics_arm.h"
19
#elif HAVE_SSE2
20
#include "aom_dsp/simd/v128_intrinsics_x86.h"
21
#else
22
#include "aom_dsp/simd/v128_intrinsics.h"
23
#endif
24
25
/* 256-bit vector type. The functions in this header access it through a
 * two-element val[] member: val[0] is returned by v256_low_v128() and val[1]
 * by v256_high_v128(). */
#if HAVE_NEON
26
typedef int64x2x2_t v256;
27
#else
28
typedef struct {
29
  v128 val[2];
30
} v256;
31
#endif
32
33
0
/* Returns v128_low_u32() of the low 128-bit half (val[0]) of a. */
SIMD_INLINE uint32_t v256_low_u32(v256 a) {
  const v128 low_half = a.val[0];
  return v128_low_u32(low_half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_low_u32
Unexecuted instantiation: cdef_block_ssse3.c:v256_low_u32
Unexecuted instantiation: cdef_block_sse4.c:v256_low_u32
34
35
0
/* Returns v128_low_v64() of the low 128-bit half (val[0]) of a. */
SIMD_INLINE v64 v256_low_v64(v256 a) {
  const v128 low_half = a.val[0];
  return v128_low_v64(low_half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_low_v64
Unexecuted instantiation: cdef_block_ssse3.c:v256_low_v64
Unexecuted instantiation: cdef_block_sse4.c:v256_low_v64
36
37
0
/* Returns the result of v256_low_v64(a) converted with v64_u64(). */
SIMD_INLINE uint64_t v256_low_u64(v256 a) {
  const v64 low64 = v256_low_v64(a);
  return v64_u64(low64);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_low_u64
Unexecuted instantiation: cdef_block_ssse3.c:v256_low_u64
Unexecuted instantiation: cdef_block_sse4.c:v256_low_u64
38
39
0
/* Returns the low 128-bit half of a, i.e. element 0 of val[]. */
SIMD_INLINE v128 v256_low_v128(v256 a) {
  const v128 low_half = a.val[0];
  return low_half;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_low_v128
Unexecuted instantiation: cdef_block_ssse3.c:v256_low_v128
Unexecuted instantiation: cdef_block_sse4.c:v256_low_v128
40
41
0
/* Returns the high 128-bit half of a, i.e. element 1 of val[]. */
SIMD_INLINE v128 v256_high_v128(v256 a) {
  const v128 high_half = a.val[1];
  return high_half;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_high_v128
Unexecuted instantiation: cdef_block_ssse3.c:v256_high_v128
Unexecuted instantiation: cdef_block_sse4.c:v256_high_v128
42
43
0
/* Builds a v256 from two 128-bit halves: lo is stored in val[0] and hi in
 * val[1]. */
SIMD_INLINE v256 v256_from_v128(v128 hi, v128 lo) {
  v256 out;
  out.val[0] = lo;
  out.val[1] = hi;
  return out;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_from_v128
Unexecuted instantiation: cdef_block_ssse3.c:v256_from_v128
Unexecuted instantiation: cdef_block_sse4.c:v256_from_v128
49
50
0
/* Builds a v256 from four 64-bit values: v128_from_64(a, b) becomes the high
 * half and v128_from_64(c, d) the low half. */
SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
  const v128 hi = v128_from_64(a, b);
  const v128 lo = v128_from_64(c, d);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_from_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_from_64
Unexecuted instantiation: cdef_block_sse4.c:v256_from_64
53
54
0
/* Builds a v256 from four v64 values: v128_from_v64(a, b) becomes the high
 * half and v128_from_v64(c, d) the low half. */
SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
  const v128 hi = v128_from_v64(a, b);
  const v128 lo = v128_from_v64(c, d);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_from_v64
Unexecuted instantiation: cdef_block_ssse3.c:v256_from_v64
Unexecuted instantiation: cdef_block_sse4.c:v256_from_v64
57
58
0
/* Loads 32 bytes from p using two v128_load_unaligned() calls: bytes 0..15
 * form the low half and bytes 16..31 the high half.
 * The byte pointer keeps the const qualifier of p instead of casting it
 * away. */
SIMD_INLINE v256 v256_load_unaligned(const void *p) {
  const uint8_t *const src = (const uint8_t *)p;
  return v256_from_v128(v128_load_unaligned(src + 16),
                        v128_load_unaligned(src));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_load_unaligned
Unexecuted instantiation: cdef_block_ssse3.c:v256_load_unaligned
Unexecuted instantiation: cdef_block_sse4.c:v256_load_unaligned
62
63
0
/* Loads 32 bytes from p using two v128_load_aligned() calls: bytes 0..15
 * form the low half and bytes 16..31 the high half.
 * The byte pointer keeps the const qualifier of p instead of casting it
 * away. */
SIMD_INLINE v256 v256_load_aligned(const void *p) {
  const uint8_t *const src = (const uint8_t *)p;
  return v256_from_v128(v128_load_aligned(src + 16), v128_load_aligned(src));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_load_aligned
Unexecuted instantiation: cdef_block_ssse3.c:v256_load_aligned
Unexecuted instantiation: cdef_block_sse4.c:v256_load_aligned
67
68
0
/* Stores a to p with two v128_store_unaligned() calls: the low half at p and
 * the high half 16 bytes further on. */
SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
  uint8_t *const high_dst = (uint8_t *)p + 16;
  v128_store_unaligned(p, a.val[0]);
  v128_store_unaligned(high_dst, a.val[1]);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_store_unaligned
Unexecuted instantiation: cdef_block_ssse3.c:v256_store_unaligned
Unexecuted instantiation: cdef_block_sse4.c:v256_store_unaligned
72
73
0
/* Stores a to p with two v128_store_aligned() calls: the low half at p and
 * the high half 16 bytes further on. */
SIMD_INLINE void v256_store_aligned(void *p, v256 a) {
  uint8_t *const high_dst = (uint8_t *)p + 16;
  v128_store_aligned(p, a.val[0]);
  v128_store_aligned(high_dst, a.val[1]);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_store_aligned
Unexecuted instantiation: cdef_block_ssse3.c:v256_store_aligned
Unexecuted instantiation: cdef_block_sse4.c:v256_store_aligned
77
78
0
/* Returns a v256 whose two halves are both v128_zero(). */
SIMD_INLINE v256 v256_zero(void) {
  const v128 hi = v128_zero();
  const v128 lo = v128_zero();
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_zero
Unexecuted instantiation: cdef_block_ssse3.c:v256_zero
Unexecuted instantiation: cdef_block_sse4.c:v256_zero
81
82
0
/* Returns a v256 whose two halves are both v128_dup_8(x). */
SIMD_INLINE v256 v256_dup_8(uint8_t x) {
  const v128 half = v128_dup_8(x);
  return v256_from_v128(half, half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dup_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_8
Unexecuted instantiation: cdef_block_sse4.c:v256_dup_8
86
87
0
/* Returns a v256 whose two halves are both v128_dup_16(x). */
SIMD_INLINE v256 v256_dup_16(uint16_t x) {
  const v128 half = v128_dup_16(x);
  return v256_from_v128(half, half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dup_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_16
Unexecuted instantiation: cdef_block_sse4.c:v256_dup_16
91
92
0
/* Returns a v256 whose two halves are both v128_dup_32(x). */
SIMD_INLINE v256 v256_dup_32(uint32_t x) {
  const v128 half = v128_dup_32(x);
  return v256_from_v128(half, half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dup_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_32
Unexecuted instantiation: cdef_block_sse4.c:v256_dup_32
96
97
0
/* Returns a v256 whose two halves are both v128_dup_64(x). */
SIMD_INLINE v256 v256_dup_64(uint64_t x) {
  const v128 half = v128_dup_64(x);
  return v256_from_v128(half, half);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dup_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_dup_64
Unexecuted instantiation: cdef_block_sse4.c:v256_dup_64
101
102
0
/* Returns the sum of v128_dotp_su8() over the high halves and over the low
 * halves of a and b. */
SIMD_INLINE int64_t v256_dotp_su8(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v128_dotp_su8(a_hi, b_hi) + v128_dotp_su8(a_lo, b_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_su8
Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_su8
Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_su8
105
106
0
/* Returns the sum of v128_dotp_s16() over the high halves and over the low
 * halves of a and b. */
SIMD_INLINE int64_t v256_dotp_s16(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v128_dotp_s16(a_hi, b_hi) + v128_dotp_s16(a_lo, b_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_s16
109
110
0
/* Returns the sum of v128_dotp_s32() over the high halves and over the low
 * halves of a and b. */
SIMD_INLINE int64_t v256_dotp_s32(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v128_dotp_s32(a_hi, b_hi) + v128_dotp_s32(a_lo, b_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_dotp_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_dotp_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_dotp_s32
113
114
0
/* Returns v128_hadd_u8() of the high half plus v128_hadd_u8() of the low
 * half of a. */
SIMD_INLINE uint64_t v256_hadd_u8(v256 a) {
  const v128 hi = a.val[1];
  const v128 lo = a.val[0];
  return v128_hadd_u8(hi) + v128_hadd_u8(lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_hadd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_hadd_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_hadd_u8
117
118
/* Accumulator state for the v256_sad_u8*() functions: one sad128_internal
 * per 128-bit half (val[0] for the low half, val[1] for the high half). */
typedef struct {
119
  sad128_internal val[2];
120
} sad256_internal;
121
122
0
/* Returns a fresh accumulator with both halves set by v128_sad_u8_init(). */
SIMD_INLINE sad256_internal v256_sad_u8_init(void) {
  sad256_internal acc;
  int half;
  /* The original order is kept: high half (index 1) first, then low. */
  for (half = 1; half >= 0; --half) acc.val[half] = v128_sad_u8_init();
  return acc;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8_init
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8_init
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8_init
128
129
/* Implementation dependent return value.  Result must be finalised with
130
   v256_sad_u8_sum().
131
   The result for more than 16 v256_sad_u8() calls is undefined. */
132
0
SIMD_INLINE sad256_internal v256_sad_u8(sad256_internal s, v256 a, v256 b) {
133
0
  sad256_internal t;
134
0
  t.val[1] = v128_sad_u8(s.val[1], a.val[1], b.val[1]);
135
0
  t.val[0] = v128_sad_u8(s.val[0], a.val[0], b.val[0]);
136
0
  return t;
137
0
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8
138
139
0
/* Finalises an accumulator: returns v128_sad_u8_sum() of the high half plus
 * v128_sad_u8_sum() of the low half. */
SIMD_INLINE uint32_t v256_sad_u8_sum(sad256_internal s) {
  const sad128_internal hi = s.val[1];
  const sad128_internal lo = s.val[0];
  return v128_sad_u8_sum(hi) + v128_sad_u8_sum(lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u8_sum
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u8_sum
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u8_sum
142
143
/* Accumulator state for the v256_ssd_u8*() functions: one ssd128_internal
 * per 128-bit half (val[0] for the low half, val[1] for the high half). */
typedef struct {
144
  ssd128_internal val[2];
145
} ssd256_internal;
146
147
0
/* Returns a fresh accumulator with both halves set by v128_ssd_u8_init(). */
SIMD_INLINE ssd256_internal v256_ssd_u8_init(void) {
  ssd256_internal acc;
  int half;
  /* The original order is kept: high half (index 1) first, then low. */
  for (half = 1; half >= 0; --half) acc.val[half] = v128_ssd_u8_init();
  return acc;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8_init
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8_init
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8_init
153
154
/* Implementation dependent return value.  Result must be finalised with
155
 * v256_ssd_u8_sum(). */
156
0
SIMD_INLINE ssd256_internal v256_ssd_u8(ssd256_internal s, v256 a, v256 b) {
157
0
  ssd256_internal t;
158
0
  t.val[1] = v128_ssd_u8(s.val[1], a.val[1], b.val[1]);
159
0
  t.val[0] = v128_ssd_u8(s.val[0], a.val[0], b.val[0]);
160
0
  return t;
161
0
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8
162
163
0
/* Finalises an accumulator: returns v128_ssd_u8_sum() of the high half plus
 * v128_ssd_u8_sum() of the low half. */
SIMD_INLINE uint32_t v256_ssd_u8_sum(ssd256_internal s) {
  const ssd128_internal hi = s.val[1];
  const ssd128_internal lo = s.val[0];
  return v128_ssd_u8_sum(hi) + v128_ssd_u8_sum(lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_u8_sum
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_u8_sum
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_u8_sum
166
167
0
/* Applies v128_or() to the matching 128-bit halves of a and b and joins the
 * two results into a v256. */
SIMD_INLINE v256 v256_or(v256 a, v256 b) {
  const v128 hi = v128_or(a.val[1], b.val[1]);
  const v128 lo = v128_or(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_or
Unexecuted instantiation: cdef_block_ssse3.c:v256_or
Unexecuted instantiation: cdef_block_sse4.c:v256_or
171
172
0
/* Applies v128_xor() to the matching 128-bit halves of a and b and joins the
 * two results into a v256. */
SIMD_INLINE v256 v256_xor(v256 a, v256 b) {
  const v128 hi = v128_xor(a.val[1], b.val[1]);
  const v128 lo = v128_xor(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_xor
Unexecuted instantiation: cdef_block_ssse3.c:v256_xor
Unexecuted instantiation: cdef_block_sse4.c:v256_xor
176
177
0
/* Applies v128_and() to the matching 128-bit halves of a and b and joins the
 * two results into a v256. */
SIMD_INLINE v256 v256_and(v256 a, v256 b) {
  const v128 hi = v128_and(a.val[1], b.val[1]);
  const v128 lo = v128_and(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_and
Unexecuted instantiation: cdef_block_ssse3.c:v256_and
Unexecuted instantiation: cdef_block_sse4.c:v256_and
181
182
0
/* Applies v128_andn() to the matching 128-bit halves of a and b (argument
 * order preserved) and joins the two results into a v256. */
SIMD_INLINE v256 v256_andn(v256 a, v256 b) {
  const v128 hi = v128_andn(a.val[1], b.val[1]);
  const v128 lo = v128_andn(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_andn
Unexecuted instantiation: cdef_block_ssse3.c:v256_andn
Unexecuted instantiation: cdef_block_sse4.c:v256_andn
186
187
0
/* Applies v128_add_8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_add_8(v256 a, v256 b) {
  const v128 hi = v128_add_8(a.val[1], b.val[1]);
  const v128 lo = v128_add_8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_add_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_add_8
Unexecuted instantiation: cdef_block_sse4.c:v256_add_8
191
192
0
/* Applies v128_add_16() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_add_16(v256 a, v256 b) {
  const v128 hi = v128_add_16(a.val[1], b.val[1]);
  const v128 lo = v128_add_16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_add_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_add_16
Unexecuted instantiation: cdef_block_sse4.c:v256_add_16
196
197
0
/* Applies v128_sadd_s8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sadd_s8(v256 a, v256 b) {
  const v128 hi = v128_sadd_s8(a.val[1], b.val[1]);
  const v128 lo = v128_sadd_s8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_s8
201
202
0
/* Applies v128_sadd_u8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sadd_u8(v256 a, v256 b) {
  const v128 hi = v128_sadd_u8(a.val[1], b.val[1]);
  const v128 lo = v128_sadd_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_u8
206
207
0
/* Applies v128_sadd_s16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_sadd_s16(v256 a, v256 b) {
  const v128 hi = v128_sadd_s16(a.val[1], b.val[1]);
  const v128 lo = v128_sadd_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sadd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_sadd_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_sadd_s16
211
212
0
/* Applies v128_add_32() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_add_32(v256 a, v256 b) {
  const v128 hi = v128_add_32(a.val[1], b.val[1]);
  const v128 lo = v128_add_32(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_add_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_add_32
Unexecuted instantiation: cdef_block_sse4.c:v256_add_32
216
217
0
/* Applies v128_add_64() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_add_64(v256 a, v256 b) {
  const v128 hi = v128_add_64(a.val[1], b.val[1]);
  const v128 lo = v128_add_64(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_add_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_add_64
Unexecuted instantiation: cdef_block_sse4.c:v256_add_64
221
222
0
/* Applies v128_padd_u8() to each 128-bit half of a and joins the two
 * results into a v256. */
SIMD_INLINE v256 v256_padd_u8(v256 a) {
  const v128 hi = v128_padd_u8(a.val[1]);
  const v128 lo = v128_padd_u8(a.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_padd_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_padd_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_padd_u8
225
226
0
/* Applies v128_padd_s16() to each 128-bit half of a and joins the two
 * results into a v256. */
SIMD_INLINE v256 v256_padd_s16(v256 a) {
  const v128 hi = v128_padd_s16(a.val[1]);
  const v128 lo = v128_padd_s16(a.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_padd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_padd_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_padd_s16
229
230
0
/* Applies v128_sub_8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sub_8(v256 a, v256 b) {
  const v128 hi = v128_sub_8(a.val[1], b.val[1]);
  const v128 lo = v128_sub_8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sub_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_8
Unexecuted instantiation: cdef_block_sse4.c:v256_sub_8
234
235
0
/* Applies v128_ssub_u8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_ssub_u8(v256 a, v256 b) {
  const v128 hi = v128_ssub_u8(a.val[1], b.val[1]);
  const v128 lo = v128_ssub_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_u8
239
240
0
/* Applies v128_ssub_s8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_ssub_s8(v256 a, v256 b) {
  const v128 hi = v128_ssub_s8(a.val[1], b.val[1]);
  const v128 lo = v128_ssub_s8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_s8
244
245
0
/* Applies v128_sub_16() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sub_16(v256 a, v256 b) {
  const v128 hi = v128_sub_16(a.val[1], b.val[1]);
  const v128 lo = v128_sub_16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sub_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_16
Unexecuted instantiation: cdef_block_sse4.c:v256_sub_16
249
250
0
/* Applies v128_ssub_s16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) {
  const v128 hi = v128_ssub_s16(a.val[1], b.val[1]);
  const v128 lo = v128_ssub_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_s16
254
255
0
/* Applies v128_ssub_u16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) {
  const v128 hi = v128_ssub_u16(a.val[1], b.val[1]);
  const v128 lo = v128_ssub_u16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssub_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssub_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_ssub_u16
259
260
0
/* Applies v128_sub_32() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) {
  const v128 hi = v128_sub_32(a.val[1], b.val[1]);
  const v128 lo = v128_sub_32(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sub_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_32
Unexecuted instantiation: cdef_block_sse4.c:v256_sub_32
264
265
0
/* Applies v128_sub_64() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_sub_64(v256 a, v256 b) {
  const v128 hi = v128_sub_64(a.val[1], b.val[1]);
  const v128 lo = v128_sub_64(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sub_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_sub_64
Unexecuted instantiation: cdef_block_sse4.c:v256_sub_64
269
270
0
/* Applies v128_abs_s16() to each 128-bit half of a and joins the two
 * results into a v256. */
SIMD_INLINE v256 v256_abs_s16(v256 a) {
  const v128 hi = v128_abs_s16(a.val[1]);
  const v128 lo = v128_abs_s16(a.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_abs_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_abs_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_abs_s16
273
274
0
/* Applies v128_abs_s8() to each 128-bit half of a and joins the two results
 * into a v256. */
SIMD_INLINE v256 v256_abs_s8(v256 a) {
  const v128 hi = v128_abs_s8(a.val[1]);
  const v128 lo = v128_abs_s8(a.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_abs_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_abs_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_abs_s8
277
278
0
/* Multiplies two v128 inputs into a v256: computes v128_mullo_s16(a, b) and
 * v128_mulhi_s16(a, b), then combines them with v128_ziphi_16() for the high
 * half and v128_ziplo_16() for the low half. */
SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) {
  const v128 prod_lo = v128_mullo_s16(a, b);
  const v128 prod_hi = v128_mulhi_s16(a, b);
  const v128 upper = v128_ziphi_16(prod_hi, prod_lo);
  const v128 lower = v128_ziplo_16(prod_hi, prod_lo);
  return v256_from_v128(upper, lower);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_mul_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_mul_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_mul_s16
284
285
0
/* Applies v128_mullo_s16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
  const v128 hi = v128_mullo_s16(a.val[1], b.val[1]);
  const v128 lo = v128_mullo_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_mullo_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_mullo_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_mullo_s16
289
290
0
/* Applies v128_mulhi_s16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_mulhi_s16(v256 a, v256 b) {
  const v128 hi = v128_mulhi_s16(a.val[1], b.val[1]);
  const v128 lo = v128_mulhi_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_mulhi_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_mulhi_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_mulhi_s16
294
295
0
/* Applies v128_mullo_s32() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_mullo_s32(v256 a, v256 b) {
  const v128 hi = v128_mullo_s32(a.val[1], b.val[1]);
  const v128 lo = v128_mullo_s32(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_mullo_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_mullo_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_mullo_s32
299
300
0
/* Applies v128_madd_s16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_madd_s16(v256 a, v256 b) {
  const v128 hi = v128_madd_s16(a.val[1], b.val[1]);
  const v128 lo = v128_madd_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_madd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_madd_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_madd_s16
304
305
0
/* Applies v128_madd_us8() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_madd_us8(v256 a, v256 b) {
  const v128 hi = v128_madd_us8(a.val[1], b.val[1]);
  const v128 lo = v128_madd_us8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_madd_us8
Unexecuted instantiation: cdef_block_ssse3.c:v256_madd_us8
Unexecuted instantiation: cdef_block_sse4.c:v256_madd_us8
309
310
0
/* Applies v128_avg_u8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_avg_u8(v256 a, v256 b) {
  const v128 hi = v128_avg_u8(a.val[1], b.val[1]);
  const v128 lo = v128_avg_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_avg_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_avg_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_avg_u8
314
315
0
/* Applies v128_rdavg_u8() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_rdavg_u8(v256 a, v256 b) {
  const v128 hi = v128_rdavg_u8(a.val[1], b.val[1]);
  const v128 lo = v128_rdavg_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_rdavg_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_rdavg_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_rdavg_u8
319
320
0
/* Applies v128_rdavg_u16() to the matching 128-bit halves of a and b and
 * joins the two results into a v256. */
SIMD_INLINE v256 v256_rdavg_u16(v256 a, v256 b) {
  const v128 hi = v128_rdavg_u16(a.val[1], b.val[1]);
  const v128 lo = v128_rdavg_u16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_rdavg_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_rdavg_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_rdavg_u16
324
325
0
/* Applies v128_avg_u16() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_avg_u16(v256 a, v256 b) {
  const v128 hi = v128_avg_u16(a.val[1], b.val[1]);
  const v128 lo = v128_avg_u16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_avg_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_avg_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_avg_u16
329
330
0
/* Applies v128_min_u8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_min_u8(v256 a, v256 b) {
  const v128 hi = v128_min_u8(a.val[1], b.val[1]);
  const v128 lo = v128_min_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_min_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_min_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_min_u8
334
335
0
/* Applies v128_max_u8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) {
  const v128 hi = v128_max_u8(a.val[1], b.val[1]);
  const v128 lo = v128_max_u8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_max_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_max_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_max_u8
339
340
0
/* Applies v128_min_s8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) {
  const v128 hi = v128_min_s8(a.val[1], b.val[1]);
  const v128 lo = v128_min_s8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_min_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_min_s8
344
345
0
/* Combines the v128_movemask_8() results of both halves of a: the mask of
 * the high half is shifted left by 16 bits and OR-ed with the mask of the
 * low half. */
SIMD_INLINE uint32_t v256_movemask_8(v256 a) {
  const v128 hi = a.val[1];
  const v128 lo = a.val[0];
  return (v128_movemask_8(hi) << 16) | v128_movemask_8(lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_movemask_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_movemask_8
Unexecuted instantiation: cdef_block_sse4.c:v256_movemask_8
349
350
0
/* Applies v128_blend_8() to the matching 128-bit halves of a, b and c
 * (argument order preserved) and joins the two results into a v256. */
SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
  const v128 hi = v128_blend_8(a.val[1], b.val[1], c.val[1]);
  const v128 lo = v128_blend_8(a.val[0], b.val[0], c.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_blend_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_blend_8
Unexecuted instantiation: cdef_block_sse4.c:v256_blend_8
354
355
0
/* Applies v128_max_s8() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_max_s8(v256 a, v256 b) {
  const v128 hi = v128_max_s8(a.val[1], b.val[1]);
  const v128 lo = v128_max_s8(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_max_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_max_s8
359
360
0
/* Applies v128_min_s16() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_min_s16(v256 a, v256 b) {
  const v128 hi = v128_min_s16(a.val[1], b.val[1]);
  const v128 lo = v128_min_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_min_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_min_s16
364
365
0
/* Applies v128_max_s16() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_max_s16(v256 a, v256 b) {
  const v128 hi = v128_max_s16(a.val[1], b.val[1]);
  const v128 lo = v128_max_s16(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_max_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_max_s16
369
370
0
/* Applies v128_min_s32() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_min_s32(v256 a, v256 b) {
  const v128 hi = v128_min_s32(a.val[1], b.val[1]);
  const v128 lo = v128_min_s32(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_min_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_min_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_min_s32
374
375
0
/* Applies v128_max_s32() to the matching 128-bit halves of a and b and joins
 * the two results into a v256. */
SIMD_INLINE v256 v256_max_s32(v256 a, v256 b) {
  const v128 hi = v128_max_s32(a.val[1], b.val[1]);
  const v128 lo = v128_max_s32(a.val[0], b.val[0]);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_max_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_max_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_max_s32
379
380
0
/* Zips the low 128-bit halves of a and b: v128_ziphi_8() of those halves
 * becomes the high half of the result and v128_ziplo_8() the low half. */
SIMD_INLINE v256 v256_ziplo_8(v256 a, v256 b) {
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v256_from_v128(v128_ziphi_8(a_lo, b_lo), v128_ziplo_8(a_lo, b_lo));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_8
Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_8
384
385
0
/* Zips the high 128-bit halves of a and b: v128_ziphi_8() of those halves
 * becomes the high half of the result and v128_ziplo_8() the low half. */
SIMD_INLINE v256 v256_ziphi_8(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  return v256_from_v128(v128_ziphi_8(a_hi, b_hi), v128_ziplo_8(a_hi, b_hi));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_8
Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_8
389
390
0
/* Zips the low 128-bit halves of a and b: v128_ziphi_16() of those halves
 * becomes the high half of the result and v128_ziplo_16() the low half. */
SIMD_INLINE v256 v256_ziplo_16(v256 a, v256 b) {
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v256_from_v128(v128_ziphi_16(a_lo, b_lo), v128_ziplo_16(a_lo, b_lo));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_16
Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_16
394
395
0
/* Zips the high 128-bit halves of a and b: v128_ziphi_16() of those halves
 * becomes the high half of the result and v128_ziplo_16() the low half. */
SIMD_INLINE v256 v256_ziphi_16(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  return v256_from_v128(v128_ziphi_16(a_hi, b_hi), v128_ziplo_16(a_hi, b_hi));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_16
Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_16
399
400
0
/* Zips the low 128-bit halves of a and b: v128_ziphi_32() of those halves
 * becomes the high half of the result and v128_ziplo_32() the low half. */
SIMD_INLINE v256 v256_ziplo_32(v256 a, v256 b) {
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v256_from_v128(v128_ziphi_32(a_lo, b_lo), v128_ziplo_32(a_lo, b_lo));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_32
Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_32
404
405
0
/* Zips the high 128-bit halves of a and b: v128_ziphi_32() of those halves
 * becomes the high half of the result and v128_ziplo_32() the low half. */
SIMD_INLINE v256 v256_ziphi_32(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  return v256_from_v128(v128_ziphi_32(a_hi, b_hi), v128_ziplo_32(a_hi, b_hi));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_32
Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_32
409
410
0
/* Zips the low 128-bit halves of a and b: v128_ziphi_64() of those halves
 * becomes the high half of the result and v128_ziplo_64() the low half. */
SIMD_INLINE v256 v256_ziplo_64(v256 a, v256 b) {
  const v128 a_lo = a.val[0];
  const v128 b_lo = b.val[0];
  return v256_from_v128(v128_ziphi_64(a_lo, b_lo), v128_ziplo_64(a_lo, b_lo));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_64
Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_64
414
415
0
/* Zips the high 128-bit halves of a and b: v128_ziphi_64() of those halves
 * becomes the high half of the result and v128_ziplo_64() the low half. */
SIMD_INLINE v256 v256_ziphi_64(v256 a, v256 b) {
  const v128 a_hi = a.val[1];
  const v128 b_hi = b.val[1];
  return v256_from_v128(v128_ziphi_64(a_hi, b_hi), v128_ziplo_64(a_hi, b_hi));
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_64
Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_64
419
420
0
/* Returns a v256 whose high half is the low half of a and whose low half is
 * the low half of b. */
SIMD_INLINE v256 v256_ziplo_128(v256 a, v256 b) {
  const v128 hi = a.val[0];
  const v128 lo = b.val[0];
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziplo_128
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziplo_128
Unexecuted instantiation: cdef_block_sse4.c:v256_ziplo_128
423
424
0
/* Returns a v256 whose high half is the high half of a and whose low half is
 * the high half of b. */
SIMD_INLINE v256 v256_ziphi_128(v256 a, v256 b) {
  const v128 hi = a.val[1];
  const v128 lo = b.val[1];
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ziphi_128
Unexecuted instantiation: cdef_block_ssse3.c:v256_ziphi_128
Unexecuted instantiation: cdef_block_sse4.c:v256_ziphi_128
427
428
0
/* Zips two v128 inputs into a v256: v128_ziphi_8(a, b) is the high half and
 * v128_ziplo_8(a, b) the low half. */
SIMD_INLINE v256 v256_zip_8(v128 a, v128 b) {
  const v128 hi = v128_ziphi_8(a, b);
  const v128 lo = v128_ziplo_8(a, b);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_zip_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_8
Unexecuted instantiation: cdef_block_sse4.c:v256_zip_8
431
432
0
/* Zips two v128 inputs into a v256: v128_ziphi_16(a, b) is the high half and
 * v128_ziplo_16(a, b) the low half. */
SIMD_INLINE v256 v256_zip_16(v128 a, v128 b) {
  const v128 hi = v128_ziphi_16(a, b);
  const v128 lo = v128_ziplo_16(a, b);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_zip_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_16
Unexecuted instantiation: cdef_block_sse4.c:v256_zip_16
435
436
0
/* Zips two v128 inputs into a v256: v128_ziphi_32(a, b) is the high half and
 * v128_ziplo_32(a, b) the low half. */
SIMD_INLINE v256 v256_zip_32(v128 a, v128 b) {
  const v128 hi = v128_ziphi_32(a, b);
  const v128 lo = v128_ziplo_32(a, b);
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_zip_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_zip_32
Unexecuted instantiation: cdef_block_sse4.c:v256_zip_32
439
440
0
/* The high half of the result is v128_unziplo_8() over the two halves of a;
 * the low half is v128_unziplo_8() over the two halves of b. */
SIMD_INLINE v256 v256_unziplo_8(v256 a, v256 b) {
  const v128 from_a = v128_unziplo_8(a.val[1], a.val[0]);
  const v128 from_b = v128_unziplo_8(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_8
Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_8
444
445
0
/* The high half of the result is v128_unziphi_8() over the two halves of a;
 * the low half is v128_unziphi_8() over the two halves of b. */
SIMD_INLINE v256 v256_unziphi_8(v256 a, v256 b) {
  const v128 from_a = v128_unziphi_8(a.val[1], a.val[0]);
  const v128 from_b = v128_unziphi_8(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_8
Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_8
449
450
0
/* The high half of the result is v128_unziplo_16() over the two halves of a;
 * the low half is v128_unziplo_16() over the two halves of b. */
SIMD_INLINE v256 v256_unziplo_16(v256 a, v256 b) {
  const v128 from_a = v128_unziplo_16(a.val[1], a.val[0]);
  const v128 from_b = v128_unziplo_16(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_16
Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_16
454
455
0
/* The high half of the result is v128_unziphi_16() over the two halves of a;
 * the low half is v128_unziphi_16() over the two halves of b. */
SIMD_INLINE v256 v256_unziphi_16(v256 a, v256 b) {
  const v128 from_a = v128_unziphi_16(a.val[1], a.val[0]);
  const v128 from_b = v128_unziphi_16(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_16
Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_16
459
460
0
/* The high half of the result is v128_unziplo_32() over the two halves of a;
 * the low half is v128_unziplo_32() over the two halves of b. */
SIMD_INLINE v256 v256_unziplo_32(v256 a, v256 b) {
  const v128 from_a = v128_unziplo_32(a.val[1], a.val[0]);
  const v128 from_b = v128_unziplo_32(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_32
Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_32
464
465
0
/* The high half of the result is v128_unziphi_32() over the two halves of a;
 * the low half is v128_unziphi_32() over the two halves of b. */
SIMD_INLINE v256 v256_unziphi_32(v256 a, v256 b) {
  const v128 from_a = v128_unziphi_32(a.val[1], a.val[0]);
  const v128 from_b = v128_unziphi_32(b.val[1], b.val[0]);
  return v256_from_v128(from_a, from_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_32
Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_32
469
470
0
/* Gathers the low 64 bits of each 128-bit half: the high half of the result
 * comes from a and the low half from b. The SSE2 build uses _mm_shuffle_pd()
 * with immediate 0; other builds go through v128_low_v64() and
 * v256_from_v64(). */
SIMD_INLINE v256 v256_unziplo_64(v256 a, v256 b) {
#if HAVE_SSE2
  const v128 from_a = _mm_castpd_si128(_mm_shuffle_pd(
      _mm_castsi128_pd(a.val[0]), _mm_castsi128_pd(a.val[1]), 0));
  const v128 from_b = _mm_castpd_si128(_mm_shuffle_pd(
      _mm_castsi128_pd(b.val[0]), _mm_castsi128_pd(b.val[1]), 0));
  return v256_from_v128(from_a, from_b);
#else
  const v64 a1 = v128_low_v64(a.val[1]);
  const v64 a0 = v128_low_v64(a.val[0]);
  const v64 b1 = v128_low_v64(b.val[1]);
  const v64 b0 = v128_low_v64(b.val[0]);
  return v256_from_v64(a1, a0, b1, b0);
#endif
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziplo_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziplo_64
Unexecuted instantiation: cdef_block_sse4.c:v256_unziplo_64
482
483
0
/* Gathers the high 64-bit lane of every 128-bit half of `a` and `b`.
   The lanes taken from `a` form the first argument of the v256 constructor
   and the lanes taken from `b` the second. */
SIMD_INLINE v256 v256_unziphi_64(v256 a, v256 b) {
#if HAVE_SSE2
  /* Selector 3 makes _mm_shuffle_pd take the high lane of both inputs. */
  const __m128d a0 = _mm_castsi128_pd(a.val[0]);
  const __m128d a1 = _mm_castsi128_pd(a.val[1]);
  const __m128d b0 = _mm_castsi128_pd(b.val[0]);
  const __m128d b1 = _mm_castsi128_pd(b.val[1]);
  return v256_from_v128(_mm_castpd_si128(_mm_shuffle_pd(a0, a1, 3)),
                        _mm_castpd_si128(_mm_shuffle_pd(b0, b1, 3)));
#else
  const v64 a1_hi = v128_high_v64(a.val[1]);
  const v64 a0_hi = v128_high_v64(a.val[0]);
  const v64 b1_hi = v128_high_v64(b.val[1]);
  const v64 b0_hi = v128_high_v64(b.val[0]);
  return v256_from_v64(a1_hi, a0_hi, b1_hi, b0_hi);
#endif
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unziphi_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_unziphi_64
Unexecuted instantiation: cdef_block_sse4.c:v256_unziphi_64
495
496
0
/* Widens a whole v128 into a v256: v128_unpackhi_u8_s16(a) is passed to
   v256_from_v128 first and v128_unpacklo_u8_s16(a) second. */
SIMD_INLINE v256 v256_unpack_u8_s16(v128 a) {
  const v128 from_hi = v128_unpackhi_u8_s16(a);
  const v128 from_lo = v128_unpacklo_u8_s16(a);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_u8_s16
499
500
0
/* Same as v256_unpack_u8_s16() applied to a.val[0]; a.val[1] is ignored. */
SIMD_INLINE v256 v256_unpacklo_u8_s16(v256 a) {
  const v128 src = a.val[0];
  const v128 from_hi = v128_unpackhi_u8_s16(src);
  const v128 from_lo = v128_unpacklo_u8_s16(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_u8_s16
504
505
0
/* Same as v256_unpack_u8_s16() applied to a.val[1]; a.val[0] is ignored. */
SIMD_INLINE v256 v256_unpackhi_u8_s16(v256 a) {
  const v128 src = a.val[1];
  const v128 from_hi = v128_unpackhi_u8_s16(src);
  const v128 from_lo = v128_unpacklo_u8_s16(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_u8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_u8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_u8_s16
509
510
0
/* Widens a whole v128 into a v256: v128_unpackhi_s8_s16(a) is passed to
   v256_from_v128 first and v128_unpacklo_s8_s16(a) second. */
SIMD_INLINE v256 v256_unpack_s8_s16(v128 a) {
  const v128 from_hi = v128_unpackhi_s8_s16(a);
  const v128 from_lo = v128_unpacklo_s8_s16(a);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_s8_s16
513
514
0
/* Same as v256_unpack_s8_s16() applied to a.val[0]; a.val[1] is ignored. */
SIMD_INLINE v256 v256_unpacklo_s8_s16(v256 a) {
  const v128 src = a.val[0];
  const v128 from_hi = v128_unpackhi_s8_s16(src);
  const v128 from_lo = v128_unpacklo_s8_s16(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_s8_s16
518
519
0
/* Same as v256_unpack_s8_s16() applied to a.val[1]; a.val[0] is ignored. */
SIMD_INLINE v256 v256_unpackhi_s8_s16(v256 a) {
  const v128 src = a.val[1];
  const v128 from_hi = v128_unpackhi_s8_s16(src);
  const v128 from_lo = v128_unpacklo_s8_s16(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_s8_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_s8_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_s8_s16
523
524
0
/* Packs each operand with v128_pack_s32_s16(val[1], val[0]). The packed `a`
   is handed to v256_from_v128 first, the packed `b` second. */
SIMD_INLINE v256 v256_pack_s32_s16(v256 a, v256 b) {
  const v128 packed_a = v128_pack_s32_s16(a.val[1], a.val[0]);
  const v128 packed_b = v128_pack_s32_s16(b.val[1], b.val[0]);
  return v256_from_v128(packed_a, packed_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s32_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s32_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s32_s16
528
529
0
/* Packs each operand with v128_pack_s32_u16(val[1], val[0]). The packed `a`
   is handed to v256_from_v128 first, the packed `b` second. */
SIMD_INLINE v256 v256_pack_s32_u16(v256 a, v256 b) {
  const v128 packed_a = v128_pack_s32_u16(a.val[1], a.val[0]);
  const v128 packed_b = v128_pack_s32_u16(b.val[1], b.val[0]);
  return v256_from_v128(packed_a, packed_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s32_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s32_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s32_u16
533
534
0
/* Packs each operand with v128_pack_s16_u8(val[1], val[0]). The packed `a`
   is handed to v256_from_v128 first, the packed `b` second. */
SIMD_INLINE v256 v256_pack_s16_u8(v256 a, v256 b) {
  const v128 packed_a = v128_pack_s16_u8(a.val[1], a.val[0]);
  const v128 packed_b = v128_pack_s16_u8(b.val[1], b.val[0]);
  return v256_from_v128(packed_a, packed_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s16_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s16_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s16_u8
538
539
0
/* Packs each operand with v128_pack_s16_s8(val[1], val[0]). The packed `a`
   is handed to v256_from_v128 first, the packed `b` second. */
SIMD_INLINE v256 v256_pack_s16_s8(v256 a, v256 b) {
  const v128 packed_a = v128_pack_s16_s8(a.val[1], a.val[0]);
  const v128 packed_b = v128_pack_s16_s8(b.val[1], b.val[0]);
  return v256_from_v128(packed_a, packed_b);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_pack_s16_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_pack_s16_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_pack_s16_s8
543
544
0
/* Widens a whole v128 into a v256: v128_unpackhi_u16_s32(a) is passed to
   v256_from_v128 first and v128_unpacklo_u16_s32(a) second. */
SIMD_INLINE v256 v256_unpack_u16_s32(v128 a) {
  const v128 from_hi = v128_unpackhi_u16_s32(a);
  const v128 from_lo = v128_unpacklo_u16_s32(a);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_u16_s32
547
548
0
/* Widens a whole v128 into a v256: v128_unpackhi_s16_s32(a) is passed to
   v256_from_v128 first and v128_unpacklo_s16_s32(a) second. */
SIMD_INLINE v256 v256_unpack_s16_s32(v128 a) {
  const v128 from_hi = v128_unpackhi_s16_s32(a);
  const v128 from_lo = v128_unpacklo_s16_s32(a);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpack_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpack_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpack_s16_s32
551
552
0
/* Same as v256_unpack_u16_s32() applied to a.val[0]; a.val[1] is ignored. */
SIMD_INLINE v256 v256_unpacklo_u16_s32(v256 a) {
  const v128 src = a.val[0];
  const v128 from_hi = v128_unpackhi_u16_s32(src);
  const v128 from_lo = v128_unpacklo_u16_s32(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_u16_s32
556
557
0
/* Same as v256_unpack_s16_s32() applied to a.val[0]; a.val[1] is ignored. */
SIMD_INLINE v256 v256_unpacklo_s16_s32(v256 a) {
  const v128 src = a.val[0];
  const v128 from_hi = v128_unpackhi_s16_s32(src);
  const v128 from_lo = v128_unpacklo_s16_s32(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpacklo_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpacklo_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpacklo_s16_s32
561
562
0
/* Same as v256_unpack_u16_s32() applied to a.val[1]; a.val[0] is ignored. */
SIMD_INLINE v256 v256_unpackhi_u16_s32(v256 a) {
  const v128 src = a.val[1];
  const v128 from_hi = v128_unpackhi_u16_s32(src);
  const v128 from_lo = v128_unpacklo_u16_s32(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_u16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_u16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_u16_s32
566
567
0
/* Same as v256_unpack_s16_s32() applied to a.val[1]; a.val[0] is ignored. */
SIMD_INLINE v256 v256_unpackhi_s16_s32(v256 a) {
  const v128 src = a.val[1];
  const v128 from_hi = v128_unpackhi_s16_s32(src);
  const v128 from_lo = v128_unpacklo_s16_s32(src);
  return v256_from_v128(from_hi, from_lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_unpackhi_s16_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_unpackhi_s16_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_unpackhi_s16_s32
571
572
0
/* Applies v128_cmpgt_s8 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpgt_s8(v256 a, v256 b) {
  const v128 r1 = v128_cmpgt_s8(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpgt_s8(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s8
576
577
0
/* Applies v128_cmplt_s8 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmplt_s8(v256 a, v256 b) {
  const v128 r1 = v128_cmplt_s8(a.val[1], b.val[1]);
  const v128 r0 = v128_cmplt_s8(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s8
581
582
0
/* Applies v128_cmpeq_8 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpeq_8(v256 a, v256 b) {
  const v128 r1 = v128_cmpeq_8(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpeq_8(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_8
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_8
586
587
0
/* Applies v128_cmpgt_s16 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpgt_s16(v256 a, v256 b) {
  const v128 r1 = v128_cmpgt_s16(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpgt_s16(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s16
591
592
0
/* Applies v128_cmplt_s16 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmplt_s16(v256 a, v256 b) {
  const v128 r1 = v128_cmplt_s16(a.val[1], b.val[1]);
  const v128 r0 = v128_cmplt_s16(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s16
596
597
0
/* Applies v128_cmpeq_16 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpeq_16(v256 a, v256 b) {
  const v128 r1 = v128_cmpeq_16(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpeq_16(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_16
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_16
601
602
0
/* Applies v128_cmpgt_s32 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpgt_s32(v256 a, v256 b) {
  const v128 r1 = v128_cmpgt_s32(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpgt_s32(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpgt_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpgt_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpgt_s32
606
607
0
/* Applies v128_cmplt_s32 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmplt_s32(v256 a, v256 b) {
  const v128 r1 = v128_cmplt_s32(a.val[1], b.val[1]);
  const v128 r0 = v128_cmplt_s32(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmplt_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmplt_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_cmplt_s32
611
612
0
/* Applies v128_cmpeq_32 to the matching 128-bit halves of `a` and `b`
   (val[1] with val[1], val[0] with val[0]) and joins the two results. */
SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) {
  const v128 r1 = v128_cmpeq_32(a.val[1], b.val[1]);
  const v128 r0 = v128_cmpeq_32(a.val[0], b.val[0]);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_cmpeq_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_cmpeq_32
Unexecuted instantiation: cdef_block_sse4.c:v256_cmpeq_32
616
617
0
/* Byte shuffle over all 256 bits of `x`, steered by `pattern`.
   Three build-time variants:
     - AArch64 NEON: x.val[0] and x.val[1] form a two-register table for
       vqtbl2q_u8, which is looked up once per 128-bit half of `pattern`.
     - other NEON: `x` is split into four 8-byte table entries for vtbl4_u8,
       which is looked up once per 64-bit quarter of `pattern`.
     - otherwise: for each half of `pattern`, x.val[0] is shuffled with the
       pattern itself and x.val[1] with (pattern - 16); v128_blend_8 merges
       the two under the mask v128_cmplt_s8(pattern, 16). */
SIMD_INLINE v256 v256_shuffle_8(v256 x, v256 pattern) {
#if HAVE_NEON
#if AOM_ARCH_AARCH64
  const uint8x16x2_t table = { { vreinterpretq_u8_s64(x.val[0]),
                                 vreinterpretq_u8_s64(x.val[1]) } };
  const uint8x16_t idx1 = vreinterpretq_u8_s64(pattern.val[1]);
  const uint8x16_t idx0 = vreinterpretq_u8_s64(pattern.val[0]);
  return v256_from_v128(vreinterpretq_s64_u8(vqtbl2q_u8(table, idx1)),
                        vreinterpretq_s64_u8(vqtbl2q_u8(table, idx0)));
#else
  const uint8x16_t x0 = vreinterpretq_u8_s64(x.val[0]);
  const uint8x16_t x1 = vreinterpretq_u8_s64(x.val[1]);
  const uint8x8x4_t table = { { vget_low_u8(x0), vget_high_u8(x0),
                                vget_low_u8(x1), vget_high_u8(x1) } };
  /* One lookup per 64-bit quarter of the pattern, highest quarter first. */
  const uint8x8_t q3 =
      vtbl4_u8(table, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])));
  const uint8x8_t q2 =
      vtbl4_u8(table, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])));
  const uint8x8_t q1 =
      vtbl4_u8(table, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])));
  const uint8x8_t q0 =
      vtbl4_u8(table, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])));
  return v256_from_64(vget_lane_u64(vreinterpret_u64_u8(q3), 0),
                      vget_lane_u64(vreinterpret_u64_u8(q2), 0),
                      vget_lane_u64(vreinterpret_u64_u8(q1), 0),
                      vget_lane_u64(vreinterpret_u64_u8(q0), 0));
#endif
#else
  const v128 k16 = v128_dup_8(16);
  const v128 idx1 = pattern.val[1];
  const v128 idx0 = pattern.val[0];
  const v128 r1 =
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(idx1, k16)),
                   v128_shuffle_8(x.val[0], idx1), v128_cmplt_s8(idx1, k16));
  const v128 r0 =
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(idx0, k16)),
                   v128_shuffle_8(x.val[0], idx0), v128_cmplt_s8(idx0, k16));
  return v256_from_v128(r1, r0);
#endif
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shuffle_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_shuffle_8
Unexecuted instantiation: cdef_block_sse4.c:v256_shuffle_8
655
656
0
/* Byte shuffle over the 64 bytes formed by `y` and `x`, steered by `pattern`.
   Three build-time variants:
     - AArch64 NEON: { y.val[0], y.val[1], x.val[0], x.val[1] } is the
       four-register table for vqtbl4q_u8, looked up once per 128-bit half
       of `pattern`.
     - other NEON: `x` is looked up with (pattern - 32) and `y` with the
       pattern itself, each through vtbl4_u8; v256_blend_8 merges the two
       under the mask v256_cmplt_s8(pattern, 32).
     - otherwise: a partial result is built from `x` (indices rebased by 32
       and 48) and one from `y` (indices rebased by 0 and 16) with
       v128_shuffle_8 / v128_blend_8; v256_blend_8 merges them under the
       mask v256_cmpgt_s8(32, pattern). */
SIMD_INLINE v256 v256_wideshuffle_8(v256 x, v256 y, v256 pattern) {
#if HAVE_NEON
#if AOM_ARCH_AARCH64
  const uint8x16x4_t table = { {
      vreinterpretq_u8_s64(y.val[0]),
      vreinterpretq_u8_s64(y.val[1]),
      vreinterpretq_u8_s64(x.val[0]),
      vreinterpretq_u8_s64(x.val[1]),
  } };
  const uint8x16_t idx1 = vreinterpretq_u8_s64(pattern.val[1]);
  const uint8x16_t idx0 = vreinterpretq_u8_s64(pattern.val[0]);
  return v256_from_v128(vreinterpretq_s64_u8(vqtbl4q_u8(table, idx1)),
                        vreinterpretq_s64_u8(vqtbl4q_u8(table, idx0)));
#else
  const v256 k32 = v256_dup_8(32);
  const v256 rebased = v256_sub_8(pattern, k32);
  const uint8x16_t x0 = vreinterpretq_u8_s64(x.val[0]);
  const uint8x16_t x1 = vreinterpretq_u8_s64(x.val[1]);
  const uint8x16_t y0 = vreinterpretq_u8_s64(y.val[0]);
  const uint8x16_t y1 = vreinterpretq_u8_s64(y.val[1]);
  const uint8x8x4_t x_table = { { vget_low_u8(x0), vget_high_u8(x0),
                                  vget_low_u8(x1), vget_high_u8(x1) } };
  const uint8x8x4_t y_table = { { vget_low_u8(y0), vget_high_u8(y0),
                                  vget_low_u8(y1), vget_high_u8(y1) } };

  /* Lookups into x use the rebased pattern, one per 64-bit quarter. */
  const uint8x8_t xq3 =
      vtbl4_u8(x_table, vreinterpret_u8_s64(vget_high_s64(rebased.val[1])));
  const uint8x8_t xq2 =
      vtbl4_u8(x_table, vreinterpret_u8_s64(vget_low_s64(rebased.val[1])));
  const uint8x8_t xq1 =
      vtbl4_u8(x_table, vreinterpret_u8_s64(vget_high_s64(rebased.val[0])));
  const uint8x8_t xq0 =
      vtbl4_u8(x_table, vreinterpret_u8_s64(vget_low_s64(rebased.val[0])));
  const v256 from_x = v256_from_64(vget_lane_u64(vreinterpret_u64_u8(xq3), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(xq2), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(xq1), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(xq0), 0));

  /* Lookups into y use the pattern unchanged. */
  const uint8x8_t yq3 =
      vtbl4_u8(y_table, vreinterpret_u8_s64(vget_high_s64(pattern.val[1])));
  const uint8x8_t yq2 =
      vtbl4_u8(y_table, vreinterpret_u8_s64(vget_low_s64(pattern.val[1])));
  const uint8x8_t yq1 =
      vtbl4_u8(y_table, vreinterpret_u8_s64(vget_high_s64(pattern.val[0])));
  const uint8x8_t yq0 =
      vtbl4_u8(y_table, vreinterpret_u8_s64(vget_low_s64(pattern.val[0])));
  const v256 from_y = v256_from_64(vget_lane_u64(vreinterpret_u64_u8(yq3), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(yq2), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(yq1), 0),
                                   vget_lane_u64(vreinterpret_u64_u8(yq0), 0));

  return v256_blend_8(from_x, from_y, v256_cmplt_s8(pattern, k32));
#endif
#else
  const v128 k16 = v128_dup_8(16);
  const v128 k32 = v128_dup_8(32);
  const v128 k48 = v128_dup_8(48);
  const v128 idx1 = pattern.val[1];
  const v128 idx0 = pattern.val[0];

  /* Partial result drawn from x. */
  const v128 x_r1 =
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(idx1, k48)),
                   v128_shuffle_8(x.val[0], v128_sub_8(idx1, k32)),
                   v128_cmpgt_s8(k48, idx1));
  const v128 x_r0 =
      v128_blend_8(v128_shuffle_8(x.val[1], v128_sub_8(idx0, k48)),
                   v128_shuffle_8(x.val[0], v128_sub_8(idx0, k32)),
                   v128_cmpgt_s8(k48, idx0));

  /* Partial result drawn from y. */
  const v128 y_r1 =
      v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(idx1, k16)),
                   v128_shuffle_8(y.val[0], idx1), v128_cmpgt_s8(k16, idx1));
  const v128 y_r0 =
      v128_blend_8(v128_shuffle_8(y.val[1], v128_sub_8(idx0, k16)),
                   v128_shuffle_8(y.val[0], idx0), v128_cmpgt_s8(k16, idx0));

  return v256_blend_8(v256_from_v128(x_r1, x_r0), v256_from_v128(y_r1, y_r0),
                      v256_cmpgt_s8(v256_from_v128(k32, k32), pattern));
#endif
}
Unexecuted instantiation: cdef_block_sse2.c:v256_wideshuffle_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_wideshuffle_8
Unexecuted instantiation: cdef_block_sse4.c:v256_wideshuffle_8
725
726
0
/* Shuffles each 128-bit half of `a` on its own with v128_shuffle_8, using the
   matching half of `pattern`; the two halves never exchange bytes. */
SIMD_INLINE v256 v256_pshuffle_8(v256 a, v256 pattern) {
  const v128 hi = v128_shuffle_8(v256_high_v128(a), v256_high_v128(pattern));
  const v128 lo = v128_shuffle_8(v256_low_v128(a), v256_low_v128(pattern));
  return v256_from_v128(hi, lo);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_pshuffle_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_pshuffle_8
Unexecuted instantiation: cdef_block_sse4.c:v256_pshuffle_8
731
732
0
/* Applies v128_shl_8 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shl_8(v256 a, const unsigned int c) {
  const v128 r1 = v128_shl_8(a.val[1], c);
  const v128 r0 = v128_shl_8(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shl_8
Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_8
Unexecuted instantiation: cdef_block_sse4.c:v256_shl_8
735
736
0
/* Applies v128_shr_u8 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_u8(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_u8(a.val[1], c);
  const v128 r0 = v128_shr_u8(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u8
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u8
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u8
739
740
0
/* Applies v128_shr_s8 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_s8(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_s8(a.val[1], c);
  const v128 r0 = v128_shr_s8(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s8
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s8
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s8
743
744
0
/* Applies v128_shl_16 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shl_16(v256 a, const unsigned int c) {
  const v128 r1 = v128_shl_16(a.val[1], c);
  const v128 r0 = v128_shl_16(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shl_16
Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_16
Unexecuted instantiation: cdef_block_sse4.c:v256_shl_16
747
748
0
/* Applies v128_shr_u16 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_u16(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_u16(a.val[1], c);
  const v128 r0 = v128_shr_u16(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u16
751
752
0
/* Applies v128_shr_s16 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_s16(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_s16(a.val[1], c);
  const v128 r0 = v128_shr_s16(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s16
755
756
0
/* Applies v128_shl_32 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shl_32(v256 a, const unsigned int c) {
  const v128 r1 = v128_shl_32(a.val[1], c);
  const v128 r0 = v128_shl_32(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shl_32
Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_32
Unexecuted instantiation: cdef_block_sse4.c:v256_shl_32
759
760
0
/* Applies v128_shr_u32 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_u32(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_u32(a.val[1], c);
  const v128 r0 = v128_shr_u32(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u32
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u32
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u32
763
764
0
/* Applies v128_shr_s32 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_s32(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_s32(a.val[1], c);
  const v128 r0 = v128_shr_s32(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s32
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s32
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s32
767
768
0
/* Applies v128_shl_64 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shl_64(v256 a, const unsigned int c) {
  const v128 r1 = v128_shl_64(a.val[1], c);
  const v128 r0 = v128_shl_64(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shl_64
Unexecuted instantiation: cdef_block_ssse3.c:v256_shl_64
Unexecuted instantiation: cdef_block_sse4.c:v256_shl_64
771
772
0
/* Applies v128_shr_u64 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_u64(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_u64(a.val[1], c);
  const v128 r0 = v128_shr_u64(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_u64
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_u64
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_u64
775
776
0
/* Applies v128_shr_s64 with shift count `c` to each 128-bit half of `a`. */
SIMD_INLINE v256 v256_shr_s64(v256 a, const unsigned int c) {
  const v128 r1 = v128_shr_s64(a.val[1], c);
  const v128 r0 = v128_shr_s64(a.val[0], c);
  return v256_from_v128(r1, r0);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_shr_s64
Unexecuted instantiation: cdef_block_ssse3.c:v256_shr_s64
Unexecuted instantiation: cdef_block_sse4.c:v256_shr_s64
779
780
/* These intrinsics require immediate values, so we must use #defines
   to enforce that. */

/* Shifts the 256-bit value `a` left by `n` bytes.
   - n < 16: val[1] is shifted by n and OR-ed with the bytes carried out of
     val[0]; val[0] is shifted by n.
   - n >= 16: val[0], shifted by a further (n - 16) bytes when n > 16, is
     passed as the first half and the second half is zero.
   `a` is parenthesised at every use so that any expression may be passed.
   Both arguments are expanded more than once and must be free of side
   effects. */
#define v256_shl_n_byte(a, n)                                                 \
  ((n) < 16                                                                   \
       ? v256_from_v128(v128_or(v128_shl_n_byte((a).val[1], n),               \
                                v128_shr_n_byte((a).val[0], 16 - (n))),       \
                        v128_shl_n_byte((a).val[0], (n)))                     \
       : v256_from_v128(                                                      \
             (n) > 16 ? v128_shl_n_byte((a).val[0], (n)-16) : (a).val[0],     \
             v128_zero()))
789
790
/* Shifts the 256-bit value `a` right by `n` bytes.
   - n == 0: yields `a` unchanged.
   - n < 16: val[1] is shifted by n; val[0] is shifted by n and OR-ed with
     the bytes carried out of val[1].
   - n >= 16: the first half is zero and the second half is val[1], shifted
     by a further (n - 16) bytes when n > 16.
   `a` and `n` are parenthesised at every use (including the `== 0` test) so
   that any expression may be passed. Both arguments are expanded more than
   once and must be free of side effects. */
#define v256_shr_n_byte(a, n)                                                 \
  ((n) == 0                                                                   \
       ? (a)                                                                  \
       : ((n) < 16                                                            \
              ? v256_from_v128(                                               \
                    v128_shr_n_byte((a).val[1], n),                           \
                    v128_or(v128_shr_n_byte((a).val[0], n),                   \
                            v128_shl_n_byte((a).val[1], 16 - (n))))           \
              : v256_from_v128(v128_zero(),                                   \
                               (n) > 16                                       \
                                   ? v128_shr_n_byte((a).val[1], (n)-16)      \
                                   : (a).val[1])))
800
801
/* For c != 0, ORs `b` shifted right by `c` bytes with `a` shifted left by
   (32 - c) bytes; for c == 0 the result is `b`.
   The arguments are parenthesised before being forwarded or returned so that
   any expression may be passed. They are expanded more than once and must be
   free of side effects. */
#define v256_align(a, b, c)                                 \
  ((c) ? v256_or(v256_shr_n_byte((b), (c)),                 \
                 v256_shl_n_byte((a), 32 - (c)))            \
       : (b))
803
804
/* Immediate-count shifts: each macro applies the v128 macro of the same
   suffix to val[1] and val[0] of `a` and joins the results.
   `a` is parenthesised so that any expression may be passed; it is expanded
   twice and must be free of side effects. */
#define v256_shl_n_8(a, n) \
  v256_from_v128(v128_shl_n_8((a).val[1], n), v128_shl_n_8((a).val[0], n))
#define v256_shl_n_16(a, n) \
  v256_from_v128(v128_shl_n_16((a).val[1], n), v128_shl_n_16((a).val[0], n))
#define v256_shl_n_32(a, n) \
  v256_from_v128(v128_shl_n_32((a).val[1], n), v128_shl_n_32((a).val[0], n))
#define v256_shl_n_64(a, n) \
  v256_from_v128(v128_shl_n_64((a).val[1], n), v128_shl_n_64((a).val[0], n))
#define v256_shr_n_u8(a, n) \
  v256_from_v128(v128_shr_n_u8((a).val[1], n), v128_shr_n_u8((a).val[0], n))
#define v256_shr_n_u16(a, n) \
  v256_from_v128(v128_shr_n_u16((a).val[1], n), v128_shr_n_u16((a).val[0], n))
#define v256_shr_n_u32(a, n) \
  v256_from_v128(v128_shr_n_u32((a).val[1], n), v128_shr_n_u32((a).val[0], n))
#define v256_shr_n_u64(a, n) \
  v256_from_v128(v128_shr_n_u64((a).val[1], n), v128_shr_n_u64((a).val[0], n))
#define v256_shr_n_s8(a, n) \
  v256_from_v128(v128_shr_n_s8((a).val[1], n), v128_shr_n_s8((a).val[0], n))
#define v256_shr_n_s16(a, n) \
  v256_from_v128(v128_shr_n_s16((a).val[1], n), v128_shr_n_s16((a).val[0], n))
#define v256_shr_n_s32(a, n) \
  v256_from_v128(v128_shr_n_s32((a).val[1], n), v128_shr_n_s32((a).val[0], n))
#define v256_shr_n_s64(a, n) \
  v256_from_v128(v128_shr_n_s64((a).val[1], n), v128_shr_n_s64((a).val[0], n))
828
829
/* Word shifts: a shift by n words is expressed as a byte shift by 2 * n. */
#define v256_shr_n_word(a, n) v256_shr_n_byte(a, (n) * 2)
#define v256_shl_n_word(a, n) v256_shl_n_byte(a, (n) * 2)
831
832
typedef struct {
833
  sad128_internal_u16 val[2];
834
} sad256_internal_u16;
835
836
0
/* Returns an accumulator whose two halves each come from
   v128_sad_u16_init(). */
SIMD_INLINE sad256_internal_u16 v256_sad_u16_init(void) {
  sad256_internal_u16 acc;
  acc.val[0] = v128_sad_u16_init();
  acc.val[1] = v128_sad_u16_init();
  return acc;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16_init
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16_init
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16_init
842
843
/* Implementation dependent return value.  Result must be finalised with
844
   v256_sad_u16_sum().
845
   The result for more than 16 v256_sad_u16() calls is undefined. */
846
SIMD_INLINE sad256_internal_u16 v256_sad_u16(sad256_internal_u16 s, v256 a,
847
0
                                             v256 b) {
848
0
  sad256_internal_u16 t;
849
0
  t.val[1] = v128_sad_u16(s.val[1], a.val[1], b.val[1]);
850
0
  t.val[0] = v128_sad_u16(s.val[0], a.val[0], b.val[0]);
851
0
  return t;
852
0
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16
853
854
0
/* Finalises an accumulator: adds v128_sad_u16_sum() of both halves. */
SIMD_INLINE uint32_t v256_sad_u16_sum(sad256_internal_u16 s) {
  return v128_sad_u16_sum(s.val[0]) + v128_sad_u16_sum(s.val[1]);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_sad_u16_sum
Unexecuted instantiation: cdef_block_ssse3.c:v256_sad_u16_sum
Unexecuted instantiation: cdef_block_sse4.c:v256_sad_u16_sum
857
858
typedef struct {
859
  ssd128_internal_s16 val[2];
860
} ssd256_internal_s16;
861
862
0
/* Returns an accumulator whose two halves each come from
   v128_ssd_s16_init(). */
SIMD_INLINE ssd256_internal_s16 v256_ssd_s16_init(void) {
  ssd256_internal_s16 acc;
  acc.val[0] = v128_ssd_s16_init();
  acc.val[1] = v128_ssd_s16_init();
  return acc;
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16_init
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16_init
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16_init
868
869
/* Implementation dependent return value.  Result must be finalised with
870
 * v256_ssd_s16_sum(). */
871
SIMD_INLINE ssd256_internal_s16 v256_ssd_s16(ssd256_internal_s16 s, v256 a,
872
0
                                             v256 b) {
873
0
  ssd256_internal_s16 t;
874
0
  t.val[1] = v128_ssd_s16(s.val[1], a.val[1], b.val[1]);
875
0
  t.val[0] = v128_ssd_s16(s.val[0], a.val[0], b.val[0]);
876
0
  return t;
877
0
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16
878
879
0
/* Finalises an accumulator: adds v128_ssd_s16_sum() of both halves. */
SIMD_INLINE uint64_t v256_ssd_s16_sum(ssd256_internal_s16 s) {
  return v128_ssd_s16_sum(s.val[0]) + v128_ssd_s16_sum(s.val[1]);
}
Unexecuted instantiation: cdef_block_sse2.c:v256_ssd_s16_sum
Unexecuted instantiation: cdef_block_ssse3.c:v256_ssd_s16_sum
Unexecuted instantiation: cdef_block_sse4.c:v256_ssd_s16_sum
882
883
#endif  // AOM_AOM_DSP_SIMD_V256_INTRINSICS_V128_H_